ParsePapers 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/ParsePapers.rb +166 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: de1b47765e9e26384bac136ee564b4377861f065
|
4
|
+
data.tar.gz: 83358fb338917b1f6251106ac55aaa56e414a567
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 280e9a74bd42a353ae9f5e3f9d7b45bcc6d08bf4a9c2e51a867301780ef7d539d728f69da50723e85138eecb61ea58d062bc122aa3660337ec727f27aa6c5005
|
7
|
+
data.tar.gz: 95e1d4ef7fb0dec4b5abdd5e1e998ee79066c9a40db86577b023ae0b78dcf393443926bd9cfc964b156511d8b22b345b67730a1bb2c29610fb08ef650dad2af9
|
data/lib/ParsePapers.rb
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require 'open-uri'
|
3
|
+
require 'rest-client'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
|
7
|
+
# One numbered question taken from an exam-paper HTML page.
#
# The page is read as a flat list of <table> elements. A table "starts" a
# section when any of its <b> elements contains a marker letter followed by a
# digit 1-9: "Q" (question), "M" (answer / mark scheme), "E" (exam note) or
# "N" (info). A section runs from its start table up to the table before the
# next start of the same kind.
#
# Every public reader re-scans the page, and the *_image readers additionally
# perform an HTTP request to the page2images service.
class Question
  # Order in which the section kinds are looked up; the last section of one
  # kind ends where the first section of the following kind begins.
  SECTION_ORDER = ["questions", "answers", "exam note", "info"].freeze

  # Endpoint and key of the HTML-to-image rendering service.
  # NOTE(review): the API key is committed in source; consider moving it to
  # configuration.
  P2I_ENDPOINT = 'http://api.page2images.com/html2image'.freeze
  P2I_KEY = '0c9242f0d0aeafa3'.freeze

  # @return [Integer] the 1-based question number given to the constructor
  attr_reader :q_num

  # @param q_num [Integer] 1-based number of the question on the page
  # @param url [String] location of the exam-paper HTML document
  def initialize(q_num, url)
    # Kernel#open no longer accepts URLs on Ruby 3.0+; URI.open is the
    # supported open-uri entry point where it exists (Ruby 2.5+).
    io = URI.respond_to?(:open) ? URI.open(url) : open(url)
    @page = Nokogiri::HTML(io)
    @q_num = q_num
    @url = url
  end

  # @return [String] URL of an image rendering of the question tables
  def question_image
    html_2_image(section_html("questions"))
  end

  # @return [String] URL of an image rendering of the answer tables
  def answer_image
    html_2_image(section_html("answers"))
  end

  # @return [String] URL of an image rendering of the exam-note tables
  def exam_notes_image
    html_2_image(section_html("exam note"))
  end

  # @return [String, nil] the "Question source" detail, nil when absent
  def source
    info_detail("Question source")
  end

  # @return [String, nil] the "Description" detail, nil when absent
  def description
    info_detail("Description")
  end

  # @return [Integer] the "Marks" detail as an integer (0 when absent or
  #   not numeric, because of String#to_i / nil.to_i)
  def marks
    info_detail("Marks").to_i
  end

  # @return [String, nil] the "Topic" detail, nil when absent
  def topic
    info_detail("Topic")
  end

  # @return [String, nil] the "Type" detail, nil when absent
  def type
    info_detail("Type")
  end

  private

  # Concatenated HTML of every table in this question's section of +kind+.
  def section_html(kind)
    all(kind)[q_num - 1].join("")
  end

  # Looks +label+ up in the first table of this question's info section.
  def info_detail(label)
    get_detail(label, all("info")[q_num - 1].first)
  end

  # Posts +html+ (wrapped with the doublestruck stylesheet) to the
  # page2images API and returns the "image_url" field of its JSON reply.
  def html_2_image(html)
    document = "<head><link href='http://content.doublestruck.eu/style/ds.css' rel='stylesheet' type='text/css'></head><body> #{html} </body>"
    form = {
      'p2i_html' => document,
      'p2i_key' => P2I_KEY,
      'p2i_url' => @url,
      'p2i_screen' => "705x0",
      'p2i_size' => "705x0"
    }
    response = Net::HTTP.post_form(URI.parse(P2I_ENDPOINT), form)
    JSON.parse(response.body)["image_url"]
  end

  # Collects every section of the given kind, in page order.
  #
  # @param type [String] a section name accepted by #parse_type
  # @return [Array<Array>] one array per section: HTML strings for question,
  #   answer and exam-note sections, raw table nodes for info sections
  # @raise [RuntimeError] when +type+ is not a known section name
  def all(type)
    info_section = parse_type(type) == "N"
    tables = @page.css("table")
    # Scan the page once instead of once per loop iteration.
    starts = find_starts_of(type)
    return [] if starts.empty?

    final_stop =
      if info_section
        # The last info section is only ever its start table.
        starts.last
      else
        next_type = SECTION_ORDER[SECTION_ORDER.index(type).to_i + 1]
        next_start = find_starts_of(next_type).first
        # No following section on the page: run to the last table rather
        # than failing on nil arithmetic.
        (next_start || tables.length) - 1
      end

    starts.each_with_index.map do |start, position|
      stop = position == starts.length - 1 ? final_stop : starts[position + 1] - 1
      nodes = (start..stop).map { |table_index| tables[table_index] }
      info_section ? nodes : nodes.map(&:to_html)
    end
  end

  # True when any <b> element of +table+ contains the marker letter for
  # +type+ immediately followed by a digit 1-9.
  def is_start_of(type, table)
    marker = /#{parse_type(type)}[1-9]/
    table.css("b").any? { |bold| marker =~ bold.content }
  end

  # Indexes (within the page's table list) of the tables that start a
  # section of the given +type+.
  def find_starts_of(type)
    @page.css("table").each_with_index
         .select { |table, _index| is_start_of(type, table) }
         .map { |_table, index| index }
  end

  # Finds the first ".indent1new" element of +info+ whose text contains
  # "<detail>:" and returns that text with the first "<detail>: " removed.
  #
  # @return [String, nil] the detail value, or nil when no element matches
  def get_detail(detail, info)
    info.css(".indent1new").each do |node|
      text = node.content
      return text.sub("#{detail}: ", "") if text.include?("#{detail}:")
    end
    # Previously the node collection leaked out here as the return value.
    nil
  end

  # Maps a human-readable section name to its one-letter page marker.
  #
  # @raise [RuntimeError] when +type+ is not a recognised name
  def parse_type(type)
    case type
    when "question", "a question", "questions" then "Q"
    when "marking", "answer", "markings", "answers" then "M"
    when "exam note", "exam notes", "exam_note" then "E"
    when "note", "info" then "N"
    else raise "invalid type in parse_types"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ParsePapers
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hassan Al-ubeidi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-07-22 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A gem for parsing exampro past papers into question objects
|
14
|
+
email: hassanalubeidi1996@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/ParsePapers.rb
|
20
|
+
homepage: http://rubygems.org/gems/ParsePapers
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.4.5
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Past paper parser
|
44
|
+
test_files: []
|