ParsePapers 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/ParsePapers.rb +166 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: de1b47765e9e26384bac136ee564b4377861f065
4
+ data.tar.gz: 83358fb338917b1f6251106ac55aaa56e414a567
5
+ SHA512:
6
+ metadata.gz: 280e9a74bd42a353ae9f5e3f9d7b45bcc6d08bf4a9c2e51a867301780ef7d539d728f69da50723e85138eecb61ea58d062bc122aa3660337ec727f27aa6c5005
7
+ data.tar.gz: 95e1d4ef7fb0dec4b5abdd5e1e998ee79066c9a40db86577b023ae0b78dcf393443926bd9cfc964b156511d8b22b345b67730a1bb2c29610fb08ef650dad2af9
data/lib/ParsePapers.rb ADDED
@@ -0,0 +1,166 @@
1
+ require "nokogiri"
2
+ require 'open-uri'
3
+ require 'rest-client'
4
+ require 'json'
5
+
6
+
7
# One numbered question taken from a past-paper HTML page.
#
# The page is treated as a flat list of <table> elements. A table "starts"
# an entry when one of its <b> children contains a section letter followed
# by a digit 1-9 (Q = questions, M = answers/markings, E = exam notes,
# N = info). An entry runs from its start table up to the table before the
# next start.
class Question
  # Section names in the order they are searched when looking for the table
  # that ends the last entry of a section, with their one-letter codes.
  SECTION_NAMES = ["questions", "answers", "exam note", "info"].freeze
  SECTION_CODES = %w[Q M E N].freeze

  # @return [Integer] 1-based number of this question within the page
  attr_reader :q_num

  # @param q_num [Integer] 1-based question number
  # @param url [String] http(s)/ftp URL of the paper, or a local file path
  def initialize(q_num, url)
    @page = load_page(url)
    @q_num = q_num
    @url = url
  end

  # @return [String] "image_url" reported by the rendering service for the
  #   question's HTML
  def question_image
    html_2_image(entry_for("questions").join(""))
  end

  # @return [String] "image_url" reported by the rendering service for the
  #   answer's HTML
  def answer_image
    html_2_image(entry_for("answers").join(""))
  end

  # @return [String] "image_url" reported by the rendering service for the
  #   exam note's HTML
  def exam_notes_image
    html_2_image(entry_for("exam note").join(""))
  end

  # @return [String, nil] text after "Question source: ", nil when absent
  def source
    get_detail("Question source", info_node)
  end

  # @return [String, nil] text after "Description: ", nil when absent
  def description
    get_detail("Description", info_node)
  end

  # @return [Integer] value after "Marks: " via to_i (0 when absent)
  def marks
    get_detail("Marks", info_node).to_i
  end

  # @return [String, nil] text after "Topic: ", nil when absent
  def topic
    get_detail("Topic", info_node)
  end

  # @return [String, nil] text after "Type: ", nil when absent
  def type
    get_detail("Type", info_node)
  end

  private

  # Parses the document at +url+. Remote URLs go through open-uri's
  # URI#open rather than Kernel#open: Kernel#open no longer opens URLs on
  # Ruby 3.0+ and would run a subprocess for input starting with "|".
  # Anything else is read as a local file. The handle is closed once parsed.
  def load_page(url)
    if url.to_s =~ %r{\A(?:https?|ftp)://}i
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    else
      File.open(url) { |io| Nokogiri::HTML(io) }
    end
  end

  # Entry of section +type+ belonging to this question.
  #
  # @raise [IndexError] when the page has no entry at q_num for +type+
  def entry_for(type)
    entry = all(type)[q_num - 1]
    raise IndexError, "no #{type} entry for question #{q_num}" if entry.nil?

    entry
  end

  # First table node of this question's info entry.
  def info_node
    entry_for("info").first
  end

  # Wraps +html+ in a minimal document, posts it to the page2images
  # html2image endpoint and returns the "image_url" field of the JSON reply.
  def html_2_image(html)
    document = "<head><link href='http://content.doublestruck.eu/style/ds.css' rel='stylesheet' type='text/css'></head><body> #{html} </body>"
    # NOTE(review): the API key is hard-coded in a published gem; consider
    # moving it to configuration.
    form = {
      'p2i_html' => document,
      'p2i_key' => '0c9242f0d0aeafa3',
      'p2i_url' => @url,
      'p2i_screen' => "705x0",
      'p2i_size' => "705x0"
    }
    response = Net::HTTP.post_form(URI.parse('http://api.page2images.com/html2image'), form)
    JSON.parse(response.body)["image_url"]
  end

  # Groups the page's tables into entries of section +type+.
  #
  # For Q/M/E sections each entry is an Array of HTML strings. For the info
  # section entries hold the table nodes themselves, and the last info entry
  # holds only its start table.
  #
  # @return [Array<Array>] one inner array per entry, in page order
  def all(type)
    code = parse_type(type)
    tables = @page.css("table")
    starts = find_starts_of(type)

    starts.each_with_index.map do |start, position|
      following = starts[position + 1]
      if following
        nodes = (start...following).map { |i| tables[i] }
        code == "N" ? nodes : nodes.map(&:to_html)
      elsif code == "N"
        [tables[start]]
      else
        # Last entry of the section: it runs up to where a later section
        # begins, or to the end of the page if no later section exists.
        (start...section_end(code, tables.length)).map { |i| tables[i].to_html }
      end
    end
  end

  # Index of the first start table of the nearest later section that has
  # any entries; +table_count+ when none of the later sections is present.
  def section_end(code, table_count)
    SECTION_NAMES.drop(SECTION_CODES.index(code) + 1).each do |name|
      first_start = find_starts_of(name).first
      return first_start if first_start
    end
    table_count
  end

  # True when any <b> inside +table+ contains the section letter for +type+
  # immediately followed by a digit 1-9.
  def is_start_of(type, table)
    marker = /#{parse_type(type)}[1-9]/
    table.css("b").any? { |bold| bold.content =~ marker }
  end

  # Indexes (within the page's table list) of the tables that start an entry
  # of section +type+. Cached per section letter because the parsed page is
  # not modified after construction.
  def find_starts_of(type)
    @starts_cache ||= {}
    @starts_cache[parse_type(type)] ||=
      @page.css("table").each_with_index
           .select { |table, _| is_start_of(type, table) }
           .map { |_, index| index }
  end

  # Looks through the ".indent1new" elements of +info+ for one containing
  # "<detail>:" and returns its text with the first "<detail>: " removed.
  #
  # @return [String, nil] nil when no element mentions the detail
  def get_detail(detail, info)
    info.css(".indent1new").each do |node|
      text = node.content
      return text.sub("#{detail}: ", "") if text.include?("#{detail}:")
    end
    nil
  end

  # Maps a section name (or one of its aliases) to its one-letter code.
  #
  # @raise [RuntimeError] for an unknown section name
  def parse_type(type)
    case type
    when "question", "a question", "questions" then "Q"
    when "marking", "answer", "markings", "answers" then "M"
    when "exam note", "exam notes", "exam_note" then "E"
    when "note", "info" then "N"
    else raise "invalid type in parse_types"
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ParsePapers
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Hassan Al-ubeidi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-07-22 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A gem for parsing exampro past papers into question objects
14
+ email: hassanalubeidi1996@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/ParsePapers.rb
20
+ homepage: http://rubygems.org/gems/ParsePapers
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.5
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Past paper parser
44
+ test_files: []