camdict 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,345 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
+ module Camdict
5
+ # this word has two derived words
6
+ PLAGIARIZE = '<h2 class="di-title cdo-section-title-hw">plagiarize</h2>' +
7
+ '<span class="runon"><span class=runon-title" title="Derived word">' +
8
+ '<span class="w">plagiarism</span></span>' +
9
+ '<span class="runon-info"><span class="posgram"><span class="pos">noun' +
10
+ '</span></span></span></span>' +
11
+ '<span class="runon"><span class=runon-title" title="Derived word">' +
12
+ '<span class="w">plagiarist</span></span>'+
13
+ '<span class="runon-info"><span class="posgram"><span class="pos">noun' +
14
+ '</span></span></span></span>'
15
+
16
+ class DefinitionTest < Test::Unit::TestCase
17
+
18
+ def test_pos
19
+ html = '<h2 class="di-title cdo-section-title-hw">favourite</h2>' +
20
+ '<span class="di-info"><span class="posgram">' +
21
+ '<span class="pos" title="A word that ...">noun</span></span>' +
22
+ '<span class="spellvar"><span class="v" title="Variant form">' +
23
+ 'favorite</span></span></span>'
24
+ w = Camdict::Definition.new("favourite", :favourite=>html)
25
+ assert_equal "noun", w.send(:pos)
26
+ w = Camdict::Definition.new("favorite", :favourite=>html)
27
+ assert_equal "noun", w.send(:pos)
28
+ html = '<h2 class="di-title cdo-section-title-hw">look at sth</h2>' +
29
+ '<span class="di-info"><span class="anc-info-head"><span class="pos"' +
30
+ ' title="Verb with an adverb...">phrasal verb</span><span class='+
31
+ '"posgram">' +
32
+ '<span class="pos" title="Verb with an adverb...">verb</span></span>'
33
+ w = Camdict::Definition.new("look at sth","look-at-sth_1" => html)
34
+ assert_equal "phrasal verb", w.send(:pos)
35
+ w = Camdict::Definition.new("plagiarist",:plagiarize => PLAGIARIZE)
36
+ assert_equal "noun", w.send(:pos)
37
+ htmli = '<h2 class="di-title cdo-section-title-hw">pass water</h2>' +
38
+ '<div class="di-body"><div class="idiom-block">' +
39
+ '<span class="idiom-body">'
40
+ w = Camdict::Definition.new("pass water","pass-water" => htmli)
41
+ assert_equal "idiom", w.send(:pos)
42
+ end
43
+
44
+ def test_title_word
45
+ html = '<h2 class="di-title cdo-section-title-hw">aluminium</h2>'
46
+ w = Camdict::Definition.new("aluminium",:aluminium=>html)
47
+ assert_equal "aluminium", w.send(:title_word)
48
+ end
49
+
50
+ def test_derived_words
51
+ w = Camdict::Definition.new("plagiarize",:plagiarize => PLAGIARIZE)
52
+ r = w.send :derived_words
53
+ assert_equal %w(plagiarism plagiarist), r
54
+ w = Camdict::Definition.new("mind", :mind=>"<h1>mind</h1>")
55
+ assert ! (w.send :derived_words)
56
+ end
57
+
58
+ def test_where?
59
+ w = Camdict::Definition.new("plagiarize",:plagiarize => PLAGIARIZE)
60
+ assert_equal "title", w.send(:where?)
61
+ w = Camdict::Definition.new("plagiarism",:plagiarize => PLAGIARIZE)
62
+ assert_equal "derived", w.send(:where?)
63
+ html = '<h2 class="di-title cdo-section-title-hw">knock around/about'
64
+ w = Camdict::Definition.new("knock about","knock-around-about"=>html)
65
+ assert_equal "title", w.send(:where?)
66
+ w = Camdict::Definition.new("knock around","knock-around-about"=>html)
67
+ assert_equal "title", w.send(:where?)
68
+ end
69
+
70
+ def test_get_head_variant
71
+ html = '<h2 class="di-title cdo-section-title-hw">aluminium</h2>' +
72
+ '<span class="di-info"><span class="var"><span class="v" ' +
73
+ 'title="Variant form">aluminum</span></span>'
74
+ w = Camdict::Definition.new("aluminum",:aluminium=>html)
75
+ assert_equal "aluminum", w.send(:get_head_variant).first
76
+ end
77
+
78
+ def test_spell_variant
79
+ html = '<span class="spellvar">'+
80
+ '<h2 class="di-title cdo-section-title-hw">aluminium</h2>' +
81
+ '<span class="v" title="Variant form">aluminum</span></span>'
82
+ w = Camdict::Definition.new("aluminum",:aluminium=>html)
83
+ assert_equal "aluminum", w.send(:spell_variant)
84
+ end
85
+
86
+ def test_get_phrase
87
+ phrase = %q(correct me if I'm wrong but...)
88
+ html = '<h2 class="di-title cdo-section-title-hw">aluminium</h2>' +
89
+ '<span class="phrase-block"><span class="phrase">' + phrase
90
+ w = Camdict::Definition.new("correct me",:correct=>html)
91
+ assert_equal [phrase], w.send(:get_phrase)
92
+ end
93
+
94
+ def test_idiom_explanation
95
+ word = 'have it out with sb'
96
+ meaning = 'to talk to someone about something they have done that makes' +
97
+ ' you angry, in order to try to solve the problem:'
98
+ html = "<h2 class='di-title cdo-section-title-hw'>#{word}</h2>" +
99
+ '<span class="idiom-block"><span class="idiom-body">'+
100
+ "<span class='def-block'><span class='def'>#{meaning}"
101
+ def1 = Camdict::Definition.new(word, "have-it-out-with-sb"=>html)
102
+ assert def1.is_idiom
103
+ assert_equal meaning, def1.explanations.first.meaning
104
+ def2 = Camdict::Definition.new("have it out", "have-it-out-with-sb"=>html)
105
+ assert_equal meaning, def2.explanations.first.meaning
106
+ end
107
+
108
+ def test_parse_ipa
109
+ imagin = %w(26a 2c8 6d e6 64 292 2e 26a 2e 6e).map { |c|
110
+ c.to_i 16}.pack 'U*'
111
+ a = %w(259).map { |c| c.to_i 16}.pack 'U'
112
+ ry = %w(72 2e 69).map {|c| c.to_i 16}.pack 'U*'
113
+ html = '<h2 class="di-title cdo-section-title-hw">imaginary</h2>' +
114
+ "<span class='di-info'><span class='ipa'>#{imagin}<span class='sp'>" +
115
+ "#{a}</span>#{ry}</span>"
116
+ w = Camdict::Definition.new("imaginary",:imaginary=>html)
117
+ node = Nokogiri::HTML(html).css(".ipa")
118
+ ukipa = w.send :parse_ipa, node
119
+ actual = {baseipa: ukipa[:baseipa], sindex: ukipa[:sindex]}
120
+ expected = {baseipa: imagin+a+ry, sindex:[10,1]}
121
+ assert_equal expected, actual
122
+ end
123
+
124
+ def test_join_ipa
125
+ html = '<h2 class="di-title cdo-section-title-hw">understand</h2>'
126
+ w = Camdict::Definition.new("understand",:understand=>html)
127
+ # head-tail hyphen
128
+ understand = {
129
+ :uk_utf8 => %w(2cc 28c 6e 2e 64 259 2c8 73 74 e6 6e 64),
130
+ :us_utf8 => %w(2d 64 25a 2d),
131
+ :expected => %w(2cc 28c 6e 2e 64 25a 2c8 73 74 e6 6e 64),
132
+ :us_inx => nil,
133
+ :uk_inx => nil,
134
+ :spiexp => nil
135
+ }
136
+ imaginary = {
137
+ :uk_utf8 => %w(26a 2c8 6d e6 64 292 2e 26a 2e 6e 259 72 2e 69),
138
+ :us_utf8 => %w(2d 259 2e 6e 65 72 2d),
139
+ :expected => %w(26a 2c8 6d e6 64 292 2e 259 2e 6e 65 72 2e 69),
140
+ :us_inx => nil,
141
+ :uk_inx => [10,1],
142
+ :spiexp => nil
143
+ }
144
+ # left hyphen
145
+ plagiarize = {
146
+ :uk_utf8 => %w(2c8 70 6c 65 26a 2e 64 292 259 72 2e 61 26a 7a),
147
+ :us_utf8 => %w(2d 64 292 259 2e 72 61 26a 7a),
148
+ :expected => %w(2c8 70 6c 65 26a 2e 64 292 259 2e 72 61 26a 7a),
149
+ :us_inx => nil,
150
+ :uk_inx => [8,1],
151
+ :spiexp => nil
152
+ }
153
+ plagiarism = {
154
+ :uk_utf8 => %w(2c8 70 6c 65 26a 2e 64 292 259 72 2e 61 26a 7a),
155
+ :us_utf8 => %w(2d 64 292 259 72 2e 26a 2e 7a 259 6d),
156
+ :expected => %w(2c8 70 6c 65 26a 2e 64 292 259 72 2e 26a 2e 7a 259 6d),
157
+ :us_inx => [3,1,9,1],
158
+ :uk_inx => [8,1],
159
+ :spiexp => [8,1,14,1]
160
+ }
161
+ painting = {
162
+ :uk_utf8 => %w(2c8 70 65 26a 6e 2e 74 26a 14b),
163
+ :us_utf8 => %w(2d 74 32c 26a 14b),
164
+ :expected => %w(2c8 70 65 26a 6e 2e 74 32c 26a 14b),
165
+ :us_inx => nil,
166
+ :uk_inx => nil,
167
+ :spiexp => nil
168
+ }
169
+ dictionary = {
170
+ :uk_utf8 => %w(2c8 64 26a 6b 2e 283 259 6e 2e 259 72 2e 69),
171
+ :us_utf8 => %w(2d 65 72 2e 69),
172
+ :expected => %w(2c8 64 26a 6b 2e 283 259 6e 2e 65 72 2e 69),
173
+ :us_inx => nil,
174
+ :uk_inx => [6,1, 9,1],
175
+ :spiexp => [6,1]
176
+ }
177
+ harmfulness = {
178
+ :uk_utf8 => %w(2c8 68 251 2d0 6d 2e 66 259 6c),
179
+ :us_utf8 => %w(2d 6e 259 73),
180
+ :expected => %w(2c8 68 251 2d0 6d 2e 66 259 6c 6e 259 73),
181
+ :us_inx => nil,
182
+ :uk_inx => [7,1],
183
+ :spiexp => [7,1]
184
+ }
185
+ # right hyphen
186
+ toxic = {
187
+ :uk_utf8 => %w(2c8 74 252 6b 2e 73 26a 6b),
188
+ :us_utf8 => %w(2c8 74 251 2d0 6b 2d),
189
+ :expected => %w(2c8 74 251 2d0 6b 73 26a 6b),
190
+ :us_inx => nil,
191
+ :uk_inx => nil,
192
+ :spiexp => nil
193
+ }
194
+ data = [understand, imaginary, plagiarize, plagiarism, painting,
195
+ harmfulness, toxic]
196
+ data.each_with_index { |word, i|
197
+ full = word[:uk_utf8].map {|c| c.to_i 16}.pack 'U*'
198
+ short = word[:us_utf8].map {|c| c.to_i 16}.pack 'U*'
199
+ full_sp = { baseipa: full, sindex: word[:uk_inx]}
200
+ short_sp = { baseipa: short, sindex: word[:us_inx]}
201
+ us = w.send :join_ipa, full_sp, short_sp
202
+ a = us[:baseipa].unpack 'U*'
203
+ spind = us[:sindex]
204
+ actual = a.map { |n| n.to_s 16 }
205
+ assert_equal word[:expected], actual
206
+ assert_equal word[:spiexp], spind
207
+ }
208
+ end
209
+
210
+ def test_mix_spi
211
+ html = '<h2 class="di-title cdo-section-title-hw">understand</h2>'
212
+ w = Camdict::Definition.new("understand",:understand=>html)
213
+ # an IPA is 12 letters long, 012345678901 -345-,
214
+ lsp = [2,1, 5,1, 9,2]
215
+ lrange = 0..3
216
+ csp = [ 3, 2 ]
217
+ cn = 3
218
+ rrang = 8..12
219
+ expected = [2,1, 6,2, 9,2]
220
+ actual = w.send :mix_spi,lsp, lrange, csp, cn, lsp, rrang
221
+ assert_equal expected, actual
222
+ end
223
+
224
+ def test_get_pronunciation
225
+ title = '<h2 class="di-title cdo-section-title-hw">understand</h2>'
226
+ ogglink = 'http://cam.org/british/ukunder112.ogg'
227
+ mp3link = 'http://cam.org/british/ukunder112.mp3'
228
+ html = title + %q(<span class='di-info'><a class='pron-uk' ) +
229
+ "data-src-ogg='#{ogglink}' data-src-mp3='#{mp3link}' href='#'>"
230
+ w = Camdict::Definition.new("understand",:understand=>html)
231
+ pron = w.send :get_pronunciation
232
+ assert_equal mp3link, pron.uk.mp3
233
+ assert_equal ogglink, pron.uk.ogg
234
+ assert_nil pron.us.mp3
235
+ assert_equal mp3link, w.pronunciation.uk.mp3
236
+ end
237
+
238
+ def test_get_region
239
+ belaughing = {
240
+ :word => 'be laughing',
241
+ :expected => 'UK',
242
+ :piece => ''
243
+ }
244
+ favorite = {
245
+ :word => 'favorite',
246
+ :expected => 'US',
247
+ :piece => "<span class='spellvar'><span class='region'>US</span>" +
248
+ "<span class='v' title='Variant form'>favorite"
249
+ }
250
+ aluminum = {
251
+ :word => 'aluminum',
252
+ :expected => 'US',
253
+ :piece => "<span class='var'><span class='region'>US</span>" +
254
+ "<span class='v' title='Variant form'>aluminum"
255
+ }
256
+ data = [belaughing, favorite, aluminum]
257
+ data.each { |d|
258
+ title = "<h2 class='di-title cdo-section-title-hw'>#{d[:word]}</h2>"
259
+ html = title + %q(<span class='di-info'><a class='lab'><span class=) +
260
+ "'region'>UK</span><span class='usage'>informal</span>"
261
+ w = Camdict::Definition.new(d[:word], d[:word]=>html+d[:piece])
262
+ region = w.send :get_region
263
+ assert_equal d[:expected], region
264
+ }
265
+ end
266
+
267
+ def test_gc
268
+ plagiarize = {
269
+ :word => 'plagiarize',
270
+ :expected => 'I or T',
271
+ :piece => ''
272
+ }
273
+ data = [plagiarize]
274
+ data.each { |d|
275
+ title = "<h2 class='di-title cdo-section-title-hw'>#{d[:word]}</h2>"
276
+ html = title + %q(<span class='di-info'><span class='gcs'>) +
277
+ "<span class='gc'>I</span> or <span class='gc'>T</span>"
278
+ w = Camdict::Definition.new(d[:word], d[:word]=>html+d[:piece])
279
+ actual = w.send :get_gc
280
+ assert_equal d[:expected], actual
281
+ }
282
+ end
283
+
284
+ def test_get_plural
285
+ mouse = {
286
+ :word => 'mouse',
287
+ :expected => 'mice',
288
+ :piece => "<span class='inf'>mice</span>"
289
+ }
290
+ fish = {
291
+ :word => 'fish',
292
+ :expected => %w(fish fishes),
293
+ :piece => '<span class="inf">fish</span> or <span class="inf">fishes'
294
+ }
295
+ data = [mouse, fish]
296
+ data.each { |d|
297
+ title = "<h2 class='di-title cdo-section-title-hw'>#{d[:word]}</h2>"
298
+ html = title + %q(<span class='di-info'><span class='inf-group' ) +
299
+ "type='plural'>"
300
+ w = Camdict::Definition.new(d[:word], d[:word]=>html+d[:piece])
301
+ w.instance_eval { @part_of_speech = 'noun'}
302
+ actual = w.send :get_plural
303
+ assert_equal d[:expected], actual
304
+ }
305
+ end
306
+
307
+ def test_guided_word
308
+ d = { word: 'rubber', expected: 'SUBSTANCE'}
309
+ title = "<h2 class='di-title cdo-section-title-hw'>#{d[:word]}</h2>"
310
+ html = title + %q(<span class='di-info'><strong class='gw'>(SUBSTANCE))
311
+ w = Camdict::Definition.new(d[:word], d[:word]=>html)
312
+ actual = w.send :get_guided_word
313
+ assert_equal d[:expected], actual
314
+ end
315
+
316
+ def test_get_irregular
317
+ blow = {
318
+ :word => 'blow',
319
+ :expected => ['blew','blown', nil],
320
+ :piece => "><span class='inf'>blew</span></span>,<span class='inf-" +
321
+ "group'><span class='inf'>blown</span></span>"
322
+ }
323
+ bet = {
324
+ :word => 'bet',
325
+ :expected => %w(bet bet betting),
326
+ :piece => 'type="pres_part"><span class="inf">betting</span>' +
327
+ '</span><span class="inf-group" type="past"><span class="inf">bet'
328
+ }
329
+ data = [blow, bet]
330
+ data.each { |d|
331
+ title = "<h2 class='di-title cdo-section-title-hw'>#{d[:word]}</h2>"
332
+ html = title + %q(<span class='di-info'><span class='inf-group' )
333
+ w = Camdict::Definition.new(d[:word], d[:word]=>html+d[:piece])
334
+ expected = w.instance_eval {
335
+ @part_of_speech = 'verb'
336
+ sp, pp, pr = d[:expected]
337
+ Camdict::Definition::Irregular.new(sp, pp, pr)
338
+ }
339
+ actual = w.send :get_irregular
340
+ assert_equal expected, actual
341
+ }
342
+ end
343
+
344
+ end
345
+ end
@@ -0,0 +1,71 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
+ module Camdict
5
+ class ExplanationTest < Test::Unit::TestCase
6
+
7
+ def test_get_level
8
+ html = '<span class="def-info"><span class="epp-xref B1">B1</span>'
9
+ html = Nokogiri::HTML(html)
10
+ exp = Camdict::Explanation.new(html)
11
+ assert_equal 'B1', (exp.send :get_level)
12
+ end
13
+
14
+ def test_get_meaning
15
+ html = '<span class="def">in <a class="query" href="http://cambridge' +
16
+ '.org/british">agreement </a>with the true facts or with what is ' +
17
+ 'generally accepted:'
18
+ html = Nokogiri::HTML(html)
19
+ exp = Camdict::Explanation.new(html)
20
+ expected =
21
+ 'in agreement with the true facts or with what is generally accepted:'
22
+ assert_equal expected, (exp.send :get_meaning)
23
+ end
24
+
25
+ def test_gc
26
+ # rubber has region, usage, gc
27
+ html = '<span class="def-info"><span class="gcs">U</span>'
28
+ html = Nokogiri::HTML(html)
29
+ exp = Camdict::Explanation.new(html)
30
+ assert_equal 'U', exp.gc
31
+ end
32
+
33
+ def test_get_sentence
34
+ html = '<span class="eg">a correct answer</span>'
35
+ html = Nokogiri::HTML(html)
36
+ exp = Camdict::Explanation::Sentence.new(html)
37
+ assert_equal 'a correct answer', (exp.send :get_sentence)
38
+ end
39
+
40
+ def test_get_examples
41
+ sent1 = %q(It's not correct to describe them as 'student')
42
+ sent2 = %q("Your name is Angela Black?""That is correct.")
43
+ html = "<span class='examp'><span class='eg'>#{sent1}</span></span>"
44
+ html += "<span class='examp'><span class='eg'>#{sent2}</span>" +
45
+ '<span class="usage">formal</span>'
46
+ html = Nokogiri::HTML(html)
47
+ exp = Camdict::Explanation.new(html)
48
+ expected = exp.send :get_examples
49
+ e1, e2 = expected.flatten
50
+ assert_equal sent1, e1.sentence
51
+ assert_equal sent2, e2.sentence
52
+ assert_equal "formal", e2.usage
53
+ end
54
+
55
+ def test_get_synonym
56
+ html = '<span class="entry-xref" type="Synonym"><span class="x-h">right'
57
+ html = Nokogiri::HTML(html)
58
+ exp = Camdict::Explanation.new(html)
59
+ assert_equal "right", (exp.send :get_synonym)
60
+ end
61
+
62
+ def test_get_opposite
63
+ html =
64
+ '<span class="entry-xref" type="Opposite"><span class="x-h">incorrect'
65
+ html = Nokogiri::HTML(html)
66
+ exp = Camdict::Explanation.new(html)
67
+ assert_equal "incorrect", (exp.send :get_opposite)
68
+ end
69
+
70
+ end
71
+ end
@@ -0,0 +1,28 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
+ module Camdict
5
+ class HTTPClientTest < Test::Unit::TestCase
6
+
7
+ def test_get_html
8
+ require 'webrick'
9
+
10
+ server = WEBrick::HTTPServer.new(:Port=>0, :BindAddress=>"127.0.0.1",
11
+ :Logger=>WEBrick::Log.new(nil,WEBrick::BasicLog::FATAL))
12
+ Thread.new {
13
+ res = Proc.new { |r, q|
14
+ q.body ="hello"
15
+ }
16
+ server.mount_proc '/hi', nil, &res
17
+ server.start
18
+ }
19
+ Thread.new {
20
+ url = "http://127.0.0.1:#{server.config[:Port]}/hi"
21
+ page = Camdict::HTTP::Client.get_html(url)
22
+ server.stop
23
+ assert_equal "hello", page.text
24
+ }.join
25
+ end
26
+
27
+ end
28
+ end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: camdict
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Pan Gaoyong
8
+ - 潘高勇
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-01-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - '>='
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - '>='
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ description: get definitions, pronunciation and example sentences of a word or phrase
29
+ from the online Cambridge dictionaries.
30
+ email: pan.gaoyong@gmail.com
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - license
36
+ - Rakefile
37
+ - README.md
38
+ - lib/camdict.rb
39
+ - lib/camdict/http_client.rb
40
+ - lib/camdict/definition.rb
41
+ - lib/camdict/explanation.rb
42
+ - lib/camdict/common.rb
43
+ - lib/camdict/word.rb
44
+ - lib/camdict/client.rb
45
+ - test/itest_explanation.rb
46
+ - test/test_common.rb
47
+ - test/itest_client.rb
48
+ - test/itest_definition.rb
49
+ - test/test_definition.rb
50
+ - test/test_http_client.rb
51
+ - test/test_client.rb
52
+ - test/test_explanation.rb
53
+ homepage: https://github.com/pan/camdict
54
+ licenses:
55
+ - MIT
56
+ metadata: {}
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - '>='
64
+ - !ruby/object:Gem::Version
65
+ version: 1.9.3
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubyforge_project:
73
+ rubygems_version: 2.1.11
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: online Cambridge dictionary client
77
+ test_files: []