camdict 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,128 @@
1
+ require 'camdict/common'
2
+
3
module Camdict
  # One explanation of a word, parsed from a def-block node.
  class Explanation
    # css_text is not defined in this file; it comes from this mixin.
    include Camdict::Common

    # Learner level: a symbol indicating the level at which learners know
    # this meaning.
    # A1: Beginner,     A2: Elementary,
    # B1: Intermediate, B2: Upper-Intermediate,
    # C1: Advanced,     C2: Proficiency
    attr_reader :level

    # Example sentences (see #get_examples for the exact shape).
    attr_reader :examples

    # Synonym word
    attr_reader :synonym

    # Opposite word
    attr_reader :opposite

    # A meaning of the word
    attr_reader :meaning

    # One or two words usage note. For example, slang.
    attr_reader :usage

    # The region in which this meaning is used - UK or US.
    attr_reader :region

    # For a specific explanation, the word may have a variant form.
    attr_reader :variant

    # Grammar code. Full list is http://dictionary.cambridge.org/help/codes.html
    attr_reader :gc

    # Parse +html+ to get level, meaning, example sentences, synonym, opposite,
    # usage, grammar code, region, variant.
    #
    # +html+ must respond to +css+ (it is queried with CSS selectors);
    # presumably a Nokogiri node for one def-block - confirm against callers.
    def initialize(html)
      @html     = html
      @level    = get_level           # String
      @variant  = get_variant         # String
      @meaning  = get_meaning         # String
      @gc       = css_text(".gcs")    # String
      @usage    = css_text(".usage")  # String
      @region   = css_text(".region") # String
      @examples = get_examples        # [Sentence]
      @synonym  = get_synonym         # String
      @opposite = get_opposite        # String
      # TODO: add usage panel - the word: somewhere.
    end

    private

    # A meaning may have a symbol representing the difficulty from A1-C2.
    def get_level
      css_text(".def-info .epp-xref")
    end

    # An explanation may carry a variant form (word or phrase) that has the
    # same meaning.
    def get_variant
      css_text(".v[title='Variant form']")
    end

    # The meaning of a word for this explanation.
    def get_meaning
      css_text(".def")
    end

    # Collect the example sentences found under ".examp".
    # Returns an array of Sentence, or nil when there is no example node.
    # The array is also stored in @examples before being returned.
    def get_examples
      example_nodes = @html.css(".examp")
      return if example_nodes.empty?

      @examples = example_nodes.map { |example| Sentence.new(example) }
    end

    # Parse and get synonym word
    def get_synonym
      css_text(".entry-xref[type='Synonym'] .x-h")
    end

    # Parse and get opposite word
    def get_opposite
      css_text(".entry-xref[type='Opposite'] .x-h")
    end

    # Parse the html to get the example sentence and its typical usage
    # information associated with this sentence.
    class Sentence
      # css_text is not defined in this file; it comes from this mixin.
      include Camdict::Common

      # Get the grammar code or usage in this sentence.
      # It means how the word is used in this sentence.
      # For example, a grammar code for the word -
      # 'somewhere' is "+to infinitive". I'm looking for somewhere to eat.
      attr_reader :usage

      # Get one sentence inside an example block.
      attr_reader :sentence

      # New a sentence object from +html+ containing the eg block.
      def initialize(html)
        @html     = html
        @usage    = get_usage
        @sentence = get_sentence
      end

      private

      # Grammar code (".gcs") for this sentence; when that lookup is falsy,
      # fall back to the usage note (".usage").
      def get_usage
        grammar_code = css_text(".gcs")
        grammar_code || css_text(".usage")
      end

      # Get sentence inside example block(.eg).
      def get_sentence
        css_text(".eg")
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
require "open-uri"

module Camdict
  # HTTP helpers used to download pages from a remote site.
  module HTTP
    # A default user agent string for this http client. It can be customised.
    AGENT =
      "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0"

    # Thin wrapper around open-uri that returns parsed HTML.
    class Client
      # Download a html page from a remote site, and return a Nokogiri::HTML
      # document built from the response body.
      #
      # +url+    is percent-escaped by this method, so callers may pass
      #          phrases containing spaces ("look at sth").
      # +agtstr+ is sent as the User-Agent header; the default is AGENT.
      #
      # Only URL schemes that open-uri can open (http, https, ftp) are
      # accepted. Unlike Kernel#open, this never reads a local file and never
      # spawns a "|command" pipe. Errors raised by URI.parse or open-uri
      # (e.g. URI::InvalidURIError, OpenURI::HTTPError) propagate to the
      # caller.
      def self.get_html(url, agtstr=AGENT)
        # URI.escape was removed in Ruby 3.0; the parser object offers the
        # same escaping on both old and new Rubies.
        escaped = URI::DEFAULT_PARSER.escape(url)
        # The block form lets open-uri close the underlying IO/Tempfile once
        # the page has been handed to Nokogiri.
        URI.parse(escaped).open("User-Agent" => agtstr) do |page|
          Nokogiri::HTML(page)
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require 'camdict/client'
2
+ require 'camdict/definition'
3
+
4
module Camdict
  # Get all definitions data about a word or phrase including IPAs,
  # pronunciations, usage sentences, etc. from Camdict Client.
  class Word
    # New a +word+ or phrase.
    #
    # +dictionary+ is handed to Camdict::Client.new unchanged; nil leaves the
    # choice to the client (british by default - presumably, confirm in
    # Camdict::Client).
    def initialize(word, dictionary=nil)
      @word = word
      @dictionary = dictionary
      @raw_definitions = []   # each element is a hash
      @definitions = []       # each element is a Definition object
    end

    # Get all definitions for this word from remote online dictionary.
    #
    # Every call asks a new Camdict::Client for the word again. Returns an
    # array of Camdict::Definition, which is empty when the client finds
    # nothing (including when it answers nil or false).
    def definitions
      client = Camdict::Client.new(@dictionary)
      # Normalise "nothing found" to an empty list so that #found? and the
      # map below never operate on nil.
      @raw_definitions = client.html_definition(@word) || []
      @definitions = @raw_definitions.map do |raw|
        Camdict::Definition.new(@word, raw)
      end
    end

    # Found in the dictionary? Returns the number of found entries, or nil
    # when there is none, so the result can be used directly in a condition
    # (a bare 0 would be truthy in Ruby).
    #
    # Reflects the most recent #definitions call; before the first call it
    # is nil.
    def found?
      @raw_definitions.size.nonzero?
    end

    alias in? found?
  end
end
data/lib/camdict.rb ADDED
@@ -0,0 +1,2 @@
1
+ require "nokogiri"
2
+ require "camdict/word"
data/license ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Pan Gaoyong
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,20 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
module Camdict
  # Tests for Camdict::Client that look up real words.
  # NOTE(review): these appear to need access to the online dictionary -
  # confirm before running them offline.
  class ClientiTest < Test::Unit::TestCase
    # Looking up a made-up word through +fetch+ gives a falsy result.
    # +fetch+ is invoked with +send+, presumably because it is not public.
    def test_fetch
      client = Camdict::Client.new
      outcome = client.send(:fetch, "pppppp")
      assert !outcome
    end

    # "related" has two entries; the keys of the returned hashes are the
    # entry ids.
    def test_html_definition
      client = Camdict::Client.new
      entry_ids = client.html_definition("related").map(&:keys).flatten
      assert_equal ["related_1", "related_2"], entry_ids
    end
  end
end
20
+
@@ -0,0 +1,89 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
module Camdict
  # Tests for Camdict::Definition, reached through Camdict::Word#definitions.
  # NOTE(review): these appear to need access to the online dictionary -
  # confirm before running them offline.
  class DefinitioniTest < Test::Unit::TestCase

    # The last definition of each word carries the expected part of speech;
    # 'correct' is checked for both of its first two definitions.
    def test_part_of_speech
      expectations = {
        'aluminium'   => 'noun',
        'aluminum'    => 'noun',
        'look at sth' => 'phrasal verb',
        'plagiarist'  => 'noun',
        'pass water'  => 'idiom',
        'ruby'        => ['noun', 'adjective']
      }
      expectations.each_pair do |word, expected|
        last_definition = Camdict::Word.new(word).definitions.pop
        assert_equal expected, last_definition.part_of_speech
      end

      correct = Camdict::Word.new('correct').definitions
      assert_equal 'adjective', correct[0].part_of_speech
      assert_equal 'verb', correct[1].part_of_speech
    end

    # The first explanation of the first definition carries the meaning.
    def test_explanations
      first_definition = Camdict::Word.new('pass water').definitions[0]
      explanation = first_definition.explanations.first
      assert_equal "polite expression for urinate", explanation.meaning
    end

    # IPA strings are compared as lists of hexadecimal code points.
    # Each fixture holds: the word, the expected UK code points (:uk_utf8),
    # the expected US code points (:expected), the values expected from
    # ipa.k (:uk_inx) and ipa.s (:spiexp), and which definition to inspect.
    def test_ipa
      fixtures = [
        {
          :word     => "imaginary",
          :uk_utf8  => %w(26a 2c8 6d e6 64 292 2e 26a 2e 6e 259 72 2e 69),
          :expected => %w(26a 2c8 6d e6 64 292 2e 259 2e 6e 65 72 2e 69),
          :uk_inx   => [10,1],
          :spiexp   => nil,
          :which    => 0
        },
        {
          :word     => "plagiarism",
          :uk_utf8  => %w(2c8 70 6c 65 26a 2e 64 292 259 72 2e 26a 2e 7a 259 6d),
          :expected => %w(2c8 70 6c 65 26a 2e 64 292 25a 2e 26a 2e 7a 259 6d),
          :uk_inx   => [8,1,14,1],
          :spiexp   => [13,1],
          :which    => 0
        },
        {
          :word     => "aluminum",
          :uk_utf8  => %w(259 2c8 6c 75 2d0 2e 6d 26a 2e 6e 259 6d),
          :expected => %w(259 2c8 6c 75 2d0 2e 6d 26a 2e 6e 259 6d),
          :uk_inx   => nil,
          :spiexp   => nil,
          :which    => 0
        },
        {
          :word     => "sled",
          :uk_utf8  => nil,
          :expected => nil,
          :uk_inx   => nil,
          :spiexp   => nil,
          :which    => 1
        }
      ]

      # A missing (falsy) IPA is passed through untouched.
      to_hex = lambda do |ipa|
        ipa && ipa.unpack('U*').map { |codepoint| codepoint.to_s 16 }
      end

      fixtures.each do |fixture|
        definition = Camdict::Word.new(fixture[:word]).definitions[fixture[:which]]
        uk_hex = to_hex.call(definition.ipa.uk)
        us_hex = to_hex.call(definition.ipa.us)
        actual_k = definition.ipa.k
        actual_s = definition.ipa.s
        assert_equal fixture[:uk_utf8], uk_hex
        assert_equal fixture[:expected], us_hex
        assert_equal fixture[:uk_inx], actual_k
        assert_equal fixture[:spiexp], actual_s
      end
    end

    # The first definition of 'rubbers' is marked as US.
    def test_region
      first_definition = Camdict::Word.new('rubbers').definitions[0]
      assert_equal "US", first_definition.region
    end

  end
end
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
module Camdict
  # Tests for Camdict::Explanation, reached through Word#definitions.
  # NOTE(review): these appear to need access to the online dictionary -
  # confirm before running them offline.
  class ExplanationiTest < Test::Unit::TestCase
    # Checks levels and example sentences for 'correct' and 'correctly'.
    def test_explanations
      correct = Camdict::Word.new('correct').definitions
      # the first definition is the adjective
      assert_equal "A2", correct.first.explanations.first.level

      last_first = correct.last.explanations.first
      assert_equal "B2", last_first.level
      assert_equal "I've got 30 exam papers to correct.",
                   correct.last.explanations.first.examples.last.sentence

      correctly = Camdict::Word.new('correctly').definitions
      explanations = correctly.first.explanations
      assert_equal "Have I pronounced your name correctly?",
                   explanations[2].examples[0].sentence
      assert_equal "B1", explanations[2].level
    end

    # The last explanation of a phrase entry carries the phrase meaning.
    def test_phrase_meaning
      definitions = Camdict::Word.new('blow your nose').definitions
      last_explanation = definitions.first.explanations.last
      expected = 'to force air from your lungs and through your nose to clear it'
      assert_equal expected, last_explanation.meaning
    end

  end
end
@@ -0,0 +1,72 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
module Camdict
  # A search result list with two entries for "related" and one for
  # "stress-related"; used as the fixture for test_mentry_links.
  RESULTLIST = <<EoHTM
<ul class="result-list">
<li><a href="http://dictionary.cambridge.org/dictionary/british/related_1" title="Definition of related adjective (CONNECTED) in British English"><span class='arl1'><span class="base"><b class="hw">related</b></span> <span title="A word that describes a noun or pronoun." class="pos">adjective</span> <b class="gw" title="Guide word: helps you find the right meaning when a word has more than one meaning">(CONNECTED)</b></span></a></li>
<li><a href="http://dictionary.cambridge.org/dictionary/british/related_2" title="Definition of related adjective (FAMILY) in British English"><span class='arl1'><span class="base"><b class="hw">related</b></span> <span title="A word that describes a noun or pronoun." class="pos">adjective</span> <b class="gw" title="Guide word: helps you find the right meaning when a word has more than one meaning">(FAMILY)</b></span></a></li>
<li><a href="http://dictionary.cambridge.org/dictionary/british/stress-related" title="Definition of stress-related adjective in British English"><span class='arl2'><span class="base"><b class="hw">stress-related</b></span> <span title="A word that describes a noun or pronoun." class="pos">adjective</span></span></a></li>
</ul>
EoHTM

  # Tests for Camdict::Client helpers, driven by inline HTML fixtures.
  # Helpers are invoked with +send+, presumably because they are not public.
  class ClientTest < Test::Unit::TestCase

    # The dictionary is "british" unless another one is given.
    def test_new
      default_client = Camdict::Client.new
      assert default_client.instance_variable_get(:@dictionary) == "british"
      american_client = Camdict::Client.new("american-english")
      assert american_client.instance_variable_get(:@dictionary) == "american-english"
    end

    # A page with a di-head block counts as a single definition page.
    def test_single_def?
      client = Camdict::Client.new
      page = '<div class="di-head"> <div class="di-title"> <h1 class="hw">'
      assert client.send(:single_def?, Nokogiri::HTML(page))
      assert client.send(:definition_page?, Nokogiri::HTML(page))
    end

    # The entry id is the last path segment of the entry url.
    def test_entry_id
      client = Camdict::Client.new
      entry_url = "http://dictionary.cambridge.org/british/related_1"
      assert_equal "related_1", client.send(:entry_id, entry_url)
    end

    # A result matches when its headword equals the searched word; a
    # "a/b" headword matches either alternative.
    def test_matched_word?
      client = Camdict::Client.new
      exact       = %q(<li><span class="base"><b class="hw">related</b></span></li>)
      compound    = %q(<li><span class="base"><b class="hw">stress-related)
      alternation = %q(<span class="base">knock around/about)
      assert(client.send(:matched_word?, "related", Nokogiri::HTML(exact)))
      assert(!client.send(:matched_word?, "related", Nokogiri::HTML(compound)))
      assert(client.send(:matched_word?, "knock around", Nokogiri::HTML(alternation)))
      assert(client.send(:matched_word?, "knock about", Nokogiri::HTML(alternation)))
    end

    # Only the links of entries matching the searched word are returned.
    def test_mentry_links
      client = Camdict::Client.new
      base_url = "http://dictionary.cambridge.org/dictionary/british/"
      expected_result = %w(related_1 related_2).map { |entry| base_url + entry }
      links = client.send(:mentry_links, "related", Nokogiri::HTML(RESULTLIST))
      assert expected_result == links
    end

    def test_di_head
      # Nokogiri version 1.? and later required for this test case
      # but previous versions should also work with camdict
      # you won't see this test case failure once
      # https://github.com/sparklemotion/nokogiri/pull/1020 is released.
      client = Camdict::Client.new
      opening = %q(<div class="di-head">)
      inner = ['<h2 class="di-title cdo-section-title-hw">aluminium</h2>',
               '<span class="di-info"><span class="pos">noun</span></span>'].join
      result = client.send(:di_head, Nokogiri::HTML(opening + inner))
      assert_equal(inner, result)
    end

  end

end
@@ -0,0 +1,59 @@
1
+ require 'test/unit'
2
+ require 'camdict'
3
+
4
module Camdict
  # Tests for the helpers provided by Camdict::Common, which is mixed into
  # this test case so that phrase_css can be called directly.
  class CommonTest < Test::Unit::TestCase
    include Camdict::Common

    # String#flatten expands every "a/b" alternative into separate phrases.
    def test_flatten
      cases = {
        "blow a kiss to/at sb" =>
          ['blow a kiss to sb', 'blow a kiss at sb'],
        "blow/blew a kiss" =>
          ['blow a kiss', 'blew a kiss'],
        "knock around/about" =>
          ['knock around', 'knock about'],
        "not give/budge/move an inch" =>
          ['not give an inch', 'not budge an inch', 'not move an inch']
      }
      cases.each do |phrase, expected|
        assert_equal expected, phrase.flatten
      end
    end

    # Array#expand flattens every phrase of the list into one flat list.
    def test_expand
      phrases = ['blow your nose', 'blow a kiss to/at sb']
      expected = ['blow your nose', 'blow a kiss to sb', 'blow a kiss at sb']
      assert_equal expected, phrases.expand
    end

    # has? accepts whole phrases as well as parts of an expanded phrase.
    def test_has?
      phrases = ['blow your nose', 'blow a kiss to/at sb']
      assert phrases.has?("blow your nose")
      assert phrases.has?("blow a kiss to sb")
      assert phrases.has?("a kiss to sb")
      assert phrases.has?("kiss at sb")
      assert "blow your nose".has?('nose')
      assert !phrases[1].flatten.has?(phrases[0])
    end

    # phrase_css reads @html and @word from this object and yields the nodes
    # of the matching phrase block; the variant form is used as the word.
    def test_phrase_css
      meaning = 'to have problems or be in difficult situation:'
      sentence = 'a ship is in difficluties off the coast of Ireland.'
      markup = [
        '<span class="phrase-block">',
        '<span class="phrase">be in difficulties</span>',
        '<span class="v" title="Variant form">be in difficulty</span>',
        '<span class="phrase-body">',
        "<span class='def-block'><span class='def'>#{meaning}</span>",
        "<span class='examp'><span class='eg'>#{sentence}</span></span>"
      ].join
      @html = Nokogiri::HTML markup
      @word = 'be in difficulty'
      explanation = ''
      phrase_css(".def-block") do |node|
        explanation = Camdict::Explanation.new(node)
      end
      assert_equal sentence, explanation.examples.first.sentence
      assert_equal meaning, explanation.meaning
    end

  end
end