camdict 1.0.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +28 -33
- data/lib/camdict/array_ext.rb +37 -0
- data/lib/camdict/client.rb +133 -97
- data/lib/camdict/common.rb +25 -143
- data/lib/camdict/definition.rb +65 -596
- data/lib/camdict/entry.rb +76 -0
- data/lib/camdict/exception.rb +5 -0
- data/lib/camdict/explanation.rb +29 -66
- data/lib/camdict/http_client.rb +14 -10
- data/lib/camdict/ipa.rb +52 -0
- data/lib/camdict/pronunciation.rb +53 -0
- data/lib/camdict/sentence.rb +38 -0
- data/lib/camdict/string_ext.rb +141 -0
- data/lib/camdict/word.rb +83 -17
- data/test/debug.rb +60 -0
- data/test/helper.rb +2 -0
- data/test/itest_client.rb +39 -8
- data/test/itest_definition.rb +24 -75
- data/test/itest_entry.rb +37 -0
- data/test/itest_explanation.rb +41 -20
- data/test/itest_ipa.rb +105 -0
- data/test/itest_pronunciation.rb +74 -0
- data/test/itest_word.rb +49 -0
- data/test/test_array_ext.rb +23 -0
- data/test/test_client.rb +35 -42
- data/test/test_common.rb +22 -78
- data/test/test_explanation.rb +21 -25
- data/test/test_http_client.rb +27 -13
- data/test/test_string_ext.rb +95 -0
- metadata +42 -7
- data/test/test_definition.rb +0 -345
data/lib/camdict/word.rb
CHANGED
@@ -1,36 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'camdict/client'
|
2
3
|
require 'camdict/definition'
|
3
4
|
|
4
5
|
module Camdict
|
5
|
-
# Get all definitions data about a word or phrase including IPAs,
|
6
|
-
# pronunciations, usage sentences, etc. from
|
6
|
+
# Get all definitions data about a word or phrase including IPAs,
|
7
|
+
# pronunciations, usage sentences, etc. from Cambridge dictionary.
|
7
8
|
class Word
|
8
|
-
|
9
9
|
# New a +word+ or phrase, default +dictionary+ is british.
|
10
|
-
def initialize(word, dictionary=nil)
|
11
|
-
@word
|
10
|
+
def initialize(word, dictionary = nil)
|
11
|
+
@word = word
|
12
12
|
@dictionary = dictionary
|
13
|
-
|
14
|
-
|
13
|
+
end
|
14
|
+
|
15
|
+
def part_of_speech
|
16
|
+
s = definition.senses.map(&:part_of_speech).uniq
|
17
|
+
return s.first if s.count < 2
|
18
|
+
s
|
19
|
+
end
|
20
|
+
|
21
|
+
def pronunciation(region = :uk)
|
22
|
+
p = definition.pronunciation.send(region)
|
23
|
+
p.mp3 || p.ogg
|
24
|
+
end
|
25
|
+
|
26
|
+
def ipa(region = :uk)
|
27
|
+
definition.ipa.send(region)
|
28
|
+
end
|
29
|
+
|
30
|
+
def meaning
|
31
|
+
definition.senses.first.explanations.first.meaning
|
32
|
+
end
|
33
|
+
|
34
|
+
def meanings
|
35
|
+
definition.senses.map { |s| s.explanations.map(&:meaning) }.flatten
|
36
|
+
end
|
37
|
+
|
38
|
+
# show all important dictionary information, returns
|
39
|
+
# { meaning: [{ pos: '', category: '',
|
40
|
+
# sense: [{ meaning:, eg: [], level: '', code: '', synonym: '',
|
41
|
+
# opposite: '', usage: '', region: ''}] }]
|
42
|
+
# ipa: '' | { uk: , us: },
|
43
|
+
# pronunciation: { uk: mp3|ogg, us: mp3|ogg }
|
44
|
+
# }
|
45
|
+
def show
|
46
|
+
{
|
47
|
+
meaning: meanings_json,
|
48
|
+
ipa: ipa_json,
|
49
|
+
pronunciation: { uk: pronunciation, us: pronunciation(:us) }
|
50
|
+
}
|
51
|
+
end
|
52
|
+
|
53
|
+
def print
|
54
|
+
require 'pp'
|
55
|
+
pp show
|
15
56
|
end
|
16
57
|
|
17
58
|
# Get all definitions for this word from remote online dictionary
|
18
59
|
def definitions
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
60
|
+
@definitions ||= g_definitions
|
61
|
+
end
|
62
|
+
|
63
|
+
def raw_definition
|
64
|
+
@raw_definition ||= retrieve.to_html(save_with: 0)
|
65
|
+
end
|
66
|
+
|
67
|
+
alias pos part_of_speech
|
68
|
+
alias definition definitions
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def retrieve
|
73
|
+
@retrieved ||= Camdict::Client.new(@dictionary).html_definition(@word)
|
74
|
+
end
|
75
|
+
|
76
|
+
def g_definitions
|
77
|
+
Camdict::Definition.new(@word).parse(retrieve)
|
78
|
+
end
|
79
|
+
|
80
|
+
def meanings_json
|
81
|
+
definition.senses.map do |s|
|
82
|
+
{
|
83
|
+
pos: s.part_of_speech, category: s.category,
|
84
|
+
sense: s.explanations.map do |e|
|
85
|
+
{ meaning: e.meaning, eg: e.examples&.map(&:sentence) }
|
86
|
+
.merge(optional_meaning_items(e))
|
87
|
+
end
|
24
88
|
}
|
25
89
|
end
|
26
90
|
end
|
27
91
|
|
28
|
-
|
29
|
-
|
30
|
-
@raw_definitions.size
|
92
|
+
def ipa_json
|
93
|
+
ipa(:uk) == ipa(:us) ? ipa : { uk: ipa(:uk), us: ipa(:us) }
|
31
94
|
end
|
32
95
|
|
33
|
-
|
34
|
-
|
96
|
+
def optional_meaning_items(exp)
|
97
|
+
%w(level code synonym opposite usage region).inject({}) do |ret, o|
|
98
|
+
exp.public_send(o) ? ret.merge({ o => exp.public_send(o) }) : ret
|
99
|
+
end
|
100
|
+
end
|
35
101
|
end
|
36
102
|
end
|
data/test/debug.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
|
3
|
+
module Camdict
|
4
|
+
# test in a way that how camdict is working, so that remote changes can
|
5
|
+
# be found quickly, especially for css class changes.
|
6
|
+
class Debug < Minitest::Test
|
7
|
+
def setup
|
8
|
+
@word = ARGV[0]
|
9
|
+
check_input
|
10
|
+
@wordict = Camdict::Word.new(@word)
|
11
|
+
@client = Camdict::Client.new
|
12
|
+
$cache ||= {}
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_search
|
16
|
+
assert @client.send :fetch, @word
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_word_page
|
20
|
+
assert @client.send :single_def?, word_page
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_british_tab
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_where
|
27
|
+
where = definations.send(:where, word_content)
|
28
|
+
print 'where=', where
|
29
|
+
refute_equal 'unknown', where
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_ipa
|
33
|
+
definations
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_print
|
37
|
+
puts
|
38
|
+
@wordict.print
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def check_input
|
44
|
+
@word || abort('please specify a word on command line')
|
45
|
+
end
|
46
|
+
|
47
|
+
def definations
|
48
|
+
$cache[:definitions] ||=
|
49
|
+
Camdict::Definition.new(@word).parse(word_content)
|
50
|
+
end
|
51
|
+
|
52
|
+
def word_content
|
53
|
+
$cache[:word_content] ||= @client.send(:di_extracted, word_page)
|
54
|
+
end
|
55
|
+
|
56
|
+
def word_page
|
57
|
+
$cache[:word_page] ||= @client.get_html(@client.word_url(@word))
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/test/helper.rb
ADDED
data/test/itest_client.rb
CHANGED
@@ -1,20 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'minitest/autorun'
|
2
3
|
require 'camdict'
|
3
4
|
|
4
5
|
module Camdict
|
5
6
|
class ClientiTest < Minitest::Test
|
7
|
+
def setup
|
8
|
+
@client = Camdict::Client.new
|
9
|
+
@imaginary = @client.word_url('imaginary')
|
10
|
+
end
|
11
|
+
|
6
12
|
def test_fetch
|
7
|
-
|
8
|
-
|
9
|
-
|
13
|
+
assert !@client.send(:fetch, 'pppppp')
|
14
|
+
assert @client.send(:fetch, 'mind')
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_single_def?
|
18
|
+
html = @client.get_html(@imaginary)
|
19
|
+
assert @client.send :single_def?, html
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_mentry_links
|
23
|
+
related_html = @client.get_html(@client.search_url('related'))
|
24
|
+
related_links = @client.send :mentry_links, 'related', related_html
|
25
|
+
assert_equal 1, related_links.size
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_matched_word?
|
29
|
+
mind_url = @client.search_url('mind')
|
30
|
+
mind_node = @client.get_html(mind_url).css('.prefix-item').first
|
31
|
+
assert @client.send :matched_word?, 'mind', mind_node
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_di_extracted
|
35
|
+
html = @client.get_html(@imaginary)
|
36
|
+
r = @client.send :di_extracted, html
|
37
|
+
assert r.css('.cdo-section-title-hw')
|
38
|
+
assert r.css('.pron-info')
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_di_body
|
42
|
+
html = @client.get_html(@imaginary)
|
43
|
+
assert @client.send :di_body, html
|
10
44
|
end
|
11
45
|
|
12
46
|
def test_html_definition
|
13
|
-
|
14
|
-
search_result
|
15
|
-
r = search_result.collect {|r| r.keys}
|
16
|
-
assert_equal ["related_1", "related_2"], r.flatten
|
47
|
+
search_result = @client.html_definition('related')
|
48
|
+
assert search_result.first
|
17
49
|
end
|
18
50
|
end
|
19
51
|
end
|
20
|
-
|
data/test/itest_definition.rb
CHANGED
@@ -1,90 +1,39 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative 'helper'
|
3
3
|
|
4
4
|
module Camdict
|
5
5
|
class DefinitioniTest < Minitest::Test
|
6
|
-
|
7
6
|
def test_part_of_speech
|
8
|
-
|
9
|
-
'look at sth' => 'phrasal verb', 'plagiarist' => 'noun',
|
10
|
-
'pass water' => 'idiom', 'ruby' => 'noun'}
|
11
|
-
# adjective for ruby exists in British dictionary
|
12
|
-
data.each_pair { |word, exp_result|
|
7
|
+
pos_data.each_pair do |word, exp_result|
|
13
8
|
w = Camdict::Word.new(word)
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
9
|
+
assert_equal exp_result, w.part_of_speech
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_pos
|
18
14
|
w = Camdict::Word.new('correct')
|
19
|
-
|
20
|
-
assert_equal 'adjective', defa[0].part_of_speech
|
21
|
-
assert_equal 'verb', defa[1].part_of_speech
|
15
|
+
assert_equal %w(adjective verb), w.pos
|
22
16
|
end
|
23
17
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
assert_equal "polite expression for urinate", expl.meaning
|
18
|
+
def test_region_in_block
|
19
|
+
skip 'word in block'
|
20
|
+
w = Camdict::Word.new('rubbers')
|
21
|
+
assert_equal 'US', w.region
|
29
22
|
end
|
30
23
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
:uk_inx => [10,1],
|
37
|
-
:us_inx => nil,
|
38
|
-
:which => 0
|
39
|
-
}
|
40
|
-
plagiarism = {
|
41
|
-
:word => "plagiarism",
|
42
|
-
:uk_utf8 => %w(2c8 70 6c 65 26a 2e 64 292 259 72 2e 26a 2e 7a 259 6d),
|
43
|
-
:us_utf8 => %w(2c8 70 6c 65 26a 2e 64 292 25a 2e 26a 2e 7a 259 6d),
|
44
|
-
:uk_inx => [8,1,14,1],
|
45
|
-
:us_inx => [13,1],
|
46
|
-
:which => 0
|
47
|
-
}
|
48
|
-
aluminum = {
|
49
|
-
:word => "aluminum",
|
50
|
-
:uk_utf8 => %w(259 2c8 6c 75 2d0 2e 6d 26a 2e 6e 259 6d),
|
51
|
-
:us_utf8 => %w(259 2c8 6c 75 2d0 2e 6d 26a 2e 6e 259 6d),
|
52
|
-
:uk_inx => nil,
|
53
|
-
:us_inx => nil,
|
54
|
-
:which => 0
|
55
|
-
}
|
56
|
-
sled = {
|
57
|
-
:word => "sled",
|
58
|
-
:uk_utf8 => nil,
|
59
|
-
:us_utf8 => nil,
|
60
|
-
:uk_inx => nil,
|
61
|
-
:us_inx => nil,
|
62
|
-
:which => 1
|
63
|
-
}
|
64
|
-
data = [imaginary, plagiarism, aluminum, sled]
|
65
|
-
data.each { |d|
|
66
|
-
w = Camdict::Word.new(d[:word])
|
67
|
-
defa = w.definitions
|
68
|
-
defo = defa[d[:which]]
|
69
|
-
uk = defo.ipa.uk
|
70
|
-
us = defo.ipa.us
|
71
|
-
uk = uk.unpack('U*').map { |n| n.to_s 16 } if uk
|
72
|
-
us = us.unpack('U*').map { |n| n.to_s 16 } if us
|
73
|
-
actk = defo.ipa.k
|
74
|
-
acts = defo.ipa.s
|
75
|
-
assert_equal d[:uk_utf8], uk, "#{d[:word]} uk ipa got a problem"
|
76
|
-
assert_equal d[:us_utf8], us, "#{d[:word]} us ipa got a problem"
|
77
|
-
assert_equal d[:uk_inx], actk, "#{d[:word]} uk superscript index issue"
|
78
|
-
assert_equal d[:us_inx], acts, "#{d[:word]} us superscript index issue"
|
79
|
-
}
|
24
|
+
def test_meaning
|
25
|
+
skip 'phrase'
|
26
|
+
w = Camdict::Word.new('pass water')
|
27
|
+
expl = w.definition.senses.first.explanations.first
|
28
|
+
assert_equal 'polite expression for urinate', expl.meaning
|
80
29
|
end
|
81
30
|
|
82
|
-
def
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
31
|
+
def pos_data
|
32
|
+
# 'aluminum' => 'noun', US variant ought to be got from American tab
|
33
|
+
{ 'aluminium' => 'noun',
|
34
|
+
'plagiarist' => 'noun',
|
35
|
+
'ruby' => 'noun' }
|
36
|
+
# adjective for ruby exists in British dictionary
|
87
37
|
end
|
88
|
-
|
89
38
|
end
|
90
39
|
end
|
data/test/itest_entry.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative 'helper'
|
3
|
+
|
4
|
+
module Camdict
|
5
|
+
class EntryiTest < Minitest::Test
|
6
|
+
def setup
|
7
|
+
fly_e = Camdict::Client.new.html_definition('fly')
|
8
|
+
.css('.entry-body__el').first
|
9
|
+
@senses = Camdict::Definition.new('fly').send(:get_senses, fly_e)
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_senses
|
13
|
+
assert_equal 4, @senses.size
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_meaning
|
17
|
+
expect = 'When a bird, insect, or aircraft flies, it moves through ' \
|
18
|
+
'the air: '
|
19
|
+
assert_equal expect, @senses.first.explanations.first.meaning
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_part_of_speech
|
23
|
+
assert_equal 'verb', @senses.first.part_of_speech
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_category
|
27
|
+
assert_equal 'TRAVEL', @senses.first.category
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_derived_pos
|
31
|
+
html = Camdict::Client.new.html_definition('plagiarism')
|
32
|
+
.css('.entry-body__el')
|
33
|
+
senses = Camdict::Definition.new('plagiarism').send(:get_senses, html)
|
34
|
+
assert_equal 'noun', senses.first.part_of_speech
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/test/itest_explanation.rb
CHANGED
@@ -1,35 +1,56 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative 'helper'
|
3
3
|
|
4
4
|
module Camdict
|
5
5
|
class ExplanationiTest < Minitest::Test
|
6
|
-
def
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
6
|
+
def test_level
|
7
|
+
assert_equal 'B2', sense(:last).explanations.first.level
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_sentence
|
11
|
+
e = sense(:first).explanations.first
|
12
|
+
assert_equal 'A2', e.level
|
13
|
+
assert_equal %("Your name is Angela Black?" "That is correct."),
|
14
|
+
e.examples.last.sentence
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_code
|
18
|
+
w = Camdict::Word.new('cause')
|
19
|
+
def1 = w.definitions
|
20
|
+
e1 = def1.senses.first.explanations.first
|
21
|
+
assert_equal ' C or U ', e1.code
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_gc_usage
|
25
|
+
w = Camdict::Word.new('cause')
|
26
|
+
def1 = w.definition
|
27
|
+
e2 = def1.senses[2].explanations.first
|
28
|
+
assert_equal ' + two objects ', e2.examples.last.usage
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_correctly
|
32
|
+
skip 'derived word - on the same page with its original'
|
33
|
+
w = Camdict::Word.new('correctly').definition
|
34
|
+
e1 = w.senses.first.explanations
|
35
|
+
assert_equal 'Have I pronounced your name correctly?',
|
36
|
+
e1[2].examples[0].sentence
|
37
|
+
assert_equal 'B1', e1[2].level
|
23
38
|
end
|
24
39
|
|
25
40
|
def test_phrase_meaning
|
41
|
+
skip 'phrase ought have its own class'
|
26
42
|
w = Camdict::Word.new('blow your nose')
|
27
43
|
defa = w.definitions
|
28
|
-
def1 = defa.first
|
44
|
+
def1 = defa.first
|
29
45
|
el = def1.explanations.last
|
30
46
|
exped = 'to force air from your lungs and through your nose to clear it'
|
31
47
|
assert_equal exped, el.meaning
|
32
48
|
end
|
33
49
|
|
50
|
+
private
|
51
|
+
|
52
|
+
def sense(nth)
|
53
|
+
Camdict::Word.new('correct').definition.senses.send(nth)
|
54
|
+
end
|
34
55
|
end
|
35
56
|
end
|