camdict 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/lib/camdict/client.rb +1 -1
- data/lib/camdict/common.rb +40 -15
- data/lib/camdict/definition.rb +24 -22
- data/test/test_common.rb +32 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 556f016868d71a2d1dde23319a9d59dfa60ba05c
|
4
|
+
data.tar.gz: cb0090a31d8f56da0d366819f0de47309d9d5dfa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3c110ea5cfe31a35bd69952f983669377a577aa1f7960ac7f69dd7d073d25ff6ff2f031b4a9ba9aa7190b169ded33dba3b06df70788bccb17be77fb93cb1196f
|
7
|
+
data.tar.gz: dda29ecc637f0550bddfd78b23071210649e8c9aa2d58dbc34cf14b764ec0f7e92bc5c917182bd73a4da0e62516c7cdf03dac647c1b30ce5e584ae30fba54604
|
data/README.md
CHANGED
@@ -19,6 +19,8 @@ One test may fail if the gem nokogiri hasn't pulled in the fix [here][2]. But
|
|
19
19
|
it is safe to apply the patch to your nokogiri copy.
|
20
20
|
|
21
21
|
## Usage
|
22
|
+
|
23
|
+
```ruby
|
22
24
|
require 'camdict'
|
23
25
|
|
24
26
|
# Look up a new word
|
@@ -48,7 +50,7 @@ it is safe to apply the patch to your nokogiri copy.
|
|
48
50
|
# I had to give up drinking for health reasons.
|
49
51
|
# He gave up work because of ill health.
|
50
52
|
}
|
51
|
-
|
53
|
+
```
|
52
54
|
|
53
55
|
There are some useful testing examples in test directory of this gem.
|
54
56
|
|
data/lib/camdict/client.rb
CHANGED
@@ -124,7 +124,7 @@ module Camdict
|
|
124
124
|
def matched_word?(word, node)
|
125
125
|
li = node.css(".base")
|
126
126
|
resword = li.size == 1 ? li.text : li[0].text
|
127
|
-
if resword.include? '/'
|
127
|
+
if resword.include? '/' or resword.include? ';'
|
128
128
|
resword.flatten.include?(word)
|
129
129
|
else
|
130
130
|
word == resword
|
data/lib/camdict/common.rb
CHANGED
@@ -5,51 +5,76 @@ module Camdict
|
|
5
5
|
String.class_eval do
|
6
6
|
# 'blow a kiss to/at sb'.flatten =>
|
7
7
|
# %q(blow a kiss to sb, blow a kiss at sb)
|
8
|
-
# if it doesn't include a slash, returns
|
8
|
+
# if it doesn't include a slash, returns stripped string
|
9
9
|
def flatten
|
10
|
-
|
10
|
+
str = self.strip
|
11
|
+
# remove the space surrounding '/'
|
12
|
+
str = str.gsub /\s*\/\s*/, '/'
|
13
|
+
return str unless str.include? '/'
|
14
|
+
len = str.length
|
11
15
|
ret = []
|
12
|
-
|
16
|
+
# when two strings are passed in separated with ';', then separate them
|
17
|
+
if pos = str.index(';')
|
18
|
+
ret += str[0..pos-1].flatten
|
19
|
+
ret += str[pos+1..len-1].flatten
|
20
|
+
return ret
|
21
|
+
end
|
22
|
+
# when a string has round brackets meaning optional part
|
23
|
+
if str.include? '('
|
24
|
+
head, bracket, tail = str.partition(/\(.*\)/)
|
25
|
+
unless bracket.empty?
|
26
|
+
ret << (head.strip + tail).flatten
|
27
|
+
result = bracket.delete("()").flatten
|
28
|
+
result = [result] if result.is_a? String
|
29
|
+
result.each { |s|
|
30
|
+
ret << (head + s + tail).flatten
|
31
|
+
}
|
32
|
+
end
|
33
|
+
return ret.flatten
|
34
|
+
end
|
13
35
|
j=0 # count of the alternative words, 'to/at' has two.
|
14
|
-
b=[] # b[]/e[] index of the beginning/end of
|
36
|
+
b=[] # b[]/e[] index of the beginning/end of alternative words
|
15
37
|
e=[]
|
16
38
|
# set this flag when next word is expected an alternate word after slash
|
17
39
|
include_next = false
|
18
|
-
for i in
|
19
|
-
c =
|
20
|
-
case
|
21
|
-
|
40
|
+
for i in 0..len-1
|
41
|
+
c = str[i]
|
42
|
+
case c
|
43
|
+
# valid char in a word
|
44
|
+
when /[[:alnum:]\-']/
|
22
45
|
if b[j].nil?
|
23
46
|
b[j] = i
|
24
47
|
e[j] = i
|
25
48
|
else
|
26
49
|
e[j] = i
|
27
50
|
end
|
28
|
-
|
51
|
+
# char means a word has ended
|
52
|
+
when " ", "!", "?", ",", "."
|
29
53
|
if include_next
|
30
54
|
break
|
31
55
|
else
|
32
56
|
b[j] = nil
|
33
57
|
e[j] = nil
|
34
58
|
end
|
35
|
-
|
59
|
+
# 'or' separator
|
60
|
+
when "/"
|
36
61
|
j += 1
|
37
62
|
include_next = true
|
38
63
|
else
|
39
|
-
raise "
|
64
|
+
raise NotImplementedError, "char '#{c}' found in '#{self}'."
|
40
65
|
end
|
41
66
|
end
|
42
67
|
if j > 0
|
43
68
|
for i in (0..j)
|
44
69
|
# alternative word is not the last word and not at the beginning
|
45
70
|
if (e[j]+1 < len) && (b[0] > 0)
|
46
|
-
ret <<
|
71
|
+
ret << str[0..b[0]-1] + str[b[i]..e[i]] + str[e[j]+1..len-1]
|
47
72
|
elsif (e[j]+1 == len) && (b[0] > 0)
|
48
|
-
ret <<
|
73
|
+
ret << str[0..b[0]-1] + str[b[i]..e[i]]
|
49
74
|
elsif (e[j]+1 < len) && (b[0] == 0)
|
50
|
-
ret <<
|
75
|
+
ret << str[b[i]..e[i]] + str[e[j]+1..len-1]
|
51
76
|
else
|
52
|
-
ret <<
|
77
|
+
ret << str[b[i]..e[i]]
|
53
78
|
end
|
54
79
|
end
|
55
80
|
end
|
data/lib/camdict/definition.rb
CHANGED
@@ -44,7 +44,7 @@ module Camdict
|
|
44
44
|
# tense of this verb.
|
45
45
|
attr_reader :verb
|
46
46
|
|
47
|
-
# Input
|
47
|
+
# Input +word+ and +entry_html+ are
|
48
48
|
# { entry ID => its html definition source }
|
49
49
|
def initialize(word, entry_html)
|
50
50
|
@word = word
|
@@ -138,13 +138,13 @@ module Camdict
|
|
138
138
|
location = "idiom" if @is_idiom && @title_word.include?(@word)
|
139
139
|
unless @spelling_variant.nil?
|
140
140
|
# spelling variant is treated as "title word"
|
141
|
-
location = "spellvar" if @spelling_variant
|
141
|
+
location = "spellvar" if @spelling_variant == @word
|
142
142
|
end
|
143
143
|
unless @head_variant.nil?
|
144
144
|
location = "head_variant" if @head_variant.include? @word
|
145
145
|
end
|
146
|
-
location ="body_variant" if @body_variant && @body_variant
|
147
|
-
location = "inflection" if @inflection && @inflection
|
146
|
+
location ="body_variant" if @body_variant && @body_variant == @word
|
147
|
+
location = "inflection" if @inflection && @inflection == @word
|
148
148
|
unless @derived_words.nil?
|
149
149
|
if @derived_words.include? @word
|
150
150
|
unless location.nil?
|
@@ -268,6 +268,7 @@ module Camdict
|
|
268
268
|
# US IPA is always followed by a symbol US
|
269
269
|
# favorite: UK/US ipa (spellvar US s:favorite) => normal title word
|
270
270
|
usnode = @html.css ".di-info img.ussymbol + .pron .ipa"
|
271
|
+
usnode = usnode.first
|
271
272
|
usbase = parse_ipa(usnode) unless usnode.nil?
|
272
273
|
when 'inflection'
|
273
274
|
usnode = @html.css ".info-group img.ussymbol + .pron .ipa"
|
@@ -302,7 +303,7 @@ module Camdict
|
|
302
303
|
derived_uk = parse_ipa(node.first) unless node.first.nil?
|
303
304
|
}
|
304
305
|
derived_css("img.ussymbol + .pron .ipa") { |node|
|
305
|
-
usbase = parse_ipa(node) unless node.nil?
|
306
|
+
usbase = parse_ipa(node.first) unless node.first.nil?
|
306
307
|
}
|
307
308
|
if derived_uk && derived_uk[:baseipa].include?('-')
|
308
309
|
ukbase = join_ipa(ukbase, derived_uk)
|
@@ -366,7 +367,15 @@ module Camdict
|
|
366
367
|
ret = full[0..position-slen+2] + center + full[position+1..flen-1]
|
367
368
|
return {baseipa: ret, sindex: findex}
|
368
369
|
end
|
369
|
-
|
370
|
+
# this is a simple solution to workaround the issue since no common
|
371
|
+
# chars are found between the full and short ipa. Such as the word
|
372
|
+
# 'difference', so just assign full to short
|
373
|
+
begin
|
374
|
+
raise "head-tail hyphen IPA #{short} for the word #{@word}" +
|
375
|
+
"unmatched with #{full}."
|
376
|
+
rescue RuntimeError
|
377
|
+
return full_sp
|
378
|
+
end
|
370
379
|
else
|
371
380
|
# head hyphen
|
372
381
|
right = short[1, slen-1]
|
@@ -392,23 +401,16 @@ module Camdict
|
|
392
401
|
# tail hyphen
|
393
402
|
elsif short[-1] == '-'
|
394
403
|
left = short[0, slen-1]
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
return {baseipa: ret, sindex: findex}
|
402
|
-
end
|
403
|
-
elsif left[0] == full[0]
|
404
|
-
ret = left + full[slen-1..flen-1]
|
405
|
-
findex = mix_spi( ussp, 0, basesp, slen-1..flen-1)
|
406
|
-
return {baseipa: ret, sindex: findex}
|
407
|
-
else
|
408
|
-
raise "tail hyphen has uncovered case - code needs update."
|
409
|
-
end
|
404
|
+
ret = left + full[slen-1..flen-1]
|
405
|
+
findex = mix_spi( ussp, 0, basesp, slen-1..flen-1)
|
406
|
+
return {baseipa: ret, sindex: findex}
|
407
|
+
# begin with a primary or secondary stress mark like reunion
|
408
|
+
elsif ["\u{2cc}", "\u{2c8}"].include? short[0]
|
409
|
+
return full_sp # for simple, use uk ipa instead
|
410
410
|
else
|
411
|
-
raise
|
411
|
+
raise ArgumentError,
|
412
|
+
"IPA doesn't begin with a hyphen or stress, nor end with a hyphen. " +
|
413
|
+
"Nothing is done."
|
412
414
|
end
|
413
415
|
end
|
414
416
|
|
data/test/test_common.rb
CHANGED
@@ -18,6 +18,38 @@ module Camdict
|
|
18
18
|
str = "not give/budge/move an inch"
|
19
19
|
expected = ['not give an inch', 'not budge an inch', 'not move an inch']
|
20
20
|
assert_equal expected, str.flatten
|
21
|
+
str = "fall into the/sb's trap"
|
22
|
+
expected = ['fall into the trap', 'fall into sb\'s trap']
|
23
|
+
assert_equal expected, str.flatten
|
24
|
+
str = "what is sb/sth?"
|
25
|
+
expected = ['what is sb?', 'what is sth?']
|
26
|
+
assert_equal expected, str.flatten
|
27
|
+
str = "look lively/sharp!"
|
28
|
+
expected = ['look lively!', 'look sharp!']
|
29
|
+
assert_equal expected, str.flatten
|
30
|
+
str = "the like of sb/sth; sb's/sth's like"
|
31
|
+
expected = ['the like of sb', 'the like of sth',
|
32
|
+
"sb's like", "sth's like"]
|
33
|
+
assert_equal expected, str.flatten
|
34
|
+
str = "go (like/down) a bomb"
|
35
|
+
expected = ['go a bomb', 'go like a bomb', 'go down a bomb']
|
36
|
+
assert_equal expected, str.flatten
|
37
|
+
str = "the other side/end (of sth)"
|
38
|
+
expected = ['the other side', 'the other end', 'the other side of sth',
|
39
|
+
'the other end of sth']
|
40
|
+
assert_equal expected, str.flatten
|
41
|
+
strs = ["20/20 vision", "public enemy number one/no. 1"]
|
42
|
+
assert_nothing_raised do
|
43
|
+
strs.each { |s| s.flatten }
|
44
|
+
end
|
45
|
+
str = "the more...the more/less"
|
46
|
+
expected = ['the more...the more', 'the more...the less']
|
47
|
+
assert_equal expected, str.flatten
|
48
|
+
# need more examples to support complex 'or' separators
|
49
|
+
# sound like/as if/as though
|
50
|
+
# look on/upon sb/sth as sth
|
51
|
+
# look at/see sth through rose-coloured/tinted glasses
|
52
|
+
# give /quote sth/sb chapter and verse
|
21
53
|
end
|
22
54
|
|
23
55
|
def test_expand
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: camdict
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pan Gaoyong
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-04-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- - '>='
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
|
-
description:
|
28
|
+
description: Get definitions, pronunciation and example sentences of a word or phrase
|
29
29
|
from the online Cambridge dictionaries.
|
30
30
|
email: pan.gaoyong@gmail.com
|
31
31
|
executables: []
|