camdict 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 86357de26d1ad3547925075d9592facaadcb39f4
4
- data.tar.gz: e16be0c399d94659828ad176a9c7557d2289dfda
3
+ metadata.gz: 556f016868d71a2d1dde23319a9d59dfa60ba05c
4
+ data.tar.gz: cb0090a31d8f56da0d366819f0de47309d9d5dfa
5
5
  SHA512:
6
- metadata.gz: b35d9c52c329f91c84cd644520d440d17268f52b29c1eb4328d2ea5b90bedf2cbbcd7308eeb37a919ed0be4fbed16a1ff31d6a2909c72786ec42feaaee92e224
7
- data.tar.gz: ac6ce320b885258a6108365e056cfd4b8bb8a6601d950862fcfe5806ce77525807b6894f564063e6b949970843c936f8193cc12d157ed89cf0725becfc24c969
6
+ metadata.gz: 3c110ea5cfe31a35bd69952f983669377a577aa1f7960ac7f69dd7d073d25ff6ff2f031b4a9ba9aa7190b169ded33dba3b06df70788bccb17be77fb93cb1196f
7
+ data.tar.gz: dda29ecc637f0550bddfd78b23071210649e8c9aa2d58dbc34cf14b764ec0f7e92bc5c917182bd73a4da0e62516c7cdf03dac647c1b30ce5e584ae30fba54604
data/README.md CHANGED
@@ -19,6 +19,8 @@ One test may fail if the gem nokogiri hasn't pulled in the fix [here][2]. But
19
19
  it is safe to apply the patch to your nokogiri copy.
20
20
 
21
21
  ## Usage
22
+
23
+ ```ruby
22
24
  require 'camdict'
23
25
 
24
26
  # Look up a new word
@@ -48,7 +50,7 @@ it is safe to apply the patch to your nokogiri copy.
48
50
  # I had to give up drinking for health reasons.
49
51
  # He gave up work because of ill health.
50
52
  }
51
-
53
+ ```
52
54
 
53
55
  There are some useful testing examples in the test directory of this gem.
54
56
 
@@ -124,7 +124,7 @@ module Camdict
124
124
  def matched_word?(word, node)
125
125
  li = node.css(".base")
126
126
  resword = li.size == 1 ? li.text : li[0].text
127
- if resword.include? '/'
127
+ if resword.include? '/' or resword.include? ';'
128
128
  resword.flatten.include?(word)
129
129
  else
130
130
  word == resword
@@ -5,51 +5,76 @@ module Camdict
5
5
  String.class_eval do
6
6
  # 'blow a kiss to/at sb'.flatten =>
7
7
  # %q(blow a kiss to sb, blow a kiss at sb)
8
- # if it doesn't include a slash, returns unchanged itself
8
+ # if it doesn't include a slash, returns stripped string
9
9
  def flatten
10
- return self unless self.include? '/'
10
+ str = self.strip
11
+ # remove the space surrounding '/'
12
+ str = str.gsub /\s*\/\s*/, '/'
13
+ return str unless str.include? '/'
14
+ len = str.length
11
15
  ret = []
12
- len = self.length
16
+ # when two strings are passed in separated with ';', then separate them
17
+ if pos = str.index(';')
18
+ ret += str[0..pos-1].flatten
19
+ ret += str[pos+1..len-1].flatten
20
+ return ret
21
+ end
22
+ # when a string has round brackets meaning optional part
23
+ if str.include? '('
24
+ head, bracket, tail = str.partition(/\(.*\)/)
25
+ unless bracket.empty?
26
+ ret << (head.strip + tail).flatten
27
+ result = bracket.delete("()").flatten
28
+ result = [result] if result.is_a? String
29
+ result.each { |s|
30
+ ret << (head + s + tail).flatten
31
+ }
32
+ end
33
+ return ret.flatten
34
+ end
13
35
  j=0 # count of the alternative words, 'to/at' has two.
14
- b=[] # b[]/e[] index of the beginning/end of a alternative word
36
+ b=[] # b[]/e[] index of the beginning/end of alternative words
15
37
  e=[]
16
38
  # set this flag when the next word is expected to be an alternate word after a slash
17
39
  include_next = false
18
- for i in (0..len-1)
19
- c = self[i]
20
- case
21
- when c =~ /[[:alpha:]\-\(\)]/
40
+ for i in 0..len-1
41
+ c = str[i]
42
+ case c
43
+ # valid char in a word
44
+ when /[[:alnum:]\-']/
22
45
  if b[j].nil?
23
46
  b[j] = i
24
47
  e[j] = i
25
48
  else
26
49
  e[j] = i
27
50
  end
28
- when c == " "
51
+ # char means a word has ended
52
+ when " ", "!", "?", ",", "."
29
53
  if include_next
30
54
  break
31
55
  else
32
56
  b[j] = nil
33
57
  e[j] = nil
34
58
  end
35
- when c == "/"
59
+ # 'or' separator
60
+ when "/"
36
61
  j += 1
37
62
  include_next = true
38
63
  else
39
- raise "Invalid char '#{c}' found in a string."
64
+ raise NotImplementedError, "char '#{c}' found in '#{self}'."
40
65
  end
41
66
  end
42
67
  if j > 0
43
68
  for i in (0..j)
44
69
  # alternative word is not the last word and not at the beginning
45
70
  if (e[j]+1 < len) && (b[0] > 0)
46
- ret << self[0..b[0]-1] + self[b[i]..e[i]] + self[e[j]+1..len-1]
71
+ ret << str[0..b[0]-1] + str[b[i]..e[i]] + str[e[j]+1..len-1]
47
72
  elsif (e[j]+1 == len) && (b[0] > 0)
48
- ret << self[0..b[0]-1] + self[b[i]..e[i]]
73
+ ret << str[0..b[0]-1] + str[b[i]..e[i]]
49
74
  elsif (e[j]+1 < len) && (b[0] == 0)
50
- ret << self[b[i]..e[i]] + self[e[j]+1..len-1]
75
+ ret << str[b[i]..e[i]] + str[e[j]+1..len-1]
51
76
  else
52
- ret << self[b[i]..e[i]]
77
+ ret << str[b[i]..e[i]]
53
78
  end
54
79
  end
55
80
  end
@@ -44,7 +44,7 @@ module Camdict
44
44
  # tense of this verb.
45
45
  attr_reader :verb
46
46
 
47
- # Input are +word+ and +entry_html+ is
47
+ # Input +word+ and +entry_html+ are
48
48
  # { entry ID => its html definition source }
49
49
  def initialize(word, entry_html)
50
50
  @word = word
@@ -138,13 +138,13 @@ module Camdict
138
138
  location = "idiom" if @is_idiom && @title_word.include?(@word)
139
139
  unless @spelling_variant.nil?
140
140
  # spelling variant is treated as "title word"
141
- location = "spellvar" if @spelling_variant.include? @word
141
+ location = "spellvar" if @spelling_variant == @word
142
142
  end
143
143
  unless @head_variant.nil?
144
144
  location = "head_variant" if @head_variant.include? @word
145
145
  end
146
- location ="body_variant" if @body_variant && @body_variant.include?(@word)
147
- location = "inflection" if @inflection && @inflection.include?(@word)
146
+ location ="body_variant" if @body_variant && @body_variant == @word
147
+ location = "inflection" if @inflection && @inflection == @word
148
148
  unless @derived_words.nil?
149
149
  if @derived_words.include? @word
150
150
  unless location.nil?
@@ -268,6 +268,7 @@ module Camdict
268
268
  # US IPA is always followed by a symbol US
269
269
  # favorite: UK/US ipa (spellvar US s:favorite) => normal title word
270
270
  usnode = @html.css ".di-info img.ussymbol + .pron .ipa"
271
+ usnode = usnode.first
271
272
  usbase = parse_ipa(usnode) unless usnode.nil?
272
273
  when 'inflection'
273
274
  usnode = @html.css ".info-group img.ussymbol + .pron .ipa"
@@ -302,7 +303,7 @@ module Camdict
302
303
  derived_uk = parse_ipa(node.first) unless node.first.nil?
303
304
  }
304
305
  derived_css("img.ussymbol + .pron .ipa") { |node|
305
- usbase = parse_ipa(node) unless node.nil?
306
+ usbase = parse_ipa(node.first) unless node.first.nil?
306
307
  }
307
308
  if derived_uk && derived_uk[:baseipa].include?('-')
308
309
  ukbase = join_ipa(ukbase, derived_uk)
@@ -366,7 +367,15 @@ module Camdict
366
367
  ret = full[0..position-slen+2] + center + full[position+1..flen-1]
367
368
  return {baseipa: ret, sindex: findex}
368
369
  end
369
- raise "unmatched head-tail hyphen IPA"
370
+ # this is a simple solution to work around the issue since no common
371
+ # chars are found between the full and short ipa. Such as the word
372
+ # 'difference', so just assign full to short
373
+ begin
374
+ raise "head-tail hyphen IPA #{short} for the word #{@word}" +
375
+ "unmatched with #{full}."
376
+ rescue RuntimeError
377
+ return full_sp
378
+ end
370
379
  else
371
380
  # head hyphen
372
381
  right = short[1, slen-1]
@@ -392,23 +401,16 @@ module Camdict
392
401
  # tail hyphen
393
402
  elsif short[-1] == '-'
394
403
  left = short[0, slen-1]
395
- # match left
396
- # unicode of secondary stress & stress mark are considered
397
- if ["\u{2cc}", "\u{2c8}"].include? left[0]
398
- if left[0,2] == full[0,2]
399
- ret = left + full[slen-1..flen-1]
400
- findex = mix_spi( ussp, 0, basesp, slen-1..flen-1)
401
- return {baseipa: ret, sindex: findex}
402
- end
403
- elsif left[0] == full[0]
404
- ret = left + full[slen-1..flen-1]
405
- findex = mix_spi( ussp, 0, basesp, slen-1..flen-1)
406
- return {baseipa: ret, sindex: findex}
407
- else
408
- raise "tail hyphen has uncovered case - code needs update."
409
- end
404
+ ret = left + full[slen-1..flen-1]
405
+ findex = mix_spi( ussp, 0, basesp, slen-1..flen-1)
406
+ return {baseipa: ret, sindex: findex}
407
+ # begin with a primary or secondary stress mark like reunion
408
+ elsif ["\u{2cc}", "\u{2c8}"].include? short[0]
409
+ return full_sp # for simple, use uk ipa instead
410
410
  else
411
- raise "IPA doesn't begin or end with a hyphen, nothing is done."
411
+ raise ArgumentError,
412
+ "IPA doesn't begin with a hyphen or stress, nor end with a hyphen. " +
413
+ "Nothing is done."
412
414
  end
413
415
  end
414
416
 
data/test/test_common.rb CHANGED
@@ -18,6 +18,38 @@ module Camdict
18
18
  str = "not give/budge/move an inch"
19
19
  expected = ['not give an inch', 'not budge an inch', 'not move an inch']
20
20
  assert_equal expected, str.flatten
21
+ str = "fall into the/sb's trap"
22
+ expected = ['fall into the trap', 'fall into sb\'s trap']
23
+ assert_equal expected, str.flatten
24
+ str = "what is sb/sth?"
25
+ expected = ['what is sb?', 'what is sth?']
26
+ assert_equal expected, str.flatten
27
+ str = "look lively/sharp!"
28
+ expected = ['look lively!', 'look sharp!']
29
+ assert_equal expected, str.flatten
30
+ str = "the like of sb/sth; sb's/sth's like"
31
+ expected = ['the like of sb', 'the like of sth',
32
+ "sb's like", "sth's like"]
33
+ assert_equal expected, str.flatten
34
+ str = "go (like/down) a bomb"
35
+ expected = ['go a bomb', 'go like a bomb', 'go down a bomb']
36
+ assert_equal expected, str.flatten
37
+ str = "the other side/end (of sth)"
38
+ expected = ['the other side', 'the other end', 'the other side of sth',
39
+ 'the other end of sth']
40
+ assert_equal expected, str.flatten
41
+ strs = ["20/20 vision", "public enemy number one/no. 1"]
42
+ assert_nothing_raised do
43
+ strs.each { |s| s.flatten }
44
+ end
45
+ str = "the more...the more/less"
46
+ expected = ['the more...the more', 'the more...the less']
47
+ assert_equal expected, str.flatten
48
+ # need more examples to support complex 'or' separators
49
+ # sound like/as if/as though
50
+ # look on/upon sb/sth as sth
51
+ # look at/see sth through rose-coloured/tinted glasses
52
+ # give /quote sth/sb chapter and verse
21
53
  end
22
54
 
23
55
  def test_expand
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: camdict
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pan Gaoyong
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-01-05 00:00:00.000000000 Z
12
+ date: 2014-04-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -25,7 +25,7 @@ dependencies:
25
25
  - - '>='
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
- description: get definitions, pronunciation and example sentences of a word or phrase
28
+ description: Get definitions, pronunciation and example sentences of a word or phrase
29
29
  from the online Cambridge dictionaries.
30
30
  email: pan.gaoyong@gmail.com
31
31
  executables: []