camdict 1.0.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +28 -33
- data/lib/camdict/array_ext.rb +37 -0
- data/lib/camdict/client.rb +133 -97
- data/lib/camdict/common.rb +25 -143
- data/lib/camdict/definition.rb +65 -596
- data/lib/camdict/entry.rb +76 -0
- data/lib/camdict/exception.rb +5 -0
- data/lib/camdict/explanation.rb +29 -66
- data/lib/camdict/http_client.rb +14 -10
- data/lib/camdict/ipa.rb +52 -0
- data/lib/camdict/pronunciation.rb +53 -0
- data/lib/camdict/sentence.rb +38 -0
- data/lib/camdict/string_ext.rb +141 -0
- data/lib/camdict/word.rb +83 -17
- data/test/debug.rb +60 -0
- data/test/helper.rb +2 -0
- data/test/itest_client.rb +39 -8
- data/test/itest_definition.rb +24 -75
- data/test/itest_entry.rb +37 -0
- data/test/itest_explanation.rb +41 -20
- data/test/itest_ipa.rb +105 -0
- data/test/itest_pronunciation.rb +74 -0
- data/test/itest_word.rb +49 -0
- data/test/test_array_ext.rb +23 -0
- data/test/test_client.rb +35 -42
- data/test/test_common.rb +22 -78
- data/test/test_explanation.rb +21 -25
- data/test/test_http_client.rb +27 -13
- data/test/test_string_ext.rb +95 -0
- metadata +42 -7
- data/test/test_definition.rb +0 -345
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65f86517bb1f1674118ec92b379e0436fe9fdbec
|
4
|
+
data.tar.gz: 05b6e68ba21c6d9dbee96863ed2146ceea9d12e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f93f6da6b914e84efc540f1fc753b75034402f218048cbc825e478dc6310a609d4282ffb4edb31e34bd572a43093ce66c25c82b37436a9a1b66d0970724e4acc
|
7
|
+
data.tar.gz: bb5cc16f332a3c6c28b66f31c4e08b59e11d2aea7086c4ee87b65a58997116b9f8bd635f504c964c5e950b957b8ade3e985f7380f63de8124e5470e59d233a94
|
data/README.md
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# A ruby gem - camdict
|
2
|
+
[![Build Status][travis-image]][travis-link]
|
3
|
+
[![Code Climate][climate-image]][climate-link]
|
2
4
|
|
3
|
-
## Introduction
|
5
|
+
## Introduction
|
4
6
|
|
5
|
-
The ruby gem camdict is a [Cambridge online dictionary][1] client.
|
7
|
+
The ruby gem camdict is a [Cambridge online dictionary][1] client.
|
6
8
|
You could use this excellent dictionary with a browser, but now it is possible
|
7
9
|
to use it with this ruby API in your code.
|
8
10
|
|
@@ -10,52 +12,45 @@ to use it with this ruby API in your code.
|
|
10
12
|
`gem install camdict`
|
11
13
|
|
12
14
|
## Verification
|
13
|
-
The gem can be tested by below commands in the directory where it's installed.
|
14
|
-
`rake` - run all the testcases which don't need internet connection.
|
15
|
-
`rake itest` - run all the testcases that need internet connection.
|
15
|
+
The gem can be tested with the commands below, run in the directory where it is installed.
|
16
|
+
`rake` - run all the test cases that don't need an internet connection.
|
17
|
+
`rake itest` - run all the test cases that need an internet connection.
|
16
18
|
`rake testall` - run all above tests.
|
17
19
|
|
18
|
-
One test may fail if the gem nokogiri hasn't pulled in the fix [here][2]. But
|
19
|
-
it is safe to apply the patch to your nokogiri copy.
|
20
|
-
|
21
20
|
## Usage
|
22
21
|
|
23
22
|
```ruby
|
24
23
|
require 'camdict'
|
25
24
|
|
26
25
|
# Look up a new word
|
27
|
-
word = Camdict::Word.new
|
28
|
-
|
29
|
-
# get all definitions for this word from remote dictionary and select the
|
30
|
-
# first one. A word usually has many definitions.
|
31
|
-
health = word.definitions.first
|
26
|
+
health = Camdict::Word.new 'health'
|
32
27
|
|
33
28
|
# Print the part of speech
|
34
29
|
puts health.part_of_speech #=> noun
|
35
30
|
|
36
|
-
#
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
explanation1.examples.each { |e|
|
47
|
-
puts e.sentence #=>
|
48
|
-
# to be in good/poor health
|
49
|
-
# Regular exercise is good for your health.
|
50
|
-
# I had to give up drinking for health reasons.
|
51
|
-
# He gave up work because of ill health.
|
52
|
-
}
|
31
|
+
# What's the first meaning
|
32
|
+
puts health.meaning #=>
|
33
|
+
# the condition of the body and the degree to which it is free from
|
34
|
+
# illness, or the state of being well:
|
35
|
+
|
36
|
+
# all meanings
|
37
|
+
puts health.meanings #=> in addition to above meaning, it prints
|
38
|
+
# the condition of something that changes or develops, such as an
|
39
|
+
# organization or system:
|
40
|
+
|
53
41
|
```
|
54
42
|
|
55
|
-
|
43
|
+
Need more? Try `health.print` to show more data in a friendly format.
|
44
|
+
|
45
|
+
## Versioning
|
46
|
+
The release of this gem follows the [semantic versioning rules][2].
|
56
47
|
|
57
48
|
## Licence MIT
|
58
|
-
Copyright (c) 2014 Pan Gaoyong
|
49
|
+
Copyright (c) 2014-2017 Pan Gaoyong
|
59
50
|
|
60
51
|
[1]: http://dictionary.cambridge.com "Cambridge"
|
61
|
-
[2]:
|
52
|
+
[2]: http://semver.org
|
53
|
+
[travis-image]: https://travis-ci.org/pan/camdict.svg?branch=master
|
54
|
+
[travis-link]: https://travis-ci.org/pan/camdict
|
55
|
+
[climate-image]: https://codeclimate.com/github/pan/camdict/badges/gpa.svg
|
56
|
+
[climate-link]: https://codeclimate.com/github/pan/camdict
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'camdict/string_ext'
|
3
|
+
|
4
|
+
module Camdict
|
5
|
+
# Extension: refines the Array class.
|
6
|
+
module ArrayExt
|
7
|
+
refine Array do
|
8
|
+
# Iterate over an array, yielding two consecutive elements +a+ and +b+ each time.
|
9
|
+
def each_pair
|
10
|
+
len = length
|
11
|
+
i = 0
|
12
|
+
while i < len
|
13
|
+
a = at(i)
|
14
|
+
b = at(i + 1)
|
15
|
+
yield(a, b)
|
16
|
+
i += 2
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Test if a phrase array includes a +word+.
|
21
|
+
# ['blow your nose', 'blow a kiss to/at sb'].has?("a kiss at") #=> true
|
22
|
+
def has?(word)
|
23
|
+
expand.each { |phr| return true if phr.include? word }
|
24
|
+
false
|
25
|
+
end
|
26
|
+
|
27
|
+
using Camdict::StringExt
|
28
|
+
|
29
|
+
# Expand a phrase array into a flattened one. Example,
|
30
|
+
# ['blow your nose', 'blow a kiss to/at sb'] #=>
|
31
|
+
# ['blow your nose', 'blow a kiss to sb', 'blow a kiss at sb']
|
32
|
+
def expand
|
33
|
+
map { |p| p&.flatten || p }.flatten
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/camdict/client.rb
CHANGED
@@ -1,117 +1,171 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'camdict/http_client'
|
3
|
+
require 'camdict/string_ext'
|
4
|
+
require 'camdict/exception'
|
2
5
|
|
3
6
|
module Camdict
|
4
|
-
|
5
|
-
#
|
6
|
-
# remote Cambridge dictionaries, but not includes the extended data.
|
7
|
+
# The client downloads all the useful data about a word or phrase from
|
8
|
+
# remote Cambridge dictionaries, but does not include the extended data.
|
7
9
|
# For example,
|
8
|
-
# when the word "mind" is searched,
|
9
|
-
#
|
10
|
+
# when the word "mind" is searched, the exactly matched entry is downloaded.
|
11
|
+
# However, other related entries like "turn of mind" & "open mind"
|
10
12
|
# are not included.
|
11
|
-
class Client
|
12
|
-
|
13
|
-
# Default dictionary is english
|
14
|
-
# Other possible +dict+ values:
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# Get a word's html definition
|
22
|
-
# The returned result could be an empty
|
23
|
-
#
|
24
|
-
# [{ word => html definition }],
|
25
|
-
# or many hash elements when it has multiple entries,
|
26
|
-
# [{ entry_id => html definition }, ...].
|
27
|
-
# Normally, when a +word+ has more than one meanings, its entry ID format is
|
28
|
-
# like word_nn. Otherwise it's just the word itself.
|
13
|
+
class Client < HTTP::Client
|
14
|
+
attr_reader :dictionary
|
15
|
+
# Default dictionary is British English.
|
16
|
+
# Other possible +dict+ values:
|
17
|
+
# english-chinese-simplified, learner-english,
|
18
|
+
# essential-british-english, essential-american-english, etc.
|
19
|
+
def initialize(dict = nil)
|
20
|
+
@dictionary = dict || 'english'
|
21
|
+
end
|
22
|
+
|
23
|
+
# Get a word's html definition from the web dictionary.
|
24
|
+
# The returned result could be an empty string when nothing is found, or
|
25
|
+
# its html definition
|
29
26
|
def html_definition(word)
|
30
27
|
html = fetch(word)
|
31
|
-
|
32
|
-
|
33
|
-
# some words return their only definition directly, such as aluminium.
|
34
|
-
if definition_page? html
|
35
|
-
# entry id is just the word when there is only one definition
|
36
|
-
html_defs << { word => di_head(html) + di_body(html) }
|
28
|
+
if html
|
29
|
+
di_extracted(html)
|
37
30
|
else
|
38
|
-
|
39
|
-
# not found, or the found page with all matched entries and related.
|
40
|
-
# when entry urls are not found, they are empty and spelling suggestion
|
41
|
-
# pages. So mentry_links() returns an empty array. Otherwise, it returns
|
42
|
-
# all the exactly matched entry links.
|
43
|
-
matched_urls = mentry_links(word, html)
|
44
|
-
unless matched_urls.empty?
|
45
|
-
matched_urls.each { |url|
|
46
|
-
html_defs << { entry_id(url) => get_htmldef(url) }
|
47
|
-
}
|
48
|
-
end
|
31
|
+
search(word)
|
49
32
|
end
|
50
|
-
html_defs
|
51
33
|
end
|
52
34
|
|
53
|
-
# Get a word html page
|
35
|
+
# Get a word html definition page by its entry +url+.
|
54
36
|
def get_htmldef(url)
|
55
|
-
|
56
|
-
|
37
|
+
di_extracted get_html(url)
|
38
|
+
end
|
39
|
+
|
40
|
+
# The URL used to search for a word
|
41
|
+
def search_url(word)
|
42
|
+
"#{host}/search/#{@dictionary}/?q=#{word}"
|
43
|
+
end
|
44
|
+
|
45
|
+
def word_url(word)
|
46
|
+
"#{host}/dictionary/#{@dictionary}/#{encode(word).downcase}"
|
57
47
|
end
|
58
48
|
|
59
49
|
private
|
60
50
|
|
51
|
+
def search(word)
|
52
|
+
html = try_search(word)
|
53
|
+
return '' unless html
|
54
|
+
# some words return their only definition directly, such as plagiarism.
|
55
|
+
if single_def?(html)
|
56
|
+
di_extracted(html)
|
57
|
+
else
|
58
|
+
multiple_entries(word, html).join
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def host
|
63
|
+
'http://dictionary.cambridge.org'
|
64
|
+
end
|
65
|
+
|
66
|
+
# returned page could be a spelling check suggestion page in case it is
|
67
|
+
# not found, or the found page with all matched entries and related.
|
68
|
+
# when entry urls are not found, they are empty and spelling suggestion
|
69
|
+
# pages. Returns the html definitions of all exactly matched entries, or raises
|
70
|
+
# exception WordNotFound.
|
71
|
+
def multiple_entries(word, html)
|
72
|
+
html_defs = []
|
73
|
+
mentry_links(word, html).each do |url|
|
74
|
+
html_content = get_htmldef(url)
|
75
|
+
html_defs << html_content if html_content
|
76
|
+
end
|
77
|
+
raise WordNotFound, "#{word} not found" if html_defs.empty?
|
78
|
+
html_defs
|
79
|
+
end
|
80
|
+
|
81
|
+
def try_search(word)
|
82
|
+
get_html(search_url(word))
|
83
|
+
rescue OpenURI::HTTPError => e
|
84
|
+
# When a word does not match any definitions, it returns 404 not found.
|
85
|
+
return if e.message[0..2] == '404'
|
86
|
+
end
|
87
|
+
|
61
88
|
# Fetch word searching result page.
|
62
89
|
# Returned result is either just a single definition page if there is only
|
63
|
-
# one entry, or a result page listing all possible entries, or spelling
|
90
|
+
# one entry, or a result page listing all possible entries, or spelling
|
64
91
|
# check result. All results are objects of Nokogiri::HTML.
|
65
92
|
def fetch(w)
|
66
|
-
|
67
|
-
|
68
|
-
url = search_url + w
|
69
|
-
begin
|
70
|
-
Camdict::HTTP::Client.get_html(url)
|
71
|
-
rescue OpenURI::HTTPError => e
|
72
|
-
# "404" == e.message[0..2], When a word is not found, it returns 404
|
73
|
-
# Not Found and spelling suggestions page.
|
74
|
-
end
|
93
|
+
ret = get_html(word_url(w))
|
94
|
+
ret if definition_page? ret
|
75
95
|
end
|
76
96
|
|
77
|
-
# To determine whether or not the input object of Nokogiri::HTML is a page
|
97
|
+
# To determine whether or not the input object of Nokogiri::HTML is a page
|
78
98
|
# of a word definition. Return true if it has a source structure like this,
|
79
99
|
# <div class="di-head">
|
80
100
|
# <div class="di-title">
|
81
101
|
# <h1 class="hw">
|
82
102
|
# This works for the translation page too, like English-Spanish.
|
83
103
|
def single_def?(html)
|
84
|
-
node = html.css(
|
85
|
-
!
|
104
|
+
node = html.css('.di-head .di-title .hw')
|
105
|
+
!node.empty?
|
86
106
|
end
|
87
107
|
|
88
108
|
# Find out matched entry links from search result page
|
89
|
-
# <ul class="
|
90
|
-
# <li><a href="
|
91
|
-
#
|
92
|
-
# The
|
93
|
-
# The extended links are filtered out and the matched word or phrase's
|
109
|
+
# <ul class="prefix-block">
|
110
|
+
# <li><a href="entry_link">
|
111
|
+
# The search result html page should include above piece of code.
|
112
|
+
# The extended links are filtered out and the matched word or phrase's
|
94
113
|
# links are kept. An array of them are returned.
|
95
|
-
# For example, when the searched word is "related", entry links are like,
|
96
|
-
# http://dictionary.cambridge.org/dictionary/
|
97
|
-
# http://dictionary.cambridge.org/dictionary/
|
98
|
-
# http://dictionary.cambridge.org/dictionary/british/stress-related
|
114
|
+
# For example, when the searched word is "related", entry links are like,
|
115
|
+
# http://dictionary.cambridge.org/dictionary/english/related
|
116
|
+
# http://dictionary.cambridge.org/dictionary/english/relate
|
99
117
|
# ...
|
100
|
-
# Returned result should only contain the first
|
118
|
+
# Returned result should only contain the first one.
|
101
119
|
# Input html is an object of Nokogiri::HTML.
|
102
120
|
def mentry_links(word, html)
|
103
121
|
# suppose the word is not found in the dictionary, so it is empty.
|
104
122
|
links = []
|
105
|
-
nodes = html.css(
|
106
|
-
|
107
|
-
unless nodes.empty?
|
108
|
-
nodes.each { |a|
|
109
|
-
links << a['href'] if matched_word?(word, a)
|
110
|
-
}
|
111
|
-
end
|
123
|
+
nodes = html.css('.prefix-block a')
|
124
|
+
nodes.each { |a| links << a['href'] if matched_word?(word, a) }
|
112
125
|
links
|
113
126
|
end
|
114
127
|
|
128
|
+
# Extract definition head and body from Nokogiri::HTML, discard share links
|
129
|
+
def di_extracted(html)
|
130
|
+
body = di_body(html)
|
131
|
+
# searching aluminium returns an American or British english page
|
132
|
+
# separately; the condition below filters out the American English result
|
133
|
+
return if body.empty?
|
134
|
+
body.css('.share').each { |s| body.delete s }
|
135
|
+
body
|
136
|
+
end
|
137
|
+
|
138
|
+
# Return definition body in html source
|
139
|
+
def di_body(html)
|
140
|
+
html.css("#{tab_css} .di-body")
|
141
|
+
end
|
142
|
+
|
143
|
+
# the css selecting a tab
|
144
|
+
def tab_css
|
145
|
+
"[#{tab}]"
|
146
|
+
end
|
147
|
+
|
148
|
+
# the tab attributes according to dictionary name
|
149
|
+
def tab
|
150
|
+
case @dictionary
|
151
|
+
when 'english'
|
152
|
+
'data-tab="ds-british"'
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
# get the last part of http://dictionary.cambridge.org/british/related_1
|
157
|
+
def entry_id(url)
|
158
|
+
url.split('/').last
|
159
|
+
end
|
160
|
+
|
161
|
+
# spaces and single quotes in a phrase have to be replaced with dashes
|
162
|
+
def encode(word)
|
163
|
+
word.gsub(/[ ']/, '-')
|
164
|
+
end
|
165
|
+
|
166
|
+
alias definition_page? single_def?
|
167
|
+
|
168
|
+
using Camdict::StringExt
|
115
169
|
# Return true if the searched word matches the one on result page.
|
116
170
|
# Node is an object of Nokogiri::Node
|
117
171
|
# <li>
|
@@ -119,36 +173,18 @@ module Camdict
|
|
119
173
|
# <b class="phrase">out of mind, or
|
120
174
|
# <b class="hw">turn of mind, or
|
121
175
|
# <b class="w">mind-numbingly
|
122
|
-
# Match criterion: the queried word should equal to the result word;
|
176
|
+
# Match criterion: the queried word should equal the result word;
|
123
177
|
# the result phrase should be flattened, which should equal to the
|
124
178
|
# queried phrase.
|
125
179
|
def matched_word?(word, node)
|
126
|
-
li = node.css(
|
180
|
+
li = node.css('.base')
|
181
|
+
return false if li.empty?
|
127
182
|
resword = li.size == 1 ? li.text : li[0].text
|
128
|
-
if resword.include?
|
183
|
+
if resword.include?('/') || resword.include?(';')
|
129
184
|
resword.flatten.include?(word)
|
130
185
|
else
|
131
186
|
word == resword
|
132
187
|
end
|
133
188
|
end
|
134
|
-
|
135
|
-
# Return definition head in html source
|
136
|
-
def di_head(html)
|
137
|
-
html.css(".cdo-section-title-hw").to_html(:save_with=>0) +
|
138
|
-
html.css(".di-info").to_html(:save_with=>0)
|
139
|
-
end
|
140
|
-
|
141
|
-
# Return definition body in html source
|
142
|
-
def di_body(html)
|
143
|
-
html.css(".di-body").to_html(:save_with=>0)
|
144
|
-
end
|
145
|
-
|
146
|
-
# get the last part of http://dictionary.cambridge.org/british/related_1
|
147
|
-
def entry_id(url)
|
148
|
-
url.split('/').last
|
149
|
-
end
|
150
|
-
|
151
|
-
alias :definition_page? :single_def?
|
152
|
-
|
153
189
|
end
|
154
190
|
end
|
data/lib/camdict/common.rb
CHANGED
@@ -1,161 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'camdict/string_ext'
|
3
|
+
|
1
4
|
module Camdict
|
5
|
+
# some common private methods used to extract Nokogiri nodes
|
2
6
|
module Common
|
3
|
-
|
4
|
-
# Extend String class.
|
5
|
-
String.class_eval do
|
6
|
-
# 'blow a kiss to/at sb'.flatten =>
|
7
|
-
# %q(blow a kiss to sb, blow a kiss at sb)
|
8
|
-
# if it doesn't include a slash, returns stripped string
|
9
|
-
def flatten
|
10
|
-
str = self.strip
|
11
|
-
# remove the space surrounding '/'
|
12
|
-
str = str.gsub /\s*\/\s*/, '/'
|
13
|
-
return str unless str.include? '/'
|
14
|
-
len = str.length
|
15
|
-
ret = []
|
16
|
-
# when two strings are passed in separated with ';', then separate them
|
17
|
-
if pos = str.index(';')
|
18
|
-
ret += str[0..pos-1].flatten
|
19
|
-
ret += str[pos+1..len-1].flatten
|
20
|
-
return ret
|
21
|
-
end
|
22
|
-
# when a string has round brackets meaning optional part
|
23
|
-
if str.include? '('
|
24
|
-
head, bracket, tail = str.partition(/\(.*\)/)
|
25
|
-
unless bracket.empty?
|
26
|
-
ret << (head.strip + tail).flatten
|
27
|
-
result = bracket.delete("()").flatten
|
28
|
-
result = [result] if result.is_a? String
|
29
|
-
result.each { |s|
|
30
|
-
ret << (head + s + tail).flatten
|
31
|
-
}
|
32
|
-
end
|
33
|
-
return ret.flatten
|
34
|
-
end
|
35
|
-
j=0 # count of the alternative words, 'to/at' has two.
|
36
|
-
b=[] # b[]/e[] index of the beginning/end of alternative words
|
37
|
-
e=[]
|
38
|
-
# set this flag when next word is expected an alternate word after slash
|
39
|
-
include_next = false
|
40
|
-
for i in 0..len-1
|
41
|
-
c = str[i]
|
42
|
-
case c
|
43
|
-
# valid char in a word
|
44
|
-
when /[[:alnum:]\-']/
|
45
|
-
if b[j].nil?
|
46
|
-
b[j] = i
|
47
|
-
e[j] = i
|
48
|
-
else
|
49
|
-
e[j] = i
|
50
|
-
end
|
51
|
-
# char means a word has ended
|
52
|
-
when " ", "!", "?", ",", "."
|
53
|
-
if include_next
|
54
|
-
break
|
55
|
-
else
|
56
|
-
b[j] = nil
|
57
|
-
e[j] = nil
|
58
|
-
end
|
59
|
-
# 'or' separator
|
60
|
-
when "/"
|
61
|
-
j += 1
|
62
|
-
include_next = true
|
63
|
-
else
|
64
|
-
raise NotImplementedError, "char '#{c}' found in '#{self}'."
|
65
|
-
end
|
66
|
-
end
|
67
|
-
if j > 0
|
68
|
-
for i in (0..j)
|
69
|
-
# alternative word is not the last word and not at the beginning
|
70
|
-
if (e[j]+1 < len) && (b[0] > 0)
|
71
|
-
ret << str[0..b[0]-1] + str[b[i]..e[i]] + str[e[j]+1..len-1]
|
72
|
-
elsif (e[j]+1 == len) && (b[0] > 0)
|
73
|
-
ret << str[0..b[0]-1] + str[b[i]..e[i]]
|
74
|
-
elsif (e[j]+1 < len) && (b[0] == 0)
|
75
|
-
ret << str[b[i]..e[i]] + str[e[j]+1..len-1]
|
76
|
-
else
|
77
|
-
ret << str[b[i]..e[i]]
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
ret
|
82
|
-
end
|
83
|
-
|
84
|
-
# Test whether a String includes the +word+. It's useful while testing
|
85
|
-
# a variable which might be an array of phrase or just a single phrase.
|
86
|
-
def has?(word)
|
87
|
-
self.include? word
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
# Extend Array class.
|
92
|
-
Array.class_eval do
|
93
|
-
# Expand a phrase array into a flattened one. Example,
|
94
|
-
# ['blow your nose', 'blow a kiss to/at sb'] #=>
|
95
|
-
# ['blow your nose', 'blow a kiss to sb', 'blow a kiss at sb']
|
96
|
-
def expand
|
97
|
-
ret = self.map { |p|
|
98
|
-
p.flatten if p.is_a? String
|
99
|
-
}
|
100
|
-
ret.flatten
|
101
|
-
end
|
102
|
-
|
103
|
-
|
104
|
-
# Test if a phrase array includes a +word+.
|
105
|
-
# ['blow your nose', 'blow a kiss to/at sb'].has?("a kiss at") #=>true
|
106
|
-
def has?(word)
|
107
|
-
self.expand.each { |phr|
|
108
|
-
return true if phr.include? word
|
109
|
-
}
|
110
|
-
false
|
111
|
-
end
|
112
|
-
|
113
|
-
# Iterate an array and return two elements +a+ +b+ each time for handling.
|
114
|
-
def each_pair
|
115
|
-
len = self.length
|
116
|
-
i = 0
|
117
|
-
while (i < len)
|
118
|
-
a = self.at(i)
|
119
|
-
b = self.at(i+1)
|
120
|
-
yield(a, b)
|
121
|
-
i += 2
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
7
|
private
|
8
|
+
|
127
9
|
# Get the text selected by the css +selector+.
|
128
|
-
def css_text(selector)
|
129
|
-
node =
|
10
|
+
def css_text(html, selector)
|
11
|
+
node = html.css(selector)
|
130
12
|
node.text unless node.empty?
|
131
13
|
end
|
132
14
|
|
133
|
-
# Get sth by the css +selector+ for the derived word inside its runon node
|
134
|
-
def derived_css(selector)
|
135
|
-
runon =
|
136
|
-
runon.each
|
15
|
+
# Get something by the css +selector+ for the derived word inside its runon node
|
16
|
+
def derived_css(html, selector)
|
17
|
+
runon = html.css('.runon')
|
18
|
+
runon.each do |r|
|
137
19
|
n = r.css('[title="Derived word"]')
|
138
|
-
if n.text == @word
|
20
|
+
if n.text == @word
|
139
21
|
node = r.css(selector)
|
140
22
|
yield(node)
|
141
23
|
end
|
142
|
-
|
24
|
+
end
|
143
25
|
end
|
144
26
|
|
27
|
+
using Camdict::StringExt
|
28
|
+
|
145
29
|
# Get something by the css +selector+ for the phrase inside the node phrase-block
|
146
|
-
def phrase_css(selector)
|
147
|
-
phbs =
|
148
|
-
phbs.each
|
30
|
+
def phrase_css(html, selector)
|
31
|
+
phbs = html.css('.phrase-block')
|
32
|
+
phbs.each do |phb|
|
149
33
|
nodes = phb.css('.phrase, .v[title="Variant form"]')
|
150
|
-
nodes.each
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
}
|
34
|
+
nodes.each do |n|
|
35
|
+
next unless n.text.flatten.has? @word
|
36
|
+
node = phb.css(selector)
|
37
|
+
yield(node)
|
38
|
+
break
|
39
|
+
end
|
40
|
+
end
|
158
41
|
end
|
159
|
-
|
160
42
|
end
|
161
43
|
end
|