camdict 1.0.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +28 -33
- data/lib/camdict/array_ext.rb +37 -0
- data/lib/camdict/client.rb +133 -97
- data/lib/camdict/common.rb +25 -143
- data/lib/camdict/definition.rb +65 -596
- data/lib/camdict/entry.rb +76 -0
- data/lib/camdict/exception.rb +5 -0
- data/lib/camdict/explanation.rb +29 -66
- data/lib/camdict/http_client.rb +14 -10
- data/lib/camdict/ipa.rb +52 -0
- data/lib/camdict/pronunciation.rb +53 -0
- data/lib/camdict/sentence.rb +38 -0
- data/lib/camdict/string_ext.rb +141 -0
- data/lib/camdict/word.rb +83 -17
- data/test/debug.rb +60 -0
- data/test/helper.rb +2 -0
- data/test/itest_client.rb +39 -8
- data/test/itest_definition.rb +24 -75
- data/test/itest_entry.rb +37 -0
- data/test/itest_explanation.rb +41 -20
- data/test/itest_ipa.rb +105 -0
- data/test/itest_pronunciation.rb +74 -0
- data/test/itest_word.rb +49 -0
- data/test/test_array_ext.rb +23 -0
- data/test/test_client.rb +35 -42
- data/test/test_common.rb +22 -78
- data/test/test_explanation.rb +21 -25
- data/test/test_http_client.rb +27 -13
- data/test/test_string_ext.rb +95 -0
- metadata +42 -7
- data/test/test_definition.rb +0 -345
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65f86517bb1f1674118ec92b379e0436fe9fdbec
|
4
|
+
data.tar.gz: 05b6e68ba21c6d9dbee96863ed2146ceea9d12e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f93f6da6b914e84efc540f1fc753b75034402f218048cbc825e478dc6310a609d4282ffb4edb31e34bd572a43093ce66c25c82b37436a9a1b66d0970724e4acc
|
7
|
+
data.tar.gz: bb5cc16f332a3c6c28b66f31c4e08b59e11d2aea7086c4ee87b65a58997116b9f8bd635f504c964c5e950b957b8ade3e985f7380f63de8124e5470e59d233a94
|
data/README.md
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
# A ruby gem - camdict
|
2
|
+
[![Build Status][travis-image]][travis-link]
|
3
|
+
[![Code Climate][climate-image]][climate-link]
|
2
4
|
|
3
|
-
## Introduction
|
5
|
+
## Introduction
|
4
6
|
|
5
|
-
The ruby gem camdict is a [Cambridge online dictionary][1] client.
|
7
|
+
The ruby gem camdict is a [Cambridge online dictionary][1] client.
|
6
8
|
You could use this excellent dictionary with a browser, but now it is possible
|
7
9
|
to use it with this ruby API in your code.
|
8
10
|
|
@@ -10,52 +12,45 @@ to use it with this ruby API in your code.
|
|
10
12
|
`gem install camdict`
|
11
13
|
|
12
14
|
## Verification
|
13
|
-
The gem can be tested by below commands in the directory where it's installed.
|
14
|
-
`rake` - run all the testcases which don't need internet connection.
|
15
|
-
`rake itest` - run all the testcases that need internet connection.
|
15
|
+
The gem can be tested with the commands below in the directory where it's installed.
|
16
|
+
`rake` - run all the testcases which don't need internet connection.
|
17
|
+
`rake itest` - run all the testcases that need internet connection.
|
16
18
|
`rake testall` - run all above tests.
|
17
19
|
|
18
|
-
One test may fail if the gem nokogiri hasn't pulled in the fix [here][2]. But
|
19
|
-
it is safe to apply the patch to your nokogiri copy.
|
20
|
-
|
21
20
|
## Usage
|
22
21
|
|
23
22
|
```ruby
|
24
23
|
require 'camdict'
|
25
24
|
|
26
25
|
# Look up a new word
|
27
|
-
word = Camdict::Word.new
|
28
|
-
|
29
|
-
# get all definitions for this word from remote dictionary and select the
|
30
|
-
# first one. A word usually has many definitions.
|
31
|
-
health = word.definitions.first
|
26
|
+
word = Camdict::Word.new 'health'
|
32
27
|
|
33
28
|
# Print the part of speech
|
34
29
|
puts health.part_of_speech #=> noun
|
35
30
|
|
36
|
-
#
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
explanation1.examples.each { |e|
|
47
|
-
puts e.sentence #=>
|
48
|
-
# to be in good/poor health
|
49
|
-
# Regular exercise is good for your health.
|
50
|
-
# I had to give up drinking for health reasons.
|
51
|
-
# He gave up work because of ill health.
|
52
|
-
}
|
31
|
+
# What's the first meaning
|
32
|
+
puts health.meaning #=>
|
33
|
+
# the condition of the body and the degree to which it is free from
|
34
|
+
# illness, or the state of being well:
|
35
|
+
|
36
|
+
# all meanings
|
37
|
+
puts health.meanings #=> in addition to above meaning, it prints
|
38
|
+
# the condition of something that changes or develops, such as an
|
39
|
+
# organization or system:
|
40
|
+
|
53
41
|
```
|
54
42
|
|
55
|
-
|
43
|
+
Need more? Try `health.print` to show more data in a friendly format.
|
44
|
+
|
45
|
+
## Versioning
|
46
|
+
The release of this gem follows the [semantic versioning rules][2].
|
56
47
|
|
57
48
|
## Licence MIT
|
58
|
-
Copyright (c) 2014 Pan Gaoyong
|
49
|
+
Copyright (c) 2014-2017 Pan Gaoyong
|
59
50
|
|
60
51
|
[1]: http://dictionary.cambridge.com "Cambridge"
|
61
|
-
[2]:
|
52
|
+
[2]: http://semver.org
|
53
|
+
[travis-image]: https://travis-ci.org/pan/camdict.svg?branch=master
|
54
|
+
[travis-link]: https://travis-ci.org/pan/camdict
|
55
|
+
[climate-image]: https://codeclimate.com/github/pan/camdict/badges/gpa.svg
|
56
|
+
[climate-link]: https://codeclimate.com/github/pan/camdict
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'camdict/string_ext'
|
3
|
+
|
4
|
+
module Camdict
|
5
|
+
# Extension: Refine Array class.
|
6
|
+
module ArrayExt
|
7
|
+
refine Array do
|
8
|
+
# Iterate an array and return two elements +a+ +b+ each time for handling.
|
9
|
+
def each_pair
|
10
|
+
len = length
|
11
|
+
i = 0
|
12
|
+
while i < len
|
13
|
+
a = at(i)
|
14
|
+
b = at(i + 1)
|
15
|
+
yield(a, b)
|
16
|
+
i += 2
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Test if a phrase array includes a +word+.
|
21
|
+
# ['blow your nose', 'blow a kiss to/at sb'].has?("a kiss at") #=> true
|
22
|
+
def has?(word)
|
23
|
+
expand.each { |phr| return true if phr.include? word }
|
24
|
+
false
|
25
|
+
end
|
26
|
+
|
27
|
+
using Camdict::StringExt
|
28
|
+
|
29
|
+
# Expand a phrase array into a flattened one. Example,
|
30
|
+
# ['blow your nose', 'blow a kiss to/at sb'] #=>
|
31
|
+
# ['blow your nose', 'blow a kiss to sb', 'blow a kiss at sb']
|
32
|
+
def expand
|
33
|
+
map { |p| p&.flatten || p }.flatten
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/camdict/client.rb
CHANGED
@@ -1,117 +1,171 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'camdict/http_client'
|
3
|
+
require 'camdict/string_ext'
|
4
|
+
require 'camdict/exception'
|
2
5
|
|
3
6
|
module Camdict
|
4
|
-
|
5
|
-
#
|
6
|
-
# remote Cambridge dictionaries, but not includes the extended data.
|
7
|
+
# The client downloads all the useful data about a word or phrase from
|
8
|
+
# remote Cambridge dictionaries, but does not include the extended data.
|
7
9
|
# For example,
|
8
|
-
# when the word "mind" is searched,
|
9
|
-
#
|
10
|
+
# when the word "mind" is searched, the exactly matched entry is downloaded.
|
11
|
+
# However, other related entries like "turn of mind" & "open mind"
|
10
12
|
# are not included.
|
11
|
-
class Client
|
12
|
-
|
13
|
-
# Default dictionary is english
|
14
|
-
# Other possible +dict+ values:
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# Get a word's html definition
|
22
|
-
# The returned result could be an empty
|
23
|
-
#
|
24
|
-
# [{ word => html definition }],
|
25
|
-
# or many hash elements when it has multiple entries,
|
26
|
-
# [{ entry_id => html definition }, ...].
|
27
|
-
# Normally, when a +word+ has more than one meanings, its entry ID format is
|
28
|
-
# like word_nn. Otherwise it's just the word itself.
|
13
|
+
class Client < HTTP::Client
|
14
|
+
attr_reader :dictionary
|
15
|
+
# Default dictionary is British english.
|
16
|
+
# Other possible +dict+ values:
|
17
|
+
# english-chinese-simplified, learner-english,
|
18
|
+
# essential-british-english, essential-american-english, etc.
|
19
|
+
def initialize(dict = nil)
|
20
|
+
@dictionary = dict || 'english'
|
21
|
+
end
|
22
|
+
|
23
|
+
# Get a word's html definition from the web dictionary.
|
24
|
+
# The returned result could be an empty string when nothing is found, or
|
25
|
+
# its html definition
|
29
26
|
def html_definition(word)
|
30
27
|
html = fetch(word)
|
31
|
-
|
32
|
-
|
33
|
-
# some words return their only definition directly, such as aluminium.
|
34
|
-
if definition_page? html
|
35
|
-
# entry id is just the word when there is only one definition
|
36
|
-
html_defs << { word => di_head(html) + di_body(html) }
|
28
|
+
if html
|
29
|
+
di_extracted(html)
|
37
30
|
else
|
38
|
-
|
39
|
-
# not found, or the found page with all matched entries and related.
|
40
|
-
# when entry urls are not found, they are empty and spelling suggestion
|
41
|
-
# pages. So mentry_links() returns an empty array. Otherwise, it returns
|
42
|
-
# all the exactly matched entry links.
|
43
|
-
matched_urls = mentry_links(word, html)
|
44
|
-
unless matched_urls.empty?
|
45
|
-
matched_urls.each { |url|
|
46
|
-
html_defs << { entry_id(url) => get_htmldef(url) }
|
47
|
-
}
|
48
|
-
end
|
31
|
+
search(word)
|
49
32
|
end
|
50
|
-
html_defs
|
51
33
|
end
|
52
34
|
|
53
|
-
# Get a word html page
|
35
|
+
# Get a word html definition page by its entry +url+.
|
54
36
|
def get_htmldef(url)
|
55
|
-
|
56
|
-
|
37
|
+
di_extracted get_html(url)
|
38
|
+
end
|
39
|
+
|
40
|
+
# search a word with this URL
|
41
|
+
def search_url(word)
|
42
|
+
"#{host}/search/#{@dictionary}/?q=#{word}"
|
43
|
+
end
|
44
|
+
|
45
|
+
def word_url(word)
|
46
|
+
"#{host}/dictionary/#{@dictionary}/#{encode(word).downcase}"
|
57
47
|
end
|
58
48
|
|
59
49
|
private
|
60
50
|
|
51
|
+
def search(word)
|
52
|
+
html = try_search(word)
|
53
|
+
return '' unless html
|
54
|
+
# some words return their only definition directly, such as plagiarism.
|
55
|
+
if single_def?(html)
|
56
|
+
di_extracted(html)
|
57
|
+
else
|
58
|
+
multiple_entries(word, html).join
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def host
|
63
|
+
'http://dictionary.cambridge.org'
|
64
|
+
end
|
65
|
+
|
66
|
+
# returned page could be a spelling check suggestion page in case it is
|
67
|
+
# not found, or the found page with all matched entries and related.
|
68
|
+
# when entry urls are not found, they are empty and spelling suggestion
|
69
|
+
# pages. It returns all the exactly matched entry links otherwise, raise
|
70
|
+
# exception WordNotFound.
|
71
|
+
def multiple_entries(word, html)
|
72
|
+
html_defs = []
|
73
|
+
mentry_links(word, html).each do |url|
|
74
|
+
html_content = get_htmldef(url)
|
75
|
+
html_defs << html_content if html_content
|
76
|
+
end
|
77
|
+
raise WordNotFound, "#{word} not found" if html_defs.empty?
|
78
|
+
html_defs
|
79
|
+
end
|
80
|
+
|
81
|
+
def try_search(word)
|
82
|
+
get_html(search_url(word))
|
83
|
+
rescue OpenURI::HTTPError => e
|
84
|
+
# When a word does not match any definitions, it returns 404 not found.
|
85
|
+
return if e.message[0..2] == '404'
|
86
|
+
end
|
87
|
+
|
61
88
|
# Fetch word searching result page.
|
62
89
|
# Returned result is either just a single definition page if there is only
|
63
|
-
# one entry, or a result page listing all possible entries, or spelling
|
90
|
+
# one entry, or a result page listing all possible entries, or spelling
|
64
91
|
# check result. All results are objects of Nokogiri::HTML.
|
65
92
|
def fetch(w)
|
66
|
-
|
67
|
-
|
68
|
-
url = search_url + w
|
69
|
-
begin
|
70
|
-
Camdict::HTTP::Client.get_html(url)
|
71
|
-
rescue OpenURI::HTTPError => e
|
72
|
-
# "404" == e.message[0..2], When a word is not found, it returns 404
|
73
|
-
# Not Found and spelling suggestions page.
|
74
|
-
end
|
93
|
+
ret = get_html(word_url(w))
|
94
|
+
ret if definition_page? ret
|
75
95
|
end
|
76
96
|
|
77
|
-
# To determine whether or not the input object of Nokogiri::HTML is a page
|
97
|
+
# To determine whether or not the input object of Nokogiri::HTML is a page
|
78
98
|
# of a word definition. Return true if it has a source structure like this,
|
79
99
|
# <div class="di-head">
|
80
100
|
# <div class="di-title">
|
81
101
|
# <h1 class="hw">
|
82
102
|
# This works for the translation page too, like English-Spanish.
|
83
103
|
def single_def?(html)
|
84
|
-
node = html.css(
|
85
|
-
!
|
104
|
+
node = html.css('.di-head .di-title .hw')
|
105
|
+
!node.empty?
|
86
106
|
end
|
87
107
|
|
88
108
|
# Find out matched entry links from search result page
|
89
|
-
# <ul class="
|
90
|
-
# <li><a href="
|
91
|
-
#
|
92
|
-
# The
|
93
|
-
# The extended links are filtered out and the matched word or phrase's
|
109
|
+
# <ul class="prefix-block">
|
110
|
+
# <li><a href="entry_link">
|
111
|
+
# The search result html page should include above piece of code.
|
112
|
+
# The extended links are filtered out and the matched word or phrase's
|
94
113
|
# links are kept. An array of them are returned.
|
95
|
-
# For example, when the searched word is "related", entry links are like,
|
96
|
-
# http://dictionary.cambridge.org/dictionary/
|
97
|
-
# http://dictionary.cambridge.org/dictionary/
|
98
|
-
# http://dictionary.cambridge.org/dictionary/british/stress-related
|
114
|
+
# For example, when the searched word is "related", entry links are like,
|
115
|
+
# http://dictionary.cambridge.org/dictionary/english/related
|
116
|
+
# http://dictionary.cambridge.org/dictionary/english/relate
|
99
117
|
# ...
|
100
|
-
# Returned result should only contain the first
|
118
|
+
# Returned result should only contain the first one.
|
101
119
|
# Input html is an object of Nokogiri::HTML.
|
102
120
|
def mentry_links(word, html)
|
103
121
|
# suppose the word is not found in the dictionary, so it is empty.
|
104
122
|
links = []
|
105
|
-
nodes = html.css(
|
106
|
-
|
107
|
-
unless nodes.empty?
|
108
|
-
nodes.each { |a|
|
109
|
-
links << a['href'] if matched_word?(word, a)
|
110
|
-
}
|
111
|
-
end
|
123
|
+
nodes = html.css('.prefix-block a')
|
124
|
+
nodes.each { |a| links << a['href'] if matched_word?(word, a) }
|
112
125
|
links
|
113
126
|
end
|
114
127
|
|
128
|
+
# Extract definition head and body from Nokogiri::HTML, discard share links
|
129
|
+
def di_extracted(html)
|
130
|
+
body = di_body(html)
|
131
|
+
# searching aluminium returns an American or British english page
|
132
|
+
# separately; the condition below filters out the American English result
|
133
|
+
return if body.empty?
|
134
|
+
body.css('.share').each { |s| body.delete s }
|
135
|
+
body
|
136
|
+
end
|
137
|
+
|
138
|
+
# Return definition body in html source
|
139
|
+
def di_body(html)
|
140
|
+
html.css("#{tab_css} .di-body")
|
141
|
+
end
|
142
|
+
|
143
|
+
# the css selecting a tab
|
144
|
+
def tab_css
|
145
|
+
"[#{tab}]"
|
146
|
+
end
|
147
|
+
|
148
|
+
# the tab attributes according to dictionary name
|
149
|
+
def tab
|
150
|
+
case @dictionary
|
151
|
+
when 'english'
|
152
|
+
'data-tab="ds-british"'
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
# get the last part of http://dictionary.cambridge.org/british/related_1
|
157
|
+
def entry_id(url)
|
158
|
+
url.split('/').last
|
159
|
+
end
|
160
|
+
|
161
|
+
# phrase with space and single quote has to be replaced with dash
|
162
|
+
def encode(word)
|
163
|
+
word.gsub(/[ ']/, '-')
|
164
|
+
end
|
165
|
+
|
166
|
+
alias definition_page? single_def?
|
167
|
+
|
168
|
+
using Camdict::StringExt
|
115
169
|
# Return true if the searched word matches the one on result page.
|
116
170
|
# Node is an object of Nokogiri::Node
|
117
171
|
# <li>
|
@@ -119,36 +173,18 @@ module Camdict
|
|
119
173
|
# <b class="phrase">out of mind, or
|
120
174
|
# <b class="hw">turn of mind, or
|
121
175
|
# <b class="w">mind-numbingly
|
122
|
-
# Match criterion: the queried word should equal to the result word;
|
176
|
+
# Match criterion: the queried word should be equal to the result word;
|
123
177
|
# the result phrase should be flattened, which should equal to the
|
124
178
|
# queried phrase.
|
125
179
|
def matched_word?(word, node)
|
126
|
-
li = node.css(
|
180
|
+
li = node.css('.base')
|
181
|
+
return false if li.empty?
|
127
182
|
resword = li.size == 1 ? li.text : li[0].text
|
128
|
-
if resword.include?
|
183
|
+
if resword.include?('/') || resword.include?(';')
|
129
184
|
resword.flatten.include?(word)
|
130
185
|
else
|
131
186
|
word == resword
|
132
187
|
end
|
133
188
|
end
|
134
|
-
|
135
|
-
# Return definition head in html source
|
136
|
-
def di_head(html)
|
137
|
-
html.css(".cdo-section-title-hw").to_html(:save_with=>0) +
|
138
|
-
html.css(".di-info").to_html(:save_with=>0)
|
139
|
-
end
|
140
|
-
|
141
|
-
# Return definition body in html source
|
142
|
-
def di_body(html)
|
143
|
-
html.css(".di-body").to_html(:save_with=>0)
|
144
|
-
end
|
145
|
-
|
146
|
-
# get the last part of http://dictionary.cambridge.org/british/related_1
|
147
|
-
def entry_id(url)
|
148
|
-
url.split('/').last
|
149
|
-
end
|
150
|
-
|
151
|
-
alias :definition_page? :single_def?
|
152
|
-
|
153
189
|
end
|
154
190
|
end
|
data/lib/camdict/common.rb
CHANGED
@@ -1,161 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'camdict/string_ext'
|
3
|
+
|
1
4
|
module Camdict
|
5
|
+
# some common private methods used to extract Nokogiri nodes
|
2
6
|
module Common
|
3
|
-
|
4
|
-
# Extend String class.
|
5
|
-
String.class_eval do
|
6
|
-
# 'blow a kiss to/at sb'.flatten =>
|
7
|
-
# %q(blow a kiss to sb, blow a kiss at sb)
|
8
|
-
# if it doesn't include a slash, returns stripped string
|
9
|
-
def flatten
|
10
|
-
str = self.strip
|
11
|
-
# remove the space surrounding '/'
|
12
|
-
str = str.gsub /\s*\/\s*/, '/'
|
13
|
-
return str unless str.include? '/'
|
14
|
-
len = str.length
|
15
|
-
ret = []
|
16
|
-
# when two strings are passed in separated with ';', then separate them
|
17
|
-
if pos = str.index(';')
|
18
|
-
ret += str[0..pos-1].flatten
|
19
|
-
ret += str[pos+1..len-1].flatten
|
20
|
-
return ret
|
21
|
-
end
|
22
|
-
# when a string has round brackets meaning optional part
|
23
|
-
if str.include? '('
|
24
|
-
head, bracket, tail = str.partition(/\(.*\)/)
|
25
|
-
unless bracket.empty?
|
26
|
-
ret << (head.strip + tail).flatten
|
27
|
-
result = bracket.delete("()").flatten
|
28
|
-
result = [result] if result.is_a? String
|
29
|
-
result.each { |s|
|
30
|
-
ret << (head + s + tail).flatten
|
31
|
-
}
|
32
|
-
end
|
33
|
-
return ret.flatten
|
34
|
-
end
|
35
|
-
j=0 # count of the alternative words, 'to/at' has two.
|
36
|
-
b=[] # b[]/e[] index of the beginning/end of alternative words
|
37
|
-
e=[]
|
38
|
-
# set this flag when next word is expected an alternate word after slash
|
39
|
-
include_next = false
|
40
|
-
for i in 0..len-1
|
41
|
-
c = str[i]
|
42
|
-
case c
|
43
|
-
# valid char in a word
|
44
|
-
when /[[:alnum:]\-']/
|
45
|
-
if b[j].nil?
|
46
|
-
b[j] = i
|
47
|
-
e[j] = i
|
48
|
-
else
|
49
|
-
e[j] = i
|
50
|
-
end
|
51
|
-
# char means a word has ended
|
52
|
-
when " ", "!", "?", ",", "."
|
53
|
-
if include_next
|
54
|
-
break
|
55
|
-
else
|
56
|
-
b[j] = nil
|
57
|
-
e[j] = nil
|
58
|
-
end
|
59
|
-
# 'or' separator
|
60
|
-
when "/"
|
61
|
-
j += 1
|
62
|
-
include_next = true
|
63
|
-
else
|
64
|
-
raise NotImplementedError, "char '#{c}' found in '#{self}'."
|
65
|
-
end
|
66
|
-
end
|
67
|
-
if j > 0
|
68
|
-
for i in (0..j)
|
69
|
-
# alternative word is not the last word and not at the beginning
|
70
|
-
if (e[j]+1 < len) && (b[0] > 0)
|
71
|
-
ret << str[0..b[0]-1] + str[b[i]..e[i]] + str[e[j]+1..len-1]
|
72
|
-
elsif (e[j]+1 == len) && (b[0] > 0)
|
73
|
-
ret << str[0..b[0]-1] + str[b[i]..e[i]]
|
74
|
-
elsif (e[j]+1 < len) && (b[0] == 0)
|
75
|
-
ret << str[b[i]..e[i]] + str[e[j]+1..len-1]
|
76
|
-
else
|
77
|
-
ret << str[b[i]..e[i]]
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
ret
|
82
|
-
end
|
83
|
-
|
84
|
-
# Test whether a String includes the +word+. It's useful while testing
|
85
|
-
# a variable which might be an array of phrase or just a single phrase.
|
86
|
-
def has?(word)
|
87
|
-
self.include? word
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
# Extend Array class.
|
92
|
-
Array.class_eval do
|
93
|
-
# Expand a phrase array into a flattened one. Example,
|
94
|
-
# ['blow your nose', 'blow a kiss to/at sb'] #=>
|
95
|
-
# ['blow your nose', 'blow a kiss to sb', 'blow a kiss at sb']
|
96
|
-
def expand
|
97
|
-
ret = self.map { |p|
|
98
|
-
p.flatten if p.is_a? String
|
99
|
-
}
|
100
|
-
ret.flatten
|
101
|
-
end
|
102
|
-
|
103
|
-
|
104
|
-
# Test if a phrase array includes a +word+.
|
105
|
-
# ['blow your nose', 'blow a kiss to/at sb'].has?("a kiss at") #=>true
|
106
|
-
def has?(word)
|
107
|
-
self.expand.each { |phr|
|
108
|
-
return true if phr.include? word
|
109
|
-
}
|
110
|
-
false
|
111
|
-
end
|
112
|
-
|
113
|
-
# Iterate an array and return two elements +a+ +b+ each time for handling.
|
114
|
-
def each_pair
|
115
|
-
len = self.length
|
116
|
-
i = 0
|
117
|
-
while (i < len)
|
118
|
-
a = self.at(i)
|
119
|
-
b = self.at(i+1)
|
120
|
-
yield(a, b)
|
121
|
-
i += 2
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
7
|
private
|
8
|
+
|
127
9
|
# Get the text selected by the css +selector+.
|
128
|
-
def css_text(selector)
|
129
|
-
node =
|
10
|
+
def css_text(html, selector)
|
11
|
+
node = html.css(selector)
|
130
12
|
node.text unless node.empty?
|
131
13
|
end
|
132
14
|
|
133
|
-
# Get sth by the css +selector+ for the derived word inside its runon node
|
134
|
-
def derived_css(selector)
|
135
|
-
runon =
|
136
|
-
runon.each
|
15
|
+
# Get sth by the css +selector+ for the derived word inside its runon node
|
16
|
+
def derived_css(html, selector)
|
17
|
+
runon = html.css('.runon')
|
18
|
+
runon.each do |r|
|
137
19
|
n = r.css('[title="Derived word"]')
|
138
|
-
if n.text == @word
|
20
|
+
if n.text == @word
|
139
21
|
node = r.css(selector)
|
140
22
|
yield(node)
|
141
23
|
end
|
142
|
-
|
24
|
+
end
|
143
25
|
end
|
144
26
|
|
27
|
+
using Camdict::StringExt
|
28
|
+
|
145
29
|
# Get sth by the css +selector+ for the phrase inside the node phrase-block
|
146
|
-
def phrase_css(selector)
|
147
|
-
phbs =
|
148
|
-
phbs.each
|
30
|
+
def phrase_css(html, selector)
|
31
|
+
phbs = html.css('.phrase-block')
|
32
|
+
phbs.each do |phb|
|
149
33
|
nodes = phb.css('.phrase, .v[title="Variant form"]')
|
150
|
-
nodes.each
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
}
|
34
|
+
nodes.each do |n|
|
35
|
+
next unless n.text.flatten.has? @word
|
36
|
+
node = phb.css(selector)
|
37
|
+
yield(node)
|
38
|
+
break
|
39
|
+
end
|
40
|
+
end
|
158
41
|
end
|
159
|
-
|
160
42
|
end
|
161
43
|
end
|