cantonese 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cd26a3b32ad12b765087bfacfc12d15872fb8449
|
4
|
+
data.tar.gz: 237081e5067765b2687f85814cde13f52274c5bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04730e07d52a91228cd157eaaf08ccc012fcc2ae453dc37bde26ffa7d5e413ebcef3b1d3c0fdf9491dd14bf47b34101a65a5c464d68ab785708ce5c08a0c7449
|
7
|
+
data.tar.gz: 7cca9753f3bd2fcbd9e954252c33404c8030dacb48f8fc14753a5d513ea20c13e08b13ec070445663001953302ffda7933039bfde62ba584705dff0e856d880d
|
data/cantonese.gemspec
CHANGED
@@ -1,16 +1,12 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'open-uri'
|
3
3
|
require 'cgi'
|
4
|
+
require 'tidy_ffi'
|
4
5
|
|
5
6
|
module Cantonese
|
6
7
|
module Scraper
|
7
8
|
class WordScraper
|
8
9
|
def crawl(word)
|
9
|
-
html = fetch(word)
|
10
|
-
process(html)
|
11
|
-
end
|
12
|
-
|
13
|
-
def fetch(word)
|
14
10
|
# convert word parameter into big5
|
15
11
|
word_big5 = word.encode('Big5', 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '')
|
16
12
|
url = "http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/search.php?q=" + CGI.escape(word_big5)
|
@@ -18,11 +14,9 @@ module Cantonese
|
|
18
14
|
# fetch and get the page in UTF8
|
19
15
|
html = open(url).read
|
20
16
|
html = html.encode('UTF-8', 'Big5', :invalid => :replace, :undef => :replace, :replace => '?')
|
21
|
-
|
17
|
+
html = TidyFFI::Tidy.clean(html.gsub(/\0/, ''))
|
22
18
|
|
23
|
-
|
24
|
-
doc = Nokogiri::HTML(html, nil, 'UTF-8')
|
25
|
-
|
19
|
+
doc = Nokogiri::HTML(html, nil, 'UTF-8')
|
26
20
|
word = doc.search(".w").first.text
|
27
21
|
|
28
22
|
radical_id = doc.search("//*[@class = 't' and .='部首:']/following-sibling::td[1]").text.strip.tr('[] ', '').to_i rescue nil
|
@@ -36,9 +30,9 @@ module Cantonese
|
|
36
30
|
|
37
31
|
syllable = doc.search('//form/table[1]/tr[position()>1]').collect do |row|
|
38
32
|
sound = row.search("./td[1]")
|
39
|
-
initial = sound.xpath("./*[@color='red']").text rescue ""
|
40
|
-
final = sound.xpath("./*[@color='green']").text rescue ""
|
41
|
-
tone = sound.xpath("./*[@color='blue']").text rescue ""
|
33
|
+
initial = sound.xpath("./*[@color='red']").text.strip rescue ""
|
34
|
+
final = sound.xpath("./*[@color='green']").text.strip rescue ""
|
35
|
+
tone = sound.xpath("./*[@color='blue']").text.strip rescue ""
|
42
36
|
sound_text = sound.text
|
43
37
|
pronunciation = "http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/sound/#{sound_text}.wav"
|
44
38
|
|
@@ -52,8 +46,17 @@ module Cantonese
|
|
52
46
|
note_text = nil
|
53
47
|
end
|
54
48
|
|
49
|
+
full = "#{initial}#{final}#{tone}"
|
50
|
+
|
51
|
+
# patch to fix error on database
|
52
|
+
if full == "6bwik1"
|
53
|
+
full = "kwik1"
|
54
|
+
initial = "k"
|
55
|
+
pronunciation = "http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/sound/#{full}.wav"
|
56
|
+
end
|
57
|
+
|
55
58
|
{
|
56
|
-
:full =>
|
59
|
+
:full => full,
|
57
60
|
:initial => initial,
|
58
61
|
:final => final,
|
59
62
|
:tone => tone,
|
@@ -62,8 +65,8 @@ module Cantonese
|
|
62
65
|
:note => note_text
|
63
66
|
}
|
64
67
|
end
|
65
|
-
|
66
68
|
{
|
69
|
+
:url => url,
|
67
70
|
:text => word,
|
68
71
|
:radical_id => radical_id,
|
69
72
|
:stroke => stroke,
|
data/lib/cantonese/version.rb
CHANGED
@@ -0,0 +1,325 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/search.php?q=%F3p
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept-Encoding:
|
11
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
12
|
+
Accept:
|
13
|
+
- "*/*"
|
14
|
+
User-Agent:
|
15
|
+
- Ruby
|
16
|
+
response:
|
17
|
+
status:
|
18
|
+
code: 200
|
19
|
+
message: OK
|
20
|
+
headers:
|
21
|
+
Date:
|
22
|
+
- Mon, 31 Mar 2014 10:23:38 GMT
|
23
|
+
Server:
|
24
|
+
- Apache/2.2.15 (CentOS)
|
25
|
+
X-Powered-By:
|
26
|
+
- PHP/5.3.3
|
27
|
+
Content-Length:
|
28
|
+
- '5636'
|
29
|
+
Connection:
|
30
|
+
- close
|
31
|
+
Content-Type:
|
32
|
+
- text/html
|
33
|
+
body:
|
34
|
+
encoding: ASCII-8BIT
|
35
|
+
string: !binary |-
|
36
|
+
PGh0bWw+PGhlYWQ+PHRpdGxlPrhmu3m8Zq21sHS1/KZyrnc8L3RpdGxlPjxz
|
37
|
+
dHlsZSB0eXBlPSJ0ZXh0L2NzcyI+YSB7IHRleHQtZGVjb3JhdGlvbjogbm9u
|
38
|
+
ZX0gLnRleHQgeyBsaW5lLWhlaWdodDogMTUwJSB9PC9zdHlsZT48bWV0YSBo
|
39
|
+
dHRwLWVxdWl2PSJDb250ZW50LVR5cGUiIGNvbnRlbnQ9InRleHQvaHRtbDsg
|
40
|
+
Y2hhcnNldD1iaWc1Ij48c2NyaXB0IGxhbmd1YWdlPSJKYXZhU2NyaXB0Ij4K
|
41
|
+
PCEtLQpmdW5jdGlvbiBNTV9qdW1wTWVudSh0YXJnLHNlbE9iaixyZXN0b3Jl
|
42
|
+
KXsgLy92My4wCiAgZXZhbCh0YXJnKyIubG9jYXRpb249JyIrc2VsT2JqLm9w
|
43
|
+
dGlvbnNbc2VsT2JqLnNlbGVjdGVkSW5kZXhdLnZhbHVlKyInIik7CiAgaWYg
|
44
|
+
KHJlc3RvcmUpIHNlbE9iai5zZWxlY3RlZEluZGV4PTA7Cn0KZnVuY3Rpb24g
|
45
|
+
cmVmICh1cmwpIHsKICByZXdpbj13aW5kb3cub3Blbih1cmwsJ3JlZicsJ3Rv
|
46
|
+
b2Jhcj0wLHN0YXR1cz0wLHNjcm9sbGJhcnM9MSxyZXNpemFibGU9MSx3aWR0
|
47
|
+
aD02MDAsaGVpZ2h0PTMwMCcpOwogIHNldFRpbWVvdXQgKCdyZXdpbi5mb2N1
|
48
|
+
cygpJywgMTAwKTsKfQovLy0tPgo8L3NjcmlwdD4KPHN0eWxlIHR5cGU9InRl
|
49
|
+
eHQvY3NzIj4KLnQgeyBmb250LXNpemU6IDEzOyBub3dyYXA7IHRleHQtYWxp
|
50
|
+
Z246IHJpZ2h0OyBjb2xvcjogbmF2eX0KLnQyIHsgZm9udC1zaXplOiAxMzsg
|
51
|
+
bm93cmFwOyB0ZXh0LWFsaWduOiBsZWZ0fQoudDMgeyBmb250LXNpemU6IDEz
|
52
|
+
OyBub3dyYXA7IHRleHQtYWxpZ246IGNlbnRlcn0KLncgeyBmb250LXNpemU6
|
53
|
+
IDM2OyBmb250LXdlaWdodDogYm9sZDsgY29sb3I6IHJlZDsgdGV4dC1hbGln
|
54
|
+
bjogY2VudGVyIH0KPC9zdHlsZT4KPHNjcmlwdCBsYW5ndWFnZT0iSmF2YVNj
|
55
|
+
cmlwdCI+CmZ1bmN0aW9uIHhpZF9kb3duKFhpZCkgewoJaWYgKGRvY3VtZW50
|
56
|
+
LmFsbFtYaWRdLnN0eWxlLmRpc3BsYXkgPT0gIm5vbmUiKSB7CgkJZG9jdW1l
|
57
|
+
bnQuYWxsW1hpZF0uc3R5bGUuZGlzcGxheSA9ICJibG9jayI7Cgl9IGVsc2Ug
|
58
|
+
ewoJCWRvY3VtZW50LmFsbFtYaWRdLnN0eWxlLmRpc3BsYXkgPSAibm9uZSI7
|
59
|
+
Cgl9Cn0KPC9zY3JpcHQ+PC9oZWFkPjxib2R5IGJhY2tncm91bmQ9Ii9MZXhp
|
60
|
+
cy9sZXhpLWNhbi9pbWcvcHBiazAxNC5qcGciID48dGFibGUgd2lkdGg9IjEw
|
61
|
+
MCUiIGJvcmRlcj0iMCI+CiAgPHRyPiAKICAgIDx0ZCByb3dzcGFuPSIyIiBj
|
62
|
+
bGFzcz13PvNwPC90ZD4KICAgIDx0ZCBjbGFzcz10PrOhrbo6PC90ZD4KCQk8
|
63
|
+
dGQgY2xhc3M9dDI+PGEgaHJlZj0icmFkLXN0ci5waHA/cmFkPTE2NyI+PGlt
|
64
|
+
ZyBzcmM9ImltZy9yYWQvcmFkMTY3LmdpZiIgYm9yZGVyPTAgYWxpZ249YWJz
|
65
|
+
bWlkZGxlPiBbMTY3XTwvYT48L3RkPgogICAgPHRkIGNsYXNzPXQ+tae1ZTo8
|
66
|
+
L3RkPgoJCTx0ZCBjbGFzcz10Mj48YSBocmVmPSJyYWQtc3RyLnBocD9zdHI9
|
67
|
+
MTkiPjE5PC9hPjwvdGQ+CiAgICA8dGQgY2xhc3M9dD6mcq21pMDD/jo8L3Rk
|
68
|
+
PgoJCTx0ZCBjbGFzcz10MyBiZ2NvbG9yPXllbGxvdz48YSBocmVmPSJjbGFz
|
69
|
+
c2lmaWVkLnBocD9zdD0yIj6vfa21pnI8L2E+PC90ZD4KCQk8dGQgYWxpZ249
|
70
|
+
Y2VudGVyPjxhIGhyZWY9IiMiIG9uQ2xpY2s9InJlZignaHR0cDovL3pob25n
|
71
|
+
d2VuLmNvbS9kLzI0My94MTEyLmh0bScpIj48aW1nIHNyYz0iL0ltZy96aG9u
|
72
|
+
Z3B1LmpwZyIgYm9yZGVyPTA+PC9hPiA8IS0tYSBocmVmPSIjIiBvbkNsaWNr
|
73
|
+
PSJyZWYoJ2h0dHA6Ly8xNDAuMTExLjM0LjQ2L2NnaS1iaW4vZGljdC9uZXdz
|
74
|
+
ZWFyY2guY2dpP0RhdGFiYXNlPWRpY3QmUXVlcnlTY29wZT1OYW1lJlF1ZXJ5
|
75
|
+
Q29tbWFuZD1maW5kJkdyYXBoaWNXb3JkPXllcyZRdWVyeVN0cmluZz0lRjNw
|
76
|
+
JykiLS0+CgkJPGEgaHJlZj0iIyIgb25DbGljaz0icmVmKCdodHRwOi8vMTQw
|
77
|
+
LjExMS4zNC40Ni9jZ2ktYmluL25ld0RpY3QvZGljdC5zaD9jb25kPSVGM3Am
|
78
|
+
cGllY2VMZW49NTAmZmxkPTEmY2F0PSZ1a2V5PS02MjQ3MjExODgmc2VyaWFs
|
79
|
+
PTMmcmVjTm89MCZvcD0maW1nRm9udD0xJykiPgoJCTxpbWcgc3JjPSIvSW1n
|
80
|
+
L2d5Y2QyYS5naWYiIGJvcmRlcj0wPjwvYT48L3RkPgogIDwvdHI+CiAgPHRy
|
81
|
+
PiAKICAgIDx0ZCBjbGFzcz10PqRqpK29WDo8L3RkPgoJCTx0ZCBjbGFzcz10
|
82
|
+
Mj5GMzcwPC90ZD4KICAgIDx0ZCBjbGFzcz10Pq3cvmW9WDo8L3RkPgoJCTx0
|
83
|
+
ZCBjbGFzcz10Mj6q96Tgw/ik3zwvdGQ+CiAgICA8dGQgY2xhc3M9dD7AV6fH
|
84
|
+
IC8gwFemuDo8L3RkPgoJCTx0ZCBjbGFzcz10Mj4tIC8gMDwvdGQ+CiAgICA8
|
85
|
+
dGQgYWxpZ249Y2VudGVyPjxhIGhyZWY9IiMiIG9uQ2xpY2s9InJlZignaHR0
|
86
|
+
cDovL2Vwc2lsb24zLmdlb3JnZXRvd24uZWR1L35wZXRlcnNlZS9jZ2ktYmlu
|
87
|
+
L3dvcmRsb29rLmNnaT9zZWFyY2h0eXBlPWJpZzUmd2hlcmU9YW55d2hlcmUm
|
88
|
+
d29yZD0lRjNwJykiPjxpbWcgc3JjPSIvSW1nL2NlZGljdDJfbmV3LmdpZiIg
|
89
|
+
Ym9yZGVyPTA+PC9hPiA8YSBocmVmPSIjIiBvbkNsaWNrPSJyZWYoJy9jZ2kt
|
90
|
+
YmluL2FncmVwLWxpbmRpY3Q/cXVlcnk9JUYzcCZib29sZWFuPW5vJmNhc2U9
|
91
|
+
b24mY2F0ZWdvcnk9d2hvbGVyZWNvcmQnKSI+PGltZyBzcmM9Ii9JbWcvbGlu
|
92
|
+
ZGljdF9sb2dvLmdpZiIgYm9yZGVyPTA+PC9hPjwvdGQ+CiAgPC90cj4KPC90
|
93
|
+
YWJsZT4KPGZvcm0+PHRhYmxlIHdpZHRoPSIxMDAlIiBib3JkZXI9IjEiPgog
|
94
|
+
IDx0ciBiZ2NvbG9yPSNmZmYwYzI+IAogICAgPHRoIG5vd3JhcCB3aWR0aD0x
|
95
|
+
MDA+rbW4YDxicj48Zm9udCBzaXplPS0yPiitu7Tku3mopb7Hvse3fCk8L2Zv
|
96
|
+
bnQ+PC90aD4KICAgIDx0aCBub3dyYXAgd2lkdGg9MzA+uGY8YnI+rbU8L3Ro
|
97
|
+
PgogICAgPHRoIG5vd3JhcCB3aWR0aD03MD48Zm9udCBjb2xvcj0iZ3JheSIg
|
98
|
+
ZmFjZT0iV2luZ2RpbmdzIj4mYW1wOzwvZm9udD4grtq+2jwvdGg+CiAgICA8
|
99
|
+
dGggbm93cmFwIHdpZHRoPTEwMD6mUK21pnI8L3RoPgogICAgPHRoIG5vd3Jh
|
100
|
+
cCB3aWR0aD04MD6s28P2rbW4YDwvdGg+CiAgICA8dGggbm93cmFwPrX8qNIo
|
101
|
+
PGZvbnQgY29sb3I9bWFyb29uIHNpemU9LTE+uNHEwDwvZm9udD4pIC8gPGZv
|
102
|
+
bnQgY29sb3I9Zm9yZXN0Z3JlZW4gc2l6ZT0tMT6zxrX5PC9mb250PjwvdGg+
|
103
|
+
CiAgPC90cj4KICA8dHI+CiAgICA8dGQgbm93cmFwIGFsaWduPWNlbnRlcj48
|
104
|
+
Zm9udCBjb2xvcj1yZWQgc2l6ZT0rMT5vADwvZm9udD48Zm9udCBjb2xvcj1n
|
105
|
+
cmVlbiBzaXplPSsxPnU8L2ZvbnQ+PGZvbnQgY29sb3I9Ymx1ZSBzaXplPSsx
|
106
|
+
PjE8L2ZvbnQ+PC90ZD4KICAgIDx0ZCBhbGlnbj1jZW50ZXI+PGEgaHJlZj0i
|
107
|
+
c291bmQucGhwP3M9b3UxIiB0YXJnZXQ9c291bmQ+PGltZyBzcmM9ImltZy9z
|
108
|
+
b3VuZGVyLmdpZiIgYm9yZGVyPTA+PC9hPjwvdGQ+Cgk8dGQgbm93cmFwPjxm
|
109
|
+
b250IHNpemU9LTE+pEik5blxuuI8L2ZvbnQ+PC90ZD4KICAgIDx0ZCBub3dy
|
110
|
+
YXA+CjxhIGhyZWY9InNlYXJjaC5waHA/cT0lQzMlRUYiPsPvPC9hPiwgPGEg
|
111
|
+
aHJlZj0ic2VhcmNoLnBocD9xPSVGMmoiPvJqPC9hPgk8L3RkPgogICAgPHRk
|
112
|
+
PjxzZWxlY3Qgb25DaGFuZ2U9Ik1NX2p1bXBNZW51KCdzZWxmJyx0aGlzLDAp
|
113
|
+
Ij4KICAgIDxvcHRpb24gc2VsZWN0ZWQgdmFsdWU9IiMiPi0tv+++3C0tPC9v
|
114
|
+
cHRpb24+CiAgICA8b3B0aW9uIHZhbHVlPSJwaG8tcmVsLnBocD9zMT1vACZz
|
115
|
+
Mj11Ij6mUMFuplDD/Twvb3B0aW9uPgogICAgPG9wdGlvbiB2YWx1ZT0icGhv
|
116
|
+
LXJlbC5waHA/czI9dSZzMz0xIj6mUMP9plC91Twvb3B0aW9uPgogICAgPG9w
|
117
|
+
dGlvbiB2YWx1ZT0icGhvLXJlbC5waHA/czE9bwAmczM9MSI+plDBbqZQvdU8
|
118
|
+
L29wdGlvbj4KICA8L3NlbGVjdD48L3RkPgogICAgPHRkPjxkaXYgbm93cmFw
|
119
|
+
PjwvZGl2Pjxmb250IGNvbG9yPWZvcmVzdGdyZWVuIHNpemU9LTE+plChdTxh
|
120
|
+
IGhyZWY9InNlYXJjaC5waHA/cT0lQzMlRUYiPsPvPC9hPqF2pnI8L2ZvbnQ+
|
121
|
+
PC90ZD4KICA8L3RyPgogIDx0cj4KICAgIDx0ZCBub3dyYXAgYWxpZ249Y2Vu
|
122
|
+
dGVyPjxmb250IGNvbG9yPXJlZCBzaXplPSsxPmw8L2ZvbnQ+PGZvbnQgY29s
|
123
|
+
b3I9Z3JlZW4gc2l6ZT0rMT51azwvZm9udD48Zm9udCBjb2xvcj1ibHVlIHNp
|
124
|
+
emU9KzE+NjwvZm9udD48L3RkPgogICAgPHRkIGFsaWduPWNlbnRlcj48YSBo
|
125
|
+
cmVmPSJzb3VuZC5waHA/cz1sdWs2IiB0YXJnZXQ9c291bmQ+PGltZyBzcmM9
|
126
|
+
ImltZy9zb3VuZGVyLmdpZiIgYm9yZGVyPTA+PC9hPjwvdGQ+Cgk8dGQgbm93
|
127
|
+
cmFwPjxmb250IHNpemU9LTE+pEik5blxuuI8L2ZvbnQ+PC90ZD4KICAgIDx0
|
128
|
+
ZCBub3dyYXA+CjxhIGhyZWY9InNlYXJjaC5waHA/cT0lRUUlNUMiPu5cPC9h
|
129
|
+
PiwgPGEgaHJlZj0ic2VhcmNoLnBocD9xPSVERSVENyI+3tc8L2E+LCA8YSBo
|
130
|
+
cmVmPSJzZWFyY2gucGhwP3E9JURFJUY3Ij7e9zwvYT4gPGEgaHJlZj0icGhv
|
131
|
+
LXJlbC5waHA/czE9bCZzMj11ayZzMz02Ij48Zm9udCBzaXplPS0xPls0Ni4u
|
132
|
+
XTwvZm9udD48L2E+CTwvdGQ+CiAgICA8dGQ+PHNlbGVjdCBvbkNoYW5nZT0i
|
133
|
+
TU1fanVtcE1lbnUoJ3NlbGYnLHRoaXMsMCkiPgogICAgPG9wdGlvbiBzZWxl
|
134
|
+
Y3RlZCB2YWx1ZT0iIyI+LS2/777cLS08L29wdGlvbj4KICAgIDxvcHRpb24g
|
135
|
+
dmFsdWU9InBoby1yZWwucGhwP3MxPWwmczI9dWsiPqZQwW6mUMP9PC9vcHRp
|
136
|
+
b24+CiAgICA8b3B0aW9uIHZhbHVlPSJwaG8tcmVsLnBocD9zMj11ayZzMz02
|
137
|
+
Ij6mUMP9plC91Twvb3B0aW9uPgogICAgPG9wdGlvbiB2YWx1ZT0icGhvLXJl
|
138
|
+
bC5waHA/czE9bCZzMz02Ij6mUMFuplC91Twvb3B0aW9uPgogIDwvc2VsZWN0
|
139
|
+
PjwvdGQ+CiAgICA8dGQ+PGRpdiBub3dyYXA+uWTzcDwvZGl2PjwvdGQ+CiAg
|
140
|
+
PC90cj4KPC90YWJsZT48dGFibGUgd2lkdGg9MTAwJSBib3JkZXI9MCBjZWxs
|
141
|
+
c3BhY2luZz0wIGNlbGxwYWRkaW5nPTA+PHRyPjx0ZD48Zm9udCBzaXplPS0x
|
142
|
+
IGNvbG9yPWdyYXk+t2qvwaa4vMY6IDQ2MzU8L2ZvbnQ+PC90ZD48dGQgYWxp
|
143
|
+
Z249cmlnaHQ+PGZvbnQgc2l6ZT0tMT4oPGEgaHJlZj0iYWRtaW4vZWRpdC5w
|
144
|
+
aHA/bmV3PUVkaXQmcT0lRjNwIj663rJ6pEit+7FNpc6wzzwvYT4pPC9mb250
|
145
|
+
PjwvdGQ+PC90cj48L3RhYmxlPrB0t2bCSTo8YnI+PC9mb3JtPjxocj48Zm9u
|
146
|
+
dCBjb2xvcj1ncmF5PlVuaWNvZGU6IDwvZm9udD48YSBocmVmPSJodHRwOi8v
|
147
|
+
d3d3LnVuaWNvZGUub3JnL2NnaS1iaW4vR2V0VW5paGFuRGF0YS5wbD9jb2Rl
|
148
|
+
cG9pbnQ9OTNENSIgdGFyZ2V0PV9ibGFuaz48aW1nIHNyYz0iL0ltZy91bmlj
|
149
|
+
b2RlMi5naWYiIGJvcmRlcj0wIGFsaWduPWFic21pZGRsZT48L2E+IDxmb250
|
150
|
+
IHNpemU9LTEgY29sb3I9Z3JheT5VKzkzRDU8L2ZvbnQ+PHRhYmxlIGJvcmRl
|
151
|
+
cj0wIGNlbGxzcGFjaW5nPTUgY2VsbHBhZGRpbmc9NT48dHI+PHRkIGNsYXNz
|
152
|
+
PXQ+un67eaRqpnKo5To8L3RkPjx0ZCB3aWR0aD0xMDA+PGZvbnQgc2l6ZT0t
|
153
|
+
MT5QZy40MjUwPC9mb250PjwvdGQ+PHRkIGNsYXNzPXQ+tLazcbjcOjwvdGQ+
|
154
|
+
PHRkPjxmb250IHNpemU9LTE+bHU0IDwvZm9udD48L3RkPjwvdHI+PHRyPjx0
|
155
|
+
ZCBjbGFzcz10PrFkurOmcqjlOjwvdGQ+PHRkIHdpZHRoPTEwMD48Zm9udCBz
|
156
|
+
aXplPS0xPlBnLjEyNDcuMjkwPC9mb250PjwvdGQ+PHRkIGNsYXNzPXQ+rV7E
|
157
|
+
tjo8L3RkPjx0ZD48Zm9udCBzaXplPS0xPjwvZm9udD48L3RkPjwvdHI+PHRy
|
158
|
+
Pjx0ZCBjbGFzcz10Pk1hdHRoZXdzOjwvdGQ+PHRkIHdpZHRoPTEwMD48Zm9u
|
159
|
+
dCBzaXplPS0xPi08L2ZvbnQ+PC90ZD48dGQgYWxpZ249cmlnaHQ+PGZvbnQg
|
160
|
+
c2l6ZT0tMT48L2ZvbnQ+PC90ZD48dGQ+PC90ZD48L3RyPjwvdGFibGU+PC9i
|
161
|
+
b2R5PjwvaHRtbD4=
|
162
|
+
http_version:
|
163
|
+
recorded_at: Mon, 31 Mar 2014 10:24:06 GMT
|
164
|
+
- request:
|
165
|
+
method: get
|
166
|
+
uri: http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/search.php?q=%F3p
|
167
|
+
body:
|
168
|
+
encoding: US-ASCII
|
169
|
+
string: ''
|
170
|
+
headers:
|
171
|
+
Accept-Encoding:
|
172
|
+
- gzip;q=1.0,deflate;q=0.6,identity;q=0.3
|
173
|
+
Accept:
|
174
|
+
- "*/*"
|
175
|
+
User-Agent:
|
176
|
+
- Ruby
|
177
|
+
response:
|
178
|
+
status:
|
179
|
+
code: 200
|
180
|
+
message: OK
|
181
|
+
headers:
|
182
|
+
Date:
|
183
|
+
- Mon, 31 Mar 2014 10:32:44 GMT
|
184
|
+
Server:
|
185
|
+
- Apache/2.2.15 (CentOS)
|
186
|
+
X-Powered-By:
|
187
|
+
- PHP/5.3.3
|
188
|
+
Content-Length:
|
189
|
+
- '5636'
|
190
|
+
Connection:
|
191
|
+
- close
|
192
|
+
Content-Type:
|
193
|
+
- text/html
|
194
|
+
body:
|
195
|
+
encoding: ASCII-8BIT
|
196
|
+
string: !binary |-
|
197
|
+
PGh0bWw+PGhlYWQ+PHRpdGxlPrhmu3m8Zq21sHS1/KZyrnc8L3RpdGxlPjxz
|
198
|
+
dHlsZSB0eXBlPSJ0ZXh0L2NzcyI+YSB7IHRleHQtZGVjb3JhdGlvbjogbm9u
|
199
|
+
ZX0gLnRleHQgeyBsaW5lLWhlaWdodDogMTUwJSB9PC9zdHlsZT48bWV0YSBo
|
200
|
+
dHRwLWVxdWl2PSJDb250ZW50LVR5cGUiIGNvbnRlbnQ9InRleHQvaHRtbDsg
|
201
|
+
Y2hhcnNldD1iaWc1Ij48c2NyaXB0IGxhbmd1YWdlPSJKYXZhU2NyaXB0Ij4K
|
202
|
+
PCEtLQpmdW5jdGlvbiBNTV9qdW1wTWVudSh0YXJnLHNlbE9iaixyZXN0b3Jl
|
203
|
+
KXsgLy92My4wCiAgZXZhbCh0YXJnKyIubG9jYXRpb249JyIrc2VsT2JqLm9w
|
204
|
+
dGlvbnNbc2VsT2JqLnNlbGVjdGVkSW5kZXhdLnZhbHVlKyInIik7CiAgaWYg
|
205
|
+
KHJlc3RvcmUpIHNlbE9iai5zZWxlY3RlZEluZGV4PTA7Cn0KZnVuY3Rpb24g
|
206
|
+
cmVmICh1cmwpIHsKICByZXdpbj13aW5kb3cub3Blbih1cmwsJ3JlZicsJ3Rv
|
207
|
+
b2Jhcj0wLHN0YXR1cz0wLHNjcm9sbGJhcnM9MSxyZXNpemFibGU9MSx3aWR0
|
208
|
+
aD02MDAsaGVpZ2h0PTMwMCcpOwogIHNldFRpbWVvdXQgKCdyZXdpbi5mb2N1
|
209
|
+
cygpJywgMTAwKTsKfQovLy0tPgo8L3NjcmlwdD4KPHN0eWxlIHR5cGU9InRl
|
210
|
+
eHQvY3NzIj4KLnQgeyBmb250LXNpemU6IDEzOyBub3dyYXA7IHRleHQtYWxp
|
211
|
+
Z246IHJpZ2h0OyBjb2xvcjogbmF2eX0KLnQyIHsgZm9udC1zaXplOiAxMzsg
|
212
|
+
bm93cmFwOyB0ZXh0LWFsaWduOiBsZWZ0fQoudDMgeyBmb250LXNpemU6IDEz
|
213
|
+
OyBub3dyYXA7IHRleHQtYWxpZ246IGNlbnRlcn0KLncgeyBmb250LXNpemU6
|
214
|
+
IDM2OyBmb250LXdlaWdodDogYm9sZDsgY29sb3I6IHJlZDsgdGV4dC1hbGln
|
215
|
+
bjogY2VudGVyIH0KPC9zdHlsZT4KPHNjcmlwdCBsYW5ndWFnZT0iSmF2YVNj
|
216
|
+
cmlwdCI+CmZ1bmN0aW9uIHhpZF9kb3duKFhpZCkgewoJaWYgKGRvY3VtZW50
|
217
|
+
LmFsbFtYaWRdLnN0eWxlLmRpc3BsYXkgPT0gIm5vbmUiKSB7CgkJZG9jdW1l
|
218
|
+
bnQuYWxsW1hpZF0uc3R5bGUuZGlzcGxheSA9ICJibG9jayI7Cgl9IGVsc2Ug
|
219
|
+
ewoJCWRvY3VtZW50LmFsbFtYaWRdLnN0eWxlLmRpc3BsYXkgPSAibm9uZSI7
|
220
|
+
Cgl9Cn0KPC9zY3JpcHQ+PC9oZWFkPjxib2R5IGJhY2tncm91bmQ9Ii9MZXhp
|
221
|
+
cy9sZXhpLWNhbi9pbWcvcHBiazAxNC5qcGciID48dGFibGUgd2lkdGg9IjEw
|
222
|
+
MCUiIGJvcmRlcj0iMCI+CiAgPHRyPiAKICAgIDx0ZCByb3dzcGFuPSIyIiBj
|
223
|
+
bGFzcz13PvNwPC90ZD4KICAgIDx0ZCBjbGFzcz10PrOhrbo6PC90ZD4KCQk8
|
224
|
+
dGQgY2xhc3M9dDI+PGEgaHJlZj0icmFkLXN0ci5waHA/cmFkPTE2NyI+PGlt
|
225
|
+
ZyBzcmM9ImltZy9yYWQvcmFkMTY3LmdpZiIgYm9yZGVyPTAgYWxpZ249YWJz
|
226
|
+
bWlkZGxlPiBbMTY3XTwvYT48L3RkPgogICAgPHRkIGNsYXNzPXQ+tae1ZTo8
|
227
|
+
L3RkPgoJCTx0ZCBjbGFzcz10Mj48YSBocmVmPSJyYWQtc3RyLnBocD9zdHI9
|
228
|
+
MTkiPjE5PC9hPjwvdGQ+CiAgICA8dGQgY2xhc3M9dD6mcq21pMDD/jo8L3Rk
|
229
|
+
PgoJCTx0ZCBjbGFzcz10MyBiZ2NvbG9yPXllbGxvdz48YSBocmVmPSJjbGFz
|
230
|
+
c2lmaWVkLnBocD9zdD0yIj6vfa21pnI8L2E+PC90ZD4KCQk8dGQgYWxpZ249
|
231
|
+
Y2VudGVyPjxhIGhyZWY9IiMiIG9uQ2xpY2s9InJlZignaHR0cDovL3pob25n
|
232
|
+
d2VuLmNvbS9kLzI0My94MTEyLmh0bScpIj48aW1nIHNyYz0iL0ltZy96aG9u
|
233
|
+
Z3B1LmpwZyIgYm9yZGVyPTA+PC9hPiA8IS0tYSBocmVmPSIjIiBvbkNsaWNr
|
234
|
+
PSJyZWYoJ2h0dHA6Ly8xNDAuMTExLjM0LjQ2L2NnaS1iaW4vZGljdC9uZXdz
|
235
|
+
ZWFyY2guY2dpP0RhdGFiYXNlPWRpY3QmUXVlcnlTY29wZT1OYW1lJlF1ZXJ5
|
236
|
+
Q29tbWFuZD1maW5kJkdyYXBoaWNXb3JkPXllcyZRdWVyeVN0cmluZz0lRjNw
|
237
|
+
JykiLS0+CgkJPGEgaHJlZj0iIyIgb25DbGljaz0icmVmKCdodHRwOi8vMTQw
|
238
|
+
LjExMS4zNC40Ni9jZ2ktYmluL25ld0RpY3QvZGljdC5zaD9jb25kPSVGM3Am
|
239
|
+
cGllY2VMZW49NTAmZmxkPTEmY2F0PSZ1a2V5PS02MjQ3MjExODgmc2VyaWFs
|
240
|
+
PTMmcmVjTm89MCZvcD0maW1nRm9udD0xJykiPgoJCTxpbWcgc3JjPSIvSW1n
|
241
|
+
L2d5Y2QyYS5naWYiIGJvcmRlcj0wPjwvYT48L3RkPgogIDwvdHI+CiAgPHRy
|
242
|
+
PiAKICAgIDx0ZCBjbGFzcz10PqRqpK29WDo8L3RkPgoJCTx0ZCBjbGFzcz10
|
243
|
+
Mj5GMzcwPC90ZD4KICAgIDx0ZCBjbGFzcz10Pq3cvmW9WDo8L3RkPgoJCTx0
|
244
|
+
ZCBjbGFzcz10Mj6q96Tgw/ik3zwvdGQ+CiAgICA8dGQgY2xhc3M9dD7AV6fH
|
245
|
+
IC8gwFemuDo8L3RkPgoJCTx0ZCBjbGFzcz10Mj4tIC8gMDwvdGQ+CiAgICA8
|
246
|
+
dGQgYWxpZ249Y2VudGVyPjxhIGhyZWY9IiMiIG9uQ2xpY2s9InJlZignaHR0
|
247
|
+
cDovL2Vwc2lsb24zLmdlb3JnZXRvd24uZWR1L35wZXRlcnNlZS9jZ2ktYmlu
|
248
|
+
L3dvcmRsb29rLmNnaT9zZWFyY2h0eXBlPWJpZzUmd2hlcmU9YW55d2hlcmUm
|
249
|
+
d29yZD0lRjNwJykiPjxpbWcgc3JjPSIvSW1nL2NlZGljdDJfbmV3LmdpZiIg
|
250
|
+
Ym9yZGVyPTA+PC9hPiA8YSBocmVmPSIjIiBvbkNsaWNrPSJyZWYoJy9jZ2kt
|
251
|
+
YmluL2FncmVwLWxpbmRpY3Q/cXVlcnk9JUYzcCZib29sZWFuPW5vJmNhc2U9
|
252
|
+
b24mY2F0ZWdvcnk9d2hvbGVyZWNvcmQnKSI+PGltZyBzcmM9Ii9JbWcvbGlu
|
253
|
+
ZGljdF9sb2dvLmdpZiIgYm9yZGVyPTA+PC9hPjwvdGQ+CiAgPC90cj4KPC90
|
254
|
+
YWJsZT4KPGZvcm0+PHRhYmxlIHdpZHRoPSIxMDAlIiBib3JkZXI9IjEiPgog
|
255
|
+
IDx0ciBiZ2NvbG9yPSNmZmYwYzI+IAogICAgPHRoIG5vd3JhcCB3aWR0aD0x
|
256
|
+
MDA+rbW4YDxicj48Zm9udCBzaXplPS0yPiitu7Tku3mopb7Hvse3fCk8L2Zv
|
257
|
+
bnQ+PC90aD4KICAgIDx0aCBub3dyYXAgd2lkdGg9MzA+uGY8YnI+rbU8L3Ro
|
258
|
+
PgogICAgPHRoIG5vd3JhcCB3aWR0aD03MD48Zm9udCBjb2xvcj0iZ3JheSIg
|
259
|
+
ZmFjZT0iV2luZ2RpbmdzIj4mYW1wOzwvZm9udD4grtq+2jwvdGg+CiAgICA8
|
260
|
+
dGggbm93cmFwIHdpZHRoPTEwMD6mUK21pnI8L3RoPgogICAgPHRoIG5vd3Jh
|
261
|
+
cCB3aWR0aD04MD6s28P2rbW4YDwvdGg+CiAgICA8dGggbm93cmFwPrX8qNIo
|
262
|
+
PGZvbnQgY29sb3I9bWFyb29uIHNpemU9LTE+uNHEwDwvZm9udD4pIC8gPGZv
|
263
|
+
bnQgY29sb3I9Zm9yZXN0Z3JlZW4gc2l6ZT0tMT6zxrX5PC9mb250PjwvdGg+
|
264
|
+
CiAgPC90cj4KICA8dHI+CiAgICA8dGQgbm93cmFwIGFsaWduPWNlbnRlcj48
|
265
|
+
Zm9udCBjb2xvcj1yZWQgc2l6ZT0rMT5vADwvZm9udD48Zm9udCBjb2xvcj1n
|
266
|
+
cmVlbiBzaXplPSsxPnU8L2ZvbnQ+PGZvbnQgY29sb3I9Ymx1ZSBzaXplPSsx
|
267
|
+
PjE8L2ZvbnQ+PC90ZD4KICAgIDx0ZCBhbGlnbj1jZW50ZXI+PGEgaHJlZj0i
|
268
|
+
c291bmQucGhwP3M9b3UxIiB0YXJnZXQ9c291bmQ+PGltZyBzcmM9ImltZy9z
|
269
|
+
b3VuZGVyLmdpZiIgYm9yZGVyPTA+PC9hPjwvdGQ+Cgk8dGQgbm93cmFwPjxm
|
270
|
+
b250IHNpemU9LTE+pEik5blxuuI8L2ZvbnQ+PC90ZD4KICAgIDx0ZCBub3dy
|
271
|
+
YXA+CjxhIGhyZWY9InNlYXJjaC5waHA/cT0lQzMlRUYiPsPvPC9hPiwgPGEg
|
272
|
+
aHJlZj0ic2VhcmNoLnBocD9xPSVGMmoiPvJqPC9hPgk8L3RkPgogICAgPHRk
|
273
|
+
PjxzZWxlY3Qgb25DaGFuZ2U9Ik1NX2p1bXBNZW51KCdzZWxmJyx0aGlzLDAp
|
274
|
+
Ij4KICAgIDxvcHRpb24gc2VsZWN0ZWQgdmFsdWU9IiMiPi0tv+++3C0tPC9v
|
275
|
+
cHRpb24+CiAgICA8b3B0aW9uIHZhbHVlPSJwaG8tcmVsLnBocD9zMT1vACZz
|
276
|
+
Mj11Ij6mUMFuplDD/Twvb3B0aW9uPgogICAgPG9wdGlvbiB2YWx1ZT0icGhv
|
277
|
+
LXJlbC5waHA/czI9dSZzMz0xIj6mUMP9plC91Twvb3B0aW9uPgogICAgPG9w
|
278
|
+
dGlvbiB2YWx1ZT0icGhvLXJlbC5waHA/czE9bwAmczM9MSI+plDBbqZQvdU8
|
279
|
+
L29wdGlvbj4KICA8L3NlbGVjdD48L3RkPgogICAgPHRkPjxkaXYgbm93cmFw
|
280
|
+
PjwvZGl2Pjxmb250IGNvbG9yPWZvcmVzdGdyZWVuIHNpemU9LTE+plChdTxh
|
281
|
+
IGhyZWY9InNlYXJjaC5waHA/cT0lQzMlRUYiPsPvPC9hPqF2pnI8L2ZvbnQ+
|
282
|
+
PC90ZD4KICA8L3RyPgogIDx0cj4KICAgIDx0ZCBub3dyYXAgYWxpZ249Y2Vu
|
283
|
+
dGVyPjxmb250IGNvbG9yPXJlZCBzaXplPSsxPmw8L2ZvbnQ+PGZvbnQgY29s
|
284
|
+
b3I9Z3JlZW4gc2l6ZT0rMT51azwvZm9udD48Zm9udCBjb2xvcj1ibHVlIHNp
|
285
|
+
emU9KzE+NjwvZm9udD48L3RkPgogICAgPHRkIGFsaWduPWNlbnRlcj48YSBo
|
286
|
+
cmVmPSJzb3VuZC5waHA/cz1sdWs2IiB0YXJnZXQ9c291bmQ+PGltZyBzcmM9
|
287
|
+
ImltZy9zb3VuZGVyLmdpZiIgYm9yZGVyPTA+PC9hPjwvdGQ+Cgk8dGQgbm93
|
288
|
+
cmFwPjxmb250IHNpemU9LTE+pEik5blxuuI8L2ZvbnQ+PC90ZD4KICAgIDx0
|
289
|
+
ZCBub3dyYXA+CjxhIGhyZWY9InNlYXJjaC5waHA/cT0lRUUlNUMiPu5cPC9h
|
290
|
+
PiwgPGEgaHJlZj0ic2VhcmNoLnBocD9xPSVERSVENyI+3tc8L2E+LCA8YSBo
|
291
|
+
cmVmPSJzZWFyY2gucGhwP3E9JURFJUY3Ij7e9zwvYT4gPGEgaHJlZj0icGhv
|
292
|
+
LXJlbC5waHA/czE9bCZzMj11ayZzMz02Ij48Zm9udCBzaXplPS0xPls0Ni4u
|
293
|
+
XTwvZm9udD48L2E+CTwvdGQ+CiAgICA8dGQ+PHNlbGVjdCBvbkNoYW5nZT0i
|
294
|
+
TU1fanVtcE1lbnUoJ3NlbGYnLHRoaXMsMCkiPgogICAgPG9wdGlvbiBzZWxl
|
295
|
+
Y3RlZCB2YWx1ZT0iIyI+LS2/777cLS08L29wdGlvbj4KICAgIDxvcHRpb24g
|
296
|
+
dmFsdWU9InBoby1yZWwucGhwP3MxPWwmczI9dWsiPqZQwW6mUMP9PC9vcHRp
|
297
|
+
b24+CiAgICA8b3B0aW9uIHZhbHVlPSJwaG8tcmVsLnBocD9zMj11ayZzMz02
|
298
|
+
Ij6mUMP9plC91Twvb3B0aW9uPgogICAgPG9wdGlvbiB2YWx1ZT0icGhvLXJl
|
299
|
+
bC5waHA/czE9bCZzMz02Ij6mUMFuplC91Twvb3B0aW9uPgogIDwvc2VsZWN0
|
300
|
+
PjwvdGQ+CiAgICA8dGQ+PGRpdiBub3dyYXA+uWTzcDwvZGl2PjwvdGQ+CiAg
|
301
|
+
PC90cj4KPC90YWJsZT48dGFibGUgd2lkdGg9MTAwJSBib3JkZXI9MCBjZWxs
|
302
|
+
c3BhY2luZz0wIGNlbGxwYWRkaW5nPTA+PHRyPjx0ZD48Zm9udCBzaXplPS0x
|
303
|
+
IGNvbG9yPWdyYXk+t2qvwaa4vMY6IDQ2NDA8L2ZvbnQ+PC90ZD48dGQgYWxp
|
304
|
+
Z249cmlnaHQ+PGZvbnQgc2l6ZT0tMT4oPGEgaHJlZj0iYWRtaW4vZWRpdC5w
|
305
|
+
aHA/bmV3PUVkaXQmcT0lRjNwIj663rJ6pEit+7FNpc6wzzwvYT4pPC9mb250
|
306
|
+
PjwvdGQ+PC90cj48L3RhYmxlPrB0t2bCSTo8YnI+PC9mb3JtPjxocj48Zm9u
|
307
|
+
dCBjb2xvcj1ncmF5PlVuaWNvZGU6IDwvZm9udD48YSBocmVmPSJodHRwOi8v
|
308
|
+
d3d3LnVuaWNvZGUub3JnL2NnaS1iaW4vR2V0VW5paGFuRGF0YS5wbD9jb2Rl
|
309
|
+
cG9pbnQ9OTNENSIgdGFyZ2V0PV9ibGFuaz48aW1nIHNyYz0iL0ltZy91bmlj
|
310
|
+
b2RlMi5naWYiIGJvcmRlcj0wIGFsaWduPWFic21pZGRsZT48L2E+IDxmb250
|
311
|
+
IHNpemU9LTEgY29sb3I9Z3JheT5VKzkzRDU8L2ZvbnQ+PHRhYmxlIGJvcmRl
|
312
|
+
cj0wIGNlbGxzcGFjaW5nPTUgY2VsbHBhZGRpbmc9NT48dHI+PHRkIGNsYXNz
|
313
|
+
PXQ+un67eaRqpnKo5To8L3RkPjx0ZCB3aWR0aD0xMDA+PGZvbnQgc2l6ZT0t
|
314
|
+
MT5QZy40MjUwPC9mb250PjwvdGQ+PHRkIGNsYXNzPXQ+tLazcbjcOjwvdGQ+
|
315
|
+
PHRkPjxmb250IHNpemU9LTE+bHU0IDwvZm9udD48L3RkPjwvdHI+PHRyPjx0
|
316
|
+
ZCBjbGFzcz10PrFkurOmcqjlOjwvdGQ+PHRkIHdpZHRoPTEwMD48Zm9udCBz
|
317
|
+
aXplPS0xPlBnLjEyNDcuMjkwPC9mb250PjwvdGQ+PHRkIGNsYXNzPXQ+rV7E
|
318
|
+
tjo8L3RkPjx0ZD48Zm9udCBzaXplPS0xPjwvZm9udD48L3RkPjwvdHI+PHRy
|
319
|
+
Pjx0ZCBjbGFzcz10Pk1hdHRoZXdzOjwvdGQ+PHRkIHdpZHRoPTEwMD48Zm9u
|
320
|
+
dCBzaXplPS0xPi08L2ZvbnQ+PC90ZD48dGQgYWxpZ249cmlnaHQ+PGZvbnQg
|
321
|
+
c2l6ZT0tMT48L2ZvbnQ+PC90ZD48dGQ+PC90ZD48L3RyPjwvdGFibGU+PC9i
|
322
|
+
b2R5PjwvaHRtbD4=
|
323
|
+
http_version:
|
324
|
+
recorded_at: Mon, 31 Mar 2014 10:33:12 GMT
|
325
|
+
recorded_with: VCR 2.8.0
|
@@ -64,5 +64,12 @@ describe Cantonese::Scraper::WordScraper do
|
|
64
64
|
expect(word[:syllable][1][:examples]).to be_include("可歌可泣")
|
65
65
|
end
|
66
66
|
|
67
|
+
it "should parse 鏕 properly" do
|
68
|
+
word = subject.crawl("鏕")
|
69
|
+
expect(word[:syllable]).to be_a(Array)
|
70
|
+
expect(word[:syllable][0][:full]).to eq("ou1")
|
71
|
+
expect(word[:syllable][1][:full]).to eq("luk6")
|
72
|
+
end
|
73
|
+
|
67
74
|
end
|
68
75
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cantonese
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Chong
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: tidy_ffi
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,6 +142,7 @@ files:
|
|
128
142
|
- lib/cantonese/version.rb
|
129
143
|
- spec/fixtures/cassettes/Cantonese_Scraper_ClassifiedScraper/_crawl/should_fetch_list_of_classified_words.yml
|
130
144
|
- spec/fixtures/cassettes/Cantonese_Scraper_RadicalScraper/_crawl/should_list_of_radicals.yml
|
145
|
+
- spec/fixtures/cassettes/Cantonese_Scraper_WordScraper/_crawl/should_parse_properly.yml
|
131
146
|
- spec/fixtures/cassettes/Cantonese_Scraper_WordScraper/_crawl/should_return_detail_of_a_word.yml
|
132
147
|
- spec/fixtures/cassettes/Cantonese_Scraper_WordScraper/_crawl/should_return_detail_of_a_word_with_multiple_sounds.yml
|
133
148
|
- spec/scraper/classified_scraper_spec.rb
|
@@ -161,6 +176,7 @@ summary: Set of scraper and processor to fetch Cantonese data.
|
|
161
176
|
test_files:
|
162
177
|
- spec/fixtures/cassettes/Cantonese_Scraper_ClassifiedScraper/_crawl/should_fetch_list_of_classified_words.yml
|
163
178
|
- spec/fixtures/cassettes/Cantonese_Scraper_RadicalScraper/_crawl/should_list_of_radicals.yml
|
179
|
+
- spec/fixtures/cassettes/Cantonese_Scraper_WordScraper/_crawl/should_parse_properly.yml
|
164
180
|
- spec/fixtures/cassettes/Cantonese_Scraper_WordScraper/_crawl/should_return_detail_of_a_word.yml
|
165
181
|
- spec/fixtures/cassettes/Cantonese_Scraper_WordScraper/_crawl/should_return_detail_of_a_word_with_multiple_sounds.yml
|
166
182
|
- spec/scraper/classified_scraper_spec.rb
|