cantonese 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cantonese/scraper/word_scraper.rb +7 -4
- data/lib/cantonese/version.rb +1 -1
- data/spec/scraper/word_scraper_spec.rb +8 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4ddaa4f96c77b0c35bb61e83b2feef43a234f844
|
4
|
+
data.tar.gz: 5684236f0d3ba3626de4a10ebdc1ac9eeeac3734
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a02f42b7aac35e3e2071114a868be195d592f2a93eb087f304afa058700d62664d5595160ea57d208b49bb2c3eb0f7cbc4b973b3e70b41933ee9682d87be0f81
|
7
|
+
data.tar.gz: efc2ba3e781a25a0a2cf72c75d52eda78298a6d0818cbbc382f1832042a240ccfce9989ba6f2a10331806eb911c40bde627e80e731f16c345fe82d0e9098cf03
|
data/lib/cantonese/scraper/word_scraper.rb
CHANGED
@@ -30,14 +30,15 @@ module Cantonese
|
|
30
30
|
classified = doc.search("//*[@class = 't' and .='字音分類:']/following-sibling::td[1]").text rescue nil
|
31
31
|
big5 = doc.search("//*[@class = 't' and .='大五碼:']/following-sibling::td[1]").text rescue nil
|
32
32
|
chanjie = doc.search("//*[@class = 't' and .='倉頡碼:']/following-sibling::td[1]").text rescue nil
|
33
|
-
|
33
|
+
rank_and_frequency = doc.search("//*[@class = 't' and .='頻序 / 頻次:']/following-sibling::td[1]").text rescue nil
|
34
34
|
combination = doc.search("//text()[.='配搭點:']/following-sibling::a").collect{|a| a.text}
|
35
|
+
rank, frequency = rank_and_frequency.split("/").collect{|word| word.strip.to_i }
|
35
36
|
|
36
37
|
syllable = doc.search('//form/table[1]/tr[position()>1]').collect do |row|
|
37
38
|
sound = row.search("./td[1]")
|
38
|
-
initial = sound.xpath("./*[@color='red']").text rescue
|
39
|
-
final = sound.xpath("./*[@color='green']").text rescue
|
40
|
-
tone = sound.xpath("./*[@color='blue']").text rescue
|
39
|
+
initial = sound.xpath("./*[@color='red']").text rescue ""
|
40
|
+
final = sound.xpath("./*[@color='green']").text rescue ""
|
41
|
+
tone = sound.xpath("./*[@color='blue']").text rescue ""
|
41
42
|
sound_text = sound.text
|
42
43
|
pronunciation = "http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/sound/#{sound_text}.wav"
|
43
44
|
|
@@ -52,6 +53,7 @@ module Cantonese
|
|
52
53
|
end
|
53
54
|
|
54
55
|
{
|
56
|
+
:full => "#{initial}#{final}#{tone}",
|
55
57
|
:initial => initial,
|
56
58
|
:final => final,
|
57
59
|
:tone => tone,
|
@@ -68,6 +70,7 @@ module Cantonese
|
|
68
70
|
:classified => classified,
|
69
71
|
:big5 => big5,
|
70
72
|
:chanjie => chanjie,
|
73
|
+
:rank => rank,
|
71
74
|
:frequency => frequency,
|
72
75
|
:syllable => syllable,
|
73
76
|
:combination => combination
|
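data/lib/cantonese/version.rb
CHANGED
(The +1 -1 hunk for version.rb, the version bump from 0.1.2 to 0.2.0, is missing from this extract.)
data/spec/scraper/word_scraper_spec.rb
CHANGED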
@@ -15,11 +15,14 @@ describe Cantonese::Scraper::WordScraper do
|
|
15
15
|
|
16
16
|
expect(word[:big5]).to eq("A440")
|
17
17
|
expect(word[:chanjie]).to eq("一")
|
18
|
-
|
18
|
+
|
19
|
+
expect(word[:rank]).to eq(2)
|
20
|
+
expect(word[:frequency]).to eq(166396)
|
19
21
|
expect(word[:combination]).to be_a(Array)
|
20
22
|
expect(word[:combination]).to be_include("山")
|
21
23
|
|
22
24
|
expect(word[:syllable]).to be_a(Array)
|
25
|
+
expect(word[:syllable][0][:full]).to eq("jat1")
|
23
26
|
expect(word[:syllable][0][:initial]).to eq("j")
|
24
27
|
expect(word[:syllable][0][:final]).to eq("at")
|
25
28
|
expect(word[:syllable][0][:tone]).to eq("1")
|
@@ -40,17 +43,20 @@ describe Cantonese::Scraper::WordScraper do
|
|
40
43
|
|
41
44
|
expect(word[:big5]).to eq("A569")
|
42
45
|
expect(word[:chanjie]).to eq("一弓口")
|
43
|
-
expect(word[:
|
46
|
+
expect(word[:rank]).to eq(36)
|
47
|
+
expect(word[:frequency]).to eq(40754)
|
44
48
|
expect(word[:combination]).to be_a(Array)
|
45
49
|
expect(word[:combination]).to be_include("不")
|
46
50
|
|
47
51
|
expect(word[:syllable]).to be_a(Array)
|
52
|
+
expect(word[:syllable][0][:full]).to eq("hak1")
|
48
53
|
expect(word[:syllable][0][:initial]).to eq("h")
|
49
54
|
expect(word[:syllable][0][:final]).to eq("ak")
|
50
55
|
expect(word[:syllable][0][:tone]).to eq("1")
|
51
56
|
expect(word[:syllable][0][:examples]).to be_a(Array)
|
52
57
|
expect(word[:syllable][0][:examples]).to be_include("可汗")
|
53
58
|
|
59
|
+
expect(word[:syllable][1][:full]).to eq("ho2")
|
54
60
|
expect(word[:syllable][1][:initial]).to eq("h")
|
55
61
|
expect(word[:syllable][1][:final]).to eq("o")
|
56
62
|
expect(word[:syllable][1][:tone]).to eq("2")
|