cantonese 0.1.2 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ae7740aa252f2be334b9f68abfcf916fe692320f
4
- data.tar.gz: a4dbd9c30c8bea7fdfd197cd43c801f0c7a53d6c
3
+ metadata.gz: 4ddaa4f96c77b0c35bb61e83b2feef43a234f844
4
+ data.tar.gz: 5684236f0d3ba3626de4a10ebdc1ac9eeeac3734
5
5
  SHA512:
6
- metadata.gz: baaa4749abad128a550b0a56d3157c2380a06cc9c0974c234d1356772a2d6bfe3974493a664d7a5b47a357817b001e5ff3c1ede3a9faa4e076423d457a4b2690
7
- data.tar.gz: a1c385ecedc1f7175080a6059263215db08170b74db5c004fb0ddc9a2ba8699c52ae9eeacedbcf48809117e68f16140862207ea5dbf0d39b673f865f0fd1216b
6
+ metadata.gz: a02f42b7aac35e3e2071114a868be195d592f2a93eb087f304afa058700d62664d5595160ea57d208b49bb2c3eb0f7cbc4b973b3e70b41933ee9682d87be0f81
7
+ data.tar.gz: efc2ba3e781a25a0a2cf72c75d52eda78298a6d0818cbbc382f1832042a240ccfce9989ba6f2a10331806eb911c40bde627e80e731f16c345fe82d0e9098cf03
@@ -30,14 +30,15 @@ module Cantonese
30
30
  classified = doc.search("//*[@class = 't' and .='字音分類:']/following-sibling::td[1]").text rescue nil
31
31
  big5 = doc.search("//*[@class = 't' and .='大五碼:']/following-sibling::td[1]").text rescue nil
32
32
  chanjie = doc.search("//*[@class = 't' and .='倉頡碼:']/following-sibling::td[1]").text rescue nil
33
- frequency = doc.search("//*[@class = 't' and .='頻序 / 頻次:']/following-sibling::td[1]").text rescue nil
33
+ rank_and_frequency = doc.search("//*[@class = 't' and .='頻序 / 頻次:']/following-sibling::td[1]").text rescue nil
34
34
  combination = doc.search("//text()[.='配搭點:']/following-sibling::a").collect{|a| a.text}
35
+ rank, frequency = rank_and_frequency.split("/").collect{|word| word.strip.to_i }
35
36
 
36
37
  syllable = doc.search('//form/table[1]/tr[position()>1]').collect do |row|
37
38
  sound = row.search("./td[1]")
38
- initial = sound.xpath("./*[@color='red']").text rescue nil
39
- final = sound.xpath("./*[@color='green']").text rescue nil
40
- tone = sound.xpath("./*[@color='blue']").text rescue nil
39
+ initial = sound.xpath("./*[@color='red']").text rescue ""
40
+ final = sound.xpath("./*[@color='green']").text rescue ""
41
+ tone = sound.xpath("./*[@color='blue']").text rescue ""
41
42
  sound_text = sound.text
42
43
  pronunciation = "http://humanum.arts.cuhk.edu.hk/Lexis/lexi-can/sound/#{sound_text}.wav"
43
44
 
@@ -52,6 +53,7 @@ module Cantonese
52
53
  end
53
54
 
54
55
  {
56
+ :full => "#{initial}#{final}#{tone}",
55
57
  :initial => initial,
56
58
  :final => final,
57
59
  :tone => tone,
@@ -68,6 +70,7 @@ module Cantonese
68
70
  :classified => classified,
69
71
  :big5 => big5,
70
72
  :chanjie => chanjie,
73
+ :rank => rank,
71
74
  :frequency => frequency,
72
75
  :syllable => syllable,
73
76
  :combination => combination
@@ -1,3 +1,3 @@
1
1
  module Cantonese
2
- VERSION = "0.1.2"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -15,11 +15,14 @@ describe Cantonese::Scraper::WordScraper do
15
15
 
16
16
  expect(word[:big5]).to eq("A440")
17
17
  expect(word[:chanjie]).to eq("一")
18
- expect(word[:frequency]).to eq("2 / 166396")
18
+
19
+ expect(word[:rank]).to eq(2)
20
+ expect(word[:frequency]).to eq(166396)
19
21
  expect(word[:combination]).to be_a(Array)
20
22
  expect(word[:combination]).to be_include("山")
21
23
 
22
24
  expect(word[:syllable]).to be_a(Array)
25
+ expect(word[:syllable][0][:full]).to eq("jat1")
23
26
  expect(word[:syllable][0][:initial]).to eq("j")
24
27
  expect(word[:syllable][0][:final]).to eq("at")
25
28
  expect(word[:syllable][0][:tone]).to eq("1")
@@ -40,17 +43,20 @@ describe Cantonese::Scraper::WordScraper do
40
43
 
41
44
  expect(word[:big5]).to eq("A569")
42
45
  expect(word[:chanjie]).to eq("一弓口")
43
- expect(word[:frequency]).to eq("36 / 40754")
46
+ expect(word[:rank]).to eq(36)
47
+ expect(word[:frequency]).to eq(40754)
44
48
  expect(word[:combination]).to be_a(Array)
45
49
  expect(word[:combination]).to be_include("不")
46
50
 
47
51
  expect(word[:syllable]).to be_a(Array)
52
+ expect(word[:syllable][0][:full]).to eq("hak1")
48
53
  expect(word[:syllable][0][:initial]).to eq("h")
49
54
  expect(word[:syllable][0][:final]).to eq("ak")
50
55
  expect(word[:syllable][0][:tone]).to eq("1")
51
56
  expect(word[:syllable][0][:examples]).to be_a(Array)
52
57
  expect(word[:syllable][0][:examples]).to be_include("可汗")
53
58
 
59
+ expect(word[:syllable][1][:full]).to eq("ho2")
54
60
  expect(word[:syllable][1][:initial]).to eq("h")
55
61
  expect(word[:syllable][1][:final]).to eq("o")
56
62
  expect(word[:syllable][1][:tone]).to eq("2")
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cantonese
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Chong