chinese_sugar 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/{dict.txt → data/phrase.txt} +0 -0
- data/lib/data/pinyin.txt +25359 -0
- data/lib/sugar.rb +1 -0
- data/lib/sugar/pinyin.rb +18 -0
- data/lib/sugar/trie.rb +1 -1
- data/lib/sugar/version.rb +1 -1
- data/spec/sugar_spec.rb +15 -0
- data/sugar.gemspec +2 -1
- metadata +23 -5
data/lib/sugar.rb
CHANGED
data/lib/sugar/pinyin.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "sugar/version"
|
2
|
+
module Sugar
  # Maps dictionary keys (the first token of each dictionary line) to the list
  # of pinyin readings that follow the key on that line.
  class Pinyin
    # Dictionary bundled with the gem; resolves to lib/data/pinyin.txt relative
    # to this file.
    DEFAULT_DICT = File.expand_path("../../data/pinyin.txt", __FILE__)

    # Loads the whole dictionary into memory.
    #
    # Each non-blank line is expected to hold a key followed by one or more
    # whitespace-separated readings.
    #
    # @param options [Hash]
    # @option options [Boolean] :with_tone (false) when true the readings are
    #   stored exactly as written in the dictionary; otherwise only the first
    #   run of lowercase ASCII letters of each reading is kept (presumably
    #   dropping a trailing tone marker - confirm against pinyin.txt).
    # @option options [String] :dict (DEFAULT_DICT) path of the dictionary file.
    # @raise [SystemCallError] if the dictionary file cannot be read.
    def initialize(options = {})
      options = { with_tone: false, dict: DEFAULT_DICT }.merge(options)
      with_tone = options[:with_tone]
      @hash = {}

      # Read explicitly as UTF-8: relying on the locale-derived default
      # external encoding makes lookups with UTF-8 strings fail (or the split
      # raise) when the process runs under e.g. LANG=C.
      File.foreach(options[:dict], encoding: "UTF-8") do |line|
        key, *values = line.split(" ")
        next if key.nil? # blank line: nothing to index

        @hash[key] = with_tone ? values : values.map { |v| v[/[a-z]+/] }
      end
    end

    # Returns the readings stored for +word+.
    #
    # @param word [String] a key exactly as it appears in the dictionary.
    # @return [Array<String>, nil] the readings, or nil when the key is unknown.
    def get(word)
      @hash[word]
    end
  end
end
|
data/lib/sugar/trie.rb
CHANGED
@@ -6,7 +6,7 @@ module Sugar
|
|
6
6
|
attr_accessor :trie, :frequnces, :count
|
7
7
|
|
8
8
|
def initialize
|
9
|
-
dict = File.expand_path("../../
|
9
|
+
dict = File.expand_path("../../data/phrase.txt", __FILE__)
|
10
10
|
self.trie, self.frequnces, self.count = Sugar::Trie.build(dict)
|
11
11
|
end
|
12
12
|
|
data/lib/sugar/version.rb
CHANGED
data/spec/sugar_spec.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'sugar'
|
2
|
+
|
3
|
+
# Smoke tests for Sugar::Trie backed by the bundled phrase dictionary.
describe Sugar::Trie, "module" do
  # Built once when the file is loaded and shared by both examples, so the
  # dictionary is not rebuilt for every example.
  trie = Sugar::Trie.new

  it "can segment word" do
    # `be_true` was renamed to `be_truthy` in RSpec 3 (available from 2.99);
    # the gemspec does not pin rspec, so a fresh install resolves to 3.x.
    expect(trie.word?('江西')).to be_truthy
  end

  it "can segment sentence" do
    sentence = "江西是个好地方"
    segmentation = trie.best_segmentation(sentence)
    # No expectation here: the segmentation is only printed for manual
    # inspection. NOTE(review): consider asserting on the expected result.
    puts "\n" + sentence
    puts segmentation.to_s
  end
end
|
data/sugar.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["binz"]
|
10
10
|
spec.email = ["xinkiang@gmail.com"]
|
11
11
|
spec.summary = %q{Chinese text data mining.}
|
12
|
-
spec.description = %q{1.Chinese words segmentation using Trie and Viterbi}
|
12
|
+
spec.description = %q{1.Chinese words segmentation using Trie and Viterbi;}
|
13
13
|
spec.homepage = "https://github.com/slacken/sugar"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.5"
|
22
22
|
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "rspec"
|
23
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chinese_sugar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,7 +38,21 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: 1.Chinese words segmentation using Trie and Viterbi;
|
42
56
|
email:
|
43
57
|
- xinkiang@gmail.com
|
44
58
|
executables: []
|
@@ -50,10 +64,13 @@ files:
|
|
50
64
|
- LICENSE.txt
|
51
65
|
- README.md
|
52
66
|
- Rakefile
|
53
|
-
- lib/
|
67
|
+
- lib/data/phrase.txt
|
68
|
+
- lib/data/pinyin.txt
|
54
69
|
- lib/sugar.rb
|
70
|
+
- lib/sugar/pinyin.rb
|
55
71
|
- lib/sugar/trie.rb
|
56
72
|
- lib/sugar/version.rb
|
73
|
+
- spec/sugar_spec.rb
|
57
74
|
- sugar.gemspec
|
58
75
|
- sugar.rb
|
59
76
|
homepage: https://github.com/slacken/sugar
|
@@ -80,4 +97,5 @@ rubygems_version: 2.2.2
|
|
80
97
|
signing_key:
|
81
98
|
specification_version: 4
|
82
99
|
summary: Chinese text data mining.
|
83
|
-
test_files:
|
100
|
+
test_files:
|
101
|
+
- spec/sugar_spec.rb
|