chinese_sugar 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/sugar.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "sugar/version"
2
+ require "sugar/trie"
2
3
 
3
4
  module Sugar
4
5
 
data/lib/sugar/pinyin.rb ADDED
@@ -0,0 +1,18 @@
1
+ require "sugar/version"
2
+ module Sugar
3
+ class Pinyin
4
+ def initialize(options = {})
5
+ options = {with_tone: false}.merge(options)
6
+ dict = File.expand_path("../../data/pinyin.txt", __FILE__)
7
+ @hash = {}
8
+ File.read(dict).split("\n").map do |line|
9
+ key, *values = line.split(" ")
10
+ @hash[key] = options[:with_tone] ? values : values.map{|v| v[/[a-z]+/]}
11
+ end
12
+ end
13
+
14
+ def get(word)
15
+ @hash[word]
16
+ end
17
+ end
18
+ end
data/lib/sugar/trie.rb CHANGED
@@ -6,7 +6,7 @@ module Sugar
6
6
  attr_accessor :trie, :frequnces, :count
7
7
 
8
8
  def initialize
9
- dict = File.expand_path("../../dict.txt", __FILE__)
9
+ dict = File.expand_path("../../data/phrase.txt", __FILE__)
10
10
  self.trie, self.frequnces, self.count = Sugar::Trie.build(dict)
11
11
  end
12
12
 
data/lib/sugar/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Sugar
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/spec/sugar_spec.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'sugar'
2
+
3
+ describe Sugar::Trie, "module" do
4
+ trie = Sugar::Trie.new
5
+ it "can segment word" do
6
+ trie.word?('江西').should be_true
7
+ end
8
+
9
+ it "can segment sentence" do
10
+ sentence = "江西是个好地方"
11
+ segmentation = trie.best_segmentation(sentence)
12
+ puts "\n" + sentence
13
+ puts segmentation.to_s
14
+ end
15
+ end
data/sugar.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["binz"]
10
10
  spec.email = ["xinkiang@gmail.com"]
11
11
  spec.summary = %q{Chinese text data mining.}
12
- spec.description = %q{1.Chinese words segmentation using Trie and Viterbi}
12
+ spec.description = %q{1.Chinese words segmentation using Trie and Viterbi;}
13
13
  spec.homepage = "https://github.com/slacken/sugar"
14
14
  spec.license = "MIT"
15
15
 
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.5"
22
22
  spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chinese_sugar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-16 00:00:00.000000000 Z
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,7 +38,21 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: 1.Chinese words segmentation using Trie and Viterbi
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: 1.Chinese words segmentation using Trie and Viterbi;
42
56
  email:
43
57
  - xinkiang@gmail.com
44
58
  executables: []
@@ -50,10 +64,13 @@ files:
50
64
  - LICENSE.txt
51
65
  - README.md
52
66
  - Rakefile
53
- - lib/dict.txt
67
+ - lib/data/phrase.txt
68
+ - lib/data/pinyin.txt
54
69
  - lib/sugar.rb
70
+ - lib/sugar/pinyin.rb
55
71
  - lib/sugar/trie.rb
56
72
  - lib/sugar/version.rb
73
+ - spec/sugar_spec.rb
57
74
  - sugar.gemspec
58
75
  - sugar.rb
59
76
  homepage: https://github.com/slacken/sugar
@@ -80,4 +97,5 @@ rubygems_version: 2.2.2
80
97
  signing_key:
81
98
  specification_version: 4
82
99
  summary: Chinese text data mining.
83
- test_files: []
100
+ test_files:
101
+ - spec/sugar_spec.rb