chinese_sugar 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sugar.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "sugar/version"
2
+ require "sugar/trie"
2
3
 
3
4
  module Sugar
4
5
 
@@ -0,0 +1,18 @@
1
+ require "sugar/version"
2
+ module Sugar
3
+ class Pinyin
4
+ def initialize(options = {})
5
+ options = {with_tone: false}.merge(options)
6
+ dict = File.expand_path("../../data/pinyin.txt", __FILE__)
7
+ @hash = {}
8
+ File.read(dict).split("\n").map do |line|
9
+ key, *values = line.split(" ")
10
+ @hash[key] = options[:with_tone] ? values : values.map{|v| v[/[a-z]+/]}
11
+ end
12
+ end
13
+
14
+ def get(word)
15
+ @hash[word]
16
+ end
17
+ end
18
+ end
data/lib/sugar/trie.rb CHANGED
@@ -6,7 +6,7 @@ module Sugar
6
6
  attr_accessor :trie, :frequnces, :count
7
7
 
8
8
  def initialize
9
- dict = File.expand_path("../../dict.txt", __FILE__)
9
+ dict = File.expand_path("../../data/phrase.txt", __FILE__)
10
10
  self.trie, self.frequnces, self.count = Sugar::Trie.build(dict)
11
11
  end
12
12
 
data/lib/sugar/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Sugar
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,15 @@
1
+ require 'sugar'
2
+
3
+ describe Sugar::Trie, "module" do
4
+ trie = Sugar::Trie.new
5
+ it "can segment word" do
6
+ trie.word?('江西').should be_true
7
+ end
8
+
9
+ it "can segment sentence" do
10
+ sentence = "江西是个好地方"
11
+ segmentation = trie.best_segmentation(sentence)
12
+ puts "\n" + sentence
13
+ puts segmentation.to_s
14
+ end
15
+ end
data/sugar.gemspec CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["binz"]
10
10
  spec.email = ["xinkiang@gmail.com"]
11
11
  spec.summary = %q{Chinese text data mining.}
12
- spec.description = %q{1.Chinese words segmentation using Trie and Viterbi}
12
+ spec.description = %q{1.Chinese words segmentation using Trie and Viterbi;}
13
13
  spec.homepage = "https://github.com/slacken/sugar"
14
14
  spec.license = "MIT"
15
15
 
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.5"
22
22
  spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chinese_sugar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-16 00:00:00.000000000 Z
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,7 +38,21 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: 1.Chinese words segmentation using Trie and Viterbi
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: 1.Chinese words segmentation using Trie and Viterbi;
42
56
  email:
43
57
  - xinkiang@gmail.com
44
58
  executables: []
@@ -50,10 +64,13 @@ files:
50
64
  - LICENSE.txt
51
65
  - README.md
52
66
  - Rakefile
53
- - lib/dict.txt
67
+ - lib/data/phrase.txt
68
+ - lib/data/pinyin.txt
54
69
  - lib/sugar.rb
70
+ - lib/sugar/pinyin.rb
55
71
  - lib/sugar/trie.rb
56
72
  - lib/sugar/version.rb
73
+ - spec/sugar_spec.rb
57
74
  - sugar.gemspec
58
75
  - sugar.rb
59
76
  homepage: https://github.com/slacken/sugar
@@ -80,4 +97,5 @@ rubygems_version: 2.2.2
80
97
  signing_key:
81
98
  specification_version: 4
82
99
  summary: Chinese text data mining.
83
- test_files: []
100
+ test_files:
101
+ - spec/sugar_spec.rb