cseg 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +12 -12
- data/cseg.gemspec +1 -1
- data/lib/cseg.rb +1 -1
- data/lib/cseg/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50e095f936a44f0a92394260ad6f82b6e0b3391d
|
4
|
+
data.tar.gz: ce17fc8ecaa4d4f0b15566ebf5225b2c312d8940
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 284d67a876fa1d382801e1801dd0f2813f39ecf9a5753da7308e026646bb3bf036d24d811c75e69df1234b52b31c8d16c316eea11b2c60d95f85c51e79dc4962
|
7
|
+
data.tar.gz: d336a86d2239e2126360d8f5ee27328892c7659d2822c58d135b736d2f38ebdfc0ad7e0abff37aae23fc7dbba85136d6afdcb628f62070dba7b61259f6f99ac4
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
-
# Kurumi
|
1
|
+
# Cseg(Kurumi)
|
2
2
|
|
3
3
|
Use MIRA to train a large number of features.
|
4
4
|
|
5
|
-
Segment chinese(both traditional and
|
5
|
+
Segment Chinese (both Traditional and Simplified) sentences into words quickly and accurately.
|
6
6
|
|
7
7
|
Note that the name of the gem is different from the repo name!
|
8
|
+
|
8
9
|
## Installation
|
9
10
|
|
10
11
|
Add this line to your application's Gemfile:
|
@@ -23,7 +24,7 @@ you need to install [CRF++](http://crfpp.googlecode.com/svn/trunk/doc/index.html
|
|
23
24
|
|
24
25
|
On GitHub the dictionary file was removed because it is quite large, but the gem published on RubyGems includes everything.
|
25
26
|
|
26
|
-
##
|
27
|
+
## Recall and Precision
|
27
28
|
|
28
29
|
Tested on the SIGHAN Bakeoff PKU test set
|
29
30
|
|
@@ -33,15 +34,14 @@ Recall: 92.86%
|
|
33
34
|
|
34
35
|
## Usage
|
35
36
|
|
36
|
-
```
|
37
|
-
The default is Simplified Chinese
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
Use parameter "tr" to specify Traditional Chinese
|
42
|
-
|
43
|
-
|
37
|
+
```ruby
|
38
|
+
#The default is Simplified Chinese
|
39
|
+
require "cseg"
|
40
|
+
Kurumi.segment("屌丝是一种自我讽刺。")
|
41
|
+
#=>["屌丝", "是", "一", "种", "自我", "讽刺", "。"]
|
42
|
+
#Use parameter "tr" to specify Traditional Chinese
|
43
|
+
Kurumi.segment("台妹真是正點。","tr")
|
44
|
+
#=>["台妹", "真", "是", "正點", "。"]
|
44
45
|
|
45
46
|
```
|
46
47
|
|
47
|
-
## Contributing
|
data/cseg.gemspec
CHANGED
data/lib/cseg.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
require "cseg/version"
|
3
3
|
class Kurumi
|
4
4
|
# since crf++ can only read from file
|
5
|
-
@modle_sp=File.expand_path("../../data/
|
5
|
+
@modle_sp=File.expand_path("../../data/pku_training.data", __FILE__)
|
6
6
|
@modle_tr=File.expand_path("../../data/as_training_less.data", __FILE__)
|
7
7
|
def self.segment(str, mode="sp")
|
8
8
|
@result=Array.new
|
data/lib/cseg/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cseg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- gyorou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: '"a chinese segmentation tool using CRF"'
|
14
14
|
email:
|
@@ -24,7 +24,7 @@ files:
|
|
24
24
|
- Rakefile
|
25
25
|
- cseg.gemspec
|
26
26
|
- data/as_training_less.data
|
27
|
-
- data/
|
27
|
+
- data/pku_training.data
|
28
28
|
- lib/cseg.rb
|
29
29
|
- lib/cseg/version.rb
|
30
30
|
homepage: ''
|