cseg 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -16
- data/cseg.gemspec +2 -2
- data/lib/cseg/version.rb +1 -1
- data/lib/cseg.rb +4 -14
- metadata +5 -5
data/README.md
CHANGED
@@ -20,26 +20,17 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
You need to install CRF++ first and add it to your environment variables.
|
22
22
|
|
23
|
-
Here is the site of CRF
|
23
|
+
The CRF++ site is <http://crfpp.googlecode.com/svn/trunk/doc/index.html>; follow the manual there to install it.
|
24
|
+
|
25
|
+
The dictionary file was removed from the GitHub repository because it is quite large, but the complete package, including that file, is available from RubyGems.
|
24
26
|
|
25
27
|
## Usage
|
26
28
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
=>屌丝
|
29
|
+
require "cseg"
|
30
|
+
a=Kurumi.segment("屌丝是一种自我讽刺")
|
31
|
+
|
32
|
+
=>屌丝/是/一/种/自我/讽刺
|
32
33
|
|
33
|
-
是
|
34
|
-
|
35
|
-
一
|
36
|
-
|
37
|
-
种
|
38
|
-
|
39
|
-
自我
|
40
|
-
|
41
|
-
讽刺
|
42
|
-
|
43
34
|
The result will be an array of the segmented words.
|
44
35
|
|
45
36
|
|
data/cseg.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.version = Cseg::VERSION
|
9
9
|
gem.authors = ["gyorou"]
|
10
10
|
gem.email = ["gyorou@tjjtds.com"]
|
11
|
-
gem.description = %q{"a chinese segmentation tool using CRF"}
|
12
|
-
gem.summary = %q{""}
|
11
|
+
gem.description = %q{"a chinese segmentation tool using CRF++"}
|
12
|
+
gem.summary = %q{"CRF++ should be installed and set in the environment variables"}
|
13
13
|
gem.homepage = ""
|
14
14
|
|
15
15
|
gem.files = [".gitignore",
|
data/lib/cseg/version.rb
CHANGED
data/lib/cseg.rb
CHANGED
@@ -3,25 +3,19 @@ require "cseg/version"
|
|
3
3
|
require "tempfile"
|
4
4
|
class Kurumi
|
5
5
|
# since crf++ can only read from file
|
6
|
-
#@tmpfile.read{|file| file=nil}
|
7
6
|
@modle=File.expand_path("../../data/pkumodle.data", __FILE__)
|
8
|
-
@result=Array.new
|
9
7
|
def self.segment(str)
|
10
8
|
tmpstr=""
|
11
9
|
for i in 0..str.length-1
|
12
10
|
tmpstr+=str[i]+"\n"
|
13
11
|
end
|
14
|
-
|
15
|
-
|
16
|
-
@resultfile=Tempfile::new("result")
|
12
|
+
@tmp=Tempfile::new("tmp")
|
13
|
+
@resultfile=Tempfile::new("result")
|
17
14
|
@tmp.write(tmpstr)
|
18
15
|
@tmp.rewind
|
19
|
-
|
16
|
+
@result=Array.new
|
20
17
|
system("crf_test -m #{@modle} #{@tmp.path}>#{@resultfile.path}")
|
21
18
|
@resultfile.rewind
|
22
|
-
# puts @resultfile.read
|
23
|
-
# puts @tmp.read
|
24
|
-
|
25
19
|
word=""
|
26
20
|
@resultfile.read.each_line{|line|
|
27
21
|
token=line.chomp.split(" ")
|
@@ -33,21 +27,17 @@ class Kurumi
|
|
33
27
|
elsif token[1]=="I"
|
34
28
|
word+=token[0]
|
35
29
|
else
|
36
|
-
#nil
|
37
30
|
if word!=""
|
38
31
|
@result.push(word)
|
39
32
|
end
|
40
33
|
end
|
41
|
-
# @result.push(line.chomp.split(" ")[0])
|
42
34
|
|
43
35
|
}
|
44
36
|
|
45
37
|
@resultfile.close(true)
|
46
38
|
@tmp.close(true)
|
47
|
-
return @result
|
48
|
-
# puts $?
|
49
39
|
end
|
50
40
|
end
|
51
41
|
|
52
|
-
# result=Cseg.segment("
|
42
|
+
# result=Cseg.segment("屌丝是一种生活态度")
|
53
43
|
# print result
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cseg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,9 +9,9 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02-
|
12
|
+
date: 2014-02-17 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: ! '"a chinese segmentation tool using CRF"'
|
14
|
+
description: ! '"a chinese segmentation tool using CRF++"'
|
15
15
|
email:
|
16
16
|
- gyorou@tjjtds.com
|
17
17
|
executables: []
|
@@ -48,8 +48,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
48
|
version: '0'
|
49
49
|
requirements: []
|
50
50
|
rubyforge_project:
|
51
|
-
rubygems_version: 1.8.
|
51
|
+
rubygems_version: 1.8.28
|
52
52
|
signing_key:
|
53
53
|
specification_version: 3
|
54
|
-
summary: ! '""'
|
54
|
+
summary: ! '"CRF++ should be installed and set in the environment variables"'
|
55
55
|
test_files: []
|