cseg 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +7 -16
- data/cseg.gemspec +2 -2
- data/lib/cseg/version.rb +1 -1
- data/lib/cseg.rb +4 -14
- metadata +5 -5
data/README.md
CHANGED
@@ -20,26 +20,17 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
You need to install CRF++ first and set the environment variables.
|
22
22
|
|
23
|
-
Here is the site of CRF
|
23
|
+
Here is the site of CRF++ <http://crfpp.googlecode.com/svn/trunk/doc/index.html> and you should follow the manual.
|
24
|
+
|
25
|
+
On GitHub the dictionary file was deleted since it is quite large, though you can get everything from RubyGems.
|
24
26
|
|
25
27
|
## Usage
|
26
28
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
=>屌丝
|
29
|
+
require "cseg"
|
30
|
+
a=Kurumi.segment("屌丝是一种自我讽刺")
|
31
|
+
|
32
|
+
=>屌丝/是/一/种/自我/讽刺
|
32
33
|
|
33
|
-
是
|
34
|
-
|
35
|
-
一
|
36
|
-
|
37
|
-
种
|
38
|
-
|
39
|
-
自我
|
40
|
-
|
41
|
-
讽刺
|
42
|
-
|
43
34
|
the result will be an array.
|
44
35
|
|
45
36
|
|
data/cseg.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.version = Cseg::VERSION
|
9
9
|
gem.authors = ["gyorou"]
|
10
10
|
gem.email = ["gyorou@tjjtds.com"]
|
11
|
-
gem.description = %q{"a chinese segmentation tool using CRF"}
|
12
|
-
gem.summary = %q{""}
|
11
|
+
gem.description = %q{"a chinese segmentation tool using CRF++"}
|
12
|
+
gem.summary = %q{"CRF++ should be installed and set in the environment variables"}
|
13
13
|
gem.homepage = ""
|
14
14
|
|
15
15
|
gem.files = [".gitignore",
|
data/lib/cseg/version.rb
CHANGED
data/lib/cseg.rb
CHANGED
@@ -3,25 +3,19 @@ require "cseg/version"
|
|
3
3
|
require "tempfile"
|
4
4
|
class Kurumi
|
5
5
|
# since crf++ can only read from file
|
6
|
-
#@tmpfile.read{|file| file=nil}
|
7
6
|
@modle=File.expand_path("../../data/pkumodle.data", __FILE__)
|
8
|
-
@result=Array.new
|
9
7
|
def self.segment(str)
|
10
8
|
tmpstr=""
|
11
9
|
for i in 0..str.length-1
|
12
10
|
tmpstr+=str[i]+"\n"
|
13
11
|
end
|
14
|
-
|
15
|
-
|
16
|
-
@resultfile=Tempfile::new("result")
|
12
|
+
@tmp=Tempfile::new("tmp")
|
13
|
+
@resultfile=Tempfile::new("result")
|
17
14
|
@tmp.write(tmpstr)
|
18
15
|
@tmp.rewind
|
19
|
-
|
16
|
+
@result=Array.new
|
20
17
|
system("crf_test -m #{@modle} #{@tmp.path}>#{@resultfile.path}")
|
21
18
|
@resultfile.rewind
|
22
|
-
# puts @resultfile.read
|
23
|
-
# puts @tmp.read
|
24
|
-
|
25
19
|
word=""
|
26
20
|
@resultfile.read.each_line{|line|
|
27
21
|
token=line.chomp.split(" ")
|
@@ -33,21 +27,17 @@ class Kurumi
|
|
33
27
|
elsif token[1]=="I"
|
34
28
|
word+=token[0]
|
35
29
|
else
|
36
|
-
#nil
|
37
30
|
if word!=""
|
38
31
|
@result.push(word)
|
39
32
|
end
|
40
33
|
end
|
41
|
-
# @result.push(line.chomp.split(" ")[0])
|
42
34
|
|
43
35
|
}
|
44
36
|
|
45
37
|
@resultfile.close(true)
|
46
38
|
@tmp.close(true)
|
47
|
-
return @result
|
48
|
-
# puts $?
|
49
39
|
end
|
50
40
|
end
|
51
41
|
|
52
|
-
# result=Cseg.segment("
|
42
|
+
# result=Cseg.segment("屌丝是一种生活态度")
|
53
43
|
# print result
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cseg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,9 +9,9 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02-
|
12
|
+
date: 2014-02-17 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: ! '"a chinese segmentation tool using CRF"'
|
14
|
+
description: ! '"a chinese segmentation tool using CRF++"'
|
15
15
|
email:
|
16
16
|
- gyorou@tjjtds.com
|
17
17
|
executables: []
|
@@ -48,8 +48,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
48
|
version: '0'
|
49
49
|
requirements: []
|
50
50
|
rubyforge_project:
|
51
|
-
rubygems_version: 1.8.
|
51
|
+
rubygems_version: 1.8.28
|
52
52
|
signing_key:
|
53
53
|
specification_version: 3
|
54
|
-
summary: ! '""'
|
54
|
+
summary: ! '"CRF++ should be installed and set in the environment variables"'
|
55
55
|
test_files: []
|