cseg 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/README.md +7 -16
  2. data/cseg.gemspec +2 -2
  3. data/lib/cseg/version.rb +1 -1
  4. data/lib/cseg.rb +4 -14
  5. metadata +5 -5
data/README.md CHANGED
@@ -20,26 +20,17 @@ Or install it yourself as:
20
20
 
21
21
  you need to install CRF++ first and set the environment variables.
22
22
 
23
- Here is the site of CRF++ and you should follow the manual
23
+ Here is the site of CRF++<http://crfpp.googlecode.com/svn/trunk/doc/index.html> and you should follow the manual
24
+
25
+ On github the dictionary file was deleted since it is quite large, though you can get all from rubygems.
24
26
 
25
27
  ## Usage
26
28
 
27
- require "cseg"
28
-
29
- a=Kurumi.segment("屌丝是一种自我讽刺")
30
-
31
- =>屌丝
29
+ require "cseg"
30
+ a=Kurumi.segment("屌丝是一种自我讽刺")
31
+
32
+ =>屌丝/是/一/种/自我/讽刺
32
33
 
33
-
34
-
35
-
36
-
37
-
38
-
39
- 自我
40
-
41
- 讽刺
42
-
43
34
  the result will be an array.
44
35
 
45
36
 
data/cseg.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |gem|
8
8
  gem.version = Cseg::VERSION
9
9
  gem.authors = ["gyorou"]
10
10
  gem.email = ["gyorou@tjjtds.com"]
11
- gem.description = %q{"a chinese segmentation tool using CRF"}
12
- gem.summary = %q{""}
11
+ gem.description = %q{"a chinese segmentation tool using CRF++"}
12
+ gem.summary = %q{"CRF++ should be installed and set in the environment variables"}
13
13
  gem.homepage = ""
14
14
 
15
15
  gem.files = [".gitignore",
data/lib/cseg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cseg
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/cseg.rb CHANGED
@@ -3,25 +3,19 @@ require "cseg/version"
3
3
  require "tempfile"
4
4
  class Kurumi
5
5
  # since crf++ can only read from file
6
- #@tmpfile.read{|file| file=nil}
7
6
  @modle=File.expand_path("../../data/pkumodle.data", __FILE__)
8
- @result=Array.new
9
7
  def self.segment(str)
10
8
  tmpstr=""
11
9
  for i in 0..str.length-1
12
10
  tmpstr+=str[i]+"\n"
13
11
  end
14
- #@tmpfile=nil
15
- @tmp=Tempfile::new("tmp")
16
- @resultfile=Tempfile::new("result")
12
+ @tmp=Tempfile::new("tmp")
13
+ @resultfile=Tempfile::new("result")
17
14
  @tmp.write(tmpstr)
18
15
  @tmp.rewind
19
- #@tmpfile.close
16
+ @result=Array.new
20
17
  system("crf_test -m #{@modle} #{@tmp.path}>#{@resultfile.path}")
21
18
  @resultfile.rewind
22
- # puts @resultfile.read
23
- # puts @tmp.read
24
-
25
19
  word=""
26
20
  @resultfile.read.each_line{|line|
27
21
  token=line.chomp.split(" ")
@@ -33,21 +27,17 @@ class Kurumi
33
27
  elsif token[1]=="I"
34
28
  word+=token[0]
35
29
  else
36
- #nil
37
30
  if word!=""
38
31
  @result.push(word)
39
32
  end
40
33
  end
41
- # @result.push(line.chomp.split(" ")[0])
42
34
 
43
35
  }
44
36
 
45
37
  @resultfile.close(true)
46
38
  @tmp.close(true)
47
- return @result
48
- # puts $?
49
39
  end
50
40
  end
51
41
 
52
- # result=Cseg.segment("���?�z??�܁C���ꊼ�k??�I??��")
42
+ # result=Cseg.segment("屌丝是一种生活态度")
53
43
  # print result
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cseg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,9 +9,9 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-13 00:00:00.000000000 Z
12
+ date: 2014-02-17 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: ! '"a chinese segmentation tool using CRF"'
14
+ description: ! '"a chinese segmentation tool using CRF++"'
15
15
  email:
16
16
  - gyorou@tjjtds.com
17
17
  executables: []
@@ -48,8 +48,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
48
48
  version: '0'
49
49
  requirements: []
50
50
  rubyforge_project:
51
- rubygems_version: 1.8.24
51
+ rubygems_version: 1.8.28
52
52
  signing_key:
53
53
  specification_version: 3
54
- summary: ! '""'
54
+ summary: ! '"CRF++ should be installed and set in the environment variables"'
55
55
  test_files: []