cseg 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/README.md +7 -16
  2. data/cseg.gemspec +2 -2
  3. data/lib/cseg/version.rb +1 -1
  4. data/lib/cseg.rb +4 -14
  5. metadata +5 -5
data/README.md CHANGED
@@ -20,26 +20,17 @@ Or install it yourself as:
20
20
 
21
21
  You need to install CRF++ first and set the environment variables.
22
22
 
23
- Here is the site of CRF++ and you should follow the manual
23
+ The CRF++ site is <http://crfpp.googlecode.com/svn/trunk/doc/index.html>; you should follow the manual there.
24
+
25
+ On GitHub the dictionary file was removed because it is quite large, but you can still get everything, including the dictionary, from RubyGems.
24
26
 
25
27
  ## Usage
26
28
 
27
- require "cseg"
28
-
29
- a=Kurumi.segment("屌丝是一种自我讽刺")
30
-
31
- =>屌丝
29
+ require "cseg"
30
+ a=Kurumi.segment("屌丝是一种自我讽刺")
31
+
32
+ =>屌丝/是/一/种/自我/讽刺
32
33
 
33
-
34
-
35
-
36
-
37
-
38
-
39
- 自我
40
-
41
- 讽刺
42
-
43
34
  the result will be an array.
44
35
 
45
36
 
data/cseg.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |gem|
8
8
  gem.version = Cseg::VERSION
9
9
  gem.authors = ["gyorou"]
10
10
  gem.email = ["gyorou@tjjtds.com"]
11
- gem.description = %q{"a chinese segmentation tool using CRF"}
12
- gem.summary = %q{""}
11
+ gem.description = %q{"a chinese segmentation tool using CRF++"}
12
+ gem.summary = %q{"CRF++ should be installed and set in the environment variables"}
13
13
  gem.homepage = ""
14
14
 
15
15
  gem.files = [".gitignore",
data/lib/cseg/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cseg
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/cseg.rb CHANGED
@@ -3,25 +3,19 @@ require "cseg/version"
3
3
  require "tempfile"
4
4
  class Kurumi
5
5
  # since crf++ can only read from file
6
- #@tmpfile.read{|file| file=nil}
7
6
  @modle=File.expand_path("../../data/pkumodle.data", __FILE__)
8
- @result=Array.new
9
7
  def self.segment(str)
10
8
  tmpstr=""
11
9
  for i in 0..str.length-1
12
10
  tmpstr+=str[i]+"\n"
13
11
  end
14
- #@tmpfile=nil
15
- @tmp=Tempfile::new("tmp")
16
- @resultfile=Tempfile::new("result")
12
+ @tmp=Tempfile::new("tmp")
13
+ @resultfile=Tempfile::new("result")
17
14
  @tmp.write(tmpstr)
18
15
  @tmp.rewind
19
- #@tmpfile.close
16
+ @result=Array.new
20
17
  system("crf_test -m #{@modle} #{@tmp.path}>#{@resultfile.path}")
21
18
  @resultfile.rewind
22
- # puts @resultfile.read
23
- # puts @tmp.read
24
-
25
19
  word=""
26
20
  @resultfile.read.each_line{|line|
27
21
  token=line.chomp.split(" ")
@@ -33,21 +27,17 @@ class Kurumi
33
27
  elsif token[1]=="I"
34
28
  word+=token[0]
35
29
  else
36
- #nil
37
30
  if word!=""
38
31
  @result.push(word)
39
32
  end
40
33
  end
41
- # @result.push(line.chomp.split(" ")[0])
42
34
 
43
35
  }
44
36
 
45
37
  @resultfile.close(true)
46
38
  @tmp.close(true)
47
- return @result
48
- # puts $?
49
39
  end
50
40
  end
51
41
 
52
- # result=Cseg.segment("���?�z??�܁C���ꊼ�k??�I??��")
42
+ # result=Cseg.segment("屌丝是一种生活态度")
53
43
  # print result
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cseg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,9 +9,9 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-13 00:00:00.000000000 Z
12
+ date: 2014-02-17 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: ! '"a chinese segmentation tool using CRF"'
14
+ description: ! '"a chinese segmentation tool using CRF++"'
15
15
  email:
16
16
  - gyorou@tjjtds.com
17
17
  executables: []
@@ -48,8 +48,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
48
48
  version: '0'
49
49
  requirements: []
50
50
  rubyforge_project:
51
- rubygems_version: 1.8.24
51
+ rubygems_version: 1.8.28
52
52
  signing_key:
53
53
  specification_version: 3
54
- summary: ! '""'
54
+ summary: ! '"CRF++ should be installed and set in the environment variables"'
55
55
  test_files: []