cseg 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +7 -16
 - data/cseg.gemspec +2 -2
 - data/lib/cseg/version.rb +1 -1
 - data/lib/cseg.rb +4 -14
 - metadata +5 -5
 
    
        data/README.md
    CHANGED
    
    | 
         @@ -20,26 +20,17 @@ Or install it yourself as: 
     | 
|
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
            you need to install CRF++ first and set the environment variables.
         
     | 
| 
       22 
22 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
            Here is the site of CRF 
     | 
| 
      
 23 
     | 
    
         
            +
            Here is the site of CRF++<http://crfpp.googlecode.com/svn/trunk/doc/index.html> and you should follow the manual
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            On github the dictionary file was deleted since it is quite large, though you can get all from rubygems.
         
     | 
| 
       24 
26 
     | 
    
         | 
| 
       25 
27 
     | 
    
         
             
            ## Usage
         
     | 
| 
       26 
28 
     | 
    
         | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
            	=>屌丝
         
     | 
| 
      
 29 
     | 
    
         
            +
                require "cseg"
         
     | 
| 
      
 30 
     | 
    
         
            +
                a=Kurumi.segment("屌丝是一种自我讽刺")
         
     | 
| 
      
 31 
     | 
    
         
            +
                
         
     | 
| 
      
 32 
     | 
    
         
            +
                =>屌丝/是/一/种/自我/讽刺
         
     | 
| 
       32 
33 
     | 
    
         | 
| 
       33 
     | 
    
         
            -
            	  是
         
     | 
| 
       34 
     | 
    
         
            -
            	  
         
     | 
| 
       35 
     | 
    
         
            -
            	  一
         
     | 
| 
       36 
     | 
    
         
            -
            	  
         
     | 
| 
       37 
     | 
    
         
            -
            	  种
         
     | 
| 
       38 
     | 
    
         
            -
            	  
         
     | 
| 
       39 
     | 
    
         
            -
            	  自我
         
     | 
| 
       40 
     | 
    
         
            -
            	  
         
     | 
| 
       41 
     | 
    
         
            -
            	  讽刺
         
     | 
| 
       42 
     | 
    
         
            -
            	  
         
     | 
| 
       43 
34 
     | 
    
         
             
            	the result will be an array.
         
     | 
| 
       44 
35 
     | 
    
         | 
| 
       45 
36 
     | 
    
         | 
    
        data/cseg.gemspec
    CHANGED
    
    | 
         @@ -8,8 +8,8 @@ Gem::Specification.new do |gem| 
     | 
|
| 
       8 
8 
     | 
    
         
             
              gem.version       = Cseg::VERSION
         
     | 
| 
       9 
9 
     | 
    
         
             
              gem.authors       = ["gyorou"]
         
     | 
| 
       10 
10 
     | 
    
         
             
              gem.email         = ["gyorou@tjjtds.com"]
         
     | 
| 
       11 
     | 
    
         
            -
              gem.description   = %q{"a chinese segmentation tool using CRF"}
         
     | 
| 
       12 
     | 
    
         
            -
              gem.summary       = %q{""}
         
     | 
| 
      
 11 
     | 
    
         
            +
              gem.description   = %q{"a chinese segmentation tool using CRF++"}
         
     | 
| 
      
 12 
     | 
    
         
            +
              gem.summary       = %q{"CRF++ should be installed and set in the environment variables"}
         
     | 
| 
       13 
13 
     | 
    
         
             
              gem.homepage      = ""
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
              gem.files         = [".gitignore",
         
     | 
    
        data/lib/cseg/version.rb
    CHANGED
    
    
    
        data/lib/cseg.rb
    CHANGED
    
    | 
         @@ -3,25 +3,19 @@ require "cseg/version" 
     | 
|
| 
       3 
3 
     | 
    
         
             
            require "tempfile"
         
     | 
| 
       4 
4 
     | 
    
         
             
            class Kurumi
         
     | 
| 
       5 
5 
     | 
    
         
             
            	# since crf++ can only read from file
         
     | 
| 
       6 
     | 
    
         
            -
            	#@tmpfile.read{|file| file=nil}
         
     | 
| 
       7 
6 
     | 
    
         
             
            	@modle=File.expand_path("../../data/pkumodle.data", __FILE__)
         
     | 
| 
       8 
     | 
    
         
            -
            	@result=Array.new
         
     | 
| 
       9 
7 
     | 
    
         
             
            	def self.segment(str)
         
     | 
| 
       10 
8 
     | 
    
         
             
            		tmpstr=""
         
     | 
| 
       11 
9 
     | 
    
         
             
            		for i in 0..str.length-1
         
     | 
| 
       12 
10 
     | 
    
         
             
            			tmpstr+=str[i]+"\n"
         
     | 
| 
       13 
11 
     | 
    
         
             
            		end
         
     | 
| 
       14 
     | 
    
         
            -
            		 
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
            	@resultfile=Tempfile::new("result")
         
     | 
| 
      
 12 
     | 
    
         
            +
            		@tmp=Tempfile::new("tmp")
         
     | 
| 
      
 13 
     | 
    
         
            +
            		@resultfile=Tempfile::new("result")
         
     | 
| 
       17 
14 
     | 
    
         
             
            		@tmp.write(tmpstr)
         
     | 
| 
       18 
15 
     | 
    
         
             
            		@tmp.rewind
         
     | 
| 
       19 
     | 
    
         
            -
            		 
     | 
| 
      
 16 
     | 
    
         
            +
            		@result=Array.new
         
     | 
| 
       20 
17 
     | 
    
         
             
            		system("crf_test -m #{@modle} #{@tmp.path}>#{@resultfile.path}")
         
     | 
| 
       21 
18 
     | 
    
         
             
            		@resultfile.rewind
         
     | 
| 
       22 
     | 
    
         
            -
            	#	puts @resultfile.read
         
     | 
| 
       23 
     | 
    
         
            -
            	#	puts @tmp.read
         
     | 
| 
       24 
     | 
    
         
            -
            		
         
     | 
| 
       25 
19 
     | 
    
         
             
            		word=""
         
     | 
| 
       26 
20 
     | 
    
         
             
            		@resultfile.read.each_line{|line|
         
     | 
| 
       27 
21 
     | 
    
         
             
            			token=line.chomp.split("	")
         
     | 
| 
         @@ -33,21 +27,17 @@ class Kurumi 
     | 
|
| 
       33 
27 
     | 
    
         
             
            			elsif token[1]=="I"
         
     | 
| 
       34 
28 
     | 
    
         
             
            				word+=token[0]
         
     | 
| 
       35 
29 
     | 
    
         
             
            			else
         
     | 
| 
       36 
     | 
    
         
            -
            				#nil
         
     | 
| 
       37 
30 
     | 
    
         
             
            				if word!=""
         
     | 
| 
       38 
31 
     | 
    
         
             
            					@result.push(word)
         
     | 
| 
       39 
32 
     | 
    
         
             
            				end
         
     | 
| 
       40 
33 
     | 
    
         
             
            			end
         
     | 
| 
       41 
     | 
    
         
            -
            			# @result.push(line.chomp.split("	")[0])
         
     | 
| 
       42 
34 
     | 
    
         | 
| 
       43 
35 
     | 
    
         
             
            		}
         
     | 
| 
       44 
36 
     | 
    
         | 
| 
       45 
37 
     | 
    
         
             
            		@resultfile.close(true)
         
     | 
| 
       46 
38 
     | 
    
         
             
            		@tmp.close(true)
         
     | 
| 
       47 
     | 
    
         
            -
            		return @result
         
     | 
| 
       48 
     | 
    
         
            -
            		# puts $?
         
     | 
| 
       49 
39 
     | 
    
         
             
            	end
         
     | 
| 
       50 
40 
     | 
    
         
             
            end
         
     | 
| 
       51 
41 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
            # result=Cseg.segment(" 
     | 
| 
      
 42 
     | 
    
         
            +
            # result=Cseg.segment("屌丝是一种生活态度")
         
     | 
| 
       53 
43 
     | 
    
         
             
            # print result
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: cseg
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0.2
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.3
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,9 +9,9 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2014-02- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2014-02-17 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       14 
     | 
    
         
            -
            description: ! '"a chinese segmentation tool using CRF"'
         
     | 
| 
      
 14 
     | 
    
         
            +
            description: ! '"a chinese segmentation tool using CRF++"'
         
     | 
| 
       15 
15 
     | 
    
         
             
            email:
         
     | 
| 
       16 
16 
     | 
    
         
             
            - gyorou@tjjtds.com
         
     | 
| 
       17 
17 
     | 
    
         
             
            executables: []
         
     | 
| 
         @@ -48,8 +48,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       48 
48 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       49 
49 
     | 
    
         
             
            requirements: []
         
     | 
| 
       50 
50 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       51 
     | 
    
         
            -
            rubygems_version: 1.8. 
     | 
| 
      
 51 
     | 
    
         
            +
            rubygems_version: 1.8.28
         
     | 
| 
       52 
52 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       53 
53 
     | 
    
         
             
            specification_version: 3
         
     | 
| 
       54 
     | 
    
         
            -
            summary: ! '""'
         
     | 
| 
      
 54 
     | 
    
         
            +
            summary: ! '"CRF++ should be installed and set in the environment variables"'
         
     | 
| 
       55 
55 
     | 
    
         
             
            test_files: []
         
     |