cseg 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cseg.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 gyorou
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # Cseg
2
+
3
+ Use MIRA to train a large amount of features.
4
+
5
+ Segment Chinese sentences into words quickly and accurately.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'cseg'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install cseg
20
+
21
+ You need to install CRF++ first and set the required environment variables.
22
+
23
+ See the CRF++ website (https://taku910.github.io/crfpp/) and follow its installation manual.
24
+
25
+ ## Usage
26
+
27
+ require "cseg"
28
+
29
+ a=Kurumi.segment("屌丝是一种自我讽刺")
30
+
31
+ =>屌丝
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+ 自我
40
+
41
+ 讽刺
42
+
43
+ The result is returned as an array of words.
44
+
45
+
46
+ ## Contributing
47
+
48
+ 1. Fork it
49
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
50
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
51
+ 4. Push to the branch (`git push origin my-new-feature`)
52
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/cseg.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cseg/version'
5
+
6
# Packaging metadata for the cseg gem.
Gem::Specification.new do |gem|
  gem.name          = "cseg"
  gem.version       = Cseg::VERSION
  gem.authors       = ["gyorou"]
  gem.email         = ["gyorou@tjjtds.com"]
  # Plain strings: wrapping the text in %q{"..."} embeds the quote characters
  # in the published metadata.
  gem.description   = "A Chinese segmentation tool using CRF"
  gem.summary       = "Chinese word segmentation using CRF++"
  gem.homepage      = ""
  # Matches the MIT text shipped in LICENSE.txt.
  gem.license       = "MIT"

  # Explicit file list; data/pkumodle.data is the model that lib/cseg.rb
  # locates relative to its own path.
  gem.files         = [
    ".gitignore",
    "LICENSE.txt",
    "README.md",
    "Gemfile",
    "data/pkumodle.data",
    "lib/cseg/version.rb",
    "lib/cseg.rb",
    "cseg.gemspec",
    "Rakefile"
  ]
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib", "data"]
end
Binary file
@@ -0,0 +1,3 @@
1
# Namespace holding the cseg gem's version information.
module Cseg
  # Version string of the gem; frozen so it cannot be mutated in place.
  VERSION = "0.0.2".freeze
end
data/lib/cseg.rb ADDED
@@ -0,0 +1,53 @@
1
+ # encoding:utf-8
2
+ require "cseg/version"
3
+ require "tempfile"
4
# Word segmenter that delegates tagging to the external +crf_test+ command.
#
# The input string is written one character per line to a temporary file
# (crf_test reads its input from a file), crf_test is run against the bundled
# model, and the per-character tags it prints are folded back into words.
class Kurumi
  # Absolute path of the model file, resolved relative to this source file.
  @modle = File.expand_path("../../data/pkumodle.data", __FILE__)

  # Splits +str+ into words.
  #
  # The exit status of crf_test is not checked: if the command is missing or
  # prints nothing, an empty array is returned.
  #
  # @param str [String] text to segment
  # @return [Array<String>] the words in order of appearance; a new array is
  #   built on every call
  def self.segment(str)
    return [] if str.empty?

    input = nil
    output = nil
    begin
      input = Tempfile.new("tmp")
      output = Tempfile.new("result")

      # One character per line, flushed so the child process sees the data.
      input.write(str.each_char.map { |char| "#{char}\n" }.join)
      input.flush

      # Argument-vector form: no shell is involved, so paths containing
      # spaces or shell metacharacters are passed through untouched.
      system("crf_test", "-m", @modle, input.path, out: output.path)

      # Read the output in the caller's encoding so the returned words are
      # compatible with the input string.
      parse_tagged(File.read(output.path, encoding: str.encoding))
    ensure
      # Remove both temp files even when something above raised.
      input.close(true) if input
      output.close(true) if output
    end
  end

  # Folds "<char> <tag>" lines into words.
  #
  # A "B" or "O" tag starts a new word, "I" extends the current one, and any
  # other line (including a blank line) ends the current word.
  #
  # @param tagged [String] raw text printed by crf_test
  # @return [Array<String>] the assembled words
  def self.parse_tagged(tagged)
    # Local accumulator: nothing is shared between calls.
    words = []
    current = nil

    tagged.each_line do |line|
      char, tag = line.chomp.split(" ")
      case tag
      when "B", "O"
        words << current if current
        current = char
      when "I"
        current = "#{current}#{char}"
      else
        # Clear the pending word after flushing so it is not emitted twice.
        words << current if current
        current = nil
      end
    end

    # Emit a trailing word when the output does not end with a blank line.
    words << current if current
    words
  end
  private_class_method :parse_tagged
end
51
+
52
+ # result=Kurumi.segment("屌丝是一种自我讽刺")
53
+ # print result
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cseg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - gyorou
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-02-13 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! '"a chinese segmentation tool using CRF"'
15
+ email:
16
+ - gyorou@tjjtds.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - LICENSE.txt
23
+ - README.md
24
+ - Gemfile
25
+ - data/pkumodle.data
26
+ - lib/cseg/version.rb
27
+ - lib/cseg.rb
28
+ - cseg.gemspec
29
+ - Rakefile
30
+ homepage: ''
31
+ licenses: []
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ - data
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ! '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ requirements: []
50
+ rubyforge_project:
51
+ rubygems_version: 1.8.24
52
+ signing_key:
53
+ specification_version: 3
54
+ summary: ! '""'
55
+ test_files: []