rmmseg-cpp 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Manifest.txt +40 -0
- data/README.txt +100 -0
- data/Rakefile +19 -0
- data/bin/rmmseg +63 -0
- data/data/chars.dic +12638 -0
- data/data/words.dic +120308 -0
- data/ext/rmmseg/algor.cpp +218 -0
- data/ext/rmmseg/algor.h +75 -0
- data/ext/rmmseg/chunk.h +58 -0
- data/ext/rmmseg/dict.cpp +228 -0
- data/ext/rmmseg/dict.h +34 -0
- data/ext/rmmseg/extconf.rb +10 -0
- data/ext/rmmseg/memory.cpp +9 -0
- data/ext/rmmseg/memory.h +43 -0
- data/ext/rmmseg/rmmseg.cpp +261 -0
- data/ext/rmmseg/rules.h +87 -0
- data/ext/rmmseg/token.h +19 -0
- data/ext/rmmseg/word.h +44 -0
- data/lib/rmmseg.rb +3 -0
- data/lib/rmmseg/dictionary.rb +59 -0
- data/lib/rmmseg/ferret.rb +64 -0
- data/misc/convert.rb +114 -0
- data/misc/ferret_example.rb +59 -0
- data/spec/rmmseg_spec.rb +8 -0
- data/spec/spec_helper.rb +17 -0
- data/tasks/ann.rake +81 -0
- data/tasks/bones.rake +21 -0
- data/tasks/gem.rake +126 -0
- data/tasks/git.rake +41 -0
- data/tasks/manifest.rake +49 -0
- data/tasks/notes.rake +28 -0
- data/tasks/post_load.rake +39 -0
- data/tasks/rdoc.rake +51 -0
- data/tasks/rubyforge.rake +58 -0
- data/tasks/setup.rb +268 -0
- data/tasks/spec.rake +55 -0
- data/tasks/svn.rake +48 -0
- data/tasks/test.rake +38 -0
- data/test/test_rmmseg.rb +0 -0
- metadata +96 -0
data/tasks/spec.rake
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# $Id$
|
2
|
+
|
3
|
+
if HAVE_SPEC_RAKE_SPECTASK
# RSpec tasks: spec:run, spec:specdoc and, when HAVE_RCOV is set,
# spec:rcov and spec:verify. Nothing here is defined unless
# HAVE_SPEC_RAKE_SPECTASK is true.
require 'spec/rake/verify_rcov'

# Settings that every SpecTask below copies from the project configuration.
project_defaults = lambda do |spec_task|
  spec_task.ruby_opts  = PROJ.ruby_opts
  spec_task.spec_opts  = PROJ.spec.opts
  spec_task.spec_files = PROJ.spec.files
  spec_task.libs      += PROJ.libs
end

namespace :spec do

  desc 'Run all specs with basic output'
  Spec::Rake::SpecTask.new(:run) do |spec_task|
    project_defaults.call(spec_task)
  end

  desc 'Run all specs with text output'
  Spec::Rake::SpecTask.new(:specdoc) do |spec_task|
    project_defaults.call(spec_task)
    # Same as spec:run, but with the specdoc formatter appended.
    spec_task.spec_opts = PROJ.spec.opts + ['--format', 'specdoc']
  end

  if HAVE_RCOV
    desc 'Run all specs with RCov'
    Spec::Rake::SpecTask.new(:rcov) do |spec_task|
      project_defaults.call(spec_task)
      spec_task.rcov      = true
      spec_task.rcov_dir  = PROJ.rcov.dir
      # Keep the spec files themselves out of the coverage report.
      spec_task.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec']
    end

    # Checks the coverage figure in the generated index.html against the
    # project's configured threshold.
    RCov::VerifyTask.new(:verify) do |verify_task|
      verify_task.threshold               = PROJ.rcov.threshold
      verify_task.index_html              = File.join(PROJ.rcov.dir, 'index.html')
      verify_task.require_exact_threshold = PROJ.rcov.threshold_exact
    end

    # Coverage must be generated before it can be verified.
    task :verify => :rcov

    # NOTE(review): remove_desc_for_task is defined elsewhere; presumably it
    # strips the description from spec:clobber_rcov -- confirm.
    remove_desc_for_task ['spec:clobber_rcov']
  end

end  # namespace :spec

desc 'Alias to spec:run'
task :spec => 'spec:run'

if HAVE_RCOV
  task :clobber => 'spec:clobber_rcov'
end

end  # if HAVE_SPEC_RAKE_SPECTASK
|
54
|
+
|
55
|
+
# EOF
|
data/tasks/svn.rake
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# $Id$
|
2
|
+
|
3
|
+
if HAVE_SVN
# Subversion tasks: svn:show_tags and svn:create_tag. Nothing here is
# defined unless HAVE_SVN is true.

# When no repository root is configured, read it from the
# "Repository Root:" line of `svn info`; fall back to an empty string when
# that line is absent.
unless PROJ.svn.root
  info = %x/svn info ./
  m = %r/^Repository Root:\s+(.*)$/.match(info)
  PROJ.svn.root = (m.nil? ? '' : m[1])
end
PROJ.svn.root = File.join(PROJ.svn.root, PROJ.svn.path) unless PROJ.svn.path.empty?

namespace :svn do

  # A prerequisites task that all other tasks depend upon
  task :prereqs

  desc 'Show tags from the SVN repository'
  task :show_tags => 'svn:prereqs' do
    tags = %x/svn list #{File.join(PROJ.svn.root, PROJ.svn.tags)}/
    # Strip the trailing slash from each listed entry, then print the
    # entries in descending order.
    tags.gsub!(%r/\/$/, '')
    tags = tags.split("\n").sort {|a,b| b <=> a}
    puts tags
  end

  desc 'Create a new tag in the SVN repository'
  task :create_tag => 'svn:prereqs' do
    # The caller must confirm the version being tagged via VERSION=x.y.z.
    v = ENV['VERSION']
    abort 'Must supply VERSION=x.y.z' unless v
    abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

    svn = PROJ.svn
    trunk = File.join(svn.root, svn.trunk)
    tag = "%s-%s" % [PROJ.name, PROJ.version]
    tag = File.join(svn.root, svn.tags, tag)
    msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

    puts "Creating SVN tag '#{tag}'"
    # Pass the arguments as a list so no shell is involved: quotes or
    # spaces in the message or URLs can neither break nor alter the command.
    unless system('svn', 'cp', '-m', msg, trunk, tag)
      abort "Tag creation failed"
    end
  end

end  # namespace :svn

# Releasing the gem first creates the matching SVN tag.
task 'gem:release' => 'svn:create_tag'

end  # if HAVE_SVN
|
47
|
+
|
48
|
+
# EOF
|
data/tasks/test.rake
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# $Id$
|
2
|
+
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
# Unit-test tasks: test:run and, when HAVE_RCOV is set, test:rcov and
# test:clobber_rcov.
namespace :test do

  # Files to run: the single PROJ.test.file when it exists on disk,
  # otherwise the PROJ.test.files list.
  project_test_files = lambda do
    File.file?(PROJ.test.file) ? [PROJ.test.file] : PROJ.test.files
  end

  Rake::TestTask.new(:run) do |t|
    t.libs = PROJ.libs
    t.test_files = project_test_files.call
    t.ruby_opts += PROJ.ruby_opts
    t.ruby_opts += PROJ.test.opts
  end

  if HAVE_RCOV
    desc 'Run rcov on the unit tests'
    task :rcov => :clobber_rcov do
      # dup so the shared PROJ.rcov.opts array is not modified.
      opts = PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir
      opts = opts.join(' ')
      files = project_test_files.call.join(' ')
      sh "#{RCOV} #{files} #{opts}"
    end

    # Remove the directory test:rcov writes to (PROJ.rcov.dir), not a
    # hard-coded name. rm_rf does not fail when the directory is missing.
    task :clobber_rcov do
      rm_rf PROJ.rcov.dir
    end
  end

end  # namespace :test

desc 'Alias to test:run'
task :test => 'test:run'

task :clobber => 'test:clobber_rcov' if HAVE_RCOV
|
37
|
+
|
38
|
+
# EOF
|
data/test/test_rmmseg.rb
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rmmseg-cpp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- pluskid
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-06-07 00:00:00 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: rmmseg-cpp is a high performance Chinese word segmentation utility for Ruby. It features full "Ferret":http://ferret.davebalmain.com/ integration as well as support for normal Ruby program usage. rmmseg-cpp is a rewrite of the original RMMSeg(http://rmmseg.rubyforge.org/) gem in C++. RMMSeg is written in pure Ruby. Though I tried hard to tweak RMMSeg, it just consumes lots of memory and the segmenting process is rather slow. The interface is almost identical to RMMSeg but the performance is much better. This gem is always preferable in production use. However, if you want to understand how the MMSEG segmenting algorithm works, the source code of RMMSeg is a better choice than this.
|
17
|
+
email: pluskid@gmail.com
|
18
|
+
executables:
|
19
|
+
- rmmseg
|
20
|
+
extensions:
|
21
|
+
- ext/rmmseg/extconf.rb
|
22
|
+
extra_rdoc_files:
|
23
|
+
- History.txt
|
24
|
+
- README.txt
|
25
|
+
- bin/rmmseg
|
26
|
+
files:
|
27
|
+
- History.txt
|
28
|
+
- Manifest.txt
|
29
|
+
- README.txt
|
30
|
+
- Rakefile
|
31
|
+
- bin/rmmseg
|
32
|
+
- data/chars.dic
|
33
|
+
- data/words.dic
|
34
|
+
- ext/rmmseg/algor.cpp
|
35
|
+
- ext/rmmseg/algor.h
|
36
|
+
- ext/rmmseg/chunk.h
|
37
|
+
- ext/rmmseg/dict.cpp
|
38
|
+
- ext/rmmseg/dict.h
|
39
|
+
- ext/rmmseg/extconf.rb
|
40
|
+
- ext/rmmseg/memory.cpp
|
41
|
+
- ext/rmmseg/memory.h
|
42
|
+
- ext/rmmseg/rmmseg.cpp
|
43
|
+
- ext/rmmseg/rules.h
|
44
|
+
- ext/rmmseg/token.h
|
45
|
+
- ext/rmmseg/word.h
|
46
|
+
- lib/rmmseg.rb
|
47
|
+
- lib/rmmseg/dictionary.rb
|
48
|
+
- lib/rmmseg/ferret.rb
|
49
|
+
- misc/convert.rb
|
50
|
+
- misc/ferret_example.rb
|
51
|
+
- spec/rmmseg_spec.rb
|
52
|
+
- spec/spec_helper.rb
|
53
|
+
- tasks/ann.rake
|
54
|
+
- tasks/bones.rake
|
55
|
+
- tasks/gem.rake
|
56
|
+
- tasks/git.rake
|
57
|
+
- tasks/manifest.rake
|
58
|
+
- tasks/notes.rake
|
59
|
+
- tasks/post_load.rake
|
60
|
+
- tasks/rdoc.rake
|
61
|
+
- tasks/rubyforge.rake
|
62
|
+
- tasks/setup.rb
|
63
|
+
- tasks/spec.rake
|
64
|
+
- tasks/svn.rake
|
65
|
+
- tasks/test.rake
|
66
|
+
- test/test_rmmseg.rb
|
67
|
+
has_rdoc: true
|
68
|
+
homepage: http://rmmseg-cpp.rubyforge.org
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options:
|
71
|
+
- --main
|
72
|
+
- README.txt
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
- ext
|
76
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: "0"
|
81
|
+
version:
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: "0"
|
87
|
+
version:
|
88
|
+
requirements: []
|
89
|
+
|
90
|
+
rubyforge_project: rmmseg-cpp
|
91
|
+
rubygems_version: 1.1.1
|
92
|
+
signing_key:
|
93
|
+
specification_version: 2
|
94
|
+
summary: rmmseg-cpp is a high performance Chinese word segmentation utility for Ruby
|
95
|
+
test_files:
|
96
|
+
- test/test_rmmseg.rb
|