plexus-rmmseg 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/History.txt +42 -0
  4. data/Manifest.txt +51 -0
  5. data/README.txt +74 -0
  6. data/Rakefile +12 -0
  7. data/TODO.txt +5 -0
  8. data/bin/rmmseg +65 -0
  9. data/data/chars.dic +12638 -0
  10. data/data/custom.dic +12 -0
  11. data/data/punctuation.dic +79 -0
  12. data/data/words.dic +120330 -0
  13. data/lib/rmmseg.rb +13 -0
  14. data/lib/rmmseg/algorithm.rb +136 -0
  15. data/lib/rmmseg/amibguity.rb +4 -0
  16. data/lib/rmmseg/chunk.rb +41 -0
  17. data/lib/rmmseg/complex_algorithm.rb +122 -0
  18. data/lib/rmmseg/config.rb +65 -0
  19. data/lib/rmmseg/dictionary.rb +80 -0
  20. data/lib/rmmseg/ferret.rb +109 -0
  21. data/lib/rmmseg/lawl_rule.rb +12 -0
  22. data/lib/rmmseg/lsdmfocw_rule.rb +13 -0
  23. data/lib/rmmseg/mm_rule.rb +13 -0
  24. data/lib/rmmseg/rule_helper.rb +28 -0
  25. data/lib/rmmseg/simple_algorithm.rb +37 -0
  26. data/lib/rmmseg/svwl_rule.rb +12 -0
  27. data/lib/rmmseg/token.rb +30 -0
  28. data/lib/rmmseg/version.rb +3 -0
  29. data/lib/rmmseg/word.rb +38 -0
  30. data/misc/ferret_example.rb +56 -0
  31. data/misc/homepage.erb +170 -0
  32. data/misc/homepage.html +1214 -0
  33. data/plexus-rmmseg.gemspec +20 -0
  34. data/spec/chunk_spec.rb +25 -0
  35. data/spec/complex_algorithm_spec.rb +18 -0
  36. data/spec/config_spec.rb +12 -0
  37. data/spec/dictionary_spec.rb +20 -0
  38. data/spec/lawl_rule_spec.rb +15 -0
  39. data/spec/lsdmfocw_rule_spec.rb +14 -0
  40. data/spec/mm_rule_spec.rb +15 -0
  41. data/spec/simple_algorithm_spec.rb +46 -0
  42. data/spec/spec_helper.rb +12 -0
  43. data/spec/svwl_rule_spec.rb +14 -0
  44. data/spec/word_spec.rb +9 -0
  45. data/tasks/ann.rake +76 -0
  46. data/tasks/annotations.rake +22 -0
  47. data/tasks/doc.rake +48 -0
  48. data/tasks/gem.rake +110 -0
  49. data/tasks/homepage.rake +12 -0
  50. data/tasks/manifest.rake +49 -0
  51. data/tasks/post_load.rake +26 -0
  52. data/tasks/rubyforge.rake +57 -0
  53. data/tasks/setup.rb +227 -0
  54. data/tasks/spec.rake +54 -0
  55. data/tasks/svn.rake +44 -0
  56. data/tasks/test.rake +38 -0
  57. metadata +121 -0
@@ -0,0 +1,38 @@
1
+ # $Id$
2
+
3
+ require 'rake/testtask'
4
+
5
+ namespace :test do
6
+
7
+ Rake::TestTask.new(:run) do |t|
8
+ t.libs = PROJ.libs
9
+ t.test_files = if test(?f, PROJ.test_file) then [PROJ.test_file]
10
+ else PROJ.tests end
11
+ t.ruby_opts += PROJ.ruby_opts
12
+ t.ruby_opts += PROJ.test_opts
13
+ end
14
+
15
+ if HAVE_RCOV
16
+ desc 'Run rcov on the unit tests'
17
+ task :rcov => :clobber_rcov do
18
+ opts = PROJ.rcov_opts.dup << '-o' << PROJ.rcov_dir
19
+ opts = opts.join(' ')
20
+ files = if test(?f, PROJ.test_file) then [PROJ.test_file]
21
+ else PROJ.tests end
22
+ files = files.join(' ')
23
+ sh "#{RCOV} #{files} #{opts}"
24
+ end
25
+
26
+ task :clobber_rcov do
27
+ rm_r 'coverage' rescue nil
28
+ end
29
+ end
30
+
31
+ end # namespace :test
32
+
33
+ desc 'Alias to test:run'
34
+ task :test => 'test:run'
35
+
36
+ task :clobber => 'test:clobber_rcov' if HAVE_RCOV
37
+
38
+ # EOF
metadata ADDED
@@ -0,0 +1,121 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: plexus-rmmseg
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.6
5
+ platform: ruby
6
+ authors:
7
+ - pluskid
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2008-03-16 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 'RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm.
14
+ It is based on two variants of maximum matching algorithms. Two algorithms are available
15
+ for using: * simple algorithm that uses only forward maximum matching. * complex
16
+ algorithm that uses three-word chunk maximum matching and 3 additional rules to solve
17
+ ambiguities. For more information about the algorithm, please refer to the following
18
+ essays: * http://technology.chtsai.org/mmseg/ * http://pluskid.lifegoo.com/?p=261'
19
+ email: pluskid@gmail.com
20
+ executables:
21
+ - rmmseg
22
+ extensions: []
23
+ extra_rdoc_files:
24
+ - History.txt
25
+ - README.txt
26
+ - TODO.txt
27
+ - bin/rmmseg
28
+ files:
29
+ - .gitignore
30
+ - History.txt
31
+ - Manifest.txt
32
+ - README.txt
33
+ - Rakefile
34
+ - TODO.txt
35
+ - bin/rmmseg
36
+ - data/chars.dic
37
+ - data/custom.dic
38
+ - data/punctuation.dic
39
+ - data/words.dic
40
+ - lib/rmmseg.rb
41
+ - lib/rmmseg/algorithm.rb
42
+ - lib/rmmseg/amibguity.rb
43
+ - lib/rmmseg/chunk.rb
44
+ - lib/rmmseg/complex_algorithm.rb
45
+ - lib/rmmseg/config.rb
46
+ - lib/rmmseg/dictionary.rb
47
+ - lib/rmmseg/ferret.rb
48
+ - lib/rmmseg/lawl_rule.rb
49
+ - lib/rmmseg/lsdmfocw_rule.rb
50
+ - lib/rmmseg/mm_rule.rb
51
+ - lib/rmmseg/rule_helper.rb
52
+ - lib/rmmseg/simple_algorithm.rb
53
+ - lib/rmmseg/svwl_rule.rb
54
+ - lib/rmmseg/token.rb
55
+ - lib/rmmseg/version.rb
56
+ - lib/rmmseg/word.rb
57
+ - misc/ferret_example.rb
58
+ - misc/homepage.erb
59
+ - misc/homepage.html
60
+ - plexus-rmmseg.gemspec
61
+ - spec/chunk_spec.rb
62
+ - spec/complex_algorithm_spec.rb
63
+ - spec/config_spec.rb
64
+ - spec/dictionary_spec.rb
65
+ - spec/lawl_rule_spec.rb
66
+ - spec/lsdmfocw_rule_spec.rb
67
+ - spec/mm_rule_spec.rb
68
+ - spec/simple_algorithm_spec.rb
69
+ - spec/spec_helper.rb
70
+ - spec/svwl_rule_spec.rb
71
+ - spec/word_spec.rb
72
+ - tasks/ann.rake
73
+ - tasks/annotations.rake
74
+ - tasks/doc.rake
75
+ - tasks/gem.rake
76
+ - tasks/homepage.rake
77
+ - tasks/manifest.rake
78
+ - tasks/post_load.rake
79
+ - tasks/rubyforge.rake
80
+ - tasks/setup.rb
81
+ - tasks/spec.rake
82
+ - tasks/svn.rake
83
+ - tasks/test.rake
84
+ homepage: http://rmmseg.rubyforge.org
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options:
90
+ - --main
91
+ - README.txt
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project:
106
+ rubygems_version: 2.2.1
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm
110
+ test_files:
111
+ - spec/chunk_spec.rb
112
+ - spec/complex_algorithm_spec.rb
113
+ - spec/config_spec.rb
114
+ - spec/dictionary_spec.rb
115
+ - spec/lawl_rule_spec.rb
116
+ - spec/lsdmfocw_rule_spec.rb
117
+ - spec/mm_rule_spec.rb
118
+ - spec/simple_algorithm_spec.rb
119
+ - spec/spec_helper.rb
120
+ - spec/svwl_rule_spec.rb
121
+ - spec/word_spec.rb