plexus-rmmseg 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/History.txt +42 -0
  4. data/Manifest.txt +51 -0
  5. data/README.txt +74 -0
  6. data/Rakefile +12 -0
  7. data/TODO.txt +5 -0
  8. data/bin/rmmseg +65 -0
  9. data/data/chars.dic +12638 -0
  10. data/data/custom.dic +12 -0
  11. data/data/punctuation.dic +79 -0
  12. data/data/words.dic +120330 -0
  13. data/lib/rmmseg.rb +13 -0
  14. data/lib/rmmseg/algorithm.rb +136 -0
  15. data/lib/rmmseg/amibguity.rb +4 -0
  16. data/lib/rmmseg/chunk.rb +41 -0
  17. data/lib/rmmseg/complex_algorithm.rb +122 -0
  18. data/lib/rmmseg/config.rb +65 -0
  19. data/lib/rmmseg/dictionary.rb +80 -0
  20. data/lib/rmmseg/ferret.rb +109 -0
  21. data/lib/rmmseg/lawl_rule.rb +12 -0
  22. data/lib/rmmseg/lsdmfocw_rule.rb +13 -0
  23. data/lib/rmmseg/mm_rule.rb +13 -0
  24. data/lib/rmmseg/rule_helper.rb +28 -0
  25. data/lib/rmmseg/simple_algorithm.rb +37 -0
  26. data/lib/rmmseg/svwl_rule.rb +12 -0
  27. data/lib/rmmseg/token.rb +30 -0
  28. data/lib/rmmseg/version.rb +3 -0
  29. data/lib/rmmseg/word.rb +38 -0
  30. data/misc/ferret_example.rb +56 -0
  31. data/misc/homepage.erb +170 -0
  32. data/misc/homepage.html +1214 -0
  33. data/plexus-rmmseg.gemspec +20 -0
  34. data/spec/chunk_spec.rb +25 -0
  35. data/spec/complex_algorithm_spec.rb +18 -0
  36. data/spec/config_spec.rb +12 -0
  37. data/spec/dictionary_spec.rb +20 -0
  38. data/spec/lawl_rule_spec.rb +15 -0
  39. data/spec/lsdmfocw_rule_spec.rb +14 -0
  40. data/spec/mm_rule_spec.rb +15 -0
  41. data/spec/simple_algorithm_spec.rb +46 -0
  42. data/spec/spec_helper.rb +12 -0
  43. data/spec/svwl_rule_spec.rb +14 -0
  44. data/spec/word_spec.rb +9 -0
  45. data/tasks/ann.rake +76 -0
  46. data/tasks/annotations.rake +22 -0
  47. data/tasks/doc.rake +48 -0
  48. data/tasks/gem.rake +110 -0
  49. data/tasks/homepage.rake +12 -0
  50. data/tasks/manifest.rake +49 -0
  51. data/tasks/post_load.rake +26 -0
  52. data/tasks/rubyforge.rake +57 -0
  53. data/tasks/setup.rb +227 -0
  54. data/tasks/spec.rake +54 -0
  55. data/tasks/svn.rake +44 -0
  56. data/tasks/test.rake +38 -0
  57. metadata +121 -0
@@ -0,0 +1,12 @@
1
# Tasks that build the project homepage and copy it to RubyForge.
namespace(:homepage) {
  # Runs the `gerbil` command inside misc/ and writes its output to
  # misc/homepage.html.
  desc 'generate homepage'
  task(:generate) {
    sh "cd misc && gerbil html homepage.erb > homepage.html"
  }

  # Regenerates the page first (via the :generate prerequisite), then copies
  # it to the remote web root with scp.
  desc 'publish homepage to rubyforge'
  task(:publish => :generate) {
    remote_path = "rubyforge.org:/var/www/gforge-projects"
    sh "scp misc/homepage.html #{remote_path}/rmmseg/index.html"
  }
}
@@ -0,0 +1,49 @@
1
+ # $Id$
2
+
3
+ require 'find'
4
+
5
# Tasks that compare and regenerate the project manifest file.
namespace :manifest do

  # Writes the current candidate file list to a scratch file, diffs it against
  # the manifest and prints the result. The diff is colourized when the
  # facets/ansicode library was loaded and TERM is set.
  desc 'Verify the manifest'
  task :check do
    fn = PROJ.manifest_file + '.tmp'
    files = manifest_files

    begin
      File.open(fn, 'w') {|fp| fp.puts files}
      lines = %x(#{DIFF} -du #{PROJ.manifest_file} #{fn}).split("\n")
      if HAVE_FACETS_ANSICODE and ENV.has_key?('TERM')
        lines.map! do |line|
          case line
          when %r/^(-{3}|\+{3})/; nil   # drop the ---/+++ file header lines
          when %r/^@/; Console::ANSICode.blue line
          when %r/^\+/; Console::ANSICode.green line
          when %r/^\-/; Console::ANSICode.red line
          else line end
        end
      end
      puts lines.compact
    ensure
      # Remove the scratch file even when diffing or printing raised; rm_f
      # ignores a missing file instead of hiding every error behind `rescue`.
      rm_f fn
    end
  end

  # Overwrites the manifest with the current candidate file list. When the
  # manifest does not exist yet, it is added to the list itself.
  desc 'Create a new manifest'
  task :create do
    files = manifest_files
    unless test(?f, PROJ.manifest_file)
      files << PROJ.manifest_file
      files.sort!
    end
    File.open(PROJ.manifest_file, 'w') {|fp| fp.puts files}
  end

  # Raises unless the manifest lists exactly the current candidate files.
  task :assert do
    files = manifest_files
    manifest = File.read(PROJ.manifest_file).split($/)
    raise "ERROR: #{PROJ.manifest_file} is out of date" unless files == manifest
  end

end  # namespace :manifest
45
+
46
+ desc 'Alias to manifest:check'
47
+ task :manifest => 'manifest:check'
48
+
49
+ # EOF
@@ -0,0 +1,26 @@
1
+ # $Id$
2
+
3
+ # This file does not define any rake tasks. It is used to load some project
4
+ # settings if they are not defined by the user.
5
+
6
# Add an exact-match pattern for the manifest file to the RDoc exclusions and
# one for the announcement file to the general exclusions.
PROJ.rdoc_exclude.push("^#{Regexp.escape(PROJ.manifest_file)}$")
PROJ.exclude.push("^#{Regexp.escape(PROJ.ann_file)}$")

# Everything below only fills in a setting that is still unset.

# First two paragraphs of the history file.
PROJ.changes ||= paragraphs_of(PROJ.history_file, 0..1).join("\n\n")

# The paragraphs under the readme's "description" heading.
PROJ.description ||= paragraphs_of(PROJ.readme_file, 'description').join("\n\n")

# Text of the description up to its first period.
PROJ.summary ||= PROJ.description.split('.').first

# Non-empty, whitespace-stripped lines of the manifest, or nothing when the
# manifest file does not exist.
PROJ.files ||=
  if test(?f, PROJ.manifest_file)
    File.readlines(PROJ.manifest_file).
      map {|line| line.chomp.strip}.
      reject {|line| line == ''}
  else
    []
  end

# Every packaged file whose path starts with "bin".
PROJ.executables ||= PROJ.files.select {|path| path =~ %r/^bin/}

PROJ.rdoc_main ||= PROJ.readme_file
25
+
26
+ # EOF
@@ -0,0 +1,57 @@
1
+ # $Id$
2
+
3
# RubyForge release tasks. They are only defined when a RubyForge project name
# is configured and the rubyforge library could be loaded.
if PROJ.rubyforge_name && HAVE_RUBYFORGE

  require 'rubyforge'
  require 'rake/contrib/sshpublisher'

  namespace :gem do
    # Needs VERSION=x.y.z in the environment and aborts unless it equals
    # PROJ.version; then logs in and adds a release with the package files.
    desc 'Package and upload to RubyForge'
    task :release => [:clobber, :package] do |t|
      requested = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
      unless requested == PROJ.version
        abort "Versions don't match #{requested} vs #{PROJ.version}"
      end
      pkg = "pkg/#{PROJ.spec.full_name}"

      if $DEBUG
        puts "release_id = rf.add_release #{PROJ.rubyforge_name.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\""
        puts "rf.add_file #{PROJ.rubyforge_name.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\""
      end

      forge = RubyForge.new
      puts 'Logging in'
      forge.login

      settings = forge.userconfig
      settings['release_notes'] = PROJ.description if PROJ.description
      settings['release_changes'] = PROJ.changes if PROJ.changes
      settings['preformatted'] = true

      # The tarball and zip are optional; the gem is always uploaded.
      uploads = []
      uploads << "#{pkg}.tgz" if PROJ.need_tar
      uploads << "#{pkg}.zip" if PROJ.need_zip
      uploads << "#{pkg}.gem"

      puts "Releasing #{PROJ.name} v. #{PROJ.version}"
      forge.add_release PROJ.rubyforge_name, PROJ.name, PROJ.version, *uploads
    end
  end  # namespace :gem


  namespace :doc do
    # Rebuilds the RDoc and uploads the RDoc directory over SSH, using the
    # username stored in ~/.rubyforge/user-config.yml.
    desc "Publish RDoc to RubyForge"
    task :release => %w(doc:clobber_rdoc doc:rdoc) do
      config_path = File.expand_path('~/.rubyforge/user-config.yml')
      config = YAML.load(File.read(config_path))

      host = "#{config['username']}@rubyforge.org"
      remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge_name}/"
      remote_dir << PROJ.rdoc_remote_dir if PROJ.rdoc_remote_dir
      local_dir = PROJ.rdoc_dir

      publisher = Rake::SshDirPublisher.new(host, remote_dir, local_dir)
      publisher.upload
    end
  end  # namespace :doc

end  # if HAVE_RUBYFORGE
56
+
57
+ # EOF
@@ -0,0 +1,227 @@
1
+ # $Id$
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'rake/clean'
6
+ require 'fileutils'
7
+ require 'ostruct'
8
+
9
+ PROJ = OpenStruct.new
10
+
11
+ PROJ.name = nil
12
+ PROJ.summary = nil
13
+ PROJ.description = nil
14
+ PROJ.changes = nil
15
+ PROJ.authors = nil
16
+ PROJ.email = nil
17
+ PROJ.url = nil
18
+ PROJ.version = ENV['VERSION'] || '0.0.0'
19
+ PROJ.rubyforge_name = nil
20
+ PROJ.exclude = %w(tmp$ bak$ ~$ CVS .svn/ ^pkg/ ^doc/)
21
+ PROJ.release_name = ENV['RELEASE']
22
+ PROJ.history_file = 'History.txt'
23
+ PROJ.manifest_file = 'Manifest.txt'
24
+ PROJ.readme_file = 'README.txt'
25
+
26
+ # Rspec
27
+ PROJ.specs = FileList['spec/**/*_spec.rb']
28
+ PROJ.spec_opts = []
29
+
30
+ # Test::Unit
31
+ PROJ.tests = FileList['test/**/test_*.rb']
32
+ PROJ.test_file = 'test/all.rb'
33
+ PROJ.test_opts = []
34
+
35
+ # Rcov
36
+ PROJ.rcov_dir = 'coverage'
37
+ PROJ.rcov_opts = ['--sort', 'coverage', '-T']
38
+ PROJ.rcov_threshold = 90.0
39
+ PROJ.rcov_threshold_exact = false
40
+
41
+ # Rdoc
42
+ PROJ.rdoc_opts = []
43
+ PROJ.rdoc_include = %w(^lib/ ^bin/ ^ext/ .txt$)
44
+ PROJ.rdoc_exclude = %w(extconf.rb$)
45
+ PROJ.rdoc_main = nil
46
+ PROJ.rdoc_dir = 'doc'
47
+ PROJ.rdoc_remote_dir = nil
48
+
49
+ # Extensions
50
+ PROJ.extensions = FileList['ext/**/extconf.rb']
51
+ PROJ.ruby_opts = %w(-w)
52
+ PROJ.libs = []
53
+ %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
54
+
55
+ # Gem Packaging
56
+ PROJ.files = nil
57
+ PROJ.executables = nil
58
+ PROJ.dependencies = []
59
+ PROJ.need_tar = true
60
+ PROJ.need_zip = false
61
+ PROJ.post_install_message = nil
62
+
63
+ # File Annotations
64
+ PROJ.annotation_exclude = %w(^tasks/setup.rb$)
65
+ PROJ.annotation_extensions = %w(.txt .rb .erb) << ''
66
+ PROJ.annotation_tags = %w(FIXME OPTIMIZE TODO)
67
+
68
+ # Subversion Repository
69
+ PROJ.svn = false
70
+ PROJ.svn_root = nil
71
+ PROJ.svn_trunk = 'trunk'
72
+ PROJ.svn_tags = 'tags'
73
+ PROJ.svn_branches = 'branches'
74
+
75
+ # Announce
76
+ PROJ.ann_file = 'announcement.txt'
77
+ PROJ.ann_text = nil
78
+ PROJ.ann_paragraphs = []
79
+ PROJ.ann_email = {
80
+ :from => nil,
81
+ :to => %w(ruby-talk@ruby-lang.org),
82
+ :server => 'localhost',
83
+ :port => 25,
84
+ :domain => ENV['HOSTNAME'],
85
+ :acct => nil,
86
+ :passwd => nil,
87
+ :authtype => :plain
88
+ }
89
+
90
# Import every rake file in the tasks folder in sorted order, except that
# tasks/post_load.rake (when present) is moved to the front of the list.
rakefiles = Dir.glob('tasks/*.rake').sort
post_load = rakefiles.delete('tasks/post_load.rake')
rakefiles.unshift(post_load) if post_load
import(*rakefiles)

# Platform constants: WIN32 is truthy when RUBY_PLATFORM names a Windows
# flavoured build, and DEV_NULL is the matching null device.
unless defined? WIN32
  WIN32 = %r/djgpp|(cyg|ms|bcc)win|mingw/ =~ RUBY_PLATFORM
end

DEV_NULL = if WIN32 then 'NUL:' else '/dev/null' end
99
+
100
# Runs the given block with STDOUT and STDERR redirected to DEV_NULL and
# returns the block's value. Both streams are restored afterwards, even when
# the block raises.
#
def quiet( &block )
  saved_out = STDOUT.dup
  saved_err = STDERR.dup
  begin
    STDOUT.reopen DEV_NULL
    STDERR.reopen DEV_NULL
    block.call
  ensure
    STDOUT.reopen saved_out
    STDERR.reopen saved_err
    # The duplicates have served their purpose once the streams are restored;
    # close them so repeated calls do not leak file descriptors.
    saved_out.close
    saved_err.close
  end
end
109
+
110
# Name of the diff program: 'diff.exe' on Windows, otherwise 'gdiff' when
# running it succeeds and plain 'diff' when it does not. An already defined
# DIFF is left untouched.
unless defined? DIFF
  DIFF =
    if WIN32
      'diff.exe'
    elsif quiet {system "gdiff", __FILE__, __FILE__}
      'gdiff'
    else
      'diff'
    end
end

# 'sudo' when `which sudo` succeeds on a non-Windows platform, else empty.
SUDO =
  if WIN32
    ''
  elsif quiet {system 'which sudo'}
    'sudo'
  else
    ''
  end

RCOV = WIN32 ? 'rcov.bat' : 'rcov'
GEM  = WIN32 ? 'gem.bat'  : 'gem'

# Try to load each optional library and record the outcome in a HAVE_<LIB>
# constant on Object ('/' in the library name becomes '_').
%w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
  flag = "HAVE_#{lib.tr('/','_').upcase}"
  loaded =
    begin
      require lib
      true
    rescue LoadError
      false
    end
  Object.const_set flag, loaded
end
133
+
134
# Reads the file at +path+, splits it into paragraphs on blank lines (carriage
# returns are stripped first) and returns the +paragraphs+ selected by the
# given indexes and/or ranges, in the manner of Array#values_at.
#
# If the first selector is a String it is treated as a section title: only
# the paragraphs that follow the first heading (a paragraph starting with one
# or more '=' followed by the title, case-insensitive) up to the next heading
# are considered.
#
# When no indexes or ranges are given, all candidate paragraphs are returned,
# with or without a title.
#
#    changes = paragraphs_of('History.txt', 0..1).join("\n\n")
#    summary, *description = paragraphs_of('README.txt', 3, 3..8)
#    description = paragraphs_of('README.txt', 'description')
#
def paragraphs_of( path, *paragraphs )
  title = String === paragraphs.first ? paragraphs.shift : nil
  ary = File.read(path).delete("\r").split(/\n\n+/)

  # No explicit selection means "everything", regardless of whether a title
  # was supplied.
  paragraphs << (0..-1) if paragraphs.empty?

  result = if title
    tmp, matching = [], false
    rgxp = %r/^=+\s*#{Regexp.escape(title)}/i

    ary.each do |val|
      if val =~ rgxp
        # The second heading seen ends the requested section.
        break if matching
        matching = true
        # From here on any heading terminates the section.
        rgxp = %r/^=+/i
      elsif matching
        tmp << val
      end
    end
    tmp
  else ary end

  result.values_at(*paragraphs)
end
163
+
164
# Adds the given gem _name_ to the current project's dependency list. An
# optional gem _version_ can be given. If omitted, the newest installed
# version of the gem is used; if the gem is not installed at all, the
# dependency is recorded without a version.
#
# A _version_ starting with a digit is recorded as ">= version"; any other
# string is recorded as given. Returns PROJ.dependencies.
#
def depend_on( name, version = nil )
  # Gem.source_index was removed in RubyGems 2.0; prefer the
  # Gem::Specification lookup and keep the old call only as a fallback.
  spec = if Gem::Specification.respond_to?(:find_all_by_name)
    Gem::Specification.find_all_by_name(name).max_by {|s| s.version}
  else
    Gem.source_index.find_name(name).last
  end
  version = spec.version.to_s if version.nil? and !spec.nil?

  PROJ.dependencies << case version
    when nil; [name]
    when %r/^\d/; [name, ">= #{version}"]
    else [name, version] end
end
177
+
178
# Prepends each argument, expanded to an absolute path, to the load path $:.
# An entry is skipped when it is not an existing directory or is already on
# the load path. Returns the arguments as given.
#
def ensure_in_path( *args )
  args.each do |entry|
    dir = File.expand_path(entry)
    next unless File.directory?(dir)
    next if $:.include?(dir)
    $:.unshift(dir)
  end
end
186
+
187
# Find each rake task by name and remove its description text. This will
# prevent the task from being displayed in the list of available tasks.
# _names_ may be a single task name or an array of names; names that do not
# match a defined task are ignored.
#
def remove_desc_for_task( names )
  Array(names).each do |task_name|
    task = Rake.application.tasks.find {|t| t.name == task_name}
    next if task.nil?
    if task.respond_to?(:clear_comments)
      # Use Rake's own API where it exists rather than depending on how the
      # task stores its description internally.
      task.clear_comments
    else
      # Fallback for task objects without clear_comments: reset the
      # description instance variable directly.
      task.instance_variable_set :@comment, nil
    end
  end
end
197
+
198
# Switches the working directory to _dir_, runs the _block_ and returns its
# value. The directory that was current when the method was called is
# restored afterwards, whether the block returns normally or raises.
#
def in_directory( dir, &block )
  original_dir = pwd
  begin
    cd dir
    block.call
  ensure
    cd original_dir
  end
end
211
+
212
# Scans the current working directory and creates a list of files that are
# candidates to be in the manifest: every regular file below '.' whose
# relative path matches none of the patterns in PROJ.exclude. Returns the
# paths sorted, without a leading "./".
#
def manifest_files
  # Loaded here so the method does not depend on another task file having
  # required it first.
  require 'find'

  # Regexp.union of an empty list never matches, so an empty PROJ.exclude
  # keeps every file (joining the patterns with '|' would yield the empty
  # regexp, which matches -- and therefore excludes -- everything).
  exclude = Regexp.union(PROJ.exclude.map {|pattern| Regexp.new(pattern)})

  files = []
  Find.find '.' do |path|
    path = path.sub(%r/^(\.\/|\/)/, '')
    next unless test ?f, path
    next if path =~ exclude
    files << path
  end
  files.sort!
end
226
+
227
+ # EOF
@@ -0,0 +1,54 @@
1
+ # $Id$
2
+
3
# RSpec tasks; only defined when spec/rake/spectask could be loaded.
if HAVE_SPEC_RAKE_SPECTASK
  require 'spec/rake/verify_rcov'

  namespace :spec do

    # Settings shared by every spec task below.
    common_setup = lambda do |t|
      t.ruby_opts = PROJ.ruby_opts
      t.spec_opts = PROJ.spec_opts
      t.spec_files = PROJ.specs
      t.libs += PROJ.libs
    end

    desc 'Run all specs with basic output'
    Spec::Rake::SpecTask.new(:run) {|t| common_setup.call(t)}

    desc 'Run all specs with text output'
    Spec::Rake::SpecTask.new(:specdoc) do |t|
      common_setup.call(t)
      # Same options as :run plus the specdoc formatter.
      t.spec_opts = PROJ.spec_opts + ['--format', 'specdoc']
    end

    # The coverage tasks are only defined when rcov could be loaded.
    if HAVE_RCOV
      desc 'Run all specs with RCov'
      Spec::Rake::SpecTask.new(:rcov) do |t|
        common_setup.call(t)
        t.rcov = true
        t.rcov_dir = PROJ.rcov_dir
        t.rcov_opts = PROJ.rcov_opts + ['--exclude', 'spec']
      end

      RCov::VerifyTask.new(:verify) do |t|
        t.threshold = PROJ.rcov_threshold
        t.index_html = File.join(PROJ.rcov_dir, 'index.html')
        t.require_exact_threshold = PROJ.rcov_threshold_exact
      end
    end

  end  # namespace :spec

  desc 'Alias to spec:run'
  task :spec => 'spec:run'

  # Make the top-level clobber also run the coverage clean-up task.
  task :clobber => 'spec:clobber_rcov' if HAVE_RCOV

  remove_desc_for_task %w(spec:clobber_rcov)

end  # if HAVE_SPEC_RAKE_SPECTASK
53
+
54
+ # EOF
@@ -0,0 +1,44 @@
1
+ # $Id$
2
+
3
+
4
# Subversion tasks; only defined when PROJ.svn is set and the svn command
# runs successfully. Both output streams of the probe are discarded: stdout
# is redirected first and stderr is then pointed at it.
if PROJ.svn and system("svn --version > #{DEV_NULL} 2>&1")

# Fall back to the "Repository Root" reported by `svn info` (or an empty
# string when it is not reported) if no root was configured.
unless PROJ.svn_root
  info = %x/svn info ./
  m = %r/^Repository Root:\s+(.*)$/.match(info)
  PROJ.svn_root = (m.nil? ? '' : m[1])
end
# A String value for PROJ.svn is appended to the root as a sub-path.
PROJ.svn_root = File.join(PROJ.svn_root, PROJ.svn) if String === PROJ.svn

namespace :svn do

  # Lists the tags directory, with the trailing '/' of each entry removed.
  desc 'Show tags from the SVN repository'
  task :show_tags do |t|
    tags = %x/svn list #{File.join(PROJ.svn_root, PROJ.svn_tags)}/
    tags.gsub!(%r/\/$/, '')
    puts tags
  end

  # Copies trunk to tags/<name>-<version>. Needs VERSION=x.y.z in the
  # environment and aborts unless it equals PROJ.version.
  desc 'Create a new tag in the SVN repository'
  task :create_tag do |t|
    v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
    abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

    trunk = File.join(PROJ.svn_root, PROJ.svn_trunk)
    tag = "%s-%s" % [PROJ.name, PROJ.version]
    tag = File.join(PROJ.svn_root, PROJ.svn_tags, tag)
    msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

    puts "Creating SVN tag '#{tag}'"
    # Pass the arguments separately so the message and URLs reach svn
    # verbatim, without any shell quoting.
    unless system 'svn', 'cp', '-m', msg, trunk, tag
      abort "Tag creation failed"
    end
  end

end  # namespace :svn

# Releasing the gem also creates the matching tag.
task 'gem:release' => 'svn:create_tag'

end  # if PROJ.svn
43
+
44
+ # EOF