plexus-rmmseg 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/History.txt +42 -0
  4. data/Manifest.txt +51 -0
  5. data/README.txt +74 -0
  6. data/Rakefile +12 -0
  7. data/TODO.txt +5 -0
  8. data/bin/rmmseg +65 -0
  9. data/data/chars.dic +12638 -0
  10. data/data/custom.dic +12 -0
  11. data/data/punctuation.dic +79 -0
  12. data/data/words.dic +120330 -0
  13. data/lib/rmmseg.rb +13 -0
  14. data/lib/rmmseg/algorithm.rb +136 -0
  15. data/lib/rmmseg/amibguity.rb +4 -0
  16. data/lib/rmmseg/chunk.rb +41 -0
  17. data/lib/rmmseg/complex_algorithm.rb +122 -0
  18. data/lib/rmmseg/config.rb +65 -0
  19. data/lib/rmmseg/dictionary.rb +80 -0
  20. data/lib/rmmseg/ferret.rb +109 -0
  21. data/lib/rmmseg/lawl_rule.rb +12 -0
  22. data/lib/rmmseg/lsdmfocw_rule.rb +13 -0
  23. data/lib/rmmseg/mm_rule.rb +13 -0
  24. data/lib/rmmseg/rule_helper.rb +28 -0
  25. data/lib/rmmseg/simple_algorithm.rb +37 -0
  26. data/lib/rmmseg/svwl_rule.rb +12 -0
  27. data/lib/rmmseg/token.rb +30 -0
  28. data/lib/rmmseg/version.rb +3 -0
  29. data/lib/rmmseg/word.rb +38 -0
  30. data/misc/ferret_example.rb +56 -0
  31. data/misc/homepage.erb +170 -0
  32. data/misc/homepage.html +1214 -0
  33. data/plexus-rmmseg.gemspec +20 -0
  34. data/spec/chunk_spec.rb +25 -0
  35. data/spec/complex_algorithm_spec.rb +18 -0
  36. data/spec/config_spec.rb +12 -0
  37. data/spec/dictionary_spec.rb +20 -0
  38. data/spec/lawl_rule_spec.rb +15 -0
  39. data/spec/lsdmfocw_rule_spec.rb +14 -0
  40. data/spec/mm_rule_spec.rb +15 -0
  41. data/spec/simple_algorithm_spec.rb +46 -0
  42. data/spec/spec_helper.rb +12 -0
  43. data/spec/svwl_rule_spec.rb +14 -0
  44. data/spec/word_spec.rb +9 -0
  45. data/tasks/ann.rake +76 -0
  46. data/tasks/annotations.rake +22 -0
  47. data/tasks/doc.rake +48 -0
  48. data/tasks/gem.rake +110 -0
  49. data/tasks/homepage.rake +12 -0
  50. data/tasks/manifest.rake +49 -0
  51. data/tasks/post_load.rake +26 -0
  52. data/tasks/rubyforge.rake +57 -0
  53. data/tasks/setup.rb +227 -0
  54. data/tasks/spec.rake +54 -0
  55. data/tasks/svn.rake +44 -0
  56. data/tasks/test.rake +38 -0
  57. metadata +121 -0
@@ -0,0 +1,12 @@
1
+ namespace :homepage do
2
+ desc 'generate homepage'
3
+ task :generate do
4
+ sh "cd misc && gerbil html homepage.erb > homepage.html"
5
+ end
6
+
7
+ desc 'publish homepage to rubyforge'
8
+ task :publish => :generate do
9
+ remote_path = "rubyforge.org:/var/www/gforge-projects"
10
+ sh "scp misc/homepage.html #{remote_path}/rmmseg/index.html"
11
+ end
12
+ end
@@ -0,0 +1,49 @@
1
+ # $Id$
2
+
3
+ require 'find'
4
+
5
+ namespace :manifest do
6
+
7
+ desc 'Verify the manifest'
8
+ task :check do
9
+ fn = PROJ.manifest_file + '.tmp'
10
+ files = manifest_files
11
+
12
+ File.open(fn, 'w') {|fp| fp.puts files}
13
+ lines = %x(#{DIFF} -du #{PROJ.manifest_file} #{fn}).split("\n")
14
+ if HAVE_FACETS_ANSICODE and ENV.has_key?('TERM')
15
+ lines.map! do |line|
16
+ case line
17
+ when %r/^(-{3}|\+{3})/; nil
18
+ when %r/^@/; Console::ANSICode.blue line
19
+ when %r/^\+/; Console::ANSICode.green line
20
+ when %r/^\-/; Console::ANSICode.red line
21
+ else line end
22
+ end
23
+ end
24
+ puts lines.compact
25
+ rm fn rescue nil
26
+ end
27
+
28
+ desc 'Create a new manifest'
29
+ task :create do
30
+ files = manifest_files
31
+ unless test(?f, PROJ.manifest_file)
32
+ files << PROJ.manifest_file
33
+ files.sort!
34
+ end
35
+ File.open(PROJ.manifest_file, 'w') {|fp| fp.puts files}
36
+ end
37
+
38
+ task :assert do
39
+ files = manifest_files
40
+ manifest = File.read(PROJ.manifest_file).split($/)
41
+ raise "ERROR: #{PROJ.manifest_file} is out of date" unless files == manifest
42
+ end
43
+
44
+ end # namespace :manifest
45
+
46
+ desc 'Alias to manifest:check'
47
+ task :manifest => 'manifest:check'
48
+
49
+ # EOF
@@ -0,0 +1,26 @@
1
+ # $Id$
2
+
3
+ # This file does not define any rake tasks. It is used to load some project
4
+ # settings if they are not defined by the user.
5
+
6
+ PROJ.rdoc_exclude << "^#{Regexp.escape(PROJ.manifest_file)}$"
7
+ PROJ.exclude << "^#{Regexp.escape(PROJ.ann_file)}$"
8
+
9
+ PROJ.changes ||= paragraphs_of(PROJ.history_file, 0..1).join("\n\n")
10
+
11
+ PROJ.description ||= paragraphs_of(PROJ.readme_file, 'description').join("\n\n")
12
+
13
+ PROJ.summary ||= PROJ.description.split('.').first
14
+
15
+ PROJ.files ||=
16
+ if test(?f, PROJ.manifest_file)
17
+ files = File.readlines(PROJ.manifest_file).map {|fn| fn.chomp.strip}
18
+ files.delete ''
19
+ files
20
+ else [] end
21
+
22
+ PROJ.executables ||= PROJ.files.find_all {|fn| fn =~ %r/^bin/}
23
+
24
+ PROJ.rdoc_main ||= PROJ.readme_file
25
+
26
+ # EOF
@@ -0,0 +1,57 @@
1
+ # $Id$
2
+
3
+ if PROJ.rubyforge_name && HAVE_RUBYFORGE
4
+
5
+ require 'rubyforge'
6
+ require 'rake/contrib/sshpublisher'
7
+
8
+ namespace :gem do
9
+ desc 'Package and upload to RubyForge'
10
+ task :release => [:clobber, :package] do |t|
11
+ v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
12
+ abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
13
+ pkg = "pkg/#{PROJ.spec.full_name}"
14
+
15
+ if $DEBUG then
16
+ puts "release_id = rf.add_release #{PROJ.rubyforge_name.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\""
17
+ puts "rf.add_file #{PROJ.rubyforge_name.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\""
18
+ end
19
+
20
+ rf = RubyForge.new
21
+ puts 'Logging in'
22
+ rf.login
23
+
24
+ c = rf.userconfig
25
+ c['release_notes'] = PROJ.description if PROJ.description
26
+ c['release_changes'] = PROJ.changes if PROJ.changes
27
+ c['preformatted'] = true
28
+
29
+ files = [(PROJ.need_tar ? "#{pkg}.tgz" : nil),
30
+ (PROJ.need_zip ? "#{pkg}.zip" : nil),
31
+ "#{pkg}.gem"].compact
32
+
33
+ puts "Releasing #{PROJ.name} v. #{PROJ.version}"
34
+ rf.add_release PROJ.rubyforge_name, PROJ.name, PROJ.version, *files
35
+ end
36
+ end # namespace :gem
37
+
38
+
39
+ namespace :doc do
40
+ desc "Publish RDoc to RubyForge"
41
+ task :release => %w(doc:clobber_rdoc doc:rdoc) do
42
+ config = YAML.load(
43
+ File.read(File.expand_path('~/.rubyforge/user-config.yml'))
44
+ )
45
+
46
+ host = "#{config['username']}@rubyforge.org"
47
+ remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge_name}/"
48
+ remote_dir << PROJ.rdoc_remote_dir if PROJ.rdoc_remote_dir
49
+ local_dir = PROJ.rdoc_dir
50
+
51
+ Rake::SshDirPublisher.new(host, remote_dir, local_dir).upload
52
+ end
53
+ end # namespace :doc
54
+
55
+ end # if HAVE_RUBYFORGE
56
+
57
+ # EOF
@@ -0,0 +1,227 @@
1
+ # $Id$
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'rake/clean'
6
+ require 'fileutils'
7
+ require 'ostruct'
8
+
9
+ PROJ = OpenStruct.new
10
+
11
+ PROJ.name = nil
12
+ PROJ.summary = nil
13
+ PROJ.description = nil
14
+ PROJ.changes = nil
15
+ PROJ.authors = nil
16
+ PROJ.email = nil
17
+ PROJ.url = nil
18
+ PROJ.version = ENV['VERSION'] || '0.0.0'
19
+ PROJ.rubyforge_name = nil
20
+ PROJ.exclude = %w(tmp$ bak$ ~$ CVS .svn/ ^pkg/ ^doc/)
21
+ PROJ.release_name = ENV['RELEASE']
22
+ PROJ.history_file = 'History.txt'
23
+ PROJ.manifest_file = 'Manifest.txt'
24
+ PROJ.readme_file = 'README.txt'
25
+
26
+ # Rspec
27
+ PROJ.specs = FileList['spec/**/*_spec.rb']
28
+ PROJ.spec_opts = []
29
+
30
+ # Test::Unit
31
+ PROJ.tests = FileList['test/**/test_*.rb']
32
+ PROJ.test_file = 'test/all.rb'
33
+ PROJ.test_opts = []
34
+
35
+ # Rcov
36
+ PROJ.rcov_dir = 'coverage'
37
+ PROJ.rcov_opts = ['--sort', 'coverage', '-T']
38
+ PROJ.rcov_threshold = 90.0
39
+ PROJ.rcov_threshold_exact = false
40
+
41
+ # Rdoc
42
+ PROJ.rdoc_opts = []
43
+ PROJ.rdoc_include = %w(^lib/ ^bin/ ^ext/ .txt$)
44
+ PROJ.rdoc_exclude = %w(extconf.rb$)
45
+ PROJ.rdoc_main = nil
46
+ PROJ.rdoc_dir = 'doc'
47
+ PROJ.rdoc_remote_dir = nil
48
+
49
+ # Extensions
50
+ PROJ.extensions = FileList['ext/**/extconf.rb']
51
+ PROJ.ruby_opts = %w(-w)
52
+ PROJ.libs = []
53
+ %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
54
+
55
+ # Gem Packaging
56
+ PROJ.files = nil
57
+ PROJ.executables = nil
58
+ PROJ.dependencies = []
59
+ PROJ.need_tar = true
60
+ PROJ.need_zip = false
61
+ PROJ.post_install_message = nil
62
+
63
+ # File Annotations
64
+ PROJ.annotation_exclude = %w(^tasks/setup.rb$)
65
+ PROJ.annotation_extensions = %w(.txt .rb .erb) << ''
66
+ PROJ.annotation_tags = %w(FIXME OPTIMIZE TODO)
67
+
68
+ # Subversion Repository
69
+ PROJ.svn = false
70
+ PROJ.svn_root = nil
71
+ PROJ.svn_trunk = 'trunk'
72
+ PROJ.svn_tags = 'tags'
73
+ PROJ.svn_branches = 'branches'
74
+
75
+ # Announce
76
+ PROJ.ann_file = 'announcement.txt'
77
+ PROJ.ann_text = nil
78
+ PROJ.ann_paragraphs = []
79
+ PROJ.ann_email = {
80
+ :from => nil,
81
+ :to => %w(ruby-talk@ruby-lang.org),
82
+ :server => 'localhost',
83
+ :port => 25,
84
+ :domain => ENV['HOSTNAME'],
85
+ :acct => nil,
86
+ :passwd => nil,
87
+ :authtype => :plain
88
+ }
89
+
90
+ # Load the other rake files in the tasks folder
91
+ rakefiles = Dir.glob('tasks/*.rake').sort
92
+ rakefiles.unshift(rakefiles.delete('tasks/post_load.rake')).compact!
93
+ import(*rakefiles)
94
+
95
+ # Setup some constants
96
+ WIN32 = %r/djgpp|(cyg|ms|bcc)win|mingw/ =~ RUBY_PLATFORM unless defined? WIN32
97
+
98
+ DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
99
+
100
# Runs the given block with STDOUT and STDERR redirected to DEV_NULL and
# returns the block's result. The original streams are restored when the
# block finishes, whether it returns normally or raises.
#
def quiet( &block )
  saved_out = STDOUT.dup
  saved_err = STDERR.dup
  STDOUT.reopen DEV_NULL
  STDERR.reopen DEV_NULL
  block.call
ensure
  # Restore each stream from its saved duplicate, then close the duplicate so
  # an extra descriptor is not left open after every call. A duplicate is nil
  # here only if the dup itself failed, in which case there is nothing to undo.
  if saved_out
    STDOUT.reopen saved_out
    saved_out.close
  end
  if saved_err
    STDERR.reopen saved_err
    saved_err.close
  end
end
109
+
110
+ DIFF = if WIN32 then 'diff.exe'
111
+ else
112
+ if quiet {system "gdiff", __FILE__, __FILE__} then 'gdiff'
113
+ else 'diff' end
114
+ end unless defined? DIFF
115
+
116
+ SUDO = if WIN32 then ''
117
+ else
118
+ if quiet {system 'which sudo'} then 'sudo'
119
+ else '' end
120
+ end
121
+
122
+ RCOV = WIN32 ? 'rcov.bat' : 'rcov'
123
+ GEM = WIN32 ? 'gem.bat' : 'gem'
124
+
125
+ %w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
126
+ begin
127
+ require lib
128
+ Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", true}
129
+ rescue LoadError
130
+ Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", false}
131
+ end
132
+ end
133
+
134
# Reads the file at +path+, splits it into paragraphs on blank lines, and
# returns an array of the +paragraphs+ selected.
#
# Each selector is an index or range as accepted by Array#values_at, so a
# selector that points past the available paragraphs yields nil entries. If
# the first selector is a String it is taken as a section title instead: only
# the paragraphs between the first "= title" style heading matching it (case
# insensitive) and the next heading are considered, and the remaining
# selectors index into that section. When no index selectors are given, every
# available paragraph is returned.
#
#    changes = paragraphs_of('History.txt', 0..1).join("\n\n")
#    summary, *description = paragraphs_of('README.txt', 3, 3..8)
#    description = paragraphs_of('README.txt', 'description').join("\n\n")
#
def paragraphs_of( path, *paragraphs )
  title = String === paragraphs.first ? paragraphs.shift : nil
  # Default to "everything" with or without a title, so the two call styles
  # behave the same when no explicit selection is made.
  paragraphs << (0..-1) if paragraphs.empty?

  result = File.read(path).delete("\r").split(/\n\n+/)

  if title
    heading     = %r/^=+\s*#{Regexp.escape(title)}/i
    any_heading = %r/^=+/
    start = result.index {|para| para =~ heading}
    result = if start.nil? then []
             else
               # Everything after the matching heading, up to the next heading.
               result[(start + 1)..-1].take_while {|para| para !~ any_heading}
             end
  end

  result.values_at(*paragraphs)
end
163
+
164
# Adds the given gem _name_ to the current project's dependency list
# (PROJ.dependencies). An optional gem _version_ can be given. If omitted,
# the newest installed version of the gem is used; if the gem is not
# installed, the dependency is recorded without a version requirement.
#
# A _version_ that starts with a digit is turned into a ">= version"
# requirement; any other string is stored as given.
#
def depend_on( name, version = nil )
  if version.nil?
    # Gem.source_index is gone from current RubyGems, so prefer the
    # Gem::Specification lookup and keep the old call only as a fallback.
    installed = if Gem::Specification.respond_to?(:find_all_by_name)
                  Gem::Specification.find_all_by_name(name)
                else
                  Gem.source_index.find_name(name)
                end
    newest = installed.max {|a, b| a.version <=> b.version}
    version = newest.version.to_s unless newest.nil?
  end

  PROJ.dependencies << case version
                       when nil;     [name]
                       when %r/^\d/; [name, ">= #{version}"]
                       else          [name, version] end
end
177
+
178
# Prepends each of the given directories to the load path ($:). Every
# argument is expanded to an absolute path first; entries that are not
# existing directories, or that are already on the load path, are skipped.
#
def ensure_in_path( *args )
  args.each do |entry|
    dir = File.expand_path(entry)
    next unless File.directory?(dir)
    next if $:.include?(dir)
    $:.unshift(dir)
  end
end
186
+
187
# Find each rake task by name and remove any description text. This will
# prevent the task from being displayed in the list of available tasks.
# _names_ may be a single task name or an array of names; names that do not
# match a defined task are ignored.
#
def remove_desc_for_task( names )
  Array(names).each do |task_name|
    task = Rake.application.tasks.find {|t| t.name == task_name}
    next if task.nil?
    # Use the public API when the task offers it; clearing @comment directly
    # is kept only as a fallback for tasks without clear_comments.
    if task.respond_to?(:clear_comments)
      task.clear_comments
    else
      task.instance_variable_set :@comment, nil
    end
  end
end
197
+
198
# Switches the working directory to _dir_, runs the _block_, and returns the
# block's result. The directory that was current when this method was called
# is restored afterwards, even if the block raises.
#
def in_directory( dir, &block )
  origin = pwd
  begin
    cd dir
    block.call
  ensure
    cd origin
  end
end
211
+
212
# Scans the current working directory and creates a list of files that are
# candidates to be in the manifest. Returns the sorted list of relative
# paths of regular files that match none of the patterns in PROJ.exclude.
#
def manifest_files
  # Each entry of PROJ.exclude is regular expression source. Building a union
  # of compiled patterns means an empty list yields a pattern that matches
  # nothing, rather than an empty expression that would match every path.
  exclude = Regexp.union(PROJ.exclude.map {|pattern| Regexp.new(pattern)})

  files = []
  Find.find '.' do |path|
    path = path.sub(%r/^(\.\/|\/)/, '')   # drop the leading "./"
    next unless File.file?(path)
    next if path =~ exclude
    files << path
  end
  files.sort!
end
226
+
227
+ # EOF
@@ -0,0 +1,54 @@
1
+ # $Id$
2
+
3
+ if HAVE_SPEC_RAKE_SPECTASK
4
+ require 'spec/rake/verify_rcov'
5
+
6
+ namespace :spec do
7
+
8
+ desc 'Run all specs with basic output'
9
+ Spec::Rake::SpecTask.new(:run) do |t|
10
+ t.ruby_opts = PROJ.ruby_opts
11
+ t.spec_opts = PROJ.spec_opts
12
+ t.spec_files = PROJ.specs
13
+ t.libs += PROJ.libs
14
+ end
15
+
16
+ desc 'Run all specs with text output'
17
+ Spec::Rake::SpecTask.new(:specdoc) do |t|
18
+ t.ruby_opts = PROJ.ruby_opts
19
+ t.spec_opts = PROJ.spec_opts + ['--format', 'specdoc']
20
+ t.spec_files = PROJ.specs
21
+ t.libs += PROJ.libs
22
+ end
23
+
24
+ if HAVE_RCOV
25
+ desc 'Run all specs with RCov'
26
+ Spec::Rake::SpecTask.new(:rcov) do |t|
27
+ t.ruby_opts = PROJ.ruby_opts
28
+ t.spec_opts = PROJ.spec_opts
29
+ t.spec_files = PROJ.specs
30
+ t.libs += PROJ.libs
31
+ t.rcov = true
32
+ t.rcov_dir = PROJ.rcov_dir
33
+ t.rcov_opts = PROJ.rcov_opts + ['--exclude', 'spec']
34
+ end
35
+
36
+ RCov::VerifyTask.new(:verify) do |t|
37
+ t.threshold = PROJ.rcov_threshold
38
+ t.index_html = File.join(PROJ.rcov_dir, 'index.html')
39
+ t.require_exact_threshold = PROJ.rcov_threshold_exact
40
+ end
41
+ end
42
+
43
+ end # namespace :spec
44
+
45
+ desc 'Alias to spec:run'
46
+ task :spec => 'spec:run'
47
+
48
+ task :clobber => 'spec:clobber_rcov' if HAVE_RCOV
49
+
50
+ remove_desc_for_task %w(spec:clobber_rcov)
51
+
52
+ end # if HAVE_SPEC_RAKE_SPECTASK
53
+
54
+ # EOF
@@ -0,0 +1,44 @@
1
+ # $Id$
2
+
3
+
4
# Subversion tasks. Nothing below is defined unless PROJ.svn is set and the
# svn command can be run.
#
# The probe redirects stdout to DEV_NULL first and then points stderr at it,
# so both streams are silenced.
if PROJ.svn and system("svn --version > #{DEV_NULL} 2>&1")

  # Work out the repository root from `svn info` unless it was configured.
  unless PROJ.svn_root
    info = %x/svn info ./
    m = %r/^Repository Root:\s+(.*)$/.match(info)
    PROJ.svn_root = (m.nil? ? '' : m[1])
  end
  # A String value for PROJ.svn is appended to the root as a sub-path.
  PROJ.svn_root = File.join(PROJ.svn_root, PROJ.svn) if String === PROJ.svn

  namespace :svn do

    desc 'Show tags from the SVN repository'
    task :show_tags do |t|
      tags = %x/svn list #{File.join(PROJ.svn_root, PROJ.svn_tags)}/
      tags.gsub!(%r/\/$/, '')
      puts tags
    end

    desc 'Create a new tag in the SVN repository'
    task :create_tag do |t|
      v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
      abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

      trunk = File.join(PROJ.svn_root, PROJ.svn_trunk)
      tag = "%s-%s" % [PROJ.name, PROJ.version]
      tag = File.join(PROJ.svn_root, PROJ.svn_tags, tag)
      msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

      puts "Creating SVN tag '#{tag}'"
      # Pass the arguments separately so the commit message and URLs reach
      # svn unchanged, without depending on shell quoting rules.
      unless system 'svn', 'cp', '-m', msg, trunk, tag
        abort "Tag creation failed"
      end
    end

  end  # namespace :svn

  # Releasing the gem first tags the release in the repository.
  task 'gem:release' => 'svn:create_tag'

end  # if PROJ.svn
43
+
44
+ # EOF