rmmseg 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.1.1 / 2008-02-05
2
+
3
+ * Use bones instead of hoe as building tool.
4
+
1
5
  === 0.1.0 / 2008-02-01
2
6
 
3
7
  * Add filter to filter out Chinese punctuations.
data/Manifest.txt CHANGED
@@ -37,3 +37,14 @@ spec/simple_algorithm_spec.rb
37
37
  spec/spec_helper.rb
38
38
  spec/svwl_rule_spec.rb
39
39
  spec/word_spec.rb
40
+ tasks/annotations.rake
41
+ tasks/doc.rake
42
+ tasks/gem.rake
43
+ tasks/homepage.rake
44
+ tasks/manifest.rake
45
+ tasks/post_load.rake
46
+ tasks/rubyforge.rake
47
+ tasks/setup.rb
48
+ tasks/spec.rake
49
+ tasks/svn.rake
50
+ tasks/test.rake
data/README.txt CHANGED
@@ -1,7 +1,6 @@
1
1
  = rmmseg
2
-
3
- * http://rmmseg.rubyforge.org
4
- * mailto:pluskid@gmail.com
2
+ by pluskid
3
+ http://rmmseg.rubyforge.org
5
4
 
6
5
  == DESCRIPTION:
7
6
 
data/Rakefile CHANGED
@@ -1,33 +1,22 @@
1
1
  # -*- ruby -*-
2
2
 
3
- $: << File.join(File.dirname(__FILE__), "lib")
3
+ load 'tasks/setup.rb'
4
4
 
5
- require 'rubygems'
6
- require 'hoe'
5
+ ensure_in_path 'lib'
7
6
  require 'rmmseg'
8
7
 
9
- Hoe.new('rmmseg', RMMSeg::VERSION) do |p|
10
- p.rubyforge_name = 'rmmseg'
11
- p.author = 'pluskid'
12
- p.email = 'pluskid@gmail.com'
13
- p.test_globs = ["spec/spec.rb"]
14
- p.rdoc_pattern = /^lib\/.*\.rb$|\.txt$/
15
- p.summary = <<-END
16
- RMMSeg is an implementation of MMSEG algorithm in Ruby. MMSEG is a
17
- Chinese segmentation algorithm based on two variants of maximum
18
- matching.
8
+ task :default => 'spec:run'
19
9
 
20
- RMMSeg can be used as a stand alone program or as an Analyzer of
21
- Ferret.
22
- END
23
- end
10
+ PROJ.name = 'rmmseg'
11
+ PROJ.authors = 'pluskid'
12
+ PROJ.email = 'pluskid@gmail.com'
13
+ PROJ.url = 'http://rmmseg.rubyforge.org'
14
+ PROJ.rubyforge_name = 'rmmseg'
15
+ PROJ.rdoc_remote_dir = 'rmmseg'
16
+ PROJ.version = RMMSeg::VERSION
24
17
 
25
- task :homepage do
26
- sh "cd misc && gerbil html homepage.erb > homepage.html"
27
- end
18
+ PROJ.exclude << '\.git'
28
19
 
29
- task :publish_homepage do
30
- sh "scp misc/homepage.html rubyforge.org:/var/www/gforge-projects/rmmseg/index.html"
31
- end
20
+ PROJ.spec_opts << '--color'
32
21
 
33
22
  # vim: syntax=Ruby
@@ -139,13 +139,8 @@ module RMMSeg
139
139
  end
140
140
 
141
141
  # Determine whether a character is a basic latin character.
142
- #--
143
- # TODO: Implement this method in a more correct way.
144
- # currently I use number of bytes in this char to determine this.
145
- # If it is a one-byte char, I consider it a basic latin.
146
- #++
147
142
  def basic_latin?(char)
148
- char.size == 1
143
+ !char.mbchar?
149
144
  end
150
145
 
151
146
  # Determine whether a character can be part of a basic latin
data/lib/rmmseg/ferret.rb CHANGED
@@ -2,6 +2,7 @@
2
2
  require 'singleton'
3
3
  require 'rubygems'
4
4
  require 'ferret'
5
+ require 'rmmseg'
5
6
 
6
7
  module RMMSeg
7
8
  module Ferret
data/lib/rmmseg.rb CHANGED
@@ -6,7 +6,7 @@ require 'rmmseg/simple_algorithm'
6
6
  require 'rmmseg/complex_algorithm'
7
7
 
8
8
  module RMMSeg
9
- VERSION = '0.1.0'
9
+ VERSION = '0.1.1'
10
10
 
11
11
  # Segment +text+ using the algorithm configured.
12
12
  def segment(text)
@@ -6,8 +6,8 @@ require 'rmmseg/ferret'
6
6
  analyzer = RMMSeg::Ferret::Analyzer.new { |tokenizer|
7
7
  Ferret::Analysis::LowerCaseFilter.new(tokenizer)
8
8
  }
9
- $index = Ferret::Index::Index.new(:analyzer => analyzer,
10
- :path => '/tmp/index')
9
+
10
+ $index = Ferret::Index::Index.new(:analyzer => analyzer)
11
11
 
12
12
  $index << {
13
13
  :title => "分词",
data/misc/homepage.erb CHANGED
@@ -167,7 +167,3 @@
167
167
  * "Implementation Details":http://pluskid.lifegoo.com/?p=261: My blog post about the implementation details of RMMSeg (Chinese).
168
168
  * "Author's Email":mailto:pluskid@gmail.com: Contact me if you have any problem.
169
169
  <% end %>
170
-
171
- <% footer do %>
172
- "[Validate]":http://validator.w3.org/check/referer
173
- <% end %>
data/misc/homepage.html CHANGED
@@ -2,9 +2,9 @@
2
2
  <html>
3
3
  <head>
4
4
  <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
5
- <meta name="date" content="01 February 2008"/>
5
+ <meta name="date" content="04 February 2008"/>
6
6
  <meta name="author" content="pluskid"/>
7
- <meta name="generator" content="Gerbil 1.1.0"/>
7
+ <meta name="generator" content="Gerbil 2.0.0"/>
8
8
  <title>RMMSeg Homepage</title>
9
9
 
10
10
  <style type="text/css" media="screen">
@@ -94,6 +94,8 @@
94
94
  table
95
95
  {
96
96
  border : none;
97
+ margin : auto; /* center horizontally */
98
+ margin-top : 1em;
97
99
  }
98
100
 
99
101
  th,
@@ -110,11 +112,6 @@
110
112
  background-color : #F5F5F5;
111
113
  }
112
114
 
113
- table
114
- {
115
- margin : auto;
116
- }
117
-
118
115
  /* document structure */
119
116
 
120
117
  #header
@@ -484,6 +481,8 @@
484
481
  table
485
482
  {
486
483
  border : none;
484
+ margin : auto; /* center horizontally */
485
+ margin-top : 1em;
487
486
  }
488
487
 
489
488
  th,
@@ -500,11 +499,6 @@
500
499
  background-color : #F5F5F5;
501
500
  }
502
501
 
503
- table
504
- {
505
- margin : auto;
506
- }
507
-
508
502
  /* document structure */
509
503
 
510
504
  #header
@@ -759,23 +753,27 @@
759
753
  </style>
760
754
  </head>
761
755
  <body>
756
+
762
757
  <div id="header">
758
+
763
759
 
764
760
  <h1 class="title">RMMSeg Homepage</h1>
765
761
  <h2 class="authors"><a href="http://pluskid.lifegoo.com">pluskid</a></h2>
766
- <h3 class="date">01 February 2008</h3>
762
+ <h3 class="date">04 February 2008</h3>
763
+
767
764
  </div>
765
+
768
766
 
769
767
 
770
- <div id="toc"><h1>Contents</h1> <ul><li>1&nbsp;&nbsp;<a id="a-606801458" href="#Introduction">Introduction</a></li><li>2&nbsp;&nbsp;<a id="a-606803598" href="#Setup">Setup</a><ul><li>2.1&nbsp;&nbsp;<a id="a-606805098" href="#Requirements">Requirements</a></li><li>2.2&nbsp;&nbsp;<a id="a-606807208" href="#Installation">Installation</a><ul><li>2.2.1&nbsp;&nbsp;<a id="a-606808808" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2&nbsp;&nbsp;<a id="a-606810898" href="#From-Subversion">From Subversion</a></li></ul></li></ul></li><li>3&nbsp;&nbsp;<a id="a-606815688" href="#Usage">Usage</a><ul><li>3.1&nbsp;&nbsp;<a id="a-606817228" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2&nbsp;&nbsp;<a id="a-606819308" href="#Analyzer-for-Ferret">Analyzer for Ferret</a></li><li>3.3&nbsp;&nbsp;<a id="a-606825488" href="#Customization">Customization</a></li></ul></li><li>4&nbsp;&nbsp;<a id="a-606828108" href="#Resources">Resources</a></li></ul></div>
768
+ <div id="toc"><h1>Contents</h1> <ul><li>1&nbsp;&nbsp;<a id="a-606973768" href="#Introduction">Introduction</a></li><li>2&nbsp;&nbsp;<a id="a-606975908" href="#Setup">Setup</a><ul><li>2.1&nbsp;&nbsp;<a id="a-606977408" href="#Requirements">Requirements</a></li><li>2.2&nbsp;&nbsp;<a id="a-606979518" href="#Installation">Installation</a><ul><li>2.2.1&nbsp;&nbsp;<a id="a-606981118" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2&nbsp;&nbsp;<a id="a-606983208" href="#From-Subversion">From Subversion</a></li></ul></li></ul></li><li>3&nbsp;&nbsp;<a id="a-606987998" href="#Usage">Usage</a><ul><li>3.1&nbsp;&nbsp;<a id="a-606989538" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2&nbsp;&nbsp;<a id="a-606991618" href="#Analyzer-for-Ferret">Analyzer for Ferret</a></li><li>3.3&nbsp;&nbsp;<a id="a-606997828" href="#Customization">Customization</a></li></ul></li><li>4&nbsp;&nbsp;<a id="a-606008348" href="#Resources">Resources</a></li></ul></div>
771
769
 
772
- <div id="lof"><h1>Figures</h1> <ol><li><a id="a-606823268" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1>Notes</h1> <ol><li><a id="a-606812538" href="#The-latest-code-might-be-unstable">The latest code might be unstable</a></li></ol></div>
770
+ <div id="lof"><h1>Figures</h1> <ol><li><a id="a-606995608" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1>Notes</h1> <ol><li><a id="a-606984848" href="#The-latest-code-might-be-unstable">The latest code might be unstable</a></li></ol></div>
773
771
 
774
772
  <div id="content">
775
773
  <div class="chapter">
776
774
  <h1 class="title">
777
775
  Chapter
778
- <a class="toc" id="Introduction" href="#a-606801458">1</a>
776
+ <a class="toc" id="Introduction" href="#a-606973768">1</a>
779
777
 
780
778
  <br/>
781
779
 
@@ -811,7 +809,7 @@ following essays:</p>
811
809
  <div class="chapter">
812
810
  <h1 class="title">
813
811
  Chapter
814
- <a class="toc" id="Setup" href="#a-606803598">2</a>
812
+ <a class="toc" id="Setup" href="#a-606975908">2</a>
815
813
 
816
814
  <br/>
817
815
 
@@ -820,7 +818,7 @@ following essays:</p>
820
818
 
821
819
  <div class="content"><div class="section">
822
820
  <h2 class="title">
823
- <a class="toc" id="Requirements" href="#a-606805098">2.1</a>&nbsp;&nbsp;Requirements
821
+ <a class="toc" id="Requirements" href="#a-606977408">2.1</a>&nbsp;&nbsp;Requirements
824
822
  </h2>
825
823
  <div class="content">Your system needs the following software to run RMMSeg.
826
824
 
@@ -850,11 +848,11 @@ following essays:</p>
850
848
  </div>
851
849
  <div class="section">
852
850
  <h2 class="title">
853
- <a class="toc" id="Installation" href="#a-606807208">2.2</a>&nbsp;&nbsp;Installation
851
+ <a class="toc" id="Installation" href="#a-606979518">2.2</a>&nbsp;&nbsp;Installation
854
852
  </h2>
855
853
  <div class="content"><div class="section">
856
854
  <h3 class="title">
857
- <a class="toc" id="Using-RubyGems" href="#a-606808808">2.2.1</a>&nbsp;&nbsp;Using RubyGems
855
+ <a class="toc" id="Using-RubyGems" href="#a-606981118">2.2.1</a>&nbsp;&nbsp;Using RubyGems
858
856
  </h3>
859
857
  <div class="content"><p>To install the gem remotely from <a href="http://rubyforge.org">RubyForge</a> :</p>
860
858
 
@@ -869,11 +867,11 @@ following essays:</p>
869
867
  </div>
870
868
  <div class="section">
871
869
  <h3 class="title">
872
- <a class="toc" id="From-Subversion" href="#a-606810898">2.2.2</a>&nbsp;&nbsp;From Subversion
870
+ <a class="toc" id="From-Subversion" href="#a-606983208">2.2.2</a>&nbsp;&nbsp;From Subversion
873
871
  </h3>
874
872
  <div class="content"><p>From subversion repository hosted at <a href="http://rmmseg.rubyforge.org/svn/">RubyForge</a>, you can always get the latest source code.
875
873
  <div class="note">
876
- <p class="title"><a class="toc" id="The-latest-code-might-be-unstable" href="#a-606812538">Note 1</a>.&nbsp;&nbsp;The latest code might be unstable</p>
874
+ <p class="title"><a class="toc" id="The-latest-code-might-be-unstable" href="#a-606984848">Note 1</a>.&nbsp;&nbsp;The latest code might be unstable</p>
877
875
 
878
876
  <img src="
879
877
  fAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3
@@ -954,7 +952,7 @@ DUCQhrhWJkj394A0gKeUCjVo3r9Zv0r2P3yyQqPd16MPAAAAAElFTkSuQmCC
954
952
  <div class="chapter">
955
953
  <h1 class="title">
956
954
  Chapter
957
- <a class="toc" id="Usage" href="#a-606815688">3</a>
955
+ <a class="toc" id="Usage" href="#a-606987998">3</a>
958
956
 
959
957
  <br/>
960
958
 
@@ -963,7 +961,7 @@ DUCQhrhWJkj394A0gKeUCjVo3r9Zv0r2P3yyQqPd16MPAAAAAElFTkSuQmCC
963
961
 
964
962
  <div class="content"><div class="section">
965
963
  <h2 class="title">
966
- <a class="toc" id="Stand-Alone-rmmseg" href="#a-606817228">3.1</a>&nbsp;&nbsp;Stand Alone rmmseg
964
+ <a class="toc" id="Stand-Alone-rmmseg" href="#a-606989538">3.1</a>&nbsp;&nbsp;Stand Alone rmmseg
967
965
  </h2>
968
966
  <div class="content"><p>RMMSeg comes with a script <code class="code">rmmseg</code>. To get the basic usage, just execute it with <code class="code">-h</code> option:</p>
969
967
 
@@ -980,7 +978,7 @@ example:</p>
980
978
  </div>
981
979
  <div class="section">
982
980
  <h2 class="title">
983
- <a class="toc" id="Analyzer-for-Ferret" href="#a-606819308">3.2</a>&nbsp;&nbsp;Analyzer for Ferret
981
+ <a class="toc" id="Analyzer-for-Ferret" href="#a-606991618">3.2</a>&nbsp;&nbsp;Analyzer for Ferret
984
982
  </h2>
985
983
  <div class="content"><p>RMMSeg include an analyzer for Ferret. It is simply ready to
986
984
  use. Just require it and pass it to Ferret. Here&#8217;s a complete
@@ -993,7 +991,10 @@ require <span style="background-color:#fff0f0"><span style="color:#710">'</span>
993
991
  require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rmmseg</span><span style="color:#710">'</span></span>
994
992
  require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rmmseg/ferret</span><span style="color:#710">'</span></span>
995
993
 
996
- analyzer = <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analyzer</span>.new
994
+ analyzer = <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analyzer</span>.new { |tokenizer|
995
+ <span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analysis</span>::<span style="color:#036; font-weight:bold">LowerCaseFilter</span>.new(tokenizer)
996
+ }
997
+
997
998
  <span style="color:#d70; font-weight:bold">$index</span> = <span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Index</span>::<span style="color:#036; font-weight:bold">Index</span>.new(<span style="color:#A60">:analyzer</span> =&gt; analyzer)
998
999
 
999
1000
  <span style="color:#d70; font-weight:bold">$index</span> &lt;&lt; {
@@ -1089,13 +1090,13 @@ highlighted as in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1: <e
1089
1090
 
1090
1091
 
1091
1092
  <p><div class="figure">
1092
- <p class="title"><a class="toc" id="Ferret-Example-Screenshot" href="#a-606823268">Figure 1</a>.&nbsp;&nbsp;Ferret Example Screenshot</p>
1093
+ <p class="title"><a class="toc" id="Ferret-Example-Screenshot" href="#a-606995608">Figure 1</a>.&nbsp;&nbsp;Ferret Example Screenshot</p>
1093
1094
  <div class="content"><img src="http://pluskid.lifegoo.com/wp-content/uploads/2008/02/rmmseg.png" alt="" /></div>
1094
1095
  </div></p></div>
1095
1096
  </div>
1096
1097
  <div class="section">
1097
1098
  <h2 class="title">
1098
- <a class="toc" id="Customization" href="#a-606825488">3.3</a>&nbsp;&nbsp;Customization
1099
+ <a class="toc" id="Customization" href="#a-606997828">3.3</a>&nbsp;&nbsp;Customization
1099
1100
  </h2>
1100
1101
  <div class="content"><p>RMMSeg can be customized through <code class="code"><span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Config</span></code>. For example, to use your own dictionaries, just set it before starting to do segmentation:</p>
1101
1102
 
@@ -1122,7 +1123,7 @@ highlighted as in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1: <e
1122
1123
  <div class="chapter">
1123
1124
  <h1 class="title">
1124
1125
  Chapter
1125
- <a class="toc" id="Resources" href="#a-606828108">4</a>
1126
+ <a class="toc" id="Resources" href="#a-606008348">4</a>
1126
1127
 
1127
1128
  <br/>
1128
1129
 
@@ -1135,16 +1136,17 @@ highlighted as in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1: <e
1135
1136
  <li><a href="http://pluskid.lifegoo.com/?p=261">Implementation Details</a>: My blog post about the implementation details of RMMSeg (Chinese).</li>
1136
1137
  <li><a href="mailto:pluskid@gmail.com">Author&#8217;s Email</a>: Contact me if you have any problem.</li>
1137
1138
  </ul></div>
1138
- </div>
1139
- </div>
1139
+ </div></div>
1140
1140
 
1141
1141
 
1142
1142
  <br style="display: none"/>
1143
1143
  <hr style="display: none"/>
1144
1144
  <br style="display: none"/>
1145
1145
 
1146
+
1146
1147
  <div id="footer">
1147
- <a href="http://validator.w3.org/check/referer">[Validate]</a>
1148
+
1149
+ Generated on Mon Feb 04 16:17:21 +0800 2008 by <a href="http://gerbil.rubyforge.org">Gerbil</a> 2.0.0.
1148
1150
 
1149
1151
  <p>The admonition icons (<img src="
1150
1152
  fAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3
@@ -1204,6 +1206,8 @@ YvOZpmnxjo6O3x48eHA76S4UB3qEUgohRBQoGLoyKTJJp+nyR97kmA1IwANS
1204
1206
  wODQ1auUSmYP9UNjoQBwAIv0DBUMFb6WZpNuRB9wSYsfUEr5kOdfSiGETrr1
1205
1207
  DUCQhrhWJkj394A0gKeUCjVo3r9Zv0r2P3yyQqPd16MPAAAAAElFTkSuQmCC
1206
1208
  " alt="note"/>) used in this document are Copyright &copy; 2005 <a href="http://tango.freedesktop.org">Tango Desktop Project</a>. They are part of the <a href="http://tango.freedesktop.org/Tango_Icon_Library">Tango Icon Theme</a> set, which is distributed under the <a href="http://creativecommons.org/licenses/by-sa/2.5/">Creative Commons Attribution-ShareAlike 2.5 License Agreement</a>.</p>
1209
+
1207
1210
  </div>
1211
+
1208
1212
  </body>
1209
1213
  </html>
data/spec/spec_helper.rb CHANGED
@@ -1,9 +1,6 @@
1
1
  $: << File.join(File.dirname(__FILE__), "../lib")
2
2
  require 'rmmseg'
3
3
 
4
- require 'rubygems'
5
- require 'spec'
6
-
7
4
  def gen_words words, freqs=nil
8
5
  if freqs.nil?
9
6
  words.map { |word| RMMSeg::Word.new(word) }
@@ -0,0 +1,30 @@
1
+ # $Id$
2
+
3
+ if HAVE_BONES
4
+
5
+ desc "Enumerate all annotations"
6
+ task :notes do
7
+ Bones::AnnotationExtractor.enumerate(
8
+ PROJ, "OPTIMIZE|FIXME|TODO", :tag => true)
9
+ end
10
+
11
+ namespace :notes do
12
+ desc "Enumerate all OPTIMIZE annotations"
13
+ task :optimize do
14
+ Bones::AnnotationExtractor.enumerate(PROJ, "OPTIMIZE")
15
+ end
16
+
17
+ desc "Enumerate all FIXME annotations"
18
+ task :fixme do
19
+ Bones::AnnotationExtractor.enumerate(PROJ, "FIXME")
20
+ end
21
+
22
+ desc "Enumerate all TODO annotations"
23
+ task :todo do
24
+ Bones::AnnotationExtractor.enumerate(PROJ, "TODO")
25
+ end
26
+ end
27
+
28
+ end # if HAVE_BONES
29
+
30
+ # EOF
data/tasks/doc.rake ADDED
@@ -0,0 +1,49 @@
1
+ # $Id$
2
+
3
+ require 'rake/rdoctask'
4
+
5
+ namespace :doc do
6
+
7
+ desc 'Generate RDoc documentation'
8
+ Rake::RDocTask.new do |rd|
9
+ rd.main = PROJ.rdoc_main
10
+ rd.rdoc_dir = PROJ.rdoc_dir
11
+
12
+ incl = Regexp.new(PROJ.rdoc_include.join('|'))
13
+ excl = Regexp.new(PROJ.rdoc_exclude.join('|'))
14
+ files = PROJ.files.find_all do |fn|
15
+ case fn
16
+ when excl; false
17
+ when incl; true
18
+ else false end
19
+ end
20
+ rd.rdoc_files.push(*files)
21
+
22
+ title = "#{PROJ.name}-#{PROJ.version} Documentation"
23
+ title = "#{PROJ.rubyforge_name}'s " + title if PROJ.rubyforge_name != title
24
+
25
+ rd.options << "-t #{title}"
26
+ rd.options.concat(PROJ.rdoc_opts)
27
+ end
28
+
29
+ desc 'Generate ri locally for testing'
30
+ task :ri => :clobber_ri do
31
+ sh "#{RDOC} --ri -o ri ."
32
+ end
33
+
34
+ desc 'Remove ri products'
35
+ task :clobber_ri do
36
+ rm_r 'ri' rescue nil
37
+ end
38
+
39
+ end # namespace :doc
40
+
41
+ desc 'Alias to doc:rdoc'
42
+ task :doc => 'doc:rdoc'
43
+
44
+ desc 'Remove all build products'
45
+ task :clobber => %w(doc:clobber_rdoc doc:clobber_ri)
46
+
47
+ remove_desc_for_task %w(doc:clobber_rdoc doc:clobber_ri)
48
+
49
+ # EOF
data/tasks/gem.rake ADDED
@@ -0,0 +1,89 @@
1
+ # $Id$
2
+
3
+ require 'rake/gempackagetask'
4
+
5
+ namespace :gem do
6
+
7
+ PROJ.spec = Gem::Specification.new do |s|
8
+ s.name = PROJ.name
9
+ s.version = PROJ.version
10
+ s.summary = PROJ.summary
11
+ s.authors = Array(PROJ.authors)
12
+ s.email = PROJ.email
13
+ s.homepage = Array(PROJ.url).first
14
+ s.rubyforge_project = PROJ.rubyforge_name
15
+
16
+ s.description = PROJ.description
17
+
18
+ PROJ.dependencies.each do |dep|
19
+ s.add_dependency(*dep)
20
+ end
21
+
22
+ s.files = PROJ.files
23
+ s.executables = PROJ.executables.map {|fn| File.basename(fn)}
24
+ s.extensions = PROJ.files.grep %r/extconf\.rb$/
25
+
26
+ s.bindir = 'bin'
27
+ dirs = Dir["{#{PROJ.libs.join(',')}}"]
28
+ s.require_paths = dirs unless dirs.empty?
29
+
30
+ incl = Regexp.new(PROJ.rdoc_include.join('|'))
31
+ excl = PROJ.rdoc_exclude.dup.concat %w[\.rb$ ^(\.\/|\/)?ext]
32
+ excl = Regexp.new(excl.join('|'))
33
+ rdoc_files = PROJ.files.find_all do |fn|
34
+ case fn
35
+ when excl; false
36
+ when incl; true
37
+ else false end
38
+ end
39
+ s.rdoc_options = PROJ.rdoc_opts + ['--main', PROJ.rdoc_main]
40
+ s.extra_rdoc_files = rdoc_files
41
+ s.has_rdoc = true
42
+
43
+ if test ?f, PROJ.test_file
44
+ s.test_file = PROJ.test_file
45
+ else
46
+ s.test_files = PROJ.tests.to_a
47
+ end
48
+
49
+ # Do any extra stuff the user wants
50
+ # spec_extras.each do |msg, val|
51
+ # case val
52
+ # when Proc
53
+ # val.call(s.send(msg))
54
+ # else
55
+ # s.send "#{msg}=", val
56
+ # end
57
+ # end
58
+ end
59
+
60
+ desc 'Show information about the gem'
61
+ task :debug do
62
+ puts PROJ.spec.to_ruby
63
+ end
64
+
65
+ Rake::GemPackageTask.new(PROJ.spec) do |pkg|
66
+ pkg.need_tar = PROJ.need_tar
67
+ pkg.need_zip = PROJ.need_zip
68
+ end
69
+
70
+ desc 'Install the gem'
71
+ task :install => [:clobber, :package] do
72
+ sh "#{SUDO} #{GEM} install pkg/#{PROJ.spec.full_name}"
73
+ end
74
+
75
+ desc 'Uninstall the gem'
76
+ task :uninstall do
77
+ sh "#{SUDO} #{GEM} uninstall -v '#{PROJ.version}' -x #{PROJ.name}"
78
+ end
79
+
80
+ end # namespace :gem
81
+
82
+ desc 'Alias to gem:package'
83
+ task :gem => 'gem:package'
84
+
85
+ task :clobber => 'gem:clobber_package'
86
+
87
+ remove_desc_for_task %w(gem:clobber_package)
88
+
89
+ # EOF
@@ -0,0 +1,12 @@
1
+ namespace :homepage do
2
+ desc 'generate homepage'
3
+ task :generate do
4
+ sh "cd misc && gerbil html homepage.erb > homepage.html"
5
+ end
6
+
7
+ desc 'publish homepage to rubyforge'
8
+ task :publish => :generate do
9
+ remote_path = "rubyforge.org:/var/www/gforge-projects"
10
+ sh "scp misc/homepage.html #{remote_path}/rmmseg/index.html"
11
+ end
12
+ end
@@ -0,0 +1,44 @@
1
+ # $Id$
2
+
3
+ require 'find'
4
+
5
+ namespace :manifest do
6
+
7
+ desc 'Verify the manifest'
8
+ task :check do
9
+ fn = 'Manifest.tmp'
10
+ files = []
11
+ exclude = Regexp.new(PROJ.exclude.join('|'))
12
+ Find.find '.' do |path|
13
+ path.sub! %r/^(\.\/|\/)/o, ''
14
+ next unless test ?f, path
15
+ next if path =~ exclude
16
+ files << path
17
+ end
18
+
19
+ File.open(fn, 'w') {|fp| fp.puts files.sort}
20
+ system "#{DIFF} -du Manifest.txt #{fn}"
21
+ rm fn rescue nil
22
+ end
23
+
24
+ desc 'Create a new manifest'
25
+ task :create do
26
+ fn = 'Manifest.txt'
27
+ files = []
28
+ exclude = Regexp.new(PROJ.exclude.join('|'))
29
+ Find.find '.' do |path|
30
+ path.sub! %r/^(\.\/|\/)/o, ''
31
+ next unless test ?f, path
32
+ next if path =~ exclude
33
+ files << path
34
+ end
35
+
36
+ files << fn unless test ?f, fn
37
+ File.open(fn, 'w') {|fp| fp.puts files.sort}
38
+ end
39
+ end # namespace :manifest
40
+
41
+ desc 'Alias to manifest:check'
42
+ task :manifest => 'manifest:check'
43
+
44
+ # EOF
@@ -0,0 +1,18 @@
1
+ # $Id$
2
+
3
+ # This file does not define any rake tasks. It is used to load some project
4
+ # settings if they are not defined by the user.
5
+
6
+ unless PROJ.changes
7
+ PROJ.changes = paragraphs_of('History.txt', 0..1).join("\n\n")
8
+ end
9
+
10
+ unless PROJ.description
11
+ PROJ.description = paragraphs_of('README.txt', 'description').join("\n\n")
12
+ end
13
+
14
+ unless PROJ.summary
15
+ PROJ.summary = PROJ.description.split('.').first
16
+ end
17
+
18
+ # EOF
@@ -0,0 +1,57 @@
1
+ # $Id$
2
+
3
+ if PROJ.rubyforge_name && HAVE_RUBYFORGE
4
+
5
+ require 'rubyforge'
6
+ require 'rake/contrib/sshpublisher'
7
+
8
+ namespace :gem do
9
+ desc 'Package and upload to RubyForge'
10
+ task :release => [:clobber, :package] do |t|
11
+ v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
12
+ abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
13
+ pkg = "pkg/#{PROJ.spec.full_name}"
14
+
15
+ if $DEBUG then
16
+ puts "release_id = rf.add_release #{PROJ.rubyforge_name.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\""
17
+ puts "rf.add_file #{PROJ.rubyforge_name.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\""
18
+ end
19
+
20
+ rf = RubyForge.new
21
+ puts 'Logging in'
22
+ rf.login
23
+
24
+ c = rf.userconfig
25
+ c['release_notes'] = PROJ.description if PROJ.description
26
+ c['release_changes'] = PROJ.changes if PROJ.changes
27
+ c['preformatted'] = true
28
+
29
+ files = [(PROJ.need_tar ? "#{pkg}.tgz" : nil),
30
+ (PROJ.need_zip ? "#{pkg}.zip" : nil),
31
+ "#{pkg}.gem"].compact
32
+
33
+ puts "Releasing #{PROJ.name} v. #{PROJ.version}"
34
+ rf.add_release PROJ.rubyforge_name, PROJ.name, PROJ.version, *files
35
+ end
36
+ end # namespace :gem
37
+
38
+
39
+ namespace :doc do
40
+ desc "Publish RDoc to RubyForge"
41
+ task :release => %w(doc:clobber_rdoc doc:rdoc) do
42
+ config = YAML.load(
43
+ File.read(File.expand_path('~/.rubyforge/user-config.yml'))
44
+ )
45
+
46
+ host = "#{config['username']}@rubyforge.org"
47
+ remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge_name}/"
48
+ remote_dir << PROJ.rdoc_remote_dir if PROJ.rdoc_remote_dir
49
+ local_dir = PROJ.rdoc_dir
50
+
51
+ Rake::SshDirPublisher.new(host, remote_dir, local_dir).upload
52
+ end
53
+ end # namespace :doc
54
+
55
+ end # if HAVE_RUBYFORGE
56
+
57
+ # EOF
data/tasks/setup.rb ADDED
@@ -0,0 +1,189 @@
1
+ # $Id$
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'fileutils'
6
+ require 'ostruct'
7
+
8
+ PROJ = OpenStruct.new
9
+
10
+ PROJ.name = nil
11
+ PROJ.summary = nil
12
+ PROJ.description = nil
13
+ PROJ.changes = nil
14
+ PROJ.authors = nil
15
+ PROJ.email = nil
16
+ PROJ.url = nil
17
+ PROJ.version = ENV['VERSION'] || '0.0.0'
18
+ PROJ.rubyforge_name = nil
19
+ PROJ.exclude = %w(tmp$ bak$ ~$ CVS .svn/ ^pkg/ ^doc/)
20
+
21
+ # Rspec
22
+ PROJ.specs = FileList['spec/**/*_spec.rb']
23
+ PROJ.spec_opts = []
24
+
25
+ # Test::Unit
26
+ PROJ.tests = FileList['test/**/test_*.rb']
27
+ PROJ.test_file = 'test/all.rb'
28
+ PROJ.test_opts = []
29
+
30
+ # Rcov
31
+ PROJ.rcov_opts = ['--sort', 'coverage', '-T']
32
+
33
+ # Rdoc
34
+ PROJ.rdoc_opts = []
35
+ PROJ.rdoc_include = %w(^lib/ ^bin/ ^ext/ .txt$)
36
+ PROJ.rdoc_exclude = %w(extconf.rb$ ^Manifest.txt$)
37
+ PROJ.rdoc_main = 'README.txt'
38
+ PROJ.rdoc_dir = 'doc'
39
+ PROJ.rdoc_remote_dir = nil
40
+
41
+ # Extensions
42
+ PROJ.extensions = FileList['ext/**/extconf.rb']
43
+ PROJ.ruby_opts = %w(-w)
44
+ PROJ.libs = []
45
+ %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
46
+
47
+ # Gem Packaging
48
+ PROJ.files =
49
+ if test ?f, 'Manifest.txt'
50
+ files = File.readlines('Manifest.txt').map {|fn| fn.chomp.strip}
51
+ files.delete ''
52
+ files
53
+ else [] end
54
+ PROJ.executables = PROJ.files.find_all {|fn| fn =~ %r/^bin/}
55
+ PROJ.dependencies = []
56
+ PROJ.need_tar = true
57
+ PROJ.need_zip = false
58
+
59
+ # File Annotations
60
+ PROJ.annotation_exclude = []
61
+ PROJ.annotation_extensions = %w(.txt .rb .erb) << ''
62
+
63
+ # Subversion Repository
64
+ PROJ.svn = false
65
+ PROJ.svn_root = nil
66
+ PROJ.svn_trunk = 'trunk'
67
+ PROJ.svn_tags = 'tags'
68
+ PROJ.svn_branches = 'branches'
69
+
70
+ # Load the other rake files in the tasks folder
71
+ rakefiles = Dir.glob('tasks/*.rake').sort
72
+ rakefiles.unshift(rakefiles.delete('tasks/post_load.rake')).compact!
73
+ import(*rakefiles)
74
+
75
+ # Setup some constants
76
+ WIN32 = %r/win32/ =~ RUBY_PLATFORM unless defined? WIN32
77
+
78
+ DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
79
+
80
+ def quiet( &block )
81
+ io = [STDOUT.dup, STDERR.dup]
82
+ STDOUT.reopen DEV_NULL
83
+ STDERR.reopen DEV_NULL
84
+ block.call
85
+ ensure
86
+ STDOUT.reopen io.first
87
+ STDERR.reopen io.last
88
+ end
89
+
90
+ DIFF = if WIN32 then 'diff.exe'
91
+ else
92
+ if quiet {system "gdiff", __FILE__, __FILE__} then 'gdiff'
93
+ else 'diff' end
94
+ end unless defined? DIFF
95
+
96
+ SUDO = if WIN32 then ''
97
+ else
98
+ if quiet {system 'which sudo'} then 'sudo'
99
+ else '' end
100
+ end
101
+
102
+ RCOV = WIN32 ? 'rcov.cmd' : 'rcov'
103
+ GEM = WIN32 ? 'gem.cmd' : 'gem'
104
+
105
+ %w(rcov spec/rake/spectask rubyforge bones).each do |lib|
106
+ begin
107
+ require lib
108
+ Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", true}
109
+ rescue LoadError
110
+ Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", false}
111
+ end
112
+ end
113
+
114
+ # Reads a file at +path+ and spits out an array of the +paragraphs+
115
+ # specified.
116
+ #
117
+ # changes = paragraphs_of('History.txt', 0..1).join("\n\n")
118
+ # summary, *description = paragraphs_of('README.txt', 3, 3..8)
119
+ #
120
+ def paragraphs_of( path, *paragraphs )
121
+ title = String === paragraphs.first ? paragraphs.shift : nil
122
+ ary = File.read(path).delete("\r").split(/\n\n+/)
123
+
124
+ result = if title
125
+ tmp, matching = [], false
126
+ rgxp = %r/^=+\s*#{Regexp.escape(title)}/i
127
+ paragraphs << (0..-1) if paragraphs.empty?
128
+
129
+ ary.each do |val|
130
+ if val =~ rgxp
131
+ break if matching
132
+ matching = true
133
+ rgxp = %r/^=+/i
134
+ elsif matching
135
+ tmp << val
136
+ end
137
+ end
138
+ tmp
139
+ else ary end
140
+
141
+ result.values_at(*paragraphs)
142
+ end
143
+
144
+ # Adds the given gem _name_ to the current project's dependency list. An
145
+ # optional gem _version_ can be given. If omitted, the newest gem version
146
+ # will be used.
147
+ #
148
+ def depend_on( name, version = nil )
149
+ spec = Gem.source_index.find_name(name).last
150
+ version = spec.version.to_s if version.nil? and !spec.nil?
151
+
152
+ PROJ.dependencies << (version.nil? ? [name] : [name, ">= #{version}"])
153
+ end
154
+
155
+ # Adds the given arguments to the include path if they are not already there
156
+ #
157
+ def ensure_in_path( *args )
158
+ args.each do |path|
159
+ path = File.expand_path(path)
160
+ $:.unshift(path) if test(?d, path) and not $:.include?(path)
161
+ end
162
+ end
163
+
164
+ # Find a rake task using the task name and remove any description text. This
165
+ # will prevent the task from being displayed in the list of available tasks.
166
+ #
167
+ def remove_desc_for_task( names )
168
+ Array(names).each do |task_name|
169
+ task = Rake.application.tasks.find {|t| t.name == task_name}
170
+ next if task.nil?
171
+ task.instance_variable_set :@comment, nil
172
+ end
173
+ end
174
+
175
+ # Change working directories to _dir_, call the _block_ of code, and then
176
+ # change back to the original working directory (the current directory when
177
+ # this method was called).
178
+ #
179
+ def in_directory( dir, &block )
180
+ curdir = pwd
181
+ begin
182
+ cd dir
183
+ return block.call
184
+ ensure
185
+ cd curdir
186
+ end
187
+ end
188
+
189
+ # EOF
data/tasks/spec.rake ADDED
@@ -0,0 +1,43 @@
1
+ # $Id$
2
+
3
+ if HAVE_SPEC_RAKE_SPECTASK
4
+
5
+ namespace :spec do
6
+
7
+ desc 'Run all specs with basic output'
8
+ Spec::Rake::SpecTask.new(:run) do |t|
9
+ t.spec_opts = PROJ.spec_opts
10
+ t.spec_files = PROJ.specs
11
+ t.libs += PROJ.libs
12
+ end
13
+
14
+ desc 'Run all specs with text output'
15
+ Spec::Rake::SpecTask.new(:specdoc) do |t|
16
+ t.spec_opts = PROJ.spec_opts + ['--format', 'specdoc']
17
+ t.spec_files = PROJ.specs
18
+ t.libs += PROJ.libs
19
+ end
20
+
21
+ if HAVE_RCOV
22
+ desc 'Run all specs with RCov'
23
+ Spec::Rake::SpecTask.new(:rcov) do |t|
24
+ t.spec_opts = PROJ.spec_opts
25
+ t.spec_files = PROJ.specs
26
+ t.libs += PROJ.libs
27
+ t.rcov = true
28
+ t.rcov_opts = PROJ.rcov_opts + ['--exclude', 'spec']
29
+ end
30
+ end
31
+
32
+ end # namespace :spec
33
+
34
+ desc 'Alias to spec:run'
35
+ task :spec => 'spec:run'
36
+
37
+ task :clobber => 'spec:clobber_rcov' if HAVE_RCOV
38
+
39
+ remove_desc_for_task %w(spec:clobber_rcov)
40
+
41
+ end # if HAVE_SPEC_RAKE_SPECTASK
42
+
43
+ # EOF
data/tasks/svn.rake ADDED
@@ -0,0 +1,44 @@
1
+ # $Id$
2
+
3
+
4
# Subversion helper tasks. They are only defined when the project is
# configured to use SVN (PROJ.svn is truthy) and the `svn` executable runs.
#
# The probe sends stdout to DEV_NULL first and then points stderr at the same
# place, so it stays silent even when `svn` is not installed. Redirections are
# processed left to right: written as "2>&1 > file", stderr would be attached
# to the terminal before stdout is moved and the error text would leak out.
if PROJ.svn && system("svn --version > #{DEV_NULL} 2>&1")

  # Ask `svn info` for the repository root unless one was configured; fall
  # back to an empty string when the output has no "Repository Root:" line.
  unless PROJ.svn_root
    info = %x/svn info ./
    m = %r/^Repository Root:\s+(.*)$/.match(info)
    PROJ.svn_root = (m.nil? ? '' : m[1])
  end
  # When PROJ.svn is a String it is appended to the root as a sub-path.
  PROJ.svn_root = File.join(PROJ.svn_root, PROJ.svn) if String === PROJ.svn

  namespace :svn do

    desc 'Show tags from the SVN repository'
    task :show_tags do |t|
      tags = %x/svn list #{File.join(PROJ.svn_root, PROJ.svn_tags)}/
      # Remove the trailing slash at the end of each listed line.
      tags.gsub!(%r/\/$/, '')
      puts tags
    end

    desc 'Create a new tag in the SVN repository'
    task :create_tag do |t|
      # The caller must confirm the version being tagged via VERSION=x.y.z.
      v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
      abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version

      trunk = File.join(PROJ.svn_root, PROJ.svn_trunk)
      tag = "%s-%s" % [PROJ.name, PROJ.version]
      tag = File.join(PROJ.svn_root, PROJ.svn_tags, tag)
      msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"

      puts "Creating SVN tag '#{tag}'"
      # Pass the arguments as a list so no shell parses them: the message and
      # URLs survive intact even if they contain quotes or spaces, and the
      # call does not depend on single-quote handling of the platform shell.
      unless system 'svn', 'cp', '-m', msg, trunk, tag
        abort "Tag creation failed"
      end
    end

  end  # namespace :svn

  # Releasing the gem first creates the matching tag.
  task 'gem:release' => 'svn:create_tag'

end  # if PROJ.svn
43
+
44
+ # EOF
data/tasks/test.rake ADDED
@@ -0,0 +1,40 @@
1
+ # $Id$
2
+
3
require 'rake/testtask'

# Unit-test tasks built on Rake::TestTask, plus optional rcov coverage tasks.
namespace :test do

  Rake::TestTask.new(:run) do |runner|
    runner.libs = PROJ.libs
    # Prefer the single entry-point test file when it exists on disk,
    # otherwise run the whole PROJ.tests list.
    runner.test_files =
      if test(?f, PROJ.test_file)
        [PROJ.test_file]
      else
        PROJ.tests
      end
    runner.ruby_opts += PROJ.ruby_opts
    runner.ruby_opts += PROJ.test_opts
  end

  # Coverage tasks are only offered when rcov was detected.
  if HAVE_RCOV
    desc 'Run rcov on the unit tests'
    task :rcov => :clobber_rcov do
      rcov_flags = PROJ.rcov_opts.join(' ')
      # Same file selection as test:run, evaluated when the task executes.
      targets =
        if test(?f, PROJ.test_file)
          [PROJ.test_file]
        else
          PROJ.tests
        end
      target_list = targets.join(' ')
      sh "#{RCOV} #{target_list} #{rcov_flags}"
    end

    desc 'Remove rcov products'
    task :clobber_rcov do
      # Best effort: a failure to remove the directory (for example because
      # it does not exist) is ignored.
      begin
        rm_r 'coverage'
      rescue StandardError
        nil
      end
    end
  end

end  # namespace :test

desc 'Alias to test:run'
task :test => 'test:run'

if HAVE_RCOV
  task :clobber => 'test:clobber_rcov'
end

# Keep the clobber helper out of the task listing.
remove_desc_for_task ['test:clobber_rcov']
39
+
40
+ # EOF
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rmmseg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - pluskid
@@ -9,18 +9,10 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-01 00:00:00 +08:00
12
+ date: 2008-02-05 00:00:00 -08:00
13
13
  default_executable:
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: hoe
17
- version_requirement:
18
- version_requirements: !ruby/object:Gem::Requirement
19
- requirements:
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 1.5.0
23
- version:
14
+ dependencies: []
15
+
24
16
  description: "RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm. It is based on two variants of maximum matching algorithms. Two algorithms are available for using: * simple algorithm that uses only forward maximum matching. * complex algorithm that uses three-word chunk maximum matching and 3 additional rules to solve ambiguities. For more information about the algorithm, please refer to the following essays: * http://technology.chtsai.org/mmseg/ * http://pluskid.lifegoo.com/?p=261"
25
17
  email: pluskid@gmail.com
26
18
  executables:
@@ -29,9 +21,9 @@ extensions: []
29
21
 
30
22
  extra_rdoc_files:
31
23
  - History.txt
32
- - Manifest.txt
33
24
  - README.txt
34
25
  - TODO.txt
26
+ - bin/rmmseg
35
27
  files:
36
28
  - History.txt
37
29
  - Manifest.txt
@@ -72,6 +64,17 @@ files:
72
64
  - spec/spec_helper.rb
73
65
  - spec/svwl_rule_spec.rb
74
66
  - spec/word_spec.rb
67
+ - tasks/annotations.rake
68
+ - tasks/doc.rake
69
+ - tasks/gem.rake
70
+ - tasks/homepage.rake
71
+ - tasks/manifest.rake
72
+ - tasks/post_load.rake
73
+ - tasks/rubyforge.rake
74
+ - tasks/setup.rb
75
+ - tasks/spec.rake
76
+ - tasks/svn.rake
77
+ - tasks/test.rake
75
78
  has_rdoc: true
76
79
  homepage: http://rmmseg.rubyforge.org
77
80
  post_install_message:
@@ -98,6 +101,6 @@ rubyforge_project: rmmseg
98
101
  rubygems_version: 1.0.1
99
102
  signing_key:
100
103
  specification_version: 2
101
- summary: RMMSeg is an implementation of MMSEG algorithm in Ruby. MMSEG is a Chinese segmentation algorithm based on two variants of maximum matching. RMMSeg can be used as a stand alone program or as an Analyzer of Ferret.
104
+ summary: RMMSeg is an implementation of MMSEG Chinese word segmentation algorithm
102
105
  test_files: []
103
106