RubyGems - rmmseg-cpp - Versions diffs - 0.2.7 → 0.2.9 - Mend

rmmseg-cpp 0.2.7 → 0.2.9

Files changed (15) hide show

data/History.txt CHANGED

@@ -1,3 +1,11 @@
+== 0.2.9 / 2011-09-10
+* Fix GC-related bugs in Ruby C extension.
+== 0.2.8 / 2010-03-22
+* Minor release, fixed building bugs in Ruby 1.9.
 == 0.2.7 / 2008-09-17
 * Fix various stupid bugs (typo) that cause problems under MacOSX.

data/README CHANGED

@@ -43,6 +43,17 @@ init by loading the dictionaries:
   RMMSeg::Dictionary.load_dictionaries
+If you want to add customized dictionaries, append them to
++RMMSeg::Dictionary.dictionaries+ before calling +load_dictionaries+.
+The formats of chars.dic and  words.dic are NOT the same:
+* For chars.dic, each line contains freq, a space, and then the character
+* For words.dic, each line contains length, a space, and then the word.
+Note that length means the length of the word, i.e. the number of
+characters in the word, not the number of bytes. WARNING: there should
+be a newline at the end of every dictionary file.
 Then create an +Algorithm+ object and call +next_token+ until it
 returns +nil+:

data/Rakefile CHANGED

@@ -8,7 +8,7 @@ require 'rmmseg'
 task :default => 'spec:run'
 PROJ.name = 'rmmseg-cpp'
-PROJ.version = '0.2.7'
+PROJ.version = '0.2.9'
 PROJ.authors = 'pluskid'
 PROJ.email = 'pluskid@gmail.com'
 PROJ.url = 'http://rmmseg-cpp.rubyforge.org'

data/ext/rmmseg/algor.cpp CHANGED

@@ -71,27 +71,28 @@ namespace rmmseg
     Token Algorithm::get_cjk_word(int len)
     {
-        create_chunks();
-        if (m_chunks_size > 1)
-            m_chunks_size = mm_filter(m_chunks, m_chunks_size);
-        if (m_chunks_size > 1)
-            m_chunks_size = lawl_filter(m_chunks, m_chunks_size);
-        if (m_chunks_size > 1)
-            m_chunks_size = svwl_filter(m_chunks, m_chunks_size);
-        if (m_chunks_size > 1)
-            m_chunks_size = lsdmfocw_filter(m_chunks, m_chunks_size);
-        if (m_chunks_size < 1)
+        vector<Chunk> chunks = create_chunks();
+        if (chunks.size() > 1)
+            mm_filter(chunks);
+        if (chunks.size() > 1)
+            lawl_filter(chunks);
+        if (chunks.size() > 1)
+            svwl_filter(chunks);
+        if (chunks.size() > 1)
+            lsdmfocw_filter(chunks);
+        if (chunks.size() < 1)
             return Token(NULL, 0);
-        Token token(m_text+m_pos, m_chunks[0].words[0]->nbytes);
-        m_pos += m_chunks[0].words[0]->nbytes;
+        Token token(m_text+m_pos, chunks[0].words[0]->nbytes);
+        m_pos += chunks[0].words[0]->nbytes;
         return token;
     }
-    void Algorithm::create_chunks()
+    vector<Chunk> Algorithm::create_chunks()
     {
+        vector<Chunk> chunks;
         Chunk chunk;
         Word *w1, *w2, *w3;
@@ -100,8 +101,6 @@ namespace rmmseg
         typedef vec_t::iterator it_t;
         vec_t words1 = find_match_words();
-        m_chunks_size = 0;
         for (it_t i1 = words1.begin();
              i1 != words1.end();
              ++i1)
@@ -136,17 +135,13 @@ namespace rmmseg
                                 chunk.n = 3;
                                 chunk.words[2] = w3;
                             }
-                            memcpy(m_chunks+m_chunks_size, &chunk,
-                                   sizeof(Chunk));
-                            m_chunks_size++;
+                            chunks.push_back(chunk);
                         }
                     }
                     else if (m_pos == m_text_length)
                     {
                         chunk.n = 2;
-                        memcpy(m_chunks+m_chunks_size, &chunk,
-                               sizeof(Chunk));
-                        m_chunks_size++;
+                        chunks.push_back(chunk);
                     }
                     m_pos -= w2->nbytes;
                 }
@@ -154,13 +149,13 @@ namespace rmmseg
             else if (m_pos == m_text_length)
             {
                 chunk.n = 1;
-                memcpy(m_chunks+m_chunks_size, &chunk, sizeof(Chunk));
-                m_chunks_size++;
+                chunks.push_back(chunk);
             }
             m_pos -= w1->nbytes;
         }
         m_pos = orig_pos;
+        return chunks;
     }
     int Algorithm::next_char()
@@ -169,15 +164,11 @@ namespace rmmseg
         unsigned char ch = m_text[m_pos];
         if (ch >= 0xC0 && ch <= 0xDF)
         {
-            if (m_text_length-m_pos < 2)
-                return 1; /* broken text at the end */
-            return 2;
+            return min(2, m_text_length-m_pos);
         }
         if (ch >= 0xE0 && ch <= 0xEF)
         {
-            if (m_text_length-m_pos < 3)
-                return 1; /* broken text at the end */
-            return 3;
+            return min(3, m_text_length-m_pos);
         }
         return 1;
     }
@@ -195,11 +186,11 @@ namespace rmmseg
         while (m_pos < m_text_length)
         {
+            if (n >= max_word_length())
+                break;
             len = next_char();
             if (len <= 1)
                 break;
-            if (n >= max_word_length())
-                break;
             m_pos += len;
             n++;

data/ext/rmmseg/algor.h CHANGED

@@ -22,33 +22,32 @@ namespace rmmseg
     {
     public:
         Algorithm(const char *text, int length)
-            :m_chunks_size(0), m_text(text), m_pos(0),
+            :m_text(text), m_pos(0),
             m_text_length(length),
             m_tmp_words_i(0),
             m_match_cache_i(0)
-            {
-                for (int i = 0; i < match_cache_size; ++i)
-                    m_match_cache[i].first = -1;
-            }
+        {
+            for (int i = 0; i < match_cache_size; ++i)
+                m_match_cache[i].first = -1;
+        }
         Token next_token();
+        const char *get_text() const
+        {
+            return m_text;
+        }
     private:
         Token get_basic_latin_word();
         Token get_cjk_word(int);
-        static const int MAX_WORD_LENGTH = 4;
-        static const int MAX_N_CHUNKS = \
-            MAX_WORD_LENGTH*MAX_WORD_LENGTH*MAX_WORD_LENGTH;
-        void create_chunks();
+        std::vector<Chunk> create_chunks();
         int next_word();
         int next_char();
         std::vector<Word *> find_match_words();
-        int max_word_length() { return MAX_WORD_LENGTH; }
+        int max_word_length() { return 4; }
-        Chunk m_chunks[MAX_N_CHUNKS];
-        int m_chunks_size;
         const char *m_text;
         int m_pos;
@@ -65,7 +64,7 @@ namespace rmmseg
             return &m_tmp_words[m_tmp_words_i++];
         }
-        /* related to max_word_length and match_words_cache_size */
+        /* related to max_word_length and match_cache_size */
         static const int max_tmp_words = 64;
         Word m_tmp_words[max_tmp_words];
         int m_tmp_words_i;

data/ext/rmmseg/chunk.h CHANGED

@@ -12,36 +12,37 @@ namespace rmmseg
      */
     struct Chunk
     {
-        int total_length()
+        int total_length() const
         {
             int len = 0;
             for (int i = 0; i < n; ++i)
-                if (words[i]->length == -1) /* tmp word */
-                    len += 1;
-                else
-                    len += words[i]->length;
+                len += std::abs(words[i]->length);
+                //if (words[i]->length == -1) /* tmp word */
+                //    len += 1;
+                //else
+                //    len += words[i]->length;
             return len;
         }
-        double average_length()
+        double average_length() const
         {
             return ((double)total_length())/n;
         }
-        double variance()
+        double variance() const
         {
             double avg = average_length();
             double sqr_sum = 0;
             double tmp;
             for (int i = 0; i < n; ++i)
             {
-                tmp = words[i]->length;
-                if (tmp == -1)
-                    tmp = 1;
+                tmp = std::abs(words[i]->length);
+                //if (tmp == -1)
+                //    tmp = 1;
                 tmp = tmp-avg;
                 sqr_sum += tmp*tmp;
             }
             return std::sqrt(sqr_sum);
         }
-        int degree_of_morphemic_freedom()
+        int degree_of_morphemic_freedom() const
         {
             int sum = 0;
             for (int i = 0; i < n; ++i)

data/ext/rmmseg/dict.cpp CHANGED

@@ -12,12 +12,12 @@ namespace rmmseg
         Entry *next;
     };
-    const int init_size = 262147;
-    const int max_density = 5;
+    const size_t init_size = 262147;
+    const size_t max_density = 5;
     /*
       Table of prime numbers 2^n+a, 2<=n<=30.
     */
-    static int primes[] = {
+    static size_t primes[] = {
         524288 + 21,
         1048576 + 7,
         2097152 + 17,
@@ -33,14 +33,14 @@ namespace rmmseg
     };
-    static int n_bins = init_size;
-    static int n_entries = 0;
+    static size_t n_bins = init_size;
+    static size_t n_entries = 0;
     static Entry **bins = static_cast<Entry **>(std::calloc(init_size,
                                                             sizeof(Entry *)));
-    static int new_size()
+    static size_t new_size()
     {
-        for (int i = 0;
+        for (size_t i = 0;
              i < sizeof(primes)/sizeof(primes[0]);
              ++i)
         {
@@ -76,7 +76,7 @@ namespace rmmseg
         Entry *entry, *next;
         unsigned int hash_val;
-        for (int i = 0; i < n_bins; ++i)
+        for (size_t i = 0; i < n_bins; ++i)
         {
             entry = bins[i];
             while (entry)
@@ -140,6 +140,7 @@ namespace rmmseg
                 entry->next = NULL;
                 bins[h] = entry;
                 n_entries++;
+                return;
             }
             bool done = false;
@@ -168,6 +169,7 @@ namespace rmmseg
                 entry->word = word;
                 entry->next = bins[h];
                 bins[h] = entry;
+                n_entries++;
             }
         }
@@ -179,7 +181,7 @@ namespace rmmseg
                 return false;
             }
-            const int buf_len = 24;
+            const size_t buf_len = 24;
             char buf[buf_len];
             char *ptr;

data/ext/rmmseg/memory.cpp CHANGED

@@ -4,6 +4,6 @@
 namespace rmmseg
 {
-    char *_pool_base = static_cast<char *>(std::malloc(PRE_ALLOC_SIZE));
-    int   _pool_size = PRE_ALLOC_SIZE;
+    char   *_pool_base = static_cast<char *>(std::malloc(PRE_ALLOC_SIZE));
+    size_t  _pool_size = PRE_ALLOC_SIZE;
 }

data/ext/rmmseg/memory.h CHANGED

@@ -12,12 +12,12 @@
 namespace rmmseg
 {
-    const int REALLOC_SIZE = 2048; /* 2KB */
+    const size_t REALLOC_SIZE = 2048; /* 2KB */
-    extern int   _pool_size;
-    extern char *_pool_base;
+    extern size_t  _pool_size;
+    extern char   *_pool_base;
-    inline void *pool_alloc(int len)
+    inline void *pool_alloc(size_t len)
     {
         void *mem = _pool_base;

data/ext/rmmseg/rmmseg.cpp CHANGED

@@ -36,7 +36,7 @@ extern "C" {
      */
     static VALUE dic_load_chars(VALUE mod, VALUE path)
     {
-        if (rmmseg::dict::load_chars(RSTRING(path)->ptr))
+        if (rmmseg::dict::load_chars(RSTRING_PTR(path)))
             return Qtrue;
         return Qfalse;
     }
@@ -51,7 +51,7 @@ extern "C" {
      */
     static VALUE dic_load_words(VALUE mod, VALUE path)
     {
-        if (rmmseg::dict::load_words(RSTRING(path)->ptr))
+        if (rmmseg::dict::load_words(RSTRING_PTR(path)))
             return Qtrue;
         return Qfalse;
     }
@@ -70,8 +70,8 @@ extern "C" {
      */
     static VALUE dic_add(VALUE mod, VALUE word, VALUE len, VALUE freq)
     {
-        const char *str = RSTRING(word)->ptr;
-        int nbytes = RSTRING(word)->len;
+        const char *str = RSTRING_PTR(word);
+        int nbytes = RSTRING_LEN(word);
         rmmseg::Word *w = rmmseg::make_word(str, FIX2INT(len), FIX2INT(freq), nbytes);
         rmmseg::dict::add(w);
         return Qnil;
@@ -88,8 +88,8 @@ extern "C" {
      */
     static VALUE dic_has_word(VALUE mod, VALUE word)
     {
-        const char *str = RSTRING(word)->ptr;
-        int nbytes = RSTRING(word)->len;
+        const char *str = RSTRING_PTR(word);
+        int nbytes = RSTRING_LEN(word);
         if (rmmseg::dict::get(str, nbytes) != NULL)
             return Qtrue;
         return Qfalse;
@@ -162,16 +162,17 @@ extern "C" {
         int start = t.text-base;
         // This is necessary, see
-        // http://pluskid.lifegoo.com/?p=348
+        // http://lifegoo.pluskid.org/?p=348
         volatile VALUE text = rb_str_new(t.text, t.length);
         tk->text = text;
         tk->start = INT2FIX(start);
         tk->end = INT2FIX(start + t.length);
-        return Data_Wrap_Struct(cToken,
+        volatile VALUE tok = Data_Wrap_Struct(cToken,
                                 (RUBY_DATA_FUNC)tk_mark,
                                 (RUBY_DATA_FUNC)tk_free,
                                 tk);
+        return tok;
     }
     /*********************
@@ -207,8 +208,8 @@ extern "C" {
         void *mem;
         algor->text = text;
         mem = malloc(sizeof(rmmseg::Algorithm));
-        algor->algor = new(mem) rmmseg::Algorithm(RSTRING(text)->ptr,
-                                                  RSTRING(text)->len);
+        algor->algor = new(mem) rmmseg::Algorithm(RSTRING_PTR(text),
+                                                  RSTRING_LEN(text));
         return Data_Wrap_Struct(klass,
                                 (RUBY_DATA_FUNC)algor_mark,
@@ -231,7 +232,8 @@ extern "C" {
         if (tk.length == 0)
             return Qnil;
-        return tk_create(RSTRING(algor->text)->ptr, tk);
+        volatile VALUE rtk = tk_create(RSTRING_PTR(algor->text), tk);
+        return rtk;
     }

data/ext/rmmseg/rules.h CHANGED

@@ -9,37 +9,36 @@
 namespace rmmseg
 {
     template <typename Cmp>
-    int take_highest(Chunk *chunks, int n, Cmp &cmp)
+    void take_highest(std::vector<Chunk> &chunks, const Cmp &cmp)
     {
-        int i = 1, j;
-        Chunk &max = chunks[0];
-        for (j = 1; j < n; ++j)
+        unsigned int i = 1, j;
+        for (j = 1; j < chunks.size(); ++j)
         {
-            int rlt = cmp(chunks[j], max);
+            int rlt = cmp(chunks[j], chunks[0]);
             if (rlt > 0)
                 i = 0;
             if (rlt >= 0)
                 std::swap(chunks[i++], chunks[j]);
         }
-        return i;
+        chunks.erase(chunks.begin()+i, chunks.end());
     }
     struct MMCmp_t
     {
-        int operator()(Chunk &a, Chunk &b)
+        int operator()(const Chunk &a, const Chunk &b) const
         {
             return a.total_length() - b.total_length();
         }
     } MMCmp;
-    int mm_filter(Chunk *chunks, int n)
+    void mm_filter(std::vector<Chunk> &chunks)
     {
-        return take_highest(chunks, n, MMCmp);
+        take_highest(chunks, MMCmp);
     }
     struct LAWLCmp_t
     {
-        int operator()(Chunk &a, Chunk &b)
+        int operator()(const Chunk &a, const Chunk &b) const
         {
             double rlt = a.average_length() - b.average_length();
             if (rlt == 0)
@@ -49,14 +48,14 @@ namespace rmmseg
             return -1;
         }
     } LAWLCmp;
-    int lawl_filter(Chunk *chunks, int n)
+    void lawl_filter(std::vector<Chunk> &chunks)
     {
-        return take_highest(chunks, n, LAWLCmp);
+        take_highest(chunks, LAWLCmp);
     }
     struct SVWLCmp_t
     {
-        int operator()(Chunk &a, Chunk& b)
+        int operator()(const Chunk &a, const Chunk& b) const
         {
             double rlt = a.variance() - b.variance();
             if (rlt == 0)
@@ -66,21 +65,21 @@ namespace rmmseg
             return -1;
         }
     } SVWLCmp;
-    int svwl_filter(Chunk *chunks, int n)
+    void svwl_filter(std::vector<Chunk> &chunks)
     {
-        return take_highest(chunks, n, SVWLCmp);
+        take_highest(chunks, SVWLCmp);
     }
     struct LSDMFOCWCmp_t
     {
-        int operator()(Chunk &a, Chunk& b)
+        int operator()(const Chunk &a, const Chunk& b) const
         {
             return a.degree_of_morphemic_freedom() - b.degree_of_morphemic_freedom();
         }
     } LSDMFOCWCmp;
-    int lsdmfocw_filter(Chunk *chunks, int n)
+    void lsdmfocw_filter(std::vector<Chunk> &chunks)
     {
-        return take_highest(chunks, n, LSDMFOCWCmp);
+        take_highest(chunks, LSDMFOCWCmp);
     }
 }

data/ext/rmmseg/word.h CHANGED

@@ -28,11 +28,11 @@ namespace rmmseg
         if (freq > USHRT_MAX)
             freq = USHRT_MAX;   /* avoid overflow */
         if (nbytes == -1)
-            nbytes = strlen(text);
+            nbytes = std::strlen(text);
         Word *w = static_cast<Word *>(pool_alloc(sizeof(Word)
                                                  + nbytes+1
                                                  - word_embed_len));
-        w->nbytes = std::strlen(text);
+        w->nbytes = nbytes;
         w->length = length;
         w->freq = freq;
         std::strncpy(w->text, text, nbytes);

data/misc/homepage.erb CHANGED

@@ -1,7 +1,7 @@
 <%# -*- mode: text; coding: utf-8 -*- %>
 <%
   $title = "rmmseg-cpp Homepage"
-  $authors = { 'pluskid' => 'http://pluskid.lifegoo.com' }
+  $authors = { 'pluskid' => 'http://blog.pluskid.org' }
 %>
 <% chapter "Introduction" do %>
@@ -110,7 +110,24 @@
       RMMSeg::Dictionary.load_dictionaries
       </code>
-      Now rmmseg-cpp will be ready to do segmenting.
+      Now rmmseg-cpp will be ready to do segmenting. If you want to load your own customized
+      dictionaries, please customize <tt>RMMSeg::Dictionary.dictionaries</tt> before calling
+      <tt>load_dictionaries</tt>. e.g.
+      <code>
+      RMMSeg::Dictionary.dictionaries = [[:chars, "my_chars.dic"],
+                                         [:words, "my_words.dic"],
+                                         [:words, "my_words2.dic"]]
+      </code>
+      The basic formats of the char-dictionary and the word-dictionary are similar. Each line
+      contains a number, then *a* space, then the string. Note there *SHOULD* be a newline
+      at the end of the dictionary file. The number has a different meaning in the
+      char-dictionary and the word-dictionary.
+      In the char-dictionary, the number means the frequency of the character. In the word-dictionary,
+      the number means the number of characters in the word. Note that this is NOT the number
+      of *bytes* in the word.
     <% end %>
@@ -139,7 +156,7 @@
       of running that example is shown in <%= xref "Ferret Example Screenshot" %>.
       <% figure "Ferret Example Screenshot" do %>
-        !http://pluskid.lifegoo.com/wp-content/uploads/2008/02/rmmseg.png!
+        !http://lifegoo.pluskid.org/wp-content/uploads/2008/02/rmmseg.png!
       <% end %>
     <% end %>
@@ -174,6 +191,6 @@
 <% chapter "Resources" do %>
   * "Project Home":http://rubyforge.org/projects/rmmseg-cpp/: The Project page at RubyForge.
   * "RDoc of rmmseg-cpp":http://rmmseg-cpp.rubyforge.org/rdoc/index.html: The auto generated rdoc of RMMSeg.
-  * "Free Mind":http://pluskid.lifegoo.com/: The author's blog.
+  * "Free Mind":http://blog.pluskid.org/: The author's blog.
   * "Author's Email":mailto:pluskid@gmail.com: Contact me if you have any problem.
 <% end %>

data/misc/homepage.html CHANGED

@@ -3,7 +3,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
   <head>
     <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
-    <meta name="date" content="17 September 2008"/>
+    <meta name="date" content="10 September 2011"/>
     <meta name="author" content="pluskid"/>
     <meta name="generator" content="Gerbil 3.1.0"/>
     <title>rmmseg-cpp Homepage</title>
@@ -928,8 +928,8 @@
       <h1 class="title"><a class="here" href="#">rmmseg-cpp Homepage</a></h1>
-      <h2 class="authors"><a href="http://pluskid.lifegoo.com">pluskid</a></h2>
-      <h3 class="date">17 September 2008</h3>
+      <h2 class="authors"><a href="http://blog.pluskid.org">pluskid</a></h2>
+      <h3 class="date">10 September 2011</h3>
     </div>
@@ -943,12 +943,12 @@
       <div id="Contents">
         <h1 class="title"><a class="here" href="#Contents">Contents</a></h1>
         <ul>
-          <li>1&nbsp;&nbsp;<a id="a-607090478" href="#Introduction">Introduction</a></li><li>2&nbsp;&nbsp;<a id="a-607093208" href="#Setup">Setup</a><ul><li>2.1&nbsp;&nbsp;<a id="a-607094728" href="#Requirements">Requirements</a></li><li>2.2&nbsp;&nbsp;<a id="a-607099478" href="#Installation">Installation</a><ul><li>2.2.1&nbsp;&nbsp;<a id="a-607103648" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2&nbsp;&nbsp;<a id="a-607106038" href="#From-Git">From Git</a></li></ul></li></ul></li><li>3&nbsp;&nbsp;<a id="a-607115348" href="#Usage">Usage</a><ul><li>3.1&nbsp;&nbsp;<a id="a-607120028" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2&nbsp;&nbsp;<a id="a-607126248" href="#Use-in-Ruby-program">Use in Ruby program</a><ul><li>3.2.1&nbsp;&nbsp;<a id="a-607131168" href="#Initialize">Initialize</a></li><li>3.2.2&nbsp;&nbsp;<a id="a-607137248" href="#Ferret-Integration">Ferret Integration</a></li><li>3.2.3&nbsp;&nbsp;<a id="a-607154008" href="#Normal-Ruby-program">Normal Ruby program</a></li></ul></li></ul></li><li>4&nbsp;&nbsp;<a id="a-607162878" href="#Who-use-it">Who use it</a></li><li>5&nbsp;&nbsp;<a id="a-607172188" href="#Resources">Resources</a></li>
+          <li>1&nbsp;&nbsp;<a id="a16539600" href="#Introduction">Introduction</a></li><li>2&nbsp;&nbsp;<a id="a16533660" href="#Setup">Setup</a><ul><li>2.1&nbsp;&nbsp;<a id="a16530900" href="#Requirements">Requirements</a></li><li>2.2&nbsp;&nbsp;<a id="a16472140" href="#Installation">Installation</a><ul><li>2.2.1&nbsp;&nbsp;<a id="a16468300" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2&nbsp;&nbsp;<a id="a16363260" href="#From-Git">From Git</a></li></ul></li></ul></li><li>3&nbsp;&nbsp;<a id="a16272720" href="#Usage">Usage</a><ul><li>3.1&nbsp;&nbsp;<a id="a16246860" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2&nbsp;&nbsp;<a id="a16240340" href="#Use-in-Ruby-program">Use in Ruby program</a><ul><li>3.2.1&nbsp;&nbsp;<a id="a16231580" href="#Initialize">Initialize</a></li><li>3.2.2&nbsp;&nbsp;<a id="a16187880" href="#Ferret-Integration">Ferret Integration</a></li><li>3.2.3&nbsp;&nbsp;<a id="a16113620" href="#Normal-Ruby-program">Normal Ruby program</a></li></ul></li></ul></li><li>4&nbsp;&nbsp;<a id="a16072000" href="#Who-use-it">Who use it</a></li><li>5&nbsp;&nbsp;<a id="a16034860" href="#Resources">Resources</a></li>
         </ul>
       </div>
-      <div id="lof"><h1 id="Figures" class="title"><a class="here" href="#Figures">Figures</a></h1> <ol><li><a id="a-607147048" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1 id="Tips" class="title"><a class="here" href="#Tips">Tips</a></h1> <ol><li><a id="a-607168148" href="#Expand-this-list">Expand this list</a></li></ol><h1 id="Warnings" class="title"><a class="here" href="#Warnings">Warnings</a></h1> <ol><li><a id="a-607107678" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></li></ol></div>
+      <div id="lof"><h1 id="Figures" class="title"><a class="here" href="#Figures">Figures</a></h1> <ol><li><a id="a16148860" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1 id="Tips" class="title"><a class="here" href="#Tips">Tips</a></h1> <ol><li><a id="a16067160" href="#Expand-this-list">Expand this list</a></li></ol><h1 id="Warnings" class="title"><a class="here" href="#Warnings">Warnings</a></h1> <ol><li><a id="a16360020" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></li></ol></div>
     <br style="display: none"/>
     <hr style="display: none"/>
@@ -958,30 +958,30 @@
 <div class="chapter">
   <h1 class="title">
     Chapter
-    <a class="list" id="Introduction" href="#a-607090478">1</a>
+    <a class="list" id="Introduction" href="#a16539600">1</a>
     <br/>
     <a class="here" href="#Introduction"><big>Introduction</big></a>
   </h1>
-  <div class="content"><p>rmmseg-cpp is a high performance Chinese word segmentation utility for<br />
-Ruby. It features full <a href="http://ferret.davebalmain.com/">Ferret</a> integration<br />
+  <div class="content"><p>rmmseg-cpp is a high performance Chinese word segmentation utility for
+Ruby. It features full <a href="http://ferret.davebalmain.com/">Ferret</a> integration
 as well as support for normal Ruby program usage.</p>
-<p>rmmseg-cpp is a re-written of the original<br />
-<a href="http://rmmseg.rubyforge.org/">RMMSeg</a> gem in C++. RMMSeg is written<br />
-in pure Ruby. Though I tried hard to tweak RMMSeg, it just consumes<br />
+<p>rmmseg-cpp is a re-written of the original
+<a href="http://rmmseg.rubyforge.org/">RMMSeg</a> gem in C++. RMMSeg is written
+in pure Ruby. Though I tried hard to tweak RMMSeg, it just consumes
 lots of memory and the segmenting process is rather slow.</p>
-<p>The interface is almost identical to RMMSeg but the performance is<br />
-much better. This gem is always preferable in production<br />
-use. However, if you want to understand how the MMSEG segmenting<br />
-algorithm works, the source code of RMMSeg is a better choice than<br />
+<p>The interface is almost identical to RMMSeg but the performance is
+much better. This gem is always preferable in production
+use. However, if you want to understand how the MMSEG segmenting
+algorithm works, the source code of RMMSeg is a better choice than
 this.</p></div>
 </div>
 <div class="chapter">
   <h1 class="title">
     Chapter
-    <a class="list" id="Setup" href="#a-607093208">2</a>
+    <a class="list" id="Setup" href="#a16533660">2</a>
     <br/>
@@ -990,7 +990,7 @@ this.</p></div>
   <div class="content"><div class="section">
   <h2 class="title">
-    <a class="list" id="Requirements" href="#a-607094728">2.1</a>&nbsp;&nbsp;<a class="here" href="#Requirements">Requirements</a>
+    <a class="list" id="Requirements" href="#a16530900">2.1</a>&nbsp;&nbsp;<a class="here" href="#Requirements">Requirements</a>
   </h2>
   <div class="content"><p>Your system needs the following software to run RMMSeg.</p>
 <table border="1">
@@ -1011,37 +1011,37 @@ this.</p></div>
 		<td> Used to build the native extension </td>
 	</tr>
 </table></div>
-</div><br />
+</div>
 <div class="section">
   <h2 class="title">
-    <a class="list" id="Installation" href="#a-607099478">2.2</a>&nbsp;&nbsp;<a class="here" href="#Installation">Installation</a>
+    <a class="list" id="Installation" href="#a16472140">2.2</a>&nbsp;&nbsp;<a class="here" href="#Installation">Installation</a>
   </h2>
   <div class="content"><div class="section">
   <h3 class="title">
-    <a class="list" id="Using-RubyGems" href="#a-607103648">2.2.1</a>&nbsp;&nbsp;<a class="here" href="#Using-RubyGems">Using RubyGems</a>
+    <a class="list" id="Using-RubyGems" href="#a16468300">2.2.1</a>&nbsp;&nbsp;<a class="here" href="#Using-RubyGems">Using RubyGems</a>
   </h3>
   <div class="content"><p>To install the gem remotely from <a href="http://rubyforge.org">RubyForge</a>:</p>
 sudo gem install rmmseg-cpp
-<p>Or you can download the gem file manually from<br />
-<a href="http://rubyforge.org/projects/rmmseg-cpp/">RubyForge</a> and<br />
+<p>Or you can download the gem file manually from
+<a href="http://rubyforge.org/projects/rmmseg-cpp/">RubyForge</a> and
 install it locally:</p>
 sudo gem install &#8212;local rmmseg-cpp-x.y.z.gem</div>
-</div><br />
+</div>
 <div class="section">
   <h3 class="title">
-    <a class="list" id="From-Git" href="#a-607106038">2.2.2</a>&nbsp;&nbsp;<a class="here" href="#From-Git">From Git</a>
+    <a class="list" id="From-Git" href="#a16363260">2.2.2</a>&nbsp;&nbsp;<a class="here" href="#From-Git">From Git</a>
   </h3>
-  <div class="content"><p>To build the gem manually from the latest source code. You&#8217;ll<br />
+  <div class="content"><p>To build the gem manually from the latest source code. You&#8217;ll
 need to have <strong>git</strong> and <strong>rake</strong> installed.</p>
 <p><div class="warning">
-  <p class="title"><a class="list" id="The-latest-source-code-may-be-unstable" href="#a-607107678">Warning 1</a>.&nbsp;&nbsp;<a class="here" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></p>
+  <p class="title"><a class="list" id="The-latest-source-code-may-be-unstable" href="#a16360020">Warning 1</a>.&nbsp;&nbsp;<a class="here" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></p>
-  <div class="content icon-warning">While I tried to avoid such kind of problems, the source<br />
-code from the repository might still be broken sometimes.<br />
+  <div class="content icon-warning">While I tried to avoid such kind of problems, the source
+code from the repository might still be broken sometimes.
 It is generally not recommended to follow the source code.</div>
-</div>    <br />
-The source code of rmmseg-cpp is hosted at<br />
-<a href="http://github.com/pluskid/rmmseg-cpp/">GitHub</a>. You can get the<br />
+</div>
+The source code of rmmseg-cpp is hosted at
+<a href="http://github.com/pluskid/rmmseg-cpp/">GitHub</a>. You can get the
 source code by git clone:</p>
 git clone git://github.com/pluskid/rmmseg-cpp.git
 <p>then you can use Rake to build and install the gem:</p>
@@ -1053,97 +1053,107 @@ rake gem:install</div>
 <div class="chapter">
   <h1 class="title">
     Chapter
-    <a class="list" id="Usage" href="#a-607115348">3</a>
+    <a class="list" id="Usage" href="#a16272720">3</a>
     <br/>
     <a class="here" href="#Usage"><big>Usage</big></a>
   </h1>
-  <div class="content"><div class="section">
+  <div class="content">
+<p><div class="section">
   <h2 class="title">
-    <a class="list" id="Stand-Alone-rmmseg" href="#a-607120028">3.1</a>&nbsp;&nbsp;<a class="here" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a>
+    <a class="list" id="Stand-Alone-rmmseg" href="#a16246860">3.1</a>&nbsp;&nbsp;<a class="here" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a>
   </h2>
-  <div class="content"><p>rmmseg-cpp comes with a script <strong>rmmseg</strong>. To get the basic usage, just execute it with <tt>-h</tt> option:<br />
-  <br />
-  rmmseg -h</p>
-<p>It reads from STDIN and print result to STDOUT. Here is a real<br />
+  <div class="content"><p>rmmseg-cpp comes with a script <strong>rmmseg</strong>. To get the basic usage, just execute it with <tt>-h</tt> option:</p>
+rmmseg -h
+<p>It reads from STDIN and prints the result to STDOUT. Here is a real
 example:</p>
 $ echo &#8220;我们都喜欢用 Ruby&#8221; | rmmseg
 我们 都 喜欢 用 Ruby</div>
-</div><br />
+</div>
 <div class="section">
   <h2 class="title">
-    <a class="list" id="Use-in-Ruby-program" href="#a-607126248">3.2</a>&nbsp;&nbsp;<a class="here" href="#Use-in-Ruby-program">Use in Ruby program</a>
+    <a class="list" id="Use-in-Ruby-program" href="#a16240340">3.2</a>&nbsp;&nbsp;<a class="here" href="#Use-in-Ruby-program">Use in Ruby program</a>
   </h2>
   <div class="content"><div class="section">
   <h3 class="title">
-    <a class="list" id="Initialize" href="#a-607131168">3.2.1</a>&nbsp;&nbsp;<a class="here" href="#Initialize">Initialize</a>
+    <a class="list" id="Initialize" href="#a16231580">3.2.1</a>&nbsp;&nbsp;<a class="here" href="#Initialize">Initialize</a>
   </h3>
   <div class="content"><p>To use rmmseg-cpp in Ruby program, you&#8217;ll first load it with RubyGems:</p>
 <pre class="code">
-require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rubygems</span><span style="color:#710">'</span></span>
-require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rmmseg</span><span style="color:#710">'</span></span>
+require <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">'</span><span style="">rubygems</span><span style="color:#710">'</span></span>
+require <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">'</span><span style="">rmmseg</span><span style="color:#710">'</span></span>
 </pre>
-<p>Then you may customize the dictionaries used by rmmseg-cpp<br />
-(see <a href="http://rmmseg-cpp.rubyforge.org/rdoc/classes/RMMSeg/Dictionary.html">the rdoc</a> on<br />
+<p>Then you may customize the dictionaries used by rmmseg-cpp
+(see <a href="http://rmmseg-cpp.rubyforge.org/rdoc/classes/RMMSeg/Dictionary.html">the rdoc</a> on
 how to add your own dictionaries) and load all dictionaries:</p>
 <pre class="code">
-<span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Dictionary</span>.load_dictionaries
+<span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Dictionary</span>.load_dictionaries
+</pre>
+<p>Now rmmseg-cpp will be ready to do segmenting. If you want to load your own customized
+dictionaries, please customize <tt>RMMSeg::Dictionary.dictionaries</tt> before calling
+<tt>load_dictionaries</tt>. e.g.</p>
+<pre class="code">
+<span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Dictionary</span>.dictionaries = [[<span style="color:#A60">:chars</span>, <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="">my_chars.dic</span><span style="color:#710">&quot;</span></span>],
+                                   [<span style="color:#A60">:words</span>, <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="">my_words.dic</span><span style="color:#710">&quot;</span></span>],
+                                   [<span style="color:#A60">:words</span>, <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="">my_words2.dic</span><span style="color:#710">&quot;</span></span>]]
 </pre>
-<p>Now rmmseg-cpp will be ready to do segmenting.</p></div>
-</div><br />
+<p>The basic formats of the char-dictionary and the word-dictionary are similar. For each line,
+there is a number, then <strong>a</strong> space, then the string. Note there <strong>SHOULD</strong> be a newline
+at the end of the dictionary file. The number has a different meaning in the char-dictionary
+than in the word-dictionary.</p>
+<p>In char-dictionary, the number means the frequency of the character. In word-dictionary,
+the number means the number of characters in the word. Note that this is NOT the number
+of <strong>bytes</strong> in the word.</p></div>
+</div>
 <div class="section">
   <h3 class="title">
-    <a class="list" id="Ferret-Integration" href="#a-607137248">3.2.2</a>&nbsp;&nbsp;<a class="here" href="#Ferret-Integration">Ferret Integration</a>
+    <a class="list" id="Ferret-Integration" href="#a16187880">3.2.2</a>&nbsp;&nbsp;<a class="here" href="#Ferret-Integration">Ferret Integration</a>
   </h3>
-  <div class="content"><p>To use rmmseg-cpp with Ferret, you&#8217;ll need to <code class="code">require</code> the<br />
-Ferret support of rmmseg-cpp (Of course you&#8217;ll also have to<br />
-got Ferret installed. If you have problems running the belowing<br />
-example, please try to update to the latest version of both<br />
-Ferret and rmmseg-cpp first):<br />
-  <br />
+  <div class="content"><p>To use rmmseg-cpp with Ferret, you&#8217;ll need to <code class="code">require</code> the
+Ferret support of rmmseg-cpp (Of course you&#8217;ll also need to
+have Ferret installed. If you have problems running the following
+example, please try to update to the latest version of both
+Ferret and rmmseg-cpp first):</p>
 <pre class="code">
-require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rmmseg/ferret</span><span style="color:#710">'</span></span>
-</pre></p>
-<p>rmmseg-cpp comes with a ready to use Ferret analyzer:<br />
-  <br />
+require <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">'</span><span style="">rmmseg/ferret</span><span style="color:#710">'</span></span>
+</pre>
+<p>rmmseg-cpp comes with a ready to use Ferret analyzer:</p>
 <pre class="code">
-analyzer = <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analyzer</span>.new { |tokenizer|
-  <span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analysis</span>::<span style="color:#036; font-weight:bold">LowerCaseFilter</span>.new(tokenizer)
+analyzer = <span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Ferret</span>::<span style="color:#036;font-weight:bold">Analyzer</span>.new { |tokenizer|
+  <span style="color:#036;font-weight:bold">Ferret</span>::<span style="color:#036;font-weight:bold">Analysis</span>::<span style="color:#036;font-weight:bold">LowerCaseFilter</span>.new(tokenizer)
 }
-index = <span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Index</span>::<span style="color:#036; font-weight:bold">Index</span>.new(<span style="color:#A60">:analyzer</span> =&gt; analyzer)
-</pre><br />
-  <br />
-A complete example can be found in <tt>misc/ferret_example.rb</tt>. The result<br />
-of running that example is shown in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1. Ferret Example Screenshot</a>.<br />
-  <br />
-<div class="figure">
-  <p class="title"><a class="list" id="Ferret-Example-Screenshot" href="#a-607147048">Figure 1</a>.&nbsp;&nbsp;<a class="here" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></p>
-  <div class="content"><img src="http://pluskid.lifegoo.com/wp-content/uploads/2008/02/rmmseg.png" alt="" /></div>
+index = <span style="color:#036;font-weight:bold">Ferret</span>::<span style="color:#036;font-weight:bold">Index</span>::<span style="color:#036;font-weight:bold">Index</span>.new(<span style="color:#A60">:analyzer</span> =&gt; analyzer)
+</pre>
+<p>A complete example can be found in <tt>misc/ferret_example.rb</tt>. The result
+of running that example is shown in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1. Ferret Example Screenshot</a>.</p>
+<p><div class="figure">
+  <p class="title"><a class="list" id="Ferret-Example-Screenshot" href="#a16148860">Figure 1</a>.&nbsp;&nbsp;<a class="here" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></p>
+  <div class="content"><img src="http://lifegoo.pluskid.org/wp-content/uploads/2008/02/rmmseg.png" alt="" /></div>
 </div></p></div>
-</div><br />
+</div>
 <div class="section">
   <h3 class="title">
-    <a class="list" id="Normal-Ruby-program" href="#a-607154008">3.2.3</a>&nbsp;&nbsp;<a class="here" href="#Normal-Ruby-program">Normal Ruby program</a>
+    <a class="list" id="Normal-Ruby-program" href="#a16113620">3.2.3</a>&nbsp;&nbsp;<a class="here" href="#Normal-Ruby-program">Normal Ruby program</a>
   </h3>
-  <div class="content"><p>rmmseg-cpp can also be used in normal Ruby programs. Just create<br />
-an <code class="code"><span style="color:#036; font-weight:bold">Algorithm</span></code> object and call <code class="code">next_token</code> until a <code class="code"><span style="color:#038; font-weight:bold">nil</span></code> is returned:</p>
+  <div class="content"><p>rmmseg-cpp can also be used in normal Ruby programs. Just create
+an <code class="code"><span style="color:#036;font-weight:bold">Algorithm</span></code> object and call <code class="code">next_token</code> until a <code class="code"><span style="color:#038;font-weight:bold">nil</span></code> is returned:</p>
 <pre class="code">
-algor = <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Algorithm</span>.new(text)
-loop <span style="color:#080; font-weight:bold">do</span>
+algor = <span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Algorithm</span>.new(text)
+loop <span style="color:#080;font-weight:bold">do</span>
   tok = algor.next_token
-  <span style="color:#080; font-weight:bold">break</span> <span style="color:#080; font-weight:bold">if</span> tok.nil?
-  puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>tok.text<span style="font-weight: bold; color: #888">}</span></span><span style="color:#D20"> [</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>tok.start<span style="font-weight: bold; color: #888">}</span></span><span style="color:#D20">..</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>tok.end<span style="font-weight: bold; color: #888">}</span></span><span style="color:#D20">]</span><span style="color:#710">&quot;</span></span>
-<span style="color:#080; font-weight:bold">end</span>
+  <span style="color:#080;font-weight:bold">break</span> <span style="color:#080;font-weight:bold">if</span> tok.nil?
+  puts <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="background:#ddd;color:black"><span style="background:#ddd;font-weight:bold;color:#666">#{</span>tok.text<span style="background:#ddd;font-weight:bold;color:#666">}</span></span><span style=""> [</span><span style="background:#ddd;color:black"><span style="background:#ddd;font-weight:bold;color:#666">#{</span>tok.start<span style="background:#ddd;font-weight:bold;color:#666">}</span></span><span style="">..</span><span style="background:#ddd;color:black"><span style="background:#ddd;font-weight:bold;color:#666">#{</span>tok.end<span style="background:#ddd;font-weight:bold;color:#666">}</span></span><span style="">]</span><span style="color:#710">&quot;</span></span>
+<span style="color:#080;font-weight:bold">end</span>
 </pre></div>
 </div></div>
-</div></div>
+</div></p></div>
 </div>
 <div class="chapter">
   <h1 class="title">
     Chapter
-    <a class="list" id="Who-use-it" href="#a-607162878">4</a>
+    <a class="list" id="Who-use-it" href="#a16072000">4</a>
     <br/>
@@ -1151,20 +1161,20 @@ loop <span style="color:#080; font-weight:bold">do</span>
   </h1>
   <div class="content"><p><div class="tip">
-  <p class="title"><a class="list" id="Expand-this-list" href="#a-607168148">Tip 1</a>.&nbsp;&nbsp;<a class="here" href="#Expand-this-list">Expand this list</a></p>
+  <p class="title"><a class="list" id="Expand-this-list" href="#a16067160">Tip 1</a>.&nbsp;&nbsp;<a class="here" href="#Expand-this-list">Expand this list</a></p>
-  <div class="content icon-tip">If you used rmmseg-cpp and would like your project to<br />
+  <div class="content icon-tip">If you used rmmseg-cpp and would like your project to
 appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</div>
 </div></p>
 <ul>
-	<li><a href="http://www.javaeye.com/">JavaEye</a>: One of the biggest software developper<br />
+	<li><a href="http://www.javaeye.com/">JavaEye</a>: One of the biggest software developer
   community in China.</li>
 </ul></div>
 </div>
 <div class="chapter">
   <h1 class="title">
     Chapter
-    <a class="list" id="Resources" href="#a-607172188">5</a>
+    <a class="list" id="Resources" href="#a16034860">5</a>
     <br/>
@@ -1174,7 +1184,7 @@ appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</
   <div class="content"><ul>
 	<li><a href="http://rubyforge.org/projects/rmmseg-cpp/">Project Home</a>: The Project page at RubyForge.</li>
 	<li><a href="http://rmmseg-cpp.rubyforge.org/rdoc/index.html">RDoc of rmmseg-cpp</a>: The auto generated rdoc of RMMSeg.</li>
-	<li><a href="http://pluskid.lifegoo.com/">Free Mind</a>: The author&#8217;s blog.</li>
+	<li><a href="http://blog.pluskid.org/">Free Mind</a>: The author&#8217;s blog.</li>
 	<li><a href="mailto:pluskid@gmail.com">Author&#8217;s Email</a>: Contact me if you have any problem.</li>
 </ul></div>
 </div></div>
@@ -1187,7 +1197,7 @@ appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</
     <div id="footer">
-      Generated on Wed Sep 17 10:18:56 -0400 2008 by <a href="http://gerbil.rubyforge.org">Gerbil</a> 3.1.0.
+      Generated on 2011-09-10 15:59:08 +0800 by <a href="http://gerbil.rubyforge.org">Gerbil</a> 3.1.0.
       <div id="footer-credits">
         <span class="icon-warning" style="float: right">&nbsp;</span>

metadata CHANGED

@@ -1,7 +1,12 @@
 --- !ruby/object:Gem::Specification
 name: rmmseg-cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.7
+  prerelease: false
+  segments:
+  - 0
+  - 2
+  - 9
+  version: 0.2.9
 platform: ruby
 authors:
 - pluskid
@@ -9,11 +14,25 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2008-09-17 00:00:00 -04:00
+date: 2011-09-10 00:00:00 +08:00
 default_executable:
 dependencies: []
-description: rmmseg-cpp is a high performance Chinese word segmentation utility for Ruby. It features full "Ferret":http://ferret.davebalmain.com/ integration as well as support for normal Ruby program usage.  rmmseg-cpp is a re-written of the original RMMSeg(http://rmmseg.rubyforge.org/) gem in C++. RMMSeg is written in pure Ruby. Though I tried hard to tweak RMMSeg, it just consumes lots of memory and the segmenting process is rather slow.  The interface is almost identical to RMMSeg but the performance is much better. This gem is always preferable in production use. However, if you want to understand how the MMSEG segmenting algorithm works, the source code of RMMSeg is a better choice than this.
+description: |-
+  rmmseg-cpp is a high performance Chinese word segmentation utility for
+  Ruby. It features full "Ferret":http://ferret.davebalmain.com/ integration
+  as well as support for normal Ruby program usage.
+  rmmseg-cpp is a rewrite of the original
+  RMMSeg(http://rmmseg.rubyforge.org/) gem in C++. RMMSeg is written
+  in pure Ruby. Though I tried hard to tweak RMMSeg, it just consumes
+  lots of memory and the segmenting process is rather slow.
+  The interface is almost identical to RMMSeg but the performance is
+  much better. This gem is always preferable in production
+  use. However, if you want to understand how the MMSEG segmenting
+  algorithm works, the source code of RMMSeg is a better choice than
+  this.
 email: pluskid@gmail.com
 executables:
 - rmmseg
@@ -69,6 +88,8 @@ files:
 - test/test_rmmseg.rb
 has_rdoc: true
 homepage: http://rmmseg-cpp.rubyforge.org
+licenses: []
 post_install_message:
 rdoc_options:
 - --main
@@ -77,23 +98,27 @@ require_paths:
 - lib
 - ext
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      segments:
+      - 0
       version: "0"
-  version:
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
+      segments:
+      - 0
       version: "0"
-  version:
 requirements: []
 rubyforge_project: rmmseg-cpp
-rubygems_version: 1.2.0
+rubygems_version: 1.3.7
 signing_key:
-specification_version: 2
+specification_version: 3
 summary: rmmseg-cpp is a high performance Chinese word segmentation utility for Ruby
 test_files:
 - test/test_rmmseg.rb