rmmseg-cpp 0.2.6 → 0.2.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Rakefile +1 -1
- data/ext/rmmseg/algor.cpp +33 -20
- data/ext/rmmseg/algor.h +9 -3
- data/ext/rmmseg/extconf.rb +1 -1
- data/ext/rmmseg/rules.h +13 -13
- data/misc/homepage.html +22 -22
- data/spec/rmmseg_spec.rb +1 -1
- metadata +3 -3
 
    
        data/History.txt
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    
    
        data/ext/rmmseg/algor.cpp
    CHANGED
    
    | 
         @@ -71,28 +71,27 @@ namespace rmmseg 
     | 
|
| 
       71 
71 
     | 
    
         | 
| 
       72 
72 
     | 
    
         
             
                Token Algorithm::get_cjk_word(int len)
         
     | 
| 
       73 
73 
     | 
    
         
             
                {
         
     | 
| 
       74 
     | 
    
         
            -
                     
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
                    if ( 
     | 
| 
       77 
     | 
    
         
            -
                        mm_filter( 
     | 
| 
       78 
     | 
    
         
            -
                    if ( 
     | 
| 
       79 
     | 
    
         
            -
                        lawl_filter( 
     | 
| 
       80 
     | 
    
         
            -
                    if ( 
     | 
| 
       81 
     | 
    
         
            -
                        svwl_filter( 
     | 
| 
       82 
     | 
    
         
            -
                    if ( 
     | 
| 
       83 
     | 
    
         
            -
                        lsdmfocw_filter( 
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
                    if ( 
     | 
| 
      
 74 
     | 
    
         
            +
                    create_chunks();
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
                    if (m_chunks_size > 1)
         
     | 
| 
      
 77 
     | 
    
         
            +
                        m_chunks_size = mm_filter(m_chunks, m_chunks_size);
         
     | 
| 
      
 78 
     | 
    
         
            +
                    if (m_chunks_size > 1)
         
     | 
| 
      
 79 
     | 
    
         
            +
                        m_chunks_size = lawl_filter(m_chunks, m_chunks_size);
         
     | 
| 
      
 80 
     | 
    
         
            +
                    if (m_chunks_size > 1)
         
     | 
| 
      
 81 
     | 
    
         
            +
                        m_chunks_size = svwl_filter(m_chunks, m_chunks_size);
         
     | 
| 
      
 82 
     | 
    
         
            +
                    if (m_chunks_size > 1)
         
     | 
| 
      
 83 
     | 
    
         
            +
                        m_chunks_size = lsdmfocw_filter(m_chunks, m_chunks_size);
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                    if (m_chunks_size < 1)
         
     | 
| 
       86 
86 
     | 
    
         
             
                        return Token(NULL, 0);
         
     | 
| 
       87 
87 
     | 
    
         | 
| 
       88 
     | 
    
         
            -
                    Token token(m_text+m_pos,  
     | 
| 
       89 
     | 
    
         
            -
                    m_pos +=  
     | 
| 
      
 88 
     | 
    
         
            +
                    Token token(m_text+m_pos, m_chunks[0].words[0]->nbytes);
         
     | 
| 
      
 89 
     | 
    
         
            +
                    m_pos += m_chunks[0].words[0]->nbytes;
         
     | 
| 
       90 
90 
     | 
    
         
             
                    return token;
         
     | 
| 
       91 
91 
     | 
    
         
             
                }
         
     | 
| 
       92 
92 
     | 
    
         | 
| 
       93 
     | 
    
         
            -
                 
     | 
| 
      
 93 
     | 
    
         
            +
                void Algorithm::create_chunks()
         
     | 
| 
       94 
94 
     | 
    
         
             
                {
         
     | 
| 
       95 
     | 
    
         
            -
                    vector<Chunk> chunks;
         
     | 
| 
       96 
95 
     | 
    
         
             
                    Chunk chunk;
         
     | 
| 
       97 
96 
     | 
    
         
             
                    Word *w1, *w2, *w3;
         
     | 
| 
       98 
97 
     | 
    
         | 
| 
         @@ -101,6 +100,8 @@ namespace rmmseg 
     | 
|
| 
       101 
100 
     | 
    
         
             
                    typedef vec_t::iterator it_t;
         
     | 
| 
       102 
101 
     | 
    
         | 
| 
       103 
102 
     | 
    
         
             
                    vec_t words1 = find_match_words();
         
     | 
| 
      
 103 
     | 
    
         
            +
                    m_chunks_size = 0;
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
       104 
105 
     | 
    
         
             
                    for (it_t i1 = words1.begin();
         
     | 
| 
       105 
106 
     | 
    
         
             
                         i1 != words1.end();
         
     | 
| 
       106 
107 
     | 
    
         
             
                         ++i1)
         
     | 
| 
         @@ -135,13 +136,17 @@ namespace rmmseg 
     | 
|
| 
       135 
136 
     | 
    
         
             
                                            chunk.n = 3;
         
     | 
| 
       136 
137 
     | 
    
         
             
                                            chunk.words[2] = w3;
         
     | 
| 
       137 
138 
     | 
    
         
             
                                        }
         
     | 
| 
       138 
     | 
    
         
            -
                                         
     | 
| 
      
 139 
     | 
    
         
            +
                                        memcpy(m_chunks+m_chunks_size, &chunk,
         
     | 
| 
      
 140 
     | 
    
         
            +
                                               sizeof(Chunk));
         
     | 
| 
      
 141 
     | 
    
         
            +
                                        m_chunks_size++;
         
     | 
| 
       139 
142 
     | 
    
         
             
                                    }
         
     | 
| 
       140 
143 
     | 
    
         
             
                                }
         
     | 
| 
       141 
144 
     | 
    
         
             
                                else if (m_pos == m_text_length)
         
     | 
| 
       142 
145 
     | 
    
         
             
                                {
         
     | 
| 
       143 
146 
     | 
    
         
             
                                    chunk.n = 2;
         
     | 
| 
       144 
     | 
    
         
            -
                                     
     | 
| 
      
 147 
     | 
    
         
            +
                                    memcpy(m_chunks+m_chunks_size, &chunk,
         
     | 
| 
      
 148 
     | 
    
         
            +
                                           sizeof(Chunk));
         
     | 
| 
      
 149 
     | 
    
         
            +
                                    m_chunks_size++;
         
     | 
| 
       145 
150 
     | 
    
         
             
                                }
         
     | 
| 
       146 
151 
     | 
    
         
             
                                m_pos -= w2->nbytes;
         
     | 
| 
       147 
152 
     | 
    
         
             
                            }
         
     | 
| 
         @@ -149,13 +154,13 @@ namespace rmmseg 
     | 
|
| 
       149 
154 
     | 
    
         
             
                        else if (m_pos == m_text_length)
         
     | 
| 
       150 
155 
     | 
    
         
             
                        {
         
     | 
| 
       151 
156 
     | 
    
         
             
                            chunk.n = 1;
         
     | 
| 
       152 
     | 
    
         
            -
                             
     | 
| 
      
 157 
     | 
    
         
            +
                            memcpy(m_chunks+m_chunks_size, &chunk, sizeof(Chunk));
         
     | 
| 
      
 158 
     | 
    
         
            +
                            m_chunks_size++;
         
     | 
| 
       153 
159 
     | 
    
         
             
                        }
         
     | 
| 
       154 
160 
     | 
    
         
             
                        m_pos -= w1->nbytes;
         
     | 
| 
       155 
161 
     | 
    
         
             
                    }
         
     | 
| 
       156 
162 
     | 
    
         | 
| 
       157 
163 
     | 
    
         
             
                    m_pos = orig_pos;
         
     | 
| 
       158 
     | 
    
         
            -
                    return chunks;
         
     | 
| 
       159 
164 
     | 
    
         
             
                }
         
     | 
| 
       160 
165 
     | 
    
         | 
| 
       161 
166 
     | 
    
         
             
                int Algorithm::next_char()
         
     | 
| 
         @@ -163,9 +168,17 @@ namespace rmmseg 
     | 
|
| 
       163 
168 
     | 
    
         
             
                    // ONLY for UTF-8
         
     | 
| 
       164 
169 
     | 
    
         
             
                    unsigned char ch = m_text[m_pos];
         
     | 
| 
       165 
170 
     | 
    
         
             
                    if (ch >= 0xC0 && ch <= 0xDF)
         
     | 
| 
      
 171 
     | 
    
         
            +
                    {
         
     | 
| 
      
 172 
     | 
    
         
            +
                        if (m_text_length-m_pos < 2)
         
     | 
| 
      
 173 
     | 
    
         
            +
                            return 1; /* broken text at the end */
         
     | 
| 
       166 
174 
     | 
    
         
             
                        return 2;
         
     | 
| 
      
 175 
     | 
    
         
            +
                    }
         
     | 
| 
       167 
176 
     | 
    
         
             
                    if (ch >= 0xE0 && ch <= 0xEF)
         
     | 
| 
      
 177 
     | 
    
         
            +
                    {
         
     | 
| 
      
 178 
     | 
    
         
            +
                        if (m_text_length-m_pos < 3)
         
     | 
| 
      
 179 
     | 
    
         
            +
                            return 1; /* broken text at the end */
         
     | 
| 
       168 
180 
     | 
    
         
             
                        return 3;
         
     | 
| 
      
 181 
     | 
    
         
            +
                    }
         
     | 
| 
       169 
182 
     | 
    
         
             
                    return 1;
         
     | 
| 
       170 
183 
     | 
    
         
             
                }
         
     | 
| 
       171 
184 
     | 
    
         | 
    
        data/ext/rmmseg/algor.h
    CHANGED
    
    | 
         @@ -22,7 +22,7 @@ namespace rmmseg 
     | 
|
| 
       22 
22 
     | 
    
         
             
                {
         
     | 
| 
       23 
23 
     | 
    
         
             
                public:
         
     | 
| 
       24 
24 
     | 
    
         
             
                    Algorithm(const char *text, int length)
         
     | 
| 
       25 
     | 
    
         
            -
                        :m_text(text), m_pos(0),
         
     | 
| 
      
 25 
     | 
    
         
            +
                        :m_chunks_size(0), m_text(text), m_pos(0),
         
     | 
| 
       26 
26 
     | 
    
         
             
                        m_text_length(length),
         
     | 
| 
       27 
27 
     | 
    
         
             
                        m_tmp_words_i(0),
         
     | 
| 
       28 
28 
     | 
    
         
             
                        m_match_cache_i(0)
         
     | 
| 
         @@ -36,13 +36,19 @@ namespace rmmseg 
     | 
|
| 
       36 
36 
     | 
    
         
             
                private:
         
     | 
| 
       37 
37 
     | 
    
         
             
                    Token get_basic_latin_word();
         
     | 
| 
       38 
38 
     | 
    
         
             
                    Token get_cjk_word(int);
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                    static const int MAX_WORD_LENGTH = 4;
         
     | 
| 
      
 41 
     | 
    
         
            +
                    static const int MAX_N_CHUNKS = \
         
     | 
| 
      
 42 
     | 
    
         
            +
                        MAX_WORD_LENGTH*MAX_WORD_LENGTH*MAX_WORD_LENGTH;
         
     | 
| 
       39 
43 
     | 
    
         | 
| 
       40 
     | 
    
         
            -
                     
     | 
| 
      
 44 
     | 
    
         
            +
                    void create_chunks();
         
     | 
| 
       41 
45 
     | 
    
         
             
                    int next_word();
         
     | 
| 
       42 
46 
     | 
    
         
             
                    int next_char();
         
     | 
| 
       43 
47 
     | 
    
         
             
                    std::vector<Word *> find_match_words();
         
     | 
| 
       44 
     | 
    
         
            -
                    int max_word_length() { return  
     | 
| 
      
 48 
     | 
    
         
            +
                    int max_word_length() { return MAX_WORD_LENGTH; }
         
     | 
| 
       45 
49 
     | 
    
         | 
| 
      
 50 
     | 
    
         
            +
                    Chunk m_chunks[MAX_N_CHUNKS];
         
     | 
| 
      
 51 
     | 
    
         
            +
                    int m_chunks_size;
         
     | 
| 
       46 
52 
     | 
    
         | 
| 
       47 
53 
     | 
    
         
             
                    const char *m_text;
         
     | 
| 
       48 
54 
     | 
    
         
             
                    int m_pos;
         
     | 
    
        data/ext/rmmseg/extconf.rb
    CHANGED
    
    | 
         @@ -10,7 +10,7 @@ CONFIG['LDSHARED'] = CONFIG['LDSHARED'].sub(/^\$\(CC\)/, 'g++') 
     | 
|
| 
       10 
10 
     | 
    
         
             
            # end
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
12 
     | 
    
         
             
            if RUBY_PLATFORM =~ /darwin/
         
     | 
| 
       13 
     | 
    
         
            -
              CONFIG[' 
     | 
| 
      
 13 
     | 
    
         
            +
              CONFIG['LDSHARED'] = 'g++ -dynamiclib -single_module -flat_namespace -undefined suppress'
         
     | 
| 
       14 
14 
     | 
    
         
             
            end
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
       16 
16 
     | 
    
         
             
            $objs = ['algor.o', 'dict.o', 'memory.o', 'rmmseg.o']
         
     | 
    
        data/ext/rmmseg/rules.h
    CHANGED
    
    | 
         @@ -9,12 +9,12 @@ 
     | 
|
| 
       9 
9 
     | 
    
         
             
            namespace rmmseg
         
     | 
| 
       10 
10 
     | 
    
         
             
            {
         
     | 
| 
       11 
11 
     | 
    
         
             
                template <typename Cmp>
         
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
      
 12 
     | 
    
         
            +
                int take_highest(Chunk *chunks, int n, Cmp &cmp)
         
     | 
| 
       13 
13 
     | 
    
         
             
                {
         
     | 
| 
       14 
14 
     | 
    
         
             
                    int i = 1, j;
         
     | 
| 
       15 
     | 
    
         
            -
                    Chunk& 
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
                    for (j = 1; j <  
     | 
| 
      
 15 
     | 
    
         
            +
                    Chunk &max = chunks[0];
         
     | 
| 
      
 16 
     | 
    
         
            +
                    
         
     | 
| 
      
 17 
     | 
    
         
            +
                    for (j = 1; j < n; ++j)
         
     | 
| 
       18 
18 
     | 
    
         
             
                    {
         
     | 
| 
       19 
19 
     | 
    
         
             
                        int rlt = cmp(chunks[j], max);
         
     | 
| 
       20 
20 
     | 
    
         
             
                        if (rlt > 0)
         
     | 
| 
         @@ -22,7 +22,7 @@ namespace rmmseg 
     | 
|
| 
       22 
22 
     | 
    
         
             
                        if (rlt >= 0)
         
     | 
| 
       23 
23 
     | 
    
         
             
                            std::swap(chunks[i++], chunks[j]);
         
     | 
| 
       24 
24 
     | 
    
         
             
                    }
         
     | 
| 
       25 
     | 
    
         
            -
                     
     | 
| 
      
 25 
     | 
    
         
            +
                    return i;
         
     | 
| 
       26 
26 
     | 
    
         
             
                }
         
     | 
| 
       27 
27 
     | 
    
         | 
| 
       28 
28 
     | 
    
         
             
                struct MMCmp_t
         
     | 
| 
         @@ -32,9 +32,9 @@ namespace rmmseg 
     | 
|
| 
       32 
32 
     | 
    
         
             
                        return a.total_length() - b.total_length();
         
     | 
| 
       33 
33 
     | 
    
         
             
                    }
         
     | 
| 
       34 
34 
     | 
    
         
             
                } MMCmp;
         
     | 
| 
       35 
     | 
    
         
            -
                 
     | 
| 
      
 35 
     | 
    
         
            +
                int mm_filter(Chunk *chunks, int n)
         
     | 
| 
       36 
36 
     | 
    
         
             
                {
         
     | 
| 
       37 
     | 
    
         
            -
                    take_highest(chunks, MMCmp);
         
     | 
| 
      
 37 
     | 
    
         
            +
                    return take_highest(chunks, n, MMCmp);
         
     | 
| 
       38 
38 
     | 
    
         
             
                }
         
     | 
| 
       39 
39 
     | 
    
         | 
| 
       40 
40 
     | 
    
         
             
                struct LAWLCmp_t
         
     | 
| 
         @@ -49,9 +49,9 @@ namespace rmmseg 
     | 
|
| 
       49 
49 
     | 
    
         
             
                        return -1;
         
     | 
| 
       50 
50 
     | 
    
         
             
                    }
         
     | 
| 
       51 
51 
     | 
    
         
             
                } LAWLCmp;
         
     | 
| 
       52 
     | 
    
         
            -
                 
     | 
| 
      
 52 
     | 
    
         
            +
                int lawl_filter(Chunk *chunks, int n)
         
     | 
| 
       53 
53 
     | 
    
         
             
                {
         
     | 
| 
       54 
     | 
    
         
            -
                    take_highest(chunks, LAWLCmp);
         
     | 
| 
      
 54 
     | 
    
         
            +
                    return take_highest(chunks, n, LAWLCmp);
         
     | 
| 
       55 
55 
     | 
    
         
             
                }
         
     | 
| 
       56 
56 
     | 
    
         | 
| 
       57 
57 
     | 
    
         
             
                struct SVWLCmp_t
         
     | 
| 
         @@ -66,9 +66,9 @@ namespace rmmseg 
     | 
|
| 
       66 
66 
     | 
    
         
             
                        return -1;
         
     | 
| 
       67 
67 
     | 
    
         
             
                    }
         
     | 
| 
       68 
68 
     | 
    
         
             
                } SVWLCmp;
         
     | 
| 
       69 
     | 
    
         
            -
                 
     | 
| 
      
 69 
     | 
    
         
            +
                int svwl_filter(Chunk *chunks, int n)
         
     | 
| 
       70 
70 
     | 
    
         
             
                {
         
     | 
| 
       71 
     | 
    
         
            -
                    take_highest(chunks, SVWLCmp);
         
     | 
| 
      
 71 
     | 
    
         
            +
                    return take_highest(chunks, n, SVWLCmp);
         
     | 
| 
       72 
72 
     | 
    
         
             
                }
         
     | 
| 
       73 
73 
     | 
    
         | 
| 
       74 
74 
     | 
    
         
             
                struct LSDMFOCWCmp_t
         
     | 
| 
         @@ -78,9 +78,9 @@ namespace rmmseg 
     | 
|
| 
       78 
78 
     | 
    
         
             
                        return a.degree_of_morphemic_freedom() - b.degree_of_morphemic_freedom();
         
     | 
| 
       79 
79 
     | 
    
         
             
                    }
         
     | 
| 
       80 
80 
     | 
    
         
             
                } LSDMFOCWCmp;
         
     | 
| 
       81 
     | 
    
         
            -
                 
     | 
| 
      
 81 
     | 
    
         
            +
                int lsdmfocw_filter(Chunk *chunks, int n)
         
     | 
| 
       82 
82 
     | 
    
         
             
                {
         
     | 
| 
       83 
     | 
    
         
            -
                    take_highest(chunks, LSDMFOCWCmp);
         
     | 
| 
      
 83 
     | 
    
         
            +
                    return take_highest(chunks, n, LSDMFOCWCmp);
         
     | 
| 
       84 
84 
     | 
    
         
             
                }
         
     | 
| 
       85 
85 
     | 
    
         
             
            }
         
     | 
| 
       86 
86 
     | 
    
         | 
    
        data/misc/homepage.html
    CHANGED
    
    | 
         @@ -3,7 +3,7 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
            <html xmlns="http://www.w3.org/1999/xhtml">
         
     | 
| 
       4 
4 
     | 
    
         
             
              <head>
         
     | 
| 
       5 
5 
     | 
    
         
             
                <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
         
     | 
| 
       6 
     | 
    
         
            -
                <meta name="date" content=" 
     | 
| 
      
 6 
     | 
    
         
            +
                <meta name="date" content="17 September 2008"/>
         
     | 
| 
       7 
7 
     | 
    
         
             
                <meta name="author" content="pluskid"/>
         
     | 
| 
       8 
8 
     | 
    
         
             
                <meta name="generator" content="Gerbil 3.1.0"/>
         
     | 
| 
       9 
9 
     | 
    
         
             
                <title>rmmseg-cpp Homepage</title>
         
     | 
| 
         @@ -929,7 +929,7 @@ 
     | 
|
| 
       929 
929 
     | 
    
         | 
| 
       930 
930 
     | 
    
         
             
                  <h1 class="title"><a class="here" href="#">rmmseg-cpp Homepage</a></h1>
         
     | 
| 
       931 
931 
     | 
    
         
             
                  <h2 class="authors"><a href="http://pluskid.lifegoo.com">pluskid</a></h2>
         
     | 
| 
       932 
     | 
    
         
            -
                  <h3 class="date"> 
     | 
| 
      
 932 
     | 
    
         
            +
                  <h3 class="date">17 September 2008</h3>
         
     | 
| 
       933 
933 
     | 
    
         | 
| 
       934 
934 
     | 
    
         
             
                </div>
         
     | 
| 
       935 
935 
     | 
    
         | 
| 
         @@ -943,12 +943,12 @@ 
     | 
|
| 
       943 
943 
     | 
    
         
             
                  <div id="Contents">
         
     | 
| 
       944 
944 
     | 
    
         
             
                    <h1 class="title"><a class="here" href="#Contents">Contents</a></h1>
         
     | 
| 
       945 
945 
     | 
    
         
             
                    <ul>
         
     | 
| 
       946 
     | 
    
         
            -
                      <li>1  <a id="a- 
     | 
| 
      
 946 
     | 
    
         
            +
                      <li>1  <a id="a-607090478" href="#Introduction">Introduction</a></li><li>2  <a id="a-607093208" href="#Setup">Setup</a><ul><li>2.1  <a id="a-607094728" href="#Requirements">Requirements</a></li><li>2.2  <a id="a-607099478" href="#Installation">Installation</a><ul><li>2.2.1  <a id="a-607103648" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2  <a id="a-607106038" href="#From-Git">From Git</a></li></ul></li></ul></li><li>3  <a id="a-607115348" href="#Usage">Usage</a><ul><li>3.1  <a id="a-607120028" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2  <a id="a-607126248" href="#Use-in-Ruby-program">Use in Ruby program</a><ul><li>3.2.1  <a id="a-607131168" href="#Initialize">Initialize</a></li><li>3.2.2  <a id="a-607137248" href="#Ferret-Integration">Ferret Integration</a></li><li>3.2.3  <a id="a-607154008" href="#Normal-Ruby-program">Normal Ruby program</a></li></ul></li></ul></li><li>4  <a id="a-607162878" href="#Who-use-it">Who use it</a></li><li>5  <a id="a-607172188" href="#Resources">Resources</a></li>
         
     | 
| 
       947 
947 
     | 
    
         | 
| 
       948 
948 
     | 
    
         
             
                    </ul>
         
     | 
| 
       949 
949 
     | 
    
         
             
                  </div>
         
     | 
| 
       950 
950 
     | 
    
         | 
| 
       951 
     | 
    
         
            -
                  <div id="lof"><h1 id="Figures" class="title"><a class="here" href="#Figures">Figures</a></h1> <ol><li><a id="a- 
     | 
| 
      
 951 
     | 
    
         
            +
                  <div id="lof"><h1 id="Figures" class="title"><a class="here" href="#Figures">Figures</a></h1> <ol><li><a id="a-607147048" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1 id="Tips" class="title"><a class="here" href="#Tips">Tips</a></h1> <ol><li><a id="a-607168148" href="#Expand-this-list">Expand this list</a></li></ol><h1 id="Warnings" class="title"><a class="here" href="#Warnings">Warnings</a></h1> <ol><li><a id="a-607107678" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></li></ol></div>
         
     | 
| 
       952 
952 
     | 
    
         | 
| 
       953 
953 
     | 
    
         
             
                <br style="display: none"/>
         
     | 
| 
       954 
954 
     | 
    
         
             
                <hr style="display: none"/>
         
     | 
| 
         @@ -958,7 +958,7 @@ 
     | 
|
| 
       958 
958 
     | 
    
         
             
            <div class="chapter">
         
     | 
| 
       959 
959 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       960 
960 
     | 
    
         
             
                Chapter
         
     | 
| 
       961 
     | 
    
         
            -
                <a class="list" id="Introduction" href="#a- 
     | 
| 
      
 961 
     | 
    
         
            +
                <a class="list" id="Introduction" href="#a-607090478">1</a>
         
     | 
| 
       962 
962 
     | 
    
         | 
| 
       963 
963 
     | 
    
         
             
                <br/>
         
     | 
| 
       964 
964 
     | 
    
         | 
| 
         @@ -981,7 +981,7 @@ this.</p></div> 
     | 
|
| 
       981 
981 
     | 
    
         
             
            <div class="chapter">
         
     | 
| 
       982 
982 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       983 
983 
     | 
    
         
             
                Chapter
         
     | 
| 
       984 
     | 
    
         
            -
                <a class="list" id="Setup" href="#a- 
     | 
| 
      
 984 
     | 
    
         
            +
                <a class="list" id="Setup" href="#a-607093208">2</a>
         
     | 
| 
       985 
985 
     | 
    
         | 
| 
       986 
986 
     | 
    
         
             
                <br/>
         
     | 
| 
       987 
987 
     | 
    
         | 
| 
         @@ -990,7 +990,7 @@ this.</p></div> 
     | 
|
| 
       990 
990 
     | 
    
         | 
| 
       991 
991 
     | 
    
         
             
              <div class="content"><div class="section">
         
     | 
| 
       992 
992 
     | 
    
         
             
              <h2 class="title">
         
     | 
| 
       993 
     | 
    
         
            -
                <a class="list" id="Requirements" href="#a- 
     | 
| 
      
 993 
     | 
    
         
            +
                <a class="list" id="Requirements" href="#a-607094728">2.1</a>  <a class="here" href="#Requirements">Requirements</a>
         
     | 
| 
       994 
994 
     | 
    
         
             
              </h2>
         
     | 
| 
       995 
995 
     | 
    
         
             
              <div class="content"><p>Your system needs the following software to run RMMSeg.</p>
         
     | 
| 
       996 
996 
     | 
    
         
             
            <table border="1">
         
     | 
| 
         @@ -1014,11 +1014,11 @@ this.</p></div> 
     | 
|
| 
       1014 
1014 
     | 
    
         
             
            </div><br />
         
     | 
| 
       1015 
1015 
     | 
    
         
             
            <div class="section">
         
     | 
| 
       1016 
1016 
     | 
    
         
             
              <h2 class="title">
         
     | 
| 
       1017 
     | 
    
         
            -
                <a class="list" id="Installation" href="#a- 
     | 
| 
      
 1017 
     | 
    
         
            +
                <a class="list" id="Installation" href="#a-607099478">2.2</a>  <a class="here" href="#Installation">Installation</a>
         
     | 
| 
       1018 
1018 
     | 
    
         
             
              </h2>
         
     | 
| 
       1019 
1019 
     | 
    
         
             
              <div class="content"><div class="section">
         
     | 
| 
       1020 
1020 
     | 
    
         
             
              <h3 class="title">
         
     | 
| 
       1021 
     | 
    
         
            -
                <a class="list" id="Using-RubyGems" href="#a- 
     | 
| 
      
 1021 
     | 
    
         
            +
                <a class="list" id="Using-RubyGems" href="#a-607103648">2.2.1</a>  <a class="here" href="#Using-RubyGems">Using RubyGems</a>
         
     | 
| 
       1022 
1022 
     | 
    
         
             
              </h3>
         
     | 
| 
       1023 
1023 
     | 
    
         
             
              <div class="content"><p>To install the gem remotely from <a href="http://rubyforge.org">RubyForge</a>:</p>
         
     | 
| 
       1024 
1024 
     | 
    
         
             
            sudo gem install rmmseg-cpp
         
     | 
| 
         @@ -1029,12 +1029,12 @@ sudo gem install —local rmmseg-cpp-x.y.z.gem</div> 
     | 
|
| 
       1029 
1029 
     | 
    
         
             
            </div><br />
         
     | 
| 
       1030 
1030 
     | 
    
         
             
            <div class="section">
         
     | 
| 
       1031 
1031 
     | 
    
         
             
              <h3 class="title">
         
     | 
| 
       1032 
     | 
    
         
            -
                <a class="list" id="From-Git" href="#a- 
     | 
| 
      
 1032 
     | 
    
         
            +
                <a class="list" id="From-Git" href="#a-607106038">2.2.2</a>  <a class="here" href="#From-Git">From Git</a>
         
     | 
| 
       1033 
1033 
     | 
    
         
             
              </h3>
         
     | 
| 
       1034 
1034 
     | 
    
         
             
              <div class="content"><p>To build the gem manually from the latest source code. You’ll<br />
         
     | 
| 
       1035 
1035 
     | 
    
         
             
            need to have <strong>git</strong> and <strong>rake</strong> installed.</p>
         
     | 
| 
       1036 
1036 
     | 
    
         
             
            <p><div class="warning">
         
     | 
| 
       1037 
     | 
    
         
            -
              <p class="title"><a class="list" id="The-latest-source-code-may-be-unstable" href="#a- 
     | 
| 
      
 1037 
     | 
    
         
            +
              <p class="title"><a class="list" id="The-latest-source-code-may-be-unstable" href="#a-607107678">Warning 1</a>.  <a class="here" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></p>
         
     | 
| 
       1038 
1038 
     | 
    
         | 
| 
       1039 
1039 
     | 
    
         
             
              <div class="content icon-warning">While I tried to avoid such kind of problems, the source<br />
         
     | 
| 
       1040 
1040 
     | 
    
         
             
            code from the repository might still be broken sometimes.<br />
         
     | 
| 
         @@ -1053,7 +1053,7 @@ rake gem:install</div> 
     | 
|
| 
       1053 
1053 
     | 
    
         
             
            <div class="chapter">
         
     | 
| 
       1054 
1054 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       1055 
1055 
     | 
    
         
             
                Chapter
         
     | 
| 
       1056 
     | 
    
         
            -
                <a class="list" id="Usage" href="#a- 
     | 
| 
      
 1056 
     | 
    
         
            +
                <a class="list" id="Usage" href="#a-607115348">3</a>
         
     | 
| 
       1057 
1057 
     | 
    
         | 
| 
       1058 
1058 
     | 
    
         
             
                <br/>
         
     | 
| 
       1059 
1059 
     | 
    
         | 
| 
         @@ -1062,7 +1062,7 @@ rake gem:install</div> 
     | 
|
| 
       1062 
1062 
     | 
    
         | 
| 
       1063 
1063 
     | 
    
         
             
              <div class="content"><div class="section">
         
     | 
| 
       1064 
1064 
     | 
    
         
             
              <h2 class="title">
         
     | 
| 
       1065 
     | 
    
         
            -
                <a class="list" id="Stand-Alone-rmmseg" href="#a- 
     | 
| 
      
 1065 
     | 
    
         
            +
                <a class="list" id="Stand-Alone-rmmseg" href="#a-607120028">3.1</a>  <a class="here" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a>
         
     | 
| 
       1066 
1066 
     | 
    
         
             
              </h2>
         
     | 
| 
       1067 
1067 
     | 
    
         
             
              <div class="content"><p>rmmseg-cpp comes with a script <strong>rmmseg</strong>. To get the basic usage, just execute it with <tt>-h</tt> option:<br />
         
     | 
| 
       1068 
1068 
     | 
    
         
             
              <br />
         
     | 
| 
         @@ -1074,11 +1074,11 @@ $ echo “我们都喜欢用 Ruby” | rmmseg 
     | 
|
| 
       1074 
1074 
     | 
    
         
             
            </div><br />
         
     | 
| 
       1075 
1075 
     | 
    
         
             
            <div class="section">
         
     | 
| 
       1076 
1076 
     | 
    
         
             
              <h2 class="title">
         
     | 
| 
       1077 
     | 
    
         
            -
                <a class="list" id="Use-in-Ruby-program" href="#a- 
     | 
| 
      
 1077 
     | 
    
         
            +
                <a class="list" id="Use-in-Ruby-program" href="#a-607126248">3.2</a>  <a class="here" href="#Use-in-Ruby-program">Use in Ruby program</a>
         
     | 
| 
       1078 
1078 
     | 
    
         
             
              </h2>
         
     | 
| 
       1079 
1079 
     | 
    
         
             
              <div class="content"><div class="section">
         
     | 
| 
       1080 
1080 
     | 
    
         
             
              <h3 class="title">
         
     | 
| 
       1081 
     | 
    
         
            -
                <a class="list" id="Initialize" href="#a- 
     | 
| 
      
 1081 
     | 
    
         
            +
                <a class="list" id="Initialize" href="#a-607131168">3.2.1</a>  <a class="here" href="#Initialize">Initialize</a>
         
     | 
| 
       1082 
1082 
     | 
    
         
             
              </h3>
         
     | 
| 
       1083 
1083 
     | 
    
         
             
              <div class="content"><p>To use rmmseg-cpp in Ruby program, you’ll first load it with RubyGems:</p>
         
     | 
| 
       1084 
1084 
     | 
    
         
             
            <pre class="code">
         
     | 
| 
         @@ -1095,7 +1095,7 @@ how to add your own dictionaries) and load all dictionaries:</p> 
     | 
|
| 
       1095 
1095 
     | 
    
         
             
            </div><br />
         
     | 
| 
       1096 
1096 
     | 
    
         
             
            <div class="section">
         
     | 
| 
       1097 
1097 
     | 
    
         
             
              <h3 class="title">
         
     | 
| 
       1098 
     | 
    
         
            -
                <a class="list" id="Ferret-Integration" href="#a- 
     | 
| 
      
 1098 
     | 
    
         
            +
                <a class="list" id="Ferret-Integration" href="#a-607137248">3.2.2</a>  <a class="here" href="#Ferret-Integration">Ferret Integration</a>
         
     | 
| 
       1099 
1099 
     | 
    
         
             
              </h3>
         
     | 
| 
       1100 
1100 
     | 
    
         
             
              <div class="content"><p>To use rmmseg-cpp with Ferret, you’ll need to <code class="code">require</code> the<br />
         
     | 
| 
       1101 
1101 
     | 
    
         
             
            Ferret support of rmmseg-cpp (Of course you’ll also have to<br />
         
     | 
| 
         @@ -1119,13 +1119,13 @@ A complete example can be found in <tt>misc/ferret_example.rb</tt>. The result<b 
     | 
|
| 
       1119 
1119 
     | 
    
         
             
            of running that example is shown in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1. Ferret Example Screenshot</a>.<br />
         
     | 
| 
       1120 
1120 
     | 
    
         
             
              <br />
         
     | 
| 
       1121 
1121 
     | 
    
         
             
            <div class="figure">
         
     | 
| 
       1122 
     | 
    
         
            -
              <p class="title"><a class="list" id="Ferret-Example-Screenshot" href="#a- 
     | 
| 
      
 1122 
     | 
    
         
            +
              <p class="title"><a class="list" id="Ferret-Example-Screenshot" href="#a-607147048">Figure 1</a>.  <a class="here" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></p>
         
     | 
| 
       1123 
1123 
     | 
    
         
             
              <div class="content"><img src="http://pluskid.lifegoo.com/wp-content/uploads/2008/02/rmmseg.png" alt="" /></div>
         
     | 
| 
       1124 
1124 
     | 
    
         
             
            </div></p></div>
         
     | 
| 
       1125 
1125 
     | 
    
         
             
            </div><br />
         
     | 
| 
       1126 
1126 
     | 
    
         
             
            <div class="section">
         
     | 
| 
       1127 
1127 
     | 
    
         
             
              <h3 class="title">
         
     | 
| 
       1128 
     | 
    
         
            -
                <a class="list" id="Normal-Ruby-program" href="#a- 
     | 
| 
      
 1128 
     | 
    
         
            +
                <a class="list" id="Normal-Ruby-program" href="#a-607154008">3.2.3</a>  <a class="here" href="#Normal-Ruby-program">Normal Ruby program</a>
         
     | 
| 
       1129 
1129 
     | 
    
         
             
              </h3>
         
     | 
| 
       1130 
1130 
     | 
    
         
             
              <div class="content"><p>rmmseg-cpp can also be used in normal Ruby programs. Just create<br />
         
     | 
| 
       1131 
1131 
     | 
    
         
             
            an <code class="code"><span style="color:#036; font-weight:bold">Algorithm</span></code> object and call <code class="code">next_token</code> until a <code class="code"><span style="color:#038; font-weight:bold">nil</span></code> is returned:</p>
         
     | 
| 
         @@ -1143,7 +1143,7 @@ loop <span style="color:#080; font-weight:bold">do</span> 
     | 
|
| 
       1143 
1143 
     | 
    
         
             
            <div class="chapter">
         
     | 
| 
       1144 
1144 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       1145 
1145 
     | 
    
         
             
                Chapter
         
     | 
| 
       1146 
     | 
    
         
            -
                <a class="list" id="Who-use-it" href="#a- 
     | 
| 
      
 1146 
     | 
    
         
            +
                <a class="list" id="Who-use-it" href="#a-607162878">4</a>
         
     | 
| 
       1147 
1147 
     | 
    
         | 
| 
       1148 
1148 
     | 
    
         
             
                <br/>
         
     | 
| 
       1149 
1149 
     | 
    
         | 
| 
         @@ -1151,7 +1151,7 @@ loop <span style="color:#080; font-weight:bold">do</span> 
     | 
|
| 
       1151 
1151 
     | 
    
         
             
              </h1>
         
     | 
| 
       1152 
1152 
     | 
    
         | 
| 
       1153 
1153 
     | 
    
         
             
              <div class="content"><p><div class="tip">
         
     | 
| 
       1154 
     | 
    
         
            -
              <p class="title"><a class="list" id="Expand-this-list" href="#a- 
     | 
| 
      
 1154 
     | 
    
         
            +
              <p class="title"><a class="list" id="Expand-this-list" href="#a-607168148">Tip 1</a>.  <a class="here" href="#Expand-this-list">Expand this list</a></p>
         
     | 
| 
       1155 
1155 
     | 
    
         | 
| 
       1156 
1156 
     | 
    
         
             
              <div class="content icon-tip">If you used rmmseg-cpp and would like your project to<br />
         
     | 
| 
       1157 
1157 
     | 
    
         
             
            appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</div>
         
     | 
| 
         @@ -1164,7 +1164,7 @@ appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</ 
     | 
|
| 
       1164 
1164 
     | 
    
         
             
            <div class="chapter">
         
     | 
| 
       1165 
1165 
     | 
    
         
             
              <h1 class="title">
         
     | 
| 
       1166 
1166 
     | 
    
         
             
                Chapter
         
     | 
| 
       1167 
     | 
    
         
            -
                <a class="list" id="Resources" href="#a- 
     | 
| 
      
 1167 
     | 
    
         
            +
                <a class="list" id="Resources" href="#a-607172188">5</a>
         
     | 
| 
       1168 
1168 
     | 
    
         | 
| 
       1169 
1169 
     | 
    
         
             
                <br/>
         
     | 
| 
       1170 
1170 
     | 
    
         | 
| 
         @@ -1187,7 +1187,7 @@ appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</ 
     | 
|
| 
       1187 
1187 
     | 
    
         | 
| 
       1188 
1188 
     | 
    
         
             
                <div id="footer">
         
     | 
| 
       1189 
1189 
     | 
    
         | 
| 
       1190 
     | 
    
         
            -
                  Generated on  
     | 
| 
      
 1190 
     | 
    
         
            +
                  Generated on Wed Sep 17 10:18:56 -0400 2008 by <a href="http://gerbil.rubyforge.org">Gerbil</a> 3.1.0.
         
     | 
| 
       1191 
1191 
     | 
    
         | 
| 
       1192 
1192 
     | 
    
         
             
                  <div id="footer-credits">
         
     | 
| 
       1193 
1193 
     | 
    
         
             
                    <span class="icon-warning" style="float: right"> </span>
         
     | 
    
        data/spec/rmmseg_spec.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: rmmseg-cpp
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.2. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.7
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors: 
         
     | 
| 
       7 
7 
     | 
    
         
             
            - pluskid
         
     | 
| 
         @@ -9,7 +9,7 @@ autorequire: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
            date: 2008- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2008-09-17 00:00:00 -04:00
         
     | 
| 
       13 
13 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       14 
14 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
         @@ -91,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       91 
91 
     | 
    
         
             
            requirements: []
         
     | 
| 
       92 
92 
     | 
    
         | 
| 
       93 
93 
     | 
    
         
             
            rubyforge_project: rmmseg-cpp
         
     | 
| 
       94 
     | 
    
         
            -
            rubygems_version: 1. 
     | 
| 
      
 94 
     | 
    
         
            +
            rubygems_version: 1.2.0
         
     | 
| 
       95 
95 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       96 
96 
     | 
    
         
             
            specification_version: 2
         
     | 
| 
       97 
97 
     | 
    
         
             
            summary: rmmseg-cpp is a high performance Chinese word segmentation utility for Ruby
         
     |