plexus-rmmseg 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/History.txt +42 -0
  4. data/Manifest.txt +51 -0
  5. data/README.txt +74 -0
  6. data/Rakefile +12 -0
  7. data/TODO.txt +5 -0
  8. data/bin/rmmseg +65 -0
  9. data/data/chars.dic +12638 -0
  10. data/data/custom.dic +12 -0
  11. data/data/punctuation.dic +79 -0
  12. data/data/words.dic +120330 -0
  13. data/lib/rmmseg.rb +13 -0
  14. data/lib/rmmseg/algorithm.rb +136 -0
  15. data/lib/rmmseg/amibguity.rb +4 -0
  16. data/lib/rmmseg/chunk.rb +41 -0
  17. data/lib/rmmseg/complex_algorithm.rb +122 -0
  18. data/lib/rmmseg/config.rb +65 -0
  19. data/lib/rmmseg/dictionary.rb +80 -0
  20. data/lib/rmmseg/ferret.rb +109 -0
  21. data/lib/rmmseg/lawl_rule.rb +12 -0
  22. data/lib/rmmseg/lsdmfocw_rule.rb +13 -0
  23. data/lib/rmmseg/mm_rule.rb +13 -0
  24. data/lib/rmmseg/rule_helper.rb +28 -0
  25. data/lib/rmmseg/simple_algorithm.rb +37 -0
  26. data/lib/rmmseg/svwl_rule.rb +12 -0
  27. data/lib/rmmseg/token.rb +30 -0
  28. data/lib/rmmseg/version.rb +3 -0
  29. data/lib/rmmseg/word.rb +38 -0
  30. data/misc/ferret_example.rb +56 -0
  31. data/misc/homepage.erb +170 -0
  32. data/misc/homepage.html +1214 -0
  33. data/plexus-rmmseg.gemspec +20 -0
  34. data/spec/chunk_spec.rb +25 -0
  35. data/spec/complex_algorithm_spec.rb +18 -0
  36. data/spec/config_spec.rb +12 -0
  37. data/spec/dictionary_spec.rb +20 -0
  38. data/spec/lawl_rule_spec.rb +15 -0
  39. data/spec/lsdmfocw_rule_spec.rb +14 -0
  40. data/spec/mm_rule_spec.rb +15 -0
  41. data/spec/simple_algorithm_spec.rb +46 -0
  42. data/spec/spec_helper.rb +12 -0
  43. data/spec/svwl_rule_spec.rb +14 -0
  44. data/spec/word_spec.rb +9 -0
  45. data/tasks/ann.rake +76 -0
  46. data/tasks/annotations.rake +22 -0
  47. data/tasks/doc.rake +48 -0
  48. data/tasks/gem.rake +110 -0
  49. data/tasks/homepage.rake +12 -0
  50. data/tasks/manifest.rake +49 -0
  51. data/tasks/post_load.rake +26 -0
  52. data/tasks/rubyforge.rake +57 -0
  53. data/tasks/setup.rb +227 -0
  54. data/tasks/spec.rake +54 -0
  55. data/tasks/svn.rake +44 -0
  56. data/tasks/test.rake +38 -0
  57. metadata +121 -0
@@ -0,0 +1,1214 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
5
+ <meta name="date" content="06 February 2008"/>
6
+ <meta name="author" content="pluskid"/>
7
+ <meta name="generator" content="Gerbil 2.0.0"/>
8
+ <title>RMMSeg Homepage</title>
9
+
10
+ <style type="text/css" media="screen">
11
+ body
12
+ {
13
+ color : #000000;
14
+ background-color : #FFFFFF;
15
+ line-height : 1.5em;
16
+ font-family : Calibri, Verdana, sans-serif;
17
+ }
18
+
19
+ /* emphasis */
20
+
21
+ em,
22
+ blockquote
23
+ {
24
+ font-family : Cambria, Georgia, serif;
25
+ }
26
+
27
+ /* headings */
28
+
29
+ h1,
30
+ h2,
31
+ h3,
32
+ h4,
33
+ h5,
34
+ h6,
35
+ .title
36
+ {
37
+ font-weight : lighter;
38
+ font-family : Constantia, "Book Antiqua", "URW Bookman L", serif;
39
+ }
40
+
41
+ #lof h1,
42
+ #lof h2,
43
+ #lof h3,
44
+ #lof h4,
45
+ #lof h5,
46
+ #lof h6
47
+ {
48
+ margin-top : 1.25em;
49
+ }
50
+
51
+ #content h1,
52
+ #content h2,
53
+ #content h3,
54
+ #content h4,
55
+ #content h5,
56
+ #content h6
57
+ {
58
+ margin-top : 2.5em;
59
+ line-height : 1.25em;
60
+ }
61
+
62
+ #content h1
63
+ {
64
+ font-size : 2.0em;
65
+ }
66
+
67
+ #content h2
68
+ {
69
+ font-size : 1.8em;
70
+ }
71
+
72
+ #content h3
73
+ {
74
+ font-size : 1.6em;
75
+ }
76
+
77
+ #content h4
78
+ {
79
+ font-size : 1.4em;
80
+ }
81
+
82
+ #content h5
83
+ {
84
+ font-size : 1.2em;
85
+ }
86
+
87
+ #content h6
88
+ {
89
+ font-size : 1.0em;
90
+ }
91
+
92
+ /* tables */
93
+
94
+ table
95
+ {
96
+ border : none;
97
+ margin : auto; /* center horizontally */
98
+ margin-top : 1em;
99
+ }
100
+
101
+ th,
102
+ td
103
+ {
104
+ padding : 1em;
105
+ border : 1px solid #C0C0C0;
106
+ vertical-align : top;
107
+ background-color : #FFFFFF;
108
+ }
109
+
110
+ th
111
+ {
112
+ background-color : #F5F5F5;
113
+ }
114
+
115
+ /* document structure */
116
+
117
+ #header
118
+ {
119
+ margin-bottom : 5em;
120
+ text-align : center;
121
+ }
122
+
123
+ #abstract
124
+ {
125
+ margin-bottom : 5em;
126
+ }
127
+
128
+ #toc li
129
+ {
130
+ list-style-type : none;
131
+ }
132
+
133
+ #toc li ul
134
+ {
135
+ padding-bottom : 1em;
136
+ border-left : thick solid #F5F5F5;
137
+ _border-left : none; /* for IE6 */
138
+ }
139
+
140
+ #toc li ul:hover
141
+ {
142
+ border-color : #DCDCDC;
143
+ }
144
+
145
+ #toc > ul
146
+ {
147
+ padding-left : 1em;
148
+ }
149
+
150
+ #references
151
+ {
152
+ margin-top : 5em;
153
+ }
154
+
155
+ #footer
156
+ {
157
+ border-top : thick dotted #DCDCDC;
158
+ padding-top : 1em;
159
+ margin-top : 5em;
160
+ text-align : center;
161
+ }
162
+
163
+ /* document nodes */
164
+
165
+ .part > .title,
166
+ .chapter > .title
167
+ {
168
+ padding-bottom : 0.5em;
169
+ }
170
+
171
+ .part > .title > big,
172
+ .chapter > .title > big
173
+ {
174
+ display : block;
175
+ margin-top : 0.25em;
176
+ }
177
+
178
+ .part .title big,
179
+ .chapter .title big
180
+ {
181
+ _display : block; /* for IE6 */
182
+ _margin-top : 0.25em; /* for IE6 */
183
+ _margin-bottom : 0.75em; /* for IE6 */
184
+ }
185
+
186
+ .paragraph > .title,
187
+ .tip > .title,
188
+ .note > .title,
189
+ .caution > .title,
190
+ .warning > .title,
191
+ .important > .title,
192
+ .figure > .title,
193
+ .table > .title,
194
+ .example > .title,
195
+ .equation > .title,
196
+ .procedure > .title
197
+ {
198
+ font-size : large;
199
+ margin-top : 2em;
200
+ }
201
+
202
+ .paragraph .title,
203
+ .tip .title,
204
+ .note .title,
205
+ .caution .title,
206
+ .warning .title,
207
+ .important .title,
208
+ .figure .title,
209
+ .table .title,
210
+ .example .title,
211
+ .equation .title,
212
+ .procedure .title
213
+ {
214
+ _font-size : large; /* for IE6 */
215
+ _font-weight : bold; /* large is not bold in IE6 */
216
+ _margin-top : 2em; /* for IE6 */
217
+ }
218
+
219
+ .tip ,
220
+ .note ,
221
+ .caution ,
222
+ .warning ,
223
+ .important,
224
+ .figure ,
225
+ .table ,
226
+ .example ,
227
+ .equation ,
228
+ .procedure
229
+ {
230
+ margin : 3em;
231
+ }
232
+
233
+ .tip > .icon,
234
+ .note > .icon,
235
+ .caution > .icon,
236
+ .warning > .icon,
237
+ .important > .icon
238
+ {
239
+ float : left;
240
+ margin : 0 1em 1em 0; /* top right bottom left */
241
+ }
242
+
243
+ .tip .icon,
244
+ .note .icon,
245
+ .caution .icon,
246
+ .warning .icon,
247
+ .important .icon
248
+ {
249
+ _display : none; /* IE6 cannot display embedded images */
250
+ }
251
+
252
+ .figure > .title
253
+ {
254
+ text-align : center;
255
+ }
256
+
257
+ .figure .title
258
+ {
259
+ _text-align : center; /* for IE6 */
260
+ }
261
+
262
+ .figure > .content img
263
+ {
264
+ display : block;
265
+ margin : auto;
266
+ }
267
+
268
+ .figure .content img
269
+ {
270
+ _display : block; /* for IE6 */
271
+ _margin : auto; /* for IE6 */
272
+ }
273
+
274
+ body
275
+ {
276
+ margin : auto;
277
+ padding : 0.5em;
278
+ max-width : 36em;
279
+ }
280
+
281
+ /* hyperlinks */
282
+
283
+ a
284
+ {
285
+ color : #0000FF;
286
+ text-decoration : none;
287
+ }
288
+
289
+ a:visited
290
+ {
291
+ color : #800080;
292
+ }
293
+
294
+ a:hover
295
+ {
296
+ color : #FF0000;
297
+ text-decoration : underline;
298
+ }
299
+
300
+ a:target
301
+ {
302
+ color : #FF0000;
303
+ text-decoration : underline;
304
+ }
305
+
306
+ a.toc:link,
307
+ a.toc:visited
308
+ {
309
+ text-decoration : none;
310
+ z-index : 1;
311
+ }
312
+
313
+ a img
314
+ {
315
+ border : none;
316
+ }
317
+
318
+ /*
319
+ mark external links with a symbol to help the user
320
+ distinguish between internal and external links
321
+ */
322
+ a:after
323
+ {
324
+ content: "∗";
325
+ }
326
+
327
+ a[href^="#"]:after
328
+ {
329
+ content: "";
330
+ }
331
+
332
+ /* source code */
333
+
334
+ tt,
335
+ code,
336
+ pre
337
+ {
338
+ font-family : Consolas, "Lucida Console", monospace;
339
+ }
340
+
341
+ tt
342
+ {
343
+ font-weight : bold;
344
+ color : #A52A2A;
345
+ background-color : #FFFAF0;
346
+ }
347
+
348
+ /* output of syntax colorizer */
349
+ .code
350
+ {
351
+ background-color : #FFFFF0;
352
+ }
353
+
354
+ pre
355
+ {
356
+ line-height : normal;
357
+ border : 1px dashed #C0C0C0;
358
+ background-color : #F5FFDF;
359
+ padding : 1em;
360
+ overflow : auto;
361
+ cursor : text;
362
+ }
363
+
364
+ /*
365
+ pre:hover
366
+ {
367
+ border : none;
368
+ position : fixed;
369
+ z-index : 1;
370
+ margin : 0;
371
+ top : 0;
372
+ left : 0;
373
+ right : 0;
374
+ bottom : 0;
375
+ overflow : auto;
376
+ cursor : text;
377
+ }
378
+ */
379
+
380
+ /* emphasis */
381
+
382
+ blockquote
383
+ {
384
+ margin : 1em;
385
+ border : 5px dotted #C0C0C0;
386
+ padding : 1em;
387
+ color : #444;
388
+ }
389
+
390
+ hr
391
+ {
392
+ color : #FF0000; /* for IE6 */
393
+ background-color : #FF0000; /* for Firefox */
394
+ }
395
+
396
+ </style>
397
+ <style type="text/css" media="print">
398
+ body
399
+ {
400
+ color : #000000;
401
+ background-color : #FFFFFF;
402
+ line-height : 1.5em;
403
+ font-family : Calibri, Verdana, sans-serif;
404
+ }
405
+
406
+ /* emphasis */
407
+
408
+ em,
409
+ blockquote
410
+ {
411
+ font-family : Cambria, Georgia, serif;
412
+ }
413
+
414
+ /* headings */
415
+
416
+ h1,
417
+ h2,
418
+ h3,
419
+ h4,
420
+ h5,
421
+ h6,
422
+ .title
423
+ {
424
+ font-weight : lighter;
425
+ font-family : Constantia, "Book Antiqua", "URW Bookman L", serif;
426
+ }
427
+
428
+ #lof h1,
429
+ #lof h2,
430
+ #lof h3,
431
+ #lof h4,
432
+ #lof h5,
433
+ #lof h6
434
+ {
435
+ margin-top : 1.25em;
436
+ }
437
+
438
+ #content h1,
439
+ #content h2,
440
+ #content h3,
441
+ #content h4,
442
+ #content h5,
443
+ #content h6
444
+ {
445
+ margin-top : 2.5em;
446
+ line-height : 1.25em;
447
+ }
448
+
449
+ #content h1
450
+ {
451
+ font-size : 2.0em;
452
+ }
453
+
454
+ #content h2
455
+ {
456
+ font-size : 1.8em;
457
+ }
458
+
459
+ #content h3
460
+ {
461
+ font-size : 1.6em;
462
+ }
463
+
464
+ #content h4
465
+ {
466
+ font-size : 1.4em;
467
+ }
468
+
469
+ #content h5
470
+ {
471
+ font-size : 1.2em;
472
+ }
473
+
474
+ #content h6
475
+ {
476
+ font-size : 1.0em;
477
+ }
478
+
479
+ /* tables */
480
+
481
+ table
482
+ {
483
+ border : none;
484
+ margin : auto; /* center horizontally */
485
+ margin-top : 1em;
486
+ }
487
+
488
+ th,
489
+ td
490
+ {
491
+ padding : 1em;
492
+ border : 1px solid #C0C0C0;
493
+ vertical-align : top;
494
+ background-color : #FFFFFF;
495
+ }
496
+
497
+ th
498
+ {
499
+ background-color : #F5F5F5;
500
+ }
501
+
502
+ /* document structure */
503
+
504
+ #header
505
+ {
506
+ margin-bottom : 5em;
507
+ text-align : center;
508
+ }
509
+
510
+ #abstract
511
+ {
512
+ margin-bottom : 5em;
513
+ }
514
+
515
+ #toc li
516
+ {
517
+ list-style-type : none;
518
+ }
519
+
520
+ #toc li ul
521
+ {
522
+ padding-bottom : 1em;
523
+ border-left : thick solid #F5F5F5;
524
+ _border-left : none; /* for IE6 */
525
+ }
526
+
527
+ #toc li ul:hover
528
+ {
529
+ border-color : #DCDCDC;
530
+ }
531
+
532
+ #toc > ul
533
+ {
534
+ padding-left : 1em;
535
+ }
536
+
537
+ #references
538
+ {
539
+ margin-top : 5em;
540
+ }
541
+
542
+ #footer
543
+ {
544
+ border-top : thick dotted #DCDCDC;
545
+ padding-top : 1em;
546
+ margin-top : 5em;
547
+ text-align : center;
548
+ }
549
+
550
+ /* document nodes */
551
+
552
+ .part > .title,
553
+ .chapter > .title
554
+ {
555
+ padding-bottom : 0.5em;
556
+ }
557
+
558
+ .part > .title > big,
559
+ .chapter > .title > big
560
+ {
561
+ display : block;
562
+ margin-top : 0.25em;
563
+ }
564
+
565
+ .part .title big,
566
+ .chapter .title big
567
+ {
568
+ _display : block; /* for IE6 */
569
+ _margin-top : 0.25em; /* for IE6 */
570
+ _margin-bottom : 0.75em; /* for IE6 */
571
+ }
572
+
573
+ .paragraph > .title,
574
+ .tip > .title,
575
+ .note > .title,
576
+ .caution > .title,
577
+ .warning > .title,
578
+ .important > .title,
579
+ .figure > .title,
580
+ .table > .title,
581
+ .example > .title,
582
+ .equation > .title,
583
+ .procedure > .title
584
+ {
585
+ font-size : large;
586
+ margin-top : 2em;
587
+ }
588
+
589
+ .paragraph .title,
590
+ .tip .title,
591
+ .note .title,
592
+ .caution .title,
593
+ .warning .title,
594
+ .important .title,
595
+ .figure .title,
596
+ .table .title,
597
+ .example .title,
598
+ .equation .title,
599
+ .procedure .title
600
+ {
601
+ _font-size : large; /* for IE6 */
602
+ _font-weight : bold; /* large is not bold in IE6 */
603
+ _margin-top : 2em; /* for IE6 */
604
+ }
605
+
606
+ .tip ,
607
+ .note ,
608
+ .caution ,
609
+ .warning ,
610
+ .important,
611
+ .figure ,
612
+ .table ,
613
+ .example ,
614
+ .equation ,
615
+ .procedure
616
+ {
617
+ margin : 3em;
618
+ }
619
+
620
+ .tip > .icon,
621
+ .note > .icon,
622
+ .caution > .icon,
623
+ .warning > .icon,
624
+ .important > .icon
625
+ {
626
+ float : left;
627
+ margin : 0 1em 1em 0; /* top right bottom left */
628
+ }
629
+
630
+ .tip .icon,
631
+ .note .icon,
632
+ .caution .icon,
633
+ .warning .icon,
634
+ .important .icon
635
+ {
636
+ _display : none; /* IE6 cannot display embedded images */
637
+ }
638
+
639
+ .figure > .title
640
+ {
641
+ text-align : center;
642
+ }
643
+
644
+ .figure .title
645
+ {
646
+ _text-align : center; /* for IE6 */
647
+ }
648
+
649
+ .figure > .content img
650
+ {
651
+ display : block;
652
+ margin : auto;
653
+ }
654
+
655
+ .figure .content img
656
+ {
657
+ _display : block; /* for IE6 */
658
+ _margin : auto; /* for IE6 */
659
+ }
660
+
661
+ /* headings */
662
+
663
+ h1,
664
+ h2,
665
+ h3,
666
+ h4,
667
+ h5,
668
+ h6
669
+ {
670
+ font-weight : normal;
671
+ }
672
+
673
+ /* hyperlinks */
674
+
675
+ a:link,
676
+ a:visited,
677
+ a:active
678
+ {
679
+ color : #0000FF;
680
+ font-weight : bold;
681
+ text-decoration : underline;
682
+ }
683
+
684
+ a:after
685
+ {
686
+ content : " (" attr(href) ")";
687
+ font-family : sans-serif;
688
+ font-weight : normal;
689
+ font-size : 90%;
690
+ }
691
+
692
+ a[href^="#"]:after
693
+ {
694
+ content : "";
695
+ }
696
+
697
+ a[href^="#"]
698
+ {
699
+ color : #A52A2A;
700
+ font-weight : lighter;
701
+ text-decoration : none;
702
+ font-style : italic;
703
+ }
704
+
705
+ a.toc:link,
706
+ a.toc:visited
707
+ {
708
+ color : inherit;
709
+ .color : #000000; /* for IE6 and IE7 */
710
+ font-weight : inherit;
711
+ text-decoration : none;
712
+ font-style : normal;
713
+ }
714
+
715
+ /* source code */
716
+
717
+ tt
718
+ {
719
+ color : inherit;
720
+ background-color : inherit;
721
+ font-weight : normal;
722
+ }
723
+
724
+ pre,
725
+ .code
726
+ {
727
+ border : none;
728
+ overflow : visible;
729
+ background-color : inherit;
730
+ }
731
+
732
+ /* document structure */
733
+
734
+ #lof
735
+ {
736
+ display : none;
737
+ }
738
+
739
+ /* document nodes */
740
+
741
+ .part > .title > big,
742
+ .chapter > .title > big
743
+ {
744
+ padding-bottom : 0.5em;
745
+ }
746
+
747
+ .part .title big,
748
+ .chapter .title big
749
+ {
750
+ _padding-bottom : 0.5em; /* for IE6 */
751
+ }
752
+
753
+ </style>
754
+ </head>
755
+ <body>
756
+
757
+ <div id="header">
758
+
759
+
760
+ <h1 class="title">RMMSeg Homepage</h1>
761
+ <h2 class="authors"><a href="http://pluskid.lifegoo.com">pluskid</a></h2>
762
+ <h3 class="date">06 February 2008</h3>
763
+
764
+ </div>
765
+
766
+
767
+
768
+ <div id="toc"><h1>Contents</h1> <ul><li>1&nbsp;&nbsp;<a id="a-606666518" href="#Introduction">Introduction</a></li><li>2&nbsp;&nbsp;<a id="a-606668658" href="#Setup">Setup</a><ul><li>2.1&nbsp;&nbsp;<a id="a-606670158" href="#Requirements">Requirements</a></li><li>2.2&nbsp;&nbsp;<a id="a-606672268" href="#Installation">Installation</a><ul><li>2.2.1&nbsp;&nbsp;<a id="a-606673868" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2&nbsp;&nbsp;<a id="a-606675958" href="#From-Subversion">From Subversion</a></li></ul></li></ul></li><li>3&nbsp;&nbsp;<a id="a-606680748" href="#Usage">Usage</a><ul><li>3.1&nbsp;&nbsp;<a id="a-606682288" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2&nbsp;&nbsp;<a id="a-606684368" href="#Analyzer-for-Ferret">Analyzer for Ferret</a></li><li>3.3&nbsp;&nbsp;<a id="a-606690578" href="#Customization">Customization</a></li></ul></li><li>4&nbsp;&nbsp;<a id="a-606693198" href="#Resources">Resources</a></li></ul></div>
769
+
770
+ <div id="lof"><h1>Figures</h1> <ol><li><a id="a-606688358" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1>Notes</h1> <ol><li><a id="a-606677598" href="#The-latest-code-might-be-unstable">The latest code might be unstable</a></li></ol></div>
771
+
772
+ <div id="content">
773
+ <div class="chapter">
774
+ <h1 class="title">
775
+ Chapter
776
+ <a class="toc" id="Introduction" href="#a-606666518">1</a>
777
+
778
+ <br/>
779
+
780
+ <big>Introduction</big>
781
+ </h1>
782
+
783
+ <div class="content"><p>RMMSeg is an implementation of the
784
+ <a href="http://technology.chtsai.org/mmseg/">MMSEG</a> Chinese word
785
+ segmentation algorithm. It is based on two variants of maximum
786
+ matching algorithms. Two algorithms are available:</p>
787
+
788
+
789
+ <ul>
790
+ <li>simple algorithm that uses only forward maximum matching.</li>
791
+ <li>complex algorithm that uses three-word chunk maximum matching and 3
792
+ additional rules to solve ambiguities.</li>
793
+ </ul>
794
+
795
+
796
+ <p>For more information about the algorithm, please refer to the
797
+ following essays:</p>
798
+
799
+
800
+ <ul>
801
+ <li>http://technology.chtsai.org/mmseg/</li>
802
+ <li>http://pluskid.lifegoo.com/?p=261</li>
803
+ </ul>
804
+
805
+
806
+ <p>RMMSeg can be used either as a stand-alone program or as an analyzer for
807
+ <a href="http://ferret.davebalmain.com/trac">Ferret</a>.</p></div>
808
+ </div>
809
+ <div class="chapter">
810
+ <h1 class="title">
811
+ Chapter
812
+ <a class="toc" id="Setup" href="#a-606668658">2</a>
813
+
814
+ <br/>
815
+
816
+ <big>Setup</big>
817
+ </h1>
818
+
819
+ <div class="content"><div class="section">
820
+ <h2 class="title">
821
+ <a class="toc" id="Requirements" href="#a-606670158">2.1</a>&nbsp;&nbsp;Requirements
822
+ </h2>
823
+ <div class="content">Your system needs the following software to run RMMSeg.
824
+
825
+
826
+ <table border="1">
827
+ <tr>
828
+ <th>Software </th>
829
+ <th>Notes </th>
830
+ </tr>
831
+ <tr>
832
+ <td> <a href="http://ruby-lang.org">Ruby</a> </td>
833
+ <td> Version 1.8.x is required </td>
834
+ </tr>
835
+ <tr>
836
+ <td> <a href="http://seattlerb.rubyforge.org/hoe/">hoe</a> </td>
837
+ <td> If you want to build the gem manually </td>
838
+ </tr>
839
+ <tr>
840
+ <td> <a href="http://rake.rubyforge.org/">Rake</a> </td>
841
+ <td> If you want to build the gem manually </td>
842
+ </tr>
843
+ <tr>
844
+ <td> <a href="http://rspec.rubyforge.org/">rspec</a> </td>
845
+ <td> If you want to run the testcases </td>
846
+ </tr>
847
+ </table></div>
848
+ </div>
849
+ <div class="section">
850
+ <h2 class="title">
851
+ <a class="toc" id="Installation" href="#a-606672268">2.2</a>&nbsp;&nbsp;Installation
852
+ </h2>
853
+ <div class="content"><div class="section">
854
+ <h3 class="title">
855
+ <a class="toc" id="Using-RubyGems" href="#a-606673868">2.2.1</a>&nbsp;&nbsp;Using RubyGems
856
+ </h3>
857
+ <div class="content"><p>To install the gem remotely from <a href="http://rubyforge.org">RubyForge</a>:</p>
858
+
859
+
860
+ <pre>sudo gem install rmmseg</pre>
861
+
862
+
863
+ <p>Or you can download the gem file manually from <a href="http://rubyforge.org/projects/rmmseg/">RubyForge</a> and install it locally:</p>
864
+
865
+
866
+ <pre>sudo gem install --local rmmseg-x.y.z.gem</pre></div>
867
+ </div>
868
+ <div class="section">
869
+ <h3 class="title">
870
+ <a class="toc" id="From-Subversion" href="#a-606675958">2.2.2</a>&nbsp;&nbsp;From Subversion
871
+ </h3>
872
+ <div class="content"><p>You can always get the latest source code from the Subversion repository hosted at <a href="http://rmmseg.rubyforge.org/svn/">RubyForge</a>.
873
+ <div class="note">
874
+ <p class="title"><a class="toc" id="The-latest-code-might-be-unstable" href="#a-606677598">Note 1</a>.&nbsp;&nbsp;The latest code might be unstable</p>
875
+
876
+ <img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAABHNCSVQICAgI
877
+ fAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3
878
+ Lmlua3NjYXBlLm9yZ5vuPBoAAAmCSURBVGiB1ZltbJXlGcd/9/N+Tlssh1Ja
879
+ hNEJaK2UFxFptqC2wGCSTWdM3DSQzWVDEzVxy4xfjNkHQ8xMnEEXviwZ62Rj
880
+ 6siWTcVBZeJA5yBQpOWlKNZSaEvpyyk95zwv970Pp+fY5/S0tFBkXsmTc67n
881
+ eu77+f/u6349Ryil+Cqbdq0FXKldEuClp+3nfvVz/eJLT9vPXQv/igFc13tq
882
+ w4/ujLqu99S18C9lRr6br7zyyi2WZb2eSqUqpVSUlE5BSmW8/PLL6sv2bdtu
883
+ TiaT9z/++ONN486AZVm/XLiw+qZlt98Wun/bsqUhf+nSJSF/ya2LQv6ixQtD
884
+ /sJFC0L+guqqkF91S2XIr6y6idlfm1Vp2/Yz+XSOCiCEqDJMQ3ze+hm6bnC+
885
+ sx9d1zl+vBlN1znf2Y+m6TQfa0LTvvCbmo6G/KNHj4T8Ix83hvzGI4dD/uHG
886
+ QyG/peUElmUJIURlPp2jAkgpiwXg+R5FM9dRv3U/heXrkFJSWHZ32i/7NlJK
887
+ CsrWUr91P9EZa5FSEildQ/3W/USmfwspJc701dRv3Y9dshopJda0VdRv3Y8V
888
+ W4mUEjO2kvqt+zCm1iGlxCiu5fe/24defBeu6yHSkopHAxD51oEtW7bEb1++
889
+ rLC5+ShBEKCU+uJCgWJCvlJD9/iinlw/o2O4b5omixct4fixkz2PPPJILB9A
890
+ 3kEspYzOmjWbxYuWjIiNtfBdjdjg4EWam44XjhYf0YU2b948xTTNoGRaSbjl
891
+ R2mlqx0rLCxC0zR98+bNdj6AERkIgiDmOI6nlDK/7NbOF1NKYdt2KplMxoCz
892
+ ufERAKZpxiKRSDC8wnfeeWfUl06W0FxbtWpV9tloNOoPDg6OD0BKGYtEIqGX
893
+ rV69etKFXio2vGs5jqOklOMbxJqmxaLRqBheSRAEo77sSoWOZkKI7DORSERo
894
+ mjY+AKVULBKJGMMB3n333Uum/XK35aOVq6ury2YiEonoSqnxAziOYw0HqKur
895
+ G5fIyR68mU/Hccyes8cq19cVbQB84Fh9Q/xgXgDTNGfYtq0NBxivkMmGAGg7
896
+ 3cTOV58xe9tPPvnwuodUxFX89cOd/vq6orX1DfG9IwB0XS+zbTs0kHbt2jVh
897
+ iMwscrkQrS2H+ce2TfSdPc2Dd9xN1cL7TK29E9rbWWBXWk9o+95eX1c0Jd9K
898
+ XGqaZghg5cqVExIyEaG51tL0IW9u2wRuijVrf4JXXIHR1oZoaIC2NjhzBtHb
899
+ S+lcTT9jBNPzAUyzLCvUBy9HyETLHm98nze3bWKw7xy1NZXcOG8JUmq0BgGt
900
+ paVoRUWUfPwxnb5Pl+/RJ6QAIvkGcXFuBq4mxNEDu3lr2yaUN8BdyyuZO/d2
901
+ hNBQ0sPrf43C0yvoqqjic9umdXCQ85akeT6plMUL9f+Mf5pvIZtiGAa5g3gy
902
+ IZRSNH74Nm/9cRO2HrByeSUVFRWAQAYpEj2nSF44hZI+F/tn4V0/n3hPJ+1V
903
+ ym+KKFcY4lXPU89A/r1QgWEY+L6fvbd79+4rhkg/Izlz8j+c/O/rlBRHufuO
904
+ BcyeNRsA6Q+S6G4h1XcaJdMLZ0evT3tXG/65VznU9j5eofqN5/N8/c7+9kyd
905
+ ofPAiy++GIlGo/ENGzborutOWOhosSDw+ehfb7Dzzy9QVjKFO5bdTHl5eTrm
906
+ xklcOEmq73NQEqmgvduj5axLR5/GRVdQd89G2uMFsqd3wH722Wf94XWHMhCJ
907
+ RGK2bbtAZDLmc99z+aDhT+x6/dfMmTWdH3znG5ROL03HUr0kzp/AjbcDikAq
908
+ Wrt8TrWn6IgbSC1K3b2PsbzuAQqLrmP79u2u0KypQNeoAL7vxxzH8XMHb0ND
909
+ w4Qh2k99RPO//0DlvNlsuL+WabFpAHiJbpLdx3EHOgBwfcVnnR4t7SnO9Qk0
910
+ q5Ablt7Lgz/+BZqmZ+uzbdsfGBiIjQkwtI1QuTNQbW3tJVt7uB14bwcfHdnB
911
+ xofvoTgaBQTuxXMkuk/gD3YDkHAVn55z+eRcis64QUnZXB7Y8DNuXnzXiNlv
912
+ aD+kDMMYsR8akQHTNIWU8rJnoAN7d7DrtefZ+MSjRGfMI/HpPuInd+EnewGI
913
+ JySnzrp81unRGRdU3LiUHz76JBU3LhkhOmNSShzHId+WOgSQTCZjlmWFdqIA
914
+ e/bsGdcs03ZiH62H/sJPH12PXXAd0r2IXVZFz5E36BkIaDnr0trl09kXUF6x
915
+ iOW191BYXM7p9j5Ot+8J1XXnnXeGYEzT1F3XHRsgCIKYZVlmLsCKFStGtMqw
916
+ Mvi+z4H3dtD1yXus+e59WJaBn+hBc+MoBUfaFM2nk1y4KKlaupq1teuZWjIT
917
+ wzDQdR3DMDAMA9M0s/UOFy+lxLZtw/f9sQE8z5tu27aR24UyhwshROiQ47ou
918
+ rutyaN/f2Pv3Lew9fJ7Dxzr4/pqv0xt36e4ZoOt8D6mUy7zFa6m5dR0FRVPx
919
+ lUFvby+maWJZFqZpZr9blkVmIR0OY5qm6XnetDEBlFLllmWNaO29e/eS8xy+
920
+ 7+P7Pm0nPuD4B9soKCxkzTdvoOC6Ms755TglMWZ+bSpzIsWYzhR0XSdQOslk
921
+ ksw7hBAIIdA0LfuZ2cbkvs80TaGUKh8TwPf9UsuykFKGIGpqanLLZVPbffNc
922
+ vvfQY3heGsjzPDzPy3at3Exqmoau6+i6jmmaGIaB4zg4joNhGKEsZ0xKyZCu
923
+ GZcCKMnsRIf3QV3XRwBkYmUzK0JAmSsIguz3fK2t6zqapqFp2og6873Hsiw8
924
+ z5s+JkAqlZqaycDlTKO5gsZTbjynvkwGXNedOiaA67pT8gFMBOJqxDIAqVRq
925
+ xI+8IYBEIlE4lKoJn4evZiwDkEgkinKfyeZ748aNpuu6pmma2QzkXplKv+yY
926
+ lBLTNEkmk86CBQus4QDZDDQ2Nl4/f/5817Isp7S0NDv3T6aNZzUfzXzfRwjh
927
+ K6XmCCHOAoNKKWkACCEKqqurpwkhZEtLC42NjZOledKsuroaTdOU67olwACg
928
+ CSEGDCGEBdiu6zq+75NMJkNL+v+LJRIJfN8nkUhEgAgQAK5BehzoPT098e7u
929
+ bj2VSjFnzpxrKjafSSm5cOEC/f39KUAnrVsTgAkUAYU1NTUPx2KxjUEQTLmW
930
+ YvOZpmnxjo6O3x48eHA76S4UB3qEUgohRBQoGLoyKTJJp+nyR97kmA1IwANS
931
+ wODQ1auUSmYP9UNjoQBwAIv0DBUMFb6WZpNuRB9wSYsfUEr5kOdfSiGETrr1
932
+ DUCQhrhWJkj394A0gKeUCjVo3r9Zv0r2P3yyQqPd16MPAAAAAElFTkSuQmCC
933
+ " alt="note" class="icon"/>
934
+
935
+ <div class="content">Some new features may only be available in the latest code in subversion, but the code might be broken in some cases. So it is recommended to use the released gem package for production.</div>
936
+ </div> To check out the code from RubyForge, you need to install Subversion, then run:</p>
937
+
938
+
939
+ <pre>svn checkout http://rmmseg.rubyforge.org/svn/trunk/ rmmseg</pre>
940
+
941
+
942
+ <p>Then you can run</p>
943
+
944
+
945
+ <pre>rake gem</pre>
946
+
947
+
948
+ <p>to build the gem file.</p></div>
949
+ </div></div>
950
+ </div></div>
951
+ </div>
952
+ <div class="chapter">
953
+ <h1 class="title">
954
+ Chapter
955
+ <a class="toc" id="Usage" href="#a-606680748">3</a>
956
+
957
+ <br/>
958
+
959
+ <big>Usage</big>
960
+ </h1>
961
+
962
+ <div class="content"><div class="section">
963
+ <h2 class="title">
964
+ <a class="toc" id="Stand-Alone-rmmseg" href="#a-606682288">3.1</a>&nbsp;&nbsp;Stand Alone rmmseg
965
+ </h2>
966
+ <div class="content"><p>RMMSeg comes with a script <code class="code">rmmseg</code>. To get the basic usage, just execute it with <code class="code">-h</code> option:</p>
967
+
968
+
969
+ <pre>rmmseg -h</pre>
970
+
971
+
972
+ <p>It reads from STDIN and prints the result to STDOUT. Here is a real
973
+ example:</p>
974
+
975
+
976
+ <pre>$ echo "我们都喜欢用 Ruby" | rmmseg
977
+ 我们 都 喜欢 用 Ruby</pre></div>
978
+ </div>
979
+ <div class="section">
980
+ <h2 class="title">
981
+ <a class="toc" id="Analyzer-for-Ferret" href="#a-606684368">3.2</a>&nbsp;&nbsp;Analyzer for Ferret
982
+ </h2>
983
+ <div class="content"><p>RMMSeg includes an analyzer for Ferret. It is ready to
984
+ use. Just require it and pass it to Ferret. Here&#8217;s a complete
985
+ example:</p>
986
+
987
+
988
+ <pre class="code" lang="ruby">
989
+ <span style="color:#888">#!/usr/bin/env ruby</span>
990
+ require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rubygems</span><span style="color:#710">'</span></span>
991
+ require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rmmseg</span><span style="color:#710">'</span></span>
992
+ require <span style="background-color:#fff0f0"><span style="color:#710">'</span><span style="color:#D20">rmmseg/ferret</span><span style="color:#710">'</span></span>
993
+
994
+ analyzer = <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analyzer</span>.new { |tokenizer|
995
+ <span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Analysis</span>::<span style="color:#036; font-weight:bold">LowerCaseFilter</span>.new(tokenizer)
996
+ }
997
+
998
+ <span style="color:#d70; font-weight:bold">$index</span> = <span style="color:#036; font-weight:bold">Ferret</span>::<span style="color:#036; font-weight:bold">Index</span>::<span style="color:#036; font-weight:bold">Index</span>.new(<span style="color:#A60">:analyzer</span> =&gt; analyzer)
999
+
1000
+ <span style="color:#d70; font-weight:bold">$index</span> &lt;&lt; {
1001
+ <span style="color:#A60">:title</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">分词</span><span style="color:#710">&quot;</span></span>,
1002
+ <span style="color:#A60">:content</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">中文分词比较困难,不像英文那样,直接在空格和标点符号的地方断开就可以了。</span><span style="color:#710">&quot;</span></span>
1003
+ }
1004
+ <span style="color:#d70; font-weight:bold">$index</span> &lt;&lt; {
1005
+ <span style="color:#A60">:title</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">RMMSeg</span><span style="color:#710">&quot;</span></span>,
1006
+ <span style="color:#A60">:content</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">RMMSeg 我近日做的一个 Ruby 中文分词实现,下一步是和 Ferret 进行集成。</span><span style="color:#710">&quot;</span></span>
1007
+ }
1008
+ <span style="color:#d70; font-weight:bold">$index</span> &lt;&lt; {
1009
+ <span style="color:#A60">:title</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">Ruby 1.9</span><span style="color:#710">&quot;</span></span>,
1010
+ <span style="color:#A60">:content</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">Ruby 1.9.0 已经发布了,1.9 的一个重大改进就是对 Unicode 的支持。</span><span style="color:#710">&quot;</span></span>
1011
+ }
1012
+ <span style="color:#d70; font-weight:bold">$index</span> &lt;&lt; {
1013
+ <span style="color:#A60">:title</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">Ferret</span><span style="color:#710">&quot;</span></span>,
1014
+ <span style="color:#A60">:content</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&lt;&lt;END</span></span><span style="background-color:#fff0f0"><span style="color:#D20">
1015
+ Ferret is a high-performance, full-featured text search engine library
1016
+ written for Ruby. It is inspired by Apache Lucene Java project. With
1017
+ the introduction of Ferret, Ruby users now have one of the fastest and
1018
+ most flexible search libraries available. And it is surprisingly easy
1019
+ to use.</span><span style="color:#710">
1020
+ END</span></span>
1021
+ }
1022
+
1023
+ <span style="color:#080; font-weight:bold">def</span> <span style="color:#06B; font-weight:bold">highlight_search</span>(key)
1024
+ <span style="color:#d70; font-weight:bold">$index</span>.search_each(<span style="background-color:#fff0f0"><span style="color:#710">%Q!</span><span style="color:#D20">content:&quot;</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>key<span style="font-weight: bold; color: #888">}</span></span><span style="color:#D20">&quot;</span><span style="color:#710">!</span></span>) <span style="color:#080; font-weight:bold">do</span> |id, score|
1025
+ puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">*** Document </span><span style="color:#04D">\&quot;</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span><span style="color:#d70; font-weight:bold">$index</span>[id][<span style="color:#A60">:title</span>]<span style="font-weight: bold; color: #888">}</span></span><span style="color:#04D">\&quot;</span><span style="color:#D20"> found with a score of </span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>score<span style="font-weight: bold; color: #888">}</span></span><span style="color:#710">&quot;</span></span>
1026
+ puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">-</span><span style="color:#710">&quot;</span></span>*<span style="color:#00D; font-weight:bold">40</span>
1027
+ highlights = <span style="color:#d70; font-weight:bold">$index</span>.highlight(<span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">content:</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>key<span style="font-weight: bold; color: #888">}</span></span><span style="color:#710">&quot;</span></span>, id,
1028
+ <span style="color:#A60">:field</span> =&gt; <span style="color:#A60">:content</span>,
1029
+ <span style="color:#A60">:pre_tag</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#04D">\033</span><span style="color:#D20">[36m</span><span style="color:#710">&quot;</span></span>,
1030
+ <span style="color:#A60">:post_tag</span> =&gt; <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#04D">\033</span><span style="color:#D20">[m</span><span style="color:#710">&quot;</span></span>)
1031
+ puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>highlights<span style="font-weight: bold; color: #888">}</span></span><span style="color:#710">&quot;</span></span>
1032
+ puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#710">&quot;</span></span>
1033
+ <span style="color:#080; font-weight:bold">end</span>
1034
+ <span style="color:#080; font-weight:bold">end</span>
1035
+
1036
+ <span style="color:#038; font-weight:bold">ARGV</span>.each { |key|
1037
+ puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#04D">\033</span><span style="color:#D20">[33mSearching for </span><span style="background: #eee"><span style="font-weight: bold; color: #888">#{</span>key<span style="font-weight: bold; color: #888">}</span></span><span style="color:#D20">...</span><span style="color:#04D">\033</span><span style="color:#D20">[m</span><span style="color:#710">&quot;</span></span>
1038
+ puts <span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#710">&quot;</span></span>
1039
+ highlight_search(key)
1040
+ }
1041
+
1042
+ <span style="color:#888"># Local Variables:</span>
1043
+ <span style="color:#888"># coding: utf-8</span>
1044
+ <span style="color:#888"># End:</span>
1045
+ </pre>
1046
+
1047
+
1048
+ <p>Executing it with the following keywords:</p>
1049
+
1050
+
1051
+ <pre>$ ruby ferret_example.rb Ruby 中文</pre>
1052
+
1053
+
1054
+ <p>will generate the following results:</p>
1055
+
1056
+
1057
+ <pre class="code" lang="text">
1058
+ Searching for Ruby...
1059
+
1060
+ *** Document &quot;RMMSeg&quot; found with a score of 0.21875
1061
+ ----------------------------------------
1062
+ RMMSeg 我近日做的一个 Ruby 中文分词实现,下一步是和 Ferret 进行集成。
1063
+
1064
+ *** Document &quot;Ruby 1.9&quot; found with a score of 0.21875
1065
+ ----------------------------------------
1066
+ Ruby 1.9.0 已经发布了,1.9 的一个重大改进就是对 Unicode 的支持。
1067
+
1068
+ *** Document &quot;Ferret&quot; found with a score of 0.176776692271233
1069
+ ----------------------------------------
1070
+ Ferret is a high-performance, full-featured text search engine library
1071
+ written for Ruby. It is inspired by Apache Lucene Java project. With
1072
+ the introduction of Ferret, Ruby users now have one of the fastest and
1073
+ most flexible search libraries available. And it's surprisingly easy
1074
+ to use.
1075
+
1076
+ Searching for 中文...
1077
+
1078
+ *** Document &quot;分词&quot; found with a score of 0.281680464744568
1079
+ ----------------------------------------
1080
+ 中文分词比较困难,不像英文那样,直接在空格和标点符号的地方断开就可以了。
1081
+
1082
+ *** Document &quot;RMMSeg&quot; found with a score of 0.281680464744568
1083
+ ----------------------------------------
1084
+ RMMSeg 我近日做的一个 Ruby 中文分词实现,下一步是和 Ferret 进行集成。
1085
+ </pre>
1086
+
1087
+
1088
+ <p>And if you run the example in a terminal, you&#8217;ll see the result
1089
+ highlighted as in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1: <em>Ferret Example Screenshot</em></a>.</p>
1090
+
1091
+
1092
+ <p><div class="figure">
1093
+ <p class="title"><a class="toc" id="Ferret-Example-Screenshot" href="#a-606688358">Figure 1</a>.&nbsp;&nbsp;Ferret Example Screenshot</p>
1094
+ <div class="content"><img src="http://pluskid.lifegoo.com/wp-content/uploads/2008/02/rmmseg.png" alt="" /></div>
1095
+ </div></p></div>
1096
+ </div>
1097
+ <div class="section">
1098
+ <h2 class="title">
1099
+ <a class="toc" id="Customization" href="#a-606690578">3.3</a>&nbsp;&nbsp;Customization
1100
+ </h2>
1101
+ <div class="content"><p>RMMSeg can be customized through <code class="code"><span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Config</span></code>. For example, to use your own dictionaries, just set them before doing any segmentation:</p>
1102
+
1103
+
1104
+ <pre class="code" lang="ruby">
1105
+ <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Config</span>.dictionaries = [[<span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">dict1.dic</span><span style="color:#710">&quot;</span></span>, <span style="color:#038; font-weight:bold">true</span>], <span style="color:#888"># with frequency info</span>
1106
+ [<span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">dict2.dic</span><span style="color:#710">&quot;</span></span>, <span style="color:#038; font-weight:bold">false</span>], <span style="color:#888"># without</span>
1107
+ [<span style="background-color:#fff0f0"><span style="color:#710">&quot;</span><span style="color:#D20">dict3.dic</span><span style="color:#710">&quot;</span></span>, <span style="color:#038; font-weight:bold">false</span>]]
1108
+ <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Config</span>.max_word_length = <span style="color:#00D; font-weight:bold">6</span>
1109
+ </pre>
1110
+
1111
+
1112
+ <p>Or to use the simple algorithm for more efficient (and less accurate) segmenting:</p>
1113
+
1114
+
1115
+ <pre class="code">
1116
+ <span style="color:#036; font-weight:bold">RMMSeg</span>::<span style="color:#036; font-weight:bold">Config</span>.algorithm = <span style="color:#A60">:simple</span>
1117
+ </pre>
1118
+
1119
+
1120
+ <p>For more information on customization, please refer to the RDoc of <a href="http://rmmseg.rubyforge.org/rmmseg/index.html">RMMSeg::Config</a>.</p></div>
1121
+ </div></div>
1122
+ </div>
1123
+ <div class="chapter">
1124
+ <h1 class="title">
1125
+ Chapter
1126
+ <a class="toc" id="Resources" href="#a-606693198">4</a>
1127
+
1128
+ <br/>
1129
+
1130
+ <big>Resources</big>
1131
+ </h1>
1132
+
1133
+ <div class="content"><ul>
1134
+ <li><a href="http://rubyforge.org/projects/rmmseg/">Project Home</a>: The Project page at RubyForge.</li>
1135
+ <li><a href="http://rmmseg.rubyforge.org/rmmseg/index.html">RDoc of RMMSeg</a>: The auto-generated RDoc of RMMSeg.</li>
1136
+ <li><a href="http://pluskid.lifegoo.com/?p=272">A Screencast</a>: Demo of Ferret RMMSeg and acts_as_ferret.</li>
1137
+ <li><a href="http://pluskid.lifegoo.com/?p=261">Implementation Details</a>: My blog post about the implementation details of RMMSeg (Chinese).</li>
1138
+ <li><a href="mailto:pluskid@gmail.com">Author&#8217;s Email</a>: Contact me if you have any problems.</li>
1139
+ </ul></div>
1140
+ </div></div>
1141
+
1142
+
1143
+ <br style="display: none"/>
1144
+ <hr style="display: none"/>
1145
+ <br style="display: none"/>
1146
+
1147
+
1148
+ <div id="footer">
1149
+
1150
+ Generated on Wed Feb 06 11:37:39 -0800 2008 by <a href="http://gerbil.rubyforge.org">Gerbil</a> 2.0.0.
1151
+
1152
+ <p>The admonition icons (<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAABHNCSVQICAgI
1153
+ fAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3
1154
+ Lmlua3NjYXBlLm9yZ5vuPBoAAAmCSURBVGiB1ZltbJXlGcd/9/N+Tlssh1Ja
1155
+ hNEJaK2UFxFptqC2wGCSTWdM3DSQzWVDEzVxy4xfjNkHQ8xMnEEXviwZ62Rj
1156
+ 6siWTcVBZeJA5yBQpOWlKNZSaEvpyyk95zwv970Pp+fY5/S0tFBkXsmTc67n
1157
+ eu77+f/u6349Ryil+Cqbdq0FXKldEuClp+3nfvVz/eJLT9vPXQv/igFc13tq
1158
+ w4/ujLqu99S18C9lRr6br7zyyi2WZb2eSqUqpVSUlE5BSmW8/PLL6sv2bdtu
1159
+ TiaT9z/++ONN486AZVm/XLiw+qZlt98Wun/bsqUhf+nSJSF/ya2LQv6ixQtD
1160
+ /sJFC0L+guqqkF91S2XIr6y6idlfm1Vp2/Yz+XSOCiCEqDJMQ3ze+hm6bnC+
1161
+ sx9d1zl+vBlN1znf2Y+m6TQfa0LTvvCbmo6G/KNHj4T8Ix83hvzGI4dD/uHG
1162
+ QyG/peUElmUJIURlPp2jAkgpiwXg+R5FM9dRv3U/heXrkFJSWHZ32i/7NlJK
1163
+ CsrWUr91P9EZa5FSEildQ/3W/USmfwspJc701dRv3Y9dshopJda0VdRv3Y8V
1164
+ W4mUEjO2kvqt+zCm1iGlxCiu5fe/24defBeu6yHSkopHAxD51oEtW7bEb1++
1165
+ rLC5+ShBEKCU+uJCgWJCvlJD9/iinlw/o2O4b5omixct4fixkz2PPPJILB9A
1166
+ 3kEspYzOmjWbxYuWjIiNtfBdjdjg4EWam44XjhYf0YU2b948xTTNoGRaSbjl
1167
+ R2mlqx0rLCxC0zR98+bNdj6AERkIgiDmOI6nlDK/7NbOF1NKYdt2KplMxoCz
1168
+ ufERAKZpxiKRSDC8wnfeeWfUl06W0FxbtWpV9tloNOoPDg6OD0BKGYtEIqGX
1169
+ rV69etKFXio2vGs5jqOklOMbxJqmxaLRqBheSRAEo77sSoWOZkKI7DORSERo
1170
+ mjY+AKVULBKJGMMB3n333Uum/XK35aOVq6ury2YiEonoSqnxAziOYw0HqKur
1171
+ G5fIyR68mU/Hccyes8cq19cVbQB84Fh9Q/xgXgDTNGfYtq0NBxivkMmGAGg7
1172
+ 3cTOV58xe9tPPvnwuodUxFX89cOd/vq6orX1DfG9IwB0XS+zbTs0kHbt2jVh
1173
+ iMwscrkQrS2H+ce2TfSdPc2Dd9xN1cL7TK29E9rbWWBXWk9o+95eX1c0Jd9K
1174
+ XGqaZghg5cqVExIyEaG51tL0IW9u2wRuijVrf4JXXIHR1oZoaIC2NjhzBtHb
1175
+ S+lcTT9jBNPzAUyzLCvUBy9HyETLHm98nze3bWKw7xy1NZXcOG8JUmq0BgGt
1176
+ paVoRUWUfPwxnb5Pl+/RJ6QAIvkGcXFuBq4mxNEDu3lr2yaUN8BdyyuZO/d2
1177
+ hNBQ0sPrf43C0yvoqqjic9umdXCQ85akeT6plMUL9f+Mf5pvIZtiGAa5g3gy
1178
+ IZRSNH74Nm/9cRO2HrByeSUVFRWAQAYpEj2nSF44hZI+F/tn4V0/n3hPJ+1V
1179
+ ym+KKFcY4lXPU89A/r1QgWEY+L6fvbd79+4rhkg/Izlz8j+c/O/rlBRHufuO
1180
+ BcyeNRsA6Q+S6G4h1XcaJdMLZ0evT3tXG/65VznU9j5eofqN5/N8/c7+9kyd
1181
+ ofPAiy++GIlGo/ENGzborutOWOhosSDw+ehfb7Dzzy9QVjKFO5bdTHl5eTrm
1182
+ xklcOEmq73NQEqmgvduj5axLR5/GRVdQd89G2uMFsqd3wH722Wf94XWHMhCJ
1183
+ RGK2bbtAZDLmc99z+aDhT+x6/dfMmTWdH3znG5ROL03HUr0kzp/AjbcDikAq
1184
+ Wrt8TrWn6IgbSC1K3b2PsbzuAQqLrmP79u2u0KypQNeoAL7vxxzH8XMHb0ND
1185
+ w4Qh2k99RPO//0DlvNlsuL+WabFpAHiJbpLdx3EHOgBwfcVnnR4t7SnO9Qk0
1186
+ q5Ablt7Lgz/+BZqmZ+uzbdsfGBiIjQkwtI1QuTNQbW3tJVt7uB14bwcfHdnB
1187
+ xofvoTgaBQTuxXMkuk/gD3YDkHAVn55z+eRcis64QUnZXB7Y8DNuXnzXiNlv
1188
+ aD+kDMMYsR8akQHTNIWU8rJnoAN7d7DrtefZ+MSjRGfMI/HpPuInd+EnewGI
1189
+ JySnzrp81unRGRdU3LiUHz76JBU3LhkhOmNSShzHId+WOgSQTCZjlmWFdqIA
1190
+ e/bsGdcs03ZiH62H/sJPH12PXXAd0r2IXVZFz5E36BkIaDnr0trl09kXUF6x
1191
+ iOW191BYXM7p9j5Ot+8J1XXnnXeGYEzT1F3XHRsgCIKYZVlmLsCKFStGtMqw
1192
+ Mvi+z4H3dtD1yXus+e59WJaBn+hBc+MoBUfaFM2nk1y4KKlaupq1teuZWjIT
1193
+ wzDQdR3DMDAMA9M0s/UOFy+lxLZtw/f9sQE8z5tu27aR24UyhwshROiQ47ou
1194
+ rutyaN/f2Pv3Lew9fJ7Dxzr4/pqv0xt36e4ZoOt8D6mUy7zFa6m5dR0FRVPx
1195
+ lUFvby+maWJZFqZpZr9blkVmIR0OY5qm6XnetDEBlFLllmWNaO29e/eS8xy+
1196
+ 7+P7Pm0nPuD4B9soKCxkzTdvoOC6Ms755TglMWZ+bSpzIsWYzhR0XSdQOslk
1197
+ ksw7hBAIIdA0LfuZ2cbkvs80TaGUKh8TwPf9UsuykFKGIGpqanLLZVPbffNc
1198
+ vvfQY3heGsjzPDzPy3at3Exqmoau6+i6jmmaGIaB4zg4joNhGKEsZ0xKyZCu
1199
+ GZcCKMnsRIf3QV3XRwBkYmUzK0JAmSsIguz3fK2t6zqapqFp2og6873Hsiw8
1200
+ z5s+JkAqlZqaycDlTKO5gsZTbjynvkwGXNedOiaA67pT8gFMBOJqxDIAqVRq
1201
+ xI+8IYBEIlE4lKoJn4evZiwDkEgkinKfyeZ748aNpuu6pmma2QzkXplKv+yY
1202
+ lBLTNEkmk86CBQus4QDZDDQ2Nl4/f/5817Isp7S0NDv3T6aNZzUfzXzfRwjh
1203
+ K6XmCCHOAoNKKWkACCEKqqurpwkhZEtLC42NjZOledKsuroaTdOU67olwACg
1204
+ CSEGDCGEBdiu6zq+75NMJkNL+v+LJRIJfN8nkUhEgAgQAK5BehzoPT098e7u
1205
+ bj2VSjFnzpxrKjafSSm5cOEC/f39KUAnrVsTgAkUAYU1NTUPx2KxjUEQTLmW
1206
+ YvOZpmnxjo6O3x48eHA76S4UB3qEUgohRBQoGLoyKTJJp+nyR97kmA1IwANS
1207
+ wODQ1auUSmYP9UNjoQBwAIv0DBUMFb6WZpNuRB9wSYsfUEr5kOdfSiGETrr1
1208
+ DUCQhrhWJkj394A0gKeUCjVo3r9Zv0r2P3yyQqPd16MPAAAAAElFTkSuQmCC
1209
+ " alt="note"/>) used in this document are Copyright &copy; 2005 <a href="http://tango.freedesktop.org">Tango Desktop Project</a>. They are part of the <a href="http://tango.freedesktop.org/Tango_Icon_Library">Tango Icon Theme</a> set, which is distributed under the <a href="http://creativecommons.org/licenses/by-sa/2.5/">Creative Commons Attribution-ShareAlike 2.5 License Agreement</a>.</p>
1210
+
1211
+ </div>
1212
+
1213
+ </body>
1214
+ </html>