rmmseg-cpp-traditional 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +4 -0
  3. data/History.txt +21 -0
  4. data/LICENSE.txt +22 -0
  5. data/Manifest.txt +43 -0
  6. data/README +111 -0
  7. data/README.md +29 -0
  8. data/Rakefile +19 -0
  9. data/bin/rmmseg +63 -0
  10. data/data/chars.dic +12638 -0
  11. data/data/words.dic +120308 -0
  12. data/ext/rmmseg/algor.cpp +222 -0
  13. data/ext/rmmseg/algor.h +80 -0
  14. data/ext/rmmseg/chunk.h +59 -0
  15. data/ext/rmmseg/dict.cpp +230 -0
  16. data/ext/rmmseg/dict.h +34 -0
  17. data/ext/rmmseg/extconf.rb +17 -0
  18. data/ext/rmmseg/memory.cpp +9 -0
  19. data/ext/rmmseg/memory.h +43 -0
  20. data/ext/rmmseg/rmmseg.cpp +263 -0
  21. data/ext/rmmseg/rules.h +86 -0
  22. data/ext/rmmseg/token.h +19 -0
  23. data/ext/rmmseg/word.h +44 -0
  24. data/lib/rmmseg/dictionary.rb +59 -0
  25. data/lib/rmmseg/ferret.rb +64 -0
  26. data/lib/rmmseg-cpp-traditional/version.rb +7 -0
  27. data/lib/rmmseg-cpp-traditional.rb +9 -0
  28. data/lib/rmmseg.rb +3 -0
  29. data/misc/convert.rb +114 -0
  30. data/misc/ferret_example.rb +59 -0
  31. data/misc/homepage.erb +196 -0
  32. data/misc/homepage.html +1212 -0
  33. data/rmmseg-cpp-traditional.gemspec +19 -0
  34. data/spec/rmmseg_spec.rb +8 -0
  35. data/spec/spec_helper.rb +17 -0
  36. data/tasks/ann.rake +81 -0
  37. data/tasks/bones.rake +21 -0
  38. data/tasks/gem.rake +126 -0
  39. data/tasks/git.rake +41 -0
  40. data/tasks/homepage.rake +15 -0
  41. data/tasks/manifest.rake +49 -0
  42. data/tasks/notes.rake +28 -0
  43. data/tasks/post_load.rake +39 -0
  44. data/tasks/rdoc.rake +51 -0
  45. data/tasks/rubyforge.rake +58 -0
  46. data/tasks/setup.rb +268 -0
  47. data/tasks/spec.rake +55 -0
  48. data/tasks/svn.rake +48 -0
  49. data/tasks/test.rake +38 -0
  50. data/test/test_rmmseg.rb +0 -0
  51. metadata +116 -0
@@ -0,0 +1,1212 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml">
4
+ <head>
5
+ <meta http-equiv="content-type" content="text/html; charset=utf-8"/>
6
+ <meta name="date" content="10 September 2011"/>
7
+ <meta name="author" content="pluskid"/>
8
+ <meta name="generator" content="Gerbil 3.1.0"/>
9
+ <title>rmmseg-cpp Homepage</title>
10
+
11
+ <style type="text/css" media="all">
12
+ <!--
13
+ .icon-warning {
14
+ display: list-item;
15
+ list-style-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAlSSURBVGiB1ZpJjFzFGcd/31t6ncWzeHrGG15JwBAcjAGDkwtJDPIgnAgh+UAkDlzgFHHIzQeOUXKJAheUE1IQEoqMMiZBkSMQToxkx2DMImMbg5cZz7jbs3W/3t6rL4funnm9zoxnEiklleq9qn9X/f9ffVWvlhZV5f85OGtSi4iMwT4LDtsi9wObDaQC1X4AW+S2BZPAtUD1vIFjo3CaNbCerKaOMZEnbMt6XlWf6Uml7M337U50pYbsSFcXkZ4u3HgCTEDZ8yhls5Tm5pmfmAiufXXBm7+VDkTk3cCYN0dVT/xPBYyJ7LdFXutav37Xzr0PJgfuuVvcaAymZyCbhVIZiiUolys/cF2IuhCJQFcSeropFwukL3ytl8+ey2VvpS8Gqi+Pqp76rwo4JrIjKvK629tzYM+PHo8P3LtbyNyGTAbyhWXVsdBaPAYDfWhvN5kLF/Szkx/ny7NzJ4uqLx1WvbzmAo6LHLQc553d+x9ObHporyXzOZi4CYFZHuEWeaoKtgXDKehNcP30WfPVx2c84/vPHlJ9f80EHBd5xU0kXn300MFEd/8g3JgA31826fB7rb0mjOMgG4bI3r7F6b+e8Mpe/ugh1d+tWsB7Im90p4aO7Hvyp8moWjB1a0nC4bxw/e2EhfMlNUjJ8jn73onc3FT6rVHVFzvxszoVHhd5pTs1dOSxp59KRr1SHXkNxcY8o4qqVtIG3AKmRWqAYDKN7QU8dOgnye6hwSPHRV7pxLFtDxwXOegmEn/+8c9HE9GCDzmvvQVXaOVWedWKFsuSCfy48PGxv3m+l/9FuzHRUsAxkR1Rx/n08aef6uqyXMh5bUl3dJ8O5Bc5N4tfyFnXhVfMcvr4iWzZD/a0mp1aulBU5PV7H9mb6HIctGr5dq5Ra7CTazS5UUM9phFfLdfpeeJujLv3PZCIirzeimuTgDGR/W5vz4GNO7ZZZi7X1Fg7X25HOky4kXQdtlpOqB0DmBmP1PYtltOdPDAmsn9JAbbIa/c9tCeumRkUWdEA7GTldqSpietgBDOdY9fe++OWyGsdBYyJPJFcP7hrIDUkWvLrLbGElVnCymHXCJNeFqZkWDfUL4nBvl1jIk+0FWBb1vNbv7cjqdNzqGW3dZGWrrGElRtdY8meEEFFqqmFPznNxp13JW14Psx5cTktIiryTP+mDWImp8GNdJ7mGtKFZ1UQWXwXaY0Jl4XxjRioCCkpvZtSYkSeQURq09dCD4zBvq71A7bjB6jjoiILDbS0imWhlgWWhVbzqeUvWE7aYyyrqT5aYaq/JZ7AKvskBvvsMdhX422FHg4P37U5ofO5yo9rhMMVhUmH3aCKa8JUDbCAEWmJQaQyBjoYCNfFvzXD4OaRBHC4yYVE5P5kb7etBtSxmt3n7bfrXqUhXevgPfccsOiGiBDki8R7k7ZV2fUBYQGw2Y3HMEXT5JNrRVSN1lUkSPuKLat5vBnFiUYANtdgCwIMpNx4FC0U6gdeVcxqBKhRjJqWW2ARwbIsROpbMA3vlZWA4CSiKKSaBaj2O7EoahVbzgodl60dQhAEHffuqkoQBItCar0SMmItNV4eNxrBVA8L6gTUKlPXreu+Ow1qlMAEy8fXhCBYdmVCaCrPZuumVwjPQiK3S4UCanRxKgvFlQQTmBWRryNKRUjx3DmCbHaRg+9jTEC5WMQSuV3Dh3tgspwvDNuBDRK9ox5YqdU7hXImQzmTQZJJrL4+/JuTWF0xSl4BKmdMQP0sdK2Yzz8QlejCtLUcESJSGaTGYLTzBn8lYWHJks3iZ7MAWNFuyoUiAteaBA
Sq57Mzc08lYzFbInEkHl+oCKDwwgtNgky+gP/dt5THxyEIlr3UuBMMgNWbJD+XDYzq+YW8UPmx9NVxz1rfT/Gzz1Bj6r6aC+PCGPx0msLnn5M/+RGlq1dR3++4oDO1FWo7TLW8E8YAdqqP6WuTHnCsqQdG4fTx9HTgOxZqfLwPPsAeHEQGBiAwaKmI8TyCdBoNgtaWW8KCq8FIMoaJ2OTTs8EonG4SgKoi8u7Mjclfdo8MSnDpGuWpKZia6rhvrdsPrALTSZgC0eF+5m+kVeDdMKG6yVbhzfFL3+WcrRswlnTeUYW7fAnXWA6mk/sgQmTrMLeu3MgpvBnmXCdgVPVEPj19cTY9rc7Wjc2bmQ4NrRbTSby7bZhsZlYL6dmLjSfZTSsEo/ryN598mXd2bgLXWZMBqHQWZmg48QgLcx0iOzcwfv5SHni5kW+TgFHVU8G8dzLz3bhxd29f2n2WYcFluVgL0QaI37uFmauTxp/zTrY6fm+5RiupvvTtv7/wyskYzo5NS7pGeEO+li4W3T6C6Y5x89OLng8vteLaUsBh1csSmGcvfnjGs7aOYA31LWnBVblYC4yb6sPdluLqR+c9Ccyz7e4M2q6SD6m+H3iFo5f/+UkuumcXdqr/ztznDjBuqo/4nu1cP/VFzuSLRzvdFSx5vP4XkTcSg+uObDuwJ+l/M07h8g2g+YPTlLdCDFVR8R0jRLYPc+PUl7l8evatp5c4Xl/2BYediL267cc/TFjzebwvrqCl8pp+bcV1SO6+C7pjXD953jP50tpccIREHFTbemfDnrsT67akrMKlcYpXJjBmcQV6J0sELIvYthTxnSPMXZsyU+e+8SQwa3vFBCAizm/g+7vg97HuxKMbfrAzlhjolcKVCUoTGYJcoZko7d3HTsZwh/uIb0+Rz8zrzfNXCt68d+ZD+PUf4GugBBRVtbQqASLiAjEgWkt/BY88Bkd7Bnu3DGwbiXWNDApln9LEbfzZHEGxhMkXCYqVa1aJuljRCFbMxe5NEh3pQ10HbzyjmW8nCrOZ+Rt/h9/+Ec4CBaDYkBZUtbhiASLiAPEq8VjDc+xF2P8g/GwYHokN9NjrNg5Goj1Jy4lFsGMRnJiLAkGhjF8o4ueLFOZyZmY8Uy5m5s11OPsxfPgnOFMjCuSrxPOh97YiViWglmdD/Em451F4eD1sTkB/DLojFTxFKOQhl4WZKRj/F3z6D7hoQuSWISCrqk371eW4kFUlXiMfbUgj1edIKLqAXY1SjQoE1ViuxlIoFkLpgusAWSrWb7lfXckglhDZeDV1Q4RdKvsLJ0S+9qGsCaitOgLAbyGmDHhUra6q5SV53emfPaqCrBDpMPlaalVjI/la9EMCgnZW7hT+A5SLlrQmK/qkAAAAAElFTkSuQmCC);
16
+ list-style-position: inside;
17
+ }
18
+ .icon-tip {
19
+ display: list-item;
20
+ list-style-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAN1wAADdcBQiibeAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAAyOSURBVGiB1ZlrkB3Fdcf/5/Q87r1z7z7uvrSrXa3QC7EIyUFCSWwnJIANFiGJXdQSICRllGDni4LjqviDnQ+ufHEKx2BXygg9cEwVriBcMsZOHFlEWCo/glFhrAeSxe6ifWnvPu7evc+Z6Z6ezoe7S7aUQO1qF5N01anpmaqZ/v/6nNMzfYaMMfj/3Pj9FrDSZq3Wg4iIPvnIX+8G2fcS2R+BiTtMHGUBgNiaBfGkMeo4jPr2Nw589edmlVxPq/Gchx/5m1uJnWdT6aZMz+ZdXsfaXuF5KXipBAQDoQwR+D5yV0b14BuvVstz+bKJ5YNPH/jKyfcVYO/ez2ViQV9zk8l7b761P93a1oF8SWKm7KMaSgRSARQj5QpkkjZ6Wj30tnuYmp7F8RefrVbL1edZm32HD/9D+dcOsPeRz3wMZD/TtemW9I07b01MFBRGpipoyhAa04yGlECjJ5BwGLUwQiVQmJ4LMFUM0NfTgh3rszh58sfh+dOnyjDqzw4fePwHvzaAhz/92ftsJ3V4+4fv87zGdlwar8JLxtix0UM2YwEwMCYGMcWAMXE8f8kASsf4xeCMqYWab9veI6rlCr773D/XfL/08NP7//G59xzgk488eguR+/KuO//CC+IU8mWJD2xKYG2rM69Sg5g0DAwRYgMYoJ6yxhgYA4SRxqlzkzqUWuza3OG0p23+5pNPVI0Jf/8bB554dTl6lrWM7tu3zyVyv7d+58c9djLIFUL83o4M1rY6MEYDiGMmigiImBAxEDGRYpASzFIwSyZSjhDRTb3N2rbYf21wqmY7Tvw7d9+XInK+t2/fPvc9AygH9qNeS29mXe8GDOVq+OCNHlIJQhxrACYGjAYjYkAzkyImxYSImRUBigiKmSQTVFtjQids4duC/RNnRyu/u/MG07FuU6Yc2I++JwD9/V90mPgLfbfcmRqe8nF9t4uWRgs6jmBgYiJEND/7xKQIpJigBJEkMkrMe4JBipmkxayyDa50LAp0ZPyTb4zXHry/PyHY+kJ//xedVQfwGoq3NbX3xkakUKgodLfai2YeGoCuhw7Vw4ahBLGszzhLYpJCsGRBUjBLIUimXVs5tvBti/3R6XJZG9ab+7YZr6F426oDsO0+0LZuW2auGsFLxnAcQMcRANJEiJhYEUERkySGYqqHEJFRgiGZIJkQMkEKppAJUkZxxWEObIsD2+Lwrcmi/4GbdyXZth9YdQADfKi5pYuKVYmWRp6ffWgg1kSIQCYipohBiogWYl0K5lAwSUuQZKK6eCYZxybQ2lRsWwS2zaEtOBjLV8qb1/cQAR9cqq4lfwuR1m1uIoUwKkOQQGxiI5g0EUcEiogQzcd9RIBiQgQiTfXQqr8JBIwxBJiYQqVDx6aAYjIUsQGAmlTsJV0ycdy2VF1L8kB/f78gMp5hG0CE2WoNhLpwABEIEYEVs1FEJAWRIiJpEYUWc8jMoRAc2IICixGaGP5sReZsmwNHiMAWHNjMoS04jA1iwZTu7+8XqwZw5MgRHRsTCY5AHKEShFrHWhNBA0bDIALqHqgvlyQFkWRBITOFFiMgQkigEKBgphiMAKbqChHYdj3+LebQEiK0BCOOY33kyBG9agAAQGQVfL+KpEtgoujyVCUEEDEoYmYl3k5cSCEoZIGQASnIhIIptJgCFuSXamokMig4lvBtiwNbcGBZFNoWhdm0wxU/jEBcXKqupe8HmC+WSzMd6WQGJR9qYtb3u1o8ZDOuJsQRIBQR1VccQNbXf1JMFIGgZaQLs2U5o1SsHYutiIwmig3AgDYcW0a0N3n2bLEUgn
h8ybKWDBAF3yrnBmtNKUcL4kgIigaulEpXZqolP4z9G9c1re9tTXWnHJEAQRJRoGNTVHGcmyvLC8WSHEQcV9akwts3NslPuRYHjuDQEhQKi6UlWGbTicTY6KihWL2w6gDKEi9Mjf4qziaNSbpWLIgiY4ycLgbFoVxpohqqCgkRrW3xNq/NJn/DFXGTDe0ZFTa5HHa3efrDG9qsvWtas31uwpu1hAhsiwNLCGkJlgnbitZmvc4zp19ho6NvrzrAM08+NqXC4DMXXv1RsLWzWRCTZqZICERCIHprovymiuLayTMT3xzP106lHCvdmLG3dDQld63Jenc0pJPX+YE8z0IMGTt53LI4FBZJS0BagmVfT8vWgYFBNTw6+tyhQ189t1Rdy9oTH3rqy4f2fvpv7990/cju69q7nemSr5k5YuJoPF+7srGzoSvtuTxZlueKfvRz1xK+Y1MgiBUxdCKZabFd975CoXzOYbKMERwLspqSdk9z0tnwT//yrYqR+OxyNC27KqHI3P+Tl/99LhEVM+taMykGacGkY4OgUAlGshm30WFSQpC0bAotQYHlkG8Ly29IJZuVil+3iSqWTaHFpFoyybbetoY79z+1P/arlQeXu71cNsAzTz42pf3ybx974Xk/EVfbmtNugpk0Mel8RY3ZZFLMpGzByiaWjuDQsThIuuzHMsjIOD4rLAotwdKxLNPZ6N375P798fDl0T89fOArLy1XzzXVhQ4e/NpYFNRu/8mJY4muZq9FEDQT6WIlmC3MzrgUKVcQKWGRFMSha7EfVqtObvKKkKEcFcRSCFYtDcntA0OD/uWRkS89ffCx716LlmsubB08+PiF3MTEf2q/mBVCMBEMk1D5QqkwV8hliVhbhMh2WDmCw6HBX7UX8rMjlu34FkGxYe0w73jp+Esp24ivX6uOFVXmlAyfGR8doUbPTVP9O81EhmeGBy+1MyM24FgwtBCWOnfm9U4IGnSEUUKwZmFiBrpzE7lzTz31pSW/ea9uK6rMCYhTA5cuJbfuuLljYLw0JaszbYh8PXz54gbvZ9V7PC+yTRxWpqcKswMD422t7cmhwC+ku9dv9710U28gQxOE/rI28asKcOjQl4f/8q8+NzQ5ermjwTaNuZk32i2KUglX0dTUOPr6NsC2ErgyNon16zNIuDN/ODY2PjYydun0H+z51Ieeff5IYCJ97H0DAIBYqo9/5/nnzv753ofaUj3brOn8KLq6Q0xOXIFtJ2FbgON42LxlM7q62oUxTm8qtaX3/PkzpXPnz506dOjxf13J+KtSG+3/k4/8sr1j1/aPfuyjuOH6LSiWp6CjKrSuQVCIVDKNZKoJqWQWrtuC4y8dMz/84alAgK47ePCJyfcVYM+ezRu71mYH7rijD2fPr4FlZ7F79060tWXR3NwML+XB9yuYmysgN5HDf5z4GfzaKMZHX/u3F1/85d0rGhyrEEKJhDVXLgVBpILEA/e3Y67YjMGhi3j9F3PIz1RQrkg0NrpoyDCaGgxu2FxDaU5hfFQtufLwngIcPXohf889N/x4eKR6x+7fymL7tl246aYCAIMwlNCRRLVaRbEwi8JsHoMDs3hrOAdj8PIq6F/5H5q77urrbG3N3L5jexatLduRSd8I11kDISy4jg1mBhFBKo3Lw9MoVwKEUgIws6ugf+UeSKWMDEMZVavSzs++DsdNAjQHIgOpIszMzCGfz6NcKqOnO4tUAjBxLyYnZ/54NQBW7IGjRy/kS6XwRxcvFmFiAZgYWkfQWsMYg+bmDLq62rGmoxlCCFRrIaam52AMzv+fAPjEJza1Z1vSt+3c2Y7W1puQ9vqQcP87hIQQICJEWmN8fBbGMNZ1t8N27Fv27NncvdLxVxxCUiY41pqCQOLqENKxwVyhjOnpPCqVKnrWtSE/ncfgTB5kjC4WQ0NENuq1VXMtP/6uCYCIeP5eBlC+664tf3/06MXPT+Rsa+PGMazp7EIy5aFSriKfL2Bqchpzc3OolM
oYHcvhypWcqtXk53/601EfgId65U4TkZ7vR8aYeElalgNNRGJe+OKjACB27Oja1NqaujuZbLjb89Lbk0kL5fIc4ljDcTyEYQSl1PlKpfDi5cul7w8Pz+YAKMxXtq+yaOFo6n9OVg5ARNa86MUAC30HgAsgsWvXrt/cunXrEw899FDSdV0UCgUMDQ3hlVdeCS5evPh3Z86ceW1eeDhvwfxRLhIfLe4bY6J30rWkJJ4PmXcyaxGAe/r06Yu+74+dOHFCTU5OIp/P480334zCMMydOXNmDEAGQApAYuGe+futdxqDiOgdta22BwC4nZ2dXdu2bfujlpaWe4wxNDMz84OzZ88em5qamkZ9ppfjAflu+bDcHGAANv6XHFhk9iI4CwDNm5m3aJG9Ww4o1MPnXQVe09fovEsXBC+AXO16LBK/0BYggPpqs9jeBlvqCnTNAP/jIXWgBeNF/YXzBfHxIoC3z5cj+Or2X3r9Ye2F1izgAAAAAElFTkSuQmCC);
21
+ list-style-position: inside;
22
+ }
23
+ -->
24
+ </style>
25
+
26
+ <style type="text/css" media="screen">
27
+ <!--
28
+ body
29
+ {
30
+ color : #000000;
31
+ background-color : #FFFFFF;
32
+ line-height : 1.5em;
33
+ font-family : Calibri, Verdana, sans-serif;
34
+ }
35
+
36
+ /* emphasis */
37
+
38
+ em,
39
+ blockquote
40
+ {
41
+ font-family : Cambria, Georgia, serif;
42
+ }
43
+
44
+ blockquote
45
+ {
46
+ margin : 1em;
47
+ border : 5px dotted #ddd;
48
+ padding : 1em;
49
+ }
50
+
51
+ hr
52
+ {
53
+ height : 0;
54
+ border : 0;
55
+ border-top : 2px solid #FF0000;
56
+ }
57
+
58
+ /* source code */
59
+
60
+ tt,
61
+ code,
62
+ pre
63
+ {
64
+ font-family : Consolas, "Lucida Console", monospace;
65
+ }
66
+
67
+ tt
68
+ {
69
+ font-weight : bold;
70
+ color : #A52A2A;
71
+ background-color : #FFFAF0;
72
+ }
73
+
74
+ /* output of the syntax coloring library */
75
+ .code
76
+ {
77
+ background-color : #FFFFF0;
78
+ }
79
+
80
+ pre
81
+ {
82
+ cursor : text;
83
+ line-height : normal;
84
+ border : 1px dashed #C0C0C0;
85
+ background-color : #F5FFDF;
86
+ padding : 1em;
87
+ overflow : auto;
88
+ }
89
+
90
+ /* hyperlinks */
91
+
92
+ a > img
93
+ {
94
+ border : none;
95
+ }
96
+
97
+ a img
98
+ {
99
+ _border : none; /* for IE6 */
100
+ }
101
+
102
+ a.here:link,
103
+ a.here:visited
104
+ {
105
+ color : #000000;
106
+ }
107
+
108
+ /* lists */
109
+
110
+ #content li:first-child
111
+ {
112
+ margin-top : 1em;
113
+ }
114
+
115
+ #content li
116
+ {
117
+ margin-bottom : 1em;
118
+ }
119
+
120
+ /* headings */
121
+
122
+ h1,
123
+ h2,
124
+ h3,
125
+ h4,
126
+ h5,
127
+ h6,
128
+ .title
129
+ {
130
+ font-weight : lighter;
131
+ font-family : Constantia, "Book Antiqua", "URW Bookman L", serif;
132
+ }
133
+
134
+ #lof h1,
135
+ #lof h2,
136
+ #lof h3,
137
+ #lof h4,
138
+ #lof h5,
139
+ #lof h6
140
+ {
141
+ margin-top : 1.25em;
142
+ }
143
+
144
+ #content h1,
145
+ #content h2,
146
+ #content h3,
147
+ #content h4,
148
+ #content h5,
149
+ #content h6
150
+ {
151
+ margin-top : 2.5em;
152
+ line-height : 1.25em;
153
+ }
154
+
155
+ #content h1
156
+ {
157
+ font-size : 2.0em;
158
+ }
159
+
160
+ #content h2
161
+ {
162
+ font-size : 1.8em;
163
+ }
164
+
165
+ #content h3
166
+ {
167
+ font-size : 1.6em;
168
+ }
169
+
170
+ #content h4
171
+ {
172
+ font-size : 1.4em;
173
+ }
174
+
175
+ #content h5
176
+ {
177
+ font-size : 1.2em;
178
+ }
179
+
180
+ #content h6
181
+ {
182
+ font-size : 1.0em;
183
+ }
184
+
185
+ /* tables */
186
+
187
+ table
188
+ {
189
+ border-collapse : collapse; /* no spacing between cell borders */
190
+ margin : auto; /* center horizontally */
191
+ margin-top : 1em;
192
+ }
193
+
194
+ th,
195
+ td
196
+ {
197
+ padding : 1em;
198
+ border : 1px solid #bbb;
199
+ vertical-align : top;
200
+ background-color : inherit;
201
+ }
202
+
203
+ th
204
+ {
205
+ background-color : #F5F5F5;
206
+ }
207
+
208
+ /* document structure */
209
+
210
+ .nav
211
+ {
212
+ text-align : center;
213
+ border-bottom : thick dotted #DCDCDC;
214
+ padding-bottom : 1em;
215
+ margin-bottom : 4em;
216
+ }
217
+
218
+ #header
219
+ {
220
+ text-align : center;
221
+ }
222
+
223
+ .header_outside_above,
224
+ #header,
225
+ .header_outside_below
226
+ {
227
+ margin-bottom : 5em;
228
+ }
229
+
230
+ .footer_outside_above,
231
+ #footer,
232
+ .footer_outside_below
233
+ {
234
+ margin-top : 5em;
235
+ }
236
+
237
+ #header .header_inside_above,
238
+ #footer .footer_inside_above
239
+ {
240
+ margin-bottom : 4em;
241
+ }
242
+
243
+ #header .header_inside_below,
244
+ #footer .footer_inside_below
245
+ {
246
+ margin-top : 4em;
247
+ }
248
+
249
+ #Abstract
250
+ {
251
+ margin-bottom : 5em;
252
+ }
253
+
254
+ #Contents li
255
+ {
256
+ list-style-type : none;
257
+ }
258
+
259
+ #Contents li ul
260
+ {
261
+ padding-bottom : 1em;
262
+ border-left : thick solid #F5F5F5;
263
+ _border-left : none; /* for IE6 */
264
+ }
265
+
266
+ #Contents li ul:hover
267
+ {
268
+ border-color : #DCDCDC;
269
+ }
270
+
271
+ #Contents > ul
272
+ {
273
+ padding-left : 1em;
274
+ }
275
+
276
+ #References
277
+ {
278
+ margin-top : 5em;
279
+ }
280
+
281
+ #footer
282
+ {
283
+ border-top : thick dotted #DCDCDC;
284
+ padding-top : 1em;
285
+ text-align : center;
286
+ }
287
+
288
+ #footer-credits
289
+ {
290
+ margin-top : 2em;
291
+ }
292
+
293
+ /* document nodes */
294
+
295
+ .part > .title,
296
+ .chapter > .title
297
+ {
298
+ padding-bottom : 0.5em;
299
+ }
300
+
301
+ .part > .title > big,
302
+ .chapter > .title > big
303
+ {
304
+ display : block;
305
+ margin-top : 0.25em;
306
+ }
307
+
308
+ .part .title big,
309
+ .chapter .title big
310
+ {
311
+ _display : block; /* for IE6 */
312
+ _margin-top : 0.25em; /* for IE6 */
313
+ _margin-bottom : 0.75em; /* for IE6 */
314
+ }
315
+
316
+ .paragraph > .title,
317
+ .tip > .title,
318
+ .note > .title,
319
+ .caution > .title,
320
+ .warning > .title,
321
+ .important > .title,
322
+ .figure > .title,
323
+ .table > .title,
324
+ .example > .title,
325
+ .equation > .title,
326
+ .procedure > .title
327
+ {
328
+ font-size : large;
329
+ margin-top : 2em;
330
+ }
331
+
332
+ .paragraph .title,
333
+ .tip .title,
334
+ .note .title,
335
+ .caution .title,
336
+ .warning .title,
337
+ .important .title,
338
+ .figure .title,
339
+ .table .title,
340
+ .example .title,
341
+ .equation .title,
342
+ .procedure .title
343
+ {
344
+ _font-size : large; /* for IE6 */
345
+ _font-weight : bold; /* large is not bold in IE6 */
346
+ _margin-top : 2em; /* for IE6 */
347
+ }
348
+
349
+ .tip ,
350
+ .note ,
351
+ .caution ,
352
+ .warning ,
353
+ .important,
354
+ .figure ,
355
+ .table ,
356
+ .example ,
357
+ .equation ,
358
+ .procedure
359
+ {
360
+ margin : 3em;
361
+ }
362
+
363
+ .tip > .icon,
364
+ .note > .icon,
365
+ .caution > .icon,
366
+ .warning > .icon,
367
+ .important > .icon
368
+ {
369
+ float : left;
370
+ margin : 0 1em 1em 0; /* top right bottom left */
371
+ }
372
+
373
+ .tip .icon,
374
+ .note .icon,
375
+ .caution .icon,
376
+ .warning .icon,
377
+ .important .icon
378
+ {
379
+ _display : none; /* IE6 cannot display embedded images */
380
+ }
381
+
382
+ .figure > .title
383
+ {
384
+ text-align : center;
385
+ }
386
+
387
+ .figure .title
388
+ {
389
+ _text-align : center; /* for IE6 */
390
+ }
391
+
392
+ .figure > .content img
393
+ {
394
+ display : block;
395
+ margin : auto;
396
+ }
397
+
398
+ .figure .content img
399
+ {
400
+ _display : block; /* for IE6 */
401
+ _margin : auto; /* for IE6 */
402
+ }
403
+
404
+ body
405
+ {
406
+ margin : auto;
407
+ padding : 0.5em;
408
+ max-width : 36em;
409
+ }
410
+
411
+ /* hyperlinks */
412
+
413
+ a:link
414
+ {
415
+ color : #0000FF;
416
+ text-decoration : none;
417
+ }
418
+
419
+ a:visited
420
+ {
421
+ color : #800080;
422
+ text-decoration : none;
423
+ }
424
+
425
+ a:hover
426
+ {
427
+ color : #FF0000;
428
+ text-decoration : underline;
429
+ }
430
+
431
+ a:target
432
+ {
433
+ color : #FF0000;
434
+ font-weight : bold;
435
+ }
436
+
437
+ -->
438
+ </style>
439
+ <style type="text/css" media="print">
440
+ <!--
441
+ body
442
+ {
443
+ color : #000000;
444
+ background-color : #FFFFFF;
445
+ line-height : 1.5em;
446
+ font-family : Calibri, Verdana, sans-serif;
447
+ }
448
+
449
+ /* emphasis */
450
+
451
+ em,
452
+ blockquote
453
+ {
454
+ font-family : Cambria, Georgia, serif;
455
+ }
456
+
457
+ blockquote
458
+ {
459
+ margin : 1em;
460
+ border : 5px dotted #ddd;
461
+ padding : 1em;
462
+ }
463
+
464
+ hr
465
+ {
466
+ height : 0;
467
+ border : 0;
468
+ border-top : 2px solid #FF0000;
469
+ }
470
+
471
+ /* source code */
472
+
473
+ tt,
474
+ code,
475
+ pre
476
+ {
477
+ font-family : Consolas, "Lucida Console", monospace;
478
+ }
479
+
480
+ tt
481
+ {
482
+ font-weight : bold;
483
+ color : #A52A2A;
484
+ background-color : #FFFAF0;
485
+ }
486
+
487
+ /* output of the syntax coloring library */
488
+ .code
489
+ {
490
+ background-color : #FFFFF0;
491
+ }
492
+
493
+ pre
494
+ {
495
+ cursor : text;
496
+ line-height : normal;
497
+ border : 1px dashed #C0C0C0;
498
+ background-color : #F5FFDF;
499
+ padding : 1em;
500
+ overflow : auto;
501
+ }
502
+
503
+ /* hyperlinks */
504
+
505
+ a > img
506
+ {
507
+ border : none;
508
+ }
509
+
510
+ a img
511
+ {
512
+ _border : none; /* for IE6 */
513
+ }
514
+
515
+ a.here:link,
516
+ a.here:visited
517
+ {
518
+ color : #000000;
519
+ }
520
+
521
+ /* lists */
522
+
523
+ #content li:first-child
524
+ {
525
+ margin-top : 1em;
526
+ }
527
+
528
+ #content li
529
+ {
530
+ margin-bottom : 1em;
531
+ }
532
+
533
+ /* headings */
534
+
535
+ h1,
536
+ h2,
537
+ h3,
538
+ h4,
539
+ h5,
540
+ h6,
541
+ .title
542
+ {
543
+ font-weight : lighter;
544
+ font-family : Constantia, "Book Antiqua", "URW Bookman L", serif;
545
+ }
546
+
547
+ #lof h1,
548
+ #lof h2,
549
+ #lof h3,
550
+ #lof h4,
551
+ #lof h5,
552
+ #lof h6
553
+ {
554
+ margin-top : 1.25em;
555
+ }
556
+
557
+ #content h1,
558
+ #content h2,
559
+ #content h3,
560
+ #content h4,
561
+ #content h5,
562
+ #content h6
563
+ {
564
+ margin-top : 2.5em;
565
+ line-height : 1.25em;
566
+ }
567
+
568
+ #content h1
569
+ {
570
+ font-size : 2.0em;
571
+ }
572
+
573
+ #content h2
574
+ {
575
+ font-size : 1.8em;
576
+ }
577
+
578
+ #content h3
579
+ {
580
+ font-size : 1.6em;
581
+ }
582
+
583
+ #content h4
584
+ {
585
+ font-size : 1.4em;
586
+ }
587
+
588
+ #content h5
589
+ {
590
+ font-size : 1.2em;
591
+ }
592
+
593
+ #content h6
594
+ {
595
+ font-size : 1.0em;
596
+ }
597
+
598
+ /* tables */
599
+
600
+ table
601
+ {
602
+ border-collapse : collapse; /* no spacing between cell borders */
603
+ margin : auto; /* center horizontally */
604
+ margin-top : 1em;
605
+ }
606
+
607
+ th,
608
+ td
609
+ {
610
+ padding : 1em;
611
+ border : 1px solid #bbb;
612
+ vertical-align : top;
613
+ background-color : inherit;
614
+ }
615
+
616
+ th
617
+ {
618
+ background-color : #F5F5F5;
619
+ }
620
+
621
+ /* document structure */
622
+
623
+ .nav
624
+ {
625
+ text-align : center;
626
+ border-bottom : thick dotted #DCDCDC;
627
+ padding-bottom : 1em;
628
+ margin-bottom : 4em;
629
+ }
630
+
631
+ #header
632
+ {
633
+ text-align : center;
634
+ }
635
+
636
+ .header_outside_above,
637
+ #header,
638
+ .header_outside_below
639
+ {
640
+ margin-bottom : 5em;
641
+ }
642
+
643
+ .footer_outside_above,
644
+ #footer,
645
+ .footer_outside_below
646
+ {
647
+ margin-top : 5em;
648
+ }
649
+
650
+ #header .header_inside_above,
651
+ #footer .footer_inside_above
652
+ {
653
+ margin-bottom : 4em;
654
+ }
655
+
656
+ #header .header_inside_below,
657
+ #footer .footer_inside_below
658
+ {
659
+ margin-top : 4em;
660
+ }
661
+
662
+ #Abstract
663
+ {
664
+ margin-bottom : 5em;
665
+ }
666
+
667
+ #Contents li
668
+ {
669
+ list-style-type : none;
670
+ }
671
+
672
+ #Contents li ul
673
+ {
674
+ padding-bottom : 1em;
675
+ border-left : thick solid #F5F5F5;
676
+ _border-left : none; /* for IE6 */
677
+ }
678
+
679
+ #Contents li ul:hover
680
+ {
681
+ border-color : #DCDCDC;
682
+ }
683
+
684
+ #Contents > ul
685
+ {
686
+ padding-left : 1em;
687
+ }
688
+
689
+ #References
690
+ {
691
+ margin-top : 5em;
692
+ }
693
+
694
+ #footer
695
+ {
696
+ border-top : thick dotted #DCDCDC;
697
+ padding-top : 1em;
698
+ text-align : center;
699
+ }
700
+
701
+ #footer-credits
702
+ {
703
+ margin-top : 2em;
704
+ }
705
+
706
+ /* document nodes */
707
+
708
+ .part > .title,
709
+ .chapter > .title
710
+ {
711
+ padding-bottom : 0.5em;
712
+ }
713
+
714
+ .part > .title > big,
715
+ .chapter > .title > big
716
+ {
717
+ display : block;
718
+ margin-top : 0.25em;
719
+ }
720
+
721
+ .part .title big,
722
+ .chapter .title big
723
+ {
724
+ _display : block; /* for IE6 */
725
+ _margin-top : 0.25em; /* for IE6 */
726
+ _margin-bottom : 0.75em; /* for IE6 */
727
+ }
728
+
729
+ .paragraph > .title,
730
+ .tip > .title,
731
+ .note > .title,
732
+ .caution > .title,
733
+ .warning > .title,
734
+ .important > .title,
735
+ .figure > .title,
736
+ .table > .title,
737
+ .example > .title,
738
+ .equation > .title,
739
+ .procedure > .title
740
+ {
741
+ font-size : large;
742
+ margin-top : 2em;
743
+ }
744
+
745
+ .paragraph .title,
746
+ .tip .title,
747
+ .note .title,
748
+ .caution .title,
749
+ .warning .title,
750
+ .important .title,
751
+ .figure .title,
752
+ .table .title,
753
+ .example .title,
754
+ .equation .title,
755
+ .procedure .title
756
+ {
757
+ _font-size : large; /* for IE6 */
758
+ _font-weight : bold; /* large is not bold in IE6 */
759
+ _margin-top : 2em; /* for IE6 */
760
+ }
761
+
762
+ .tip ,
763
+ .note ,
764
+ .caution ,
765
+ .warning ,
766
+ .important,
767
+ .figure ,
768
+ .table ,
769
+ .example ,
770
+ .equation ,
771
+ .procedure
772
+ {
773
+ margin : 3em;
774
+ }
775
+
776
+ .tip > .icon,
777
+ .note > .icon,
778
+ .caution > .icon,
779
+ .warning > .icon,
780
+ .important > .icon
781
+ {
782
+ float : left;
783
+ margin : 0 1em 1em 0; /* top right bottom left */
784
+ }
785
+
786
+ .tip .icon,
787
+ .note .icon,
788
+ .caution .icon,
789
+ .warning .icon,
790
+ .important .icon
791
+ {
792
+ _display : none; /* IE6 cannot display embedded images */
793
+ }
794
+
795
+ .figure > .title
796
+ {
797
+ text-align : center;
798
+ }
799
+
800
+ .figure .title
801
+ {
802
+ _text-align : center; /* for IE6 */
803
+ }
804
+
805
+ .figure > .content img
806
+ {
807
+ display : block;
808
+ margin : auto;
809
+ }
810
+
811
+ .figure .content img
812
+ {
813
+ _display : block; /* for IE6 */
814
+ _margin : auto; /* for IE6 */
815
+ }
816
+
817
+ /* source code */
818
+
819
+ tt
820
+ {
821
+ background-color : inherit;
822
+ font-weight : normal;
823
+ }
824
+
825
+ pre,
826
+ .code
827
+ {
828
+ border : none;
829
+ background-color : inherit;
830
+ }
831
+
832
+ /* headings */
833
+
834
+ h1,
835
+ h2,
836
+ h3,
837
+ h4,
838
+ h5,
839
+ h6
840
+ {
841
+ font-weight : normal;
842
+ }
843
+
844
+ /* hyperlinks */
845
+
846
+ /* blend all hyperlinks with normal text */
847
+ a:link,
848
+ a:visited
849
+ {
850
+ color : #000000;
851
+ text-decoration : none;
852
+ }
853
+
854
+ /* emphasize external and cross-reference hyperlinks */
855
+ a:not([href^="#"]):link,
856
+ a:not([href^="#"]):visited,
857
+ a.xref[title]:link,
858
+ a.xref[title]:visited
859
+ {
860
+ color : #0000FF;
861
+ text-decoration : underline;
862
+ font-weight : bolder;
863
+ }
864
+
865
+ /* show URL of destination for external hyperlinks */
866
+ a:not([href^="#"]):after
867
+ {
868
+ content : " " attr(href);
869
+ font-family : Consolas, "Lucida Console", monospace;
870
+ }
871
+
872
+ /* show name of destination for cross-references */
873
+ a.xref[title]:after
874
+ {
875
+ content : " " attr(title);
876
+ }
877
+
878
+ a:after
879
+ {
880
+ font-weight : normal;
881
+ font-size : smaller;
882
+ }
883
+
884
+ /* document structure */
885
+
886
+ .nav,
887
+ #lof
888
+ {
889
+ display : none;
890
+ }
891
+
892
+ #Contents,
893
+ .part,
894
+ .chapter,
895
+ #References
896
+ {
897
+ page-break-before : always;
898
+ }
899
+
900
+ /* document nodes */
901
+
902
+ .part > .title > big,
903
+ .chapter > .title > big
904
+ {
905
+ padding-bottom : 0.5em;
906
+ }
907
+
908
+ .part .title big,
909
+ .chapter .title big
910
+ {
911
+ _padding-bottom : 0.5em; /* for IE6 */
912
+ }
913
+
914
+ -->
915
+ </style>
916
+ </head>
917
+ <body>
918
+ <div class="nav">
919
+ <a href="#Contents">Contents</a> &middot; <a href="#Figures">Figures</a> &middot; <a href="#Tips">Tips</a> &middot; <a href="#Warnings">Warnings</a>
920
+ </div>
921
+
922
+ <br style="display: none"/>
923
+ <hr style="display: none"/>
924
+ <br style="display: none"/>
925
+
926
+
927
+ <div id="header">
928
+
929
+
930
+ <h1 class="title"><a class="here" href="#">rmmseg-cpp Homepage</a></h1>
931
+ <h2 class="authors"><a href="http://blog.pluskid.org">pluskid</a></h2>
932
+ <h3 class="date">10 September 2011</h3>
933
+
934
+ </div>
935
+
936
+
937
+
938
+
939
+ <br style="display: none"/>
940
+ <hr style="display: none"/>
941
+ <br style="display: none"/>
942
+
943
+ <div id="Contents">
944
+ <h1 class="title"><a class="here" href="#Contents">Contents</a></h1>
945
+ <ul>
946
+ <li>1&nbsp;&nbsp;<a id="a16539600" href="#Introduction">Introduction</a></li><li>2&nbsp;&nbsp;<a id="a16533660" href="#Setup">Setup</a><ul><li>2.1&nbsp;&nbsp;<a id="a16530900" href="#Requirements">Requirements</a></li><li>2.2&nbsp;&nbsp;<a id="a16472140" href="#Installation">Installation</a><ul><li>2.2.1&nbsp;&nbsp;<a id="a16468300" href="#Using-RubyGems">Using RubyGems</a></li><li>2.2.2&nbsp;&nbsp;<a id="a16363260" href="#From-Git">From Git</a></li></ul></li></ul></li><li>3&nbsp;&nbsp;<a id="a16272720" href="#Usage">Usage</a><ul><li>3.1&nbsp;&nbsp;<a id="a16246860" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a></li><li>3.2&nbsp;&nbsp;<a id="a16240340" href="#Use-in-Ruby-program">Use in Ruby program</a><ul><li>3.2.1&nbsp;&nbsp;<a id="a16231580" href="#Initialize">Initialize</a></li><li>3.2.2&nbsp;&nbsp;<a id="a16187880" href="#Ferret-Integration">Ferret Integration</a></li><li>3.2.3&nbsp;&nbsp;<a id="a16113620" href="#Normal-Ruby-program">Normal Ruby program</a></li></ul></li></ul></li><li>4&nbsp;&nbsp;<a id="a16072000" href="#Who-use-it">Who use it</a></li><li>5&nbsp;&nbsp;<a id="a16034860" href="#Resources">Resources</a></li>
947
+
948
+ </ul>
949
+ </div>
950
+
951
+ <div id="lof"><h1 id="Figures" class="title"><a class="here" href="#Figures">Figures</a></h1> <ol><li><a id="a16148860" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></li></ol><h1 id="Tips" class="title"><a class="here" href="#Tips">Tips</a></h1> <ol><li><a id="a16067160" href="#Expand-this-list">Expand this list</a></li></ol><h1 id="Warnings" class="title"><a class="here" href="#Warnings">Warnings</a></h1> <ol><li><a id="a16360020" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></li></ol></div>
952
+
953
+ <br style="display: none"/>
954
+ <hr style="display: none"/>
955
+ <br style="display: none"/>
956
+
957
+ <div id="content">
958
+ <div class="chapter">
959
+ <h1 class="title">
960
+ Chapter
961
+ <a class="list" id="Introduction" href="#a16539600">1</a>
962
+
963
+ <br/>
964
+
965
+ <a class="here" href="#Introduction"><big>Introduction</big></a>
966
+ </h1>
967
+
968
+ <div class="content"><p>rmmseg-cpp is a high performance Chinese word segmentation utility for
969
+ Ruby. It features full <a href="http://ferret.davebalmain.com/">Ferret</a> integration
970
+ as well as support for normal Ruby program usage.</p>
971
+ <p>rmmseg-cpp is a rewrite of the original
972
+ <a href="http://rmmseg.rubyforge.org/">RMMSeg</a> gem in C++. RMMSeg is written
973
+ in pure Ruby. Though I tried hard to tweak RMMSeg, it just consumes
974
+ lots of memory and the segmenting process is rather slow.</p>
975
+ <p>The interface is almost identical to RMMSeg but the performance is
976
+ much better. This gem is always preferable in production
977
+ use. However, if you want to understand how the MMSEG segmenting
978
+ algorithm works, the source code of RMMSeg is a better choice than
979
+ this.</p></div>
980
+ </div>
981
+ <div class="chapter">
982
+ <h1 class="title">
983
+ Chapter
984
+ <a class="list" id="Setup" href="#a16533660">2</a>
985
+
986
+ <br/>
987
+
988
+ <a class="here" href="#Setup"><big>Setup</big></a>
989
+ </h1>
990
+
991
+ <div class="content"><div class="section">
992
+ <h2 class="title">
993
+ <a class="list" id="Requirements" href="#a16530900">2.1</a>&nbsp;&nbsp;<a class="here" href="#Requirements">Requirements</a>
994
+ </h2>
995
+ <div class="content"><p>Your system needs the following software to run RMMSeg.</p>
996
+ <table border="1">
997
+ <tr>
998
+ <th>Software </th>
999
+ <th>Notes </th>
1000
+ </tr>
1001
+ <tr>
1002
+ <td> <a href="http://ruby-lang.org">Ruby</a> </td>
1003
+ <td> Version 1.8.x is required </td>
1004
+ </tr>
1005
+ <tr>
1006
+ <td> RubyGems </td>
1007
+ <td> rmmseg-cpp is released as a gem </td>
1008
+ </tr>
1009
+ <tr>
1010
+ <td> g++ </td>
1011
+ <td> Used to build the native extension </td>
1012
+ </tr>
1013
+ </table></div>
1014
+ </div>
1015
+ <div class="section">
1016
+ <h2 class="title">
1017
+ <a class="list" id="Installation" href="#a16472140">2.2</a>&nbsp;&nbsp;<a class="here" href="#Installation">Installation</a>
1018
+ </h2>
1019
+ <div class="content"><div class="section">
1020
+ <h3 class="title">
1021
+ <a class="list" id="Using-RubyGems" href="#a16468300">2.2.1</a>&nbsp;&nbsp;<a class="here" href="#Using-RubyGems">Using RubyGems</a>
1022
+ </h3>
1023
+ <div class="content"><p>To install the gem remotely from <a href="http://rubyforge.org">RubyForge</a>:</p>
1024
+ sudo gem install rmmseg-cpp
1025
+ <p>Or you can download the gem file manually from
1026
+ <a href="http://rubyforge.org/projects/rmmseg-cpp/">RubyForge</a> and
1027
+ install it locally:</p>
1028
+ sudo gem install --local rmmseg-cpp-x.y.z.gem</div>
1029
+ </div>
1030
+ <div class="section">
1031
+ <h3 class="title">
1032
+ <a class="list" id="From-Git" href="#a16363260">2.2.2</a>&nbsp;&nbsp;<a class="here" href="#From-Git">From Git</a>
1033
+ </h3>
1034
+ <div class="content"><p>To build the gem manually from the latest source code, you&#8217;ll
1035
+ need to have <strong>git</strong> and <strong>rake</strong> installed.</p>
1036
+ <p><div class="warning">
1037
+ <p class="title"><a class="list" id="The-latest-source-code-may-be-unstable" href="#a16360020">Warning 1</a>.&nbsp;&nbsp;<a class="here" href="#The-latest-source-code-may-be-unstable">The latest source code may be unstable</a></p>
1038
+
1039
+ <div class="content icon-warning">While I try to avoid this kind of problem, the source
1040
+ code from the repository might still be broken sometimes.
1041
+ It is generally not recommended to follow the source code.</div>
1042
+ </div>
1043
+ The source code of rmmseg-cpp is hosted at
1044
+ <a href="http://github.com/pluskid/rmmseg-cpp/">GitHub</a>. You can get the
1045
+ source code by git clone:</p>
1046
+ git clone git://github.com/pluskid/rmmseg-cpp.git
1047
+ <p>then you can use Rake to build and install the gem:</p>
1048
+ cd rmmseg-cpp
1049
+ rake gem:install</div>
1050
+ </div></div>
1051
+ </div></div>
1052
+ </div>
1053
+ <div class="chapter">
1054
+ <h1 class="title">
1055
+ Chapter
1056
+ <a class="list" id="Usage" href="#a16272720">3</a>
1057
+
1058
+ <br/>
1059
+
1060
+ <a class="here" href="#Usage"><big>Usage</big></a>
1061
+ </h1>
1062
+
1063
+ <div class="content">
1064
+ <p><div class="section">
1065
+ <h2 class="title">
1066
+ <a class="list" id="Stand-Alone-rmmseg" href="#a16246860">3.1</a>&nbsp;&nbsp;<a class="here" href="#Stand-Alone-rmmseg">Stand Alone rmmseg</a>
1067
+ </h2>
1068
+ <div class="content"><p>rmmseg-cpp comes with a script <strong>rmmseg</strong>. To get the basic usage, just execute it with <tt>-h</tt> option:</p>
1069
+ rmmseg -h
1070
+ <p>It reads from STDIN and prints the result to STDOUT. Here is a real
1071
+ example:</p>
1072
+ $ echo &quot;我们都喜欢用 Ruby&quot; | rmmseg
1073
+ 我们 都 喜欢 用 Ruby</div>
1074
+ </div>
1075
+ <div class="section">
1076
+ <h2 class="title">
1077
+ <a class="list" id="Use-in-Ruby-program" href="#a16240340">3.2</a>&nbsp;&nbsp;<a class="here" href="#Use-in-Ruby-program">Use in Ruby program</a>
1078
+ </h2>
1079
+ <div class="content"><div class="section">
1080
+ <h3 class="title">
1081
+ <a class="list" id="Initialize" href="#a16231580">3.2.1</a>&nbsp;&nbsp;<a class="here" href="#Initialize">Initialize</a>
1082
+ </h3>
1083
+ <div class="content"><p>To use rmmseg-cpp in Ruby program, you&#8217;ll first load it with RubyGems:</p>
1084
+ <pre class="code">
1085
+ require <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">'</span><span style="">rubygems</span><span style="color:#710">'</span></span>
1086
+ require <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">'</span><span style="">rmmseg</span><span style="color:#710">'</span></span>
1087
+ </pre>
1088
+ <p>Then you may customize the dictionaries used by rmmseg-cpp
1089
+ (see <a href="http://rmmseg-cpp.rubyforge.org/rdoc/classes/RMMSeg/Dictionary.html">the rdoc</a> on
1090
+ how to add your own dictionaries) and load all dictionaries:</p>
1091
+ <pre class="code">
1092
+ <span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Dictionary</span>.load_dictionaries
1093
+ </pre>
1094
+ <p>Now rmmseg-cpp will be ready to do segmenting. If you want to load your own customized
1095
+ dictionaries, please customize <tt>RMMSeg::Dictionary.dictionaries</tt> before calling
1096
+ <tt>load_dictionaries</tt>. e.g.</p>
1097
+ <pre class="code">
1098
+ <span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Dictionary</span>.dictionaries = [[<span style="color:#A60">:chars</span>, <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="">my_chars.dic</span><span style="color:#710">&quot;</span></span>],
1099
+ [<span style="color:#A60">:words</span>, <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="">my_words.dic</span><span style="color:#710">&quot;</span></span>],
1100
+ [<span style="color:#A60">:words</span>, <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="">my_words2.dic</span><span style="color:#710">&quot;</span></span>]]
1101
+ </pre>
1102
+ <p>The basic formats of the char-dictionary and word-dictionary are similar. For each line,
1103
+ there is a number, then <strong>a</strong> space, then the string. Note there <strong>SHOULD</strong> be a newline
1104
+ at the end of the dictionary file. And the number in char-dictionary and word-dictionary
1105
+ has a different meaning.</p>
1106
+ <p>In char-dictionary, the number means the frequency of the character. In word-dictionary,
1107
+ the number means the number of characters in the word. Note that this is NOT the number
1108
+ of <strong>bytes</strong> in the word.</p></div>
1109
+ </div>
1110
+ <div class="section">
1111
+ <h3 class="title">
1112
+ <a class="list" id="Ferret-Integration" href="#a16187880">3.2.2</a>&nbsp;&nbsp;<a class="here" href="#Ferret-Integration">Ferret Integration</a>
1113
+ </h3>
1114
+ <div class="content"><p>To use rmmseg-cpp with Ferret, you&#8217;ll need to <code class="code">require</code> the
1115
+ Ferret support of rmmseg-cpp (Of course you&#8217;ll also have to
1116
+ get Ferret installed. If you have problems running the following
1117
+ example, please try to update to the latest version of both
1118
+ Ferret and rmmseg-cpp first):</p>
1119
+ <pre class="code">
1120
+ require <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">'</span><span style="">rmmseg/ferret</span><span style="color:#710">'</span></span>
1121
+ </pre>
1122
+ <p>rmmseg-cpp comes with a ready to use Ferret analyzer:</p>
1123
+ <pre class="code">
1124
+ analyzer = <span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Ferret</span>::<span style="color:#036;font-weight:bold">Analyzer</span>.new { |tokenizer|
1125
+ <span style="color:#036;font-weight:bold">Ferret</span>::<span style="color:#036;font-weight:bold">Analysis</span>::<span style="color:#036;font-weight:bold">LowerCaseFilter</span>.new(tokenizer)
1126
+ }
1127
+ index = <span style="color:#036;font-weight:bold">Ferret</span>::<span style="color:#036;font-weight:bold">Index</span>::<span style="color:#036;font-weight:bold">Index</span>.new(<span style="color:#A60">:analyzer</span> =&gt; analyzer)
1128
+ </pre>
1129
+ <p>A complete example can be found in <tt>misc/ferret_example.rb</tt>. The result
1130
+ of running that example is shown in <a class="xref" href="#Ferret-Example-Screenshot">Figure 1. Ferret Example Screenshot</a>.</p>
1131
+ <p><div class="figure">
1132
+ <p class="title"><a class="list" id="Ferret-Example-Screenshot" href="#a16148860">Figure 1</a>.&nbsp;&nbsp;<a class="here" href="#Ferret-Example-Screenshot">Ferret Example Screenshot</a></p>
1133
+ <div class="content"><img src="http://lifegoo.pluskid.org/wp-content/uploads/2008/02/rmmseg.png" alt="" /></div>
1134
+ </div></p></div>
1135
+ </div>
1136
+ <div class="section">
1137
+ <h3 class="title">
1138
+ <a class="list" id="Normal-Ruby-program" href="#a16113620">3.2.3</a>&nbsp;&nbsp;<a class="here" href="#Normal-Ruby-program">Normal Ruby program</a>
1139
+ </h3>
1140
+ <div class="content"><p>rmmseg-cpp can also be used in normal Ruby programs. Just create
1141
+ an <code class="code"><span style="color:#036;font-weight:bold">Algorithm</span></code> object and call <code class="code">next_token</code> until a <code class="code"><span style="color:#038;font-weight:bold">nil</span></code> is returned:</p>
1142
+ <pre class="code">
1143
+ algor = <span style="color:#036;font-weight:bold">RMMSeg</span>::<span style="color:#036;font-weight:bold">Algorithm</span>.new(text)
1144
+ loop <span style="color:#080;font-weight:bold">do</span>
1145
+ tok = algor.next_token
1146
+ <span style="color:#080;font-weight:bold">break</span> <span style="color:#080;font-weight:bold">if</span> tok.nil?
1147
+ puts <span style="background-color:#fff0f0;color:#D20"><span style="color:#710">&quot;</span><span style="background:#ddd;color:black"><span style="background:#ddd;font-weight:bold;color:#666">#{</span>tok.text<span style="background:#ddd;font-weight:bold;color:#666">}</span></span><span style=""> [</span><span style="background:#ddd;color:black"><span style="background:#ddd;font-weight:bold;color:#666">#{</span>tok.start<span style="background:#ddd;font-weight:bold;color:#666">}</span></span><span style="">..</span><span style="background:#ddd;color:black"><span style="background:#ddd;font-weight:bold;color:#666">#{</span>tok.end<span style="background:#ddd;font-weight:bold;color:#666">}</span></span><span style="">]</span><span style="color:#710">&quot;</span></span>
1148
+ <span style="color:#080;font-weight:bold">end</span>
1149
+ </pre></div>
1150
+ </div></div>
1151
+ </div></p></div>
1152
+ </div>
1153
+ <div class="chapter">
1154
+ <h1 class="title">
1155
+ Chapter
1156
+ <a class="list" id="Who-use-it" href="#a16072000">4</a>
1157
+
1158
+ <br/>
1159
+
1160
+ <a class="here" href="#Who-use-it"><big>Who uses it</big></a>
1161
+ </h1>
1162
+
1163
+ <div class="content"><p><div class="tip">
1164
+ <p class="title"><a class="list" id="Expand-this-list" href="#a16067160">Tip 1</a>.&nbsp;&nbsp;<a class="here" href="#Expand-this-list">Expand this list</a></p>
1165
+
1166
+ <div class="content icon-tip">If you used rmmseg-cpp and would like your project to
1167
+ appear in this list, please <a href="mailto:pluskid@gmail.com">contact me</a>.</div>
1168
+ </div></p>
1169
+ <ul>
1170
+ <li><a href="http://www.javaeye.com/">JavaEye</a>: One of the biggest software developer
1171
+ community in China.</li>
1172
+ </ul></div>
1173
+ </div>
1174
+ <div class="chapter">
1175
+ <h1 class="title">
1176
+ Chapter
1177
+ <a class="list" id="Resources" href="#a16034860">5</a>
1178
+
1179
+ <br/>
1180
+
1181
+ <a class="here" href="#Resources"><big>Resources</big></a>
1182
+ </h1>
1183
+
1184
+ <div class="content"><ul>
1185
+ <li><a href="http://rubyforge.org/projects/rmmseg-cpp/">Project Home</a>: The Project page at RubyForge.</li>
1186
+ <li><a href="http://rmmseg-cpp.rubyforge.org/rdoc/index.html">RDoc of rmmseg-cpp</a>: The auto generated rdoc of RMMSeg.</li>
1187
+ <li><a href="http://blog.pluskid.org/">Free Mind</a>: The author&#8217;s blog.</li>
1188
+ <li><a href="mailto:pluskid@gmail.com">Author&#8217;s Email</a>: Contact me if you have any problems.</li>
1189
+ </ul></div>
1190
+ </div></div>
1191
+
1192
+
1193
+ <br style="display: none"/>
1194
+ <hr style="display: none"/>
1195
+ <br style="display: none"/>
1196
+
1197
+
1198
+ <div id="footer">
1199
+
1200
+ Generated on 2011-09-10 15:59:08 +0800 by <a href="http://gerbil.rubyforge.org">Gerbil</a> 3.1.0.
1201
+
1202
+ <div id="footer-credits">
1203
+ <span class="icon-warning" style="float: right">&nbsp;</span>
1204
+ <span class="icon-tip" style="float: right">&nbsp;</span>
1205
+
1206
+ <p>The admonition graphics used in this document are Copyright &copy; 2005 <a href="http://tango.freedesktop.org">Tango Desktop Project</a>. They are part of the <a href="http://tango.freedesktop.org/Tango_Icon_Library">Tango Icon Theme</a> set, which is distributed under the <a href="http://creativecommons.org/licenses/by-sa/2.5/">Creative Commons Attribution-ShareAlike 2.5 License Agreement</a>.</p>
1207
+ </div>
1208
+
1209
+ </div>
1210
+
1211
+ </body>
1212
+ </html>