chipper 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. data/README.rdoc +51 -0
  2. data/ext/extconf.rb +58 -0
  3. data/ext/libstemmer_c/Makefile +10 -0
  4. data/ext/libstemmer_c/examples/stemwords.c +209 -0
  5. data/ext/libstemmer_c/include/libstemmer.h +79 -0
  6. data/ext/libstemmer_c/libstemmer/libstemmer.c +95 -0
  7. data/ext/libstemmer_c/libstemmer/libstemmer_utf8.c +95 -0
  8. data/ext/libstemmer_c/libstemmer/modules.h +190 -0
  9. data/ext/libstemmer_c/libstemmer/modules_utf8.h +121 -0
  10. data/ext/libstemmer_c/mkinc.mak +82 -0
  11. data/ext/libstemmer_c/mkinc_utf8.mak +52 -0
  12. data/ext/libstemmer_c/runtime/api.c +66 -0
  13. data/ext/libstemmer_c/runtime/api.h +26 -0
  14. data/ext/libstemmer_c/runtime/header.h +58 -0
  15. data/ext/libstemmer_c/runtime/utilities.c +478 -0
  16. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  17. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  18. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  19. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  20. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  21. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  22. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  24. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  25. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  26. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.c +521 -0
  27. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  28. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  29. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  30. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  31. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  32. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  33. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  34. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  35. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  36. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  37. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  38. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  39. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  40. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  41. data/ext/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  42. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  43. data/ext/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  44. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  45. data/ext/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  46. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  47. data/ext/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  48. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  49. data/ext/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  50. data/ext/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  51. data/ext/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  52. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  53. data/ext/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  54. data/ext/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  55. data/ext/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  56. data/ext/libstemmer_c/src_c/stem_UTF_8_german.c +527 -0
  57. data/ext/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  58. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  59. data/ext/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  60. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  61. data/ext/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  62. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  63. data/ext/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  64. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  65. data/ext/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  66. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  67. data/ext/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  68. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  69. data/ext/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  70. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  71. data/ext/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  72. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  73. data/ext/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  74. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  75. data/ext/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  76. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  77. data/ext/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  78. data/ext/re2/bitstate.cc +378 -0
  79. data/ext/re2/compile.cc +1138 -0
  80. data/ext/re2/dfa.cc +2086 -0
  81. data/ext/re2/filtered_re2.cc +100 -0
  82. data/ext/re2/filtered_re2.h +99 -0
  83. data/ext/re2/hash.cc +231 -0
  84. data/ext/re2/mimics_pcre.cc +185 -0
  85. data/ext/re2/nfa.cc +709 -0
  86. data/ext/re2/onepass.cc +614 -0
  87. data/ext/re2/parse.cc +2202 -0
  88. data/ext/re2/perl_groups.cc +119 -0
  89. data/ext/re2/prefilter.cc +671 -0
  90. data/ext/re2/prefilter.h +105 -0
  91. data/ext/re2/prefilter_tree.cc +398 -0
  92. data/ext/re2/prefilter_tree.h +130 -0
  93. data/ext/re2/prog.cc +341 -0
  94. data/ext/re2/prog.h +376 -0
  95. data/ext/re2/re2.cc +1180 -0
  96. data/ext/re2/re2.h +837 -0
  97. data/ext/re2/regexp.cc +920 -0
  98. data/ext/re2/regexp.h +632 -0
  99. data/ext/re2/rune.cc +258 -0
  100. data/ext/re2/set.cc +113 -0
  101. data/ext/re2/set.h +55 -0
  102. data/ext/re2/simplify.cc +393 -0
  103. data/ext/re2/stringpiece.cc +87 -0
  104. data/ext/re2/stringpiece.h +182 -0
  105. data/ext/re2/tostring.cc +341 -0
  106. data/ext/re2/unicode_casefold.cc +469 -0
  107. data/ext/re2/unicode_casefold.h +75 -0
  108. data/ext/re2/unicode_groups.cc +4851 -0
  109. data/ext/re2/unicode_groups.h +64 -0
  110. data/ext/re2/valgrind.cc +24 -0
  111. data/ext/re2/variadic_function.h +346 -0
  112. data/ext/re2/walker-inl.h +244 -0
  113. data/ext/src/chipper.cc +626 -0
  114. data/ext/src/version.h +1 -0
  115. data/ext/stemmer.rb +40 -0
  116. data/ext/util/arena.h +103 -0
  117. data/ext/util/atomicops.h +79 -0
  118. data/ext/util/benchmark.h +41 -0
  119. data/ext/util/flags.h +27 -0
  120. data/ext/util/logging.h +78 -0
  121. data/ext/util/mutex.h +190 -0
  122. data/ext/util/pcre.h +679 -0
  123. data/ext/util/random.h +29 -0
  124. data/ext/util/sparse_array.h +451 -0
  125. data/ext/util/sparse_set.h +177 -0
  126. data/ext/util/test.h +57 -0
  127. data/ext/util/thread.h +26 -0
  128. data/ext/util/utf.h +43 -0
  129. data/ext/util/util.h +127 -0
  130. data/ext/util/valgrind.h +4517 -0
  131. data/test/helper.rb +5 -0
  132. data/test/test_entities.rb +57 -0
  133. data/test/test_tokens.rb +118 -0
  134. metadata +199 -0
@@ -0,0 +1,119 @@
1
+ // GENERATED BY make_perl_groups.pl; DO NOT EDIT.
2
+ // make_perl_groups.pl >perl_groups.cc
3
+
4
+ #include "re2/unicode_groups.h"
5
+
6
+ namespace re2 {
7
+
8
+ static URange16 code1[] = { /* \d */
9
+ { 0x30, 0x39 },
10
+ };
11
+ static URange16 code2[] = { /* \s */
12
+ { 0x9, 0xa },
13
+ { 0xc, 0xd },
14
+ { 0x20, 0x20 },
15
+ };
16
+ static URange16 code3[] = { /* \w */
17
+ { 0x30, 0x39 },
18
+ { 0x41, 0x5a },
19
+ { 0x5f, 0x5f },
20
+ { 0x61, 0x7a },
21
+ };
22
+ UGroup perl_groups[] = {
23
+ { "\\d", +1, code1, 1 },
24
+ { "\\D", -1, code1, 1 },
25
+ { "\\s", +1, code2, 3 },
26
+ { "\\S", -1, code2, 3 },
27
+ { "\\w", +1, code3, 4 },
28
+ { "\\W", -1, code3, 4 },
29
+ };
30
+ int num_perl_groups = 6;
31
+ static URange16 code4[] = { /* [:alnum:] */
32
+ { 0x30, 0x39 },
33
+ { 0x41, 0x5a },
34
+ { 0x61, 0x7a },
35
+ };
36
+ static URange16 code5[] = { /* [:alpha:] */
37
+ { 0x41, 0x5a },
38
+ { 0x61, 0x7a },
39
+ };
40
+ static URange16 code6[] = { /* [:ascii:] */
41
+ { 0x0, 0x7f },
42
+ };
43
+ static URange16 code7[] = { /* [:blank:] */
44
+ { 0x9, 0x9 },
45
+ { 0x20, 0x20 },
46
+ };
47
+ static URange16 code8[] = { /* [:cntrl:] */
48
+ { 0x0, 0x1f },
49
+ { 0x7f, 0x7f },
50
+ };
51
+ static URange16 code9[] = { /* [:digit:] */
52
+ { 0x30, 0x39 },
53
+ };
54
+ static URange16 code10[] = { /* [:graph:] */
55
+ { 0x21, 0x7e },
56
+ };
57
+ static URange16 code11[] = { /* [:lower:] */
58
+ { 0x61, 0x7a },
59
+ };
60
+ static URange16 code12[] = { /* [:print:] */
61
+ { 0x20, 0x7e },
62
+ };
63
+ static URange16 code13[] = { /* [:punct:] */
64
+ { 0x21, 0x2f },
65
+ { 0x3a, 0x40 },
66
+ { 0x5b, 0x60 },
67
+ { 0x7b, 0x7e },
68
+ };
69
+ static URange16 code14[] = { /* [:space:] */
70
+ { 0x9, 0xd },
71
+ { 0x20, 0x20 },
72
+ };
73
+ static URange16 code15[] = { /* [:upper:] */
74
+ { 0x41, 0x5a },
75
+ };
76
+ static URange16 code16[] = { /* [:word:] */
77
+ { 0x30, 0x39 },
78
+ { 0x41, 0x5a },
79
+ { 0x5f, 0x5f },
80
+ { 0x61, 0x7a },
81
+ };
82
+ static URange16 code17[] = { /* [:xdigit:] */
83
+ { 0x30, 0x39 },
84
+ { 0x41, 0x46 },
85
+ { 0x61, 0x66 },
86
+ };
87
+ UGroup posix_groups[] = {
88
+ { "[:alnum:]", +1, code4, 3 },
89
+ { "[:^alnum:]", -1, code4, 3 },
90
+ { "[:alpha:]", +1, code5, 2 },
91
+ { "[:^alpha:]", -1, code5, 2 },
92
+ { "[:ascii:]", +1, code6, 1 },
93
+ { "[:^ascii:]", -1, code6, 1 },
94
+ { "[:blank:]", +1, code7, 2 },
95
+ { "[:^blank:]", -1, code7, 2 },
96
+ { "[:cntrl:]", +1, code8, 2 },
97
+ { "[:^cntrl:]", -1, code8, 2 },
98
+ { "[:digit:]", +1, code9, 1 },
99
+ { "[:^digit:]", -1, code9, 1 },
100
+ { "[:graph:]", +1, code10, 1 },
101
+ { "[:^graph:]", -1, code10, 1 },
102
+ { "[:lower:]", +1, code11, 1 },
103
+ { "[:^lower:]", -1, code11, 1 },
104
+ { "[:print:]", +1, code12, 1 },
105
+ { "[:^print:]", -1, code12, 1 },
106
+ { "[:punct:]", +1, code13, 4 },
107
+ { "[:^punct:]", -1, code13, 4 },
108
+ { "[:space:]", +1, code14, 2 },
109
+ { "[:^space:]", -1, code14, 2 },
110
+ { "[:upper:]", +1, code15, 1 },
111
+ { "[:^upper:]", -1, code15, 1 },
112
+ { "[:word:]", +1, code16, 4 },
113
+ { "[:^word:]", -1, code16, 4 },
114
+ { "[:xdigit:]", +1, code17, 3 },
115
+ { "[:^xdigit:]", -1, code17, 3 },
116
+ };
117
+ int num_posix_groups = 28;
118
+
119
+ } // namespace re2