isomorfeus-ferret 0.12.4 → 0.12.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +80 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  7. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  8. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  9. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +7 -7
  10. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  11. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  12. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  13. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  14. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  15. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  16. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  17. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  18. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  19. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  20. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  21. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  22. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  23. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  24. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  25. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  26. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  27. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  28. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  29. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  30. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  31. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  32. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  33. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  34. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  35. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  36. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  37. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  38. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  39. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  40. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  41. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  42. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  43. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  44. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  45. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  46. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  47. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  48. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  49. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  50. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  51. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  52. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  53. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  54. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  55. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  56. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  57. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  58. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  59. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  60. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  61. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  62. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  63. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  64. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  65. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  66. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  67. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  68. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  69. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  70. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  71. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  72. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  73. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  74. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  75. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  76. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  77. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  78. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  79. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  80. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  81. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  82. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  83. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  84. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  85. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  86. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  87. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  88. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  89. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  90. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  91. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  92. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  93. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  94. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  95. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  96. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  97. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  98. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  99. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  100. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  101. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  102. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  103. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  104. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  106. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  107. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  108. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  110. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  111. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  112. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  113. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  114. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  115. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  116. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  117. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  118. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  119. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  120. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  121. data/lib/isomorfeus/ferret/version.rb +1 -1
  122. metadata +39 -4
  123. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -5,58 +5,66 @@
5
5
 
6
6
  #include "stem_header.h"
7
7
 
8
- #define unless(C) if(!(C))
9
-
10
8
  #define CREATE_SIZE 1
11
9
 
12
- symbol * create_s(void) {
10
+ extern symbol * create_s(void) {
13
11
  symbol * p;
14
12
  void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
15
13
  if (mem == NULL) return NULL;
16
14
  p = (symbol *) (HEAD + (char *) mem);
17
15
  CAPACITY(p) = CREATE_SIZE;
18
- SET_SIZE(p, CREATE_SIZE);
16
+ SET_SIZE(p, 0);
19
17
  return p;
20
18
  }
21
19
 
22
- void lose_s(symbol * p) {
20
+ extern void lose_s(symbol * p) {
23
21
  if (p == NULL) return;
24
22
  free((char *) p - HEAD);
25
23
  }
26
24
 
27
25
  /*
28
- new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c
29
- if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new
30
- position, or 0 on failure.
26
+ new_p = skip_utf8(p, c, l, n); skips n characters forwards from p + c.
27
+ new_p is the new position, or -1 on failure.
31
28
 
32
29
  -- used to implement hop and next in the utf8 case.
33
30
  */
34
31
 
35
- int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
32
+ extern int skip_utf8(const symbol * p, int c, int limit, int n) {
36
33
  int b;
37
- if (n >= 0) {
38
- for (; n > 0; n--) {
39
- if (c >= l) return -1;
40
- b = p[c++];
41
- if (b >= 0xC0) { /* 1100 0000 */
42
- while (c < l) {
43
- b = p[c];
44
- if (b >= 0xC0 || b < 0x80) break;
45
- /* break unless b is 10------ */
46
- c++;
47
- }
34
+ if (n < 0) return -1;
35
+ for (; n > 0; n--) {
36
+ if (c >= limit) return -1;
37
+ b = p[c++];
38
+ if (b >= 0xC0) { /* 1100 0000 */
39
+ while (c < limit) {
40
+ b = p[c];
41
+ if (b >= 0xC0 || b < 0x80) break;
42
+ /* break unless b is 10------ */
43
+ c++;
48
44
  }
49
45
  }
50
- } else {
51
- for (; n < 0; n++) {
52
- if (c <= lb) return -1;
53
- b = p[--c];
54
- if (b >= 0x80) { /* 1000 0000 */
55
- while (c > lb) {
56
- b = p[c];
57
- if (b >= 0xC0) break; /* 1100 0000 */
58
- c--;
59
- }
46
+ }
47
+ return c;
48
+ }
49
+
50
+ /*
51
+ new_p = skip_b_utf8(p, c, lb, n); skips n characters backwards from p + c - 1
52
+ new_p is the new position, or -1 on failure.
53
+
54
+ -- used to implement hop and next in the utf8 case.
55
+ */
56
+
57
+ extern int skip_b_utf8(const symbol * p, int c, int limit, int n) {
58
+ int b;
59
+ if (n < 0) return -1;
60
+ for (; n > 0; n--) {
61
+ if (c <= limit) return -1;
62
+ b = p[--c];
63
+ if (b >= 0x80) { /* 1000 0000 */
64
+ while (c > limit) {
65
+ b = p[c];
66
+ if (b >= 0xC0) break; /* 1100 0000 */
67
+ c--;
60
68
  }
61
69
  }
62
70
  }
@@ -66,156 +74,174 @@ int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
66
74
  /* Code for character groupings: utf8 cases */
67
75
 
68
76
  static int get_utf8(const symbol * p, int c, int l, int * slot) {
69
- int b0, b1;
77
+ int b0, b1, b2;
70
78
  if (c >= l) return 0;
71
79
  b0 = p[c++];
72
80
  if (b0 < 0xC0 || c == l) { /* 1100 0000 */
73
- * slot = b0; return 1;
81
+ *slot = b0;
82
+ return 1;
74
83
  }
75
- b1 = p[c++];
84
+ b1 = p[c++] & 0x3F;
76
85
  if (b0 < 0xE0 || c == l) { /* 1110 0000 */
77
- * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
86
+ *slot = (b0 & 0x1F) << 6 | b1;
87
+ return 2;
88
+ }
89
+ b2 = p[c++] & 0x3F;
90
+ if (b0 < 0xF0 || c == l) { /* 1111 0000 */
91
+ *slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
92
+ return 3;
78
93
  }
79
- * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3;
94
+ *slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F);
95
+ return 4;
80
96
  }
81
97
 
82
98
  static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
83
- int b0, b1;
99
+ int a, b;
84
100
  if (c <= lb) return 0;
85
- b0 = p[--c];
86
- if (b0 < 0x80 || c == lb) { /* 1000 0000 */
87
- * slot = b0; return 1;
101
+ b = p[--c];
102
+ if (b < 0x80 || c == lb) { /* 1000 0000 */
103
+ *slot = b;
104
+ return 1;
88
105
  }
89
- b1 = p[--c];
90
- if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */
91
- * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2;
106
+ a = b & 0x3F;
107
+ b = p[--c];
108
+ if (b >= 0xC0 || c == lb) { /* 1100 0000 */
109
+ *slot = (b & 0x1F) << 6 | a;
110
+ return 2;
92
111
  }
93
- * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3;
112
+ a |= (b & 0x3F) << 6;
113
+ b = p[--c];
114
+ if (b >= 0xE0 || c == lb) { /* 1110 0000 */
115
+ *slot = (b & 0xF) << 12 | a;
116
+ return 3;
117
+ }
118
+ *slot = (p[--c] & 0x7) << 18 | (b & 0x3F) << 12 | a;
119
+ return 4;
94
120
  }
95
121
 
96
- int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
122
+ extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
97
123
  do {
98
- int ch;
99
- int w = get_utf8(z->p, z->c, z->l, & ch);
100
- unless (w) return -1;
101
- if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
102
- return w;
103
- z->c += w;
124
+ int ch;
125
+ int w = get_utf8(z->p, z->c, z->l, & ch);
126
+ if (!w) return -1;
127
+ if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
128
+ return w;
129
+ z->c += w;
104
130
  } while (repeat);
105
131
  return 0;
106
132
  }
107
133
 
108
- int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
134
+ extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
109
135
  do {
110
- int ch;
111
- int w = get_b_utf8(z->p, z->c, z->lb, & ch);
112
- unless (w) return -1;
113
- if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
114
- return w;
115
- z->c -= w;
136
+ int ch;
137
+ int w = get_b_utf8(z->p, z->c, z->lb, & ch);
138
+ if (!w) return -1;
139
+ if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
140
+ return w;
141
+ z->c -= w;
116
142
  } while (repeat);
117
143
  return 0;
118
144
  }
119
145
 
120
- int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
146
+ extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
121
147
  do {
122
- int ch;
123
- int w = get_utf8(z->p, z->c, z->l, & ch);
124
- unless (w) return -1;
125
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
126
- return w;
127
- z->c += w;
148
+ int ch;
149
+ int w = get_utf8(z->p, z->c, z->l, & ch);
150
+ if (!w) return -1;
151
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
152
+ return w;
153
+ z->c += w;
128
154
  } while (repeat);
129
155
  return 0;
130
156
  }
131
157
 
132
- int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
158
+ extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
133
159
  do {
134
- int ch;
135
- int w = get_b_utf8(z->p, z->c, z->lb, & ch);
136
- unless (w) return -1;
137
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
138
- return w;
139
- z->c -= w;
160
+ int ch;
161
+ int w = get_b_utf8(z->p, z->c, z->lb, & ch);
162
+ if (!w) return -1;
163
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
164
+ return w;
165
+ z->c -= w;
140
166
  } while (repeat);
141
167
  return 0;
142
168
  }
143
169
 
144
170
  /* Code for character groupings: non-utf8 cases */
145
171
 
146
- int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
172
+ extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
147
173
  do {
148
- int ch;
149
- if (z->c >= z->l) return -1;
150
- ch = z->p[z->c];
151
- if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
152
- return 1;
153
- z->c++;
174
+ int ch;
175
+ if (z->c >= z->l) return -1;
176
+ ch = z->p[z->c];
177
+ if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
178
+ return 1;
179
+ z->c++;
154
180
  } while (repeat);
155
181
  return 0;
156
182
  }
157
183
 
158
- int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
184
+ extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
159
185
  do {
160
- int ch;
161
- if (z->c <= z->lb) return -1;
162
- ch = z->p[z->c - 1];
163
- if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
164
- return 1;
165
- z->c--;
186
+ int ch;
187
+ if (z->c <= z->lb) return -1;
188
+ ch = z->p[z->c - 1];
189
+ if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
190
+ return 1;
191
+ z->c--;
166
192
  } while (repeat);
167
193
  return 0;
168
194
  }
169
195
 
170
- int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
196
+ extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
171
197
  do {
172
- int ch;
173
- if (z->c >= z->l) return -1;
174
- ch = z->p[z->c];
175
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
176
- return 1;
177
- z->c++;
198
+ int ch;
199
+ if (z->c >= z->l) return -1;
200
+ ch = z->p[z->c];
201
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
202
+ return 1;
203
+ z->c++;
178
204
  } while (repeat);
179
205
  return 0;
180
206
  }
181
207
 
182
- int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
208
+ extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
183
209
  do {
184
- int ch;
185
- if (z->c <= z->lb) return -1;
186
- ch = z->p[z->c - 1];
187
- unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
188
- return 1;
189
- z->c--;
210
+ int ch;
211
+ if (z->c <= z->lb) return -1;
212
+ ch = z->p[z->c - 1];
213
+ if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
214
+ return 1;
215
+ z->c--;
190
216
  } while (repeat);
191
217
  return 0;
192
218
  }
193
219
 
194
- int eq_s(struct SN_env * z, int s_size, const symbol * s) {
220
+ extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
195
221
  if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
196
222
  z->c += s_size; return 1;
197
223
  }
198
224
 
199
- int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
225
+ extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
200
226
  if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
201
227
  z->c -= s_size; return 1;
202
228
  }
203
229
 
204
- int eq_v(struct SN_env * z, const symbol * p) {
230
+ extern int eq_v(struct SN_env * z, const symbol * p) {
205
231
  return eq_s(z, SIZE(p), p);
206
232
  }
207
233
 
208
- int eq_v_b(struct SN_env * z, const symbol * p) {
234
+ extern int eq_v_b(struct SN_env * z, const symbol * p) {
209
235
  return eq_s_b(z, SIZE(p), p);
210
236
  }
211
237
 
212
- int find_among(struct SN_env * z, const struct among * v, int v_size) {
238
+ extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
213
239
 
214
240
  int i = 0;
215
241
  int j = v_size;
216
242
 
217
243
  int c = z->c; int l = z->l;
218
- symbol * q = z->p + c;
244
+ const symbol * q = z->p + c;
219
245
 
220
246
  const struct among * w;
221
247
 
@@ -224,7 +250,7 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
224
250
 
225
251
  int first_key_inspected = 0;
226
252
 
227
- while(1) {
253
+ while (1) {
228
254
  int k = i + ((j - i) >> 1);
229
255
  int diff = 0;
230
256
  int common = common_i < common_j ? common_i : common_j; /* smaller */
@@ -237,8 +263,13 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
237
263
  common++;
238
264
  }
239
265
  }
240
- if (diff < 0) { j = k; common_j = common; }
241
- else { i = k; common_i = common; }
266
+ if (diff < 0) {
267
+ j = k;
268
+ common_j = common;
269
+ } else {
270
+ i = k;
271
+ common_i = common;
272
+ }
242
273
  if (j - i <= 1) {
243
274
  if (i > 0) break; /* v->s has been inspected */
244
275
  if (j == i) break; /* only one item in v */
@@ -251,7 +282,7 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
251
282
  first_key_inspected = 1;
252
283
  }
253
284
  }
254
- while(1) {
285
+ while (1) {
255
286
  w = v + i;
256
287
  if (common_i >= w->s_size) {
257
288
  z->c = c + w->s_size;
@@ -269,13 +300,13 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
269
300
 
270
301
  /* find_among_b is for backwards processing. Same comments apply */
271
302
 
272
- int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
303
+ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
273
304
 
274
305
  int i = 0;
275
306
  int j = v_size;
276
307
 
277
308
  int c = z->c; int lb = z->lb;
278
- symbol * q = z->p + c - 1;
309
+ const symbol * q = z->p + c - 1;
279
310
 
280
311
  const struct among * w;
281
312
 
@@ -284,7 +315,7 @@ int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
284
315
 
285
316
  int first_key_inspected = 0;
286
317
 
287
- while(1) {
318
+ while (1) {
288
319
  int k = i + ((j - i) >> 1);
289
320
  int diff = 0;
290
321
  int common = common_i < common_j ? common_i : common_j;
@@ -306,7 +337,7 @@ int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
306
337
  first_key_inspected = 1;
307
338
  }
308
339
  }
309
- while(1) {
340
+ while (1) {
310
341
  w = v + i;
311
342
  if (common_i >= w->s_size) {
312
343
  z->c = c - w->s_size;
@@ -345,7 +376,7 @@ static symbol * increase_size(symbol * p, int n) {
345
376
  Returns 0 on success, -1 on error.
346
377
  Also, frees z->p (and sets it to NULL) on error.
347
378
  */
348
- int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
379
+ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
349
380
  {
350
381
  int adjustment;
351
382
  int len;
@@ -367,11 +398,10 @@ int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol
367
398
  z->l += adjustment;
368
399
  if (z->c >= c_ket)
369
400
  z->c += adjustment;
370
- else
371
- if (z->c > c_bra)
372
- z->c = c_bra;
401
+ else if (z->c > c_bra)
402
+ z->c = c_bra;
373
403
  }
374
- unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
404
+ if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
375
405
  if (adjptr != NULL)
376
406
  *adjptr = adjustment;
377
407
  return 0;
@@ -394,20 +424,20 @@ static int slice_check(struct SN_env * z) {
394
424
  return 0;
395
425
  }
396
426
 
397
- int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
427
+ extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
398
428
  if (slice_check(z)) return -1;
399
429
  return replace_s(z, z->bra, z->ket, s_size, s, NULL);
400
430
  }
401
431
 
402
- int slice_from_v(struct SN_env * z, const symbol * p) {
432
+ extern int slice_from_v(struct SN_env * z, const symbol * p) {
403
433
  return slice_from_s(z, SIZE(p), p);
404
434
  }
405
435
 
406
- int slice_del(struct SN_env * z) {
436
+ extern int slice_del(struct SN_env * z) {
407
437
  return slice_from_s(z, 0, 0);
408
438
  }
409
439
 
410
- int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
440
+ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
411
441
  int adjustment;
412
442
  if (replace_s(z, bra, ket, s_size, s, &adjustment))
413
443
  return -1;
@@ -416,16 +446,11 @@ int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s)
416
446
  return 0;
417
447
  }
418
448
 
419
- int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
420
- int adjustment;
421
- if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
422
- return -1;
423
- if (bra <= z->bra) z->bra += adjustment;
424
- if (bra <= z->ket) z->ket += adjustment;
425
- return 0;
449
+ extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
450
+ return insert_s(z, bra, ket, SIZE(p), p);
426
451
  }
427
452
 
428
- symbol * slice_to(struct SN_env * z, symbol * p) {
453
+ extern symbol * slice_to(struct SN_env * z, symbol * p) {
429
454
  if (slice_check(z)) {
430
455
  lose_s(p);
431
456
  return NULL;
@@ -443,7 +468,7 @@ symbol * slice_to(struct SN_env * z, symbol * p) {
443
468
  return p;
444
469
  }
445
470
 
446
- symbol * assign_to(struct SN_env * z, symbol * p) {
471
+ extern symbol * assign_to(struct SN_env * z, symbol * p) {
447
472
  int len = z->l;
448
473
  if (CAPACITY(p) < len) {
449
474
  p = increase_size(p, len);
@@ -455,8 +480,18 @@ symbol * assign_to(struct SN_env * z, symbol * p) {
455
480
  return p;
456
481
  }
457
482
 
483
+ extern int len_utf8(const symbol * p) {
484
+ int size = SIZE(p);
485
+ int len = 0;
486
+ while (size--) {
487
+ symbol b = *p++;
488
+ if (b >= 0xC0 || b < 0x80) ++len;
489
+ }
490
+ return len;
491
+ }
492
+
458
493
  #if 0
459
- void debug(struct SN_env * z, int number, int line_count) {
494
+ extern void debug(struct SN_env * z, int number, int line_count) {
460
495
  int i;
461
496
  int limit = SIZE(z->p);
462
497
  /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
@@ -774,6 +774,12 @@ static VALUE frb_ts_term_vectors(VALUE v) { return INT2FIX(execute_test(33)); }
774
774
  static VALUE frb_ts_test(VALUE v) { return INT2FIX(execute_test(34)); }
775
775
  static VALUE frb_ts_threading(VALUE v) { return INT2FIX(execute_test(35)); }
776
776
 
777
+ static VALUE frb_ts_posh(VALUE v) {
778
+ const char *posh = POSH_GetArchString();
779
+ printf("\n%s\n", posh);
780
+ return Qnil;
781
+ }
782
+
777
783
  static VALUE frb_ts_run_all(VALUE v) {
778
784
  int i, test_count;
779
785
  int rv = 0;
@@ -845,6 +851,6 @@ void Init_Test(void) {
845
851
  rb_define_singleton_method(mTest, "term_vectors", frb_ts_term_vectors, 0);
846
852
  rb_define_singleton_method(mTest, "test", frb_ts_test, 0);
847
853
  rb_define_singleton_method(mTest, "threading", frb_ts_threading, 0);
848
-
854
+ rb_define_singleton_method(mTest, "posh", frb_ts_posh, 0);
849
855
  rb_define_singleton_method(mTest, "run_all", frb_ts_run_all, 0);
850
856
  }
@@ -556,7 +556,6 @@ static void test_phrase_query(TestCase *tc, void *data)
556
556
  check_to_s(tc, phq, field, "\"\"");
557
557
  check_to_s(tc, phq, (FrtSymbol)NULL, "field:\"\"");
558
558
 
559
-
560
559
  frt_phq_add_term(phq, "quick", 1);
561
560
  frt_phq_add_term(phq, "brown", 1);
562
561
  frt_phq_add_term(phq, "fox", 1);
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.12.4'
3
+ VERSION = '0.12.5'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.4
4
+ version: 0.12.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-02-08 00:00:00.000000000 Z
11
+ date: 2022-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -152,12 +152,15 @@ files:
152
152
  - ext/isomorfeus_ferret_ext/isomorfeus_ferret.h
153
153
  - ext/isomorfeus_ferret_ext/libstemmer.c
154
154
  - ext/isomorfeus_ferret_ext/libstemmer.h
155
- - ext/isomorfeus_ferret_ext/q_parser.y
156
155
  - ext/isomorfeus_ferret_ext/scanner.h
157
156
  - ext/isomorfeus_ferret_ext/scanner.in
158
157
  - ext/isomorfeus_ferret_ext/scanner.rl
159
158
  - ext/isomorfeus_ferret_ext/scanner_mb.rl
160
159
  - ext/isomorfeus_ferret_ext/scanner_utf8.rl
160
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c
161
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h
162
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c
163
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h
161
164
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c
162
165
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h
163
166
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c
@@ -172,6 +175,10 @@ files:
172
175
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h
173
176
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c
174
177
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h
178
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c
179
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h
180
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c
181
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h
175
182
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c
176
183
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h
177
184
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c
@@ -184,10 +191,20 @@ files:
184
191
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h
185
192
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c
186
193
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h
194
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c
195
+ - ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h
187
196
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c
188
197
  - ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h
189
198
  - ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c
190
199
  - ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h
200
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c
201
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h
202
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c
203
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h
204
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c
205
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h
206
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c
207
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h
191
208
  - ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c
192
209
  - ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h
193
210
  - ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c
@@ -200,10 +217,22 @@ files:
200
217
  - ext/isomorfeus_ferret_ext/stem_UTF_8_french.h
201
218
  - ext/isomorfeus_ferret_ext/stem_UTF_8_german.c
202
219
  - ext/isomorfeus_ferret_ext/stem_UTF_8_german.h
220
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c
221
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h
222
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c
223
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h
203
224
  - ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c
204
225
  - ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h
226
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c
227
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h
228
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c
229
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h
205
230
  - ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c
206
231
  - ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h
232
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c
233
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h
234
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c
235
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h
207
236
  - ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c
208
237
  - ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h
209
238
  - ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c
@@ -214,12 +243,18 @@ files:
214
243
  - ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h
215
244
  - ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c
216
245
  - ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h
246
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c
247
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h
217
248
  - ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c
218
249
  - ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h
219
250
  - ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c
220
251
  - ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h
252
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c
253
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h
221
254
  - ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c
222
255
  - ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h
256
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c
257
+ - ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h
223
258
  - ext/isomorfeus_ferret_ext/stem_api.c
224
259
  - ext/isomorfeus_ferret_ext/stem_api.h
225
260
  - ext/isomorfeus_ferret_ext/stem_header.h
@@ -280,7 +315,7 @@ files:
280
315
  - lib/isomorfeus/ferret/monitor.rb
281
316
  - lib/isomorfeus/ferret/stdlib_patches.rb
282
317
  - lib/isomorfeus/ferret/version.rb
283
- homepage: http://isomorfeus.com
318
+ homepage: https://isomorfeus.com
284
319
  licenses:
285
320
  - MIT
286
321
  metadata: