isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,72 @@
1
+ #include <string.h>
2
+ #include "frt_global.h"
3
+ #include "frt_index.h"
4
+ #include "frt_array.h"
5
+ #include "frt_helper.h"
6
+
7
+ /****************************************************************************
8
+ *
9
+ * TermVector
10
+ *
11
+ ****************************************************************************/
12
+
13
+ void frt_tv_destroy(FrtTermVector *tv)
14
+ {
15
+ int i = tv->term_cnt;
16
+ while (i > 0) {
17
+ i--;
18
+ free(tv->terms[i].text);
19
+ free(tv->terms[i].positions);
20
+ }
21
+ free(tv->offsets);
22
+ free(tv->terms);
23
+ free(tv);
24
+ }
25
+
26
+ int frt_tv_scan_to_term_index(FrtTermVector *tv, const char *term)
27
+ {
28
+ int lo = 0; /* search starts array */
29
+ int hi = tv->term_cnt - 1; /* for 1st element < n, return its index */
30
+ int mid;
31
+ int cmp;
32
+ char *mid_term;
33
+
34
+ while (hi >= lo) {
35
+ mid = (lo + hi) >> 1;
36
+ mid_term = tv->terms[mid].text;
37
+ cmp = strcmp(term, mid_term);
38
+ if (cmp < 0) {
39
+ hi = mid - 1;
40
+ }
41
+ else if (cmp > 0) {
42
+ lo = mid + 1;
43
+ }
44
+ else { /* found a match */
45
+ return mid;
46
+ }
47
+ }
48
+ return lo;
49
+ }
50
+
51
+ int frt_tv_get_term_index(FrtTermVector *tv, const char *term)
52
+ {
53
+ int index = frt_tv_scan_to_term_index(tv, term);
54
+ if (index < tv->term_cnt && (0 == strcmp(term, tv->terms[index].text))) {
55
+ /* found term */
56
+ return index;
57
+ }
58
+ else {
59
+ return -1;
60
+ }
61
+ }
62
+
63
+ FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term)
64
+ {
65
+ int index = frt_tv_get_term_index(tv, term);
66
+ if (index >= 0) {
67
+ return &(tv->terms[index]);
68
+ }
69
+ else {
70
+ return NULL;
71
+ }
72
+ }
@@ -0,0 +1,23 @@
1
#ifndef FRT_THREADING_H
#define FRT_THREADING_H

#include <pthread.h>

/*
 * Thin aliases that map the frt_* threading vocabulary one-to-one onto
 * POSIX threads. Every macro forwards its arguments unchanged and yields
 * whatever the underlying pthread call yields.
 */

/* Types */
typedef pthread_mutex_t frt_mutex_t;
typedef pthread_key_t   frt_thread_key_t;
typedef pthread_once_t  frt_thread_once_t;

/* Static initialisers */
#define FRT_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#define FRT_THREAD_ONCE_INIT  PTHREAD_ONCE_INIT

/* Mutexes */
#define frt_mutex_init(mutex, attr) pthread_mutex_init(mutex, attr)
#define frt_mutex_lock(mutex)       pthread_mutex_lock(mutex)
#define frt_mutex_trylock(mutex)    pthread_mutex_trylock(mutex)
#define frt_mutex_unlock(mutex)     pthread_mutex_unlock(mutex)
#define frt_mutex_destroy(mutex)    pthread_mutex_destroy(mutex)

/* Thread-specific data */
#define frt_thread_key_create(key, destructor) pthread_key_create(key, destructor)
#define frt_thread_key_delete(key)             pthread_key_delete(key)
#define frt_thread_setspecific(key, value)     pthread_setspecific(key, value)
#define frt_thread_getspecific(key)            pthread_getspecific(key)

/* Thread lifecycle */
#define frt_thread_exit(retval)          pthread_exit(retval)
#define frt_thread_once(control, routine) pthread_once(control, routine)

#endif
@@ -0,0 +1,54 @@
1
+ #ifndef FRT_WIN32_H
2
+ #define FRT_WIN32_H
3
+
4
+ #include "frt_global.h"
5
+ #include <io.h>
6
+
7
+ struct dirent
8
+ {
9
+ char *d_name;
10
+ };
11
+
12
+ typedef struct DIR
13
+ {
14
+ struct _finddata_t find_data;
15
+ struct dirent de;
16
+ long handle;
17
+ } DIR;
18
+
19
+ DIR *opendir(const char *dirname)
20
+ {
21
+ DIR *d = FRT_ALLOC_AND_ZERO(DIR);
22
+ char dirname_buf[FRT_MAX_FILE_PATH];
23
+ long ff_res;
24
+ sprintf(dirname_buf, "%s\\*", dirname);
25
+ ff_res = _findfirst(dirname_buf, &d->find_data);
26
+ if (ff_res < 0) {
27
+ free(d);
28
+ d = NULL;
29
+ } else {
30
+ d->de.d_name = NULL;
31
+ d->handle = ff_res;
32
+ }
33
+ return d;
34
+ }
35
+
36
+ struct dirent *readdir(DIR *d)
37
+ {
38
+ /* _findfirst already returned so do _findnext */
39
+ if (d->de.d_name != NULL) {
40
+ if (_findnext(d->handle, &d->find_data) < 0) {
41
+ return NULL;
42
+ }
43
+ }
44
+ d->de.d_name = d->find_data.name;
45
+ return &d->de;
46
+ }
47
+
48
+ void closedir(DIR *d)
49
+ {
50
+ _findclose(d->handle);
51
+ free(d);
52
+ }
53
+
54
+ #endif
@@ -0,0 +1,409 @@
1
+ #include <errno.h>
2
+ #include "isomorfeus_ferret.h"
3
+ #include "frt_global.h"
4
+ #include "frt_except.h"
5
+ #include "frt_hash.h"
6
+ #include "frt_hashset.h"
7
+ #include "frb_threading.h"
8
+ #include "frb_lang.h"
9
+
10
+
11
+ /* Object Map */
12
+ static FrtHash *object_map;
13
+
14
+ /* IDs */
15
+ ID id_new;
16
+ ID id_call;
17
+ ID id_eql;
18
+ ID id_hash;
19
+ ID id_capacity;
20
+ ID id_less_than;
21
+ ID id_lt;
22
+ ID id_is_directory;
23
+ ID id_close;
24
+ ID id_cclass;
25
+ ID id_data;
26
+
27
+ static ID id_mkdir_p;
28
+
29
+ /* Symbols */
30
+ VALUE sym_yes;
31
+ VALUE sym_no;
32
+ VALUE sym_true;
33
+ VALUE sym_false;
34
+ VALUE sym_path;
35
+ VALUE sym_dir;
36
+
37
+ /* Modules */
38
+ VALUE mIsomorfeus;
39
+ VALUE mFerret;
40
+ VALUE mStore;
41
+ VALUE mStringHelper;
42
+ VALUE mSpans;
43
+
44
+ /* Classes */
45
+ VALUE cTerm;
46
+ VALUE cFileNotFoundError;
47
+ VALUE cParseError;
48
+ VALUE cStateError;
49
+
50
+ void Init_Benchmark(void);
51
+ void Init_Test(void);
52
+
53
/*
 * Hash callback for pointer-keyed tables: the hash of a key is simply the
 * numeric value of the pointer itself.
 */
unsigned long long value_hash(const void *key)
{
    const unsigned long long address = (unsigned long long)key;

    return address;
}
57
+
58
/*
 * Equality callback for pointer-keyed tables: two keys are equal only
 * when they are the very same pointer. Returns 1 for equal, 0 otherwise.
 */
int value_eq(const void *key1, const void *key2)
{
    return (key1 != key2) ? 0 : 1;
}
62
+
63
+ VALUE object_get(void *key)
64
+ {
65
+ VALUE val = (VALUE)frt_h_get(object_map, key);
66
+ if (!val) val = Qnil;
67
+ return val;
68
+ }
69
+
70
+ //static int hash_cnt = 0;
71
+ void
72
+ //object_add(void *key, VALUE obj)
73
+ object_add2(void *key, VALUE obj, const char *file, int line)
74
+ {
75
+ if (frt_h_get(object_map, key))
76
+ printf("failed adding %lx to %lld; already contains %llx. %s:%d\n",
77
+ (long)obj, (long long)key, (long long)frt_h_get(object_map, key), file, line);
78
+ frt_h_set(object_map, key, (void *)obj);
79
+ }
80
+
81
+ void
82
+ //object_set(void *key, VALUE obj)
83
+ object_set2(void *key, VALUE obj, const char *file, int line)
84
+ {
85
+ frt_h_set(object_map, key, (void *)obj);
86
+ }
87
+
88
+ void
89
+ //object_del(void *key)
90
+ object_del2(void *key, const char *file, int line)
91
+ {
92
+ if (object_get(key) == Qnil)
93
+ printf("failed deleting %lld. %s:%d\n", (long long)key, file, line);
94
+ frt_h_del(object_map, key);
95
+ }
96
+
97
+ void frb_gc_mark(void *key)
98
+ {
99
+ VALUE val = (VALUE)frt_h_get(object_map, key);
100
+ if (val)
101
+ rb_gc_mark(val);
102
+ }
103
+
104
+ VALUE frb_data_alloc(VALUE klass)
105
+ {
106
+ return Frt_Make_Struct(klass);
107
+ }
108
+
109
/*
 * Free callback that only unregisters p from the object map (through the
 * object_del() macro); p itself is not freed here.
 */
void frb_deref_free(void *p)
{
    void *const registered_key = p;

    object_del(registered_key);
}
113
+
114
/*
 * Run init_routine only while *once_control is non-zero, then clear the
 * flag so later calls do nothing. No locking is performed here.
 */
void frb_thread_once(int *once_control, void (*init_routine) (void))
{
    if (*once_control == 0) {
        return;     /* already initialised */
    }
    init_routine();
    *once_control = 0;
}
121
+
122
+ void frb_thread_key_create(frt_thread_key_t *key, void (*destr_function)(void *))
123
+ {
124
+ *key = frt_h_new(&value_hash, &value_eq, NULL, destr_function);
125
+ }
126
+
127
+ void frb_thread_key_delete(frt_thread_key_t key)
128
+ {
129
+ frt_h_destroy(key);
130
+ }
131
+
132
+ void frb_thread_setspecific(frt_thread_key_t key, const void *pointer)
133
+ {
134
+ frt_h_set(key, (void *)rb_thread_current(), (void *)pointer);
135
+ }
136
+
137
+ void *frb_thread_getspecific(frt_thread_key_t key)
138
+ {
139
+ return frt_h_get(key, (void *)rb_thread_current());
140
+ }
141
+
142
+ void frb_create_dir(VALUE rpath)
143
+ {
144
+ VALUE mFileUtils;
145
+ mFileUtils = rb_define_module("FileUtils");
146
+ rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
147
+ }
148
+
149
+ VALUE frb_hs_to_rb_ary(FrtHashSet *hs)
150
+ {
151
+ FrtHashSetEntry *hse;
152
+ VALUE ary = rb_ary_new();
153
+
154
+ for (hse = hs->first; hse; hse = hse->next) {
155
+ rb_ary_push(ary, rb_str_new2(hse->elem));
156
+ }
157
+ return ary;
158
+ }
159
+
160
+ void *frb_rb_data_ptr(VALUE val)
161
+ {
162
+ Check_Type(val, T_DATA);
163
+ return DATA_PTR(val);
164
+ }
165
+
166
+ char *
167
+ rs2s(VALUE rstr)
168
+ {
169
+ return (char *)(RSTRING_PTR(rstr) ? RSTRING_PTR(rstr) : FRT_EMPTY_STRING);
170
+ }
171
+
172
+ char *
173
+ rstrdup(VALUE rstr)
174
+ {
175
+ char *old = rs2s(rstr);
176
+ //int len = RSTRING_LEN(rstr);
177
+ //char *new = FRT_ALLOC_N(char, len + 1);
178
+ //memcpy(new, old, len);
179
+ return frt_estrdup(old);
180
+ }
181
+
182
+ FrtSymbol
183
+ frb_field(VALUE rfield)
184
+ {
185
+ switch (TYPE(rfield)) {
186
+ case T_SYMBOL:
187
+ return rb_to_id(rfield);
188
+ case T_STRING:
189
+ return rb_intern_str(rfield);
190
+ default:
191
+ rb_raise(rb_eArgError, "field name must be a symbol or string");
192
+ return (ID)NULL;
193
+ }
194
+ }
195
+
196
+ /*
197
+ * Json Exportation - Loading each LazyDoc and formatting them into json
198
+ * This code is designed to get a VERY FAST json string, the goal was speed,
199
+ * not sexiness.
200
+ * Jeremie 'ahFeel' BORDIER
201
+ * ahFeel@rift.Fr
202
+ */
203
/*
 * Append field to the output buffer at s as a double-quoted JSON string
 * and return the position just past the closing quote. No terminating
 * NUL is written.
 *
 * Double quotes, backslashes and the control characters that have a
 * short JSON escape (\b \f \n \r \t) are written as two-character
 * backslash escapes so the result is valid JSON. Other bytes, including
 * remaining control characters below 0x20, are copied through unchanged.
 *
 * The caller must provide room for the output. The worst case is two
 * output bytes per input byte plus the two surrounding quotes, which is
 * never more than the previous implementation's worst case of three
 * bytes per input byte (it wrote '"' for every double quote).
 */
char *
json_concat_string(char *s, char *field)
{
    *(s++) = '"';
    for (; *field; field++) {
        char escape = '\0';

        switch (*field) {
            case '"':  escape = '"';  break;
            case '\\': escape = '\\'; break;
            case '\b': escape = 'b';  break;
            case '\f': escape = 'f';  break;
            case '\n': escape = 'n';  break;
            case '\r': escape = 'r';  break;
            case '\t': escape = 't';  break;
            default:   break;
        }

        if (escape != '\0') {
            *(s++) = '\\';
            *(s++) = escape;
        }
        else {
            *(s++) = *field;
        }
    }
    *(s++) = '"';
    return s;
}
220
+
221
+ static VALUE error_map;
222
+
223
+ VALUE frb_get_error(const char *err_type)
224
+ {
225
+ VALUE error_class;
226
+ if (Qnil != (error_class = rb_hash_aref(error_map, rb_intern(err_type)))) {
227
+ return error_class;
228
+ }
229
+ return rb_eStandardError;
230
+ }
231
+
232
+ #define FRT_BUF_SIZ 2046
233
+ #ifdef FRT_HAS_VARARGS
234
+ void vfrt_rb_raise(const char *file, int line_num, const char *func,
235
+ const char *err_type, const char *fmt, va_list args)
236
+ #else
237
+ void V_FRT_EXIT(const char *err_type, const char *fmt, va_list args)
238
+ #endif
239
+ {
240
+ char buf[FRT_BUF_SIZ];
241
+ size_t so_far = 0;
242
+ #ifdef FRT_HAS_VARARGS
243
+ snprintf(buf, FRT_BUF_SIZ, "%s occurred at <%s>:%d in %s\n",
244
+ err_type, file, line_num, func);
245
+ #else
246
+ snprintf(buf, FRT_BUF_SIZ, "%s occurred:\n", err_type);
247
+ #endif
248
+ so_far = strlen(buf);
249
+ vsnprintf(buf + so_far, FRT_BUF_SIZ - so_far, fmt, args);
250
+
251
+ so_far = strlen(buf);
252
+ if (fmt[0] != '\0' && fmt[strlen(fmt) - 1] == ':') {
253
+ snprintf(buf + so_far, FRT_BUF_SIZ - so_far, " %s", strerror(errno));
254
+ so_far = strlen(buf);
255
+ }
256
+
257
+ snprintf(buf + so_far, FRT_BUF_SIZ - so_far, "\n");
258
+ rb_raise(frb_get_error(err_type), "%s", buf);
259
+ }
260
+
261
/*
 * Variadic front end for the raise helper: collects the variable
 * arguments into a va_list and forwards them, together with the other
 * parameters, to vfrt_rb_raise() (or V_FRT_EXIT() when FRT_HAS_VARARGS
 * is not defined).
 */
#ifdef FRT_HAS_VARARGS
void frb_rb_raise(const char *file, int line_num, const char *func,
                  const char *err_type, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vfrt_rb_raise(file, line_num, func, err_type, fmt, ap);
    /* The callee finishes by calling rb_raise(). */
    va_end(ap);
}
#else
void FRT_EXIT(const char *err_type, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    V_FRT_EXIT(err_type, fmt, ap);
    /* The callee finishes by calling rb_raise(). */
    va_end(ap);
}
#endif
277
+
278
+ /****************************************************************************
279
+ *
280
+ * Term Methods
281
+ *
282
+ ****************************************************************************/
283
+ static ID id_field;
284
+ static ID id_text;
285
+
286
+ VALUE frb_get_term(FrtSymbol field, const char *text)
287
+ {
288
+ return rb_struct_new(cTerm,
289
+ ID2SYM(field),
290
+ rb_str_new_cstr(text),
291
+ NULL);
292
+ }
293
+
294
+ static VALUE frb_term_to_s(VALUE self)
295
+ {
296
+ VALUE rstr;
297
+ VALUE rfield = rb_funcall(self, id_field, 0);
298
+ VALUE rtext = rb_funcall(self, id_text, 0);
299
+ char *field = StringValuePtr(rfield);
300
+ char *text = StringValuePtr(rtext);
301
+ char *term_str = FRT_ALLOC_N(char, 5 + RSTRING_LEN(rfield) + RSTRING_LEN(rtext));
302
+ sprintf(term_str, "%s:%s", field, text);
303
+ rstr = rb_str_new2(term_str);
304
+ free(term_str);
305
+ return rstr;
306
+ }
307
+ /*
308
+ * Document-class: Ferret::Term
309
+ *
310
+ * == Summary
311
+ *
312
+ * A Term holds a term from a document and its field name (as a Symbol).
313
+ */
314
+ void Init_Term(void)
315
+ {
316
+ const char *term_class = "Term";
317
+ cTerm = rb_struct_define(term_class, "field", "text", NULL);
318
+ rb_set_class_path(cTerm, mFerret, term_class);
319
+ rb_const_set(mFerret, rb_intern(term_class), cTerm);
320
+ rb_define_method(cTerm, "to_s", frb_term_to_s, 0);
321
+ id_field = rb_intern("field");
322
+ id_text = rb_intern("text");
323
+ }
324
+
325
+ /*
326
+ * Document-module: Ferret
327
+ *
328
+ * See the README
329
+ */
330
void Init_Ferret(void)
{
    /* Register Ferret::Term first, then load the fileutils library whose
     * mkdir_p is called from frb_create_dir(). */
    Init_Term();
    rb_require("fileutils");
}
335
+
336
+ void Init_isomorfeus_ferret_ext(void)
337
+ {
338
+ const char *const progname[] = {"ruby"};
339
+
340
+ frt_init(1, progname);
341
+
342
+ /* initialize object map */
343
+ object_map = frt_h_new(&value_hash, &value_eq, NULL, NULL);
344
+
345
+ /* IDs */
346
+ id_new = rb_intern("new");
347
+ id_call = rb_intern("call");
348
+ id_eql = rb_intern("eql?");
349
+ id_hash = rb_intern("hash");
350
+
351
+ id_capacity = rb_intern("capacity");
352
+ id_less_than = rb_intern("less_than");
353
+ id_lt = rb_intern("<");
354
+
355
+ id_mkdir_p = rb_intern("mkdir_p");
356
+ id_is_directory = rb_intern("directory?");
357
+ id_close = rb_intern("close");
358
+
359
+ id_cclass = rb_intern("cclass");
360
+
361
+ id_data = rb_intern("@data");
362
+
363
+ /* Symbols */
364
+ sym_yes = ID2SYM(rb_intern("yes"));;
365
+ sym_no = ID2SYM(rb_intern("no"));;
366
+ sym_true = ID2SYM(rb_intern("true"));;
367
+ sym_false = ID2SYM(rb_intern("false"));;
368
+ sym_path = ID2SYM(rb_intern("path"));;
369
+ sym_dir = ID2SYM(rb_intern("dir"));;
370
+
371
+ mIsomorfeus = rb_define_module("Isomorfeus");
372
+ mFerret = rb_define_module_under(mIsomorfeus, "Ferret");
373
+
374
+ /* Inits */
375
+ Init_Ferret();
376
+ Init_Utils();
377
+ Init_Analysis();
378
+ Init_Store();
379
+ Init_Index();
380
+ Init_Search();
381
+ Init_QueryParser();
382
+ Init_Test();
383
+ Init_Benchmark();
384
+
385
+ /* Error Classes */
386
+ cParseError = rb_define_class_under(mFerret, "ParseError", rb_eStandardError);
387
+ cStateError = rb_define_class_under(mFerret, "StateError", rb_eStandardError);
388
+ cFileNotFoundError = rb_define_class_under(mFerret, "FileNotFoundError", rb_eIOError);
389
+
390
+ error_map = rb_hash_new();
391
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[2]), rb_eStandardError);
392
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[3]), rb_eIOError);
393
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[4]), cFileNotFoundError);
394
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[5]), rb_eArgError);
395
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[6]), rb_eEOFError);
396
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[7]), rb_eNotImpError);
397
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[8]), cStateError);
398
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[9]), cParseError);
399
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[10]), rb_eNoMemError);
400
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[11]), rb_eIndexError);
401
+ rb_hash_aset(error_map, rb_intern(ERROR_TYPES[12]), cLockError);
402
+
403
+ rb_define_const(mFerret, "EXCEPTION_MAP", error_map);
404
+ rb_define_const(mFerret, "FIX_INT_MAX", INT2FIX(INT_MAX >> 1));
405
+ }
406
+
407
+ extern void frb_raise(int excode, const char *msg) {
408
+ rb_raise(frb_get_error(ERROR_TYPES[excode]), "%s", msg);
409
+ }
@@ -0,0 +1,95 @@
1
+ #ifndef __FERRET_H_
2
+ #define __FERRET_H_
3
+ #include <ruby.h>
4
+ #include "frt_global.h"
5
+ #include "frt_hashset.h"
6
+ #include "frt_document.h"
7
+ #include "frb_lang.h"
8
+
9
+ /* IDs */
10
+ extern ID id_new;
11
+ extern ID id_call;
12
+ extern ID id_hash;
13
+ extern ID id_eql;
14
+ extern ID id_capacity;
15
+ extern ID id_less_than;
16
+ extern ID id_lt;
17
+ extern ID id_is_directory;
18
+ extern ID id_close;
19
+ extern ID id_cclass;
20
+ extern ID id_data;
21
+
22
+ /* Symbols */
23
+ extern VALUE sym_yes;
24
+ extern VALUE sym_no;
25
+ extern VALUE sym_true;
26
+ extern VALUE sym_false;
27
+ extern VALUE sym_path;
28
+ extern VALUE sym_dir;
29
+
30
+ /* Modules */
31
+ extern VALUE mFerret;
32
+ extern VALUE mIndex;
33
+ extern VALUE mSearch;
34
+ extern VALUE mStore;
35
+ extern VALUE mStringHelper;
36
+ extern VALUE mSpans;
37
+
38
+ /* Classes */
39
+ extern VALUE cDirectory;
40
+ extern VALUE cFileNotFoundError;
41
+ extern VALUE cLockError;
42
+ extern VALUE cTerm;
43
+
44
+ /* Ferret Inits */
45
+ extern void Init_Utils();
46
+ extern void Init_Analysis();
47
+ extern void Init_Store();
48
+ extern void Init_Index();
49
+ extern void Init_Search();
50
+ extern void Init_QueryParser();
51
+
52
+ extern void frb_raise(int excode, const char *msg);
53
+ //extern void object_add(void *key, VALUE obj);
54
+ #define object_add(key, obj) object_add2(key, obj, __FILE__, __LINE__)
55
+ extern void object_add2(void *key, VALUE obj, const char *file, int line);
56
+ //extern void object_set(void *key, VALUE obj);
57
+ #define object_set(key, obj) object_set2(key, obj, __FILE__, __LINE__)
58
+ extern void object_set2(void *key, VALUE obj, const char *file, int line);
59
+ //extern void object_del(void *key);
60
+ #define object_del(key) object_del2(key, __FILE__, __LINE__)
61
+ extern void object_del2(void *key, const char *file, int line);
62
+ extern void frb_gc_mark(void *key);
63
+ extern VALUE object_get(void *key);
64
+ extern VALUE frb_data_alloc(VALUE klass);
65
+ extern void frb_deref_free(void *p);
66
+ extern void frb_create_dir(VALUE rpath);
67
+ extern VALUE frb_hs_to_rb_ary(FrtHashSet *hs);
68
+ extern void *frb_rb_data_ptr(VALUE val);
69
+ extern FrtSymbol frb_field(VALUE rfield);
70
+ extern VALUE frb_get_term(FrtSymbol field, const char *term);
71
+ extern char *json_concat_string(char *s, char *field);
72
+ extern char *rs2s(VALUE rstr);
73
+ extern char *rstrdup(VALUE rstr);
74
+
75
+ #define Frt_Make_Struct(klass)\
76
+ rb_data_object_wrap(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)
77
+
78
+ #define Frt_Wrap_Struct(self,mmark,mfree,mdata)\
79
+ do {\
80
+ ((struct RData *)(self))->data = mdata;\
81
+ ((struct RData *)(self))->dmark = (RUBY_DATA_FUNC)mmark;\
82
+ ((struct RData *)(self))->dfree = (RUBY_DATA_FUNC)mfree;\
83
+ } while (0)
84
+
85
+ #define Frt_Unwrap_Struct(self)\
86
+ do {\
87
+ ((struct RData *)(self))->data = NULL;\
88
+ ((struct RData *)(self))->dmark = NULL;\
89
+ ((struct RData *)(self))->dfree = NULL;\
90
+ } while (0)
91
+
92
+ #endif
93
+
94
+ #define frb_mark_cclass(klass) rb_ivar_set(klass, id_cclass, Qtrue)
95
+ #define frb_is_cclass(obj) (rb_ivar_defined(CLASS_OF(obj), id_cclass))