isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,206 @@
1
+ #ifndef FRT_HASHSET_H
2
+ #define FRT_HASHSET_H
3
+
4
+ #include "frt_hash.h"
5
+ #include "frt_global.h"
6
+
7
+ typedef struct FrtHashSetEntry { /* one node in the linked list of elements kept by a FrtHashSet */
8
+ void *elem; /* the element stored in this entry */
9
+ struct FrtHashSetEntry *next; /* following entry; forward iteration stops when this is NULL */
10
+ struct FrtHashSetEntry *prev; /* presumably the preceding entry in the list -- confirm in frt_hashset.c */
11
+ } FrtHashSetEntry;
12
+
13
+ typedef struct FrtHashSet /* set of opaque element pointers that remembers the order elements were added in */
14
+ {
15
+ /* the number of elements in the instance */
16
+ int size;
17
+
18
+ /* the first element in the list of elements in the FrtHashSet. The elements
19
+ * will be listed in the order they were added and can be iterated over by
20
+ * following the ->next pointer */
21
+ FrtHashSetEntry *first;
22
+
23
+ /* the last element in the list of elements in the FrtHashSet. This is used
24
+ * internally to add elements to the list. */
25
+ FrtHashSetEntry *last;
26
+
27
+ /* Hash used internally; presumably provides the membership lookup --
28
+ * confirm in frt_hashset.c */
29
+ FrtHash *ht;
30
+
31
+ /* Internal: Frees elements added to the FrtHashSet. Should never be NULL */
32
+ frt_free_ft free_elem_i;
33
+ } FrtHashSet;
33
+
34
+ /**
35
+ * Create a new FrtHashSet. The function will allocate a FrtHashSet Struct
36
+ * setting the functions used to hash the objects it will contain and the eq
37
+ * function. This should be used for non-string types.
38
+ *
39
+ * @param hash function to hash objects added to the FrtHashSet
40
+ * @param eq function to determine whether two items are equal
41
+ * @param free_elem function used to free elements as added to the FrtHashSet
42
+ * when the FrtHashSet is destroyed or duplicate elements are added to the Set
43
+ * @return a newly allocated FrtHashSet structure
44
+ */
45
+ extern FrtHashSet *frt_hs_new(frt_hash_ft hash_func,
46
+ frt_eq_ft eq_func,
47
+ frt_free_ft free_func);
48
+
49
+ /**
50
+ * Create a new FrtHashSet specifically for strings. This will create a
51
+ * FrtHashSet as if you used frt_hs_new with the standard string hash and eq
52
+ * functions.
53
+ *
54
+ * @param free_elem function used to free elements as added to the FrtHashSet
55
+ * when the FrtHashSet is destroyed or duplicate elements are added to the Set
56
+ * @return a newly allocated FrtHashSet structure
57
+ */
58
+ extern FrtHashSet *frt_hs_new_str(frt_free_ft free_func);
59
+
60
+ /**
61
+ * Create a new FrtHashSet specifically for pointers. Note that the only way
62
+ * two pointers will be considered equal is if they have the same address. So
63
+ * you can add the string "key" twice if it is stored at two different
64
+ * addresses.
65
+ *
66
+ * @param free_elem function used to free elements as added to the FrtHashSet
67
+ * when the FrtHashSet is destroyed or duplicate elements are added to the Set
68
+ * @return a newly allocated FrtHashSet structure
69
+ */
70
+ extern FrtHashSet *frt_hs_new_ptr(frt_free_ft free_func);
71
+
72
+ /**
73
+ * Free the memory allocated by the FrtHashSet, but don't free the elements added
74
+ * to the FrtHashSet. If you'd like to free everything in the FrtHashSet you should
75
+ * use frt_hs_destroy
76
+ *
77
+ * @param hs the FrtHashSet to free
78
+ */
79
+ extern void frt_hs_free(FrtHashSet *self);
80
+
81
+ /**
82
+ * Destroy the FrtHashSet including all elements added to the FrtHashSet. If you'd
83
+ * like to free the memory allocated to the FrtHashSet without touching the
84
+ * elements in the FrtHashSet then use frt_hs_free
85
+ *
86
+ * @param hs the FrtHashSet to destroy
87
+ */
88
+ extern void frt_hs_destroy(FrtHashSet *self);
89
+
90
+ /**
91
+ * WARNING: this function may destroy some elements if you add them to a
92
+ * FrtHashSet where equivalent elements already exist, depending on how free_elem
93
+ * was set.
94
+ *
95
+ * Add the element to the FrtHashSet whether or not it was already in the
96
+ * FrtHashSet.
97
+ *
98
+ * When an element is added to the Hash where it already exists, free_elem
99
+ * is called on it, ie the element you tried to add might get destroyed.
100
+ *
101
+ * @param hs the FrtHashSet to add the element to
102
+ * @param elem the element to add to the FrtHashSet
103
+ * @return one of three values;
104
+ * <pre>
105
+ * FRT_HASH_KEY_DOES_NOT_EXIST the element was not already in the FrtHashSet.
106
+ * This value is equal to 0 or false
107
+ * FRT_HASH_KEY_SAME the element was identical (same memory
108
+ * pointer) to an existing element so no freeing
109
+ * was done
110
+ * FRT_HASH_KEY_EQUAL the element was equal to an element already in
111
+ * the FrtHashSet so the new_elem was freed if
112
+ * free_elem was set
113
+ * </pre>
114
+ */
115
+ extern FrtHashKeyStatus frt_hs_add(FrtHashSet *self, void *elem);
116
+
117
+ /**
118
+ * Add element to the FrtHashSet. If the element already existed in the FrtHashSet
119
+ * and the new element was equal but not the same (same pointer/memory) then
120
+ * don't add the element and return false, otherwise return true.
121
+ *
122
+ * @param hs the FrtHashSet to add the element to
123
+ * @param elem the element to add to the FrtHashSet
124
+ * @return true if the element was successfully added or false otherwise
125
+ */
126
+ extern int frt_hs_add_safe(FrtHashSet *self, void *elem);
127
+
128
+ /**
129
+ * Delete the element from the FrtHashSet. Returns true if the item was
130
+ * successfully deleted or false if the element never existed.
131
+ *
132
+ * @param hs the FrtHashSet to delete from
133
+ * @param elem the element to delete
134
+ * @return true if the element was deleted or false if the element never
135
+ * existed
136
+ */
137
+ extern int frt_hs_del(FrtHashSet *self, const void *elem);
138
+
139
+ /**
140
+ * Remove an item from the FrtHashSet without actually freeing the item. This
141
+ * function should return the item itself so that it can be freed later if
142
+ * necessary.
143
+ *
144
+ * @param hs the FrtHashSet to remove the element from.
145
+ * @param elem the element to remove
146
+ * @return the element that was removed or NULL otherwise
147
+ */
148
+ extern void *frt_hs_rem(FrtHashSet *self, const void *elem);
149
+
150
+ /**
151
+ * Check if the element exists and return the appropriate value described
152
+ * below.
153
+ *
154
+ * @param hs the FrtHashSet to check in
155
+ * @param elem the element to check for
156
+ * @return one of the following values
157
+ * <pre>
158
+ * FRT_HASH_KEY_DOES_NOT_EXIST the element was not already in the FrtHashSet.
159
+ * This value is equal to 0 or false
160
+ * FRT_HASH_KEY_SAME the element was identical (same memory
161
+ * pointer) to an existing element so no freeing
162
+ * was done
163
+ * FRT_HASH_KEY_EQUAL the element was equal to an element already in
164
+ * the FrtHashSet so the new_elem was freed if
165
+ * free_elem was set
166
+ * </pre>
167
+ */
168
+ extern FrtHashKeyStatus frt_hs_exists(FrtHashSet *self, const void *elem);
169
+
170
+ /**
171
+ * Merge two HashSets. When a merge is done the merger (self) Hash is
172
+ * returned and the mergee is destroyed. All elements from mergee that were
173
+ * not found in merger (self) will be added to self, otherwise they will be
174
+ * destroyed.
175
+ *
176
+ * @param self the FrtHashSet to merge into
177
+ * @param other FrtHashSet to be merged into self
178
+ * @return the merged FrtHashSet
179
+ */
180
+ extern FrtHashSet *frt_hs_merge(FrtHashSet *self, FrtHashSet *other);
181
+
182
+ /**
183
+ * Return the original version of +elem+. So if you allocate two elements
184
+ * which are equal and add the first to the FrtHashSet, calling this function
185
+ * with the second element will return the first element from the FrtHashSet.
186
+ */
187
+ extern void *frt_hs_orig(FrtHashSet *self, const void *elem);
188
+
189
+ /**
190
+ * Clear all elements from the FrtHashSet. If free_elem was set then use it to
191
+ * free all elements as they are cleared. After the method is called, the
192
+ * FrtHashSet's size will be 0.
193
+ *
194
+ * @param self the FrtHashSet to clear
195
+ */
196
+ extern void frt_hs_clear(FrtHashSet *self);
197
+
198
+ /* TODO: finish implementing these functions for FrtHashSet
199
+ int hs_osf(FrtHashSet *hs, void *elem);
200
+ FrtHashSet hs_or(FrtHashSet *hs1, FrtHashSet *h2);
201
+ FrtHashSet hs_excl_or(FrtHashSet *hs1, FrtHashSet *h2);
202
+ FrtHashSet hs_and(FrtHashSet *hs1, FrtHashSet *h2);
203
+ FrtHashSet hs_mask(FrtHashSet *hs1, FrtHashSet *h2);
204
+ */
205
+
206
+ #endif
@@ -0,0 +1,62 @@
1
+ #include "frt_helper.h"
2
+
3
/**
 * Count how many leading characters +s1+ and +s2+ have in common.
 *
 * The scan stops at the first position where the two strings differ or
 * where +s1+ ends, whichever comes first.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return the length of the prefix shared by both strings
 */
int frt_hlp_string_diff(register const char *const s1,
                        register const char *const s2)
{
    register const char *a = s1;
    register const char *b = s2;

    /* a mismatch also covers the case where s2 ends before s1 does */
    for (; *a != '\0' && *a == *b; a++, b++) {
        /* nothing to do: the loop header does all the work */
    }
    return (int)(a - s1);
}
12
+
13
+ frt_i32 frt_float2int(float f)
14
+ {
15
+ union { frt_i32 i; float f; } tmp;
16
+ tmp.f = f;
17
+ return tmp.i;
18
+ }
19
+
20
+ float frt_int2float(frt_i32 v)
21
+ {
22
+ union { frt_i32 i; float f; } tmp;
23
+ tmp.i = v;
24
+ return tmp.f;
25
+ }
26
+
27
+ float frt_byte2float(unsigned char b)
28
+ {
29
+ if (b == 0) {
30
+ return 0.0;
31
+ }
32
+ else {
33
+ frt_u32 mantissa = b & 0x07;
34
+ frt_u32 exponent = (b >> 3) & 0x1f;
35
+
36
+ return frt_int2float((mantissa << 21) | ((exponent + 48) << 24));
37
+ }
38
+ }
39
+
40
+ unsigned char frt_float2byte(float f)
41
+ {
42
+ if (f <= 0.0) {
43
+ return 0;
44
+ }
45
+ else {
46
+ /* correctly order the bytes for encoding */
47
+ frt_u32 i = frt_float2int(f);
48
+ int mantissa = (i & 0xEf0000) >> 21;
49
+ int exponent = ((i >> 24) - 48);
50
+
51
+ if (exponent > 0x1f) {
52
+ exponent = 0x1f; /* 0x1f = 31 = 0b00011111 */
53
+ mantissa = 0x07; /* 0x07 = 7 = 0b00000111 */
54
+ }
55
+
56
+ if (exponent < 0) {
57
+ exponent = 0;
58
+ mantissa = 1;
59
+ }
60
+ return ((exponent<<3) | mantissa);
61
+ }
62
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef FRT_HELPER_H
2
+ #define FRT_HELPER_H
3
+
4
+ #include "frt_config.h"
5
+
6
+ extern int frt_hlp_string_diff(register const char *const s1,
7
+ register const char *const s2);
8
+ extern frt_i32 frt_float2int(float f);
9
+ extern float frt_int2float(frt_i32 i32);
10
+ extern float frt_byte2float(unsigned char b);
11
+ extern unsigned char frt_float2byte(float f);
12
+
13
+ #endif
@@ -0,0 +1,353 @@
1
+ #include "frt_ind.h"
2
+ #include "frt_array.h"
3
+ #include <string.h>
4
+
5
/* Message raised when more than one document matches an index key.
 * The pointer itself is const so it cannot be re-pointed by accident. */
static const char *const NON_UNIQUE_KEY_ERROR_MSG =
    "Tried to use a key that was not unique";
7
+
8
/*
 * Close the read side of the index.
 *
 * If a searcher is open it is closed and both the searcher and reader
 * pointers are cleared (the reader is not closed separately here --
 * presumably frt_searcher_close takes care of the reader it wraps; confirm).
 * Otherwise, if only a reader is open, it is closed and cleared.
 *
 * The argument is parenthesized so any pointer expression can be passed,
 * but it is evaluated several times and must be free of side effects.
 */
#define INDEX_CLOSE_READER(self) do {           \
    if ((self)->sea) {                          \
        frt_searcher_close((self)->sea);        \
        (self)->sea = NULL;                     \
        (self)->ir = NULL;                      \
    } else if ((self)->ir) {                    \
        frt_ir_close((self)->ir);               \
        (self)->ir = NULL;                      \
    }                                           \
} while (0)
18
+
19
/*
 * Called after a change made through the index reader: commit the reader
 * immediately when auto_flush is on, otherwise just record that there are
 * pending writes.
 *
 * The argument is evaluated more than once and must be free of side effects.
 */
#define AUTOFLUSH_IR(self) do {                 \
    if ((self)->auto_flush) {                   \
        frt_ir_commit((self)->ir);              \
    } else {                                    \
        (self)->has_writes = true;              \
    }                                           \
} while (0)
23
+
24
/*
 * Called after a change made through the index writer: when auto_flush is
 * on the writer is closed and its pointer cleared, otherwise the index is
 * only marked as having pending writes.
 *
 * The argument is evaluated more than once and must be free of side effects.
 */
#define AUTOFLUSH_IW(self) do {                 \
    if ((self)->auto_flush) {                   \
        frt_iw_close((self)->iw);               \
        (self)->iw = NULL;                      \
    } else {                                    \
        (self)->has_writes = true;              \
    }                                           \
} while (0)
32
+
33
+ FrtIndex *frt_index_new(FrtStore *store, FrtAnalyzer *analyzer, FrtHashSet *def_fields,
34
+ bool create)
35
+ {
36
+ FrtIndex *self = FRT_ALLOC_AND_ZERO(FrtIndex);
37
+ FrtHashSetEntry *hse;
38
+ /* FIXME: need to add these to the query parser */
39
+ self->config = frt_default_config;
40
+ frt_mutex_init(&self->mutex, NULL);
41
+ self->has_writes = false;
42
+ if (store) {
43
+ FRT_REF(store);
44
+ self->store = store;
45
+ } else {
46
+ self->store = frt_open_ram_store();
47
+ create = true;
48
+ }
49
+ if (analyzer) {
50
+ self->analyzer = analyzer;
51
+ FRT_REF(analyzer);
52
+ } else {
53
+ self->analyzer = frt_mb_standard_analyzer_new(true);
54
+ }
55
+
56
+ if (create) {
57
+ FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
58
+ frt_index_create(self->store, fis);
59
+ frt_fis_deref(fis);
60
+ }
61
+
62
+ /* options */
63
+ self->key = NULL;
64
+ self->id_field = rb_intern("id");
65
+ self->def_field = rb_intern("id");
66
+ self->auto_flush = false;
67
+ self->check_latest = true;
68
+
69
+ FRT_REF(self->analyzer);
70
+ self->qp = frt_qp_new(self->analyzer);
71
+ for (hse = def_fields->first; hse; hse = hse->next) {
72
+ frt_qp_add_field(self->qp, (FrtSymbol)hse->elem, true, true);
73
+ }
74
+ /* Index is a convenience class so set qp convenience options */
75
+ self->qp->allow_any_fields = true;
76
+ self->qp->clean_str = true;
77
+ self->qp->handle_parse_errors = true;
78
+
79
+ return self;
80
+ }
81
+
82
+ void frt_index_destroy(FrtIndex *self)
83
+ {
84
+ frt_mutex_destroy(&self->mutex);
85
+ INDEX_CLOSE_READER(self);
86
+ if (self->iw) frt_iw_close(self->iw);
87
+ frt_store_deref(self->store);
88
+ frt_a_deref(self->analyzer);
89
+ if (self->qp) frt_qp_destroy(self->qp);
90
+ if (self->key) frt_hs_destroy(self->key);
91
+ free(self);
92
+ }
93
+
94
+
95
+ void frt_ensure_writer_open(FrtIndex *self)
96
+ {
97
+ if (!self->iw) {
98
+ INDEX_CLOSE_READER(self);
99
+
100
+ /* make sure the analzyer isn't deleted by the FrtIndexWriter */
101
+ FRT_REF(self->analyzer);
102
+ self->iw = frt_iw_open(self->store, self->analyzer, false);
103
+ self->iw->config.use_compound_file = self->config.use_compound_file;
104
+ }
105
+ }
106
+
107
+ void frt_ensure_reader_open(FrtIndex *self)
108
+ {
109
+ if (self->ir) {
110
+ if (self->check_latest && !frt_ir_is_latest(self->ir)) {
111
+ INDEX_CLOSE_READER(self);
112
+ self->ir = frt_ir_open(self->store);
113
+ }
114
+ return;
115
+ }
116
+ if (self->iw) {
117
+ frt_iw_close(self->iw);
118
+ self->iw = NULL;
119
+ }
120
+ self->ir = frt_ir_open(self->store);
121
+ }
122
+
123
+ void frt_ensure_searcher_open(FrtIndex *self)
124
+ {
125
+ frt_ensure_reader_open(self);
126
+ if (!self->sea) {
127
+ self->sea = frt_isea_new(self->ir);
128
+ }
129
+ }
130
+
131
+ int frt_index_size(FrtIndex *self)
132
+ {
133
+ int size;
134
+ frt_mutex_lock(&self->mutex);
135
+ {
136
+ frt_ensure_reader_open(self);
137
+ size = self->ir->num_docs(self->ir);
138
+ }
139
+ frt_mutex_unlock(&self->mutex);
140
+ return size;
141
+ }
142
+
143
+ void frt_index_optimize(FrtIndex *self)
144
+ {
145
+ frt_mutex_lock(&self->mutex);
146
+ {
147
+ frt_ensure_writer_open(self);
148
+ frt_iw_optimize(self->iw);
149
+ AUTOFLUSH_IW(self);
150
+ }
151
+ frt_mutex_unlock(&self->mutex);
152
+ }
153
+
154
+ bool frt_index_is_deleted(FrtIndex *self, int doc_num)
155
+ {
156
+ bool is_del;
157
+ frt_mutex_lock(&self->mutex);
158
+ {
159
+ frt_ensure_reader_open(self);
160
+ is_del = self->ir->is_deleted(self->ir, doc_num);
161
+ }
162
+ frt_mutex_unlock(&self->mutex);
163
+ return is_del;
164
+ }
165
+
166
+ static void index_del_doc_with_key_i(FrtIndex *self, FrtDocument *doc,
167
+ FrtHashSet *key)
168
+ {
169
+ FrtQuery *q;
170
+ FrtTopDocs *td;
171
+ FrtDocField *df;
172
+ FrtHashSetEntry *hse;
173
+
174
+ if (key->size == 1) {
175
+ FrtSymbol field = (FrtSymbol)key->first->elem;
176
+ frt_ensure_writer_open(self);
177
+ df = frt_doc_get_field(doc, field);
178
+ if (df) {
179
+ frt_iw_delete_term(self->iw, field, df->data[0]);
180
+ }
181
+ return;
182
+ }
183
+
184
+ q = frt_bq_new(false);
185
+ frt_ensure_searcher_open(self);
186
+
187
+ for (hse = key->first; hse; hse = hse->next) {
188
+ FrtSymbol field = (FrtSymbol)hse->elem;
189
+ df = frt_doc_get_field(doc, field);
190
+ if (!df) continue;
191
+ frt_bq_add_query(q, frt_tq_new(field, df->data[0]), FRT_BC_MUST);
192
+ }
193
+ td = frt_searcher_search(self->sea, q, 0, 1, NULL, NULL, NULL);
194
+ if (td->total_hits > 1) {
195
+ frt_td_destroy(td);
196
+ FRT_RAISE(FRT_ARG_ERROR, "%s", NON_UNIQUE_KEY_ERROR_MSG);
197
+ } else if (td->total_hits == 1) {
198
+ frt_ir_delete_doc(self->ir, td->hits[0]->doc);
199
+ }
200
+ frt_q_deref(q);
201
+ frt_td_destroy(td);
202
+ }
203
+
204
+ static void index_add_doc_i(FrtIndex *self, FrtDocument *doc)
205
+ {
206
+ if (self->key) {
207
+ index_del_doc_with_key_i(self, doc, self->key);
208
+ }
209
+ frt_ensure_writer_open(self);
210
+ frt_iw_add_doc(self->iw, doc);
211
+ AUTOFLUSH_IW(self);
212
+ }
213
+
214
+ void frt_index_add_doc(FrtIndex *self, FrtDocument *doc)
215
+ {
216
+ frt_mutex_lock(&self->mutex);
217
+ {
218
+ index_add_doc_i(self, doc);
219
+ }
220
+ frt_mutex_unlock(&self->mutex);
221
+ }
222
+
223
+ FrtQuery *frt_index_get_query(FrtIndex *self, char *qstr)
224
+ {
225
+ int i;
226
+ FrtFieldInfos *fis;
227
+ frt_ensure_searcher_open(self);
228
+ fis = self->ir->fis;
229
+ for (i = fis->size - 1; i >= 0; i--) {
230
+ frt_hs_add(self->qp->all_fields, (void *)fis->fields[i]->name);
231
+ }
232
+ return qp_parse(self->qp, qstr);
233
+ }
234
+
235
+ FrtTopDocs *frt_index_search_str(FrtIndex *self, char *qstr, int first_doc,
236
+ int num_docs, FrtFilter *filter, FrtSort *sort,
237
+ FrtPostFilter *post_filter)
238
+ {
239
+ FrtQuery *query;
240
+ FrtTopDocs *td;
241
+ query = frt_index_get_query(self, qstr); /* will ensure_searcher is open */
242
+ td = frt_searcher_search(self->sea, query, first_doc, num_docs,
243
+ filter, sort, post_filter);
244
+ frt_q_deref(query);
245
+ return td;
246
+ }
247
+
248
+ FrtDocument *frt_index_get_doc(FrtIndex *self, int doc_num)
249
+ {
250
+ FrtDocument *doc;
251
+ frt_ensure_reader_open(self);
252
+ doc = self->ir->get_doc(self->ir, doc_num);
253
+ return doc;
254
+ }
255
+
256
+ FrtDocument *frt_index_get_doc_ts(FrtIndex *self, int doc_num)
257
+ {
258
+ FrtDocument *doc;
259
+ frt_mutex_lock(&self->mutex);
260
+ {
261
+ doc = frt_index_get_doc(self, doc_num);
262
+ }
263
+ frt_mutex_unlock(&self->mutex);
264
+ return doc;
265
+ }
266
+
267
+ FrtDocument *frt_index_get_doc_term(FrtIndex *self, FrtSymbol field,
268
+ const char *term)
269
+ {
270
+ FrtDocument *doc = NULL;
271
+ FrtTermDocEnum *tde;
272
+ frt_mutex_lock(&self->mutex);
273
+ {
274
+ frt_ensure_reader_open(self);
275
+ tde = ir_term_docs_for(self->ir, field, term);
276
+ if (tde->next(tde)) {
277
+ doc = frt_index_get_doc(self, tde->doc_num(tde));
278
+ }
279
+ tde->close(tde);
280
+ }
281
+ frt_mutex_unlock(&self->mutex);
282
+ return doc;
283
+ }
284
+
285
+ FrtDocument *frt_index_get_doc_id(FrtIndex *self, const char *id)
286
+ {
287
+ return frt_index_get_doc_term(self, self->id_field, id);
288
+ }
289
+
290
+ void frt_index_delete(FrtIndex *self, int doc_num)
291
+ {
292
+ frt_mutex_lock(&self->mutex);
293
+ {
294
+ frt_ensure_reader_open(self);
295
+ frt_ir_delete_doc(self->ir, doc_num);
296
+ AUTOFLUSH_IR(self);
297
+ }
298
+ frt_mutex_unlock(&self->mutex);
299
+ }
300
+
301
+ void frt_index_delete_term(FrtIndex *self, FrtSymbol field, const char *term)
302
+ {
303
+ FrtTermDocEnum *tde;
304
+ frt_mutex_lock(&self->mutex);
305
+ {
306
+ if (self->ir) {
307
+ tde = ir_term_docs_for(self->ir, field, term);
308
+ FRT_TRY
309
+ while (tde->next(tde)) {
310
+ frt_ir_delete_doc(self->ir, tde->doc_num(tde));
311
+ AUTOFLUSH_IR(self);
312
+ }
313
+ FRT_XFINALLY
314
+ tde->close(tde);
315
+ FRT_XENDTRY
316
+ } else {
317
+ frt_ensure_writer_open(self);
318
+ frt_iw_delete_term(self->iw, field, term);
319
+ }
320
+ }
321
+ frt_mutex_unlock(&self->mutex);
322
+ }
323
+
324
+ void frt_index_delete_id(FrtIndex *self, const char *id)
325
+ {
326
+ frt_index_delete_term(self, self->id_field, id);
327
+ }
328
+
329
+ static void index_qdel_i(FrtSearcher *sea, int doc_num, float score, void *arg)
330
+ {
331
+ (void)score; (void)arg;
332
+ frt_ir_delete_doc(((FrtIndexSearcher *)sea)->ir, doc_num);
333
+ }
334
+
335
+ void frt_index_delete_query(FrtIndex *self, FrtQuery *q, FrtFilter *f,
336
+ FrtPostFilter *post_filter)
337
+ {
338
+ frt_mutex_lock(&self->mutex);
339
+ {
340
+ frt_ensure_searcher_open(self);
341
+ frt_searcher_search_each(self->sea, q, f, post_filter, &index_qdel_i, 0);
342
+ AUTOFLUSH_IR(self);
343
+ }
344
+ frt_mutex_unlock(&self->mutex);
345
+ }
346
+
347
+ void frt_index_delete_query_str(FrtIndex *self, char *qstr, FrtFilter *f,
348
+ FrtPostFilter *post_filter)
349
+ {
350
+ FrtQuery *q = frt_index_get_query(self, qstr);
351
+ frt_index_delete_query(self, q, f, post_filter);
352
+ frt_q_deref(q);
353
+ }
@@ -0,0 +1,54 @@
1
+ #ifndef FRT_IND_H
2
+ #define FRT_IND_H
3
+
4
+ #include "frt_search.h"
5
+ #include "frt_index.h"
6
+
7
+ /***************************************************************************
8
+ *
9
+ * FrtIndex
10
+ *
11
+ ***************************************************************************/
12
+
13
+ typedef struct FrtIndex /* convenience wrapper bundling a store with a reader, writer, searcher and query parser */
14
+ {
15
+ FrtConfig config; /* initialised from frt_default_config by frt_index_new */
16
+ frt_mutex_t mutex; /* taken by the public frt_index_* operations that lock */
17
+ FrtStore *store; /* store holding the index; a RAM store when none was passed to frt_index_new */
18
+ FrtAnalyzer *analyzer; /* analyzer handed to the writer and the query parser */
19
+ FrtIndexReader *ir; /* opened on demand by frt_ensure_reader_open; NULL while closed */
20
+ FrtIndexWriter *iw; /* opened on demand by frt_ensure_writer_open; NULL while closed */
21
+ FrtSearcher *sea; /* opened on demand by frt_ensure_searcher_open; NULL while closed */
22
+ FrtQParser *qp; /* query parser used by frt_index_get_query */
23
+ FrtHashSet *key; /* optional set of key fields; when set, adding a doc first deletes the doc with the same key */
24
+ FrtSymbol id_field; /* field used by frt_index_get_doc_id / frt_index_delete_id; "id" by default */
25
+ FrtSymbol def_field; /* set to "id" by frt_index_new */
26
+ /* for FrtIndexWriter */
27
+ bool auto_flush : 1; /* when set, changes are committed / the writer closed right after each write */
28
+ bool has_writes : 1; /* set when a write happened without auto_flush */
29
+ bool check_latest : 1; /* when set, a reader that is no longer the latest is reopened */
30
+ } FrtIndex;
31
+
32
+ extern FrtIndex *frt_index_new(FrtStore *store, FrtAnalyzer *analyzer, FrtHashSet *def_fields, bool create);
33
+ extern void frt_index_destroy(FrtIndex *self);
34
+ extern int frt_index_size(FrtIndex *self);
35
+ extern void frt_index_optimize(FrtIndex *self);
36
+ extern bool frt_index_is_deleted(FrtIndex *self, int doc_num);
37
+ extern void frt_index_add_doc(FrtIndex *self, FrtDocument *doc);
38
+ extern FrtTopDocs *frt_index_search_str(FrtIndex *self, char *query, int first_doc, int num_docs, FrtFilter *filter, FrtSort *sort, FrtPostFilter *post_filter);
39
+ extern FrtQuery *frt_index_get_query(FrtIndex *self, char *qstr);
40
+ extern FrtDocument *frt_index_get_doc(FrtIndex *self, int doc_num);
41
+ extern FrtDocument *frt_index_get_doc_ts(FrtIndex *self, int doc_num);
42
+ extern FrtDocument *frt_index_get_doc_id(FrtIndex *self, const char *id);
43
+ extern FrtDocument *frt_index_get_doc_term(FrtIndex *self, FrtSymbol field, const char *term);
44
+ extern void frt_index_delete(FrtIndex *self, int doc_num);
45
+ extern void frt_index_delete_term(FrtIndex *self, FrtSymbol field, const char *term);
46
+ extern void frt_index_delete_id(FrtIndex *self, const char *id);
47
+ extern void frt_index_delete_query(FrtIndex *self, FrtQuery *q, FrtFilter *f, FrtPostFilter *pf);
48
+ extern void frt_index_delete_query_str(FrtIndex *self, char *qstr,FrtFilter *f, FrtPostFilter *pf);
49
+
50
+ extern void frt_ensure_searcher_open(FrtIndex *self);
51
+ extern void frt_ensure_reader_open(FrtIndex *self);
52
+ extern void frt_ensure_writer_open(FrtIndex *self);
53
+
54
+ #endif