ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/hashset.c CHANGED
@@ -1,139 +1,167 @@
1
- #include <hashset.h>
1
+ #include "hashset.h"
2
2
  #include <string.h>
3
- #define HS_MIN_SIZE 4
4
3
 
5
- int *imalloc(int i)
4
+ /*
5
+ * The HashSet contains an array +elems+ of the elements that have been added.
6
+ * It always has +size+ elements so +size+ and +elems+ can be used to iterate
7
+ * over all elements in the HashSet. It also uses a HashTable to keep track of
8
+ * which elements have been added and their index in the +elems+ array.
9
+ */
10
+ static HashSet *hs_alloc(void (*free_elem) (void *p))
6
11
  {
7
- int *ip = ALLOC(int);
8
- *ip = i;
9
- return ip;
12
+ HashSet *hs = ALLOC(HashSet);
13
+ hs->size = 0;
14
+ hs->capa = HS_MIN_SIZE;
15
+ hs->elems = ALLOC_N(void *, HS_MIN_SIZE);
16
+ hs->free_elem_i = free_elem ? free_elem : &dummy_free;
17
+ return hs;
10
18
  }
11
19
 
12
- void hs_dummy_free(void *p){}
13
-
14
- HashSet *hs_create(unsigned int (*hash)(const void *p),
15
- int (*eq)(const void *p1, const void *p2),
16
- void (*free_elem)(void *p))
20
+ HashSet *hs_new(ulong (*hash)(const void *p),
21
+ int (*eq)(const void *p1, const void *p2),
22
+ void (*free_elem)(void *p))
17
23
  {
18
- HashSet *hs = ALLOC(HashSet);
19
- hs->ht = h_new(hash, eq, NULL, &free);
20
- hs->elems = NULL;
21
- hs->capa = hs->size = 0;
22
- if (free_elem == NULL)
23
- hs->free_elem = &hs_dummy_free;
24
- else
25
- hs->free_elem = free_elem;
26
- return hs;
24
+ HashSet *hs = hs_alloc(free_elem);
25
+ hs->ht = h_new(hash, eq, NULL, &free);
26
+ return hs;
27
27
  }
28
28
 
29
- HashSet *hs_str_create(void (*free_elem)(void *p))
29
+ HashSet *hs_new_str(void (*free_elem) (void *p))
30
30
  {
31
- HashSet *hs = ALLOC(HashSet);
32
- hs->ht = h_new_str((free_ft)NULL, &free);
33
- hs->elems = NULL;
34
- hs->capa = hs->size = 0;
35
- if (free_elem == NULL)
36
- hs->free_elem = &hs_dummy_free;
37
- else
38
- hs->free_elem = free_elem;
39
- return hs;
31
+ HashSet *hs = hs_alloc(free_elem);
32
+ hs->ht = h_new_str((free_ft) NULL, &free);
33
+ return hs;
40
34
  }
41
35
 
42
- void hs_destroy(HashSet *hs)
36
+ void hs_free(HashSet *hs)
43
37
  {
44
- h_destroy(hs->ht);
45
- free(hs->elems);
46
- free(hs);
38
+ h_destroy(hs->ht);
39
+ free(hs->elems);
40
+ free(hs);
47
41
  }
48
42
 
49
- void hs_clear(HashSet *self)
43
+ void hs_clear(HashSet *hs)
50
44
  {
51
- int i;
52
- for (i = self->size - 1; i >= 0; i--)
53
- hs_del(self, self->elems[i]);
45
+ int i;
46
+ for (i = hs->size - 1; i >= 0; i--) {
47
+ hs_del(hs, hs->elems[i]);
48
+ }
54
49
  }
55
50
 
56
- void hs_destroy_all(HashSet *hs)
51
+ void hs_destroy(HashSet *hs)
57
52
  {
58
- int i;
59
- if (hs->free_elem != &dummy_free)
60
- for (i = 0; i < hs->size; i++)
61
- hs->free_elem(hs->elems[i]);
62
- hs_destroy(hs);
53
+ int i;
54
+ if (hs->free_elem_i != &dummy_free) {
55
+ for (i = 0; i < hs->size; i++) {
56
+ hs->free_elem_i(hs->elems[i]);
57
+ }
58
+ }
59
+ h_destroy(hs->ht);
60
+ free(hs->elems);
61
+ free(hs);
63
62
  }
64
63
 
65
64
  int hs_add(HashSet *hs, void *elem)
66
65
  {
67
- int has_elem = h_has_key(hs->ht, elem);
68
- //printf("has_elem = %d %d:%d\n", has_elem, HASH_KEY_EQUAL, HASH_KEY_SAME);
69
- if (has_elem == HASH_KEY_EQUAL) {
70
- // We don't want to keep two of the same elem so free if necessary
71
- hs->free_elem(elem);
72
- } else if (has_elem == HASH_KEY_SAME) {
73
- // No need to do anything
74
- } else {
75
- // add the elem to the array, resizing if necessary
76
- if (hs->size >= hs->capa) {
77
- if (hs->capa == 0)
78
- hs->capa = HS_MIN_SIZE;
79
- else
80
- hs->capa *= 2;
81
- REALLOC_N(hs->elems, void *, hs->capa);
66
+ int has_elem = h_has_key(hs->ht, elem);
67
+ if (has_elem == HASH_KEY_EQUAL) {
68
+ /* We don't want to keep two of the same elem so free if necessary */
69
+ hs->free_elem_i(elem);
70
+ }
71
+ else if (has_elem == HASH_KEY_SAME) {
72
+ /* No need to do anything */
73
+ }
74
+ else {
75
+ /* add the elem to the array, resizing if necessary */
76
+ if (hs->size >= hs->capa) {
77
+ hs->capa *= 2;
78
+ REALLOC_N(hs->elems, void *, hs->capa);
79
+ }
80
+ hs->elems[hs->size] = elem;
81
+ h_set(hs->ht, elem, imalloc(hs->size));
82
+ hs->size++;
83
+ }
84
+ return has_elem;
85
+ }
86
+
87
+ int hs_add_safe(HashSet *hs, void *elem)
88
+ {
89
+ int has_elem = h_has_key(hs->ht, elem);
90
+ if (has_elem == HASH_KEY_EQUAL) {
91
+ /* element can't be added */
92
+ return false;
93
+ }
94
+ else if (has_elem == HASH_KEY_SAME) {
95
+ /* the exact same element has already been added */
96
+ return true;
97
+ }
98
+ else {
99
+ /* add the elem to the array, resizing if necessary */
100
+ if (hs->size >= hs->capa) {
101
+ hs->capa *= 2;
102
+ REALLOC_N(hs->elems, void *, hs->capa);
103
+ }
104
+ hs->elems[hs->size] = elem;
105
+ h_set(hs->ht, elem, imalloc(hs->size));
106
+ hs->size++;
107
+ return true;
82
108
  }
83
- hs->elems[hs->size] = elem;
84
- h_set(hs->ht, elem, imalloc(hs->size));
85
- hs->size++;
86
- }
87
- return has_elem;
88
109
  }
89
110
 
90
111
  int hs_del(HashSet *hs, void *elem)
91
112
  {
92
- void *tmp_elem = hs_rem(hs, elem);
93
- if (tmp_elem != NULL) {
94
- hs->free_elem(tmp_elem);
95
- return 1;
96
- } else {
97
- return 0;
98
- }
113
+ void *tmp_elem = hs_rem(hs, elem);
114
+ if (tmp_elem != NULL) {
115
+ hs->free_elem_i(tmp_elem);
116
+ return 1;
117
+ }
118
+ else {
119
+ return 0;
120
+ }
99
121
  }
100
122
 
101
123
  void *hs_rem(HashSet *hs, void *elem)
102
124
  {
103
- void *ret_elem;
104
- int *index = (int *)h_get(hs->ht, elem);
105
- if (index == NULL) {
106
- return NULL;
107
- } else {
108
- int i = *index;
109
- ret_elem = hs->elems[i];
110
- h_del(hs->ht, elem);
111
- hs->size--;
112
- memmove(&hs->elems[i], &hs->elems[i+1], sizeof(void *) * (hs->size - i));
113
- return ret_elem;
114
- }
125
+ void *ret_elem;
126
+ int *index = (int *)h_get(hs->ht, elem);
127
+ if (index == NULL) {
128
+ return NULL;
129
+ }
130
+ else {
131
+ int i = *index;
132
+ ret_elem = hs->elems[i];
133
+ h_del(hs->ht, elem);
134
+ hs->size--;
135
+ memmove(&hs->elems[i], &hs->elems[i + 1],
136
+ sizeof(void *) * (hs->size - i));
137
+ return ret_elem;
138
+ }
115
139
  }
116
140
 
117
141
  int hs_exists(HashSet *hs, void *elem)
118
142
  {
119
- return h_has_key(hs->ht, elem);
143
+ return h_has_key(hs->ht, elem);
120
144
  }
121
145
 
122
- HashSet *hs_merge(HashSet *hs, HashSet *other)
146
+ HashSet *hs_merge(HashSet *hs, HashSet * other)
123
147
  {
124
- int i;
125
- for (i = 0; i < other->size; i++) {
126
- hs_add(hs, other->elems[i]);
127
- }
128
- // Now free the other hashset. It is no longer needed. No need, however, to
129
- // delete the elements as they are in the new hash set
130
- hs_destroy(other);
131
- return hs;
148
+ int i;
149
+ for (i = 0; i < other->size; i++) {
150
+ hs_add(hs, other->elems[i]);
151
+ }
152
+ /* Now free the other hashset. It is no longer needed. No need, however, to
153
+ * delete the elements as they're either destroyed or in the new hash set */
154
+ hs_free(other);
155
+ return hs;
132
156
  }
133
157
 
134
158
  void *hs_orig(HashSet *hs, void *elem)
135
159
  {
136
- int *i = h_get(hs->ht, elem);
137
- if (i) return hs->elems[*i];
138
- else return NULL;
160
+ int *index = h_get(hs->ht, elem);
161
+ if (index) {
162
+ return hs->elems[*index];
163
+ }
164
+ else {
165
+ return NULL;
166
+ }
139
167
  }
data/ext/hashset.h CHANGED
@@ -2,30 +2,179 @@
2
2
  #define FRT_HASHSET_H
3
3
 
4
4
  #include "hash.h"
5
- #include "array.h"
6
5
  #include "global.h"
7
6
 
8
- typedef struct HashSet {
9
- int capa;
10
- int size;
11
- void **elems;
12
- HshTable *ht;
13
- void (*free_elem)(void *p);
7
+ #define HS_MIN_SIZE 4
8
+
9
+ typedef struct HashSet
10
+ {
11
+ /* used internally to allocate space to elems */
12
+ int capa;
13
+
14
+ /* the number of elements in the HashSet */
15
+ int size;
16
+
17
+ /* the elements in the HashSet. The elements will be found in the order
18
+ * they were added and can be iterated over from 0 to .size */
19
+ void **elems;
20
+
21
+ /* HashTable used internally */
22
+ HashTable *ht;
23
+
24
+ /* Internal: Frees elements added to the HashSet. Should never be NULL */
25
+ void (*free_elem_i)(void *p);
14
26
  } HashSet;
15
27
 
16
- HashSet *hs_create(unsigned int (*hash)(const void *p),
17
- int (*eq)(const void *p1, const void *p2),
18
- void (*free_elem)(void *p));
19
- HashSet *hs_str_create(void (*free_elem)(void *p));
20
- void hs_destroy(HashSet *hs);
21
- void hs_destroy_all(HashSet *hs);
22
- int hs_add(HashSet *hs, void *elem);
23
- int hs_del(HashSet *hs, void *elem);
24
- void *hs_rem(HashSet *hs, void *elem);
25
- int hs_exists(HashSet *hs, void *elem);
26
- HashSet *hs_merge(HashSet *hs, HashSet *other);
27
- void *hs_orig(HashSet *hs, void *elem);
28
- void hs_clear(HashSet *self);
28
+ /**
29
+ * Create a new HashSet. The function will allocate a HashSet Struct setting
30
+ * the functions used to hash the objects it will contain and the eq function.
31
+ * This should be used for non-string types.
32
+ *
33
+ * @param hash function to hash objects added to the HashSet
34
+ * @param eq function to determine whether two items are equal
35
+ * @param free_elem function used to free elements as added to the HashSet
36
+ * when the HashSet is destroyed or duplicate elements are added to the Set
37
+ * @return a newly allocated HashSet structure
38
+ */
39
+ extern HashSet *hs_new(ulong (*hash)(const void *p),
40
+ int (*eq)(const void *p1, const void *p2),
41
+ void (*free_elem)(void *p));
42
+
43
+ /**
44
+ * Create a new HashSet specifically for strings. This will create a HashSet
45
+ * as if you used hs_new with the standard string hash and eq functions.
46
+ *
47
+ * @param free_elem function used to free elements as added to the HashSet
48
+ * when the HashSet is destroyed or duplicate elements are added to the Set
49
+ * @return a newly allocated HashSet structure
50
+ */
51
+ extern HashSet *hs_new_str(void (*free_elem) (void *p));
52
+
53
+ /**
54
+ * Free the memory allocated by the HashSet, but don't free the elements added
55
+ * to the HashSet. If you'd like to free everything in the HashSet you should
56
+ * use hs_destroy
57
+ *
58
+ * @param hs the HashSet to free
59
+ */
60
+ extern void hs_free(HashSet *self);
61
+
62
+ /**
63
+ * Destroy the HashSet including all elements added to the HashSet. If you'd
64
+ * like to free the memory allocated to the HashSet without touching the
65
+ * elements in the HashSet then use hs_free
66
+ *
67
+ * @param hs the HashSet to destroy
68
+ */
69
+ extern void hs_destroy(HashSet *self);
70
+
71
+ /**
72
+ * WARNING: this function may destroy some elements if you add them to a
73
+ * HashSet where equivalent elements already exist, depending on how free_elem
74
+ * was set.
75
+ *
76
+ * Add the element to the HashSet whether or not it was already in the
77
+ * HashSet.
78
+ *
79
+ * When an element is added to the HashTable where it already exists, free_elem
80
+ * is called on it, ie the element you tried to add might get destroyed.
81
+ *
82
+ * @param hs the HashSet to add the element to
83
+ * @param elem the element to add to the HashSet
84
+ * @return one of three values;
85
+ * <pre>
86
+ * HASH_KEY_DOES_NOT_EXIST the element was not already in the HashSet.
87
+ * This value is equal to 0 or false
88
+ * HASH_KEY_SAME the element was identical (same memory
89
+ * pointer) to an existing element so no freeing
90
+ * was done
91
+ * HASH_KEY_EQUAL the element was equal to an element already in
92
+ * the HashSet so the new_elem was freed if
93
+ * free_elem was set
94
+ * </pre>
95
+ */
96
+ extern int hs_add(HashSet *self, void *elem);
97
+
98
+ /**
99
+ * Add element to the HashSet. If the element already existed in the HashSet
100
+ * and the new element was equal but not the same (same pointer/memory) then
101
+ * don't add the element and return false, otherwise return true.
102
+ *
103
+ * @param hs the HashSet to add the element to
104
+ * @param elem the element to add to the HashSet
105
+ * @return true if the element was successfully added or false otherwise
106
+ */
107
+ extern int hs_add_safe(HashSet *self, void *elem);
108
+
109
+ /**
110
+ * Delete the element from the HashSet. Returns true if the item was
111
+ * successfully deleted or false if the element never existed.
112
+ *
113
+ * @param hs the HashSet to delete from
114
+ * @param elem the element to delete
115
+ * @return true if the element was deleted or false if the element never
116
+ * existed
117
+ */
118
+ extern int hs_del(HashSet *self, void *elem);
119
+
120
+ /**
121
+ * Remove an item from the HashSet without actually freeing the item. This
122
+ * function should return the item itself so that it can be freed later if
123
+ * necessary.
124
+ *
125
+ * @param hs the HashSet to remove the element from.
126
+ * @param elem the element to remove
127
+ * @return the element that was removed or NULL otherwise
128
+ */
129
+ extern void *hs_rem(HashSet *self, void *elem);
130
+
131
+ /**
132
+ * Check if the element exists and return the appropriate value described
133
+ * below.
134
+ *
135
+ * @param hs the HashSet to check in
136
+ * @param elem the element to check for
137
+ * @return one of the following values
138
+ * <pre>
139
+ * HASH_KEY_DOES_NOT_EXIST the element was not already in the HashSet.
140
+ * This value is equal to 0 or false
141
+ * HASH_KEY_SAME the element was identical (same memory
142
+ * pointer) to an existing element so no freeing
143
+ * was done
144
+ * HASH_KEY_EQUAL the element was equal to an element already in
145
+ * the HashSet so the new_elem was freed if
146
+ * free_elem was set
147
+ * </pre>
148
+ */
149
+ extern int hs_exists(HashSet *self, void *elem);
150
+
151
+ /**
152
+ * Merge two HashSets. When a merge is done the merger (self) HashSet is
153
+ * returned and the mergee is destroyed. All elements from mergee that were
154
+ * not found in merger (self) will be added to self, otherwise they will be
155
+ * destroyed.
156
+ *
157
+ * @param self the HashSet to merge into
158
+ * @param other HashSet to be merged into self
159
+ * @return the merged HashSet
160
+ */
161
+ extern HashSet *hs_merge(HashSet *self, HashSet *other);
162
+
163
+ /**
164
+ * Return the original version of +elem+. So if you allocate two elements
165
+ * which are equal and add the first to the HashSet, calling this function
166
+ * with the second element will return the first element from the HashSet.
167
+ */
168
+ extern void *hs_orig(HashSet *self, void *elem);
169
+
170
+ /**
171
+ * Clear all elements from the HashSet. If free_elem was set then use it to
172
+ * free all elements as they are cleared. After the method is called, the
173
+ * HashSet's size will be 0.
174
+ *
175
+ * @param self the HashSet to clear
176
+ */
177
+ extern void hs_clear(HashSet *self);
29
178
 
30
179
  /* TODO: finish these functions.
31
180
  int hs_osf(HashSet *hs, void *elem);