ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/hashset.c CHANGED
@@ -1,139 +1,167 @@
1
- #include <hashset.h>
1
+ #include "hashset.h"
2
2
  #include <string.h>
3
- #define HS_MIN_SIZE 4
4
3
 
5
- int *imalloc(int i)
4
+ /*
5
+ * The HashSet contains an array +elems+ of the elements that have been added.
6
+ * It always has +size+ elements so +size+ and +elems+ can be used to iterate
7
+ * over all elements in the HashSet. It also uses a HashTable to keep track of
8
+ * which elements have been added and their index in the +elems+ array.
9
+ */
10
+ static HashSet *hs_alloc(void (*free_elem) (void *p))
6
11
  {
7
- int *ip = ALLOC(int);
8
- *ip = i;
9
- return ip;
12
+ HashSet *hs = ALLOC(HashSet);
13
+ hs->size = 0;
14
+ hs->capa = HS_MIN_SIZE;
15
+ hs->elems = ALLOC_N(void *, HS_MIN_SIZE);
16
+ hs->free_elem_i = free_elem ? free_elem : &dummy_free;
17
+ return hs;
10
18
  }
11
19
 
12
- void hs_dummy_free(void *p){}
13
-
14
- HashSet *hs_create(unsigned int (*hash)(const void *p),
15
- int (*eq)(const void *p1, const void *p2),
16
- void (*free_elem)(void *p))
20
+ HashSet *hs_new(ulong (*hash)(const void *p),
21
+ int (*eq)(const void *p1, const void *p2),
22
+ void (*free_elem)(void *p))
17
23
  {
18
- HashSet *hs = ALLOC(HashSet);
19
- hs->ht = h_new(hash, eq, NULL, &free);
20
- hs->elems = NULL;
21
- hs->capa = hs->size = 0;
22
- if (free_elem == NULL)
23
- hs->free_elem = &hs_dummy_free;
24
- else
25
- hs->free_elem = free_elem;
26
- return hs;
24
+ HashSet *hs = hs_alloc(free_elem);
25
+ hs->ht = h_new(hash, eq, NULL, &free);
26
+ return hs;
27
27
  }
28
28
 
29
- HashSet *hs_str_create(void (*free_elem)(void *p))
29
+ HashSet *hs_new_str(void (*free_elem) (void *p))
30
30
  {
31
- HashSet *hs = ALLOC(HashSet);
32
- hs->ht = h_new_str((free_ft)NULL, &free);
33
- hs->elems = NULL;
34
- hs->capa = hs->size = 0;
35
- if (free_elem == NULL)
36
- hs->free_elem = &hs_dummy_free;
37
- else
38
- hs->free_elem = free_elem;
39
- return hs;
31
+ HashSet *hs = hs_alloc(free_elem);
32
+ hs->ht = h_new_str((free_ft) NULL, &free);
33
+ return hs;
40
34
  }
41
35
 
42
- void hs_destroy(HashSet *hs)
36
+ void hs_free(HashSet *hs)
43
37
  {
44
- h_destroy(hs->ht);
45
- free(hs->elems);
46
- free(hs);
38
+ h_destroy(hs->ht);
39
+ free(hs->elems);
40
+ free(hs);
47
41
  }
48
42
 
49
- void hs_clear(HashSet *self)
43
+ void hs_clear(HashSet *hs)
50
44
  {
51
- int i;
52
- for (i = self->size - 1; i >= 0; i--)
53
- hs_del(self, self->elems[i]);
45
+ int i;
46
+ for (i = hs->size - 1; i >= 0; i--) {
47
+ hs_del(hs, hs->elems[i]);
48
+ }
54
49
  }
55
50
 
56
- void hs_destroy_all(HashSet *hs)
51
+ void hs_destroy(HashSet *hs)
57
52
  {
58
- int i;
59
- if (hs->free_elem != &dummy_free)
60
- for (i = 0; i < hs->size; i++)
61
- hs->free_elem(hs->elems[i]);
62
- hs_destroy(hs);
53
+ int i;
54
+ if (hs->free_elem_i != &dummy_free) {
55
+ for (i = 0; i < hs->size; i++) {
56
+ hs->free_elem_i(hs->elems[i]);
57
+ }
58
+ }
59
+ h_destroy(hs->ht);
60
+ free(hs->elems);
61
+ free(hs);
63
62
  }
64
63
 
65
64
  int hs_add(HashSet *hs, void *elem)
66
65
  {
67
- int has_elem = h_has_key(hs->ht, elem);
68
- //printf("has_elem = %d %d:%d\n", has_elem, HASH_KEY_EQUAL, HASH_KEY_SAME);
69
- if (has_elem == HASH_KEY_EQUAL) {
70
- // We don't want to keep two of the same elem so free if necessary
71
- hs->free_elem(elem);
72
- } else if (has_elem == HASH_KEY_SAME) {
73
- // No need to do anything
74
- } else {
75
- // add the elem to the array, resizing if necessary
76
- if (hs->size >= hs->capa) {
77
- if (hs->capa == 0)
78
- hs->capa = HS_MIN_SIZE;
79
- else
80
- hs->capa *= 2;
81
- REALLOC_N(hs->elems, void *, hs->capa);
66
+ int has_elem = h_has_key(hs->ht, elem);
67
+ if (has_elem == HASH_KEY_EQUAL) {
68
+ /* We don't want to keep two of the same elem so free if necessary */
69
+ hs->free_elem_i(elem);
70
+ }
71
+ else if (has_elem == HASH_KEY_SAME) {
72
+ /* No need to do anything */
73
+ }
74
+ else {
75
+ /* add the elem to the array, resizing if necessary */
76
+ if (hs->size >= hs->capa) {
77
+ hs->capa *= 2;
78
+ REALLOC_N(hs->elems, void *, hs->capa);
79
+ }
80
+ hs->elems[hs->size] = elem;
81
+ h_set(hs->ht, elem, imalloc(hs->size));
82
+ hs->size++;
83
+ }
84
+ return has_elem;
85
+ }
86
+
87
+ int hs_add_safe(HashSet *hs, void *elem)
88
+ {
89
+ int has_elem = h_has_key(hs->ht, elem);
90
+ if (has_elem == HASH_KEY_EQUAL) {
91
+ /* element can't be added */
92
+ return false;
93
+ }
94
+ else if (has_elem == HASH_KEY_SAME) {
95
+ /* the exact same element has already been added */
96
+ return true;
97
+ }
98
+ else {
99
+ /* add the elem to the array, resizing if necessary */
100
+ if (hs->size >= hs->capa) {
101
+ hs->capa *= 2;
102
+ REALLOC_N(hs->elems, void *, hs->capa);
103
+ }
104
+ hs->elems[hs->size] = elem;
105
+ h_set(hs->ht, elem, imalloc(hs->size));
106
+ hs->size++;
107
+ return true;
82
108
  }
83
- hs->elems[hs->size] = elem;
84
- h_set(hs->ht, elem, imalloc(hs->size));
85
- hs->size++;
86
- }
87
- return has_elem;
88
109
  }
89
110
 
90
111
  int hs_del(HashSet *hs, void *elem)
91
112
  {
92
- void *tmp_elem = hs_rem(hs, elem);
93
- if (tmp_elem != NULL) {
94
- hs->free_elem(tmp_elem);
95
- return 1;
96
- } else {
97
- return 0;
98
- }
113
+ void *tmp_elem = hs_rem(hs, elem);
114
+ if (tmp_elem != NULL) {
115
+ hs->free_elem_i(tmp_elem);
116
+ return 1;
117
+ }
118
+ else {
119
+ return 0;
120
+ }
99
121
  }
100
122
 
101
123
  void *hs_rem(HashSet *hs, void *elem)
102
124
  {
103
- void *ret_elem;
104
- int *index = (int *)h_get(hs->ht, elem);
105
- if (index == NULL) {
106
- return NULL;
107
- } else {
108
- int i = *index;
109
- ret_elem = hs->elems[i];
110
- h_del(hs->ht, elem);
111
- hs->size--;
112
- memmove(&hs->elems[i], &hs->elems[i+1], sizeof(void *) * (hs->size - i));
113
- return ret_elem;
114
- }
125
+ void *ret_elem;
126
+ int *index = (int *)h_get(hs->ht, elem);
127
+ if (index == NULL) {
128
+ return NULL;
129
+ }
130
+ else {
131
+ int i = *index;
132
+ ret_elem = hs->elems[i];
133
+ h_del(hs->ht, elem);
134
+ hs->size--;
135
+ memmove(&hs->elems[i], &hs->elems[i + 1],
136
+ sizeof(void *) * (hs->size - i));
137
+ return ret_elem;
138
+ }
115
139
  }
116
140
 
117
141
  int hs_exists(HashSet *hs, void *elem)
118
142
  {
119
- return h_has_key(hs->ht, elem);
143
+ return h_has_key(hs->ht, elem);
120
144
  }
121
145
 
122
- HashSet *hs_merge(HashSet *hs, HashSet *other)
146
+ HashSet *hs_merge(HashSet *hs, HashSet * other)
123
147
  {
124
- int i;
125
- for (i = 0; i < other->size; i++) {
126
- hs_add(hs, other->elems[i]);
127
- }
128
- // Now free the other hashset. It is no longer needed. No need, however, to
129
- // delete the elements as they are in the new hash set
130
- hs_destroy(other);
131
- return hs;
148
+ int i;
149
+ for (i = 0; i < other->size; i++) {
150
+ hs_add(hs, other->elems[i]);
151
+ }
152
+ /* Now free the other hashset. It is no longer needed. No need, however, to
153
+ * delete the elements as they're either destroyed or in the new hash set */
154
+ hs_free(other);
155
+ return hs;
132
156
  }
133
157
 
134
158
  void *hs_orig(HashSet *hs, void *elem)
135
159
  {
136
- int *i = h_get(hs->ht, elem);
137
- if (i) return hs->elems[*i];
138
- else return NULL;
160
+ int *index = h_get(hs->ht, elem);
161
+ if (index) {
162
+ return hs->elems[*index];
163
+ }
164
+ else {
165
+ return NULL;
166
+ }
139
167
  }
data/ext/hashset.h CHANGED
@@ -2,30 +2,179 @@
2
2
  #define FRT_HASHSET_H
3
3
 
4
4
  #include "hash.h"
5
- #include "array.h"
6
5
  #include "global.h"
7
6
 
8
- typedef struct HashSet {
9
- int capa;
10
- int size;
11
- void **elems;
12
- HshTable *ht;
13
- void (*free_elem)(void *p);
7
+ #define HS_MIN_SIZE 4
8
+
9
+ typedef struct HashSet
10
+ {
11
+ /* used internally to allocate space to elems */
12
+ int capa;
13
+
14
+ /* the number of elements in the HashSet */
15
+ int size;
16
+
17
+ /* the elements in the HashSet. The elements will be found in the order
18
+ * they were added and can be iterated over from 0 to .size */
19
+ void **elems;
20
+
21
+ /* HashTable used internally */
22
+ HashTable *ht;
23
+
24
+ /* Internal: Frees elements added to the HashSet. Should never be NULL */
25
+ void (*free_elem_i)(void *p);
14
26
  } HashSet;
15
27
 
16
- HashSet *hs_create(unsigned int (*hash)(const void *p),
17
- int (*eq)(const void *p1, const void *p2),
18
- void (*free_elem)(void *p));
19
- HashSet *hs_str_create(void (*free_elem)(void *p));
20
- void hs_destroy(HashSet *hs);
21
- void hs_destroy_all(HashSet *hs);
22
- int hs_add(HashSet *hs, void *elem);
23
- int hs_del(HashSet *hs, void *elem);
24
- void *hs_rem(HashSet *hs, void *elem);
25
- int hs_exists(HashSet *hs, void *elem);
26
- HashSet *hs_merge(HashSet *hs, HashSet *other);
27
- void *hs_orig(HashSet *hs, void *elem);
28
- void hs_clear(HashSet *self);
28
+ /**
29
+ * Create a new HashSet. The function will allocate a HashSet Struct setting
30
+ * the functions used to hash the objects it will contain and the eq function.
31
+ * This should be used for non-string types.
32
+ *
33
+ * @param hash function to hash objects added to the HashSet
34
+ * @param eq function to determine whether two items are equal
35
+ * @param free_elem function used to free elements as added to the HashSet
36
+ * when the HashSet is destroyed or duplicate elements are added to the Set
37
+ * @return a newly allocated HashSet structure
38
+ */
39
+ extern HashSet *hs_new(ulong (*hash)(const void *p),
40
+ int (*eq)(const void *p1, const void *p2),
41
+ void (*free_elem)(void *p));
42
+
43
+ /**
44
+ * Create a new HashSet specifically for strings. This will create a HashSet
45
+ * as if you used hs_new with the standard string hash and eq functions.
46
+ *
47
+ * @param free_elem function used to free elements as added to the HashSet
48
+ * when the HashSet is destroyed or duplicate elements are added to the Set
49
+ * @return a newly allocated HashSet structure
50
+ */
51
+ extern HashSet *hs_new_str(void (*free_elem) (void *p));
52
+
53
+ /**
54
+ * Free the memory allocated by the HashSet, but don't free the elements added
55
+ * to the HashSet. If you'd like to free everything in the HashSet you should
56
+ * use hs_destroy
57
+ *
58
+ * @param hs the HashSet to free
59
+ */
60
+ extern void hs_free(HashSet *self);
61
+
62
+ /**
63
+ * Destroy the HashSet including all elements added to the HashSet. If you'd
64
+ * like to free the memory allocated to the HashSet without touching the
65
+ * elements in the HashSet then use hs_free
66
+ *
67
+ * @param hs the HashSet to destroy
68
+ */
69
+ extern void hs_destroy(HashSet *self);
70
+
71
+ /**
72
+ * WARNING: this function may destroy some elements if you add them to a
73
+ * HashSet where equivalent elements already exist, depending on how free_elem
74
+ * was set.
75
+ *
76
+ * Add the element to the HashSet whether or not it was already in the
77
+ * HashSet.
78
+ *
79
+ * When an element is added to the HashSet where it already exists, free_elem
80
+ * is called on it, ie the element you tried to add might get destroyed.
81
+ *
82
+ * @param hs the HashSet to add the element to
83
+ * @param elem the element to add to the HashSet
84
+ * @return one of three values;
85
+ * <pre>
86
+ * HASH_KEY_DOES_NOT_EXIST the element was not already in the HashSet.
87
+ * This value is equal to 0 or false
88
+ * HASH_KEY_SAME the element was identical (same memory
89
+ * pointer) to an existing element so no freeing
90
+ * was done
91
+ * HASH_KEY_EQUAL the element was equal to an element already in
92
+ * the HashSet so the new_elem was freed if
93
+ * free_elem was set
94
+ * </pre>
95
+ */
96
+ extern int hs_add(HashSet *self, void *elem);
97
+
98
+ /**
99
+ * Add element to the HashSet. If the element already existed in the HashSet
100
+ * and the new element was equal but not the same (same pointer/memory) then
101
+ * don't add the element and return false, otherwise return true.
102
+ *
103
+ * @param hs the HashSet to add the element to
104
+ * @param elem the element to add to the HashSet
105
+ * @return true if the element was successfully added or false otherwise
106
+ */
107
+ extern int hs_add_safe(HashSet *self, void *elem);
108
+
109
+ /**
110
+ * Delete the element from the HashSet. Returns true if the item was
111
+ * successfully deleted or false if the element never existed.
112
+ *
113
+ * @param hs the HashSet to delete from
114
+ * @param elem the element to delete
115
+ * @return true if the element was deleted or false if the element never
116
+ * existed
117
+ */
118
+ extern int hs_del(HashSet *self, void *elem);
119
+
120
+ /**
121
+ * Remove an item from the HashSet without actually freeing the item. This
122
+ * function should return the item itself so that it can be freed later if
123
+ * necessary.
124
+ *
125
+ * @param hs the HashSet to remove the element from.
126
+ * @param elem the element to remove
127
+ * @return the element that was removed or NULL otherwise
128
+ */
129
+ extern void *hs_rem(HashSet *self, void *elem);
130
+
131
+ /**
132
+ * Check if the element exists and return the appropriate value described
133
+ * below.
134
+ *
135
+ * @param hs the HashSet to check in
136
+ * @param elem the element to check for
137
+ * @return one of the following values
138
+ * <pre>
139
+ * HASH_KEY_DOES_NOT_EXIST the element was not already in the HashSet.
140
+ * This value is equal to 0 or false
141
+ * HASH_KEY_SAME the element was identical (same memory
142
+ * pointer) to an existing element so no freeing
143
+ * was done
144
+ * HASH_KEY_EQUAL the element was equal to an element already in
145
+ * the HashSet so the new_elem was freed if
146
+ * free_elem was set
147
+ * </pre>
148
+ */
149
+ extern int hs_exists(HashSet *self, void *elem);
150
+
151
+ /**
152
+ * Merge two HashSets. When a merge is done the merger (self) HashSet is
153
+ * returned and the mergee is destroyed. All elements from mergee that were
154
+ * not found in merger (self) will be added to self, otherwise they will be
155
+ * destroyed.
156
+ *
157
+ * @param self the HashSet to merge into
158
+ * @param other HashSet to be merged into self
159
+ * @return the merged HashSet
160
+ */
161
+ extern HashSet *hs_merge(HashSet *self, HashSet *other);
162
+
163
+ /**
164
+ * Return the original version of +elem+. So if you allocate two elements
165
+ * which are equal and add the first to the HashSet, calling this function
166
+ * with the second element will return the first element from the HashSet.
167
+ */
168
+ extern void *hs_orig(HashSet *self, void *elem);
169
+
170
+ /**
171
+ * Clear all elements from the HashSet. If free_elem was set then use it to
172
+ * free all elements as they are cleared. After the method is called, the
173
+ * HashSets size will be 0.
174
+ *
175
+ * @param self the HashSet to clear
176
+ */
177
+ extern void hs_clear(HashSet *self);
29
178
 
30
179
  /* TODO: finish these functions.
31
180
  int hs_osf(HashSet *hs, void *elem);