ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/hash.h CHANGED
@@ -3,87 +3,460 @@
3
3
 
4
4
  #include "global.h"
5
5
 
6
- #define NUM_ENTRIES 256
7
- #define MULTIPLIER 31
8
-
9
- typedef struct HashEntry {
10
- char *name;
11
- void *value;
12
- struct HashEntry *next;
13
- } HashEntry;
14
-
15
- HashEntry **ht_create();
16
- int ht_count(HashEntry **ht);
17
- void ht_destroy(HashEntry **ht);
18
- void ht_destroy_all(HashEntry **ht, void (*fn)(void *));
19
- void ht_set(HashEntry **ht, char *name, void *value);
20
- void *ht_get(HashEntry **ht, char *name);
21
- void *ht_delete(HashEntry **ht, char *name);
22
-
23
6
  /****************************************************************************
24
7
  *
25
- * HshTable
8
+ * HashTable
26
9
  *
27
10
  ****************************************************************************/
28
11
 
29
- #define Hsh_MINSIZE 8
30
- #define SLOW_DOWN 50000 /* stop increasing the hash table so quickly to
31
- * conserve memory */
32
- extern char *dummy_key;
33
- enum {
34
- HASH_KEY_DOES_NOT_EXIST = 0,
35
- HASH_KEY_SAME = 1,
36
- HASH_KEY_EQUAL = 2
12
+ #define HASH_MINSIZE 8
13
+ #define SLOW_DOWN 50000 /* stop increasing the hash table so quickly to
14
+ * conserve memory */
15
+
16
+ /**
17
+ * Return values for h_set
18
+ */
19
+ enum HashSetValues
20
+ {
21
+ HASH_KEY_DOES_NOT_EXIST = 0,
22
+ HASH_KEY_EQUAL = 1,
23
+ HASH_KEY_SAME = 2
37
24
  };
38
25
 
39
- typedef struct {
40
- int hash; /* cached hash code of key */
41
- void *key;
42
- void *value;
43
- } HshEntry;
44
-
45
- typedef struct HshTable {
46
- int fill; /* # Active + # Dummy */
47
- int used; /* # Active */
48
- int mask; /* size of table - 1 */
49
-
50
- /* table points to smalltable for small tables, else to
51
- * additional malloc'ed memory. */
52
- HshEntry *table;
53
- HshEntry smalltable[Hsh_MINSIZE];
54
- HshEntry *(*lookup)(struct HshTable *ht, register const void *key);
55
- unsigned int (*hash)(const void *key);
56
- int (*eq)(const void *key1, const void *key2);
57
- free_ft free_key;
58
- free_ft free_value;
59
- } HshTable;
60
-
61
- typedef unsigned int (*hash_ft)(const void *key);
26
+ /**
27
+ * struct used internally to store values in the HashTable
28
+ */
29
+ typedef struct
30
+ {
31
+ ulong hash;
32
+ void *key;
33
+ void *value;
34
+ } HashEntry;
35
+
36
+ /**
37
+ * As the hash table is filled and entries are deleted, Dummy HashEntries are
38
+ * put in place. We therefore keep two counts. +size+ is the number of active
39
+ * elements and +fill+ is the number of active elements together with the
40
+ * number of dummy elements. +fill+ is basically just kept around so that we
41
+ * know when to resize. The HashTable is resized when more than two thirds of
42
+ * the HashTable is Filled.
43
+ */
44
+ typedef struct HashTable
45
+ {
46
+ int fill; /* num Active + num Dummy */
47
+ int size; /* num Active ie, num keys set */
48
+ int mask; /* capacity_of_table - 1 */
49
+
50
+ /* table points to smalltable initially. If the table grows beyond 2/3 of
51
+ * HASH_MINSIZE it will point to newly malloced memory as it grows. */
52
+ HashEntry *table;
53
+
54
+ /* When a HashTable is created it needs an initial table to start it off.
55
+ * All HashTables will start with smalltable and then malloc a larger
56
+ * table as the HashTable grows */
57
+ HashEntry smalltable[HASH_MINSIZE];
58
+
59
+ /* the following function pointers are used internally and should not be
60
+ * used outside of the HashTable methods */
61
+ HashEntry *(*lookup_i)(struct HashTable *self, register const void *key);
62
+ ulong (*hash_i)(const void *key);
63
+ int (*eq_i)(const void *key1, const void *key2);
64
+ void (*free_key_i)(void *p);
65
+ void (*free_value_i)(void *p);
66
+ } HashTable;
67
+
68
+ /**
69
+ * Hashing function type used by HashTable. A function of this type must be
70
+ * passed to create a new HashTable.
71
+ *
72
+ * @param key object to hash
73
+ * @return an unsigned 32-bit integer hash value
74
+ */
75
+ typedef ulong (*hash_ft)(const void *key);
76
+
77
+ /**
78
+ * Equals function type used by HashTable. A function of this type must be
79
+ * passed to create a new HashTable.
80
+ */
62
81
  typedef int (*eq_ft)(const void *key1, const void *key2);
63
82
 
64
- HshTable *h_new_str(free_ft free_key, free_ft free_value);
65
- HshTable *h_new(hash_ft hash, eq_ft eq, free_ft free_key, free_ft free_value);
66
- void h_destroy(HshTable *ht);
67
- void h_clear(HshTable *ht);
68
83
 
69
- void *h_get(HshTable *ht, const void *key);
70
- int h_del(HshTable *ht, const void *key);
71
- void *h_rem(HshTable *ht, const void *key, bool del_key);
72
- int h_set(HshTable *ht, const void *key, void *value);
73
- int h_set_safe(HshTable *ht, const void *key, void *value);
74
- int h_has_key(HshTable *ht, const void *key);
75
- unsigned int str_hash(const char *const str);
84
+ /**
85
+ * Create a pointer to an allocated U32 integer. This function is a utility
86
+ * function used to add integers to a HashTable, either as the key or the
87
+ * value.
88
+ */
89
+ extern ulong *imalloc(ulong value);
90
+
91
+ /**
92
+ * Determine a hash value for a string. The string must be null terminated
93
+ *
94
+ * @param str string to hash
95
+ * @return an unsigned 32-bit integer hash value
96
+ */
97
+ extern ulong str_hash(const char *const str);
98
+
99
+ /**
100
+ * Create a new HashTable that uses any type of object as its key. The
101
+ * HashTable will store all keys and values so if you want to destroy those
102
+ * values when the HashTable is destroyed then you should pass free functions.
103
+ * NULL will suffice otherwise.
104
+ *
105
+ * @param hash function to determine the hash value of a key in the HashTable
106
+ * @param eq function to determine the equality of two keys in the HashTable
107
+ * @param free_key function to free the key stored in the HashTable when an
108
+ * entry is deleted, replaced or when the HashTable is destroyed. If you
109
+ * pass NULL in place of this parameter the key will not be destroyed.
110
+ * @param free_value function to free the value stored in the HashTable when
111
+ * an entry is deleted, replaced or when the HashTable is destroyed. If you
112
+ * pass NULL in place of this parameter the value will not be destroyed.
113
+ * @return A newly allocated HashTable
114
+ */
115
+ extern HashTable *h_new(hash_ft hash, eq_ft eq, free_ft free_key,
116
+ free_ft free_value);
117
+
118
+ /**
119
+ * Create a new HashTable that uses null-terminated strings as its keys. The
120
+ * HashTable will store all keys and values so if you want to destroy those
121
+ * values when the HashTable is destroyed then you should pass free functions.
122
+ * NULL will suffice otherwise.
123
+ *
124
+ * @param free_key function to free the key stored in the HashTable when an
125
+ * entry is deleted, replaced or when the HashTable is destroyed. If you
126
+ * pass NULL in place of this parameter the key will not be destroyed.
127
+ * @param free_value function to free the value stored in the HashTable when
128
+ * an entry is deleted, replaced or when the HashTable is destroyed. If you
129
+ * pass NULL in place of this parameter the value will not be destroyed.
130
+ * @return A newly allocated HashTable
131
+ */
132
+ extern HashTable *h_new_str(free_ft free_key, free_ft free_value);
133
+
134
+ /**
135
+ * Create a new HashTable that uses integers as its keys. The
136
+ * HashTable will store all values so if you want to destroy those
137
+ * values when the HashTable is destroyed then you should pass a free function.
138
+ * NULL will suffice otherwise.
139
+ *
140
+ * @param free_value function to free the value stored in the HashTable when
141
+ * an entry is deleted, replaced or when the HashTable is destroyed. If you
142
+ * pass NULL in place of this parameter the value will not be destroyed.
143
+ * @return A newly allocated HashTable
144
+ */
145
+ extern HashTable *h_new_int(free_ft free_value);
146
+
147
+ /**
148
+ * Destroy the HashTable. This function will also destroy all keys and values
149
+ * in the HashTable depending on how the free_key and free_value were set.
150
+ *
151
+ * @param self the HashTable to destroy
152
+ */
153
+ extern void h_destroy(HashTable *self);
154
+
155
+ /**
156
+ * Clear the HashTable. This function will delete all keys and values from the
157
+ * hash table, also destroying all keys and values in the HashTable depending
158
+ * on how the free_key and free_value were set.
159
+ *
160
+ * @param self the HashTable to clear
161
+ */
162
+ extern void h_clear(HashTable *self);
163
+
164
+ /**
165
+ * Get the value in the HashTable referenced by the key +key+.
166
+ *
167
+ * @param self the HashTable to reference
168
+ * @param key the key to lookup
169
+ * @return the value referenced by the key +key+. If there is no value
170
+ * referenced by that key, NULL is returned.
171
+ */
172
+ extern void *h_get(HashTable *self, const void *key);
173
+
174
+ /**
175
+ * Delete the value in HashTable referenced by the key +key+. When the value
176
+ * is deleted it is also destroyed along with the key depending on how
177
+ * free_key and free_value were set when the HashTable was created. If you
178
+ * don't want to destroy the value use h_rem.
179
+ *
180
+ * This function returns +true+ if the value was deleted successfully or
181
+ * false if the key was not found.
182
+ *
183
+ * @see h_rem
184
+ *
185
+ * @param self the HashTable to reference
186
+ * @param key the key to lookup
187
+ * @return true if the object was successfully deleted or false if the key was
188
+ * not found
189
+ */
190
+ extern int h_del(HashTable *self, const void *key);
191
+
192
+ /**
193
+ * Remove the value in HashTable referenced by the key +key+. When the value
194
+ * is removed it is returned rather than destroyed. The key however is
195
+ * destroyed using the free_key functions passed when the HashTable is created
196
+ * if del_key is true.
197
+ *
198
+ * If you want the value to be destroyed, use the h_del function.
199
+ *
200
+ * @see h_del
201
+ *
202
+ * @param self the HashTable to reference
203
+ * @param key the key to lookup
204
+ * @param del_key set to true if you want the key to be deleted when the value
205
+ * is removed from the HashTable
206
+ * @return the value referenced by +key+ if it can be found or NULL otherwise
207
+ */
208
+ extern void *h_rem(HashTable *self, const void *key, bool del_key);
209
+
210
+ /**
211
+ * WARNING: this function may destroy an old value or key if the key already
212
+ * exists in the HashTable, depending on how free_value and free_key were set
213
+ * for this HashTable.
214
+ *
215
+ * Add the value +value+ to the HashTable referencing it with key +key+.
216
+ *
217
+ * When a value is added to the HashTable it replaces any value that
218
+ * might already be stored under that key. If free_value is already set then
219
+ * the old value will be freed using that function.
220
+ *
221
+ * Similarly the old key might be replaced by the new key if they are
222
+ * equal (according to the HashTable's eq function) but separately
223
+ * allocated objects.
224
+ *
225
+ * @param self the HashTable to add the value to
226
+ * @param key the key to use to reference the value
227
+ * @param value the value to add to the HashTable
228
+ * @return one of three values;
229
+ * <pre>
230
+ * HASH_KEY_DOES_NOT_EXIST there was no value stored with that key
231
+ * HASH_KEY_EQUAL the key existed and was separately allocated.
232
+ * In this situation the old key will have been
233
+ * destroyed if free_key was set
234
+ * HASH_KEY_SAME the key was identical (same memory pointer) to
235
+ * the existing key so no key was freed
236
+ * </pre>
237
+ */
238
+ extern int h_set(HashTable *self, const void *key, void *value);
239
+
240
+ /**
241
+ * Add the value +value+ to the HashTable referencing it with key +key+. If
242
+ * the key already exists in the HashTable, the value won't be added and the
243
+ * function will return false. Otherwise it will return true.
244
+ *
245
+ * @param self the HashTable to add the value to
246
+ * @param key the key to use to reference the value
247
+ * @param value the value to add to the HashTable
248
+ * @return true if the value was successfully added or false otherwise
249
+ */
250
+ extern int h_set_safe(HashTable *self, const void *key, void *value);
251
+
252
+ /**
253
+ * Return a hash entry object so you can handle the insert yourself. This can
254
+ * be used for performance reasons or for more control over how a value is
255
+ * added. Say, for example, you wanted to append a value to an array, or add a
256
+ * new array if none existed, you could use this method by checking the value
257
+ * of the HashEntry returned.
258
+ *
259
+ * @param ht the HashTable to add the value to
260
+ * @param key the key to use to reference the value
261
+ * @return HashEntry a pointer to the hash entry object now reserved for this
262
+ * value. Be sure to set both the *key* and the *value*
263
+ */
264
+ extern HashEntry *h_set_ext(HashTable *ht, const void *key);
265
+
266
+ /**
267
+ * Check whether key +key+ exists in the HashTable.
268
+ *
269
+ * @param self the HashTable to check in
270
+ * @param key the key to check for in the HashTable
271
+ * @return true if the key exists in the HashTable, false otherwise.
272
+ */
273
+ extern int h_has_key(HashTable *self, const void *key);
274
+
275
+ /**
276
+ * Get the value in the HashTable referenced by an integer key +key+.
277
+ *
278
+ * @param self the HashTable to reference
279
+ * @param key the integer key to lookup
280
+ * @return the value referenced by the key +key+. If there is no value
281
+ * referenced by that key, NULL is returned.
282
+ */
283
+ extern void *h_get_int(HashTable *self, const ulong key);
284
+
285
+ /**
286
+ * Delete the value in HashTable referenced by the integer key +key+. When the
287
+ * value is deleted it is also destroyed using the free_value function. If you
288
+ * don't want to destroy the value use h_rem_int.
289
+ *
290
+ * This function returns +true+ if the value was deleted successfully or
291
+ * false if the key was not found.
292
+ *
293
+ * @see h_rem
294
+ *
295
+ * @param self the HashTable to reference
296
+ * @param key the integer key to lookup
297
+ * @return true if the object was successfully deleted or false if the key was
298
+ * not found
299
+ */
300
+ extern int h_del_int(HashTable *self, const ulong key);
301
+
302
+ /**
303
+ * Remove the value in HashTable referenced by the integer key +key+. When the
304
+ * value is removed it is returned rather than destroyed.
305
+ *
306
+ * If you want the value to be destroyed, use the h_del_int function.
307
+ *
308
+ * @see h_del
309
+ *
310
+ * @param self the HashTable to reference
311
+ * @param key the integer key to lookup
312
+ * @return the value referenced by +key+ if it can be found or NULL otherwise
313
+ */
314
+ extern void *h_rem_int(HashTable *self, const ulong key);
315
+
316
+ /**
317
+ * WARNING: this function may destroy an old value if the key already exists
318
+ * in the HashTable, depending on how free_value was set for this HashTable.
319
+ *
320
+ * Add the value +value+ to the HashTable referencing it with an integer key
321
+ * +key+.
322
+ *
323
+ * When a value is added to the HashTable it replaces any value that
324
+ * might already be stored under that key. If free_value is already set then
325
+ * the old value will be freed using that function.
326
+ *
327
+ * Similarly the old key might be replaced by the new key if they are
328
+ * equal (according to the HashTable's eq function) but separately
329
+ * allocated objects.
330
+ *
331
+ * @param self the HashTable to add the value to
332
+ * @param key the integer key to use to reference the value
333
+ * @param value the value to add to the HashTable
334
+ * @return one of three values;
335
+ * <pre>
336
+ * HASH_KEY_DOES_NOT_EXIST there was no value stored with that key
337
+ * HASH_KEY_EQUAL the key existed and was separately allocated.
338
+ * In this situation the old key will have been
339
+ * destroyed if free_key was set
340
+ * HASH_KEY_SAME the key was identical (same memory pointer) to
341
+ * the existing key so no key was freed
342
+ * </pre>
343
+ */
344
+ extern int h_set_int(HashTable *self, const ulong key, void *value);
345
+
346
+ /**
347
+ * Add the value +value+ to the HashTable referencing it with integer key
348
+ * +key+. If the key already exists in the HashTable, the value won't be added
349
+ * and the function will return false. Otherwise it will return true.
350
+ *
351
+ * @param self the HashTable to add the value to
352
+ * @param key the integer key to use to reference the value
353
+ * @param value the value to add to the HashTable
354
+ * @return true if the value was successfully added or false otherwise
355
+ */
356
+ extern int h_set_safe_int(HashTable *self, const ulong key, void *value);
357
+ /**
358
+ * Check whether integer key +key+ exists in the HashTable.
359
+ *
360
+ * @param self the HashTable to check in
361
+ * @param key the integer key to check for in the HashTable
362
+ * @return true if the key exists in the HashTable, false otherwise.
363
+ */
364
+ extern int h_has_key_int(HashTable *self, const ulong key);
365
+
366
+ typedef void (*h_each_key_val_ft)(void *key, void *value, void *arg);
76
367
 
77
- void h_each(HshTable *ht,
78
- void (*each_kv)(void *key, void *value, void *arg),
79
- void *arg);
368
+ /**
369
+ * Run function +each_key_val+ on each key and value in the HashTable. The third
370
+ * argument +arg+ will also be passed to +each_key_val+. If you need to pass
371
+ * more than one argument to the function you should pass a struct.
372
+ *
373
+ * example;
374
+ *
375
+ * // Lets say we have stored strings in a HashTable and we want to put them
376
+ * // all into an array. First we need to create a struct to store the
377
+ * // strings;
378
+ *
379
+ * struct StringArray {
380
+ * char **strings;
381
+ * int cnt;
382
+ * int size;
383
+ * };
384
+ *
385
+ * static void add_string_ekv(void *key, void *value,
386
+ * struct StringArray *str_arr)
387
+ * {
388
+ * str_arr->strings[str_arr->cnt] = (char *)value;
389
+ * str_arr->cnt++;
390
+ * }
391
+ *
392
+ * struct StringArray *h_extract_strings(HashTable *ht)
393
+ * {
394
+ * struct StringArray *str_arr = ALLOC(struct StringArray);
395
+ *
396
+ * str_arr->strings = ALLOC_N(char *, ht->size);
397
+ * str_arr->cnt = 0;
398
+ * str_arr->size = ht->size;
399
+ *
400
+ * h_each(ht, (h_each_key_val_ft)add_string_ekv, str_arr);
401
+ *
402
+ * return str_arr;
403
+ * }
404
+ *
405
+ * @param self the HashTable to run the function on
406
+ * @param each_key_val function to run on each key and value in the
407
+ * HashTable
408
+ * @param arg an extra argument to pass to each_key_val each time it is called
409
+ */
410
+ extern void h_each(HashTable *self,
411
+ void (*each_key_val)(void *key, void *value, void *arg),
412
+ void *arg);
80
413
 
81
414
  typedef void *(*h_clone_func_t)(void *val);
82
- HshTable *h_clone(HshTable *ht,
83
- h_clone_func_t clone_key,
84
- h_clone_func_t clone_value);
415
+ /**
416
+ * Clone the HashTable as well as cloning each of the keys and values if you
417
+ * want to do a deep clone. To do a deep clone you will need to pass a
418
+ * clone_key function and/or a clone_value function.
419
+ *
420
+ * @param self the HashTable to clone
421
+ * @param clone_key the function to clone the key with
422
+ * @param clone_value the function to clone the value with
423
+ * @return a clone of the original HashTable
424
+ */
425
+ extern HashTable *h_clone(HashTable *self,
426
+ h_clone_func_t clone_key,
427
+ h_clone_func_t clone_value);
428
+
429
+ /*
430
+ * The following functions should only be used in static HashTable
431
+ * declarations
432
+ */
433
+ /**
434
+ * This is the lookup function for a hash table keyed with strings. Since it
435
+ * is so common for hash tables to be keyed with strings it gets its own
436
+ * lookup function. This method will always return a HashEntry. If there is no
437
+ * entry with the given key then an empty entry will be returned with the key
438
+ * set to the key that was passed.
439
+ *
440
+ * @param ht the hash table to look in
441
+ * @param key the key to lookup
442
+ * @return the HashEntry that was found
443
+ */
444
+ extern HashEntry *h_lookup_str(HashTable *ht, register const char *key);
445
+
446
+ /**
447
+ * This is the lookup function for a hash table with non-string keys. The
448
+ * hash() and eq() methods used are stored in the hash table. This method will
449
+ * always return a HashEntry. If there is no entry with the given key then an
450
+ * empty entry will be returned with the key set to the key that was passed.
451
+ *
452
+ * @param ht the hash table to look in
453
+ * @param key the key to lookup
454
+ * @return the HashEntry that was found
455
+ */
456
+ extern HashEntry *h_lookup(HashTable *ht, register const void *key);
457
+
458
+ typedef HashEntry *(*h_lookup_ft)(HashTable *ht, register const void *key);
85
459
 
86
- void dummy_free(void *p);
87
- HshEntry *h_lookup_str(HshTable *ht, register const void *key_p);
460
+ extern void h_str_print_keys(HashTable *ht);
88
461
 
89
462
  #endif