ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/hashset.c
CHANGED
@@ -1,139 +1,167 @@
|
|
1
|
-
#include
|
1
|
+
#include "hashset.h"
|
2
2
|
#include <string.h>
|
3
|
-
#define HS_MIN_SIZE 4
|
4
3
|
|
5
|
-
|
4
|
+
/*
|
5
|
+
* The HashSet contains an array +elems+ of the elements that have been added.
|
6
|
+
* It always has +size+ elements so +size+ and +elems+ can be used to iterate
|
7
|
+
* over all elements in the HashSet. It also uses a HashTable to keep track of
|
8
|
+
* which elements have been added and their index in the +elems+ array.
|
9
|
+
*/
|
10
|
+
static HashSet *hs_alloc(void (*free_elem) (void *p))
|
6
11
|
{
|
7
|
-
|
8
|
-
|
9
|
-
|
12
|
+
HashSet *hs = ALLOC(HashSet);
|
13
|
+
hs->size = 0;
|
14
|
+
hs->capa = HS_MIN_SIZE;
|
15
|
+
hs->elems = ALLOC_N(void *, HS_MIN_SIZE);
|
16
|
+
hs->free_elem_i = free_elem ? free_elem : &dummy_free;
|
17
|
+
return hs;
|
10
18
|
}
|
11
19
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
int (*eq)(const void *p1, const void *p2),
|
16
|
-
void (*free_elem)(void *p))
|
20
|
+
HashSet *hs_new(ulong (*hash)(const void *p),
|
21
|
+
int (*eq)(const void *p1, const void *p2),
|
22
|
+
void (*free_elem)(void *p))
|
17
23
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
hs->capa = hs->size = 0;
|
22
|
-
if (free_elem == NULL)
|
23
|
-
hs->free_elem = &hs_dummy_free;
|
24
|
-
else
|
25
|
-
hs->free_elem = free_elem;
|
26
|
-
return hs;
|
24
|
+
HashSet *hs = hs_alloc(free_elem);
|
25
|
+
hs->ht = h_new(hash, eq, NULL, &free);
|
26
|
+
return hs;
|
27
27
|
}
|
28
28
|
|
29
|
-
HashSet *
|
29
|
+
HashSet *hs_new_str(void (*free_elem) (void *p))
|
30
30
|
{
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
hs->capa = hs->size = 0;
|
35
|
-
if (free_elem == NULL)
|
36
|
-
hs->free_elem = &hs_dummy_free;
|
37
|
-
else
|
38
|
-
hs->free_elem = free_elem;
|
39
|
-
return hs;
|
31
|
+
HashSet *hs = hs_alloc(free_elem);
|
32
|
+
hs->ht = h_new_str((free_ft) NULL, &free);
|
33
|
+
return hs;
|
40
34
|
}
|
41
35
|
|
42
|
-
void
|
36
|
+
void hs_free(HashSet *hs)
|
43
37
|
{
|
44
|
-
|
45
|
-
|
46
|
-
|
38
|
+
h_destroy(hs->ht);
|
39
|
+
free(hs->elems);
|
40
|
+
free(hs);
|
47
41
|
}
|
48
42
|
|
49
|
-
void hs_clear(HashSet *
|
43
|
+
void hs_clear(HashSet *hs)
|
50
44
|
{
|
51
|
-
|
52
|
-
|
53
|
-
|
45
|
+
int i;
|
46
|
+
for (i = hs->size - 1; i >= 0; i--) {
|
47
|
+
hs_del(hs, hs->elems[i]);
|
48
|
+
}
|
54
49
|
}
|
55
50
|
|
56
|
-
void
|
51
|
+
void hs_destroy(HashSet *hs)
|
57
52
|
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
53
|
+
int i;
|
54
|
+
if (hs->free_elem_i != &dummy_free) {
|
55
|
+
for (i = 0; i < hs->size; i++) {
|
56
|
+
hs->free_elem_i(hs->elems[i]);
|
57
|
+
}
|
58
|
+
}
|
59
|
+
h_destroy(hs->ht);
|
60
|
+
free(hs->elems);
|
61
|
+
free(hs);
|
63
62
|
}
|
64
63
|
|
65
64
|
int hs_add(HashSet *hs, void *elem)
|
66
65
|
{
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
66
|
+
int has_elem = h_has_key(hs->ht, elem);
|
67
|
+
if (has_elem == HASH_KEY_EQUAL) {
|
68
|
+
/* We don't want to keep two of the same elem so free if necessary */
|
69
|
+
hs->free_elem_i(elem);
|
70
|
+
}
|
71
|
+
else if (has_elem == HASH_KEY_SAME) {
|
72
|
+
/* No need to do anything */
|
73
|
+
}
|
74
|
+
else {
|
75
|
+
/* add the elem to the array, resizing if necessary */
|
76
|
+
if (hs->size >= hs->capa) {
|
77
|
+
hs->capa *= 2;
|
78
|
+
REALLOC_N(hs->elems, void *, hs->capa);
|
79
|
+
}
|
80
|
+
hs->elems[hs->size] = elem;
|
81
|
+
h_set(hs->ht, elem, imalloc(hs->size));
|
82
|
+
hs->size++;
|
83
|
+
}
|
84
|
+
return has_elem;
|
85
|
+
}
|
86
|
+
|
87
|
+
int hs_add_safe(HashSet *hs, void *elem)
|
88
|
+
{
|
89
|
+
int has_elem = h_has_key(hs->ht, elem);
|
90
|
+
if (has_elem == HASH_KEY_EQUAL) {
|
91
|
+
/* element can't be added */
|
92
|
+
return false;
|
93
|
+
}
|
94
|
+
else if (has_elem == HASH_KEY_SAME) {
|
95
|
+
/* the exact same element has already been added */
|
96
|
+
return true;
|
97
|
+
}
|
98
|
+
else {
|
99
|
+
/* add the elem to the array, resizing if necessary */
|
100
|
+
if (hs->size >= hs->capa) {
|
101
|
+
hs->capa *= 2;
|
102
|
+
REALLOC_N(hs->elems, void *, hs->capa);
|
103
|
+
}
|
104
|
+
hs->elems[hs->size] = elem;
|
105
|
+
h_set(hs->ht, elem, imalloc(hs->size));
|
106
|
+
hs->size++;
|
107
|
+
return true;
|
82
108
|
}
|
83
|
-
hs->elems[hs->size] = elem;
|
84
|
-
h_set(hs->ht, elem, imalloc(hs->size));
|
85
|
-
hs->size++;
|
86
|
-
}
|
87
|
-
return has_elem;
|
88
109
|
}
|
89
110
|
|
90
111
|
int hs_del(HashSet *hs, void *elem)
|
91
112
|
{
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
113
|
+
void *tmp_elem = hs_rem(hs, elem);
|
114
|
+
if (tmp_elem != NULL) {
|
115
|
+
hs->free_elem_i(tmp_elem);
|
116
|
+
return 1;
|
117
|
+
}
|
118
|
+
else {
|
119
|
+
return 0;
|
120
|
+
}
|
99
121
|
}
|
100
122
|
|
101
123
|
void *hs_rem(HashSet *hs, void *elem)
|
102
124
|
{
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
125
|
+
void *ret_elem;
|
126
|
+
int *index = (int *)h_get(hs->ht, elem);
|
127
|
+
if (index == NULL) {
|
128
|
+
return NULL;
|
129
|
+
}
|
130
|
+
else {
|
131
|
+
int i = *index;
|
132
|
+
ret_elem = hs->elems[i];
|
133
|
+
h_del(hs->ht, elem);
|
134
|
+
hs->size--;
|
135
|
+
memmove(&hs->elems[i], &hs->elems[i + 1],
|
136
|
+
sizeof(void *) * (hs->size - i));
|
137
|
+
return ret_elem;
|
138
|
+
}
|
115
139
|
}
|
116
140
|
|
117
141
|
int hs_exists(HashSet *hs, void *elem)
|
118
142
|
{
|
119
|
-
|
143
|
+
return h_has_key(hs->ht, elem);
|
120
144
|
}
|
121
145
|
|
122
|
-
HashSet *hs_merge(HashSet *hs, HashSet *other)
|
146
|
+
HashSet *hs_merge(HashSet *hs, HashSet * other)
|
123
147
|
{
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
148
|
+
int i;
|
149
|
+
for (i = 0; i < other->size; i++) {
|
150
|
+
hs_add(hs, other->elems[i]);
|
151
|
+
}
|
152
|
+
/* Now free the other hashset. It is no longer needed. No need, however, to
|
153
|
+
* delete the elements as they're either destroyed or in the new hash set */
|
154
|
+
hs_free(other);
|
155
|
+
return hs;
|
132
156
|
}
|
133
157
|
|
134
158
|
void *hs_orig(HashSet *hs, void *elem)
|
135
159
|
{
|
136
|
-
|
137
|
-
|
138
|
-
|
160
|
+
int *index = h_get(hs->ht, elem);
|
161
|
+
if (index) {
|
162
|
+
return hs->elems[*index];
|
163
|
+
}
|
164
|
+
else {
|
165
|
+
return NULL;
|
166
|
+
}
|
139
167
|
}
|
data/ext/hashset.h
CHANGED
@@ -2,30 +2,179 @@
|
|
2
2
|
#define FRT_HASHSET_H
|
3
3
|
|
4
4
|
#include "hash.h"
|
5
|
-
#include "array.h"
|
6
5
|
#include "global.h"
|
7
6
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
7
|
+
#define HS_MIN_SIZE 4
|
8
|
+
|
9
|
+
typedef struct HashSet
|
10
|
+
{
|
11
|
+
/* used internally to allocate space to elems */
|
12
|
+
int capa;
|
13
|
+
|
14
|
+
/* the number of elements in the HashSet */
|
15
|
+
int size;
|
16
|
+
|
17
|
+
/* the elements in the HashSet. The elements will be found in the order
|
18
|
+
* they were added and can be iterated over from 0 to .size */
|
19
|
+
void **elems;
|
20
|
+
|
21
|
+
/* HashTable used internally */
|
22
|
+
HashTable *ht;
|
23
|
+
|
24
|
+
/* Internal: Frees elements added to the HashSet. Should never be NULL */
|
25
|
+
void (*free_elem_i)(void *p);
|
14
26
|
} HashSet;
|
15
27
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
28
|
+
/**
|
29
|
+
* Create a new HashSet. The function will allocate a HashSet Struct setting
|
30
|
+
* the functions used to hash the objects it will contain and the eq function.
|
31
|
+
* This should be used for non-string types.
|
32
|
+
*
|
33
|
+
* @param hash function to hash objects added to the HashSet
|
34
|
+
* @param eq function to determine whether two items are equal
|
35
|
+
* @param free_elem function used to free elements as added to the HashSet
|
36
|
+
* when the HashSet is destroyed or duplicate elements are added to the Set
|
37
|
+
* @return a newly allocated HashSet structure
|
38
|
+
*/
|
39
|
+
extern HashSet *hs_new(ulong (*hash)(const void *p),
|
40
|
+
int (*eq)(const void *p1, const void *p2),
|
41
|
+
void (*free_elem)(void *p));
|
42
|
+
|
43
|
+
/**
|
44
|
+
* Create a new HashSet specifically for strings. This will create a HashSet
|
45
|
+
* as if you used hs_new with the standard string hash and eq functions.
|
46
|
+
*
|
47
|
+
* @param free_elem function used to free elements as added to the HashSet
|
48
|
+
* when the HashSet is destroyed or duplicate elements are added to the Set
|
49
|
+
* @return a newly allocated HashSet structure
|
50
|
+
*/
|
51
|
+
extern HashSet *hs_new_str(void (*free_elem) (void *p));
|
52
|
+
|
53
|
+
/**
|
54
|
+
* Free the memory allocated by the HashSet, but don't free the elements added
|
55
|
+
* to the HashSet. If you'd like to free everything in the HashSet you should
|
56
|
+
* use hs_destroy
|
57
|
+
*
|
58
|
+
* @param hs the HashSet to free
|
59
|
+
*/
|
60
|
+
extern void hs_free(HashSet *self);
|
61
|
+
|
62
|
+
/**
|
63
|
+
* Destroy the HashSet including all elements added to the HashSet. If you'd
|
64
|
+
* like to free the memory allocated to the HashSet without touching the
|
65
|
+
* elements in the HashSet then use hs_free
|
66
|
+
*
|
67
|
+
* @param hs the HashSet to destroy
|
68
|
+
*/
|
69
|
+
extern void hs_destroy(HashSet *self);
|
70
|
+
|
71
|
+
/**
|
72
|
+
* WARNING: this function may destroy some elements if you add them to a
|
73
|
+
* HashSet where equivalent elements already exist, depending on how free_elem
|
74
|
+
* was set.
|
75
|
+
*
|
76
|
+
* Add the element to the HashSet whether or not it was already in the
|
77
|
+
* HashSet.
|
78
|
+
*
|
79
|
+
* When an element is added to the HashSet where it already exists, free_elem
|
80
|
+
* is called on it, ie the element you tried to add might get destroyed.
|
81
|
+
*
|
82
|
+
* @param hs the HashSet to add the element to
|
83
|
+
* @param elem the element to add to the HashSet
|
84
|
+
* @return one of three values;
|
85
|
+
* <pre>
|
86
|
+
* HASH_KEY_DOES_NOT_EXIST the element was not already in the HashSet.
|
87
|
+
* This value is equal to 0 or false
|
88
|
+
* HASH_KEY_SAME the element was identical (same memory
|
89
|
+
* pointer) to an existing element so no freeing
|
90
|
+
* was done
|
91
|
+
* HASH_KEY_EQUAL the element was equal to an element already in
|
92
|
+
* the HashSet so the new_elem was freed if
|
93
|
+
* free_elem was set
|
94
|
+
* </pre>
|
95
|
+
*/
|
96
|
+
extern int hs_add(HashSet *self, void *elem);
|
97
|
+
|
98
|
+
/**
|
99
|
+
* Add element to the HashSet. If the element already existed in the HashSet
|
100
|
+
* and the new element was equal but not the same (same pointer/memory) then
|
101
|
+
* don't add the element and return false, otherwise return true.
|
102
|
+
*
|
103
|
+
* @param hs the HashSet to add the element to
|
104
|
+
* @param elem the element to add to the HashSet
|
105
|
+
* @return true if the element was successfully added or false otherwise
|
106
|
+
*/
|
107
|
+
extern int hs_add_safe(HashSet *self, void *elem);
|
108
|
+
|
109
|
+
/**
|
110
|
+
* Delete the element from the HashSet. Returns true if the item was
|
111
|
+
* successfully deleted or false if the element never existed.
|
112
|
+
*
|
113
|
+
* @param hs the HashSet to delete from
|
114
|
+
* @param elem the element to delete
|
115
|
+
* @return true if the element was deleted or false if the element never
|
116
|
+
* existed
|
117
|
+
*/
|
118
|
+
extern int hs_del(HashSet *self, void *elem);
|
119
|
+
|
120
|
+
/**
|
121
|
+
* Remove an item from the HashSet without actually freeing the item. This
|
122
|
+
* function should return the item itself so that it can be freed later if
|
123
|
+
* necessary.
|
124
|
+
*
|
125
|
+
* @param hs the HashSet to remove the element from.
|
126
|
+
* @param elem the element to remove
|
127
|
+
* @return the element that was removed or NULL otherwise
|
128
|
+
*/
|
129
|
+
extern void *hs_rem(HashSet *self, void *elem);
|
130
|
+
|
131
|
+
/**
|
132
|
+
* Check if the element exists and return the appropriate value described
|
133
|
+
* below.
|
134
|
+
*
|
135
|
+
* @param hs the HashSet to check in
|
136
|
+
* @param elem the element to check for
|
137
|
+
* @return one of the following values
|
138
|
+
* <pre>
|
139
|
+
* HASH_KEY_DOES_NOT_EXIST the element was not already in the HashSet.
|
140
|
+
* This value is equal to 0 or false
|
141
|
+
* HASH_KEY_SAME the element was identical (same memory
|
142
|
+
* pointer) to an existing element so no freeing
|
143
|
+
* was done
|
144
|
+
* HASH_KEY_EQUAL the element was equal to an element already in
|
145
|
+
* the HashSet; hs_exists only checks, it never
|
146
|
+
* frees anything
|
147
|
+
* </pre>
|
148
|
+
*/
|
149
|
+
extern int hs_exists(HashSet *self, void *elem);
|
150
|
+
|
151
|
+
/**
|
152
|
+
* Merge two HashSets. When a merge is done the merger (self) HashSet is
|
153
|
+
* returned and the mergee is destroyed. All elements from mergee that were
|
154
|
+
* not found in merger (self) will be added to self, otherwise they will be
|
155
|
+
* destroyed.
|
156
|
+
*
|
157
|
+
* @param self the HashSet to merge into
|
158
|
+
* @param other HashSet to be merged into self
|
159
|
+
* @return the merged HashSet
|
160
|
+
*/
|
161
|
+
extern HashSet *hs_merge(HashSet *self, HashSet *other);
|
162
|
+
|
163
|
+
/**
|
164
|
+
* Return the original version of +elem+. So if you allocate two elements
|
165
|
+
* which are equal and add the first to the HashSet, calling this function
|
166
|
+
* with the second element will return the first element from the HashSet.
|
167
|
+
*/
|
168
|
+
extern void *hs_orig(HashSet *self, void *elem);
|
169
|
+
|
170
|
+
/**
|
171
|
+
* Clear all elements from the HashSet. If free_elem was set then use it to
|
172
|
+
* free all elements as they are cleared. After the method is called, the
|
173
|
+
* HashSet's size will be 0.
|
174
|
+
*
|
175
|
+
* @param self the HashSet to clear
|
176
|
+
*/
|
177
|
+
extern void hs_clear(HashSet *self);
|
29
178
|
|
30
179
|
/* TODO: finish these functions.
|
31
180
|
int hs_osf(HashSet *hs, void *elem);
|