ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_utils.c
ADDED
@@ -0,0 +1,941 @@
|
|
1
|
+
#include "ferret.h"
|
2
|
+
#include "bitvector.h"
|
3
|
+
|
4
|
+
/*****************
|
5
|
+
*** BitVector ***
|
6
|
+
*****************/
|
7
|
+
/* Ruby class object for BitVector; assigned in Init_BitVector(). */
static VALUE cBitVector;

/*
 * Allocator registered for the BitVector class. Builds a fresh C bit-vector
 * with bv_new() and wraps it in a Ruby object of class +klass+. No mark
 * function is registered; bv_destroy is registered as the free function.
 */
static VALUE
frt_bv_alloc(VALUE klass)
{
    BitVector *new_bv = bv_new();

    return Data_Wrap_Struct(klass, NULL, &bv_destroy, new_bv);
}
|
14
|
+
|
15
|
+
/* Extract the BitVector pointer wrapped by the Ruby object +self+ into +bv+. */
#define GET_BV(bv, self) Data_Get_Struct(self, BitVector, bv)
|
16
|
+
|
17
|
+
/*
|
18
|
+
* call-seq:
|
19
|
+
* BitVector.new() -> new_bv
|
20
|
+
*
|
21
|
+
* Returns a new empty bit-vector object
|
22
|
+
*/
|
23
|
+
static VALUE
|
24
|
+
frt_bv_init(VALUE self)
|
25
|
+
{
|
26
|
+
return self;
|
27
|
+
}
|
28
|
+
|
29
|
+
/*
|
30
|
+
* call-seq:
|
31
|
+
* bv[i] = bool -> bool
|
32
|
+
*
|
33
|
+
* Set the bit and _i_ to *val* (+true+ or
|
34
|
+
* +false+).
|
35
|
+
*/
|
36
|
+
VALUE
|
37
|
+
frt_bv_set(VALUE self, VALUE rindex, VALUE rstate)
|
38
|
+
{
|
39
|
+
BitVector *bv;
|
40
|
+
int index = FIX2INT(rindex);
|
41
|
+
GET_BV(bv, self);
|
42
|
+
if (index < 0) {
|
43
|
+
rb_raise(rb_eIndexError, "%d < 0", index);
|
44
|
+
}
|
45
|
+
if (RTEST(rstate)) {
|
46
|
+
bv_set(bv, index);
|
47
|
+
}
|
48
|
+
else {
|
49
|
+
bv_unset(bv, index);
|
50
|
+
}
|
51
|
+
|
52
|
+
return rstate;
|
53
|
+
}
|
54
|
+
|
55
|
+
/*
|
56
|
+
* call-seq:
|
57
|
+
* bv.set(i) -> self
|
58
|
+
*
|
59
|
+
* Set the bit at _i_ to *on* (+true+)
|
60
|
+
*/
|
61
|
+
VALUE
|
62
|
+
frt_bv_set_on(VALUE self, VALUE rindex)
|
63
|
+
{
|
64
|
+
frt_bv_set(self, rindex, Qtrue);
|
65
|
+
return self;
|
66
|
+
}
|
67
|
+
|
68
|
+
/*
|
69
|
+
* call-seq:
|
70
|
+
* bv.unset(i) -> self
|
71
|
+
*
|
72
|
+
* Set the bit at _i_ to *off* (+false+)
|
73
|
+
*/
|
74
|
+
VALUE
|
75
|
+
frt_bv_set_off(VALUE self, VALUE rindex)
|
76
|
+
{
|
77
|
+
frt_bv_set(self, rindex, Qfalse);
|
78
|
+
return self;
|
79
|
+
}
|
80
|
+
|
81
|
+
/*
|
82
|
+
* call-seq:
|
83
|
+
* bv.get(i) -> bool
|
84
|
+
* bv[i] -> bool
|
85
|
+
*
|
86
|
+
* Get the bit value at _i_
|
87
|
+
*/
|
88
|
+
VALUE
|
89
|
+
frt_bv_get(VALUE self, VALUE rindex)
|
90
|
+
{
|
91
|
+
BitVector *bv;
|
92
|
+
int index = FIX2INT(rindex);
|
93
|
+
GET_BV(bv, self);
|
94
|
+
if (index < 0) {
|
95
|
+
rb_raise(rb_eIndexError, "%d < 0", index);
|
96
|
+
}
|
97
|
+
|
98
|
+
return bv_get(bv, index) ? Qtrue : Qfalse;
|
99
|
+
}
|
100
|
+
|
101
|
+
/*
|
102
|
+
* call-seq:
|
103
|
+
* bv.count -> bit_count
|
104
|
+
*
|
105
|
+
* Count the number of bits set in the bit-vector. If the bit-vector has been
|
106
|
+
* negated using +#not+ then count the number of unset bits
|
107
|
+
* instead.
|
108
|
+
*/
|
109
|
+
VALUE
|
110
|
+
frt_bv_count(VALUE self)
|
111
|
+
{
|
112
|
+
BitVector *bv;
|
113
|
+
GET_BV(bv, self);
|
114
|
+
return INT2FIX(bv->count);
|
115
|
+
}
|
116
|
+
|
117
|
+
/*
|
118
|
+
* call-seq:
|
119
|
+
* bv.clear -> self
|
120
|
+
*
|
121
|
+
* Clears all set bits in the bit-vector. Negated bit-vectors will still have
|
122
|
+
* all bits set to *off*.
|
123
|
+
*/
|
124
|
+
VALUE
|
125
|
+
frt_bv_clear(VALUE self)
|
126
|
+
{
|
127
|
+
BitVector *bv;
|
128
|
+
GET_BV(bv, self);
|
129
|
+
bv_clear(bv);
|
130
|
+
bv_scan_reset(bv);
|
131
|
+
return self;
|
132
|
+
}
|
133
|
+
|
134
|
+
/*
|
135
|
+
* call-seq:
|
136
|
+
* bv1 == bv2 -> bool
|
137
|
+
* bv1 != bv2 -> bool
|
138
|
+
* bv1.eql(bv2) -> bool
|
139
|
+
*
|
140
|
+
* Compares two bit vectors and returns true if both bitvectors have the same
|
141
|
+
* bits set.
|
142
|
+
*/
|
143
|
+
VALUE
|
144
|
+
frt_bv_eql(VALUE self, VALUE other)
|
145
|
+
{
|
146
|
+
BitVector *bv1, *bv2;
|
147
|
+
GET_BV(bv1, self);
|
148
|
+
GET_BV(bv2, other);
|
149
|
+
return bv_eq(bv1, bv2) ? Qtrue : Qfalse;
|
150
|
+
}
|
151
|
+
|
152
|
+
/*
|
153
|
+
* call-seq:
|
154
|
+
* bv.hash -> int
|
155
|
+
*
|
156
|
+
* Used to store bit vectors in Hashes. Especially useful if you want to
|
157
|
+
* cache them.
|
158
|
+
*/
|
159
|
+
VALUE
|
160
|
+
frt_bv_hash(VALUE self)
|
161
|
+
{
|
162
|
+
BitVector *bv;
|
163
|
+
GET_BV(bv, self);
|
164
|
+
return LONG2NUM(bv_hash(bv));
|
165
|
+
}
|
166
|
+
|
167
|
+
/*
|
168
|
+
* call-seq:
|
169
|
+
* bv1 & bv2 -> anded_bv
|
170
|
+
* bv1.and(bv2) -> anded_bv
|
171
|
+
*
|
172
|
+
* Perform a boolean _and_ operation on +bv1+ and
|
173
|
+
* +bv2+
|
174
|
+
*/
|
175
|
+
VALUE
|
176
|
+
frt_bv_and(VALUE self, VALUE other)
|
177
|
+
{
|
178
|
+
BitVector *bv1, *bv2;
|
179
|
+
GET_BV(bv1, self);
|
180
|
+
GET_BV(bv2, other);
|
181
|
+
return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_and(bv1, bv2));
|
182
|
+
}
|
183
|
+
|
184
|
+
/*
|
185
|
+
* call-seq:
|
186
|
+
* bv1.and!(bv2) -> self
|
187
|
+
*
|
188
|
+
* Perform a boolean _and_ operation on +bv1+ and
|
189
|
+
* +bv2+ in place on +bv1+
|
190
|
+
*/
|
191
|
+
VALUE
|
192
|
+
frt_bv_and_x(VALUE self, VALUE other)
|
193
|
+
{
|
194
|
+
BitVector *bv1, *bv2;
|
195
|
+
GET_BV(bv1, self);
|
196
|
+
GET_BV(bv2, other);
|
197
|
+
bv_and_x(bv1, bv2);
|
198
|
+
return self;
|
199
|
+
}
|
200
|
+
|
201
|
+
/*
|
202
|
+
* call-seq:
|
203
|
+
* bv1 | bv2 -> ored_bv
|
204
|
+
* bv1.or(bv2) -> ored_bv
|
205
|
+
*
|
206
|
+
* Perform a boolean _or_ operation on +bv1+ and
|
207
|
+
* +bv2+
|
208
|
+
*/
|
209
|
+
VALUE
|
210
|
+
frt_bv_or(VALUE self, VALUE other)
|
211
|
+
{
|
212
|
+
BitVector *bv1, *bv2;
|
213
|
+
GET_BV(bv1, self);
|
214
|
+
GET_BV(bv2, other);
|
215
|
+
return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_or(bv1, bv2));
|
216
|
+
}
|
217
|
+
|
218
|
+
/*
|
219
|
+
* call-seq:
|
220
|
+
* bv1.or!(bv2) -> self
|
221
|
+
*
|
222
|
+
* Perform a boolean _or_ operation on +bv1+ and
|
223
|
+
* +bv2+ in place on +bv1+
|
224
|
+
*/
|
225
|
+
VALUE
|
226
|
+
frt_bv_or_x(VALUE self, VALUE other)
|
227
|
+
{
|
228
|
+
BitVector *bv1, *bv2;
|
229
|
+
GET_BV(bv1, self);
|
230
|
+
GET_BV(bv2, other);
|
231
|
+
bv_or_x(bv1, bv2);
|
232
|
+
return self;
|
233
|
+
}
|
234
|
+
|
235
|
+
/*
|
236
|
+
* call-seq:
|
237
|
+
* bv1 ^ bv2 -> xored_bv
|
238
|
+
* bv1.xor(bv2) -> xored_bv
|
239
|
+
*
|
240
|
+
* Perform a boolean _xor_ operation on +bv1+ and
|
241
|
+
* +bv2+
|
242
|
+
*/
|
243
|
+
VALUE
|
244
|
+
frt_bv_xor(VALUE self, VALUE other)
|
245
|
+
{
|
246
|
+
BitVector *bv1, *bv2;
|
247
|
+
GET_BV(bv1, self);
|
248
|
+
GET_BV(bv2, other);
|
249
|
+
return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_xor(bv1, bv2));
|
250
|
+
}
|
251
|
+
|
252
|
+
/*
|
253
|
+
* call-seq:
|
254
|
+
* bv1.xor!(bv2) -> self
|
255
|
+
*
|
256
|
+
* Perform a boolean _xor_ operation on +bv1+ and
|
257
|
+
* +bv2+ in place on +bv1+
|
258
|
+
*/
|
259
|
+
VALUE
|
260
|
+
frt_bv_xor_x(VALUE self, VALUE other)
|
261
|
+
{
|
262
|
+
BitVector *bv1, *bv2;
|
263
|
+
GET_BV(bv1, self);
|
264
|
+
GET_BV(bv2, other);
|
265
|
+
bv_xor_x(bv1, bv2);
|
266
|
+
return self;
|
267
|
+
}
|
268
|
+
|
269
|
+
/*
|
270
|
+
* call-seq:
|
271
|
+
* ~bv -> bv
|
272
|
+
* bv.not -> bv
|
273
|
+
*
|
274
|
+
* Perform a boolean _not_ operation on +bv+
|
275
|
+
* */
|
276
|
+
VALUE
|
277
|
+
frt_bv_not(VALUE self)
|
278
|
+
{
|
279
|
+
BitVector *bv;
|
280
|
+
GET_BV(bv, self);
|
281
|
+
return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_not(bv));
|
282
|
+
}
|
283
|
+
|
284
|
+
/*
|
285
|
+
* call-seq:
|
286
|
+
* bv.not! -> self
|
287
|
+
*
|
288
|
+
* Perform a boolean _not_ operation on +bv+ in-place
|
289
|
+
*/
|
290
|
+
VALUE
|
291
|
+
frt_bv_not_x(VALUE self)
|
292
|
+
{
|
293
|
+
BitVector *bv;
|
294
|
+
GET_BV(bv, self);
|
295
|
+
bv_not_x(bv);
|
296
|
+
return self;
|
297
|
+
}
|
298
|
+
|
299
|
+
/*
|
300
|
+
* call-seq:
|
301
|
+
* bv.reset_scan -> self
|
302
|
+
*
|
303
|
+
* Resets the BitVector ready for scanning. You should call this method
|
304
|
+
* before calling +#next+ or +#next_unset+. It isn't
|
305
|
+
* necessary for the other scan methods or for the +#each+ method.
|
306
|
+
*/
|
307
|
+
VALUE
|
308
|
+
frt_bv_reset_scan(VALUE self)
|
309
|
+
{
|
310
|
+
BitVector *bv;
|
311
|
+
GET_BV(bv, self);
|
312
|
+
bv_scan_reset(bv);
|
313
|
+
return self;
|
314
|
+
}
|
315
|
+
|
316
|
+
/*
|
317
|
+
* call-seq:
|
318
|
+
* bv.next -> bit_num
|
319
|
+
*
|
320
|
+
* Returns the next set bit in the bit-vector scanning from low order to high
|
321
|
+
* order. You should call +#reset_scan+ before calling this method
|
322
|
+
* if you want to scan from the beginning. It is automatically reset when you
|
323
|
+
* first create the bit-vector.
|
324
|
+
*/
|
325
|
+
VALUE
|
326
|
+
frt_bv_next(VALUE self)
|
327
|
+
{
|
328
|
+
BitVector *bv;
|
329
|
+
GET_BV(bv, self);
|
330
|
+
return INT2FIX(bv_scan_next(bv));
|
331
|
+
}
|
332
|
+
|
333
|
+
/*
|
334
|
+
* call-seq:
|
335
|
+
* bv.next_unset -> bit_num
|
336
|
+
*
|
337
|
+
* Returns the next unset bit in the bit-vector scanning from low order to
|
338
|
+
* high order. This method should only be called on bit-vectors which have
|
339
|
+
* been flipped (negated). You should call +#reset_scan+ before
|
340
|
+
* calling this method if you want to scan from the beginning. It is
|
341
|
+
* automatically reset when you first create the bit-vector.
|
342
|
+
*/
|
343
|
+
VALUE
|
344
|
+
frt_bv_next_unset(VALUE self)
|
345
|
+
{
|
346
|
+
BitVector *bv;
|
347
|
+
GET_BV(bv, self);
|
348
|
+
return INT2FIX(bv_scan_next_unset(bv));
|
349
|
+
}
|
350
|
+
|
351
|
+
/*
|
352
|
+
* call-seq:
|
353
|
+
* bv.next_from(from) -> bit_num
|
354
|
+
*
|
355
|
+
* Returns the next set bit in the bit-vector scanning from low order to
|
356
|
+
* high order and starting at +from+. The scan is inclusive so if
|
357
|
+
* +from+ is equal to 10 and +bv[10]+ is set it will
|
358
|
+
* return the number 10. If the bit-vector has been negated than you should
|
359
|
+
* use the +#next_unset_from+ method.
|
360
|
+
*/
|
361
|
+
VALUE
|
362
|
+
frt_bv_next_from(VALUE self, VALUE rfrom)
|
363
|
+
{
|
364
|
+
BitVector *bv;
|
365
|
+
int from = FIX2INT(rfrom);
|
366
|
+
GET_BV(bv, self);
|
367
|
+
if (from < 0) {
|
368
|
+
from = 0;
|
369
|
+
}
|
370
|
+
return INT2FIX(bv_scan_next_from(bv, from));
|
371
|
+
}
|
372
|
+
|
373
|
+
/*
|
374
|
+
* call-seq:
|
375
|
+
* bv.next_unset_from(from) -> bit_num
|
376
|
+
*
|
377
|
+
* Returns the next unset bit in the bit-vector scanning from low order to
|
378
|
+
* high order and starting at +from+. The scan is inclusive so if
|
379
|
+
* +from+ is equal to 10 and +bv[10]+ is unset it will
|
380
|
+
* return the number 10. If the bit-vector has not been negated than you
|
381
|
+
* should use the +#next_from+ method.
|
382
|
+
*/
|
383
|
+
VALUE
|
384
|
+
frt_bv_next_unset_from(VALUE self, VALUE rfrom)
|
385
|
+
{
|
386
|
+
BitVector *bv;
|
387
|
+
int from = FIX2INT(rfrom);
|
388
|
+
GET_BV(bv, self);
|
389
|
+
if (from < 0) {
|
390
|
+
from = 0;
|
391
|
+
}
|
392
|
+
return INT2FIX(bv_scan_next_unset_from(bv, from));
|
393
|
+
}
|
394
|
+
|
395
|
+
/*
|
396
|
+
* call-seq:
|
397
|
+
* bv.each { |bit_num| }
|
398
|
+
*
|
399
|
+
* Iterate through all the set bits in the bit-vector yeilding each one in
|
400
|
+
* order
|
401
|
+
*/
|
402
|
+
VALUE
|
403
|
+
frt_bv_each(VALUE self)
|
404
|
+
{
|
405
|
+
BitVector *bv;
|
406
|
+
int bit;
|
407
|
+
GET_BV(bv, self);
|
408
|
+
bv_scan_reset(bv);
|
409
|
+
if (bv->extends_as_ones) {
|
410
|
+
while ((bit = bv_scan_next_unset(bv)) >= 0) {
|
411
|
+
rb_yield(INT2FIX(bit));
|
412
|
+
}
|
413
|
+
}
|
414
|
+
else {
|
415
|
+
while ((bit = bv_scan_next(bv)) >= 0) {
|
416
|
+
rb_yield(INT2FIX(bit));
|
417
|
+
}
|
418
|
+
}
|
419
|
+
return self;
|
420
|
+
}
|
421
|
+
|
422
|
+
/*
|
423
|
+
* call-seq:
|
424
|
+
* bv.to_a
|
425
|
+
*
|
426
|
+
* Iterate through all the set bits in the bit-vector adding the index of
|
427
|
+
* each set bit to an array. This is useful if you want to perform array
|
428
|
+
* methods on the bit-vecter. If you want to convert an array to a bit_vector
|
429
|
+
* simply do this;
|
430
|
+
*
|
431
|
+
* bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
|
432
|
+
*/
|
433
|
+
VALUE
|
434
|
+
frt_bv_to_a(VALUE self)
|
435
|
+
{
|
436
|
+
BitVector *bv;
|
437
|
+
int bit;
|
438
|
+
VALUE ary;
|
439
|
+
GET_BV(bv, self);
|
440
|
+
ary = rb_ary_new();
|
441
|
+
bv_scan_reset(bv);
|
442
|
+
if (bv->extends_as_ones) {
|
443
|
+
while ((bit = bv_scan_next_unset(bv)) >= 0) {
|
444
|
+
rb_ary_push(ary, INT2FIX(bit));
|
445
|
+
}
|
446
|
+
}
|
447
|
+
else {
|
448
|
+
while ((bit = bv_scan_next(bv)) >= 0) {
|
449
|
+
rb_ary_push(ary, INT2FIX(bit));
|
450
|
+
}
|
451
|
+
}
|
452
|
+
return ary;
|
453
|
+
}
|
454
|
+
|
455
|
+
static VALUE mUtils;
|
456
|
+
|
457
|
+
/*
|
458
|
+
* Document-class: Ferret::Utils::BitVector
|
459
|
+
*
|
460
|
+
* == Summary
|
461
|
+
*
|
462
|
+
* A BitVector is pretty easy to implement in Ruby using Ruby's BigNum class.
|
463
|
+
* This BitVector however allows you to count the set bits with the
|
464
|
+
* +#count+ method (or unset bits of flipped bit vectors) and also
|
465
|
+
* to quickly scan the set bits.
|
466
|
+
*
|
467
|
+
* == Boolean Operations
|
468
|
+
*
|
469
|
+
* BitVector handles four boolean operations;
|
470
|
+
*
|
471
|
+
* * +&+
|
472
|
+
* * +|+
|
473
|
+
* * +^+
|
474
|
+
* * +~+
|
475
|
+
*
|
476
|
+
* bv1 = BitVector.new
|
477
|
+
* bv2 = BitVector.new
|
478
|
+
* bv3 = BitVector.new
|
479
|
+
*
|
480
|
+
* bv4 = (bv1 & bv2) | ~bv3
|
481
|
+
*
|
482
|
+
* You can also do the operations in-place;
|
483
|
+
*
|
484
|
+
* * +and!+
|
485
|
+
* * +or!+
|
486
|
+
* * +xor!+
|
487
|
+
* * +not!+
|
488
|
+
*
|
489
|
+
* bv4.and!(bv5).not!
|
490
|
+
*
|
491
|
+
* == Set Bit Scanning
|
492
|
+
*
|
493
|
+
* Perhaps the most useful functionality in BitVector is the ability to
|
494
|
+
* quickly scan for set bits. To print all set bits;
|
495
|
+
*
|
496
|
+
* bv.each {|bit| puts bit }
|
497
|
+
*
|
498
|
+
* Alternatively you could use the lower level +next+ or
|
499
|
+
* +next_unset+ methods. Note that the +each+ method will
|
500
|
+
* automatically scan unset bits if the BitVector has been flipped (using
|
501
|
+
* +not+).
|
502
|
+
*/
|
503
|
+
static void
|
504
|
+
Init_BitVector(void)
|
505
|
+
{
|
506
|
+
/* BitVector */
|
507
|
+
cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
|
508
|
+
rb_define_alloc_func(cBitVector, frt_bv_alloc);
|
509
|
+
|
510
|
+
rb_define_method(cBitVector, "initialize", frt_bv_init, 0);
|
511
|
+
rb_define_method(cBitVector, "set", frt_bv_set_on, 1);
|
512
|
+
rb_define_method(cBitVector, "unset", frt_bv_set_off, 1);
|
513
|
+
rb_define_method(cBitVector, "[]=", frt_bv_set, 2);
|
514
|
+
rb_define_method(cBitVector, "get", frt_bv_get, 1);
|
515
|
+
rb_define_method(cBitVector, "[]", frt_bv_get, 1);
|
516
|
+
rb_define_method(cBitVector, "count", frt_bv_count, 0);
|
517
|
+
rb_define_method(cBitVector, "clear", frt_bv_clear, 0);
|
518
|
+
rb_define_method(cBitVector, "eql?", frt_bv_eql, 1);
|
519
|
+
rb_define_method(cBitVector, "==", frt_bv_eql, 1);
|
520
|
+
rb_define_method(cBitVector, "hash", frt_bv_hash, 0);
|
521
|
+
rb_define_method(cBitVector, "and!", frt_bv_and_x, 1);
|
522
|
+
rb_define_method(cBitVector, "and", frt_bv_and, 1);
|
523
|
+
rb_define_method(cBitVector, "&", frt_bv_and, 1);
|
524
|
+
rb_define_method(cBitVector, "or!", frt_bv_or_x, 1);
|
525
|
+
rb_define_method(cBitVector, "or", frt_bv_or, 1);
|
526
|
+
rb_define_method(cBitVector, "|", frt_bv_or, 1);
|
527
|
+
rb_define_method(cBitVector, "xor!", frt_bv_xor_x, 1);
|
528
|
+
rb_define_method(cBitVector, "xor", frt_bv_xor, 1);
|
529
|
+
rb_define_method(cBitVector, "^", frt_bv_xor, 1);
|
530
|
+
rb_define_method(cBitVector, "not!", frt_bv_not_x, 0);
|
531
|
+
rb_define_method(cBitVector, "not", frt_bv_not, 0);
|
532
|
+
rb_define_method(cBitVector, "~", frt_bv_not, 0);
|
533
|
+
rb_define_method(cBitVector, "reset_scan", frt_bv_reset_scan, 0);
|
534
|
+
rb_define_method(cBitVector, "next", frt_bv_next, 0);
|
535
|
+
rb_define_method(cBitVector, "next_unset", frt_bv_next_unset, 0);
|
536
|
+
rb_define_method(cBitVector, "next_from", frt_bv_next_from, 1);
|
537
|
+
rb_define_method(cBitVector, "next_unset_from", frt_bv_next_unset_from, 1);
|
538
|
+
rb_define_method(cBitVector, "each", frt_bv_each, 0);
|
539
|
+
rb_define_method(cBitVector, "to_a", frt_bv_to_a, 0);
|
540
|
+
}
|
541
|
+
|
542
|
+
/*********************
|
543
|
+
*** PriorityQueue ***
|
544
|
+
*********************/
|
545
|
+
typedef struct PriQ
|
546
|
+
{
|
547
|
+
int size;
|
548
|
+
int capa;
|
549
|
+
int mem_capa;
|
550
|
+
VALUE *heap;
|
551
|
+
VALUE proc;
|
552
|
+
} PriQ;
|
553
|
+
|
554
|
+
/* Default capacity, and initial number of heap slots, of a new PriorityQueue. */
#define PQ_START_CAPA 32
|
555
|
+
|
556
|
+
/*
 * Ordering predicate for the heap: true when +v1+ sorts before +v2+.
 * With no proc configured (Qnil) the id_lt method is invoked on +v1+ with
 * +v2+ as its argument; otherwise the proc is called with both values and
 * the truthiness of its result is used.
 */
static bool frt_pq_lt(VALUE proc, VALUE v1, VALUE v2)
{
    VALUE verdict = (proc == Qnil)
        ? rb_funcall(v1, id_lt, 1, v2)
        : rb_funcall(proc, id_call, 2, v1, v2);

    return RTEST(verdict);
}
|
565
|
+
|
566
|
+
/*
 * Sift the element in the last occupied slot (heap[size]) up towards the
 * root until its parent no longer compares greater than it.
 */
static void pq_up(PriQ *pq)
{
    VALUE *heap = pq->heap;
    int pos = pq->size;
    int parent = pos >> 1;
    VALUE moving = heap[pos];

    while (parent > 0) {
        if (!frt_pq_lt(pq->proc, moving, heap[parent])) {
            break;
        }
        heap[pos] = heap[parent];   /* pull the parent down one level */
        pos = parent;
        parent >>= 1;
    }
    heap[pos] = moving;
}
|
582
|
+
|
583
|
+
/*
 * Sift the root element (heap[1]) down: repeatedly replace it with its
 * smaller child while that child compares less than it.
 */
static void pq_down(PriQ *pq)
{
    VALUE *heap = pq->heap;
    int last = pq->size;
    int hole = 1;
    int child = 2;
    VALUE moving = heap[hole];      /* saved root value */

    /* choose the smaller of the root's two children */
    if ((3 <= last) && frt_pq_lt(pq->proc, heap[3], heap[2])) {
        child = 3;
    }

    while ((child <= last) && frt_pq_lt(pq->proc, heap[child], moving)) {
        heap[hole] = heap[child];   /* move the child up into the hole */
        hole = child;
        child = hole << 1;
        if ((child + 1 <= last)
            && frt_pq_lt(pq->proc, heap[child + 1], heap[child])) {
            child++;
        }
    }
    heap[hole] = moving;
}
|
607
|
+
|
608
|
+
/*
 * Append +elem+ to the heap and restore heap order. The storage is doubled
 * whenever the new slot index reaches the allocated slot count.
 */
static void pq_push(PriQ *pq, VALUE elem)
{
    int slot = ++pq->size;

    if (slot >= pq->mem_capa) {
        pq->mem_capa <<= 1;
        REALLOC_N(pq->heap, VALUE, pq->mem_capa);
    }
    pq->heap[slot] = elem;
    pq_up(pq);
}
|
618
|
+
|
619
|
+
static VALUE cPriorityQueue;
|
620
|
+
|
621
|
+
static void
|
622
|
+
frt_pq_mark(void *p)
|
623
|
+
{
|
624
|
+
PriQ *pq = (PriQ *)p;
|
625
|
+
int i;
|
626
|
+
for (i = pq->size; i > 0; i--) {
|
627
|
+
rb_gc_mark_maybe(pq->heap[i]);
|
628
|
+
}
|
629
|
+
}
|
630
|
+
|
631
|
+
/* Free function for a wrapped PriQ: releases the heap array, then the struct. */
static void frt_pq_free(PriQ *pq)
{
    VALUE *heap_mem = pq->heap;

    free(heap_mem);
    free(pq);
}
|
636
|
+
|
637
|
+
static VALUE
|
638
|
+
frt_pq_alloc(VALUE klass)
|
639
|
+
{
|
640
|
+
PriQ *pq = ALLOC_AND_ZERO(PriQ);
|
641
|
+
pq->capa = PQ_START_CAPA;
|
642
|
+
pq->mem_capa = PQ_START_CAPA;
|
643
|
+
pq->heap = ALLOC_N(VALUE, PQ_START_CAPA);
|
644
|
+
pq->proc = Qnil;
|
645
|
+
return Data_Wrap_Struct(klass, &frt_pq_mark, &frt_pq_free, pq);
|
646
|
+
}
|
647
|
+
|
648
|
+
#define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
|
649
|
+
/*
|
650
|
+
* call-seq:
|
651
|
+
* PriorityQueue.new(capacity = 32) -> new_pq
|
652
|
+
* PriorityQueue.new({:capacity => 32,
|
653
|
+
* :less_than_proc => lambda{|a, b| a < b}) -> new_pq
|
654
|
+
* PriorityQueue.new({:capacity => 32}) {|a, b| a < b} -> new_pq
|
655
|
+
*
|
656
|
+
* Returns a new empty priority queue object with an optional capacity.
|
657
|
+
* Once the capacity is filled, the lowest valued elements will be
|
658
|
+
* automatically popped off the top of the queue as more elements are
|
659
|
+
* inserted into the queue.
|
660
|
+
*/
|
661
|
+
static VALUE
|
662
|
+
frt_pq_init(int argc, VALUE *argv, VALUE self)
|
663
|
+
{
|
664
|
+
if (argc >= 1) {
|
665
|
+
PriQ *pq;
|
666
|
+
VALUE options = argv[0];
|
667
|
+
VALUE param;
|
668
|
+
int capa = PQ_START_CAPA;
|
669
|
+
GET_PQ(pq, self);
|
670
|
+
switch (TYPE(options)) {
|
671
|
+
case T_FIXNUM:
|
672
|
+
capa = FIX2INT(options);
|
673
|
+
break;
|
674
|
+
case T_HASH:
|
675
|
+
if (!NIL_P(param = rb_hash_aref(options,
|
676
|
+
ID2SYM(id_capacity)))) {
|
677
|
+
capa = FIX2INT(param);
|
678
|
+
}
|
679
|
+
if (!NIL_P(param = rb_hash_aref(options,
|
680
|
+
ID2SYM(id_less_than)))) {
|
681
|
+
pq->proc = param;
|
682
|
+
}
|
683
|
+
break;
|
684
|
+
default:
|
685
|
+
rb_raise(rb_eArgError,
|
686
|
+
"PriorityQueue#initialize only takes a Hash or "
|
687
|
+
"an integer");
|
688
|
+
|
689
|
+
break;
|
690
|
+
}
|
691
|
+
if (capa < 0) {
|
692
|
+
rb_raise(rb_eIndexError,
|
693
|
+
"PriorityQueue must have a capacity > 0. %d < 0",
|
694
|
+
index);
|
695
|
+
}
|
696
|
+
pq->capa = capa;
|
697
|
+
if (rb_block_given_p()) {
|
698
|
+
pq->proc = rb_block_proc();
|
699
|
+
}
|
700
|
+
if (argc > 1) {
|
701
|
+
rb_raise(rb_eArgError,
|
702
|
+
"PriorityQueue#initialize only takes one parameter");
|
703
|
+
}
|
704
|
+
}
|
705
|
+
|
706
|
+
return self;
|
707
|
+
}
|
708
|
+
|
709
|
+
/*
|
710
|
+
* call-seq:
|
711
|
+
* pq.clone -> pq_clone
|
712
|
+
*
|
713
|
+
* Returns a shallow clone of the priority queue. That is only the priority
|
714
|
+
* queue is cloned, its contents are not cloned.
|
715
|
+
*/
|
716
|
+
static VALUE
|
717
|
+
frt_pq_clone(VALUE self)
|
718
|
+
{
|
719
|
+
PriQ *pq, *new_pq = ALLOC(PriQ);
|
720
|
+
GET_PQ(pq, self);
|
721
|
+
memcpy(new_pq, pq, sizeof(PriQ));
|
722
|
+
new_pq->heap = ALLOC_N(VALUE, new_pq->mem_capa);
|
723
|
+
memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
|
724
|
+
|
725
|
+
return Data_Wrap_Struct(cPriorityQueue, &frt_pq_mark, &frt_pq_free, new_pq);
|
726
|
+
}
|
727
|
+
|
728
|
+
/*
|
729
|
+
* call-seq:
|
730
|
+
* pq.clear -> self
|
731
|
+
*
|
732
|
+
* Clears all elements from the priority queue. The size will be reset to 0.
|
733
|
+
*/
|
734
|
+
static VALUE
|
735
|
+
frt_pq_clear(VALUE self)
|
736
|
+
{
|
737
|
+
PriQ *pq;
|
738
|
+
GET_PQ(pq, self);
|
739
|
+
pq->size = 0;
|
740
|
+
return self;
|
741
|
+
}
|
742
|
+
|
743
|
+
/*
|
744
|
+
* call-seq:
|
745
|
+
* pq.insert(elem) -> self
|
746
|
+
* pq << elem -> self
|
747
|
+
*
|
748
|
+
* Insert an element into a queue. It will be inserted into the correct
|
749
|
+
* position in the queue according to its priority.
|
750
|
+
*/
|
751
|
+
static VALUE
|
752
|
+
frt_pq_insert(VALUE self, VALUE elem)
|
753
|
+
{
|
754
|
+
PriQ *pq;
|
755
|
+
GET_PQ(pq, self);
|
756
|
+
if (pq->size < pq->capa) {
|
757
|
+
pq_push(pq, elem);
|
758
|
+
}
|
759
|
+
else if (pq->size > 0 && frt_pq_lt(pq->proc, pq->heap[1], elem)) {
|
760
|
+
pq->heap[1] = elem;
|
761
|
+
pq_down(pq);
|
762
|
+
}
|
763
|
+
/* else ignore the element */
|
764
|
+
return self;
|
765
|
+
}
|
766
|
+
|
767
|
+
/*
|
768
|
+
* call-seq:
|
769
|
+
* pq.adjust -> self
|
770
|
+
*
|
771
|
+
* Sometimes you modify the top element in the priority queue so that its
|
772
|
+
* priority changes. When you do this you need to reorder the queue and you
|
773
|
+
* do this by calling the adjust method.
|
774
|
+
*/
|
775
|
+
static VALUE
|
776
|
+
frt_pq_adjust(VALUE self)
|
777
|
+
{
|
778
|
+
PriQ *pq;
|
779
|
+
GET_PQ(pq, self);
|
780
|
+
pq_down(pq);
|
781
|
+
return self;
|
782
|
+
}
|
783
|
+
|
784
|
+
/*
|
785
|
+
* call-seq:
|
786
|
+
* pq.top -> elem
|
787
|
+
*
|
788
|
+
* Returns the top element in the queue but does not remove it from the
|
789
|
+
* queue.
|
790
|
+
*/
|
791
|
+
static VALUE
|
792
|
+
frt_pq_top(VALUE self)
|
793
|
+
{
|
794
|
+
PriQ *pq;
|
795
|
+
GET_PQ(pq, self);
|
796
|
+
return (pq->size > 0) ? pq->heap[1] : Qnil;
|
797
|
+
}
|
798
|
+
|
799
|
+
/*
|
800
|
+
* call-seq:
|
801
|
+
* pq.pop -> elem
|
802
|
+
*
|
803
|
+
* Returns the top element in the queue removing it from the queue.
|
804
|
+
*/
|
805
|
+
static VALUE
|
806
|
+
frt_pq_pop(VALUE self)
|
807
|
+
{
|
808
|
+
PriQ *pq;
|
809
|
+
GET_PQ(pq, self);
|
810
|
+
if (pq->size > 0) {
|
811
|
+
VALUE result = pq->heap[1]; /* save first value */
|
812
|
+
pq->heap[1] = pq->heap[pq->size]; /* move last to first */
|
813
|
+
pq->heap[pq->size] = Qnil;
|
814
|
+
pq->size--;
|
815
|
+
pq_down(pq); /* adjust heap */
|
816
|
+
return result;
|
817
|
+
}
|
818
|
+
else {
|
819
|
+
return Qnil;
|
820
|
+
}
|
821
|
+
}
|
822
|
+
|
823
|
+
/*
|
824
|
+
* call-seq:
|
825
|
+
* pq.size -> integer
|
826
|
+
*
|
827
|
+
* Returns the size of the queue, ie. the number of elements currently stored
|
828
|
+
* in the queue. The _size_ of a PriorityQueue can never be greater than
|
829
|
+
* its _capacity_
|
830
|
+
*/
|
831
|
+
static VALUE
|
832
|
+
frt_pq_size(VALUE self)
|
833
|
+
{
|
834
|
+
PriQ *pq;
|
835
|
+
GET_PQ(pq, self);
|
836
|
+
return INT2FIX(pq->size);
|
837
|
+
}
|
838
|
+
|
839
|
+
/*
|
840
|
+
* call-seq:
|
841
|
+
* pq.capacity -> integer
|
842
|
+
*
|
843
|
+
* Returns the capacity of the queue, ie. the number of elements that can be
|
844
|
+
* stored in a Priority queue before they start to drop off the end. The
|
845
|
+
* _size_ of a PriorityQueue can never be greater than its
|
846
|
+
* _capacity_
|
847
|
+
*/
|
848
|
+
static VALUE
|
849
|
+
frt_pq_capa(VALUE self)
|
850
|
+
{
|
851
|
+
PriQ *pq;
|
852
|
+
GET_PQ(pq, self);
|
853
|
+
return INT2FIX(pq->capa);
|
854
|
+
}
|
855
|
+
|
856
|
+
/*
|
857
|
+
* Document-class: Ferret::Utils::PriorityQueue
|
858
|
+
*
|
859
|
+
* == Summary
|
860
|
+
*
|
861
|
+
* A PriorityQueue is a very useful data structure and one that needs a fast
|
862
|
+
* implementation. Hence this priority queue is implemented in C. It is
|
863
|
+
* pretty easy to use; basically you just insert elements into the queue and
|
864
|
+
* pop them off.
|
865
|
+
*
|
866
|
+
* The elements are sorted with the lowest valued elements on the top of
|
867
|
+
* the heap, ie the first to be popped off. Elements are ordered using the
|
868
|
+
* less_than '<' method. To change the order of the queue you can either
|
869
|
+
*  reimplement the '<' method or pass a block when you initialize the queue.
|
870
|
+
*
|
871
|
+
* You can also set the capacity of the PriorityQueue. Once you hit the
|
872
|
+
*  capacity, the lowest valued elements are automatically popped off the top
|
873
|
+
* of the queue as more elements are added.
|
874
|
+
*
|
875
|
+
* == Example
|
876
|
+
*
|
877
|
+
*  Here is a toy example that sorts strings by their length and has a capacity
|
878
|
+
* of 5;
|
879
|
+
*
|
880
|
+
* q = PriorityQueue.new(5) {|a, b| a.size < b.size}
|
881
|
+
* q << "x"
|
882
|
+
* q << "xxxxx"
|
883
|
+
* q << "xxx"
|
884
|
+
* q << "xxxx"
|
885
|
+
* q << "xxxxxx"
|
886
|
+
* q << "xx" # hit capacity so "x" will be popped off the top
|
887
|
+
*
|
888
|
+
* puts q.size #=> 5
|
889
|
+
* word = q.pop #=> "xx"
|
890
|
+
* q.top << "yyyy" # "xxxyyyy" will still be at the top of the queue
|
891
|
+
* q.adjust # move "xxxyyyy" to its correct location in queue
|
892
|
+
* word = q.pop #=> "xxxx"
|
893
|
+
* word = q.pop #=> "xxxxx"
|
894
|
+
* word = q.pop #=> "xxxxxx"
|
895
|
+
* word = q.pop #=> "xxxyyyy"
|
896
|
+
* word = q.pop #=> nil
|
897
|
+
*/
|
898
|
+
/*
 * Defines the PriorityQueue class under mUtils and registers its allocator
 * and instance methods.
 */
static void
Init_PriorityQueue(void)
{
    /* PriorityQueue */
    cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
    /* instances are backed by a PriQ created in frt_pq_alloc */
    rb_define_alloc_func(cPriorityQueue, frt_pq_alloc);

    /* -1: initialize receives its arguments as (argc, argv, self) */
    rb_define_method(cPriorityQueue, "initialize", frt_pq_init, -1);
    rb_define_method(cPriorityQueue, "clone",      frt_pq_clone, 0);
    rb_define_method(cPriorityQueue, "clear",      frt_pq_clear, 0);
    /* "insert" and "<<" share one implementation */
    rb_define_method(cPriorityQueue, "insert",     frt_pq_insert, 1);
    rb_define_method(cPriorityQueue, "<<",         frt_pq_insert, 1);
    rb_define_method(cPriorityQueue, "top",        frt_pq_top, 0);
    rb_define_method(cPriorityQueue, "pop",        frt_pq_pop, 0);
    rb_define_method(cPriorityQueue, "size",       frt_pq_size, 0);
    rb_define_method(cPriorityQueue, "capacity",   frt_pq_capa, 0);
    rb_define_method(cPriorityQueue, "adjust",     frt_pq_adjust, 0);
}
|
916
|
+
|
917
|
+
/* rdoc hack
|
918
|
+
extern VALUE mFerret = rb_define_module("Ferret");
|
919
|
+
*/
|
920
|
+
|
921
|
+
/*
|
922
|
+
* Document-module: Ferret::Utils
|
923
|
+
*
|
924
|
+
* The Utils module contains a number of helper classes and modules that are
|
925
|
+
* useful when indexing with Ferret. They are;
|
926
|
+
*
|
927
|
+
* * BitVector
|
928
|
+
* * PriorityQueue
|
929
|
+
* * => more to come
|
930
|
+
*
|
931
|
+
* These helper classes could also be quite useful outside of Ferret and may
|
932
|
+
* one day find themselves in their own separate library.
|
933
|
+
*/
|
934
|
+
/*
 * Defines the Utils module under mFerret and then initialises the classes
 * that live in it.
 */
void
Init_Utils(void)
{
    /* mUtils must exist before the class initialisers below run */
    mUtils = rb_define_module_under(mFerret, "Utils");

    Init_BitVector();
    Init_PriorityQueue();
}
|