ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/similarity.c CHANGED
@@ -1,172 +1,150 @@
1
- #include <search.h>
2
- #include <global.h>
1
+ #include "similarity.h"
2
+ #include "search.h"
3
+ #include "array.h"
4
+ #include "helper.h"
3
5
  #include <math.h>
6
+ #include <stdlib.h>
7
+ #include <string.h>
4
8
 
5
- static int low_bit = 0, low_mid_bit = 0, high_mid_bit = 0, high_bit = 0;
6
- static void
7
- setup_endian()
8
- {
9
- static int init = 0;
10
- char *p;
11
-
12
- if (init) return;
13
- init = 1;
14
- p = (char*)&init;
15
-
16
- if (p[0]) {
17
- low_bit = 0;
18
- low_mid_bit = 1;
19
- high_mid_bit = 2;
20
- high_bit = 3;
21
- } else {
22
- low_bit = 3;
23
- low_mid_bit = 2;
24
- high_mid_bit = 1;
25
- high_bit = 0;
26
- }
27
- }
9
+ /****************************************************************************
10
+ *
11
+ * Term
12
+ *
13
+ ****************************************************************************/
28
14
 
29
- float byte_to_float(uchar b)
15
+ Term *term_new(const char *field, const char *text)
30
16
  {
31
- char flt[4];
32
- if (b == 0) {
33
- return 0.0;
34
- } else {
35
- int mantissa = b & 0x07; // 0x07 = 7 = 0b00000111
36
- int exponent = (b >> 3) & 0x1F; // 0x1f = 31 = 0b00011111
37
-
38
- if (!low_mid_bit) setup_endian();
39
- flt[low_bit] = flt[low_mid_bit] = 0;
40
- flt[high_mid_bit] = mantissa << 5;
41
- flt[high_bit] = exponent + 48;
42
- return *((float *)flt);
43
- }
17
+ Term *t = ALLOC(Term);
18
+ t->field = estrdup(field);
19
+ t->text = estrdup(text);
20
+ return t;
44
21
  }
45
22
 
46
- uchar float_to_byte(float f)
23
+ void term_destroy(Term *self)
47
24
  {
48
- if (f <= 0.0) {
49
- return 0;
50
- } else {
51
- char *bits = (char *)&f;
52
- int mantissa = (bits[high_mid_bit] & 0xEf) >> 5;
53
- int exponent = (bits[high_bit] - 48);
54
-
55
- if (exponent > 0x1f) {
56
- exponent = 0x1f; // 0x1f = 31 = 0b00011111
57
- mantissa = 0x07; // 0x07 = 7 = 0b00000111
58
- }
25
+ free(self->text);
26
+ free(self->field);
27
+ free(self);
28
+ }
59
29
 
60
- if (exponent < 0) {
61
- exponent = 0;
62
- mantissa = 1;
63
- }
30
+ int term_eq(const void *t1, const void *t2)
31
+ {
32
+ return (strcmp(((Term *)t1)->text, ((Term *)t2)->text)) == 0 &&
33
+ (strcmp(((Term *)t1)->field, ((Term *)t2)->field) == 0);
34
+ }
64
35
 
65
- return ((exponent<<3) | mantissa);
66
- }
36
+ ulong term_hash(const void *t)
37
+ {
38
+ return str_hash(((Term *)t)->text) * str_hash(((Term *)t)->field);
67
39
  }
68
40
 
69
- float simdef_length_norm(Similarity *s, char *field, int num_terms)
41
+ /****************************************************************************
42
+ *
43
+ * Similarity
44
+ *
45
+ ****************************************************************************/
46
+
47
+ float simdef_length_norm(Similarity *s, const char *field, int num_terms)
70
48
  {
71
- return (float)(1.0 / sqrt(num_terms));
49
+ (void)s;
50
+ (void)field;
51
+ return (float)(1.0 / sqrt(num_terms));
72
52
  }
73
53
 
74
54
  float simdef_query_norm(struct Similarity *s, float sum_of_squared_weights)
75
55
  {
76
- return (float)(1.0 / sqrt(sum_of_squared_weights));
56
+ (void)s;
57
+ return (float)(1.0 / sqrt(sum_of_squared_weights));
77
58
  }
78
59
 
79
60
  float simdef_tf(struct Similarity *s, float freq)
80
61
  {
81
- return (float)sqrt(freq);
62
+ (void)s;
63
+ return (float)sqrt(freq);
82
64
  }
83
65
 
84
66
  float simdef_sloppy_freq(struct Similarity *s, int distance)
85
67
  {
86
- return (float)(1.0 / (double)(distance + 1));
68
+ (void)s;
69
+ return (float)(1.0 / (double)(distance + 1));
87
70
  }
88
71
 
89
- float simdef_idf_term(struct Similarity *s, Term *term, Searcher *searcher)
72
+ float simdef_idf_term(struct Similarity *s, const char *field, char *term,
73
+ Searcher *searcher)
90
74
  {
91
- return s->idf(s, searcher->doc_freq(searcher, term), searcher->max_doc(searcher));
75
+ return s->idf(s, searcher->doc_freq(searcher, field, term),
76
+ searcher->max_doc(searcher));
92
77
  }
93
78
 
94
- float simdef_idf_phrase(struct Similarity *s, Term **terms, int tcnt, Searcher *searcher)
79
+ float simdef_idf_phrase(struct Similarity *s, const char *field,
80
+ PhrasePosition *positions,
81
+ int pp_cnt, Searcher *searcher)
95
82
  {
96
- float idf = 0.0;
97
- int i;
98
- for (i = 0; i < tcnt; i++) {
99
- idf += s->idf_term(s, terms[i], searcher);
100
- }
101
- return idf;
83
+ float idf = 0.0;
84
+ int i, j;
85
+ for (i = 0; i < pp_cnt; i++) {
86
+ char **terms = positions[i].terms;
87
+ for (j = ary_size(terms) - 1; j >= 0; j--) {
88
+ idf += sim_idf_term(s, field, terms[j], searcher);
89
+ }
90
+ }
91
+ return idf;
102
92
  }
103
93
 
104
94
  float simdef_idf(struct Similarity *s, int doc_freq, int num_docs)
105
95
  {
106
- return (float)(log((float)num_docs/(float)(doc_freq+1)) + 1.0);
96
+ (void)s;
97
+ return (float)(log((float)num_docs/(float)(doc_freq+1)) + 1.0);
107
98
  }
108
99
 
109
100
  float simdef_coord(struct Similarity *s, int overlap, int max_overlap)
110
101
  {
111
- return (float)((double)overlap / (double)max_overlap);
102
+ (void)s;
103
+ return (float)((double)overlap / (double)max_overlap);
112
104
  }
113
105
 
114
106
  float simdef_decode_norm(struct Similarity *s, uchar b)
115
107
  {
116
- return s->norm_table[b];
108
+ return s->norm_table[b];
117
109
  }
118
110
 
119
111
  uchar simdef_encode_norm(struct Similarity *s, float f)
120
112
  {
121
- return float_to_byte(f);
113
+ (void)s;
114
+ return float2byte(f);
122
115
  }
123
116
 
124
117
  void simdef_destroy(Similarity *s)
125
118
  {
126
- /* nothing to do here */
119
+ (void)s;
120
+ /* nothing to do here */
127
121
  }
128
122
 
129
- #ifdef WIN32
130
123
  static Similarity default_similarity = {
131
- NULL,
132
- {0},
133
- &simdef_length_norm,
134
- &simdef_query_norm,
135
- &simdef_tf,
136
- &simdef_sloppy_freq,
137
- &simdef_idf_term,
138
- &simdef_idf_phrase,
139
- &simdef_idf,
140
- &simdef_coord,
141
- &simdef_decode_norm,
142
- &simdef_encode_norm,
143
- &simdef_destroy
124
+ NULL,
125
+ {0},
126
+ &simdef_length_norm,
127
+ &simdef_query_norm,
128
+ &simdef_tf,
129
+ &simdef_sloppy_freq,
130
+ &simdef_idf_term,
131
+ &simdef_idf_phrase,
132
+ &simdef_idf,
133
+ &simdef_coord,
134
+ &simdef_decode_norm,
135
+ &simdef_encode_norm,
136
+ &simdef_destroy
144
137
  };
145
- #else
146
- static Similarity default_similarity = {
147
- data:NULL,
148
- length_norm:&simdef_length_norm,
149
- query_norm:&simdef_query_norm,
150
- tf:&simdef_tf,
151
- sloppy_freq:&simdef_sloppy_freq,
152
- idf_term:&simdef_idf_term,
153
- idf_phrase:&simdef_idf_phrase,
154
- idf:&simdef_idf,
155
- coord:&simdef_coord,
156
- decode_norm:&simdef_decode_norm,
157
- encode_norm:&simdef_encode_norm,
158
- destroy:&simdef_destroy
159
- };
160
- #endif
161
138
 
162
139
  Similarity *sim_create_default()
163
140
  {
164
- int i;
165
- if (!default_similarity.data) {
166
- for (i = 0; i < 256; i++)
167
- default_similarity.norm_table[i] = byte_to_float(i);
141
+ int i;
142
+ if (!default_similarity.data) {
143
+ for (i = 0; i < 256; i++) {
144
+ default_similarity.norm_table[i] = byte2float((unsigned char)i);
145
+ }
168
146
 
169
- default_similarity.data = &default_similarity;
170
- }
171
- return &default_similarity;
147
+ default_similarity.data = &default_similarity;
148
+ }
149
+ return &default_similarity;
172
150
  }
data/ext/similarity.h CHANGED
@@ -9,18 +9,31 @@ typedef struct Searcher Searcher;
9
9
  *
10
10
  ****************************************************************************/
11
11
 
12
- typedef struct Term {
13
- char *field;
14
- char *text;
12
+ #define term_set_new() \
13
+ hs_new((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
14
+
15
+ typedef struct Term
16
+ {
17
+ char *field;
18
+ char *text;
15
19
  } Term;
16
20
 
17
- Term *term_clone(Term *term);
18
- Term *term_create(const char *field, char *text);
19
- void term_destroy(Term *self);
20
- int term_cmp(void *t1, void *t2);
21
- int term_eq(const void *t1, const void *t2);
22
- unsigned int term_hash(const void *t);
23
- char *term_to_s(Term *term);
21
+ extern Term *term_new(const char *field, const char *text);
22
+ extern void term_destroy(Term *self);
23
+ extern int term_eq(const void *t1, const void *t2);
24
+ extern unsigned long term_hash(const void *t);
25
+
26
+ /***************************************************************************
27
+ *
28
+ * PhrasePosition
29
+ *
30
+ ***************************************************************************/
31
+
32
+ typedef struct PhrasePosition
33
+ {
34
+ int pos;
35
+ char **terms;
36
+ } PhrasePosition;
24
37
 
25
38
  /***************************************************************************
26
39
  *
@@ -30,38 +43,40 @@ char *term_to_s(Term *term);
30
43
 
31
44
  typedef struct Similarity Similarity;
32
45
 
33
- struct Similarity {
34
- void *data;
35
- float norm_table[256];
36
- float (*length_norm)(Similarity *self, char *field, int num_terms);
37
- float (*query_norm)(Similarity *self, float sum_of_squared_weights);
38
- float (*tf)(Similarity *self, float freq);
39
- float (*sloppy_freq)(Similarity *self, int distance);
40
- float (*idf_term)(Similarity *self, Term *term, Searcher *searcher);
41
- float (*idf_phrase)(Similarity *self, Term **terms,
42
- int tcnt, Searcher *searcher);
43
- float (*idf)(Similarity *self, int doc_freq, int num_docs);
44
- float (*coord)(Similarity *self, int overlap, int max_overlap);
45
- float (*decode_norm)(Similarity *self, uchar b);
46
- uchar (*encode_norm)(Similarity *self, float f);
47
- void (*destroy)(Similarity *self);
46
+ struct Similarity
47
+ {
48
+ void *data;
49
+ float norm_table[256];
50
+ float (*length_norm)(Similarity *self, const char *field, int num_terms);
51
+ float (*query_norm)(Similarity *self, float sum_of_squared_weights);
52
+ float (*tf)(Similarity *self, float freq);
53
+ float (*sloppy_freq)(Similarity *self, int distance);
54
+ float (*idf_term)(Similarity *self, const char *field, char *term,
55
+ Searcher *searcher);
56
+ float (*idf_phrase)(Similarity *self, const char *field,
57
+ PhrasePosition *positions,
58
+ int pp_cnt, Searcher *searcher);
59
+ float (*idf)(Similarity *self, int doc_freq, int num_docs);
60
+ float (*coord)(Similarity *self, int overlap, int max_overlap);
61
+ float (*decode_norm)(Similarity *self, unsigned char b);
62
+ unsigned char (*encode_norm)(Similarity *self, float f);
63
+ void (*destroy)(Similarity *self);
48
64
  };
49
65
 
50
66
  #define sim_length_norm(msim, field, num_terms) msim->length_norm(msim, field, num_terms)
51
67
  #define sim_query_norm(msim, sosw) msim->query_norm(msim, sosw)
52
68
  #define sim_tf(msim, freq) msim->tf(msim, freq)
53
69
  #define sim_sloppy_freq(msim, distance) msim->sloppy_freq(msim, distance)
54
- #define sim_idf_term(msim, term, searcher) msim->idf_term(msim, term, searcher)
55
- #define sim_idf_phrase(msim, terms, tcnt, searcher) msim->idf_phrase(msim, terms, tcnt, searcher)
70
+ #define sim_idf_term(msim, field, term, searcher)\
71
+ msim->idf_term(msim, field, term, searcher)
72
+ #define sim_idf_phrase(msim, field, positions, pos_cnt, searcher)\
73
+ msim->idf_phrase(msim, field, positions, pos_cnt, searcher)
56
74
  #define sim_idf(msim, doc_freq, num_docs) msim->idf(msim, doc_freq, num_docs)
57
75
  #define sim_coord(msim, overlap, max_overlap) msim->coord(msim, overlap, max_overlap)
58
76
  #define sim_decode_norm(msim, b) msim->decode_norm(msim, b)
59
77
  #define sim_encode_norm(msim, f) msim->encode_norm(msim, f)
60
78
  #define sim_destroy(msim) msim->destroy(msim)
61
79
 
62
- float byte_to_float(uchar b);
63
- uchar float_to_byte(float f);
64
-
65
80
  Similarity *sim_create_default();
66
81
 
67
82
  #endif
data/ext/sort.c CHANGED
@@ -2,94 +2,96 @@
2
2
  #include "search.h"
3
3
  #include "index.h"
4
4
 
5
- static char * const NO_TERM_ERROR_MSG = "no terms in field to sort by";
6
-
7
5
  /***************************************************************************
8
6
  *
9
7
  * SortField
10
8
  *
11
9
  ***************************************************************************/
12
10
 
13
- unsigned int sort_field_hash(const void *p)
11
+ ulong sort_field_hash(const void *p)
14
12
  {
15
- SortField *self = (SortField *)p;
16
- return str_hash(self->field) ^ (self->type*37);
13
+ SortField *self = (SortField *)p;
14
+ return str_hash(self->field) ^ (self->type*37);
17
15
  }
18
16
 
19
17
  int sort_field_eq(const void *p1, const void *p2)
20
18
  {
21
- SortField *key1 = (SortField *)p1;
22
- SortField *key2 = (SortField *)p2;
23
- int equal = (strcmp(key1->field, key2->field) == 0) && key1->type == key2->type;
24
- /*
25
- * TODO: The could probable be done more cleanly.
26
- * If the sort field is an auto field then it was evaluated before it was
27
- * entered into the cache so we need to pass the compare function back to
28
- * the new sort field.
29
- */
30
- if (equal && (key1->type == SORT_TYPE_AUTO)) {
31
- key2->compare = key1->compare;
32
- }
33
- return equal;
34
- }
35
-
36
- SortField *sort_field_clone(SortField *self)
37
- {
38
- SortField *clone = ALLOC(SortField);
39
- memcpy(clone, self, sizeof(SortField));
40
- mutex_init(&clone->mutex, NULL);
41
- clone->field = estrdup(self->field);
42
- return clone;
43
- }
44
-
45
- SortField *sort_field_alloc(char *field, int type, bool reverse)
46
- {
47
- SortField *self = ALLOC(SortField);
48
- mutex_init(&self->mutex, NULL);
49
- self->field = field ? estrdup(field) : NULL;
50
- self->type = type;
51
- self->reverse = reverse;
52
- self->index = NULL;
53
- self->destroy_index = &free;
54
- self->compare = NULL;
55
- return self;
56
- }
57
-
58
- SortField *sort_field_create(char *field, int type, bool reverse)
59
- {
60
- SortField *sf = NULL;
61
- switch (type) {
62
- case SORT_TYPE_SCORE:
63
- sf = sort_field_score_create(reverse);
64
- break;
65
- case SORT_TYPE_DOC:
66
- sf = sort_field_doc_create(reverse);
67
- break;
68
- case SORT_TYPE_INTEGER:
69
- sf = sort_field_int_create(field, reverse);
70
- break;
71
- case SORT_TYPE_FLOAT:
72
- sf = sort_field_float_create(field, reverse);
73
- break;
74
- case SORT_TYPE_STRING:
75
- sf = sort_field_string_create(field, reverse);
76
- break;
77
- case SORT_TYPE_AUTO:
78
- sf = sort_field_auto_create(field, reverse);
79
- break;
80
- }
81
- return sf;
19
+ SortField *key1 = (SortField *)p1;
20
+ SortField *key2 = (SortField *)p2;
21
+ return (strcmp(key1->field, key2->field) == 0)
22
+ && key1->type == key2->type;
23
+ }
24
+
25
+ static int sort_field_cache_eq(const void *p1, const void *p2)
26
+ {
27
+ SortField *key1 = (SortField *)p1;
28
+ SortField *key2 = (SortField *)p2;
29
+ int equal = (strcmp(key1->field, key2->field) == 0)
30
+ && key1->type == key2->type;
31
+
32
+ return equal;
33
+ }
34
+
35
+ static SortField *sort_field_clone(SortField *self)
36
+ {
37
+ SortField *clone = ALLOC(SortField);
38
+ memcpy(clone, self, sizeof(SortField));
39
+ mutex_init(&clone->mutex, NULL);
40
+ clone->field = estrdup(self->field);
41
+ return clone;
42
+ }
43
+
44
+ static SortField *sort_field_alloc(char *field, int type, bool reverse)
45
+ {
46
+ SortField *self = ALLOC(SortField);
47
+ mutex_init(&self->mutex, NULL);
48
+ self->field = field ? estrdup(field) : NULL;
49
+ self->type = type;
50
+ self->reverse = reverse;
51
+ self->index = NULL;
52
+ self->destroy_index = &free;
53
+ self->compare = NULL;
54
+ return self;
55
+ }
56
+
57
+ SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse)
58
+ {
59
+ SortField *sf = NULL;
60
+ switch (type) {
61
+ case SORT_TYPE_SCORE:
62
+ sf = sort_field_score_new(reverse);
63
+ break;
64
+ case SORT_TYPE_DOC:
65
+ sf = sort_field_doc_new(reverse);
66
+ break;
67
+ case SORT_TYPE_BYTE:
68
+ sf = sort_field_byte_new(field, reverse);
69
+ break;
70
+ case SORT_TYPE_INTEGER:
71
+ sf = sort_field_int_new(field, reverse);
72
+ break;
73
+ case SORT_TYPE_FLOAT:
74
+ sf = sort_field_float_new(field, reverse);
75
+ break;
76
+ case SORT_TYPE_STRING:
77
+ sf = sort_field_string_new(field, reverse);
78
+ break;
79
+ case SORT_TYPE_AUTO:
80
+ sf = sort_field_auto_new(field, reverse);
81
+ break;
82
+ }
83
+ return sf;
82
84
  }
83
85
 
84
86
  void sort_field_destroy(void *p)
85
87
  {
86
- SortField *self = (SortField *)p;
87
- if (self->index) {
88
- self->destroy_index(self->index);
89
- }
90
- free(self->field);
91
- mutex_destroy(&self->mutex);
92
- free(p);
88
+ SortField *self = (SortField *)p;
89
+ if (self->index) {
90
+ self->destroy_index(self->index);
91
+ }
92
+ free(self->field);
93
+ mutex_destroy(&self->mutex);
94
+ free(p);
93
95
  }
94
96
 
95
97
  /*
@@ -97,210 +99,304 @@ void sort_field_destroy(void *p)
97
99
  */
98
100
  char *sort_field_to_s(SortField *self)
99
101
  {
100
- char *str;
101
- char *type = NULL;
102
- switch (self->type) {
103
- case SORT_TYPE_SCORE:
104
- type = "<SCORE>";
105
- break;
106
- case SORT_TYPE_DOC:
107
- type = "<DOC>";
108
- break;
109
- case SORT_TYPE_INTEGER:
110
- type = "<integer>";
111
- break;
112
- case SORT_TYPE_FLOAT:
113
- type = "<float>";
114
- break;
115
- case SORT_TYPE_STRING:
116
- type = "<string>";
117
- break;
118
- case SORT_TYPE_AUTO:
119
- type = "<auto>";
120
- break;
121
- }
122
- if (self->field) {
123
- str = ALLOC_N(char, 10 + strlen(self->field) + strlen(type));
124
- sprintf(str, "%s:%s%s", self->field, type, (self->reverse ? "!" : ""));
125
- } else {
126
- str = ALLOC_N(char, 10 + strlen(type));
127
- sprintf(str, "%s%s", type, (self->reverse ? "!" : ""));
128
- }
129
- return str;
102
+ char *str;
103
+ char *type = NULL;
104
+ switch (self->type) {
105
+ case SORT_TYPE_SCORE:
106
+ type = "<SCORE>";
107
+ break;
108
+ case SORT_TYPE_DOC:
109
+ type = "<DOC>";
110
+ break;
111
+ case SORT_TYPE_BYTE:
112
+ type = "<byte>";
113
+ break;
114
+ case SORT_TYPE_INTEGER:
115
+ type = "<integer>";
116
+ break;
117
+ case SORT_TYPE_FLOAT:
118
+ type = "<float>";
119
+ break;
120
+ case SORT_TYPE_STRING:
121
+ type = "<string>";
122
+ break;
123
+ case SORT_TYPE_AUTO:
124
+ type = "<auto>";
125
+ break;
126
+ }
127
+ if (self->field) {
128
+ str = ALLOC_N(char, 10 + strlen(self->field) + strlen(type));
129
+ sprintf(str, "%s:%s%s", self->field, type, (self->reverse ? "!" : ""));
130
+ } else {
131
+ str = ALLOC_N(char, 10 + strlen(type));
132
+ sprintf(str, "%s%s", type, (self->reverse ? "!" : ""));
133
+ }
134
+ return str;
130
135
  }
131
136
 
132
137
  /***************************************************************************
133
138
  * ScoreSortField
134
139
  ***************************************************************************/
135
140
 
141
+ void sf_score_get_val(void *index, Hit *hit, Comparable *comparable)
142
+ {
143
+ (void)index;
144
+ comparable->val.f = hit->score;
145
+ }
146
+
136
147
  int sf_score_compare(void *index_ptr, Hit *hit2, Hit *hit1)
137
148
  {
138
- float val1 = hit1->score;
139
- float val2 = hit2->score;
140
- if (val1 > val2) return 1;
141
- else if (val1 < val2) return -1;
142
- else return 0;
149
+ float val1 = hit1->score;
150
+ float val2 = hit2->score;
151
+ (void)index_ptr;
152
+
153
+ if (val1 > val2) return 1;
154
+ else if (val1 < val2) return -1;
155
+ else return 0;
143
156
  }
144
157
 
145
- SortField *sort_field_score_create(bool reverse)
158
+ SortField *sort_field_score_new(bool reverse)
146
159
  {
147
- SortField *self = sort_field_alloc(NULL, SORT_TYPE_SCORE, reverse);
148
- self->compare = &sf_score_compare;
149
- return self;
160
+ SortField *self = sort_field_alloc(NULL, SORT_TYPE_SCORE, reverse);
161
+ self->compare = &sf_score_compare;
162
+ self->get_val = &sf_score_get_val;
163
+ return self;
150
164
  }
151
165
 
152
- SortField SORT_FIELD_SCORE = {
153
- MUTEX_INITIALIZER,
154
- /* field */NULL,
155
- /* type */SORT_TYPE_SCORE,
156
- /* reverse */false,
157
- /* index */NULL,
158
- /* compare */&sf_score_compare,
159
- /* create_index */NULL,
160
- /* destroy_index */NULL,
161
- /* handle_term */NULL
166
+ const SortField SORT_FIELD_SCORE = {
167
+ MUTEX_INITIALIZER,
168
+ NULL, /* field */
169
+ SORT_TYPE_SCORE, /* type */
170
+ false, /* reverse */
171
+ NULL, /* index */
172
+ &sf_score_compare, /* compare */
173
+ &sf_score_get_val, /* get_val */
174
+ NULL, /* create_index */
175
+ NULL, /* destroy_index */
176
+ NULL, /* handle_term */
162
177
  };
163
178
 
164
- SortField SORT_FIELD_SCORE_REV = {
165
- MUTEX_INITIALIZER,
166
- /* field */NULL,
167
- /* type */SORT_TYPE_SCORE,
168
- /* reverse */true,
169
- /* index */NULL,
170
- /* compare */&sf_score_compare,
171
- /* create_index */NULL,
172
- /* destroy_index */NULL,
173
- /* handle_term */NULL
179
+ const SortField SORT_FIELD_SCORE_REV = {
180
+ MUTEX_INITIALIZER,
181
+ NULL, /* field */
182
+ SORT_TYPE_SCORE, /* type */
183
+ true, /* reverse */
184
+ NULL, /* index */
185
+ &sf_score_compare, /* compare */
186
+ &sf_score_get_val, /* get_val */
187
+ NULL, /* create_index */
188
+ NULL, /* destroy_index */
189
+ NULL, /* handle_term */
174
190
  };
175
191
 
176
192
  /**************************************************************************
177
193
  * DocSortField
178
194
  ***************************************************************************/
179
195
 
196
+ void sf_doc_get_val(void *index, Hit *hit, Comparable *comparable)
197
+ {
198
+ (void)index;
199
+ comparable->val.i = hit->doc;
200
+ }
201
+
180
202
  int sf_doc_compare(void *index_ptr, Hit *hit1, Hit *hit2)
181
203
  {
182
- int val1 = hit1->doc;
183
- int val2 = hit2->doc;
184
- if (val1 > val2) return 1;
185
- else if (val1 < val2) return -1;
186
- else return 0;
204
+ int val1 = hit1->doc;
205
+ int val2 = hit2->doc;
206
+ (void)index_ptr;
207
+
208
+ if (val1 > val2) return 1;
209
+ else if (val1 < val2) return -1;
210
+ else return 0;
187
211
  }
188
212
 
189
- SortField *sort_field_doc_create(bool reverse)
213
+ SortField *sort_field_doc_new(bool reverse)
190
214
  {
191
- SortField *self = sort_field_alloc(NULL, SORT_TYPE_DOC, reverse);
192
- self->compare = &sf_doc_compare;
193
- return self;
215
+ SortField *self = sort_field_alloc(NULL, SORT_TYPE_DOC, reverse);
216
+ self->compare = &sf_doc_compare;
217
+ self->get_val = &sf_doc_get_val;
218
+ return self;
194
219
  }
195
220
 
196
- SortField SORT_FIELD_DOC = {
197
- MUTEX_INITIALIZER,
198
- /* field */NULL,
199
- /* type */SORT_TYPE_DOC,
200
- /* reverse */false,
201
- /* index */NULL,
202
- /* compare */&sf_doc_compare,
203
- /* create_index */NULL,
204
- /* destroy_index */NULL,
205
- /* handle_term */NULL
221
+ const SortField SORT_FIELD_DOC = {
222
+ MUTEX_INITIALIZER,
223
+ NULL, /* field */
224
+ SORT_TYPE_DOC, /* type */
225
+ false, /* reverse */
226
+ NULL, /* index */
227
+ &sf_doc_compare, /* compare */
228
+ &sf_doc_get_val, /* get_val */
229
+ NULL, /* create_index */
230
+ NULL, /* destroy_index */
231
+ NULL, /* handle_term */
206
232
  };
207
233
 
208
- SortField SORT_FIELD_DOC_REV = {
209
- MUTEX_INITIALIZER,
210
- /* field */NULL,
211
- /* type */SORT_TYPE_DOC,
212
- /* reverse */true,
213
- /* index */NULL,
214
- /* compare */&sf_doc_compare,
215
- /* create_index */NULL,
216
- /* destroy_index */NULL,
217
- /* handle_term */NULL
234
+ const SortField SORT_FIELD_DOC_REV = {
235
+ MUTEX_INITIALIZER,
236
+ NULL, /* field */
237
+ SORT_TYPE_DOC, /* type */
238
+ true, /* reverse */
239
+ NULL, /* index */
240
+ &sf_doc_compare, /* compare */
241
+ &sf_doc_get_val, /* get_val */
242
+ NULL, /* create_index */
243
+ NULL, /* destroy_index */
244
+ NULL, /* handle_term */
218
245
  };
219
246
 
247
+ /***************************************************************************
248
+ * ByteSortField
249
+ ***************************************************************************/
250
+
251
+ static void sf_byte_get_val(void *index, Hit *hit, Comparable *comparable)
252
+ {
253
+ comparable->val.i = ((int *)index)[hit->doc];
254
+ }
255
+
256
+ static int sf_byte_compare(void *index, Hit *hit1, Hit *hit2)
257
+ {
258
+ int val1 = ((int *)index)[hit1->doc];
259
+ int val2 = ((int *)index)[hit2->doc];
260
+ if (val1 > val2) return 1;
261
+ else if (val1 < val2) return -1;
262
+ else return 0;
263
+ }
264
+
265
+ static void *sf_byte_create_index(int size)
266
+ {
267
+ int *index = ALLOC_AND_ZERO_N(int, size + 1);
268
+ return &index[1];
269
+ }
270
+
271
+ static void sf_byte_destroy_index(void *p)
272
+ {
273
+ int *index = (int *)p;
274
+ free(&index[-1]);
275
+ }
276
+
277
+ static void sf_byte_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
278
+ {
279
+ int *index = (int *)index_ptr;
280
+ int val = index[-1]++;
281
+ (void)text;
282
+ while (tde->next(tde)) {
283
+ index[tde->doc_num(tde)] = val;
284
+ }
285
+ }
286
+
287
+ static void sort_field_byte_methods(SortField *self)
288
+ {
289
+ self->type = SORT_TYPE_BYTE;
290
+ self->compare = &sf_byte_compare;
291
+ self->get_val = &sf_byte_get_val;
292
+ self->create_index = &sf_byte_create_index;
293
+ self->destroy_index = &sf_byte_destroy_index;
294
+ self->handle_term = &sf_byte_handle_term;
295
+ }
296
+
297
+ SortField *sort_field_byte_new(char *field, bool reverse)
298
+ {
299
+ SortField *self = sort_field_alloc(field, SORT_TYPE_BYTE, reverse);
300
+ sort_field_byte_methods(self);
301
+ return self;
302
+ }
303
+
220
304
  /***************************************************************************
221
305
  * IntegerSortField
222
306
  ***************************************************************************/
223
307
 
224
- int sf_int_compare(void *index_ptr, Hit *hit1, Hit *hit2)
308
+ void sf_int_get_val(void *index, Hit *hit, Comparable *comparable)
225
309
  {
226
- int *index = (int *)index_ptr;
227
- int val1 = index[hit1->doc];
228
- int val2 = index[hit2->doc];
229
- if (val1 > val2) return 1;
230
- else if (val1 < val2) return -1;
231
- else return 0;
310
+ comparable->val.i = ((int *)index)[hit->doc];
311
+ }
312
+
313
+ int sf_int_compare(void *index, Hit *hit1, Hit *hit2)
314
+ {
315
+ int val1 = ((int *)index)[hit1->doc];
316
+ int val2 = ((int *)index)[hit2->doc];
317
+ if (val1 > val2) return 1;
318
+ else if (val1 < val2) return -1;
319
+ else return 0;
232
320
  }
233
321
 
234
322
  void *sf_int_create_index(int size)
235
323
  {
236
- return ALLOC_N(int, size);
324
+ return ALLOC_AND_ZERO_N(int, size);
237
325
  }
238
326
 
239
327
  void sf_int_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
240
328
  {
241
- int *index = (int *)index_ptr;
242
- int val;
243
- sscanf(text, "%d", &val);
244
- while (tde->next(tde)) {
245
- index[tde->doc_num(tde)] = val;
246
- }
329
+ int *index = (int *)index_ptr;
330
+ int val;
331
+ sscanf(text, "%d", &val);
332
+ while (tde->next(tde)) {
333
+ index[tde->doc_num(tde)] = val;
334
+ }
247
335
  }
248
336
 
249
337
  void sort_field_int_methods(SortField *self)
250
338
  {
251
- self->compare = &sf_int_compare;
252
- self->create_index = &sf_int_create_index;
253
- self->handle_term = &sf_int_handle_term;
339
+ self->type = SORT_TYPE_INTEGER;
340
+ self->compare = &sf_int_compare;
341
+ self->get_val = &sf_int_get_val;
342
+ self->create_index = &sf_int_create_index;
343
+ self->handle_term = &sf_int_handle_term;
254
344
  }
255
345
 
256
- SortField *sort_field_int_create(char *field, bool reverse)
346
+ SortField *sort_field_int_new(char *field, bool reverse)
257
347
  {
258
- SortField *self = sort_field_alloc(field, SORT_TYPE_INTEGER, reverse);
259
- sort_field_int_methods(self);
260
- return self;
348
+ SortField *self = sort_field_alloc(field, SORT_TYPE_INTEGER, reverse);
349
+ sort_field_int_methods(self);
350
+ return self;
261
351
  }
262
352
 
263
353
  /***************************************************************************
264
354
  * FloatSortField
265
355
  ***************************************************************************/
266
356
 
267
- int sf_float_compare(void *index_ptr, Hit *hit1, Hit *hit2)
357
+ void sf_float_get_val(void *index, Hit *hit, Comparable *comparable)
268
358
  {
269
- float *index = (float *)index_ptr;
270
- float val1 = index[hit1->doc];
271
- float val2 = index[hit2->doc];
272
- if (val1 > val2) return 1;
273
- else if (val1 < val2) return -1;
274
- else return 0;
359
+ comparable->val.f = ((float *)index)[hit->doc];
360
+ }
361
+
362
+ int sf_float_compare(void *index, Hit *hit1, Hit *hit2)
363
+ {
364
+ float val1 = ((float *)index)[hit1->doc];
365
+ float val2 = ((float *)index)[hit2->doc];
366
+ if (val1 > val2) return 1;
367
+ else if (val1 < val2) return -1;
368
+ else return 0;
275
369
  }
276
370
 
277
371
  void *sf_float_create_index(int size)
278
372
  {
279
- return ALLOC_N(float, size);
373
+ return ALLOC_AND_ZERO_N(float, size);
280
374
  }
281
375
 
282
376
  void sf_float_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
283
377
  {
284
- float *index = (float *)index_ptr;
285
- float val;
286
- sscanf(text, "%g", &val);
287
- while (tde->next(tde)) {
288
- index[tde->doc_num(tde)] = val;
289
- }
378
+ float *index = (float *)index_ptr;
379
+ float val;
380
+ sscanf(text, "%g", &val);
381
+ while (tde->next(tde)) {
382
+ index[tde->doc_num(tde)] = val;
383
+ }
290
384
  }
291
385
 
292
386
  void sort_field_float_methods(SortField *self)
293
387
  {
294
- self->compare = &sf_float_compare;
295
- self->create_index = &sf_float_create_index;
296
- self->handle_term = &sf_float_handle_term;
388
+ self->type = SORT_TYPE_FLOAT;
389
+ self->compare = &sf_float_compare;
390
+ self->get_val = &sf_float_get_val;
391
+ self->create_index = &sf_float_create_index;
392
+ self->handle_term = &sf_float_handle_term;
297
393
  }
298
394
 
299
- SortField *sort_field_float_create(char *field, bool reverse)
395
+ SortField *sort_field_float_new(char *field, bool reverse)
300
396
  {
301
- SortField *self = sort_field_alloc(field, SORT_TYPE_FLOAT, reverse);
302
- sort_field_float_methods(self);
303
- return self;
397
+ SortField *self = sort_field_alloc(field, SORT_TYPE_FLOAT, reverse);
398
+ sort_field_float_methods(self);
399
+ return self;
304
400
  }
305
401
 
306
402
  /***************************************************************************
@@ -309,78 +405,99 @@ SortField *sort_field_float_create(char *field, bool reverse)
309
405
 
310
406
  #define VALUES_ARRAY_START_SIZE 8
311
407
  typedef struct StringIndex {
312
- int size;
313
- int *index;
314
- char **values;
315
- int v_cnt;
316
- int v_size;
408
+ int size;
409
+ int *index;
410
+ char **values;
411
+ int v_size;
412
+ int v_capa;
317
413
  } StringIndex;
318
414
 
319
- int sf_string_compare(void *index_ptr, Hit *hit1, Hit *hit2)
415
+ void sf_string_get_val(void *index, Hit *hit, Comparable *comparable)
320
416
  {
321
- StringIndex *index = (StringIndex *)index_ptr;
322
- return strcoll(index->values[index->index[hit1->doc]],
323
- index->values[index->index[hit2->doc]]);
324
- /*
325
- int val1 = index->index[hit1->doc];
326
- int val2 = index->index[hit2->doc];
327
- if (val1 > val2) return 1;
328
- else if (val1 < val2) return -1;
329
- else return 0;
330
- */
417
+ comparable->val.s
418
+ = ((StringIndex *)index)->values[
419
+ ((StringIndex *)index)->index[hit->doc]];
420
+ }
421
+
422
+ int sf_string_compare(void *index, Hit *hit1, Hit *hit2)
423
+ {
424
+ char *s1 = ((StringIndex *)index)->values[
425
+ ((StringIndex *)index)->index[hit1->doc]];
426
+ char *s2 = ((StringIndex *)index)->values[
427
+ ((StringIndex *)index)->index[hit2->doc]];
428
+
429
+ if (s1 == NULL) return s1 ? -1 : 0;
430
+ if (s2 == NULL) return 1;
431
+
432
+ #ifdef POSH_OS_WIN32
433
+ return strcmp(s1, s2);
434
+ #else
435
+ return strcoll(s1, s2);
436
+ #endif
437
+
438
+ /*
439
+ * TODO: investigate whether it would be a good idea to presort strings.
440
+ *
441
+ int val1 = index->index[hit1->doc];
442
+ int val2 = index->index[hit2->doc];
443
+ if (val1 > val2) return 1;
444
+ else if (val1 < val2) return -1;
445
+ else return 0;
446
+ */
331
447
  }
332
448
 
333
449
  void *sf_string_create_index(int size)
334
450
  {
335
- StringIndex *self = ALLOC(StringIndex);
336
- ZEROSET(self, StringIndex, 1);
337
- self->size = size;
338
- self->index = ALLOC_N(int, size);
339
- ZEROSET(self->index, int, size);
340
- self->v_size = VALUES_ARRAY_START_SIZE;
341
- self->values = ALLOC_N(char *, VALUES_ARRAY_START_SIZE);
342
- return self;
451
+ StringIndex *self = ALLOC_AND_ZERO(StringIndex);
452
+ self->size = size;
453
+ self->index = ALLOC_AND_ZERO_N(int, size);
454
+ self->v_capa = VALUES_ARRAY_START_SIZE;
455
+ self->v_size = 1; /* leave the first value as NULL */
456
+ self->values = ALLOC_AND_ZERO_N(char *, VALUES_ARRAY_START_SIZE);
457
+ return self;
343
458
  }
344
459
 
345
460
  void sf_string_destroy_index(void *p)
346
461
  {
347
- StringIndex *self = (StringIndex *)p;
348
- int i;
349
- free(self->index);
350
- for (i = 0; i < self->v_cnt; i++) {
351
- free(self->values[i]);
352
- }
353
- free(self->values);
354
- free(self);
462
+ StringIndex *self = (StringIndex *)p;
463
+ int i;
464
+ free(self->index);
465
+ for (i = 0; i < self->v_size; i++) {
466
+ free(self->values[i]);
467
+ }
468
+ free(self->values);
469
+ free(self);
355
470
  }
356
471
 
357
472
  void sf_string_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
358
473
  {
359
- StringIndex *index = (StringIndex *)index_ptr;
360
- if (index->v_cnt >= index->v_size) {
361
- index->v_size *= 2;
362
- index->values = REALLOC_N(index->values, char *, index->v_size);
363
- }
364
- index->values[index->v_cnt] = estrdup(text);
365
- while (tde->next(tde)) {
366
- index->index[tde->doc_num(tde)] = index->v_cnt;
367
- }
368
- index->v_cnt++;
474
+ StringIndex *index = (StringIndex *)index_ptr;
475
+ if (index->v_size >= index->v_capa) {
476
+ index->v_capa *= 2;
477
+ index->values = REALLOC_N(index->values, char *, index->v_capa);
478
+ }
479
+ index->values[index->v_size] = estrdup(text);
480
+ while (tde->next(tde)) {
481
+ index->index[tde->doc_num(tde)] = index->v_size;
482
+ }
483
+ index->v_size++;
369
484
  }
370
485
 
371
486
  void sort_field_string_methods(SortField *self)
372
487
  {
373
- self->compare = &sf_string_compare;
374
- self->create_index = &sf_string_create_index;
375
- self->destroy_index = &sf_string_destroy_index;
376
- self->handle_term = &sf_string_handle_term;
488
+ self->type = SORT_TYPE_STRING;
489
+ self->compare = &sf_string_compare;
490
+ self->get_val = &sf_string_get_val;
491
+ self->create_index = &sf_string_create_index;
492
+ self->destroy_index = &sf_string_destroy_index;
493
+ self->handle_term = &sf_string_handle_term;
377
494
  }
378
495
 
379
- SortField *sort_field_string_create(char *field, bool reverse)
496
+ SortField *sort_field_string_new(char *field, bool reverse)
380
497
  {
381
- SortField *self = sort_field_alloc(field, SORT_TYPE_STRING, reverse);
382
- sort_field_string_methods(self);
383
- return self;
498
+ SortField *self = sort_field_alloc(field, SORT_TYPE_STRING, reverse);
499
+ sort_field_string_methods(self);
500
+ return self;
384
501
  }
385
502
 
386
503
  /***************************************************************************
@@ -389,27 +506,27 @@ SortField *sort_field_string_create(char *field, bool reverse)
389
506
 
390
507
  void sort_field_auto_evaluate(SortField *sf, char *text)
391
508
  {
392
- int int_val;
393
- float float_val;
394
- size_t text_len = 0, scan_len = 0;
509
+ int int_val;
510
+ float float_val;
511
+ int text_len = 0, scan_len = 0;
395
512
 
396
- text_len = strlen(text);
397
- sscanf(text, "%d%n", &int_val, &scan_len);
398
- if (scan_len == text_len) {
399
- sort_field_int_methods(sf);
400
- } else {
401
- sscanf(text, "%f%n", &float_val, &scan_len);
513
+ text_len = (int)strlen(text);
514
+ sscanf(text, "%d%n", &int_val, &scan_len);
402
515
  if (scan_len == text_len) {
403
- sort_field_float_methods(sf);
516
+ sort_field_int_methods(sf);
404
517
  } else {
405
- sort_field_string_methods(sf);
518
+ sscanf(text, "%f%n", &float_val, &scan_len);
519
+ if (scan_len == text_len) {
520
+ sort_field_float_methods(sf);
521
+ } else {
522
+ sort_field_string_methods(sf);
523
+ }
406
524
  }
407
- }
408
525
  }
409
526
 
410
- SortField *sort_field_auto_create(char *field, bool reverse)
527
+ SortField *sort_field_auto_new(char *field, bool reverse)
411
528
  {
412
- return sort_field_alloc(field, SORT_TYPE_AUTO, reverse);
529
+ return sort_field_alloc(field, SORT_TYPE_AUTO, reverse);
413
530
  }
414
531
 
415
532
  /***************************************************************************
@@ -420,58 +537,60 @@ SortField *sort_field_auto_create(char *field, bool reverse)
420
537
 
421
538
  void *field_cache_get_index(IndexReader *ir, SortField *sf)
422
539
  {
423
- void *index = NULL;
424
- int length = 0;
425
- Term term;
426
- TermBuffer *tb;
427
- TermEnum *volatile te = NULL;
428
- TermDocEnum *volatile tde = NULL;
429
- char *field = sf->field;
430
- SortField *sf_clone;
431
-
432
- mutex_lock(&sf->mutex);
433
- if (!ir->sort_cache) {
434
- ir->sort_cache = h_new(&sort_field_hash, &sort_field_eq,
435
- &sort_field_destroy, NULL);
436
- }
437
- index = h_get(ir->sort_cache, sf);
438
-
439
- if (index == NULL) {
440
- length = ir->max_doc(ir);
441
- if (length > 0) {
442
- TRY
443
- tde = ir->term_docs(ir);
444
- term.field = field;
445
- term.text = "";
446
- te = ir->terms_from(ir, &term);
447
- if (te->tb_curr == NULL) {
448
- RAISE(ARG_ERROR, NO_TERM_ERROR_MSG);
449
- }
540
+ void *index = NULL;
541
+ int length = 0;
542
+ TermEnum *volatile te = NULL;
543
+ TermDocEnum *volatile tde = NULL;
544
+ SortField *sf_clone;
545
+ const int field_num = fis_get_field_num(ir->fis, sf->field);
546
+
547
+ if (field_num < 0) {
548
+ RAISE(ARG_ERROR,
549
+ "Cannot sort by field \"%s\". It doesn't exist in the index.",
550
+ sf->field);
551
+ }
450
552
 
451
- if (sf->type == SORT_TYPE_AUTO) {
452
- sort_field_auto_evaluate(sf, te->tb_curr->text);
453
- }
553
+ mutex_lock(&sf->mutex);
554
+ if (!ir->sort_cache) {
555
+ ir->sort_cache = h_new(&sort_field_hash, &sort_field_cache_eq,
556
+ &sort_field_destroy, NULL);
557
+ }
454
558
 
455
- index = sf->create_index(length);
456
-
457
- do {
458
- tb = te->tb_curr;
459
- if (strcmp(tb->field, field) != 0) break;
460
- term.text = tb->text;
461
- tde->seek(tde, &term);
462
- sf->handle_term(index, tde, tb->text);
463
- } while (te->next(te));
464
- XFINALLY
465
- tde->close(tde);
559
+ if (sf->type == SORT_TYPE_AUTO) {
560
+ te = ir->terms(ir, field_num);
561
+ if (!te->next(te)) {
562
+ RAISE(ARG_ERROR,
563
+ "Cannot sort by field \"%s\" as there are no terms "
564
+ "in that field in the index.", sf->field);
565
+ }
566
+ sort_field_auto_evaluate(sf, te->curr_term);
466
567
  te->close(te);
467
- XENDTRY
468
568
  }
469
- sf_clone = sort_field_clone(sf);
470
- sf_clone->index = index;
471
- h_set(ir->sort_cache, sf_clone, index);
472
- }
473
- mutex_unlock(&sf->mutex);
474
- return index;
569
+
570
+ index = h_get(ir->sort_cache, sf);
571
+
572
+ if (index == NULL) {
573
+ length = ir->max_doc(ir);
574
+ if (length > 0) {
575
+ TRY
576
+ tde = ir->term_docs(ir);
577
+ te = ir->terms(ir, field_num);
578
+ index = sf->create_index(length);
579
+ while (te->next(te)) {
580
+ tde->seek_te(tde, te);
581
+ sf->handle_term(index, tde, te->curr_term);
582
+ }
583
+ XFINALLY
584
+ tde->close(tde);
585
+ te->close(te);
586
+ XENDTRY
587
+ }
588
+ sf_clone = sort_field_clone(sf);
589
+ sf_clone->index = index;
590
+ h_set(ir->sort_cache, sf_clone, index);
591
+ }
592
+ mutex_unlock(&sf->mutex);
593
+ return index;
475
594
  }
476
595
 
477
596
  /***************************************************************************
@@ -485,19 +604,19 @@ void *field_cache_get_index(IndexReader *ir, SortField *sf)
485
604
  ***************************************************************************/
486
605
 
487
606
  typedef struct Comparator {
488
- void *index;
489
- bool reverse : 1;
490
- int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
607
+ void *index;
608
+ bool reverse : 1;
609
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
491
610
  } Comparator;
492
611
 
493
- Comparator *comparator_create(void *index, bool reverse,
494
- int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2))
612
+ Comparator *comparator_new(void *index, bool reverse,
613
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2))
495
614
  {
496
- Comparator *self = ALLOC(Comparator);
497
- self->index = index;
498
- self->reverse = reverse;
499
- self->compare = compare;
500
- return self;
615
+ Comparator *self = ALLOC(Comparator);
616
+ self->index = index;
617
+ self->reverse = reverse;
618
+ self->compare = compare;
619
+ return self;
501
620
  }
502
621
 
503
622
  /***************************************************************************
@@ -505,164 +624,279 @@ Comparator *comparator_create(void *index, bool reverse,
505
624
  ***************************************************************************/
506
625
 
507
626
  typedef struct Sorter {
508
- Comparator **comparators;
509
- int c_cnt;
627
+ Comparator **comparators;
628
+ int c_cnt;
629
+ Sort *sort;
510
630
  } Sorter;
511
631
 
512
632
  Comparator *sorter_get_comparator(SortField *sf, IndexReader *ir)
513
633
  {
514
- void *index = NULL;
634
+ void *index = NULL;
515
635
 
516
- if (sf->type > SORT_TYPE_DOC) {
517
- index = field_cache_get_index(ir, sf);
518
- }
519
- return comparator_create(index, sf->reverse, sf->compare);
636
+ if (sf->type > SORT_TYPE_DOC) {
637
+ index = field_cache_get_index(ir, sf);
638
+ }
639
+ return comparator_new(index, sf->reverse, sf->compare);
520
640
  }
521
641
 
522
- void sorter_destroy(void *p)
642
+ void sorter_destroy(Sorter *self)
523
643
  {
524
- int i;
525
- Sorter *self = (Sorter *)p;
644
+ int i;
526
645
 
527
- for (i = 0; i < self->c_cnt; i++) {
528
- free(self->comparators[i]);
529
- }
530
- free(self->comparators);
531
- free(self);
646
+ for (i = 0; i < self->c_cnt; i++) {
647
+ free(self->comparators[i]);
648
+ }
649
+ free(self->comparators);
650
+ free(self);
532
651
  }
533
652
 
534
- Sorter *sorter_create(int size)
653
+ Sorter *sorter_new(Sort *sort)
535
654
  {
536
- Sorter *self = ALLOC(Sorter);
537
- self->c_cnt = size;
538
- self->comparators = ALLOC_N(Comparator *, size);
539
- ZEROSET(self->comparators, Comparator *, size);
540
- return self;
655
+ Sorter *self = ALLOC(Sorter);
656
+ self->c_cnt = sort->size;
657
+ self->comparators = ALLOC_AND_ZERO_N(Comparator *, self->c_cnt);
658
+ self->sort = sort;
659
+ return self;
541
660
  }
542
661
 
543
662
  /***************************************************************************
544
663
  * FieldSortedHitQueue
545
664
  ***************************************************************************/
546
665
 
547
- bool fshq_less_than(void *hit1, void *hit2)
666
+ bool fshq_less_than(const void *hit1, const void *hit2)
548
667
  {
549
- int cmp = 0;
550
- printf("Whoops, shouldn't call this.\n");
551
- if (cmp != 0) {
552
- return cmp;
553
- } else {
554
- return ((Hit *)hit1)->score < ((Hit *)hit2)->score;
555
- }
668
+ int cmp = 0;
669
+ printf("Whoops, shouldn't call this.\n");
670
+ if (cmp != 0) {
671
+ return cmp;
672
+ } else {
673
+ return ((Hit *)hit1)->score < ((Hit *)hit2)->score;
674
+ }
556
675
  }
557
676
 
558
- inline bool fshq_lt(Hit *sorter_ptr, Hit *hit1, Hit *hit2)
677
+ inline bool fshq_lt(Sorter *sorter, Hit *hit1, Hit *hit2)
559
678
  {
560
- Sorter *sorter = (Sorter *)sorter_ptr;
561
- Comparator *comp;
562
- int diff = 0, i;
563
- for (i = 0; i < sorter->c_cnt && diff == 0; i++) {
564
- comp = sorter->comparators[i];
565
- if (comp->reverse) {
566
- diff = comp->compare(comp->index, hit2, hit1);
567
- } else {
568
- diff = comp->compare(comp->index, hit1, hit2);
679
+ Comparator *comp;
680
+ int diff = 0, i;
681
+ for (i = 0; i < sorter->c_cnt && diff == 0; i++) {
682
+ comp = sorter->comparators[i];
683
+ if (comp->reverse) {
684
+ diff = comp->compare(comp->index, hit2, hit1);
685
+ } else {
686
+ diff = comp->compare(comp->index, hit1, hit2);
687
+ }
569
688
  }
570
- }
571
689
 
572
- if (diff != 0) {
573
- return diff > 0;
574
- } else {
575
- return hit1->doc > hit2->doc;
576
- }
690
+ if (diff != 0) {
691
+ return diff > 0;
692
+ } else {
693
+ return hit1->doc > hit2->doc;
694
+ }
577
695
  }
578
696
 
579
697
  void fshq_pq_down(PriorityQueue *pq)
580
698
  {
581
- register int i = 1;
582
- register int j = 2; //i << 1;
583
- register int k = 3; //j + 1;
584
- Hit **heap = (Hit **)pq->heap;
585
- Hit *node = heap[i]; // save top node
699
+ register int i = 1;
700
+ register int j = 2; /* i << 1; */
701
+ register int k = 3; /* j + 1; */
702
+ Hit **heap = (Hit **)pq->heap;
703
+ Hit *node = heap[i]; /* save top node */
704
+ Sorter *sorter = (Sorter *)heap[0];
586
705
 
587
- if ((k <= pq->count) && fshq_lt(heap[0], heap[k], heap[j]))
588
- j = k;
706
+ if ((k <= pq->size) && fshq_lt(sorter, heap[k], heap[j])) {
707
+ j = k;
708
+ }
589
709
 
590
- while ((j <= pq->count) && fshq_lt(heap[0], heap[j], node)) {
591
- heap[i] = heap[j]; // shift up child
592
- i = j;
593
- j = i << 1;
594
- k = j + 1;
595
- if ((k <= pq->count) && fshq_lt(heap[0], heap[k], heap[j]))
596
- j = k;
597
- }
598
- heap[i] = node;
710
+ while ((j <= pq->size) && fshq_lt(sorter, heap[j], node)) {
711
+ heap[i] = heap[j]; /* shift up child */
712
+ i = j;
713
+ j = i << 1;
714
+ k = j + 1;
715
+ if ((k <= pq->size) && fshq_lt(sorter, heap[k], heap[j])) {
716
+ j = k;
717
+ }
718
+ }
719
+ heap[i] = node;
599
720
  }
600
721
 
601
722
  Hit *fshq_pq_pop(PriorityQueue *pq)
602
723
  {
603
- if (pq->count > 0) {
604
- Hit *result = (Hit *)pq->heap[1]; // save first value
605
- pq->heap[1] = pq->heap[pq->count]; // move last to first
606
- pq->heap[pq->count] = NULL;
607
- pq->count--;
608
- fshq_pq_down(pq); // adjust heap
609
- return result;
610
- } else {
611
- return NULL;
612
- }
724
+ if (pq->size > 0) {
725
+ Hit *hit = (Hit *)pq->heap[1]; /* save first value */
726
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
727
+ pq->heap[pq->size] = NULL;
728
+ pq->size--;
729
+ fshq_pq_down(pq); /* adjust heap */
730
+ return hit;
731
+ } else {
732
+ return NULL;
733
+ }
613
734
  }
614
735
 
615
736
  inline void fshq_pq_up(PriorityQueue *pq)
616
737
  {
617
- Hit **heap = (Hit **)pq->heap;
618
- Hit *node;
619
- int i = pq->count;
620
- int j = i >> 1;
621
- node = heap[i];
622
-
623
- while ((j > 0) && fshq_lt(heap[0], node, heap[j])) {
624
- heap[i] = heap[j];
625
- i = j;
626
- j = j >> 1;
627
- }
628
- heap[i] = node;
738
+ Hit **heap = (Hit **)pq->heap;
739
+ Hit *node;
740
+ int i = pq->size;
741
+ int j = i >> 1;
742
+ Sorter *sorter = (Sorter *)heap[0];
743
+ node = heap[i];
744
+
745
+ while ((j > 0) && fshq_lt(sorter, node, heap[j])) {
746
+ heap[i] = heap[j];
747
+ i = j;
748
+ j = j >> 1;
749
+ }
750
+ heap[i] = node;
629
751
  }
630
752
 
631
753
  void fshq_pq_insert(PriorityQueue *pq, Hit *hit)
632
754
  {
633
- if (pq->count < pq->size) {
634
- Hit *new_hit = ALLOC(Hit);
635
- memcpy(new_hit, hit, sizeof(Hit));
636
- pq->count++;
637
- pq->heap[pq->count] = new_hit;
638
- fshq_pq_up(pq);
639
- } else if (pq->count > 0 &&
640
- fshq_lt((Hit *)pq->heap[0], (Hit *)pq->heap[1], hit)) {
641
- memcpy(pq->heap[1], hit, sizeof(Hit));
642
- fshq_pq_down(pq);
643
- }
755
+ if (pq->size < pq->capa) {
756
+ Hit *new_hit = ALLOC(Hit);
757
+ memcpy(new_hit, hit, sizeof(Hit));
758
+ pq->size++;
759
+ if (pq->size >= pq->mem_capa) {
760
+ pq->mem_capa <<= 1;
761
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
762
+ }
763
+ pq->heap[pq->size] = new_hit;
764
+ fshq_pq_up(pq);
765
+ } else if (pq->size > 0
766
+ && fshq_lt((Sorter *)pq->heap[0], (Hit *)pq->heap[1], hit)) {
767
+ memcpy(pq->heap[1], hit, sizeof(Hit));
768
+ fshq_pq_down(pq);
769
+ }
644
770
  }
645
771
 
646
772
  void fshq_pq_destroy(PriorityQueue *self)
647
773
  {
648
- sorter_destroy(self->heap[0]);
649
- pq_destroy(self);
774
+ sorter_destroy(self->heap[0]);
775
+ pq_destroy(self);
776
+ }
777
+
778
+ PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir)
779
+ {
780
+ PriorityQueue *self = pq_new(size, &fshq_less_than, &free);
781
+ int i;
782
+ Sorter *sorter = sorter_new(sort);
783
+ SortField *sf;
784
+
785
+ for (i = 0; i < sort->size; i++) {
786
+ sf = sort->sort_fields[i];
787
+ sorter->comparators[i] = sorter_get_comparator(sf, ir);
788
+ }
789
+ self->heap[0] = sorter;
790
+
791
+ return self;
792
+ }
793
+
794
+ Hit *fshq_pq_pop_fd(PriorityQueue *pq)
795
+ {
796
+ if (pq->size <= 0) {
797
+ return NULL;
798
+ }
799
+ else {
800
+ int j;
801
+ Sorter *sorter = (Sorter *)pq->heap[0];
802
+ const int cmp_cnt = sorter->c_cnt;
803
+ SortField **sort_fields = sorter->sort->sort_fields;
804
+ Hit *hit = (Hit *)pq->heap[1]; /* save first value */
805
+ FieldDoc *field_doc;
806
+ Comparable *comparables;
807
+ Comparator **comparators = sorter->comparators;
808
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
809
+ pq->heap[pq->size] = NULL;
810
+ pq->size--;
811
+ fshq_pq_down(pq); /* adjust heap */
812
+
813
+ field_doc = (FieldDoc *)emalloc(sizeof(FieldDoc)
814
+ + sizeof(Comparable)*cmp_cnt);
815
+ comparables = field_doc->comparables;
816
+ memcpy(field_doc, hit, sizeof(Hit));
817
+ field_doc->size = cmp_cnt;
818
+
819
+ for (j = 0; j < cmp_cnt; j++) {
820
+ SortField *sf = sort_fields[j];
821
+ Comparator *comparator = comparators[j];
822
+ sf->get_val(comparator->index, hit, &(comparables[j]));
823
+ comparables[j].type = sf->type;
824
+ comparables[j].reverse = comparator->reverse;
825
+ }
826
+ free(hit);
827
+ return (Hit *)field_doc;
828
+ }
650
829
  }
651
830
 
652
- PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir)
831
+ /***************************************************************************
832
+ * FieldDoc
833
+ ***************************************************************************/
834
+
835
+ void fd_destroy(FieldDoc *fd)
653
836
  {
654
- PriorityQueue *self = pq_create(size, &fshq_less_than);
655
- int i;
656
- Sorter *sorter = sorter_create(sort->sf_cnt);
657
- SortField *sf;
837
+ free(fd);
838
+ }
658
839
 
659
- for (i = 0; i < sort->sf_cnt; i++) {
660
- sf = sort->sort_fields[i];
661
- sorter->comparators[i] = sorter_get_comparator(sf, ir);
662
- }
663
- self->heap[0] = sorter;
664
-
665
- return self;
840
+ /***************************************************************************
841
+ * FieldDocSortedHitQueue
842
+ ***************************************************************************/
843
+
844
+ bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2)
845
+ {
846
+ int c = 0, i;
847
+ Comparable *cmps1 = fd1->comparables;
848
+ Comparable *cmps2 = fd2->comparables;
849
+
850
+ for (i = 0; i < fd1->size && c == 0; i++) {
851
+ int type = cmps1[i].type;
852
+ switch (type) {
853
+ case SORT_TYPE_SCORE:
854
+ if (cmps1[i].val.f < cmps2[i].val.f) c = 1;
855
+ if (cmps1[i].val.f > cmps2[i].val.f) c = -1;
856
+ break;
857
+ case SORT_TYPE_FLOAT:
858
+ if (cmps1[i].val.f > cmps2[i].val.f) c = 1;
859
+ if (cmps1[i].val.f < cmps2[i].val.f) c = -1;
860
+ break;
861
+ case SORT_TYPE_DOC:
862
+ if (fd1->hit.doc > fd2->hit.doc) c = 1;
863
+ if (fd1->hit.doc < fd2->hit.doc) c = -1;
864
+ break;
865
+ case SORT_TYPE_INTEGER:
866
+ if (cmps1[i].val.i > cmps2[i].val.i) c = 1;
867
+ if (cmps1[i].val.i < cmps2[i].val.i) c = -1;
868
+ break;
869
+ case SORT_TYPE_BYTE:
870
+ if (cmps1[i].val.i > cmps2[i].val.i) c = 1;
871
+ if (cmps1[i].val.i < cmps2[i].val.i) c = -1;
872
+ break;
873
+ case SORT_TYPE_STRING:
874
+ do {
875
+ char *s1 = cmps1[i].val.s;
876
+ char *s2 = cmps2[i].val.s;
877
+ if (s1 == NULL) c = s2 ? -1 : 0;
878
+ else if (s2 == NULL) c = 1;
879
+ #ifdef POSH_OS_WIN32
880
+ else c = strcmp(s1, s2);
881
+ #else
882
+ else c = strcoll(s1, s2);
883
+ #endif
884
+ } while (0);
885
+ break;
886
+ default:
887
+ RAISE(ERROR, "Unknown sort type: %d.", type);
888
+ break;
889
+ }
890
+ if (cmps1[i].reverse) {
891
+ c = -c;
892
+ }
893
+ }
894
+ if (c == 0) {
895
+ return fd1->hit.doc > fd2->hit.doc;
896
+ }
897
+ else {
898
+ return c > 0;
899
+ }
666
900
  }
667
901
 
668
902
  /***************************************************************************
@@ -671,75 +905,78 @@ PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir)
671
905
  *
672
906
  ***************************************************************************/
673
907
 
674
- Sort *sort_create()
908
+ #define SORT_INIT_SIZE 4
909
+
910
+ Sort *sort_new()
675
911
  {
676
- Sort *self = ALLOC(Sort);
677
- self->sf_cnt = 0;
678
- self->sf_capa = ARRAY_INIT_SIZE;
679
- self->sort_fields = ALLOC_N(SortField *, ARRAY_INIT_SIZE);
680
- self->destroy_all = true;
912
+ Sort *self = ALLOC(Sort);
913
+ self->size = 0;
914
+ self->capa = SORT_INIT_SIZE;
915
+ self->sort_fields = ALLOC_N(SortField *, SORT_INIT_SIZE);
916
+ self->destroy_all = true;
917
+ self->start = 0;
681
918
 
682
- return self;
919
+ return self;
683
920
  }
684
921
 
685
922
  void sort_clear(Sort *self)
686
923
  {
687
- int i;
688
- if (self->destroy_all) {
689
- for (i = 0; i < self->sf_cnt; i++) {
690
- sort_field_destroy(self->sort_fields[i]);
924
+ int i;
925
+ if (self->destroy_all) {
926
+ for (i = 0; i < self->size; i++) {
927
+ sort_field_destroy(self->sort_fields[i]);
928
+ }
691
929
  }
692
- }
693
- self->sf_cnt = 0;
930
+ self->size = 0;
694
931
  }
695
932
 
696
933
  void sort_destroy(void *p)
697
934
  {
698
- Sort *self = (Sort *)p;
699
- sort_clear(self);
700
- free(self->sort_fields);
701
- free(self);
935
+ Sort *self = (Sort *)p;
936
+ sort_clear(self);
937
+ free(self->sort_fields);
938
+ free(self);
702
939
  }
703
940
 
704
941
  void sort_add_sort_field(Sort *self, SortField *sf)
705
942
  {
706
- if (self->sf_cnt == self->sf_capa) {
707
- self->sf_capa *= 2;
708
- REALLOC_N(self->sort_fields, SortField *, self->sf_capa);
709
- }
943
+ if (self->size == self->capa) {
944
+ self->capa <<= 1;
945
+ REALLOC_N(self->sort_fields, SortField *, self->capa);
946
+ }
710
947
 
711
- self->sort_fields[self->sf_cnt] = sf;
712
- self->sf_cnt++;
948
+ self->sort_fields[self->size] = sf;
949
+ self->size++;
713
950
  }
714
951
 
715
952
  char *sort_to_s(Sort *self)
716
953
  {
717
- int i, len = 20;
718
- char *s;
719
- char *str;
720
- char **sf_strs = ALLOC_N(char *, self->sf_cnt);
721
-
722
- for (i = 0; i < self->sf_cnt; i++) {
723
- sf_strs[i] = s = sort_field_to_s(self->sort_fields[i]);
724
- len += (int)strlen(s) + 2;
725
- }
726
-
727
- str = ALLOC_N(char, len);
728
- s = "Sort[";
729
- len = (int)strlen(s);
730
- memcpy(str, s, len);
731
-
732
- s = str + len;
733
- for (i = 0; i < self->sf_cnt; i++) {
734
- sprintf(s, "%s, ", sf_strs[i]);
735
- s += (int)strlen(s);
736
- free(sf_strs[i]);
737
- }
738
- free(sf_strs);
739
-
740
- if (self->sf_cnt > 0) {
741
- s -= 2;
742
- }
743
- sprintf(s, "]");
744
- return str;
954
+ int i, len = 20;
955
+ char *s;
956
+ char *str;
957
+ char **sf_strs = ALLOC_N(char *, self->size);
958
+
959
+ for (i = 0; i < self->size; i++) {
960
+ sf_strs[i] = s = sort_field_to_s(self->sort_fields[i]);
961
+ len += (int)strlen(s) + 2;
962
+ }
963
+
964
+ str = ALLOC_N(char, len);
965
+ s = "Sort[";
966
+ len = (int)strlen(s);
967
+ memcpy(str, s, len);
968
+
969
+ s = str + len;
970
+ for (i = 0; i < self->size; i++) {
971
+ sprintf(s, "%s, ", sf_strs[i]);
972
+ s += (int)strlen(s);
973
+ free(sf_strs[i]);
974
+ }
975
+ free(sf_strs);
976
+
977
+ if (self->size > 0) {
978
+ s -= 2;
979
+ }
980
+ sprintf(s, "]");
981
+ return str;
745
982
  }