ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/similarity.c CHANGED
@@ -1,172 +1,150 @@
1
- #include <search.h>
2
- #include <global.h>
1
+ #include "similarity.h"
2
+ #include "search.h"
3
+ #include "array.h"
4
+ #include "helper.h"
3
5
  #include <math.h>
6
+ #include <stdlib.h>
7
+ #include <string.h>
4
8
 
5
- static int low_bit = 0, low_mid_bit = 0, high_mid_bit = 0, high_bit = 0;
6
- static void
7
- setup_endian()
8
- {
9
- static int init = 0;
10
- char *p;
11
-
12
- if (init) return;
13
- init = 1;
14
- p = (char*)&init;
15
-
16
- if (p[0]) {
17
- low_bit = 0;
18
- low_mid_bit = 1;
19
- high_mid_bit = 2;
20
- high_bit = 3;
21
- } else {
22
- low_bit = 3;
23
- low_mid_bit = 2;
24
- high_mid_bit = 1;
25
- high_bit = 0;
26
- }
27
- }
9
+ /****************************************************************************
10
+ *
11
+ * Term
12
+ *
13
+ ****************************************************************************/
28
14
 
29
- float byte_to_float(uchar b)
15
+ Term *term_new(const char *field, const char *text)
30
16
  {
31
- char flt[4];
32
- if (b == 0) {
33
- return 0.0;
34
- } else {
35
- int mantissa = b & 0x07; // 0x07 = 7 = 0b00000111
36
- int exponent = (b >> 3) & 0x1F; // 0x1f = 31 = 0b00011111
37
-
38
- if (!low_mid_bit) setup_endian();
39
- flt[low_bit] = flt[low_mid_bit] = 0;
40
- flt[high_mid_bit] = mantissa << 5;
41
- flt[high_bit] = exponent + 48;
42
- return *((float *)flt);
43
- }
17
+ Term *t = ALLOC(Term);
18
+ t->field = estrdup(field);
19
+ t->text = estrdup(text);
20
+ return t;
44
21
  }
45
22
 
46
- uchar float_to_byte(float f)
23
+ void term_destroy(Term *self)
47
24
  {
48
- if (f <= 0.0) {
49
- return 0;
50
- } else {
51
- char *bits = (char *)&f;
52
- int mantissa = (bits[high_mid_bit] & 0xEf) >> 5;
53
- int exponent = (bits[high_bit] - 48);
54
-
55
- if (exponent > 0x1f) {
56
- exponent = 0x1f; // 0x1f = 31 = 0b00011111
57
- mantissa = 0x07; // 0x07 = 7 = 0b00000111
58
- }
25
+ free(self->text);
26
+ free(self->field);
27
+ free(self);
28
+ }
59
29
 
60
- if (exponent < 0) {
61
- exponent = 0;
62
- mantissa = 1;
63
- }
30
+ int term_eq(const void *t1, const void *t2)
31
+ {
32
+ return (strcmp(((Term *)t1)->text, ((Term *)t2)->text)) == 0 &&
33
+ (strcmp(((Term *)t1)->field, ((Term *)t2)->field) == 0);
34
+ }
64
35
 
65
- return ((exponent<<3) | mantissa);
66
- }
36
+ ulong term_hash(const void *t)
37
+ {
38
+ return str_hash(((Term *)t)->text) * str_hash(((Term *)t)->field);
67
39
  }
68
40
 
69
- float simdef_length_norm(Similarity *s, char *field, int num_terms)
41
+ /****************************************************************************
42
+ *
43
+ * Similarity
44
+ *
45
+ ****************************************************************************/
46
+
47
+ float simdef_length_norm(Similarity *s, const char *field, int num_terms)
70
48
  {
71
- return (float)(1.0 / sqrt(num_terms));
49
+ (void)s;
50
+ (void)field;
51
+ return (float)(1.0 / sqrt(num_terms));
72
52
  }
73
53
 
74
54
  float simdef_query_norm(struct Similarity *s, float sum_of_squared_weights)
75
55
  {
76
- return (float)(1.0 / sqrt(sum_of_squared_weights));
56
+ (void)s;
57
+ return (float)(1.0 / sqrt(sum_of_squared_weights));
77
58
  }
78
59
 
79
60
  float simdef_tf(struct Similarity *s, float freq)
80
61
  {
81
- return (float)sqrt(freq);
62
+ (void)s;
63
+ return (float)sqrt(freq);
82
64
  }
83
65
 
84
66
  float simdef_sloppy_freq(struct Similarity *s, int distance)
85
67
  {
86
- return (float)(1.0 / (double)(distance + 1));
68
+ (void)s;
69
+ return (float)(1.0 / (double)(distance + 1));
87
70
  }
88
71
 
89
- float simdef_idf_term(struct Similarity *s, Term *term, Searcher *searcher)
72
+ float simdef_idf_term(struct Similarity *s, const char *field, char *term,
73
+ Searcher *searcher)
90
74
  {
91
- return s->idf(s, searcher->doc_freq(searcher, term), searcher->max_doc(searcher));
75
+ return s->idf(s, searcher->doc_freq(searcher, field, term),
76
+ searcher->max_doc(searcher));
92
77
  }
93
78
 
94
- float simdef_idf_phrase(struct Similarity *s, Term **terms, int tcnt, Searcher *searcher)
79
+ float simdef_idf_phrase(struct Similarity *s, const char *field,
80
+ PhrasePosition *positions,
81
+ int pp_cnt, Searcher *searcher)
95
82
  {
96
- float idf = 0.0;
97
- int i;
98
- for (i = 0; i < tcnt; i++) {
99
- idf += s->idf_term(s, terms[i], searcher);
100
- }
101
- return idf;
83
+ float idf = 0.0;
84
+ int i, j;
85
+ for (i = 0; i < pp_cnt; i++) {
86
+ char **terms = positions[i].terms;
87
+ for (j = ary_size(terms) - 1; j >= 0; j--) {
88
+ idf += sim_idf_term(s, field, terms[j], searcher);
89
+ }
90
+ }
91
+ return idf;
102
92
  }
103
93
 
104
94
  float simdef_idf(struct Similarity *s, int doc_freq, int num_docs)
105
95
  {
106
- return (float)(log((float)num_docs/(float)(doc_freq+1)) + 1.0);
96
+ (void)s;
97
+ return (float)(log((float)num_docs/(float)(doc_freq+1)) + 1.0);
107
98
  }
108
99
 
109
100
  float simdef_coord(struct Similarity *s, int overlap, int max_overlap)
110
101
  {
111
- return (float)((double)overlap / (double)max_overlap);
102
+ (void)s;
103
+ return (float)((double)overlap / (double)max_overlap);
112
104
  }
113
105
 
114
106
  float simdef_decode_norm(struct Similarity *s, uchar b)
115
107
  {
116
- return s->norm_table[b];
108
+ return s->norm_table[b];
117
109
  }
118
110
 
119
111
  uchar simdef_encode_norm(struct Similarity *s, float f)
120
112
  {
121
- return float_to_byte(f);
113
+ (void)s;
114
+ return float2byte(f);
122
115
  }
123
116
 
124
117
  void simdef_destroy(Similarity *s)
125
118
  {
126
- /* nothing to do here */
119
+ (void)s;
120
+ /* nothing to do here */
127
121
  }
128
122
 
129
- #ifdef WIN32
130
123
  static Similarity default_similarity = {
131
- NULL,
132
- {0},
133
- &simdef_length_norm,
134
- &simdef_query_norm,
135
- &simdef_tf,
136
- &simdef_sloppy_freq,
137
- &simdef_idf_term,
138
- &simdef_idf_phrase,
139
- &simdef_idf,
140
- &simdef_coord,
141
- &simdef_decode_norm,
142
- &simdef_encode_norm,
143
- &simdef_destroy
124
+ NULL,
125
+ {0},
126
+ &simdef_length_norm,
127
+ &simdef_query_norm,
128
+ &simdef_tf,
129
+ &simdef_sloppy_freq,
130
+ &simdef_idf_term,
131
+ &simdef_idf_phrase,
132
+ &simdef_idf,
133
+ &simdef_coord,
134
+ &simdef_decode_norm,
135
+ &simdef_encode_norm,
136
+ &simdef_destroy
144
137
  };
145
- #else
146
- static Similarity default_similarity = {
147
- data:NULL,
148
- length_norm:&simdef_length_norm,
149
- query_norm:&simdef_query_norm,
150
- tf:&simdef_tf,
151
- sloppy_freq:&simdef_sloppy_freq,
152
- idf_term:&simdef_idf_term,
153
- idf_phrase:&simdef_idf_phrase,
154
- idf:&simdef_idf,
155
- coord:&simdef_coord,
156
- decode_norm:&simdef_decode_norm,
157
- encode_norm:&simdef_encode_norm,
158
- destroy:&simdef_destroy
159
- };
160
- #endif
161
138
 
162
139
  Similarity *sim_create_default()
163
140
  {
164
- int i;
165
- if (!default_similarity.data) {
166
- for (i = 0; i < 256; i++)
167
- default_similarity.norm_table[i] = byte_to_float(i);
141
+ int i;
142
+ if (!default_similarity.data) {
143
+ for (i = 0; i < 256; i++) {
144
+ default_similarity.norm_table[i] = byte2float((unsigned char)i);
145
+ }
168
146
 
169
- default_similarity.data = &default_similarity;
170
- }
171
- return &default_similarity;
147
+ default_similarity.data = &default_similarity;
148
+ }
149
+ return &default_similarity;
172
150
  }
data/ext/similarity.h CHANGED
@@ -9,18 +9,31 @@ typedef struct Searcher Searcher;
9
9
  *
10
10
  ****************************************************************************/
11
11
 
12
- typedef struct Term {
13
- char *field;
14
- char *text;
12
+ #define term_set_new() \
13
+ hs_new((hash_ft)&term_hash, (eq_ft)&term_eq, (free_ft)&term_destroy)
14
+
15
+ typedef struct Term
16
+ {
17
+ char *field;
18
+ char *text;
15
19
  } Term;
16
20
 
17
- Term *term_clone(Term *term);
18
- Term *term_create(const char *field, char *text);
19
- void term_destroy(Term *self);
20
- int term_cmp(void *t1, void *t2);
21
- int term_eq(const void *t1, const void *t2);
22
- unsigned int term_hash(const void *t);
23
- char *term_to_s(Term *term);
21
+ extern Term *term_new(const char *field, const char *text);
22
+ extern void term_destroy(Term *self);
23
+ extern int term_eq(const void *t1, const void *t2);
24
+ extern unsigned long term_hash(const void *t);
25
+
26
+ /***************************************************************************
27
+ *
28
+ * PhrasePosition
29
+ *
30
+ ***************************************************************************/
31
+
32
+ typedef struct PhrasePosition
33
+ {
34
+ int pos;
35
+ char **terms;
36
+ } PhrasePosition;
24
37
 
25
38
  /***************************************************************************
26
39
  *
@@ -30,38 +43,40 @@ char *term_to_s(Term *term);
30
43
 
31
44
  typedef struct Similarity Similarity;
32
45
 
33
- struct Similarity {
34
- void *data;
35
- float norm_table[256];
36
- float (*length_norm)(Similarity *self, char *field, int num_terms);
37
- float (*query_norm)(Similarity *self, float sum_of_squared_weights);
38
- float (*tf)(Similarity *self, float freq);
39
- float (*sloppy_freq)(Similarity *self, int distance);
40
- float (*idf_term)(Similarity *self, Term *term, Searcher *searcher);
41
- float (*idf_phrase)(Similarity *self, Term **terms,
42
- int tcnt, Searcher *searcher);
43
- float (*idf)(Similarity *self, int doc_freq, int num_docs);
44
- float (*coord)(Similarity *self, int overlap, int max_overlap);
45
- float (*decode_norm)(Similarity *self, uchar b);
46
- uchar (*encode_norm)(Similarity *self, float f);
47
- void (*destroy)(Similarity *self);
46
+ struct Similarity
47
+ {
48
+ void *data;
49
+ float norm_table[256];
50
+ float (*length_norm)(Similarity *self, const char *field, int num_terms);
51
+ float (*query_norm)(Similarity *self, float sum_of_squared_weights);
52
+ float (*tf)(Similarity *self, float freq);
53
+ float (*sloppy_freq)(Similarity *self, int distance);
54
+ float (*idf_term)(Similarity *self, const char *field, char *term,
55
+ Searcher *searcher);
56
+ float (*idf_phrase)(Similarity *self, const char *field,
57
+ PhrasePosition *positions,
58
+ int pp_cnt, Searcher *searcher);
59
+ float (*idf)(Similarity *self, int doc_freq, int num_docs);
60
+ float (*coord)(Similarity *self, int overlap, int max_overlap);
61
+ float (*decode_norm)(Similarity *self, unsigned char b);
62
+ unsigned char (*encode_norm)(Similarity *self, float f);
63
+ void (*destroy)(Similarity *self);
48
64
  };
49
65
 
50
66
  #define sim_length_norm(msim, field, num_terms) msim->length_norm(msim, field, num_terms)
51
67
  #define sim_query_norm(msim, sosw) msim->query_norm(msim, sosw)
52
68
  #define sim_tf(msim, freq) msim->tf(msim, freq)
53
69
  #define sim_sloppy_freq(msim, distance) msim->sloppy_freq(msim, distance)
54
- #define sim_idf_term(msim, term, searcher) msim->idf_term(msim, term, searcher)
55
- #define sim_idf_phrase(msim, terms, tcnt, searcher) msim->idf_phrase(msim, terms, tcnt, searcher)
70
+ #define sim_idf_term(msim, field, term, searcher)\
71
+ msim->idf_term(msim, field, term, searcher)
72
+ #define sim_idf_phrase(msim, field, positions, pos_cnt, searcher)\
73
+ msim->idf_phrase(msim, field, positions, pos_cnt, searcher)
56
74
  #define sim_idf(msim, doc_freq, num_docs) msim->idf(msim, doc_freq, num_docs)
57
75
  #define sim_coord(msim, overlap, max_overlap) msim->coord(msim, overlap, max_overlap)
58
76
  #define sim_decode_norm(msim, b) msim->decode_norm(msim, b)
59
77
  #define sim_encode_norm(msim, f) msim->encode_norm(msim, f)
60
78
  #define sim_destroy(msim) msim->destroy(msim)
61
79
 
62
- float byte_to_float(uchar b);
63
- uchar float_to_byte(float f);
64
-
65
80
  Similarity *sim_create_default();
66
81
 
67
82
  #endif
data/ext/sort.c CHANGED
@@ -2,94 +2,96 @@
2
2
  #include "search.h"
3
3
  #include "index.h"
4
4
 
5
- static char * const NO_TERM_ERROR_MSG = "no terms in field to sort by";
6
-
7
5
  /***************************************************************************
8
6
  *
9
7
  * SortField
10
8
  *
11
9
  ***************************************************************************/
12
10
 
13
- unsigned int sort_field_hash(const void *p)
11
+ ulong sort_field_hash(const void *p)
14
12
  {
15
- SortField *self = (SortField *)p;
16
- return str_hash(self->field) ^ (self->type*37);
13
+ SortField *self = (SortField *)p;
14
+ return str_hash(self->field) ^ (self->type*37);
17
15
  }
18
16
 
19
17
  int sort_field_eq(const void *p1, const void *p2)
20
18
  {
21
- SortField *key1 = (SortField *)p1;
22
- SortField *key2 = (SortField *)p2;
23
- int equal = (strcmp(key1->field, key2->field) == 0) && key1->type == key2->type;
24
- /*
25
- * TODO: The could probable be done more cleanly.
26
- * If the sort field is an auto field then it was evaluated before it was
27
- * entered into the cache so we need to pass the compare function back to
28
- * the new sort field.
29
- */
30
- if (equal && (key1->type == SORT_TYPE_AUTO)) {
31
- key2->compare = key1->compare;
32
- }
33
- return equal;
34
- }
35
-
36
- SortField *sort_field_clone(SortField *self)
37
- {
38
- SortField *clone = ALLOC(SortField);
39
- memcpy(clone, self, sizeof(SortField));
40
- mutex_init(&clone->mutex, NULL);
41
- clone->field = estrdup(self->field);
42
- return clone;
43
- }
44
-
45
- SortField *sort_field_alloc(char *field, int type, bool reverse)
46
- {
47
- SortField *self = ALLOC(SortField);
48
- mutex_init(&self->mutex, NULL);
49
- self->field = field ? estrdup(field) : NULL;
50
- self->type = type;
51
- self->reverse = reverse;
52
- self->index = NULL;
53
- self->destroy_index = &free;
54
- self->compare = NULL;
55
- return self;
56
- }
57
-
58
- SortField *sort_field_create(char *field, int type, bool reverse)
59
- {
60
- SortField *sf = NULL;
61
- switch (type) {
62
- case SORT_TYPE_SCORE:
63
- sf = sort_field_score_create(reverse);
64
- break;
65
- case SORT_TYPE_DOC:
66
- sf = sort_field_doc_create(reverse);
67
- break;
68
- case SORT_TYPE_INTEGER:
69
- sf = sort_field_int_create(field, reverse);
70
- break;
71
- case SORT_TYPE_FLOAT:
72
- sf = sort_field_float_create(field, reverse);
73
- break;
74
- case SORT_TYPE_STRING:
75
- sf = sort_field_string_create(field, reverse);
76
- break;
77
- case SORT_TYPE_AUTO:
78
- sf = sort_field_auto_create(field, reverse);
79
- break;
80
- }
81
- return sf;
19
+ SortField *key1 = (SortField *)p1;
20
+ SortField *key2 = (SortField *)p2;
21
+ return (strcmp(key1->field, key2->field) == 0)
22
+ && key1->type == key2->type;
23
+ }
24
+
25
+ static int sort_field_cache_eq(const void *p1, const void *p2)
26
+ {
27
+ SortField *key1 = (SortField *)p1;
28
+ SortField *key2 = (SortField *)p2;
29
+ int equal = (strcmp(key1->field, key2->field) == 0)
30
+ && key1->type == key2->type;
31
+
32
+ return equal;
33
+ }
34
+
35
+ static SortField *sort_field_clone(SortField *self)
36
+ {
37
+ SortField *clone = ALLOC(SortField);
38
+ memcpy(clone, self, sizeof(SortField));
39
+ mutex_init(&clone->mutex, NULL);
40
+ clone->field = estrdup(self->field);
41
+ return clone;
42
+ }
43
+
44
+ static SortField *sort_field_alloc(char *field, int type, bool reverse)
45
+ {
46
+ SortField *self = ALLOC(SortField);
47
+ mutex_init(&self->mutex, NULL);
48
+ self->field = field ? estrdup(field) : NULL;
49
+ self->type = type;
50
+ self->reverse = reverse;
51
+ self->index = NULL;
52
+ self->destroy_index = &free;
53
+ self->compare = NULL;
54
+ return self;
55
+ }
56
+
57
+ SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse)
58
+ {
59
+ SortField *sf = NULL;
60
+ switch (type) {
61
+ case SORT_TYPE_SCORE:
62
+ sf = sort_field_score_new(reverse);
63
+ break;
64
+ case SORT_TYPE_DOC:
65
+ sf = sort_field_doc_new(reverse);
66
+ break;
67
+ case SORT_TYPE_BYTE:
68
+ sf = sort_field_byte_new(field, reverse);
69
+ break;
70
+ case SORT_TYPE_INTEGER:
71
+ sf = sort_field_int_new(field, reverse);
72
+ break;
73
+ case SORT_TYPE_FLOAT:
74
+ sf = sort_field_float_new(field, reverse);
75
+ break;
76
+ case SORT_TYPE_STRING:
77
+ sf = sort_field_string_new(field, reverse);
78
+ break;
79
+ case SORT_TYPE_AUTO:
80
+ sf = sort_field_auto_new(field, reverse);
81
+ break;
82
+ }
83
+ return sf;
82
84
  }
83
85
 
84
86
  void sort_field_destroy(void *p)
85
87
  {
86
- SortField *self = (SortField *)p;
87
- if (self->index) {
88
- self->destroy_index(self->index);
89
- }
90
- free(self->field);
91
- mutex_destroy(&self->mutex);
92
- free(p);
88
+ SortField *self = (SortField *)p;
89
+ if (self->index) {
90
+ self->destroy_index(self->index);
91
+ }
92
+ free(self->field);
93
+ mutex_destroy(&self->mutex);
94
+ free(p);
93
95
  }
94
96
 
95
97
  /*
@@ -97,210 +99,304 @@ void sort_field_destroy(void *p)
97
99
  */
98
100
  char *sort_field_to_s(SortField *self)
99
101
  {
100
- char *str;
101
- char *type = NULL;
102
- switch (self->type) {
103
- case SORT_TYPE_SCORE:
104
- type = "<SCORE>";
105
- break;
106
- case SORT_TYPE_DOC:
107
- type = "<DOC>";
108
- break;
109
- case SORT_TYPE_INTEGER:
110
- type = "<integer>";
111
- break;
112
- case SORT_TYPE_FLOAT:
113
- type = "<float>";
114
- break;
115
- case SORT_TYPE_STRING:
116
- type = "<string>";
117
- break;
118
- case SORT_TYPE_AUTO:
119
- type = "<auto>";
120
- break;
121
- }
122
- if (self->field) {
123
- str = ALLOC_N(char, 10 + strlen(self->field) + strlen(type));
124
- sprintf(str, "%s:%s%s", self->field, type, (self->reverse ? "!" : ""));
125
- } else {
126
- str = ALLOC_N(char, 10 + strlen(type));
127
- sprintf(str, "%s%s", type, (self->reverse ? "!" : ""));
128
- }
129
- return str;
102
+ char *str;
103
+ char *type = NULL;
104
+ switch (self->type) {
105
+ case SORT_TYPE_SCORE:
106
+ type = "<SCORE>";
107
+ break;
108
+ case SORT_TYPE_DOC:
109
+ type = "<DOC>";
110
+ break;
111
+ case SORT_TYPE_BYTE:
112
+ type = "<byte>";
113
+ break;
114
+ case SORT_TYPE_INTEGER:
115
+ type = "<integer>";
116
+ break;
117
+ case SORT_TYPE_FLOAT:
118
+ type = "<float>";
119
+ break;
120
+ case SORT_TYPE_STRING:
121
+ type = "<string>";
122
+ break;
123
+ case SORT_TYPE_AUTO:
124
+ type = "<auto>";
125
+ break;
126
+ }
127
+ if (self->field) {
128
+ str = ALLOC_N(char, 10 + strlen(self->field) + strlen(type));
129
+ sprintf(str, "%s:%s%s", self->field, type, (self->reverse ? "!" : ""));
130
+ } else {
131
+ str = ALLOC_N(char, 10 + strlen(type));
132
+ sprintf(str, "%s%s", type, (self->reverse ? "!" : ""));
133
+ }
134
+ return str;
130
135
  }
131
136
 
132
137
  /***************************************************************************
133
138
  * ScoreSortField
134
139
  ***************************************************************************/
135
140
 
141
+ void sf_score_get_val(void *index, Hit *hit, Comparable *comparable)
142
+ {
143
+ (void)index;
144
+ comparable->val.f = hit->score;
145
+ }
146
+
136
147
  int sf_score_compare(void *index_ptr, Hit *hit2, Hit *hit1)
137
148
  {
138
- float val1 = hit1->score;
139
- float val2 = hit2->score;
140
- if (val1 > val2) return 1;
141
- else if (val1 < val2) return -1;
142
- else return 0;
149
+ float val1 = hit1->score;
150
+ float val2 = hit2->score;
151
+ (void)index_ptr;
152
+
153
+ if (val1 > val2) return 1;
154
+ else if (val1 < val2) return -1;
155
+ else return 0;
143
156
  }
144
157
 
145
- SortField *sort_field_score_create(bool reverse)
158
+ SortField *sort_field_score_new(bool reverse)
146
159
  {
147
- SortField *self = sort_field_alloc(NULL, SORT_TYPE_SCORE, reverse);
148
- self->compare = &sf_score_compare;
149
- return self;
160
+ SortField *self = sort_field_alloc(NULL, SORT_TYPE_SCORE, reverse);
161
+ self->compare = &sf_score_compare;
162
+ self->get_val = &sf_score_get_val;
163
+ return self;
150
164
  }
151
165
 
152
- SortField SORT_FIELD_SCORE = {
153
- MUTEX_INITIALIZER,
154
- /* field */NULL,
155
- /* type */SORT_TYPE_SCORE,
156
- /* reverse */false,
157
- /* index */NULL,
158
- /* compare */&sf_score_compare,
159
- /* create_index */NULL,
160
- /* destroy_index */NULL,
161
- /* handle_term */NULL
166
+ const SortField SORT_FIELD_SCORE = {
167
+ MUTEX_INITIALIZER,
168
+ NULL, /* field */
169
+ SORT_TYPE_SCORE, /* type */
170
+ false, /* reverse */
171
+ NULL, /* index */
172
+ &sf_score_compare, /* compare */
173
+ &sf_score_get_val, /* get_val */
174
+ NULL, /* create_index */
175
+ NULL, /* destroy_index */
176
+ NULL, /* handle_term */
162
177
  };
163
178
 
164
- SortField SORT_FIELD_SCORE_REV = {
165
- MUTEX_INITIALIZER,
166
- /* field */NULL,
167
- /* type */SORT_TYPE_SCORE,
168
- /* reverse */true,
169
- /* index */NULL,
170
- /* compare */&sf_score_compare,
171
- /* create_index */NULL,
172
- /* destroy_index */NULL,
173
- /* handle_term */NULL
179
+ const SortField SORT_FIELD_SCORE_REV = {
180
+ MUTEX_INITIALIZER,
181
+ NULL, /* field */
182
+ SORT_TYPE_SCORE, /* type */
183
+ true, /* reverse */
184
+ NULL, /* index */
185
+ &sf_score_compare, /* compare */
186
+ &sf_score_get_val, /* get_val */
187
+ NULL, /* create_index */
188
+ NULL, /* destroy_index */
189
+ NULL, /* handle_term */
174
190
  };
175
191
 
176
192
  /**************************************************************************
177
193
  * DocSortField
178
194
  ***************************************************************************/
179
195
 
196
+ void sf_doc_get_val(void *index, Hit *hit, Comparable *comparable)
197
+ {
198
+ (void)index;
199
+ comparable->val.i = hit->doc;
200
+ }
201
+
180
202
  int sf_doc_compare(void *index_ptr, Hit *hit1, Hit *hit2)
181
203
  {
182
- int val1 = hit1->doc;
183
- int val2 = hit2->doc;
184
- if (val1 > val2) return 1;
185
- else if (val1 < val2) return -1;
186
- else return 0;
204
+ int val1 = hit1->doc;
205
+ int val2 = hit2->doc;
206
+ (void)index_ptr;
207
+
208
+ if (val1 > val2) return 1;
209
+ else if (val1 < val2) return -1;
210
+ else return 0;
187
211
  }
188
212
 
189
- SortField *sort_field_doc_create(bool reverse)
213
+ SortField *sort_field_doc_new(bool reverse)
190
214
  {
191
- SortField *self = sort_field_alloc(NULL, SORT_TYPE_DOC, reverse);
192
- self->compare = &sf_doc_compare;
193
- return self;
215
+ SortField *self = sort_field_alloc(NULL, SORT_TYPE_DOC, reverse);
216
+ self->compare = &sf_doc_compare;
217
+ self->get_val = &sf_doc_get_val;
218
+ return self;
194
219
  }
195
220
 
196
- SortField SORT_FIELD_DOC = {
197
- MUTEX_INITIALIZER,
198
- /* field */NULL,
199
- /* type */SORT_TYPE_DOC,
200
- /* reverse */false,
201
- /* index */NULL,
202
- /* compare */&sf_doc_compare,
203
- /* create_index */NULL,
204
- /* destroy_index */NULL,
205
- /* handle_term */NULL
221
+ const SortField SORT_FIELD_DOC = {
222
+ MUTEX_INITIALIZER,
223
+ NULL, /* field */
224
+ SORT_TYPE_DOC, /* type */
225
+ false, /* reverse */
226
+ NULL, /* index */
227
+ &sf_doc_compare, /* compare */
228
+ &sf_doc_get_val, /* get_val */
229
+ NULL, /* create_index */
230
+ NULL, /* destroy_index */
231
+ NULL, /* handle_term */
206
232
  };
207
233
 
208
- SortField SORT_FIELD_DOC_REV = {
209
- MUTEX_INITIALIZER,
210
- /* field */NULL,
211
- /* type */SORT_TYPE_DOC,
212
- /* reverse */true,
213
- /* index */NULL,
214
- /* compare */&sf_doc_compare,
215
- /* create_index */NULL,
216
- /* destroy_index */NULL,
217
- /* handle_term */NULL
234
+ const SortField SORT_FIELD_DOC_REV = {
235
+ MUTEX_INITIALIZER,
236
+ NULL, /* field */
237
+ SORT_TYPE_DOC, /* type */
238
+ true, /* reverse */
239
+ NULL, /* index */
240
+ &sf_doc_compare, /* compare */
241
+ &sf_doc_get_val, /* get_val */
242
+ NULL, /* create_index */
243
+ NULL, /* destroy_index */
244
+ NULL, /* handle_term */
218
245
  };
219
246
 
247
+ /***************************************************************************
248
+ * ByteSortField
249
+ ***************************************************************************/
250
+
251
+ static void sf_byte_get_val(void *index, Hit *hit, Comparable *comparable)
252
+ {
253
+ comparable->val.i = ((int *)index)[hit->doc];
254
+ }
255
+
256
+ static int sf_byte_compare(void *index, Hit *hit1, Hit *hit2)
257
+ {
258
+ int val1 = ((int *)index)[hit1->doc];
259
+ int val2 = ((int *)index)[hit2->doc];
260
+ if (val1 > val2) return 1;
261
+ else if (val1 < val2) return -1;
262
+ else return 0;
263
+ }
264
+
265
+ static void *sf_byte_create_index(int size)
266
+ {
267
+ int *index = ALLOC_AND_ZERO_N(int, size + 1);
268
+ return &index[1];
269
+ }
270
+
271
+ static void sf_byte_destroy_index(void *p)
272
+ {
273
+ int *index = (int *)p;
274
+ free(&index[-1]);
275
+ }
276
+
277
+ static void sf_byte_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
278
+ {
279
+ int *index = (int *)index_ptr;
280
+ int val = index[-1]++;
281
+ (void)text;
282
+ while (tde->next(tde)) {
283
+ index[tde->doc_num(tde)] = val;
284
+ }
285
+ }
286
+
287
+ static void sort_field_byte_methods(SortField *self)
288
+ {
289
+ self->type = SORT_TYPE_BYTE;
290
+ self->compare = &sf_byte_compare;
291
+ self->get_val = &sf_byte_get_val;
292
+ self->create_index = &sf_byte_create_index;
293
+ self->destroy_index = &sf_byte_destroy_index;
294
+ self->handle_term = &sf_byte_handle_term;
295
+ }
296
+
297
+ SortField *sort_field_byte_new(char *field, bool reverse)
298
+ {
299
+ SortField *self = sort_field_alloc(field, SORT_TYPE_BYTE, reverse);
300
+ sort_field_byte_methods(self);
301
+ return self;
302
+ }
303
+
220
304
  /***************************************************************************
221
305
  * IntegerSortField
222
306
  ***************************************************************************/
223
307
 
224
- int sf_int_compare(void *index_ptr, Hit *hit1, Hit *hit2)
308
+ void sf_int_get_val(void *index, Hit *hit, Comparable *comparable)
225
309
  {
226
- int *index = (int *)index_ptr;
227
- int val1 = index[hit1->doc];
228
- int val2 = index[hit2->doc];
229
- if (val1 > val2) return 1;
230
- else if (val1 < val2) return -1;
231
- else return 0;
310
+ comparable->val.i = ((int *)index)[hit->doc];
311
+ }
312
+
313
+ int sf_int_compare(void *index, Hit *hit1, Hit *hit2)
314
+ {
315
+ int val1 = ((int *)index)[hit1->doc];
316
+ int val2 = ((int *)index)[hit2->doc];
317
+ if (val1 > val2) return 1;
318
+ else if (val1 < val2) return -1;
319
+ else return 0;
232
320
  }
233
321
 
234
322
  void *sf_int_create_index(int size)
235
323
  {
236
- return ALLOC_N(int, size);
324
+ return ALLOC_AND_ZERO_N(int, size);
237
325
  }
238
326
 
239
327
  void sf_int_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
240
328
  {
241
- int *index = (int *)index_ptr;
242
- int val;
243
- sscanf(text, "%d", &val);
244
- while (tde->next(tde)) {
245
- index[tde->doc_num(tde)] = val;
246
- }
329
+ int *index = (int *)index_ptr;
330
+ int val;
331
+ sscanf(text, "%d", &val);
332
+ while (tde->next(tde)) {
333
+ index[tde->doc_num(tde)] = val;
334
+ }
247
335
  }
248
336
 
249
337
  void sort_field_int_methods(SortField *self)
250
338
  {
251
- self->compare = &sf_int_compare;
252
- self->create_index = &sf_int_create_index;
253
- self->handle_term = &sf_int_handle_term;
339
+ self->type = SORT_TYPE_INTEGER;
340
+ self->compare = &sf_int_compare;
341
+ self->get_val = &sf_int_get_val;
342
+ self->create_index = &sf_int_create_index;
343
+ self->handle_term = &sf_int_handle_term;
254
344
  }
255
345
 
256
- SortField *sort_field_int_create(char *field, bool reverse)
346
+ SortField *sort_field_int_new(char *field, bool reverse)
257
347
  {
258
- SortField *self = sort_field_alloc(field, SORT_TYPE_INTEGER, reverse);
259
- sort_field_int_methods(self);
260
- return self;
348
+ SortField *self = sort_field_alloc(field, SORT_TYPE_INTEGER, reverse);
349
+ sort_field_int_methods(self);
350
+ return self;
261
351
  }
262
352
 
263
353
  /***************************************************************************
264
354
  * FloatSortField
265
355
  ***************************************************************************/
266
356
 
267
- int sf_float_compare(void *index_ptr, Hit *hit1, Hit *hit2)
357
+ void sf_float_get_val(void *index, Hit *hit, Comparable *comparable)
268
358
  {
269
- float *index = (float *)index_ptr;
270
- float val1 = index[hit1->doc];
271
- float val2 = index[hit2->doc];
272
- if (val1 > val2) return 1;
273
- else if (val1 < val2) return -1;
274
- else return 0;
359
+ comparable->val.f = ((float *)index)[hit->doc];
360
+ }
361
+
362
+ int sf_float_compare(void *index, Hit *hit1, Hit *hit2)
363
+ {
364
+ float val1 = ((float *)index)[hit1->doc];
365
+ float val2 = ((float *)index)[hit2->doc];
366
+ if (val1 > val2) return 1;
367
+ else if (val1 < val2) return -1;
368
+ else return 0;
275
369
  }
276
370
 
277
371
  void *sf_float_create_index(int size)
278
372
  {
279
- return ALLOC_N(float, size);
373
+ return ALLOC_AND_ZERO_N(float, size);
280
374
  }
281
375
 
282
376
  void sf_float_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
283
377
  {
284
- float *index = (float *)index_ptr;
285
- float val;
286
- sscanf(text, "%g", &val);
287
- while (tde->next(tde)) {
288
- index[tde->doc_num(tde)] = val;
289
- }
378
+ float *index = (float *)index_ptr;
379
+ float val;
380
+ sscanf(text, "%g", &val);
381
+ while (tde->next(tde)) {
382
+ index[tde->doc_num(tde)] = val;
383
+ }
290
384
  }
291
385
 
292
386
  void sort_field_float_methods(SortField *self)
293
387
  {
294
- self->compare = &sf_float_compare;
295
- self->create_index = &sf_float_create_index;
296
- self->handle_term = &sf_float_handle_term;
388
+ self->type = SORT_TYPE_FLOAT;
389
+ self->compare = &sf_float_compare;
390
+ self->get_val = &sf_float_get_val;
391
+ self->create_index = &sf_float_create_index;
392
+ self->handle_term = &sf_float_handle_term;
297
393
  }
298
394
 
299
- SortField *sort_field_float_create(char *field, bool reverse)
395
+ SortField *sort_field_float_new(char *field, bool reverse)
300
396
  {
301
- SortField *self = sort_field_alloc(field, SORT_TYPE_FLOAT, reverse);
302
- sort_field_float_methods(self);
303
- return self;
397
+ SortField *self = sort_field_alloc(field, SORT_TYPE_FLOAT, reverse);
398
+ sort_field_float_methods(self);
399
+ return self;
304
400
  }
305
401
 
306
402
  /***************************************************************************
@@ -309,78 +405,99 @@ SortField *sort_field_float_create(char *field, bool reverse)
309
405
 
310
406
  #define VALUES_ARRAY_START_SIZE 8
311
407
  typedef struct StringIndex {
312
- int size;
313
- int *index;
314
- char **values;
315
- int v_cnt;
316
- int v_size;
408
+ int size;
409
+ int *index;
410
+ char **values;
411
+ int v_size;
412
+ int v_capa;
317
413
  } StringIndex;
318
414
 
319
- int sf_string_compare(void *index_ptr, Hit *hit1, Hit *hit2)
415
+ void sf_string_get_val(void *index, Hit *hit, Comparable *comparable)
320
416
  {
321
- StringIndex *index = (StringIndex *)index_ptr;
322
- return strcoll(index->values[index->index[hit1->doc]],
323
- index->values[index->index[hit2->doc]]);
324
- /*
325
- int val1 = index->index[hit1->doc];
326
- int val2 = index->index[hit2->doc];
327
- if (val1 > val2) return 1;
328
- else if (val1 < val2) return -1;
329
- else return 0;
330
- */
417
+ comparable->val.s
418
+ = ((StringIndex *)index)->values[
419
+ ((StringIndex *)index)->index[hit->doc]];
420
+ }
421
+
422
+ int sf_string_compare(void *index, Hit *hit1, Hit *hit2)
423
+ {
424
+ char *s1 = ((StringIndex *)index)->values[
425
+ ((StringIndex *)index)->index[hit1->doc]];
426
+ char *s2 = ((StringIndex *)index)->values[
427
+ ((StringIndex *)index)->index[hit2->doc]];
428
+
429
+ if (s1 == NULL) return s1 ? -1 : 0;
430
+ if (s2 == NULL) return 1;
431
+
432
+ #ifdef POSH_OS_WIN32
433
+ return strcmp(s1, s2);
434
+ #else
435
+ return strcoll(s1, s2);
436
+ #endif
437
+
438
+ /*
439
+ * TODO: investigate whether it would be a good idea to presort strings.
440
+ *
441
+ int val1 = index->index[hit1->doc];
442
+ int val2 = index->index[hit2->doc];
443
+ if (val1 > val2) return 1;
444
+ else if (val1 < val2) return -1;
445
+ else return 0;
446
+ */
331
447
  }
332
448
 
333
449
  void *sf_string_create_index(int size)
334
450
  {
335
- StringIndex *self = ALLOC(StringIndex);
336
- ZEROSET(self, StringIndex, 1);
337
- self->size = size;
338
- self->index = ALLOC_N(int, size);
339
- ZEROSET(self->index, int, size);
340
- self->v_size = VALUES_ARRAY_START_SIZE;
341
- self->values = ALLOC_N(char *, VALUES_ARRAY_START_SIZE);
342
- return self;
451
+ StringIndex *self = ALLOC_AND_ZERO(StringIndex);
452
+ self->size = size;
453
+ self->index = ALLOC_AND_ZERO_N(int, size);
454
+ self->v_capa = VALUES_ARRAY_START_SIZE;
455
+ self->v_size = 1; /* leave the first value as NULL */
456
+ self->values = ALLOC_AND_ZERO_N(char *, VALUES_ARRAY_START_SIZE);
457
+ return self;
343
458
  }
344
459
 
345
460
  void sf_string_destroy_index(void *p)
346
461
  {
347
- StringIndex *self = (StringIndex *)p;
348
- int i;
349
- free(self->index);
350
- for (i = 0; i < self->v_cnt; i++) {
351
- free(self->values[i]);
352
- }
353
- free(self->values);
354
- free(self);
462
+ StringIndex *self = (StringIndex *)p;
463
+ int i;
464
+ free(self->index);
465
+ for (i = 0; i < self->v_size; i++) {
466
+ free(self->values[i]);
467
+ }
468
+ free(self->values);
469
+ free(self);
355
470
  }
356
471
 
357
472
  void sf_string_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
358
473
  {
359
- StringIndex *index = (StringIndex *)index_ptr;
360
- if (index->v_cnt >= index->v_size) {
361
- index->v_size *= 2;
362
- index->values = REALLOC_N(index->values, char *, index->v_size);
363
- }
364
- index->values[index->v_cnt] = estrdup(text);
365
- while (tde->next(tde)) {
366
- index->index[tde->doc_num(tde)] = index->v_cnt;
367
- }
368
- index->v_cnt++;
474
+ StringIndex *index = (StringIndex *)index_ptr;
475
+ if (index->v_size >= index->v_capa) {
476
+ index->v_capa *= 2;
477
+ index->values = REALLOC_N(index->values, char *, index->v_capa);
478
+ }
479
+ index->values[index->v_size] = estrdup(text);
480
+ while (tde->next(tde)) {
481
+ index->index[tde->doc_num(tde)] = index->v_size;
482
+ }
483
+ index->v_size++;
369
484
  }
370
485
 
371
486
  void sort_field_string_methods(SortField *self)
372
487
  {
373
- self->compare = &sf_string_compare;
374
- self->create_index = &sf_string_create_index;
375
- self->destroy_index = &sf_string_destroy_index;
376
- self->handle_term = &sf_string_handle_term;
488
+ self->type = SORT_TYPE_STRING;
489
+ self->compare = &sf_string_compare;
490
+ self->get_val = &sf_string_get_val;
491
+ self->create_index = &sf_string_create_index;
492
+ self->destroy_index = &sf_string_destroy_index;
493
+ self->handle_term = &sf_string_handle_term;
377
494
  }
378
495
 
379
- SortField *sort_field_string_create(char *field, bool reverse)
496
+ SortField *sort_field_string_new(char *field, bool reverse)
380
497
  {
381
- SortField *self = sort_field_alloc(field, SORT_TYPE_STRING, reverse);
382
- sort_field_string_methods(self);
383
- return self;
498
+ SortField *self = sort_field_alloc(field, SORT_TYPE_STRING, reverse);
499
+ sort_field_string_methods(self);
500
+ return self;
384
501
  }
385
502
 
386
503
  /***************************************************************************
@@ -389,27 +506,27 @@ SortField *sort_field_string_create(char *field, bool reverse)
389
506
 
390
507
  void sort_field_auto_evaluate(SortField *sf, char *text)
391
508
  {
392
- int int_val;
393
- float float_val;
394
- size_t text_len = 0, scan_len = 0;
509
+ int int_val;
510
+ float float_val;
511
+ int text_len = 0, scan_len = 0;
395
512
 
396
- text_len = strlen(text);
397
- sscanf(text, "%d%n", &int_val, &scan_len);
398
- if (scan_len == text_len) {
399
- sort_field_int_methods(sf);
400
- } else {
401
- sscanf(text, "%f%n", &float_val, &scan_len);
513
+ text_len = (int)strlen(text);
514
+ sscanf(text, "%d%n", &int_val, &scan_len);
402
515
  if (scan_len == text_len) {
403
- sort_field_float_methods(sf);
516
+ sort_field_int_methods(sf);
404
517
  } else {
405
- sort_field_string_methods(sf);
518
+ sscanf(text, "%f%n", &float_val, &scan_len);
519
+ if (scan_len == text_len) {
520
+ sort_field_float_methods(sf);
521
+ } else {
522
+ sort_field_string_methods(sf);
523
+ }
406
524
  }
407
- }
408
525
  }
409
526
 
410
- SortField *sort_field_auto_create(char *field, bool reverse)
527
+ SortField *sort_field_auto_new(char *field, bool reverse)
411
528
  {
412
- return sort_field_alloc(field, SORT_TYPE_AUTO, reverse);
529
+ return sort_field_alloc(field, SORT_TYPE_AUTO, reverse);
413
530
  }
414
531
 
415
532
  /***************************************************************************
@@ -420,58 +537,60 @@ SortField *sort_field_auto_create(char *field, bool reverse)
420
537
 
421
538
  void *field_cache_get_index(IndexReader *ir, SortField *sf)
422
539
  {
423
- void *index = NULL;
424
- int length = 0;
425
- Term term;
426
- TermBuffer *tb;
427
- TermEnum *volatile te = NULL;
428
- TermDocEnum *volatile tde = NULL;
429
- char *field = sf->field;
430
- SortField *sf_clone;
431
-
432
- mutex_lock(&sf->mutex);
433
- if (!ir->sort_cache) {
434
- ir->sort_cache = h_new(&sort_field_hash, &sort_field_eq,
435
- &sort_field_destroy, NULL);
436
- }
437
- index = h_get(ir->sort_cache, sf);
438
-
439
- if (index == NULL) {
440
- length = ir->max_doc(ir);
441
- if (length > 0) {
442
- TRY
443
- tde = ir->term_docs(ir);
444
- term.field = field;
445
- term.text = "";
446
- te = ir->terms_from(ir, &term);
447
- if (te->tb_curr == NULL) {
448
- RAISE(ARG_ERROR, NO_TERM_ERROR_MSG);
449
- }
540
+ void *index = NULL;
541
+ int length = 0;
542
+ TermEnum *volatile te = NULL;
543
+ TermDocEnum *volatile tde = NULL;
544
+ SortField *sf_clone;
545
+ const int field_num = fis_get_field_num(ir->fis, sf->field);
546
+
547
+ if (field_num < 0) {
548
+ RAISE(ARG_ERROR,
549
+ "Cannot sort by field \"%s\". It doesn't exist in the index.",
550
+ sf->field);
551
+ }
450
552
 
451
- if (sf->type == SORT_TYPE_AUTO) {
452
- sort_field_auto_evaluate(sf, te->tb_curr->text);
453
- }
553
+ mutex_lock(&sf->mutex);
554
+ if (!ir->sort_cache) {
555
+ ir->sort_cache = h_new(&sort_field_hash, &sort_field_cache_eq,
556
+ &sort_field_destroy, NULL);
557
+ }
454
558
 
455
- index = sf->create_index(length);
456
-
457
- do {
458
- tb = te->tb_curr;
459
- if (strcmp(tb->field, field) != 0) break;
460
- term.text = tb->text;
461
- tde->seek(tde, &term);
462
- sf->handle_term(index, tde, tb->text);
463
- } while (te->next(te));
464
- XFINALLY
465
- tde->close(tde);
559
+ if (sf->type == SORT_TYPE_AUTO) {
560
+ te = ir->terms(ir, field_num);
561
+ if (!te->next(te)) {
562
+ RAISE(ARG_ERROR,
563
+ "Cannot sort by field \"%s\" as there are no terms "
564
+ "in that field in the index.", sf->field);
565
+ }
566
+ sort_field_auto_evaluate(sf, te->curr_term);
466
567
  te->close(te);
467
- XENDTRY
468
568
  }
469
- sf_clone = sort_field_clone(sf);
470
- sf_clone->index = index;
471
- h_set(ir->sort_cache, sf_clone, index);
472
- }
473
- mutex_unlock(&sf->mutex);
474
- return index;
569
+
570
+ index = h_get(ir->sort_cache, sf);
571
+
572
+ if (index == NULL) {
573
+ length = ir->max_doc(ir);
574
+ if (length > 0) {
575
+ TRY
576
+ tde = ir->term_docs(ir);
577
+ te = ir->terms(ir, field_num);
578
+ index = sf->create_index(length);
579
+ while (te->next(te)) {
580
+ tde->seek_te(tde, te);
581
+ sf->handle_term(index, tde, te->curr_term);
582
+ }
583
+ XFINALLY
584
+ tde->close(tde);
585
+ te->close(te);
586
+ XENDTRY
587
+ }
588
+ sf_clone = sort_field_clone(sf);
589
+ sf_clone->index = index;
590
+ h_set(ir->sort_cache, sf_clone, index);
591
+ }
592
+ mutex_unlock(&sf->mutex);
593
+ return index;
475
594
  }
476
595
 
477
596
  /***************************************************************************
@@ -485,19 +604,19 @@ void *field_cache_get_index(IndexReader *ir, SortField *sf)
485
604
  ***************************************************************************/
486
605
 
487
606
  typedef struct Comparator {
488
- void *index;
489
- bool reverse : 1;
490
- int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
607
+ void *index;
608
+ bool reverse : 1;
609
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
491
610
  } Comparator;
492
611
 
493
- Comparator *comparator_create(void *index, bool reverse,
494
- int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2))
612
+ Comparator *comparator_new(void *index, bool reverse,
613
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2))
495
614
  {
496
- Comparator *self = ALLOC(Comparator);
497
- self->index = index;
498
- self->reverse = reverse;
499
- self->compare = compare;
500
- return self;
615
+ Comparator *self = ALLOC(Comparator);
616
+ self->index = index;
617
+ self->reverse = reverse;
618
+ self->compare = compare;
619
+ return self;
501
620
  }
502
621
 
503
622
  /***************************************************************************
@@ -505,164 +624,279 @@ Comparator *comparator_create(void *index, bool reverse,
505
624
  ***************************************************************************/
506
625
 
507
626
  typedef struct Sorter {
508
- Comparator **comparators;
509
- int c_cnt;
627
+ Comparator **comparators;
628
+ int c_cnt;
629
+ Sort *sort;
510
630
  } Sorter;
511
631
 
512
632
  Comparator *sorter_get_comparator(SortField *sf, IndexReader *ir)
513
633
  {
514
- void *index = NULL;
634
+ void *index = NULL;
515
635
 
516
- if (sf->type > SORT_TYPE_DOC) {
517
- index = field_cache_get_index(ir, sf);
518
- }
519
- return comparator_create(index, sf->reverse, sf->compare);
636
+ if (sf->type > SORT_TYPE_DOC) {
637
+ index = field_cache_get_index(ir, sf);
638
+ }
639
+ return comparator_new(index, sf->reverse, sf->compare);
520
640
  }
521
641
 
522
- void sorter_destroy(void *p)
642
+ void sorter_destroy(Sorter *self)
523
643
  {
524
- int i;
525
- Sorter *self = (Sorter *)p;
644
+ int i;
526
645
 
527
- for (i = 0; i < self->c_cnt; i++) {
528
- free(self->comparators[i]);
529
- }
530
- free(self->comparators);
531
- free(self);
646
+ for (i = 0; i < self->c_cnt; i++) {
647
+ free(self->comparators[i]);
648
+ }
649
+ free(self->comparators);
650
+ free(self);
532
651
  }
533
652
 
534
- Sorter *sorter_create(int size)
653
+ Sorter *sorter_new(Sort *sort)
535
654
  {
536
- Sorter *self = ALLOC(Sorter);
537
- self->c_cnt = size;
538
- self->comparators = ALLOC_N(Comparator *, size);
539
- ZEROSET(self->comparators, Comparator *, size);
540
- return self;
655
+ Sorter *self = ALLOC(Sorter);
656
+ self->c_cnt = sort->size;
657
+ self->comparators = ALLOC_AND_ZERO_N(Comparator *, self->c_cnt);
658
+ self->sort = sort;
659
+ return self;
541
660
  }
542
661
 
543
662
  /***************************************************************************
544
663
  * FieldSortedHitQueue
545
664
  ***************************************************************************/
546
665
 
547
- bool fshq_less_than(void *hit1, void *hit2)
666
+ bool fshq_less_than(const void *hit1, const void *hit2)
548
667
  {
549
- int cmp = 0;
550
- printf("Whoops, shouldn't call this.\n");
551
- if (cmp != 0) {
552
- return cmp;
553
- } else {
554
- return ((Hit *)hit1)->score < ((Hit *)hit2)->score;
555
- }
668
+ int cmp = 0;
669
+ printf("Whoops, shouldn't call this.\n");
670
+ if (cmp != 0) {
671
+ return cmp;
672
+ } else {
673
+ return ((Hit *)hit1)->score < ((Hit *)hit2)->score;
674
+ }
556
675
  }
557
676
 
558
- inline bool fshq_lt(Hit *sorter_ptr, Hit *hit1, Hit *hit2)
677
+ inline bool fshq_lt(Sorter *sorter, Hit *hit1, Hit *hit2)
559
678
  {
560
- Sorter *sorter = (Sorter *)sorter_ptr;
561
- Comparator *comp;
562
- int diff = 0, i;
563
- for (i = 0; i < sorter->c_cnt && diff == 0; i++) {
564
- comp = sorter->comparators[i];
565
- if (comp->reverse) {
566
- diff = comp->compare(comp->index, hit2, hit1);
567
- } else {
568
- diff = comp->compare(comp->index, hit1, hit2);
679
+ Comparator *comp;
680
+ int diff = 0, i;
681
+ for (i = 0; i < sorter->c_cnt && diff == 0; i++) {
682
+ comp = sorter->comparators[i];
683
+ if (comp->reverse) {
684
+ diff = comp->compare(comp->index, hit2, hit1);
685
+ } else {
686
+ diff = comp->compare(comp->index, hit1, hit2);
687
+ }
569
688
  }
570
- }
571
689
 
572
- if (diff != 0) {
573
- return diff > 0;
574
- } else {
575
- return hit1->doc > hit2->doc;
576
- }
690
+ if (diff != 0) {
691
+ return diff > 0;
692
+ } else {
693
+ return hit1->doc > hit2->doc;
694
+ }
577
695
  }
578
696
 
579
697
  void fshq_pq_down(PriorityQueue *pq)
580
698
  {
581
- register int i = 1;
582
- register int j = 2; //i << 1;
583
- register int k = 3; //j + 1;
584
- Hit **heap = (Hit **)pq->heap;
585
- Hit *node = heap[i]; // save top node
699
+ register int i = 1;
700
+ register int j = 2; /* i << 1; */
701
+ register int k = 3; /* j + 1; */
702
+ Hit **heap = (Hit **)pq->heap;
703
+ Hit *node = heap[i]; /* save top node */
704
+ Sorter *sorter = (Sorter *)heap[0];
586
705
 
587
- if ((k <= pq->count) && fshq_lt(heap[0], heap[k], heap[j]))
588
- j = k;
706
+ if ((k <= pq->size) && fshq_lt(sorter, heap[k], heap[j])) {
707
+ j = k;
708
+ }
589
709
 
590
- while ((j <= pq->count) && fshq_lt(heap[0], heap[j], node)) {
591
- heap[i] = heap[j]; // shift up child
592
- i = j;
593
- j = i << 1;
594
- k = j + 1;
595
- if ((k <= pq->count) && fshq_lt(heap[0], heap[k], heap[j]))
596
- j = k;
597
- }
598
- heap[i] = node;
710
+ while ((j <= pq->size) && fshq_lt(sorter, heap[j], node)) {
711
+ heap[i] = heap[j]; /* shift up child */
712
+ i = j;
713
+ j = i << 1;
714
+ k = j + 1;
715
+ if ((k <= pq->size) && fshq_lt(sorter, heap[k], heap[j])) {
716
+ j = k;
717
+ }
718
+ }
719
+ heap[i] = node;
599
720
  }
600
721
 
601
722
  Hit *fshq_pq_pop(PriorityQueue *pq)
602
723
  {
603
- if (pq->count > 0) {
604
- Hit *result = (Hit *)pq->heap[1]; // save first value
605
- pq->heap[1] = pq->heap[pq->count]; // move last to first
606
- pq->heap[pq->count] = NULL;
607
- pq->count--;
608
- fshq_pq_down(pq); // adjust heap
609
- return result;
610
- } else {
611
- return NULL;
612
- }
724
+ if (pq->size > 0) {
725
+ Hit *hit = (Hit *)pq->heap[1]; /* save first value */
726
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
727
+ pq->heap[pq->size] = NULL;
728
+ pq->size--;
729
+ fshq_pq_down(pq); /* adjust heap */
730
+ return hit;
731
+ } else {
732
+ return NULL;
733
+ }
613
734
  }
614
735
 
615
736
  inline void fshq_pq_up(PriorityQueue *pq)
616
737
  {
617
- Hit **heap = (Hit **)pq->heap;
618
- Hit *node;
619
- int i = pq->count;
620
- int j = i >> 1;
621
- node = heap[i];
622
-
623
- while ((j > 0) && fshq_lt(heap[0], node, heap[j])) {
624
- heap[i] = heap[j];
625
- i = j;
626
- j = j >> 1;
627
- }
628
- heap[i] = node;
738
+ Hit **heap = (Hit **)pq->heap;
739
+ Hit *node;
740
+ int i = pq->size;
741
+ int j = i >> 1;
742
+ Sorter *sorter = (Sorter *)heap[0];
743
+ node = heap[i];
744
+
745
+ while ((j > 0) && fshq_lt(sorter, node, heap[j])) {
746
+ heap[i] = heap[j];
747
+ i = j;
748
+ j = j >> 1;
749
+ }
750
+ heap[i] = node;
629
751
  }
630
752
 
631
753
  void fshq_pq_insert(PriorityQueue *pq, Hit *hit)
632
754
  {
633
- if (pq->count < pq->size) {
634
- Hit *new_hit = ALLOC(Hit);
635
- memcpy(new_hit, hit, sizeof(Hit));
636
- pq->count++;
637
- pq->heap[pq->count] = new_hit;
638
- fshq_pq_up(pq);
639
- } else if (pq->count > 0 &&
640
- fshq_lt((Hit *)pq->heap[0], (Hit *)pq->heap[1], hit)) {
641
- memcpy(pq->heap[1], hit, sizeof(Hit));
642
- fshq_pq_down(pq);
643
- }
755
+ if (pq->size < pq->capa) {
756
+ Hit *new_hit = ALLOC(Hit);
757
+ memcpy(new_hit, hit, sizeof(Hit));
758
+ pq->size++;
759
+ if (pq->size >= pq->mem_capa) {
760
+ pq->mem_capa <<= 1;
761
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
762
+ }
763
+ pq->heap[pq->size] = new_hit;
764
+ fshq_pq_up(pq);
765
+ } else if (pq->size > 0
766
+ && fshq_lt((Sorter *)pq->heap[0], (Hit *)pq->heap[1], hit)) {
767
+ memcpy(pq->heap[1], hit, sizeof(Hit));
768
+ fshq_pq_down(pq);
769
+ }
644
770
  }
645
771
 
646
772
  void fshq_pq_destroy(PriorityQueue *self)
647
773
  {
648
- sorter_destroy(self->heap[0]);
649
- pq_destroy(self);
774
+ sorter_destroy(self->heap[0]);
775
+ pq_destroy(self);
776
+ }
777
+
778
+ PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir)
779
+ {
780
+ PriorityQueue *self = pq_new(size, &fshq_less_than, &free);
781
+ int i;
782
+ Sorter *sorter = sorter_new(sort);
783
+ SortField *sf;
784
+
785
+ for (i = 0; i < sort->size; i++) {
786
+ sf = sort->sort_fields[i];
787
+ sorter->comparators[i] = sorter_get_comparator(sf, ir);
788
+ }
789
+ self->heap[0] = sorter;
790
+
791
+ return self;
792
+ }
793
+
794
+ Hit *fshq_pq_pop_fd(PriorityQueue *pq)
795
+ {
796
+ if (pq->size <= 0) {
797
+ return NULL;
798
+ }
799
+ else {
800
+ int j;
801
+ Sorter *sorter = (Sorter *)pq->heap[0];
802
+ const int cmp_cnt = sorter->c_cnt;
803
+ SortField **sort_fields = sorter->sort->sort_fields;
804
+ Hit *hit = (Hit *)pq->heap[1]; /* save first value */
805
+ FieldDoc *field_doc;
806
+ Comparable *comparables;
807
+ Comparator **comparators = sorter->comparators;
808
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
809
+ pq->heap[pq->size] = NULL;
810
+ pq->size--;
811
+ fshq_pq_down(pq); /* adjust heap */
812
+
813
+ field_doc = (FieldDoc *)emalloc(sizeof(FieldDoc)
814
+ + sizeof(Comparable)*cmp_cnt);
815
+ comparables = field_doc->comparables;
816
+ memcpy(field_doc, hit, sizeof(Hit));
817
+ field_doc->size = cmp_cnt;
818
+
819
+ for (j = 0; j < cmp_cnt; j++) {
820
+ SortField *sf = sort_fields[j];
821
+ Comparator *comparator = comparators[j];
822
+ sf->get_val(comparator->index, hit, &(comparables[j]));
823
+ comparables[j].type = sf->type;
824
+ comparables[j].reverse = comparator->reverse;
825
+ }
826
+ free(hit);
827
+ return (Hit *)field_doc;
828
+ }
650
829
  }
651
830
 
652
- PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir)
831
+ /***************************************************************************
832
+ * FieldDoc
833
+ ***************************************************************************/
834
+
835
+ void fd_destroy(FieldDoc *fd)
653
836
  {
654
- PriorityQueue *self = pq_create(size, &fshq_less_than);
655
- int i;
656
- Sorter *sorter = sorter_create(sort->sf_cnt);
657
- SortField *sf;
837
+ free(fd);
838
+ }
658
839
 
659
- for (i = 0; i < sort->sf_cnt; i++) {
660
- sf = sort->sort_fields[i];
661
- sorter->comparators[i] = sorter_get_comparator(sf, ir);
662
- }
663
- self->heap[0] = sorter;
664
-
665
- return self;
840
+ /***************************************************************************
841
+ * FieldDocSortedHitQueue
842
+ ***************************************************************************/
843
+
844
+ bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2)
845
+ {
846
+ int c = 0, i;
847
+ Comparable *cmps1 = fd1->comparables;
848
+ Comparable *cmps2 = fd2->comparables;
849
+
850
+ for (i = 0; i < fd1->size && c == 0; i++) {
851
+ int type = cmps1[i].type;
852
+ switch (type) {
853
+ case SORT_TYPE_SCORE:
854
+ if (cmps1[i].val.f < cmps2[i].val.f) c = 1;
855
+ if (cmps1[i].val.f > cmps2[i].val.f) c = -1;
856
+ break;
857
+ case SORT_TYPE_FLOAT:
858
+ if (cmps1[i].val.f > cmps2[i].val.f) c = 1;
859
+ if (cmps1[i].val.f < cmps2[i].val.f) c = -1;
860
+ break;
861
+ case SORT_TYPE_DOC:
862
+ if (fd1->hit.doc > fd2->hit.doc) c = 1;
863
+ if (fd1->hit.doc < fd2->hit.doc) c = -1;
864
+ break;
865
+ case SORT_TYPE_INTEGER:
866
+ if (cmps1[i].val.i > cmps2[i].val.i) c = 1;
867
+ if (cmps1[i].val.i < cmps2[i].val.i) c = -1;
868
+ break;
869
+ case SORT_TYPE_BYTE:
870
+ if (cmps1[i].val.i > cmps2[i].val.i) c = 1;
871
+ if (cmps1[i].val.i < cmps2[i].val.i) c = -1;
872
+ break;
873
+ case SORT_TYPE_STRING:
874
+ do {
875
+ char *s1 = cmps1[i].val.s;
876
+ char *s2 = cmps2[i].val.s;
877
+ if (s1 == NULL) c = s2 ? -1 : 0;
878
+ else if (s2 == NULL) c = 1;
879
+ #ifdef POSH_OS_WIN32
880
+ else c = strcmp(s1, s2);
881
+ #else
882
+ else c = strcoll(s1, s2);
883
+ #endif
884
+ } while (0);
885
+ break;
886
+ default:
887
+ RAISE(ERROR, "Unknown sort type: %d.", type);
888
+ break;
889
+ }
890
+ if (cmps1[i].reverse) {
891
+ c = -c;
892
+ }
893
+ }
894
+ if (c == 0) {
895
+ return fd1->hit.doc > fd2->hit.doc;
896
+ }
897
+ else {
898
+ return c > 0;
899
+ }
666
900
  }
667
901
 
668
902
  /***************************************************************************
@@ -671,75 +905,78 @@ PriorityQueue *fshq_pq_create(int size, Sort *sort, IndexReader *ir)
671
905
  *
672
906
  ***************************************************************************/
673
907
 
674
- Sort *sort_create()
908
+ #define SORT_INIT_SIZE 4
909
+
910
+ Sort *sort_new()
675
911
  {
676
- Sort *self = ALLOC(Sort);
677
- self->sf_cnt = 0;
678
- self->sf_capa = ARRAY_INIT_SIZE;
679
- self->sort_fields = ALLOC_N(SortField *, ARRAY_INIT_SIZE);
680
- self->destroy_all = true;
912
+ Sort *self = ALLOC(Sort);
913
+ self->size = 0;
914
+ self->capa = SORT_INIT_SIZE;
915
+ self->sort_fields = ALLOC_N(SortField *, SORT_INIT_SIZE);
916
+ self->destroy_all = true;
917
+ self->start = 0;
681
918
 
682
- return self;
919
+ return self;
683
920
  }
684
921
 
685
922
  void sort_clear(Sort *self)
686
923
  {
687
- int i;
688
- if (self->destroy_all) {
689
- for (i = 0; i < self->sf_cnt; i++) {
690
- sort_field_destroy(self->sort_fields[i]);
924
+ int i;
925
+ if (self->destroy_all) {
926
+ for (i = 0; i < self->size; i++) {
927
+ sort_field_destroy(self->sort_fields[i]);
928
+ }
691
929
  }
692
- }
693
- self->sf_cnt = 0;
930
+ self->size = 0;
694
931
  }
695
932
 
696
933
  void sort_destroy(void *p)
697
934
  {
698
- Sort *self = (Sort *)p;
699
- sort_clear(self);
700
- free(self->sort_fields);
701
- free(self);
935
+ Sort *self = (Sort *)p;
936
+ sort_clear(self);
937
+ free(self->sort_fields);
938
+ free(self);
702
939
  }
703
940
 
704
941
  void sort_add_sort_field(Sort *self, SortField *sf)
705
942
  {
706
- if (self->sf_cnt == self->sf_capa) {
707
- self->sf_capa *= 2;
708
- REALLOC_N(self->sort_fields, SortField *, self->sf_capa);
709
- }
943
+ if (self->size == self->capa) {
944
+ self->capa <<= 1;
945
+ REALLOC_N(self->sort_fields, SortField *, self->capa);
946
+ }
710
947
 
711
- self->sort_fields[self->sf_cnt] = sf;
712
- self->sf_cnt++;
948
+ self->sort_fields[self->size] = sf;
949
+ self->size++;
713
950
  }
714
951
 
715
952
  char *sort_to_s(Sort *self)
716
953
  {
717
- int i, len = 20;
718
- char *s;
719
- char *str;
720
- char **sf_strs = ALLOC_N(char *, self->sf_cnt);
721
-
722
- for (i = 0; i < self->sf_cnt; i++) {
723
- sf_strs[i] = s = sort_field_to_s(self->sort_fields[i]);
724
- len += (int)strlen(s) + 2;
725
- }
726
-
727
- str = ALLOC_N(char, len);
728
- s = "Sort[";
729
- len = (int)strlen(s);
730
- memcpy(str, s, len);
731
-
732
- s = str + len;
733
- for (i = 0; i < self->sf_cnt; i++) {
734
- sprintf(s, "%s, ", sf_strs[i]);
735
- s += (int)strlen(s);
736
- free(sf_strs[i]);
737
- }
738
- free(sf_strs);
739
-
740
- if (self->sf_cnt > 0) {
741
- s -= 2;
742
- }
743
- sprintf(s, "]");
744
- return str;
954
+ int i, len = 20;
955
+ char *s;
956
+ char *str;
957
+ char **sf_strs = ALLOC_N(char *, self->size);
958
+
959
+ for (i = 0; i < self->size; i++) {
960
+ sf_strs[i] = s = sort_field_to_s(self->sort_fields[i]);
961
+ len += (int)strlen(s) + 2;
962
+ }
963
+
964
+ str = ALLOC_N(char, len);
965
+ s = "Sort[";
966
+ len = (int)strlen(s);
967
+ memcpy(str, s, len);
968
+
969
+ s = str + len;
970
+ for (i = 0; i < self->size; i++) {
971
+ sprintf(s, "%s, ", sf_strs[i]);
972
+ s += (int)strlen(s);
973
+ free(sf_strs[i]);
974
+ }
975
+ free(sf_strs);
976
+
977
+ if (self->size > 0) {
978
+ s -= 2;
979
+ }
980
+ sprintf(s, "]");
981
+ return str;
745
982
  }