sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,150 @@
1
+ #include "similarity.h"
2
+ #include "search.h"
3
+ #include "array.h"
4
+ #include "helper.h"
5
+ #include <math.h>
6
+ #include <stdlib.h>
7
+ #include <string.h>
8
+
9
+ /****************************************************************************
10
+ *
11
+ * Term
12
+ *
13
+ ****************************************************************************/
14
+
15
+ Term *term_new(const char *field, const char *text)
16
+ {
17
+ Term *t = ALLOC(Term);
18
+ t->field = estrdup(field);
19
+ t->text = estrdup(text);
20
+ return t;
21
+ }
22
+
23
+ void term_destroy(Term *self)
24
+ {
25
+ free(self->text);
26
+ free(self->field);
27
+ free(self);
28
+ }
29
+
30
+ int term_eq(const void *t1, const void *t2)
31
+ {
32
+ return (strcmp(((Term *)t1)->text, ((Term *)t2)->text)) == 0 &&
33
+ (strcmp(((Term *)t1)->field, ((Term *)t2)->field) == 0);
34
+ }
35
+
36
+ unsigned long term_hash(const void *t)
37
+ {
38
+ return str_hash(((Term *)t)->text) * str_hash(((Term *)t)->field);
39
+ }
40
+
41
+ /****************************************************************************
42
+ *
43
+ * Similarity
44
+ *
45
+ ****************************************************************************/
46
+
47
+ float simdef_length_norm(Similarity *s, const char *field, int num_terms)
48
+ {
49
+ (void)s;
50
+ (void)field;
51
+ return (float)(1.0 / sqrt(num_terms));
52
+ }
53
+
54
+ float simdef_query_norm(struct Similarity *s, float sum_of_squared_weights)
55
+ {
56
+ (void)s;
57
+ return (float)(1.0 / sqrt(sum_of_squared_weights));
58
+ }
59
+
60
+ float simdef_tf(struct Similarity *s, float freq)
61
+ {
62
+ (void)s;
63
+ return (float)sqrt(freq);
64
+ }
65
+
66
/* Default sloppy-phrase frequency: 1 / (distance + 1). */
float simdef_sloppy_freq(struct Similarity *s, int distance)
{
    double divisor;

    (void)s;

    divisor = (double)(distance + 1);
    return (float)(1.0 / divisor);
}
71
+
72
+ float simdef_idf_term(struct Similarity *s, const char *field, char *term,
73
+ Searcher *searcher)
74
+ {
75
+ return s->idf(s, searcher->doc_freq(searcher, field, term),
76
+ searcher->max_doc(searcher));
77
+ }
78
+
79
+ float simdef_idf_phrase(struct Similarity *s, const char *field,
80
+ PhrasePosition *positions,
81
+ int pp_cnt, Searcher *searcher)
82
+ {
83
+ float idf = 0.0;
84
+ int i, j;
85
+ for (i = 0; i < pp_cnt; i++) {
86
+ char **terms = positions[i].terms;
87
+ for (j = ary_size(terms) - 1; j >= 0; j--) {
88
+ idf += sim_idf_term(s, field, terms[j], searcher);
89
+ }
90
+ }
91
+ return idf;
92
+ }
93
+
94
+ float simdef_idf(struct Similarity *s, int doc_freq, int num_docs)
95
+ {
96
+ (void)s;
97
+ return (float)(log((float)num_docs/(float)(doc_freq+1)) + 1.0);
98
+ }
99
+
100
/* Default coordination factor: the fraction overlap / max_overlap. */
float simdef_coord(struct Similarity *s, int overlap, int max_overlap)
{
    double matched;
    double possible;

    (void)s;

    matched = (double)overlap;
    possible = (double)max_overlap;
    return (float)(matched / possible);
}
105
+
106
+ float simdef_decode_norm(struct Similarity *s, uchar b)
107
+ {
108
+ return s->norm_table[b];
109
+ }
110
+
111
+ uchar simdef_encode_norm(struct Similarity *s, float f)
112
+ {
113
+ (void)s;
114
+ return float2byte(f);
115
+ }
116
+
117
+ void simdef_destroy(Similarity *s)
118
+ {
119
+ (void)s;
120
+ /* nothing to do here */
121
+ }
122
+
123
+ static Similarity default_similarity = {
124
+ NULL,
125
+ {0},
126
+ &simdef_length_norm,
127
+ &simdef_query_norm,
128
+ &simdef_tf,
129
+ &simdef_sloppy_freq,
130
+ &simdef_idf_term,
131
+ &simdef_idf_phrase,
132
+ &simdef_idf,
133
+ &simdef_coord,
134
+ &simdef_decode_norm,
135
+ &simdef_encode_norm,
136
+ &simdef_destroy
137
+ };
138
+
139
+ Similarity *sim_create_default()
140
+ {
141
+ int i;
142
+ if (!default_similarity.data) {
143
+ for (i = 0; i < 256; i++) {
144
+ default_similarity.norm_table[i] = byte2float((unsigned char)i);
145
+ }
146
+
147
+ default_similarity.data = &default_similarity;
148
+ }
149
+ return &default_similarity;
150
+ }
@@ -0,0 +1,82 @@
1
+ #ifndef FRT_SIMILARITY_H
2
+ #define FRT_SIMILARITY_H
3
+
4
+ typedef struct Searcher Searcher;
5
+
6
+ /****************************************************************************
7
+ *
8
+ * Term
9
+ *
10
+ ****************************************************************************/
11
+
12
/*
 * Call hs_new() with the Term callbacks: term_hash for hashing, term_eq for
 * equality and term_destroy for freeing elements.
 */
#define term_set_new() \
    hs_new((hash_ft)&term_hash, \
           (eq_ft)&term_eq, \
           (free_ft)&term_destroy)
14
+
15
/* A term: a piece of text together with the name of the field it is in. */
typedef struct Term
{
    char *field;    /* field name */
    char *text;     /* term text */
} Term;
20
+
21
+ extern Term *term_new(const char *field, const char *text);
22
+ extern void term_destroy(Term *self);
23
+ extern int term_eq(const void *t1, const void *t2);
24
+ extern unsigned long term_hash(const void *t);
25
+
26
+ /***************************************************************************
27
+ *
28
+ * PhrasePosition
29
+ *
30
+ ***************************************************************************/
31
+
32
/* One position of a phrase and the terms attached to that position. */
typedef struct PhrasePosition
{
    int pos;        /* position value */
    char **terms;   /* array of term strings for this position */
} PhrasePosition;
37
+
38
+ /***************************************************************************
39
+ *
40
+ * Similarity
41
+ *
42
+ ***************************************************************************/
43
+
44
+ typedef struct Similarity Similarity;
45
+
46
+ struct Similarity
47
+ {
48
+ void *data;
49
+ float norm_table[256];
50
+ float (*length_norm)(Similarity *self, const char *field, int num_terms);
51
+ float (*query_norm)(Similarity *self, float sum_of_squared_weights);
52
+ float (*tf)(Similarity *self, float freq);
53
+ float (*sloppy_freq)(Similarity *self, int distance);
54
+ float (*idf_term)(Similarity *self, const char *field, char *term,
55
+ Searcher *searcher);
56
+ float (*idf_phrase)(Similarity *self, const char *field,
57
+ PhrasePosition *positions,
58
+ int pp_cnt, Searcher *searcher);
59
+ float (*idf)(Similarity *self, int doc_freq, int num_docs);
60
+ float (*coord)(Similarity *self, int overlap, int max_overlap);
61
+ float (*decode_norm)(Similarity *self, unsigned char b);
62
+ unsigned char (*encode_norm)(Similarity *self, float f);
63
+ void (*destroy)(Similarity *self);
64
+ };
65
+
66
/*
 * Convenience wrappers that call a Similarity callback, passing the
 * Similarity itself as the first argument.
 *
 * Every macro parameter and every expansion is parenthesised so that
 * expression arguments (e.g. `cond ? a : b` or `*pp`) expand correctly.
 * NOTE: +msim+ is still evaluated twice, so it must not have side effects.
 */
#define sim_length_norm(msim, field, num_terms) \
    ((msim)->length_norm((msim), (field), (num_terms)))
#define sim_query_norm(msim, sosw) \
    ((msim)->query_norm((msim), (sosw)))
#define sim_tf(msim, freq) \
    ((msim)->tf((msim), (freq)))
#define sim_sloppy_freq(msim, distance) \
    ((msim)->sloppy_freq((msim), (distance)))
#define sim_idf_term(msim, field, term, searcher) \
    ((msim)->idf_term((msim), (field), (term), (searcher)))
#define sim_idf_phrase(msim, field, positions, pos_cnt, searcher) \
    ((msim)->idf_phrase((msim), (field), (positions), (pos_cnt), (searcher)))
#define sim_idf(msim, doc_freq, num_docs) \
    ((msim)->idf((msim), (doc_freq), (num_docs)))
#define sim_coord(msim, overlap, max_overlap) \
    ((msim)->coord((msim), (overlap), (max_overlap)))
#define sim_decode_norm(msim, b) \
    ((msim)->decode_norm((msim), (b)))
#define sim_encode_norm(msim, f) \
    ((msim)->encode_norm((msim), (f)))
#define sim_destroy(msim) \
    ((msim)->destroy((msim)))
79
+
80
+ Similarity *sim_create_default();
81
+
82
+ #endif
@@ -0,0 +1,983 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+ #include "index.h"
4
+
5
+ /***************************************************************************
6
+ *
7
+ * SortField
8
+ *
9
+ ***************************************************************************/
10
+
11
+ unsigned long sort_field_hash(const void *p)
12
+ {
13
+ SortField *self = (SortField *)p;
14
+ return str_hash(self->field) ^ (self->type*37);
15
+ }
16
+
17
+ int sort_field_eq(const void *p1, const void *p2)
18
+ {
19
+ SortField *key1 = (SortField *)p1;
20
+ SortField *key2 = (SortField *)p2;
21
+ return (strcmp(key1->field, key2->field) == 0)
22
+ && key1->type == key2->type;
23
+ }
24
+
25
+ static int sort_field_cache_eq(const void *p1, const void *p2)
26
+ {
27
+ SortField *key1 = (SortField *)p1;
28
+ SortField *key2 = (SortField *)p2;
29
+ int equal = (strcmp(key1->field, key2->field) == 0)
30
+ && key1->type == key2->type;
31
+
32
+ return equal;
33
+ }
34
+
35
+ static SortField *sort_field_clone(SortField *self)
36
+ {
37
+ SortField *clone = ALLOC(SortField);
38
+ memcpy(clone, self, sizeof(SortField));
39
+ mutex_init(&clone->mutex, NULL);
40
+ clone->field = estrdup(self->field);
41
+ return clone;
42
+ }
43
+
44
+ static SortField *sort_field_alloc(char *field, int type, bool reverse)
45
+ {
46
+ SortField *self = ALLOC(SortField);
47
+ mutex_init(&self->mutex, NULL);
48
+ self->field = field ? estrdup(field) : NULL;
49
+ self->type = type;
50
+ self->reverse = reverse;
51
+ self->index = NULL;
52
+ self->destroy_index = &free;
53
+ self->compare = NULL;
54
+ return self;
55
+ }
56
+
57
+ SortField *sort_field_new(char *field, enum SORT_TYPE type, bool reverse)
58
+ {
59
+ SortField *sf = NULL;
60
+ switch (type) {
61
+ case SORT_TYPE_SCORE:
62
+ sf = sort_field_score_new(reverse);
63
+ break;
64
+ case SORT_TYPE_DOC:
65
+ sf = sort_field_doc_new(reverse);
66
+ break;
67
+ case SORT_TYPE_BYTE:
68
+ sf = sort_field_byte_new(field, reverse);
69
+ break;
70
+ case SORT_TYPE_INTEGER:
71
+ sf = sort_field_int_new(field, reverse);
72
+ break;
73
+ case SORT_TYPE_FLOAT:
74
+ sf = sort_field_float_new(field, reverse);
75
+ break;
76
+ case SORT_TYPE_STRING:
77
+ sf = sort_field_string_new(field, reverse);
78
+ break;
79
+ case SORT_TYPE_AUTO:
80
+ sf = sort_field_auto_new(field, reverse);
81
+ break;
82
+ }
83
+ return sf;
84
+ }
85
+
86
+ void sort_field_destroy(void *p)
87
+ {
88
+ SortField *self = (SortField *)p;
89
+ if (self->index) {
90
+ self->destroy_index(self->index);
91
+ }
92
+ free(self->field);
93
+ mutex_destroy(&self->mutex);
94
+ free(p);
95
+ }
96
+
97
+ /*
98
+ * field:<type>!
99
+ */
100
+ char *sort_field_to_s(SortField *self)
101
+ {
102
+ char *str;
103
+ char *type = NULL;
104
+ switch (self->type) {
105
+ case SORT_TYPE_SCORE:
106
+ type = "<SCORE>";
107
+ break;
108
+ case SORT_TYPE_DOC:
109
+ type = "<DOC>";
110
+ break;
111
+ case SORT_TYPE_BYTE:
112
+ type = "<byte>";
113
+ break;
114
+ case SORT_TYPE_INTEGER:
115
+ type = "<integer>";
116
+ break;
117
+ case SORT_TYPE_FLOAT:
118
+ type = "<float>";
119
+ break;
120
+ case SORT_TYPE_STRING:
121
+ type = "<string>";
122
+ break;
123
+ case SORT_TYPE_AUTO:
124
+ type = "<auto>";
125
+ break;
126
+ }
127
+ if (self->field) {
128
+ str = ALLOC_N(char, 10 + strlen(self->field) + strlen(type));
129
+ sprintf(str, "%s:%s%s", self->field, type, (self->reverse ? "!" : ""));
130
+ } else {
131
+ str = ALLOC_N(char, 10 + strlen(type));
132
+ sprintf(str, "%s%s", type, (self->reverse ? "!" : ""));
133
+ }
134
+ return str;
135
+ }
136
+
137
+ /***************************************************************************
138
+ * ScoreSortField
139
+ ***************************************************************************/
140
+
141
+ void sf_score_get_val(void *index, Hit *hit, Comparable *comparable)
142
+ {
143
+ (void)index;
144
+ comparable->val.f = hit->score;
145
+ }
146
+
147
+ int sf_score_compare(void *index_ptr, Hit *hit2, Hit *hit1)
148
+ {
149
+ float val1 = hit1->score;
150
+ float val2 = hit2->score;
151
+ (void)index_ptr;
152
+
153
+ if (val1 > val2) return 1;
154
+ else if (val1 < val2) return -1;
155
+ else return 0;
156
+ }
157
+
158
+ SortField *sort_field_score_new(bool reverse)
159
+ {
160
+ SortField *self = sort_field_alloc(NULL, SORT_TYPE_SCORE, reverse);
161
+ self->compare = &sf_score_compare;
162
+ self->get_val = &sf_score_get_val;
163
+ return self;
164
+ }
165
+
166
+ const SortField SORT_FIELD_SCORE = {
167
+ MUTEX_INITIALIZER,
168
+ NULL, /* field */
169
+ SORT_TYPE_SCORE, /* type */
170
+ false, /* reverse */
171
+ NULL, /* index */
172
+ &sf_score_compare, /* compare */
173
+ &sf_score_get_val, /* get_val */
174
+ NULL, /* create_index */
175
+ NULL, /* destroy_index */
176
+ NULL, /* handle_term */
177
+ };
178
+
179
+ const SortField SORT_FIELD_SCORE_REV = {
180
+ MUTEX_INITIALIZER,
181
+ NULL, /* field */
182
+ SORT_TYPE_SCORE, /* type */
183
+ true, /* reverse */
184
+ NULL, /* index */
185
+ &sf_score_compare, /* compare */
186
+ &sf_score_get_val, /* get_val */
187
+ NULL, /* create_index */
188
+ NULL, /* destroy_index */
189
+ NULL, /* handle_term */
190
+ };
191
+
192
+ /**************************************************************************
193
+ * DocSortField
194
+ ***************************************************************************/
195
+
196
+ void sf_doc_get_val(void *index, Hit *hit, Comparable *comparable)
197
+ {
198
+ (void)index;
199
+ comparable->val.i = hit->doc;
200
+ }
201
+
202
+ int sf_doc_compare(void *index_ptr, Hit *hit1, Hit *hit2)
203
+ {
204
+ int val1 = hit1->doc;
205
+ int val2 = hit2->doc;
206
+ (void)index_ptr;
207
+
208
+ if (val1 > val2) return 1;
209
+ else if (val1 < val2) return -1;
210
+ else return 0;
211
+ }
212
+
213
+ SortField *sort_field_doc_new(bool reverse)
214
+ {
215
+ SortField *self = sort_field_alloc(NULL, SORT_TYPE_DOC, reverse);
216
+ self->compare = &sf_doc_compare;
217
+ self->get_val = &sf_doc_get_val;
218
+ return self;
219
+ }
220
+
221
+ const SortField SORT_FIELD_DOC = {
222
+ MUTEX_INITIALIZER,
223
+ NULL, /* field */
224
+ SORT_TYPE_DOC, /* type */
225
+ false, /* reverse */
226
+ NULL, /* index */
227
+ &sf_doc_compare, /* compare */
228
+ &sf_doc_get_val, /* get_val */
229
+ NULL, /* create_index */
230
+ NULL, /* destroy_index */
231
+ NULL, /* handle_term */
232
+ };
233
+
234
+ const SortField SORT_FIELD_DOC_REV = {
235
+ MUTEX_INITIALIZER,
236
+ NULL, /* field */
237
+ SORT_TYPE_DOC, /* type */
238
+ true, /* reverse */
239
+ NULL, /* index */
240
+ &sf_doc_compare, /* compare */
241
+ &sf_doc_get_val, /* get_val */
242
+ NULL, /* create_index */
243
+ NULL, /* destroy_index */
244
+ NULL, /* handle_term */
245
+ };
246
+
247
+ /***************************************************************************
248
+ * ByteSortField
249
+ ***************************************************************************/
250
+
251
+ static void sf_byte_get_val(void *index, Hit *hit, Comparable *comparable)
252
+ {
253
+ comparable->val.i = ((int *)index)[hit->doc];
254
+ }
255
+
256
+ static int sf_byte_compare(void *index, Hit *hit1, Hit *hit2)
257
+ {
258
+ int val1 = ((int *)index)[hit1->doc];
259
+ int val2 = ((int *)index)[hit2->doc];
260
+ if (val1 > val2) return 1;
261
+ else if (val1 < val2) return -1;
262
+ else return 0;
263
+ }
264
+
265
+ static void *sf_byte_create_index(int size)
266
+ {
267
+ int *index = ALLOC_AND_ZERO_N(int, size + 1);
268
+ index[0]++;
269
+ return &index[1];
270
+ }
271
+
272
/*
 * Free a byte index. The pointer handed out by sf_byte_create_index() is one
 * int past the start of the allocation, so step back by one before freeing.
 */
static void sf_byte_destroy_index(void *p)
{
    int *block = (int *)p - 1;

    free(block);
}
277
+
278
+ static void sf_byte_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
279
+ {
280
+ int *index = (int *)index_ptr;
281
+ int val = index[-1]++;
282
+ (void)text;
283
+ while (tde->next(tde)) {
284
+ index[tde->doc_num(tde)] = val;
285
+ }
286
+ }
287
+
288
+ static void sort_field_byte_methods(SortField *self)
289
+ {
290
+ self->type = SORT_TYPE_BYTE;
291
+ self->compare = &sf_byte_compare;
292
+ self->get_val = &sf_byte_get_val;
293
+ self->create_index = &sf_byte_create_index;
294
+ self->destroy_index = &sf_byte_destroy_index;
295
+ self->handle_term = &sf_byte_handle_term;
296
+ }
297
+
298
+ SortField *sort_field_byte_new(char *field, bool reverse)
299
+ {
300
+ SortField *self = sort_field_alloc(field, SORT_TYPE_BYTE, reverse);
301
+ sort_field_byte_methods(self);
302
+ return self;
303
+ }
304
+
305
+ /***************************************************************************
306
+ * IntegerSortField
307
+ ***************************************************************************/
308
+
309
+ void sf_int_get_val(void *index, Hit *hit, Comparable *comparable)
310
+ {
311
+ comparable->val.i = ((int *)index)[hit->doc];
312
+ }
313
+
314
+ int sf_int_compare(void *index, Hit *hit1, Hit *hit2)
315
+ {
316
+ int val1 = ((int *)index)[hit1->doc];
317
+ int val2 = ((int *)index)[hit2->doc];
318
+ if (val1 > val2) return 1;
319
+ else if (val1 < val2) return -1;
320
+ else return 0;
321
+ }
322
+
323
+ void *sf_int_create_index(int size)
324
+ {
325
+ return ALLOC_AND_ZERO_N(int, size);
326
+ }
327
+
328
+ void sf_int_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
329
+ {
330
+ int *index = (int *)index_ptr;
331
+ int val;
332
+ sscanf(text, "%d", &val);
333
+ while (tde->next(tde)) {
334
+ index[tde->doc_num(tde)] = val;
335
+ }
336
+ }
337
+
338
+ void sort_field_int_methods(SortField *self)
339
+ {
340
+ self->type = SORT_TYPE_INTEGER;
341
+ self->compare = &sf_int_compare;
342
+ self->get_val = &sf_int_get_val;
343
+ self->create_index = &sf_int_create_index;
344
+ self->handle_term = &sf_int_handle_term;
345
+ }
346
+
347
+ SortField *sort_field_int_new(char *field, bool reverse)
348
+ {
349
+ SortField *self = sort_field_alloc(field, SORT_TYPE_INTEGER, reverse);
350
+ sort_field_int_methods(self);
351
+ return self;
352
+ }
353
+
354
+ /***************************************************************************
355
+ * FloatSortField
356
+ ***************************************************************************/
357
+
358
+ void sf_float_get_val(void *index, Hit *hit, Comparable *comparable)
359
+ {
360
+ comparable->val.f = ((float *)index)[hit->doc];
361
+ }
362
+
363
+ int sf_float_compare(void *index, Hit *hit1, Hit *hit2)
364
+ {
365
+ float val1 = ((float *)index)[hit1->doc];
366
+ float val2 = ((float *)index)[hit2->doc];
367
+ if (val1 > val2) return 1;
368
+ else if (val1 < val2) return -1;
369
+ else return 0;
370
+ }
371
+
372
+ void *sf_float_create_index(int size)
373
+ {
374
+ return ALLOC_AND_ZERO_N(float, size);
375
+ }
376
+
377
+ void sf_float_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
378
+ {
379
+ float *index = (float *)index_ptr;
380
+ float val;
381
+ sscanf(text, "%g", &val);
382
+ while (tde->next(tde)) {
383
+ index[tde->doc_num(tde)] = val;
384
+ }
385
+ }
386
+
387
+ void sort_field_float_methods(SortField *self)
388
+ {
389
+ self->type = SORT_TYPE_FLOAT;
390
+ self->compare = &sf_float_compare;
391
+ self->get_val = &sf_float_get_val;
392
+ self->create_index = &sf_float_create_index;
393
+ self->handle_term = &sf_float_handle_term;
394
+ }
395
+
396
+ SortField *sort_field_float_new(char *field, bool reverse)
397
+ {
398
+ SortField *self = sort_field_alloc(field, SORT_TYPE_FLOAT, reverse);
399
+ sort_field_float_methods(self);
400
+ return self;
401
+ }
402
+
403
+ /***************************************************************************
404
+ * StringSortField
405
+ ***************************************************************************/
406
+
407
/* Initial capacity of StringIndex.values. */
#define VALUES_ARRAY_START_SIZE 8

/*
 * Index used for string sorting: +index+ maps a document number to a slot in
 * +values+, which holds the term strings.
 */
typedef struct StringIndex {
    int size;       /* number of entries in +index+ */
    int *index;     /* per-document slot number into +values+ */
    char **values;  /* term strings; slot 0 is kept NULL */
    int v_size;     /* number of slots of +values+ in use */
    int v_capa;     /* allocated capacity of +values+ */
} StringIndex;
415
+
416
+ void sf_string_get_val(void *index, Hit *hit, Comparable *comparable)
417
+ {
418
+ comparable->val.s
419
+ = ((StringIndex *)index)->values[
420
+ ((StringIndex *)index)->index[hit->doc]];
421
+ }
422
+
423
+ int sf_string_compare(void *index, Hit *hit1, Hit *hit2)
424
+ {
425
+ char *s1 = ((StringIndex *)index)->values[
426
+ ((StringIndex *)index)->index[hit1->doc]];
427
+ char *s2 = ((StringIndex *)index)->values[
428
+ ((StringIndex *)index)->index[hit2->doc]];
429
+
430
+ if (s1 == NULL) return s2 ? 1 : 0;
431
+ if (s2 == NULL) return -1;
432
+
433
+ #ifdef POSH_OS_WIN32
434
+ return strcmp(s1, s2);
435
+ #else
436
+ return strcoll(s1, s2);
437
+ #endif
438
+
439
+ /*
440
+ * TODO: investigate whether it would be a good idea to presort strings.
441
+ *
442
+ int val1 = index->index[hit1->doc];
443
+ int val2 = index->index[hit2->doc];
444
+ if (val1 > val2) return 1;
445
+ else if (val1 < val2) return -1;
446
+ else return 0;
447
+ */
448
+ }
449
+
450
+ void *sf_string_create_index(int size)
451
+ {
452
+ StringIndex *self = ALLOC_AND_ZERO(StringIndex);
453
+ self->size = size;
454
+ self->index = ALLOC_AND_ZERO_N(int, size);
455
+ self->v_capa = VALUES_ARRAY_START_SIZE;
456
+ self->v_size = 1; /* leave the first value as NULL */
457
+ self->values = ALLOC_AND_ZERO_N(char *, VALUES_ARRAY_START_SIZE);
458
+ return self;
459
+ }
460
+
461
+ void sf_string_destroy_index(void *p)
462
+ {
463
+ StringIndex *self = (StringIndex *)p;
464
+ int i;
465
+ free(self->index);
466
+ for (i = 0; i < self->v_size; i++) {
467
+ free(self->values[i]);
468
+ }
469
+ free(self->values);
470
+ free(self);
471
+ }
472
+
473
+ void sf_string_handle_term(void *index_ptr, TermDocEnum *tde, char *text)
474
+ {
475
+ StringIndex *index = (StringIndex *)index_ptr;
476
+ if (index->v_size >= index->v_capa) {
477
+ index->v_capa *= 2;
478
+ index->values = REALLOC_N(index->values, char *, index->v_capa);
479
+ }
480
+ index->values[index->v_size] = estrdup(text);
481
+ while (tde->next(tde)) {
482
+ index->index[tde->doc_num(tde)] = index->v_size;
483
+ }
484
+ index->v_size++;
485
+ }
486
+
487
+ void sort_field_string_methods(SortField *self)
488
+ {
489
+ self->type = SORT_TYPE_STRING;
490
+ self->compare = &sf_string_compare;
491
+ self->get_val = &sf_string_get_val;
492
+ self->create_index = &sf_string_create_index;
493
+ self->destroy_index = &sf_string_destroy_index;
494
+ self->handle_term = &sf_string_handle_term;
495
+ }
496
+
497
+ SortField *sort_field_string_new(char *field, bool reverse)
498
+ {
499
+ SortField *self = sort_field_alloc(field, SORT_TYPE_STRING, reverse);
500
+ sort_field_string_methods(self);
501
+ return self;
502
+ }
503
+
504
+ /***************************************************************************
505
+ * AutoSortField
506
+ ***************************************************************************/
507
+
508
+ void sort_field_auto_evaluate(SortField *sf, char *text)
509
+ {
510
+ int int_val;
511
+ float float_val;
512
+ int text_len = 0, scan_len = 0;
513
+
514
+ text_len = (int)strlen(text);
515
+ sscanf(text, "%d%n", &int_val, &scan_len);
516
+ if (scan_len == text_len) {
517
+ sort_field_int_methods(sf);
518
+ } else {
519
+ sscanf(text, "%f%n", &float_val, &scan_len);
520
+ if (scan_len == text_len) {
521
+ sort_field_float_methods(sf);
522
+ } else {
523
+ sort_field_string_methods(sf);
524
+ }
525
+ }
526
+ }
527
+
528
+ SortField *sort_field_auto_new(char *field, bool reverse)
529
+ {
530
+ return sort_field_alloc(field, SORT_TYPE_AUTO, reverse);
531
+ }
532
+
533
+ /***************************************************************************
534
+ *
535
+ * FieldCache
536
+ *
537
+ ***************************************************************************/
538
+
539
+ void *field_cache_get_index(IndexReader *ir, SortField *sf)
540
+ {
541
+ void *volatile index = NULL;
542
+ int length = 0;
543
+ TermEnum *volatile te = NULL;
544
+ TermDocEnum *volatile tde = NULL;
545
+ SortField *sf_clone;
546
+ const int field_num = fis_get_field_num(ir->fis, sf->field);
547
+
548
+ if (field_num < 0) {
549
+ RAISE(ARG_ERROR,
550
+ "Cannot sort by field \"%s\". It doesn't exist in the index.",
551
+ sf->field);
552
+ }
553
+
554
+ mutex_lock(&sf->mutex);
555
+ if (!ir->sort_cache) {
556
+ ir->sort_cache = h_new(&sort_field_hash, &sort_field_cache_eq,
557
+ &sort_field_destroy, NULL);
558
+ }
559
+
560
+ if (sf->type == SORT_TYPE_AUTO) {
561
+ te = ir->terms(ir, field_num);
562
+ if (!te->next(te) && (ir->num_docs(ir) > 0)) {
563
+ RAISE(ARG_ERROR,
564
+ "Cannot sort by field \"%s\" as there are no terms "
565
+ "in that field in the index.", sf->field);
566
+ }
567
+ sort_field_auto_evaluate(sf, te->curr_term);
568
+ te->close(te);
569
+ }
570
+
571
+ index = h_get(ir->sort_cache, sf);
572
+
573
+ if (index == NULL) {
574
+ length = ir->max_doc(ir);
575
+ if (length > 0) {
576
+ TRY
577
+ tde = ir->term_docs(ir);
578
+ te = ir->terms(ir, field_num);
579
+ index = sf->create_index(length);
580
+ while (te->next(te)) {
581
+ tde->seek_te(tde, te);
582
+ sf->handle_term(index, tde, te->curr_term);
583
+ }
584
+ XFINALLY
585
+ tde->close(tde);
586
+ te->close(te);
587
+ XENDTRY
588
+ }
589
+ sf_clone = sort_field_clone(sf);
590
+ sf_clone->index = index;
591
+ h_set(ir->sort_cache, sf_clone, index);
592
+ }
593
+ mutex_unlock(&sf->mutex);
594
+ return index;
595
+ }
596
+
597
+ /***************************************************************************
598
+ *
599
+ * FieldSortedHitQueue
600
+ *
601
+ ***************************************************************************/
602
+
603
+ /***************************************************************************
604
+ * Comparator
605
+ ***************************************************************************/
606
+
607
+ typedef struct Comparator {
608
+ void *index;
609
+ bool reverse : 1;
610
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2);
611
+ } Comparator;
612
+
613
+ Comparator *comparator_new(void *index, bool reverse,
614
+ int (*compare)(void *index_ptr, Hit *hit1, Hit *hit2))
615
+ {
616
+ Comparator *self = ALLOC(Comparator);
617
+ self->index = index;
618
+ self->reverse = reverse;
619
+ self->compare = compare;
620
+ return self;
621
+ }
622
+
623
+ /***************************************************************************
624
+ * Sorter
625
+ ***************************************************************************/
626
+
627
+ typedef struct Sorter {
628
+ Comparator **comparators;
629
+ int c_cnt;
630
+ Sort *sort;
631
+ } Sorter;
632
+
633
+ Comparator *sorter_get_comparator(SortField *sf, IndexReader *ir)
634
+ {
635
+ void *index = NULL;
636
+
637
+ if (sf->type > SORT_TYPE_DOC) {
638
+ index = field_cache_get_index(ir, sf);
639
+ }
640
+ return comparator_new(index, sf->reverse, sf->compare);
641
+ }
642
+
643
+ void sorter_destroy(Sorter *self)
644
+ {
645
+ int i;
646
+
647
+ for (i = 0; i < self->c_cnt; i++) {
648
+ free(self->comparators[i]);
649
+ }
650
+ free(self->comparators);
651
+ free(self);
652
+ }
653
+
654
+ Sorter *sorter_new(Sort *sort)
655
+ {
656
+ Sorter *self = ALLOC(Sorter);
657
+ self->c_cnt = sort->size;
658
+ self->comparators = ALLOC_AND_ZERO_N(Comparator *, self->c_cnt);
659
+ self->sort = sort;
660
+ return self;
661
+ }
662
+
663
+ /***************************************************************************
664
+ * FieldSortedHitQueue
665
+ ***************************************************************************/
666
+
667
+ bool fshq_less_than(const void *hit1, const void *hit2)
668
+ {
669
+ int cmp = 0;
670
+ printf("Whoops, shouldn't call this.\n");
671
+ if (cmp != 0) {
672
+ return cmp;
673
+ } else {
674
+ return ((Hit *)hit1)->score < ((Hit *)hit2)->score;
675
+ }
676
+ }
677
+
678
+ INLINE bool fshq_lt(Sorter *sorter, Hit *hit1, Hit *hit2)
679
+ {
680
+ Comparator *comp;
681
+ int diff = 0, i;
682
+ for (i = 0; i < sorter->c_cnt && diff == 0; i++) {
683
+ comp = sorter->comparators[i];
684
+ if (comp->reverse) {
685
+ diff = comp->compare(comp->index, hit2, hit1);
686
+ } else {
687
+ diff = comp->compare(comp->index, hit1, hit2);
688
+ }
689
+ }
690
+
691
+ if (diff != 0) {
692
+ return diff > 0;
693
+ } else {
694
+ return hit1->doc > hit2->doc;
695
+ }
696
+ }
697
+
698
+ void fshq_pq_down(PriorityQueue *pq)
699
+ {
700
+ register int i = 1;
701
+ register int j = 2; /* i << 1; */
702
+ register int k = 3; /* j + 1; */
703
+ Hit **heap = (Hit **)pq->heap;
704
+ Hit *node = heap[i]; /* save top node */
705
+ Sorter *sorter = (Sorter *)heap[0];
706
+
707
+ if ((k <= pq->size) && fshq_lt(sorter, heap[k], heap[j])) {
708
+ j = k;
709
+ }
710
+
711
+ while ((j <= pq->size) && fshq_lt(sorter, heap[j], node)) {
712
+ heap[i] = heap[j]; /* shift up child */
713
+ i = j;
714
+ j = i << 1;
715
+ k = j + 1;
716
+ if ((k <= pq->size) && fshq_lt(sorter, heap[k], heap[j])) {
717
+ j = k;
718
+ }
719
+ }
720
+ heap[i] = node;
721
+ }
722
+
723
+ Hit *fshq_pq_pop(PriorityQueue *pq)
724
+ {
725
+ if (pq->size > 0) {
726
+ Hit *hit = (Hit *)pq->heap[1]; /* save first value */
727
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
728
+ pq->heap[pq->size] = NULL;
729
+ pq->size--;
730
+ fshq_pq_down(pq); /* adjust heap */
731
+ return hit;
732
+ } else {
733
+ return NULL;
734
+ }
735
+ }
736
+
737
+ INLINE void fshq_pq_up(PriorityQueue *pq)
738
+ {
739
+ Hit **heap = (Hit **)pq->heap;
740
+ Hit *node;
741
+ int i = pq->size;
742
+ int j = i >> 1;
743
+ Sorter *sorter = (Sorter *)heap[0];
744
+ node = heap[i];
745
+
746
+ while ((j > 0) && fshq_lt(sorter, node, heap[j])) {
747
+ heap[i] = heap[j];
748
+ i = j;
749
+ j = j >> 1;
750
+ }
751
+ heap[i] = node;
752
+ }
753
+
754
+ void fshq_pq_insert(PriorityQueue *pq, Hit *hit)
755
+ {
756
+ if (pq->size < pq->capa) {
757
+ Hit *new_hit = ALLOC(Hit);
758
+ memcpy(new_hit, hit, sizeof(Hit));
759
+ pq->size++;
760
+ if (pq->size >= pq->mem_capa) {
761
+ pq->mem_capa <<= 1;
762
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
763
+ }
764
+ pq->heap[pq->size] = new_hit;
765
+ fshq_pq_up(pq);
766
+ } else if (pq->size > 0
767
+ && fshq_lt((Sorter *)pq->heap[0], (Hit *)pq->heap[1], hit)) {
768
+ memcpy(pq->heap[1], hit, sizeof(Hit));
769
+ fshq_pq_down(pq);
770
+ }
771
+ }
772
+
773
+ void fshq_pq_destroy(PriorityQueue *self)
774
+ {
775
+ sorter_destroy(self->heap[0]);
776
+ pq_destroy(self);
777
+ }
778
+
779
+ PriorityQueue *fshq_pq_new(int size, Sort *sort, IndexReader *ir)
780
+ {
781
+ PriorityQueue *self = pq_new(size, &fshq_less_than, &free);
782
+ int i;
783
+ Sorter *sorter = sorter_new(sort);
784
+ SortField *sf;
785
+
786
+ for (i = 0; i < sort->size; i++) {
787
+ sf = sort->sort_fields[i];
788
+ sorter->comparators[i] = sorter_get_comparator(sf, ir);
789
+ }
790
+ self->heap[0] = sorter;
791
+
792
+ return self;
793
+ }
794
+
795
+ Hit *fshq_pq_pop_fd(PriorityQueue *pq)
796
+ {
797
+ if (pq->size <= 0) {
798
+ return NULL;
799
+ }
800
+ else {
801
+ int j;
802
+ Sorter *sorter = (Sorter *)pq->heap[0];
803
+ const int cmp_cnt = sorter->c_cnt;
804
+ SortField **sort_fields = sorter->sort->sort_fields;
805
+ Hit *hit = (Hit *)pq->heap[1]; /* save first value */
806
+ FieldDoc *field_doc;
807
+ Comparable *comparables;
808
+ Comparator **comparators = sorter->comparators;
809
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
810
+ pq->heap[pq->size] = NULL;
811
+ pq->size--;
812
+ fshq_pq_down(pq); /* adjust heap */
813
+
814
+ field_doc = (FieldDoc *)emalloc(sizeof(FieldDoc)
815
+ + sizeof(Comparable)*cmp_cnt);
816
+ comparables = field_doc->comparables;
817
+ memcpy(field_doc, hit, sizeof(Hit));
818
+ field_doc->size = cmp_cnt;
819
+
820
+ for (j = 0; j < cmp_cnt; j++) {
821
+ SortField *sf = sort_fields[j];
822
+ Comparator *comparator = comparators[j];
823
+ sf->get_val(comparator->index, hit, &(comparables[j]));
824
+ comparables[j].type = sf->type;
825
+ comparables[j].reverse = comparator->reverse;
826
+ }
827
+ free(hit);
828
+ return (Hit *)field_doc;
829
+ }
830
+ }
831
+
832
+ /***************************************************************************
833
+ * FieldDoc
834
+ ***************************************************************************/
835
+
836
+ void fd_destroy(FieldDoc *fd)
837
+ {
838
+ free(fd);
839
+ }
840
+
841
+ /***************************************************************************
842
+ * FieldDocSortedHitQueue
843
+ ***************************************************************************/
844
+
845
+ bool fdshq_lt(FieldDoc *fd1, FieldDoc *fd2)
846
+ {
847
+ int c = 0, i;
848
+ Comparable *cmps1 = fd1->comparables;
849
+ Comparable *cmps2 = fd2->comparables;
850
+
851
+ for (i = 0; i < fd1->size && c == 0; i++) {
852
+ int type = cmps1[i].type;
853
+ switch (type) {
854
+ case SORT_TYPE_SCORE:
855
+ if (cmps1[i].val.f < cmps2[i].val.f) c = 1;
856
+ if (cmps1[i].val.f > cmps2[i].val.f) c = -1;
857
+ break;
858
+ case SORT_TYPE_FLOAT:
859
+ if (cmps1[i].val.f > cmps2[i].val.f) c = 1;
860
+ if (cmps1[i].val.f < cmps2[i].val.f) c = -1;
861
+ break;
862
+ case SORT_TYPE_DOC:
863
+ if (fd1->hit.doc > fd2->hit.doc) c = 1;
864
+ if (fd1->hit.doc < fd2->hit.doc) c = -1;
865
+ break;
866
+ case SORT_TYPE_INTEGER:
867
+ if (cmps1[i].val.i > cmps2[i].val.i) c = 1;
868
+ if (cmps1[i].val.i < cmps2[i].val.i) c = -1;
869
+ break;
870
+ case SORT_TYPE_BYTE:
871
+ if (cmps1[i].val.i > cmps2[i].val.i) c = 1;
872
+ if (cmps1[i].val.i < cmps2[i].val.i) c = -1;
873
+ break;
874
+ case SORT_TYPE_STRING:
875
+ do {
876
+ char *s1 = cmps1[i].val.s;
877
+ char *s2 = cmps2[i].val.s;
878
+ if (s1 == NULL) c = s2 ? 1 : 0;
879
+ else if (s2 == NULL) c = -1;
880
+ #ifdef POSH_OS_WIN32
881
+ else c = strcmp(s1, s2);
882
+ #else
883
+ else c = strcoll(s1, s2);
884
+ #endif
885
+ } while (0);
886
+ break;
887
+ default:
888
+ RAISE(ARG_ERROR, "Unknown sort type: %d.", type);
889
+ break;
890
+ }
891
+ if (cmps1[i].reverse) {
892
+ c = -c;
893
+ }
894
+ }
895
+ if (c == 0) {
896
+ return fd1->hit.doc > fd2->hit.doc;
897
+ }
898
+ else {
899
+ return c > 0;
900
+ }
901
+ }
902
+
903
+ /***************************************************************************
904
+ *
905
+ * Sort
906
+ *
907
+ ***************************************************************************/
908
+
909
+ #define SORT_INIT_SIZE 4
910
+
911
+ Sort *sort_new()
912
+ {
913
+ Sort *self = ALLOC(Sort);
914
+ self->size = 0;
915
+ self->capa = SORT_INIT_SIZE;
916
+ self->sort_fields = ALLOC_N(SortField *, SORT_INIT_SIZE);
917
+ self->destroy_all = true;
918
+ self->start = 0;
919
+
920
+ return self;
921
+ }
922
+
923
+ void sort_clear(Sort *self)
924
+ {
925
+ int i;
926
+ if (self->destroy_all) {
927
+ for (i = 0; i < self->size; i++) {
928
+ sort_field_destroy(self->sort_fields[i]);
929
+ }
930
+ }
931
+ self->size = 0;
932
+ }
933
+
934
+ void sort_destroy(void *p)
935
+ {
936
+ Sort *self = (Sort *)p;
937
+ sort_clear(self);
938
+ free(self->sort_fields);
939
+ free(self);
940
+ }
941
+
942
+ void sort_add_sort_field(Sort *self, SortField *sf)
943
+ {
944
+ if (self->size == self->capa) {
945
+ self->capa <<= 1;
946
+ REALLOC_N(self->sort_fields, SortField *, self->capa);
947
+ }
948
+
949
+ self->sort_fields[self->size] = sf;
950
+ self->size++;
951
+ }
952
+
953
+ char *sort_to_s(Sort *self)
954
+ {
955
+ int i, len = 20;
956
+ char *s;
957
+ char *str;
958
+ char **sf_strs = ALLOC_N(char *, self->size);
959
+
960
+ for (i = 0; i < self->size; i++) {
961
+ sf_strs[i] = s = sort_field_to_s(self->sort_fields[i]);
962
+ len += (int)strlen(s) + 2;
963
+ }
964
+
965
+ str = ALLOC_N(char, len);
966
+ s = "Sort[";
967
+ len = (int)strlen(s);
968
+ memcpy(str, s, len);
969
+
970
+ s = str + len;
971
+ for (i = 0; i < self->size; i++) {
972
+ sprintf(s, "%s, ", sf_strs[i]);
973
+ s += (int)strlen(s);
974
+ free(sf_strs[i]);
975
+ }
976
+ free(sf_strs);
977
+
978
+ if (self->size > 0) {
979
+ s -= 2;
980
+ }
981
+ sprintf(s, "]");
982
+ return str;
983
+ }