ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/extconf.rb CHANGED
@@ -1,14 +1,7 @@
1
1
  # extconf.rb for Ferret extensions
2
2
  if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
3
- File.open('Makefile', "w") {}
4
- begin
5
- `nmake`
6
- require 'mkmf'
7
- create_makefile("ferret_ext")
8
- rescue => error
9
- require 'fileutils'
10
- FileUtils.copy('dummy.exe', 'nmake.exe')
11
- end
3
+ require 'mkmf'
4
+ create_makefile("ferret_ext")
12
5
  else
13
6
  require 'mkmf'
14
7
  $CFLAGS += " -fno-common"
data/ext/ferret.c CHANGED
@@ -1,53 +1,57 @@
1
+ #include <errno.h>
1
2
  #include "ferret.h"
2
3
  #include "except.h"
3
4
  #include "hash.h"
4
5
  #include "hashset.h"
6
+ #include "threading.h"
5
7
 
6
8
  /* Object Map */
7
- static HshTable *object_map;
9
+ static HashTable *object_map;
8
10
 
9
11
  /* IDs */
10
12
  ID id_new;
13
+ ID id_capacity;
14
+ ID id_less_than;
15
+ ID id_lt;
11
16
  ID id_call;
12
17
  ID id_is_directory;
18
+ ID id_data;
13
19
 
14
20
  static ID id_mkdir_p;
15
21
 
22
+ /* Symbols */
23
+ VALUE sym_yes;
24
+ VALUE sym_no;
25
+ VALUE sym_true;
26
+ VALUE sym_false;
27
+ VALUE sym_path;
28
+ VALUE sym_dir;
29
+
16
30
  /* Modules */
17
31
  VALUE mFerret;
18
- VALUE mAnalysis;
19
- VALUE mDocument;
20
- VALUE mIndex;
21
- VALUE mSearch;
22
32
  VALUE mStore;
23
33
  VALUE mStringHelper;
24
- VALUE mUtils;
25
34
  VALUE mSpans;
26
35
 
27
36
  /* Classes */
28
37
  /*
29
38
  */
30
39
 
31
- xcontext_t *xtop_context = NULL;
32
-
33
- unsigned int
34
- value_hash(const void *key)
40
+ unsigned long value_hash(const void *key)
35
41
  {
36
- return (unsigned int)key;
42
+ return (unsigned long)key;
37
43
  }
38
44
 
39
- int
40
- value_eq(const void *key1, const void *key2)
45
+ int value_eq(const void *key1, const void *key2)
41
46
  {
42
- return key1 == key2;
47
+ return key1 == key2;
43
48
  }
44
49
 
45
- VALUE
46
- object_get(void *key)
50
+ VALUE object_get(void *key)
47
51
  {
48
- VALUE val = (VALUE)h_get(object_map, key);
49
- if (!val) val = Qnil;
50
- return val;
52
+ VALUE val = (VALUE)h_get(object_map, key);
53
+ if (!val) val = Qnil;
54
+ return val;
51
55
  }
52
56
 
53
57
  //static int hash_cnt = 0;
@@ -55,133 +59,236 @@ void
55
59
  //object_add(void *key, VALUE obj)
56
60
  object_add2(void *key, VALUE obj, const char *file, int line)
57
61
  {
58
- if (h_get(object_map, key))
59
- printf("failed adding %d. %s:%d\n", (int)key, file, line);
60
- //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
61
- h_set(object_map, key, (void *)obj);
62
+ if (h_get(object_map, key))
63
+ printf("failed adding %d. %s:%d\n", (int)key, file, line);
64
+ //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
65
+ h_set(object_map, key, (void *)obj);
62
66
  }
63
67
 
64
68
  void
65
69
  //object_set(void *key, VALUE obj)
66
70
  object_set2(void *key, VALUE obj, const char *file, int line)
67
71
  {
68
- //if (!h_get(object_map, key))
69
- //printf("seting %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
70
- h_set(object_map, key, (void *)obj);
72
+ //if (!h_get(object_map, key))
73
+ //printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
74
+ h_set(object_map, key, (void *)obj);
71
75
  }
72
76
 
73
77
  void
74
78
  //object_del(void *key)
75
79
  object_del2(void *key, const char *file, int line)
76
80
  {
77
- if (object_get(key) == Qnil)
78
- printf("failed deleting %d. %s:%d\n", (int)key, file, line);
79
- //printf("deleting %d. now contains %d, %s:%d\n", (int)key, --hash_cnt, file, line);
80
- h_del(object_map, key);
81
+ if (object_get(key) == Qnil)
82
+ printf("failed deleting %d. %s:%d\n", (int)key, file, line);
83
+ //printf("deleting %d. now contains %d, %s:%d\n", (int)key, --hash_cnt, file, line);
84
+ h_del(object_map, key);
81
85
  }
82
86
 
83
- void
84
- frt_gc_mark(void *key)
87
+ void frt_gc_mark(void *key)
85
88
  {
86
- VALUE val = (VALUE)h_get(object_map, key);
87
- if (val)
88
- rb_gc_mark(val);
89
+ VALUE val = (VALUE)h_get(object_map, key);
90
+ if (val)
91
+ rb_gc_mark(val);
89
92
  }
90
93
 
91
- VALUE
92
- frt_data_alloc(VALUE klass)
94
+ VALUE frt_data_alloc(VALUE klass)
93
95
  {
94
- return Frt_Make_Struct(klass);
96
+ return Frt_Make_Struct(klass);
95
97
  }
96
98
 
97
- void
98
- frt_deref_free(void *p)
99
+ void frt_deref_free(void *p)
99
100
  {
100
- object_del(p);
101
+ object_del(p);
101
102
  }
102
103
 
103
- void
104
- frt_thread_once(int *once_control, void (*init_routine) (void))
104
+ void frt_thread_once(int *once_control, void (*init_routine) (void))
105
105
  {
106
- if (*once_control) {
107
- init_routine();
108
- *once_control = 0;
109
- }
106
+ if (*once_control) {
107
+ init_routine();
108
+ *once_control = 0;
109
+ }
110
110
  }
111
111
 
112
- void
113
- frt_thread_key_create(thread_key_t *key, void (*destr_function) (void *))
112
+ void frt_thread_key_create(thread_key_t *key, void (*destr_function)(void *))
114
113
  {
115
- *key = h_new(&value_hash, &value_eq, NULL, destr_function);
114
+ *key = h_new(&value_hash, &value_eq, NULL, destr_function);
116
115
  }
117
116
 
118
- void
119
- frt_thread_key_delete(thread_key_t key)
117
+ void frt_thread_key_delete(thread_key_t key)
120
118
  {
121
- h_destroy(key);
119
+ h_destroy(key);
122
120
  }
123
121
 
124
- void
125
- frt_thread_setspecific(thread_key_t key, const void *pointer)
122
+ void frt_thread_setspecific(thread_key_t key, const void *pointer)
126
123
  {
127
- h_set(key, (void *)rb_thread_current(), (void *)pointer);
124
+ h_set(key, (void *)rb_thread_current(), (void *)pointer);
128
125
  }
129
126
 
130
- void *
131
- frt_thread_getspecific(thread_key_t key)
127
+ void *frt_thread_getspecific(thread_key_t key)
132
128
  {
133
- return h_get(key, (void *)rb_thread_current());
129
+ return h_get(key, (void *)rb_thread_current());
134
130
  }
135
131
 
136
- void
137
- frt_create_dir(VALUE rpath)
132
+ void frt_create_dir(VALUE rpath)
138
133
  {
139
- VALUE mFileUtils;
140
- rb_require("fileutils");
141
- mFileUtils = rb_define_module("FileUtils");
142
- rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
134
+ VALUE mFileUtils;
135
+ rb_require("fileutils");
136
+ mFileUtils = rb_define_module("FileUtils");
137
+ rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
143
138
  }
144
139
 
145
- VALUE
146
- frt_hs_to_rb_ary(HashSet *hs)
140
+ VALUE frt_hs_to_rb_ary(HashSet *hs)
147
141
  {
148
- int i;
149
- VALUE ary = rb_ary_new();
150
- for (i = 0; i < hs->size; i++) {
151
- rb_ary_push(ary, rb_str_new2(hs->elems[i]));
152
- }
153
- return ary;
142
+ int i;
143
+ VALUE ary = rb_ary_new();
144
+ for (i = 0; i < hs->size; i++) {
145
+ rb_ary_push(ary, rb_str_new2(hs->elems[i]));
146
+ }
147
+ return ary;
154
148
  }
155
149
 
156
- void
157
- Init_ferret_ext(void)
158
- {
159
- /* initialize object map */
160
- object_map = h_new(&value_hash, &value_eq, NULL, NULL);
161
-
162
- /* IDs */
163
- id_new = rb_intern("new");
164
- id_call = rb_intern("call");
165
-
166
- id_mkdir_p = rb_intern("mkdir_p");
167
- id_is_directory = rb_intern("directory?");
168
-
169
- /* Modules */
170
- mFerret = rb_define_module("Ferret");
171
- mAnalysis = rb_define_module_under(mFerret, "Analysis");
172
- mDocument = rb_define_module_under(mFerret, "Document");
173
- mIndex = rb_define_module_under(mFerret, "Index");
174
- mSearch = rb_define_module_under(mFerret, "Search");
175
- mStore = rb_define_module_under(mFerret, "Store");
176
- mUtils = rb_define_module_under(mFerret, "Utils");
177
- mSpans = rb_define_module_under(mSearch, "Spans");
178
-
179
- /* Inits */
180
- Init_term();
181
- Init_analysis();
182
- Init_doc();
183
- Init_dir();
184
- Init_index_io();
185
- Init_search();
186
- Init_qparser();
150
+ void *frt_rb_data_ptr(VALUE val)
151
+ {
152
+ Check_Type(val, T_DATA);
153
+ return DATA_PTR(val);
154
+ }
155
+
156
+ char *
157
+ frt_field(VALUE rfield)
158
+ {
159
+ switch (TYPE(rfield)) {
160
+ case T_SYMBOL:
161
+ return rb_id2name(SYM2ID(rfield));
162
+ case T_STRING:
163
+ return RSTRING(rfield)->ptr;
164
+ default:
165
+ rb_raise(rb_eArgError, "field name must be a symbol");
166
+ }
167
+ return NULL;
168
+ }
169
+
170
+ static VALUE error_map;
171
+
172
+ VALUE frt_get_error(const char *err_type)
173
+ {
174
+ VALUE error_class;
175
+ if (Qnil != (error_class = rb_hash_aref(error_map, rb_intern(err_type)))) {
176
+ return error_class;
177
+ }
178
+ return rb_eStandardError;
179
+ }
180
+
181
+ #define FRT_BUF_SIZ 2046
182
+ #ifdef FRT_HAS_VARARGS
183
+ void vfrt_rb_raise(const char *file, int line_num, const char *func,
184
+ const char *err_type, const char *fmt, va_list args)
185
+ #endif
186
+ {
187
+ char buf[FRT_BUF_SIZ];
188
+ size_t so_far = 0;
189
+ #ifdef FRT_HAS_VARARGS
190
+ snprintf(buf, FRT_BUF_SIZ, "%s occured at <%s>:%d in %s\n",
191
+ err_type, file, line_num, func);
192
+ #else
193
+ snprintf(buf, FRT_BUF_SIZ, "%s occured:\n", err_type);
194
+ #endif
195
+ so_far = strlen(buf);
196
+ vsnprintf(buf + so_far, FRT_BUF_SIZ - so_far, fmt, args);
197
+
198
+ so_far = strlen(buf);
199
+ if (fmt[0] != '\0' && fmt[strlen(fmt) - 1] == ':') {
200
+ snprintf(buf + so_far, FRT_BUF_SIZ - so_far, " %s", strerror(errno));
201
+ so_far = strlen(buf);
202
+ }
203
+
204
+ snprintf(buf + so_far, FRT_BUF_SIZ - so_far, "\n");
205
+ rb_raise(frt_get_error(err_type), buf);
206
+ }
207
+
208
+ #ifdef FRT_HAS_VARARGS
209
+ void frt_rb_raise(const char *file, int line_num, const char *func,
210
+ const char *err_type, const char *fmt, ...)
211
+ #else
212
+ void FRT_EXIT(const char *err_type, const char *fmt, ...)
213
+ #endif
214
+ {
215
+ va_list args;
216
+ va_start(args, fmt);
217
+ #ifdef FRT_HAS_VARARGS
218
+ vfrt_rb_raise(file, line_num, func, err_type, fmt, args);
219
+ #else
220
+ V_FRT_EXIT(err_type, fmt, args);
221
+ #endif
222
+ va_end(args);
223
+ }
224
+
225
+ /*
226
+ * Document-module: Ferret
227
+ *
228
+ * See the README
229
+ */
230
+ void Init_Ferret(void)
231
+ {
232
+ mFerret = rb_define_module("Ferret");
233
+ }
234
+
235
+ void Init_ferret_ext(void)
236
+ {
237
+ VALUE cParseError;
238
+ VALUE cStateError;
239
+
240
+ /* initialize object map */
241
+ object_map = h_new(&value_hash, &value_eq, NULL, NULL);
242
+
243
+ /* IDs */
244
+ id_new = rb_intern("new");
245
+ id_call = rb_intern("call");
246
+
247
+ id_capacity = rb_intern("capacity");
248
+ id_less_than = rb_intern("less_than");
249
+ id_lt = rb_intern("<");
250
+
251
+ id_mkdir_p = rb_intern("mkdir_p");
252
+ id_is_directory = rb_intern("directory?");
253
+
254
+ id_data = rb_intern("@data");
255
+
256
+ /* Symbols */
257
+ sym_yes = ID2SYM(rb_intern("yes"));;
258
+ sym_no = ID2SYM(rb_intern("no"));;
259
+ sym_true = ID2SYM(rb_intern("true"));;
260
+ sym_false = ID2SYM(rb_intern("false"));;
261
+ sym_path = ID2SYM(rb_intern("path"));;
262
+ sym_dir = ID2SYM(rb_intern("dir"));;
263
+
264
+ /* Inits */
265
+ Init_Ferret();
266
+ Init_Utils();
267
+ Init_Analysis();
268
+ Init_Store();
269
+ Init_Index();
270
+ Init_Search();
271
+ Init_QueryParser();
272
+
273
+ /* Error Classes */
274
+ cParseError =
275
+ rb_define_class_under(mFerret, "ParseError", rb_eStandardError);
276
+ cStateError =
277
+ rb_define_class_under(mFerret, "StateError", rb_eStandardError);
278
+
279
+ error_map = rb_hash_new();
280
+ rb_hash_aset(error_map, rb_intern("Exception"), rb_eStandardError);
281
+ rb_hash_aset(error_map, rb_intern("IO Error"), rb_eIOError);
282
+ rb_hash_aset(error_map, rb_intern("Argument Error"), rb_eArgError);
283
+ rb_hash_aset(error_map, rb_intern("End-of-File Error"), rb_eEOFError);
284
+ rb_hash_aset(error_map, rb_intern("Unsupported Function Error"),
285
+ rb_eNotImpError);
286
+ rb_hash_aset(error_map, rb_intern("State Error"), cStateError);
287
+ rb_hash_aset(error_map, rb_intern("ParseError"), cParseError);
288
+ rb_hash_aset(error_map, rb_intern("Memory Error"), rb_eNoMemError);
289
+ rb_hash_aset(error_map, rb_intern("Index Error"), rb_eIndexError);
290
+ rb_hash_aset(error_map, rb_intern("Lock Error"), cLockError);
291
+
292
+ rb_define_const(mFerret, "EXCEPTION_MAP", error_map);
293
+ rb_define_const(mFerret, "FIX_INT_MAX", INT2FIX(INT_MAX >> 1));
187
294
  }
data/ext/ferret.h CHANGED
@@ -7,31 +7,41 @@
7
7
 
8
8
  /* IDs */
9
9
  extern ID id_new;
10
+ extern ID id_capacity;
11
+ extern ID id_less_than;
12
+ extern ID id_lt;
10
13
  extern ID id_call;
11
14
  extern ID id_is_directory;
15
+ extern ID id_data;
16
+
17
+ /* Symbols */
18
+ extern VALUE sym_yes;
19
+ extern VALUE sym_no;
20
+ extern VALUE sym_true;
21
+ extern VALUE sym_false;
22
+ extern VALUE sym_path;
23
+ extern VALUE sym_dir;
12
24
 
13
25
  /* Modules */
14
26
  extern VALUE mFerret;
15
- extern VALUE mAnalysis;
16
- extern VALUE mDocument;
17
27
  extern VALUE mIndex;
18
28
  extern VALUE mSearch;
19
29
  extern VALUE mStore;
20
30
  extern VALUE mStringHelper;
21
- extern VALUE mUtils;
22
31
  extern VALUE mSpans;
23
32
 
24
33
  /* Classes */
25
34
  extern VALUE cDirectory;
35
+ extern VALUE cLockError;
26
36
 
27
37
  /* Ferret Inits */
28
- extern void Init_term();
29
- extern void Init_dir();
30
- extern void Init_analysis();
31
- extern void Init_doc();
32
- extern void Init_index_io();
33
- extern void Init_search();
34
- extern void Init_qparser();
38
+ extern void Init_Utils();
39
+ extern void Init_Analysis();
40
+ extern void Init_Store();
41
+ extern void Init_Index();
42
+ extern void Init_Search();
43
+ extern void Init_QueryParser();
44
+
35
45
  //extern void object_add(void *key, VALUE obj);
36
46
  #define object_add(key, obj) object_add2(key, obj, __FILE__, __LINE__)
37
47
  extern void object_add2(void *key, VALUE obj, const char *file, int line);
@@ -44,10 +54,11 @@ extern void object_del2(void *key, const char *file, int line);
44
54
  extern void frt_gc_mark(void *key);
45
55
  extern VALUE object_get(void *key);
46
56
  extern VALUE frt_data_alloc(VALUE klass);
47
- extern VALUE frt_get_doc(Document *doc);
48
57
  extern void frt_deref_free(void *p);
49
58
  extern void frt_create_dir(VALUE rpath);
50
59
  extern VALUE frt_hs_to_rb_ary(HashSet *hs);
60
+ extern void *frt_rb_data_ptr(VALUE val);
61
+ extern char * frt_field(VALUE rfield);
51
62
 
52
63
  #define Frt_Make_Struct(klass)\
53
64
  rb_data_object_alloc(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)