ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_store.c CHANGED
@@ -2,6 +2,7 @@
2
2
  #include "store.h"
3
3
 
4
4
  VALUE cLock;
5
+ VALUE cLockError;
5
6
  VALUE cDirectory;
6
7
  VALUE cRAMDirectory;
7
8
  VALUE cFSDirectory;
@@ -15,69 +16,120 @@ VALUE cFSDirectory;
15
16
  void
16
17
  frt_lock_free(void *p)
17
18
  {
18
- Lock *lock = (Lock *)p;
19
- if (RTEST(object_get(lock->store))) {
20
- lock->store->close_lock(lock);
21
- } else {
22
- free(lock->name);
23
- free(lock);
24
- }
19
+ Lock *lock = (Lock *)p;
20
+ if (object_get(lock->store) != Qnil) {
21
+ lock->store->close_lock(lock);
22
+ } else {
23
+ free(lock->name);
24
+ free(lock);
25
+ }
25
26
  }
26
27
 
27
28
  void
28
29
  frt_lock_mark(void *p)
29
30
  {
30
- Lock *lock = (Lock *)p;
31
- rb_gc_mark(object_get(lock->store));
31
+ Lock *lock = (Lock *)p;
32
+ frt_gc_mark(lock->store);
32
33
  }
33
34
 
34
- #define GET_LOCK Lock *lock; Data_Get_Struct(self, Lock, lock);
35
+ #define GET_LOCK(lock, self) Data_Get_Struct(self, Lock, lock)
36
+
37
+ /*
38
+ * call-seq:
39
+ * lock.obtain(timeout = 1) -> bool
40
+ *
41
+ * Obtain a lock. Returns true if lock was successfully obtained. Make sure
42
+ * the lock is released using Lock#release. Otherwise you'll be left with a
43
+ * stale lock file.
44
+ *
45
+ * The timeout defaults to 1 second and 5 attempts are made to obtain the
46
+ * lock. If you're doing large batch updates on the index with multiple
47
+ * processes you may need to increase the lock timeout but 1 second will be
48
+ * sufficient in most cases.
49
+ *
50
+ * timeout:: seconds to wait to obtain lock before timing out and returning
51
+ * false
52
+ * return:: true if lock was successfully obtained. Raises a
53
+ * Lock::LockError otherwise.
54
+ */
35
55
  static VALUE
36
56
  frt_lock_obtain(int argc, VALUE *argv, VALUE self)
37
57
  {
38
- VALUE rtimeout;
39
- int timeout = 1;
40
- GET_LOCK;
41
- if (rb_scan_args(argc, argv, "01", &rtimeout) > 0) {
42
- timeout = FIX2INT(rtimeout);
43
- }
44
- /* TODO: use the lock timeout */
45
- if (!lock->obtain(lock)) {
46
- rb_raise(rb_eStandardError, "could not obtain lock: #%s", lock->name);
47
- }
48
- return Qtrue;
58
+ VALUE rtimeout;
59
+ int timeout = 1;
60
+ Lock *lock;
61
+ GET_LOCK(lock, self);
62
+
63
+ if (rb_scan_args(argc, argv, "01", &rtimeout) > 0) {
64
+ timeout = FIX2INT(rtimeout);
65
+ }
66
+ /* TODO: use the lock timeout */
67
+ if (!lock->obtain(lock)) {
68
+ rb_raise(cLockError, "could not obtain lock: #%s", lock->name);
69
+ }
70
+ return Qtrue;
49
71
  }
50
72
 
73
+ /*
74
+ * call-seq:
75
+ * lock.while_locked(timeout = 1) { do_something() } -> bool
76
+ *
77
+ * Run the code in a block while a lock is obtained, automatically releasing
78
+ * the lock when the block returns.
79
+ *
80
+ * See Lock#obtain for more information on lock timeout.
81
+ *
82
+ * timeout:: seconds to wait to obtain lock before timing out and returning
83
+ * false
84
+ * return:: true if lock was successfully obtained. Raises a
85
+ * Lock::LockError otherwise.
86
+ */
51
87
  static VALUE
52
88
  frt_lock_while_locked(int argc, VALUE *argv, VALUE self)
53
89
  {
54
- VALUE rtimeout;
55
- int timeout = 1;
56
- GET_LOCK;
57
- if (rb_scan_args(argc, argv, "01", &rtimeout) > 0) {
58
- timeout = FIX2INT(rtimeout);
59
- }
60
- if (!lock->obtain(lock)) {
61
- rb_raise(rb_eStandardError, "could not obtain lock: #%s", lock->name);
62
- }
63
- rb_yield(Qnil);
64
- lock->release(lock);
65
- return Qtrue;
90
+ VALUE rtimeout;
91
+ int timeout = 1;
92
+ Lock *lock;
93
+ GET_LOCK(lock, self);
94
+ if (rb_scan_args(argc, argv, "01", &rtimeout) > 0) {
95
+ timeout = FIX2INT(rtimeout);
96
+ }
97
+ if (!lock->obtain(lock)) {
98
+ rb_raise(cLockError, "could not obtain lock: #%s", lock->name);
99
+ }
100
+ rb_yield(Qnil);
101
+ lock->release(lock);
102
+ return Qtrue;
66
103
  }
67
104
 
105
+ /*
106
+ * call-seq:
107
+ * lock.locked? -> bool
108
+ *
109
+ * Returns true if the lock has been obtained.
110
+ */
68
111
  static VALUE
69
112
  frt_lock_is_locked(VALUE self)
70
113
  {
71
- GET_LOCK;
72
- return lock->is_locked(lock) ? Qtrue : Qfalse;
114
+ Lock *lock;
115
+ GET_LOCK(lock, self);
116
+ return lock->is_locked(lock) ? Qtrue : Qfalse;
73
117
  }
74
118
 
119
+ /*
120
+ * call-seq:
121
+ * lock.release() -> self
122
+ *
123
+ * Release the lock. This should only be called by the process which obtains
124
+ * the lock.
125
+ */
75
126
  static VALUE
76
127
  frt_lock_release(VALUE self)
77
128
  {
78
- GET_LOCK;
79
- lock->release(lock);
80
- return Qnil;
129
+ Lock *lock;
130
+ GET_LOCK(lock, self);
131
+ lock->release(lock);
132
+ return self;
81
133
  }
82
134
 
83
135
  /****************************************************************************
@@ -89,86 +141,142 @@ frt_lock_release(VALUE self)
89
141
  void
90
142
  frt_dir_free(Store *store)
91
143
  {
92
- object_del(store);
93
- store_deref(store);
144
+ object_del(store);
145
+ store_deref(store);
94
146
  }
95
147
 
96
- #define GET_STORE Store *store; Data_Get_Struct(self, Store, store)
148
+ #define GET_STORE(store, self) Data_Get_Struct(self, Store, store)
149
+ /*
150
+ * call-seq:
151
+ * dir.close() -> nil
152
+ *
153
+ * It is a good idea to close a directory when you have finished using it.
154
+ * Although the garbage collector will currently handle this for you, this
155
+ * behaviour may change in future.
156
+ */
97
157
  static VALUE
98
158
  frt_dir_close(VALUE self)
99
159
  {
100
- /*
101
- * No need to do anything here. Leave it do the garbage collector
102
- GET_STORE;
103
- Frt_Unwrap_Struct(self);
104
- object_del(store);
105
- store_deref(store);
106
- */
107
- return Qnil;
160
+ /*
161
+ * No need to do anything here. Leave it to the garbage collector
162
+ GET_STORE;
163
+ Frt_Unwrap_Struct(self);
164
+ object_del(store);
165
+ store_deref(store);
166
+ */
167
+ return Qnil;
108
168
  }
109
169
 
170
+ /*
171
+ * call-seq:
172
+ * dir.exists?(file_name) -> bool
173
+ *
174
+ * Return true if a file with the name +file_name+ exists in the directory.
175
+ */
110
176
  static VALUE
111
177
  frt_dir_exists(VALUE self, VALUE rfname)
112
178
  {
113
- GET_STORE;
114
- rfname = rb_obj_as_string(rfname);
115
- return store->exists(store, RSTRING(rfname)->ptr) ? Qtrue : Qfalse;
179
+ Store *store;
180
+ GET_STORE(store, self);
181
+ StringValue(rfname);
182
+ return store->exists(store, RSTRING(rfname)->ptr) ? Qtrue : Qfalse;
116
183
  }
117
184
 
185
+ /*
186
+ * call-seq:
187
+ * dir.touch(file_name) -> nil
188
+ *
189
+ * Create an empty file in the directory with the name +file_name+.
190
+ */
118
191
  static VALUE
119
192
  frt_dir_touch(VALUE self, VALUE rfname)
120
193
  {
121
- GET_STORE;
122
- rfname = rb_obj_as_string(rfname);
123
- store->touch(store, RSTRING(rfname)->ptr);
124
- return Qnil;
194
+ Store *store;
195
+ GET_STORE(store, self);
196
+ StringValue(rfname);
197
+ store->touch(store, RSTRING(rfname)->ptr);
198
+ return Qnil;
125
199
  }
126
200
 
127
- typedef struct RTerm {
128
- VALUE field;
129
- VALUE text;
130
- } RTerm;
131
-
201
+ /*
202
+ * call-seq:
203
+ * dir.delete(file_name) -> bool
204
+ *
205
+ * Remove file +file_name+ from the directory. Returns true if successful.
206
+ */
132
207
  static VALUE
133
208
  frt_dir_delete(VALUE self, VALUE rfname)
134
209
  {
135
- GET_STORE;
136
- rfname = rb_obj_as_string(rfname);
137
- return INT2FIX(store->remove(store, RSTRING(rfname)->ptr));
210
+ Store *store;
211
+ GET_STORE(store, self);
212
+ StringValue(rfname);
213
+ return (store->remove(store, RSTRING(rfname)->ptr) == 0) ? Qtrue : Qfalse;
138
214
  }
139
215
 
216
+ /*
217
+ * call-seq:
218
+ * dir.count -> integer
219
+ *
220
+ * Return a count of the number of files in the directory.
221
+ */
140
222
  static VALUE
141
223
  frt_dir_file_count(VALUE self)
142
224
  {
143
- GET_STORE;
144
- return INT2FIX(store->count(store));
225
+ Store *store;
226
+ GET_STORE(store, self);
227
+ return INT2FIX(store->count(store));
145
228
  }
146
229
 
230
+ /*
231
+ * call-seq:
232
+ * dir.refresh -> self
233
+ *
234
+ * Delete all files in the directory. It gives you a clean slate.
235
+ */
147
236
  static VALUE
148
237
  frt_dir_refresh(VALUE self)
149
238
  {
150
- GET_STORE;
151
- store->clear_all(store);
152
- return Qnil;
239
+ Store *store;
240
+ GET_STORE(store, self);
241
+ store->clear_all(store);
242
+ return self;
153
243
  }
154
244
 
245
+ /*
246
+ * call-seq:
247
+ * dir.rename(from, to) -> self
248
+ *
249
+ * Rename a file from +from+ to +to+. An error will be raised if the file
250
+ * doesn't exist or there is some other type of IOError.
251
+ */
155
252
  static VALUE
156
253
  frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
157
254
  {
158
- GET_STORE;
159
- rfrom = rb_obj_as_string(rfrom);
160
- rto = rb_obj_as_string(rto);
161
- store->rename(store, RSTRING(rfrom)->ptr, RSTRING(rto)->ptr);
162
- return Qnil;
255
+ Store *store;
256
+ GET_STORE(store, self);
257
+ StringValue(rfrom);
258
+ StringValue(rto);
259
+ store->rename(store, RSTRING(rfrom)->ptr, RSTRING(rto)->ptr);
260
+ return self;
163
261
  }
164
262
 
263
+ /*
264
+ * call-seq:
265
+ * dir.make_lock(lock_name) -> lock
266
+ *
267
+ * Make a lock with the name +lock_name+. Note that lockfiles will be stored
268
+ * in the directory with other files but they won't be visible to you. You
269
+ * should avoid using files with a .lck extension as this extension is
270
+ * reserved for lock files
271
+ */
165
272
  static VALUE
166
273
  frt_dir_make_lock(VALUE self, VALUE rlock_name)
167
274
  {
168
- GET_STORE;
169
- rlock_name = rb_obj_as_string(rlock_name);
170
- return Data_Wrap_Struct(cLock, &frt_lock_mark, &frt_lock_free,
171
- store->open_lock(store, RSTRING(rlock_name)->ptr));
275
+ Store *store;
276
+ GET_STORE(store, self);
277
+ StringValue(rlock_name);
278
+ return Data_Wrap_Struct(cLock, &frt_lock_mark, &frt_lock_free,
279
+ store->open_lock(store, RSTRING(rlock_name)->ptr));
172
280
  }
173
281
 
174
282
  /****************************************************************************
@@ -177,26 +285,35 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
177
285
  *
178
286
  ****************************************************************************/
179
287
 
288
+ /*
289
+ * call-seq:
290
+ * RAMDirectory.new(dir = nil)
291
+ *
292
+ * Create a new RAMDirectory.
293
+ *
294
+ * You can optionally load another Directory (usually a FSDirectory) into
295
+ * memory. This may be useful to speed up search performance but usually the
296
+ * speedup won't be worth the trouble. Be sure to benchmark.
297
+ *
298
+ * dir:: Directory to load into memory
299
+ */
180
300
  static VALUE
181
301
  frt_ramdir_init(int argc, VALUE *argv, VALUE self)
182
302
  {
183
- VALUE rdir, rclose_dir;
184
- Store *store;
185
- bool close_dir = false;
186
- switch (rb_scan_args(argc, argv, "02", &rdir, &rclose_dir)) {
187
- case 2: close_dir = RTEST(rclose_dir);
188
- case 1: {
189
- Store *ostore;
190
- Data_Get_Struct(rdir, Store, ostore);
191
- if (close_dir) Frt_Unwrap_Struct(rdir);
192
- store = open_ram_store_and_copy(ostore, close_dir);
193
- break;
194
- }
195
- default: store = open_ram_store();
196
- }
197
- Frt_Wrap_Struct(self, NULL, frt_dir_free, store);
198
- object_add(store, self);
199
- return self;
303
+ VALUE rdir;
304
+ Store *store;
305
+ switch (rb_scan_args(argc, argv, "01", &rdir)) {
306
+ case 1: {
307
+ Store *ostore;
308
+ Data_Get_Struct(rdir, Store, ostore);
309
+ store = open_ram_store_and_copy(ostore, false);
310
+ break;
311
+ }
312
+ default: store = open_ram_store();
313
+ }
314
+ Frt_Wrap_Struct(self, NULL, frt_dir_free, store);
315
+ object_add(store, self);
316
+ return self;
200
317
  }
201
318
 
202
319
  /****************************************************************************
@@ -205,29 +322,46 @@ frt_ramdir_init(int argc, VALUE *argv, VALUE self)
205
322
  *
206
323
  ****************************************************************************/
207
324
 
325
+ /*
326
+ * call-seq:
327
+ * FSDirectory.new(/path/to/index/, create = false)
328
+ *
329
+ * Create a new FSDirectory at +/path/to/index/+ which must be a valid path
330
+ * on your file system. If it doesn't exist it will be created. You can also
331
+ * specify the +create+ parameter. If +create+ is true the FSDirectory will
332
+ * be refreshed as new. That is to say, any existing files in the directory
333
+ * will be deleted. The default value for +create+ is false.
334
+ *
335
+ * path:: path to index directory. Must be a valid path on your system
336
+ * create:: set to true if you want any existing files in the directory to be
337
+ * deleted
338
+ */
208
339
  static VALUE
209
- frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
340
+ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
210
341
  {
211
- VALUE self;
212
- Store *store;
213
- bool create = RTEST(rcreate);
214
- rpath = rb_obj_as_string(rpath);
215
- if (create) {
216
- frt_create_dir(rpath);
217
- }
218
- if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
219
- rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
220
- "create one.", RSTRING(rpath)->ptr);
221
- }
222
- store = open_fs_store(RSTRING(rpath)->ptr);
223
- if (create) store->clear_all(store);
224
- if ((self = object_get(store)) == Qnil) {
225
- self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
226
- object_add(store, self);
227
- } else {
228
- store_deref(store);
229
- }
230
- return self;
342
+ VALUE self, rpath, rcreate;
343
+ Store *store;
344
+ bool create;
345
+
346
+ rb_scan_args(argc, argv, "11", &rpath, &rcreate);
347
+ StringValue(rpath);
348
+ create = RTEST(rcreate);
349
+ if (create) {
350
+ frt_create_dir(rpath);
351
+ }
352
+ if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
353
+ rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
354
+ "create one.", RSTRING(rpath)->ptr);
355
+ }
356
+ store = open_fs_store(RSTRING(rpath)->ptr);
357
+ if (create) store->clear_all(store);
358
+ if ((self = object_get(store)) == Qnil) {
359
+ self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
360
+ object_add(store, self);
361
+ } else {
362
+ store_deref(store);
363
+ }
364
+ return self;
231
365
  }
232
366
 
233
367
  /****************************************************************************
@@ -236,33 +370,128 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
236
370
  *
237
371
  ****************************************************************************/
238
372
 
373
+ /*
374
+ * Document-class: Ferret::Store::Directory
375
+ *
376
+ * A Directory is an object which is used to access the index storage.
377
+ * Ruby's IO API is not used so that we can use different storage
378
+ * mechanisms to store the index. Some examples are;
379
+ *
380
+ * * File system based storage (currently implemented as FSDirectory)
381
+ * * RAM based storage (currently implemented as RAMDirectory)
382
+ * * Database based storage
383
+ *
384
+ * NOTE: Once a file has been written and closed, it can no longer be
385
+ * modified. To make any changes to the file it must be deleted and
386
+ * rewritten. For this reason, the method to open a file for writing is
387
+ * called _create_output_, while the method to open a file for reading is
388
+ * called _open_input_. If there is a risk of simultaneous modifications of
389
+ * the files then locks should be used. See Lock to find out how.
390
+ */
391
+ void
392
+ Init_Directory(void)
393
+ {
394
+ cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
395
+ rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
396
+ rb_define_method(cDirectory, "close", frt_dir_close, 0);
397
+ rb_define_method(cDirectory, "exists?", frt_dir_exists, 1);
398
+ rb_define_method(cDirectory, "touch", frt_dir_touch, 1);
399
+ rb_define_method(cDirectory, "delete", frt_dir_delete, 1);
400
+ rb_define_method(cDirectory, "file_count", frt_dir_file_count, 0);
401
+ rb_define_method(cDirectory, "refresh", frt_dir_refresh, 0);
402
+ rb_define_method(cDirectory, "rename", frt_dir_rename, 2);
403
+ rb_define_method(cDirectory, "make_lock", frt_dir_make_lock, 1);
404
+ }
405
+
406
+ /*
407
+ * Document-class: Ferret::Store::Lock
408
+ *
409
+ * A Lock is used to lock a data source so that not more than one
410
+ * output stream can access a data source at one time. It is possible
411
+ * that locks could be disabled. For example a read only index stored
412
+ * on a CDROM would have no need for a lock.
413
+ *
414
+ * You can use a lock in two ways. Firstly:
415
+ *
416
+ * write_lock = @directory.make_lock(LOCK_NAME)
417
+ * write_lock.obtain(WRITE_LOCK_TIME_OUT)
418
+ * ... # Do your file modifications # ...
419
+ * write_lock.release()
420
+ *
421
+ * Alternatively you could use the while locked method. This ensures that
422
+ * the lock will be released once processing has finished.
423
+ *
424
+ * write_lock = @directory.make_lock(LOCK_NAME)
425
+ * write_lock.while_locked(WRITE_LOCK_TIME_OUT) do
426
+ * ... # Do your file modifications # ...
427
+ * end
428
+ */
429
+ void
430
+ Init_Lock(void)
431
+ {
432
+ cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
433
+ rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
434
+ rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
435
+ rb_define_method(cLock, "release", frt_lock_release, 0);
436
+ rb_define_method(cLock, "locked?", frt_lock_is_locked, 0);
437
+
438
+ cLockError = rb_define_class_under(cLock, "LockError", rb_eStandardError);
439
+ }
440
+
441
+ /*
442
+ * Document-class: Ferret::Store::RAMDirectory
443
+ *
444
+ * Memory resident Directory implementation. You should use a RAMDirectory
445
+ * during testing but otherwise you should stick with FSDirectory. While
446
+ * loading an index into memory may slightly speed things up, on most
447
+ * operating systems there won't be much difference so it wouldn't be worth
448
+ * your trouble.
449
+ */
450
+ void
451
+ Init_RAMDirectory(void)
452
+ {
453
+ cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
454
+ rb_define_alloc_func(cRAMDirectory, frt_data_alloc);
455
+ rb_define_method(cRAMDirectory, "initialize", frt_ramdir_init, -1);
456
+ }
457
+
458
+ /*
459
+ * Document-class: Ferret::Store::FSDirectory
460
+ *
461
+ * File-system resident Directory implementation. The FSDirectory will use a
462
+ * single directory to store all of its files. You should not otherwise
463
+ * touch this directory. Modifying the files in the directory will corrupt
464
+ * the index. The one exception to this rule is you may need to delete stale
465
+ * lock files which have a ".lck" extension.
466
+ */
467
+ void
468
+ Init_FSDirectory(void)
469
+ {
470
+ cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
471
+ rb_define_alloc_func(cFSDirectory, frt_data_alloc);
472
+ rb_define_singleton_method(cFSDirectory, "new", frt_fsdir_new, -1);
473
+ }
474
+
475
+ /* rdoc hack
476
+ extern VALUE mFerret = rb_define_module("Ferret");
477
+ */
478
+
479
+ /*
480
+ * Document-module: Ferret::Store
481
+ *
482
+ * The Store module contains all the classes required to handle the storing
483
+ * of an index.
484
+ *
485
+ * NOTE: You can currently store an index on a file-system or in memory. If
486
+ * you want to add a different type of Directory, like a database Directory
487
+ * for instance, you will need to implement it in C.
488
+ */
239
489
  void
240
- Init_dir(void)
490
+ Init_Store(void)
241
491
  {
242
- cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
243
- rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
244
- rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
245
- rb_define_method(cLock, "release", frt_lock_release, 0);
246
- rb_define_method(cLock, "locked?", frt_lock_is_locked, 0);
247
-
248
- cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
249
- rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
250
- rb_define_method(cDirectory, "close", frt_dir_close, 0);\
251
- rb_define_method(cDirectory, "exists?", frt_dir_exists, 1);\
252
- rb_define_method(cDirectory, "touch", frt_dir_touch, 1);\
253
- rb_define_method(cDirectory, "delete", frt_dir_delete, 1);\
254
- rb_define_method(cDirectory, "file_count", frt_dir_file_count, 0);\
255
- rb_define_method(cDirectory, "refresh", frt_dir_refresh, 0);\
256
- rb_define_method(cDirectory, "rename", frt_dir_rename, 2);\
257
- rb_define_method(cDirectory, "make_lock", frt_dir_make_lock, 1);
258
-
259
- /* RAMDirectory */
260
- cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
261
- rb_define_alloc_func(cRAMDirectory, frt_data_alloc);
262
- rb_define_method(cRAMDirectory, "initialize", frt_ramdir_init, -1);
263
-
264
- /* FSDirectory */
265
- cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
266
- rb_define_alloc_func(cFSDirectory, frt_data_alloc);
267
- rb_define_singleton_method(cFSDirectory, "new", frt_fsdir_new, 2);
492
+ mStore = rb_define_module_under(mFerret, "Store");
493
+ Init_Directory();
494
+ Init_Lock();
495
+ Init_RAMDirectory();
496
+ Init_FSDirectory();
268
497
  }