isomorfeus-ferret 0.12.0

Files changed (222)
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,970 @@
+ module Isomorfeus
+   module Ferret
+     module Index
+       # This is a simplified interface to the index. See the TUTORIAL for more
+       # information on how to use this class.
+       class Index
+         include Isomorfeus::Ferret::MonitorMixin
+         include Isomorfeus::Ferret::Store
+         include Isomorfeus::Ferret::Search
+
+         attr_reader :options
+
+         # If you create an Index without any options, it'll simply create an index
+         # in memory. But this class is highly configurable and every option that
+         # you can supply to IndexWriter and QueryParser, you can also set here.
+         # Please look at the options for the constructors to these classes.
+         #
+         # === Options
+         #
+         # See;
+         #
+         # * QueryParser
+         # * IndexWriter
+         #
+         # default_input_field:: Default: "id". This specifies the default field
+         #                       that will be used when you add a simple string
+         #                       to the index using #add_document or <<.
+         # id_field::            Default: "id". This field is used as the field to
+         #                       search when doing searches on a term. For
+         #                       example, if you do a lookup by term "cat", ie
+         #                       index["cat"], this will be the field that is
+         #                       searched.
+         # key::                 Default: nil. Expert: This should only be used
+         #                       if you really know what you are doing. Basically
+         #                       you can set a field or an array of fields to be
+         #                       the key for the index. So if you add a document
+         #                       with the same key as an existing document, the
+         #                       existing document will be replaced by the new
+         #                       object. Using a multiple field key will slow
+         #                       down indexing so it should not be done if
+         #                       performance is a concern. A single field key (or
+         #                       id) should be fine however. Also, you must make
+         #                       sure that your key/keys are either untokenized
+         #                       or that they are not broken up by the analyzer.
+         # auto_flush::          Default: false. Set this option to true if you
+         #                       want the index automatically flushed every time
+         #                       you do a write (includes delete) to the index.
+         #                       This is useful if you have multiple processes
+         #                       accessing the index and you don't want lock
+         #                       errors. Setting :auto_flush to true has a huge
+         #                       performance impact so don't use it if you are
+         #                       concerned about performance. In that case you
+         #                       should think about setting up a DRb indexing
+         #                       service.
+         # lock_retry_time::     Default: 2 seconds. This parameter specifies how
+         #                       long to wait before retrying to obtain the
+         #                       commit lock when detecting if the IndexReader is
+         #                       at the latest version.
+         # close_dir::           Default: false. If you explicitly pass a
+         #                       Directory object to this class and you want
+         #                       Index to close it when it is closed itself then
+         #                       set this to true.
+         # use_typed_range_query:: Default: true. Use TypedRangeQuery instead of
+         #                       the standard RangeQuery when parsing
+         #                       range queries. This is useful if you have number
+         #                       fields which you want to perform range queries
+         #                       on. You won't need to pad or normalize the data
+         #                       in the field in any way to get correct results.
+         #                       However, performance will be a lot slower for
+         #                       large indexes, hence the default.
+         #
+         # == Examples
+         #
+         #   index = Index::Index.new(:analyzer => WhiteSpaceAnalyzer.new())
+         #
+         #   index = Index::Index.new(:path => '/path/to/index',
+         #                            :create_if_missing => false,
+         #                            :auto_flush => true)
+         #
+         #   index = Index::Index.new(:dir => directory,
+         #                            :default_slop => 2,
+         #                            :handle_parse_errors => false)
+         #
+         # You can also pass a block if you like. The index will be yielded and
+         # closed at the end of the block. For example;
+         #
+         #   Ferret::I.new() do |index|
+         #     # do stuff with index. Most of your actions will be cached.
+         #   end
+         def initialize(options = {}, &block)
+           super()
+
+           if options[:key]
+             @key = options[:key]
+             if @key.is_a?(Array)
+               @key = @key.flatten.map {|k| k.to_s.intern}
+             end
+           else
+             @key = nil
+           end
+
+           if (fi = options[:field_infos]).is_a?(String)
+             options[:field_infos] = Isomorfeus::Ferret::Index::FieldInfos.load(fi)
+           end
+
+           @close_dir = options[:close_dir]
+           if options[:dir].is_a?(String)
+             options[:path] = options[:dir]
+           end
+           if options[:path]
+             @close_dir = true
+             begin
+               @dir = FSDirectory.new(options[:path], options[:create])
+             rescue IOError
+               @dir = FSDirectory.new(options[:path],
+                                      options[:create_if_missing] != false)
+             end
+           elsif options[:dir]
+             @dir = options[:dir]
+           else
+             options[:create] = true # this should always be true for a new RAMDir
+             @close_dir = true
+             @dir = RAMDirectory.new
+           end
+
+           @dir.extend(MonitorMixin) unless @dir.kind_of? MonitorMixin
+           options[:dir] = @dir
+           options[:lock_retry_time] ||= 2
+           @options = options
+           if (!@dir.exists?("segments")) || options[:create]
+             IndexWriter.new(options).close
+           end
+           options[:analyzer] ||= Ferret::Analysis::StandardAnalyzer.new
+           if options[:use_typed_range_query].nil?
+             options[:use_typed_range_query] = true
+           end
+
+           @searcher = nil
+           @writer = nil
+           @reader = nil
+
+           @options.delete(:create) # only create the first time if at all
+           @auto_flush = @options[:auto_flush] || false
+           if (@options[:id_field].nil? and @key.is_a?(Symbol))
+             @id_field = @key
+           else
+             @id_field = @options[:id_field] || :id
+           end
+           @default_field = (@options[:default_field] ||= :*)
+           @default_input_field = options[:default_input_field] || @id_field
+
+           if @default_input_field.respond_to?(:intern)
+             @default_input_field = @default_input_field.intern
+           end
+           @open = true
+           @qp = nil
+           if block
+             yield self
+             self.close
+           end
+         end
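For illustration, a constructor sketch based only on the options documented above; the path and field values are made up, and the gem is assumed to be loaded via its top-level require:

    require 'isomorfeus-ferret'

    # on-disk index, keyed by :id, flushed after every write
    index = Isomorfeus::Ferret::Index::Index.new(
      :path       => '/path/to/index',
      :key        => :id,
      :auto_flush => true
    )

    # or purely in memory, closed automatically at the end of the block
    Isomorfeus::Ferret::I.new do |mem_index|
      mem_index << {:id => '1', :content => 'hello world'}
    end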
+
+         # Returns an array of strings with the matches highlighted. The +query+ can
+         # either be a query String or a Ferret::Search::Query object. The doc_id is
+         # the id of the document you want to highlight (usually returned by the
+         # search methods). There are also a number of options you can pass;
+         #
+         # === Options
+         #
+         # field::          Default: @options[:default_field]. The default_field
+         #                  is the field that is usually highlighted but you can
+         #                  specify which field you want to highlight here. If
+         #                  you want to highlight multiple fields then you will
+         #                  need to call this method multiple times.
+         # excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
+         #                  terms will be in the centre of the excerpt. Set to
+         #                  :all to highlight the entire field.
+         # num_excerpts::   Default: 2. Number of excerpts to return.
+         # pre_tag::        Default: "<b>". Tag to place to the left of the
+         #                  match. You'll probably want to change this to a
+         #                  "<span>" tag with a class. Try "\033[36m" for use in
+         #                  a terminal.
+         # post_tag::       Default: "</b>". This tag should close the
+         #                  +:pre_tag+. Try tag "\033[m" in the terminal.
+         # ellipsis::       Default: "...". This is the string that is appended
+         #                  at the beginning and end of excerpts (unless the
+         #                  excerpt hits the start or end of the field).
+         #                  Alternatively you may want to use the HTML entity
+         #                  &#8230; or the UTF-8 string "\342\200\246".
+         def highlight(query, doc_id, options = {})
+           @dir.synchronize do
+             ensure_searcher_open()
+             @searcher.highlight(do_process_query(query),
+                                 doc_id,
+                                 options[:field] || @options[:default_field],
+                                 options)
+           end
+         end
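A hedged usage sketch of highlight: the query, document number and :content field are hypothetical, and the field is assumed to have been indexed so that excerpts can be built from it:

    excerpts = index.highlight('content:ferret', 0,
                               :field          => :content,
                               :excerpt_length => 60,
                               :num_excerpts   => 1,
                               :pre_tag        => '<span class="hit">',
                               :post_tag       => '</span>')
    # => array of excerpt strings with the matches wrapped in the given tags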
+
+         # Closes this index by closing its associated reader and writer objects.
+         def close
+           @dir.synchronize do
+             if not @open
+               raise(StandardError, "tried to close an already closed directory")
+             end
+             @searcher.close() if @searcher
+             @reader.close() if @reader
+             @writer.close() if @writer
+             @dir.close() if @close_dir
+
+             @open = false
+           end
+         end
+
+         # Get the reader for this index.
+         # NOTE:: This will close the writer from this index.
+         def reader
+           ensure_reader_open()
+           return @reader
+         end
+
+         # Get the searcher for this index.
+         # NOTE:: This will close the writer from this index.
+         def searcher
+           ensure_searcher_open()
+           return @searcher
+         end
+
+         # Get the writer for this index.
+         # NOTE:: This will close the reader from this index.
+         def writer
+           ensure_writer_open()
+           return @writer
+         end
+
+         # Adds a document to this index, using the provided analyzer instead of
+         # the local analyzer if provided. If the document contains more than
+         # IndexWriter::MAX_FIELD_LENGTH terms for a given field, the remainder are
+         # discarded.
+         #
+         # There are three ways to add a document to the index.
+         # To add a document you can simply add a string or an array of strings.
+         # This will store all the strings in the "" (ie empty string) field
+         # (unless you specify the default_field when you create the index).
+         #
+         #   index << "This is a new document to be indexed"
+         #   index << ["And here", "is another", "new document", "to be indexed"]
+         #
+         # But these are pretty simple documents. If this is all you want to index
+         # you could probably just use SimpleSearch. So let's give our documents
+         # some fields;
+         #
+         #   index << {:title => "Programming Ruby", :content => "blah blah blah"}
+         #   index << {:title => "Programming Ruby", :content => "yada yada yada"}
+         #
+         # Or if you are indexing data stored in a database, you'll probably want
+         # to store the id;
+         #
+         #   index << {:id => row.id, :title => row.title, :date => row.date}
+         #
+         # See FieldInfos for more information on how to set field properties.
+         def add_document(doc, analyzer = nil)
+           @dir.synchronize do
+             ensure_writer_open()
+             if doc.is_a?(String) or doc.is_a?(Array)
+               doc = {@default_input_field => doc}
+             end
+
+             # delete existing documents with the same key
+             if @key
+               if @key.is_a?(Array)
+                 query = @key.inject(BooleanQuery.new()) do |bq, field|
+                   bq.add_query(TermQuery.new(field, doc[field].to_s), :must)
+                   bq
+                 end
+                 query_delete(query)
+               else
+                 id = doc[@key].to_s
+                 if id
+                   @writer.delete(@key, id)
+                 end
+               end
+             end
+             ensure_writer_open()
+
+             if analyzer
+               old_analyzer = @writer.analyzer
+               @writer.analyzer = analyzer
+               @writer.add_document(doc)
+               @writer.analyzer = old_analyzer
+             else
+               @writer.add_document(doc)
+             end
+
+             flush() if @auto_flush
+           end
+         end
+         alias :<< :add_document
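The three input forms described above, side by side (the field values are invented); the last call shows the optional per-call analyzer, whose class name is assumed from the constructor example earlier in this file:

    index << "a plain string stored in the default input field"
    index << ["an", "array", "of", "strings"]
    index << {:id => '42', :title => 'Programming Ruby', :content => 'blah'}
    index.add_document({:id => '43', :content => 'one-off analyzer'},
                       Isomorfeus::Ferret::Analysis::WhiteSpaceAnalyzer.new)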
+
+         # Run a query through the Searcher on the index. A TopDocs object is
+         # returned with the relevant results. The +query+ is a built in Query
+         # object or a query string that can be parsed by the Ferret::QueryParser.
+         # Here are the options;
+         #
+         # === Options
+         #
+         # offset::      Default: 0. The offset of the start of the section of the
+         #               result-set to return. This is used for paging through
+         #               results. Let's say you have a page size of 10. If you
+         #               don't find the result you want among the first 10 results
+         #               then set +:offset+ to 10 and look at the next 10 results,
+         #               then 20 and so on.
+         # limit::       Default: 10. This is the number of results you want
+         #               returned, also called the page size. Set +:limit+ to
+         #               +:all+ to return all results.
+         # sort::        A Sort object or sort string describing how the field
+         #               should be sorted. A sort string is made up of field names
+         #               which cannot contain spaces and the word "DESC" if you
+         #               want the field reversed, all separated by commas. For
+         #               example; "rating DESC, author, title". Note that Ferret
+         #               will try to determine a field's type by looking at the
+         #               first term in the index and seeing if it can be parsed as
+         #               an integer or a float. Keep this in mind as you may need
+         #               to specify a field's type to sort it correctly. For more
+         #               on this, see the documentation for SortField
+         # filter::      a Filter object to filter the search results with
+         # filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
+         #               and the Searcher object as its parameters and returns a
+         #               Boolean value specifying whether the result should be
+         #               included in the result set.
+         def search(query, options = {})
+           @dir.synchronize do
+             return do_search(query, options)
+           end
+         end
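A paging sketch using the options above; it assumes Ferret's usual TopDocs interface (a total_hits count plus a hits array whose entries carry doc and score), which is not shown in this file:

    top_docs = index.search('title:ruby',
                            :offset => 20,                    # third page of ten
                            :limit  => 10,
                            :sort   => 'rating DESC, title')
    puts "#{top_docs.total_hits} documents matched"
    top_docs.hits.each {|hit| puts "doc #{hit.doc} scored #{hit.score}" }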
+
+         # Run a query through the Searcher on the index. A TopDocs object is
+         # returned with the relevant results. The +query+ is a Query object or a
+         # query string that can be validly parsed by the Ferret::QueryParser. The
+         # Searcher#search_each method yields the internal document id (used to
+         # reference documents in the Searcher object like this;
+         # +searcher[doc_id]+) and the search score for that document. It is
+         # possible for the score to be greater than 1.0 for some queries once
+         # boosts are taken into account. This method will also normalize scores to
+         # the range 0.0..1.0 when the max-score is greater than 1.0. Here are the
+         # options;
+         #
+         # === Options
+         #
+         # offset::      Default: 0. The offset of the start of the section of the
+         #               result-set to return. This is used for paging through
+         #               results. Let's say you have a page size of 10. If you
+         #               don't find the result you want among the first 10 results
+         #               then set +:offset+ to 10 and look at the next 10 results,
+         #               then 20 and so on.
+         # limit::       Default: 10. This is the number of results you want
+         #               returned, also called the page size. Set +:limit+ to
+         #               +:all+ to return all results.
+         # sort::        A Sort object or sort string describing how the field
+         #               should be sorted. A sort string is made up of field names
+         #               which cannot contain spaces and the word "DESC" if you
+         #               want the field reversed, all separated by commas. For
+         #               example; "rating DESC, author, title". Note that Ferret
+         #               will try to determine a field's type by looking at the
+         #               first term in the index and seeing if it can be parsed as
+         #               an integer or a float. Keep this in mind as you may need
+         #               to specify a field's type to sort it correctly. For more
+         #               on this, see the documentation for SortField
+         # filter::      a Filter object to filter the search results with
+         # filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
+         #               and the Searcher object as its parameters and returns a
+         #               Boolean value specifying whether the result should be
+         #               included in the result set.
+         #
+         # returns:: The total number of hits.
+         #
+         # === Example
+         # eg.
+         #   index.search_each(query, options = {}) do |doc, score|
+         #     puts "hit document number #{doc} with a score of #{score}"
+         #   end
+         #
+         def search_each(query, options = {}) # :yield: doc, score
+           @dir.synchronize do
+             ensure_searcher_open()
+             query = do_process_query(query)
+
+             @searcher.search_each(query, options) do |doc, score|
+               yield doc, score
+             end
+           end
+         end
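As an example of the :filter_proc option described above, hits can be post-filtered on a stored field; the :status field and query are hypothetical:

    only_published = lambda do |doc_id, score, searcher|
      searcher[doc_id][:status] == 'published'
    end

    index.search_each('content:ferret', :filter_proc => only_published) do |doc, score|
      puts "hit #{doc} with score #{score}"
    end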
+
+         # Run a query through the Searcher on the index, ignoring scoring and
+         # starting at +:start_doc+ and stopping when +:limit+ matches have been
+         # found. It returns an array of the matching document numbers.
+         #
+         # There is a big performance advantage when using this search method on a
+         # very large index when there are potentially thousands of matching
+         # documents and you only want say 50 of them. The other search methods need
+         # to look at every single match to decide which one has the highest score.
+         # This search method just needs to find +:limit+ number of matches before
+         # it returns.
+         #
+         # === Options
+         #
+         # start_doc:: Default: 0. The start document to start the search from.
+         #             NOTE very carefully that this is not the same as the
+         #             +:offset+ parameter used in the other search methods
+         #             which refers to the offset in the result-set. This is the
+         #             document to start the scan from. So if you are scanning
+         #             through the index in increments of 50 documents at a time
+         #             you need to use the last matched doc in the previous
+         #             search to start your next search. See the example below.
+         # limit::     Default: 50. This is the number of results you want
+         #             returned, also called the page size. Set +:limit+ to
+         #             +:all+ to return all results.
+         # TODO: add option to return loaded documents instead
+         #
+         # === Example
+         #
+         #   start_doc = 0
+         #   begin
+         #     results = @searcher.scan(query, :start_doc => start_doc)
+         #     yield results # or do something with them
+         #     start_doc = results.last
+         #     # start_doc will be nil now if results is empty, ie no more matches
+         #   end while start_doc
+         def scan(query, options = {})
+           @dir.synchronize do
+             ensure_searcher_open()
+             query = do_process_query(query)
+
+             @searcher.scan(query, options)
+           end
+         end
+
+         # Retrieves a document/documents from the index. The method for retrieval
+         # depends on the type of the argument passed.
+         #
+         # If +arg+ is an Integer then return the document based on the internal
+         # document number.
+         #
+         # If +arg+ is a Range, then return the documents within the range based on
+         # internal document number.
+         #
+         # If +arg+ is a String then search for the first document with +arg+ in
+         # the +id+ field. The +id+ field is either :id or whatever you set
+         # +:id_field+ parameter to when you create the Index object.
+         def doc(*arg)
+           @dir.synchronize do
+             id = arg[0]
+             if id.kind_of?(String) or id.kind_of?(Symbol)
+               ensure_reader_open()
+               term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
+               return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil
+             else
+               ensure_reader_open(false)
+               return @reader[*arg]
+             end
+           end
+         end
+         alias :[] :doc
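The three lookup styles described above, in short (the document ids are invented):

    index[0]          # by internal document number
    index[0...5]      # a Range of internal document numbers
    index['42']       # first document whose :id_field contains the term "42"
    index['42'][:title] if index['42']   # read a stored field from the result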
+
+         # Retrieves the term_vector for a document. The document can be referenced
+         # by either a string id to match the id field or an integer corresponding
+         # to Ferret's document number.
+         #
+         # See Ferret::Index::IndexReader#term_vector
+         def term_vector(id, field)
+           @dir.synchronize do
+             ensure_reader_open()
+             if id.kind_of?(String) or id.kind_of?(Symbol)
+               term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
+               if term_doc_enum.next?
+                 id = term_doc_enum.doc
+               else
+                 return nil
+               end
+             end
+             return @reader.term_vector(id, field)
+           end
+         end
+
+         # iterate through all documents in the index. This method preloads the
+         # documents so you don't need to call #load on the document to load all the
+         # fields.
+         def each
+           @dir.synchronize do
+             ensure_reader_open
+             (0...@reader.max_doc).each do |i|
+               yield @reader[i].load unless @reader.deleted?(i)
+             end
+           end
+         end
+
+         # Deletes a document/documents from the index. The method for determining
+         # the document to delete depends on the type of the argument passed.
+         #
+         # If +arg+ is an Integer then delete the document based on the internal
+         # document number. Will raise an error if the document does not exist.
+         #
+         # If +arg+ is a String then search for the documents with +arg+ in the
+         # +id+ field. The +id+ field is either :id or whatever you set +:id_field+
+         # parameter to when you create the Index object. Will fail quietly if no
+         # document exists.
+         #
+         # If +arg+ is a Hash or an Array then a batch delete will be performed.
+         # If +arg+ is an Array then it will be considered an array of +id+'s. If
+         # it is a Hash, then its keys will be used instead as the Array of
+         # document +id+'s. If the +id+ is an Integer then it is considered a
+         # Ferret document number and the corresponding document will be deleted.
+         # If the +id+ is a String or a Symbol then the +id+ will be considered a
+         # term and the documents that contain that term in the +:id_field+ will be
+         # deleted.
+         def delete(arg)
+           @dir.synchronize do
+             if arg.is_a?(String) or arg.is_a?(Symbol)
+               ensure_writer_open()
+               @writer.delete(@id_field, arg.to_s)
+             elsif arg.is_a?(Integer)
+               ensure_reader_open()
+               _cnt = @reader.delete(arg)
+             elsif arg.is_a?(Hash) or arg.is_a?(Array)
+               batch_delete(arg)
+             else
+               raise ArgumentError, "Cannot delete for arg of type #{arg.class}"
+             end
+             flush() if @auto_flush
+           end
+           return self
+         end
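The accepted argument types for delete, following the description above (ids are invented):

    index.delete(7)                # internal document number
    index.delete('42')             # term in the :id_field
    index.delete(['42', '43', 7])  # batch: id terms and document numbers mixed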
+
+         # Delete all documents returned by the query.
+         #
+         # query:: The query to find documents you wish to delete. Can either be a
+         #         string (in which case it is parsed by the standard query parser)
+         #         or an actual query object.
+         def query_delete(query)
+           @dir.synchronize do
+             ensure_writer_open()
+             ensure_searcher_open()
+             query = do_process_query(query)
+             @searcher.search_each(query, :limit => :all) do |doc, score|
+               @reader.delete(doc)
+             end
+             flush() if @auto_flush
+           end
+         end
+
+         # Returns true if document +n+ has been deleted
+         def deleted?(n)
+           @dir.synchronize do
+             ensure_reader_open()
+             return @reader.deleted?(n)
+           end
+         end
+
+         # Update the document referenced by the document number +id+ if +id+ is an
+         # integer or all of the documents which have the term +id+ if +id+ is a
+         # term. For batch updates of a set of documents, see batch_update, which
+         # performs better.
+         #
+         # id::      The number of the document to update. Can also be a string
+         #           representing the value in the +id+ field. Also consider using
+         #           the :key attribute.
+         # new_doc:: The document to replace the old document with
+         def update(id, new_doc)
+           @dir.synchronize do
+             ensure_writer_open()
+             delete(id)
+             if id.is_a?(String) or id.is_a?(Symbol)
+               @writer.commit
+             else
+               ensure_writer_open()
+             end
+             @writer << new_doc
+             flush() if @auto_flush
+           end
+         end
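A small sketch of update; '26' is assumed to be a value stored in the :id_field. Note that update replaces the whole document (delete then add), whereas query_update further below merges new field values into the existing document:

    index.update('26', :id => '26', :title => 'Babylon', :artist => 'David Gray')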
+
+         # Batch updates the documents in an index. You can pass either a Hash or
+         # an Array.
+         #
+         # === Array (recommended)
+         #
+         # If you pass an Array then each value needs to be a Document or a Hash
+         # and each of those documents must have an +:id_field+ which will be used
+         # to delete the old document that this document is replacing.
+         #
+         # === Hash
+         #
+         # If you pass a Hash then the keys of the Hash will be considered the
+         # +id+'s and the values will be the new documents to replace the old ones
+         # with. If the +id+ is an Integer then it is considered a Ferret document
+         # number and the corresponding document will be deleted. If the +id+ is a
+         # String or a Symbol then the +id+ will be considered a term and the
+         # documents that contain that term in the +:id_field+ will be deleted.
+         #
+         # Note: No error will be raised if the document does not currently
+         # exist. A new document will simply be created.
+         #
+         # == Examples
+         #
+         #   # will replace the documents with the +id+'s id:133 and id:253
+         #   @index.batch_update({
+         #     '133' => {:id => '133', :content => 'yada yada yada'},
+         #     '253' => {:id => '253', :content => 'bla bla bal'}
+         #   })
+         #
+         #   # will replace the documents with the Ferret Document numbers 2 and 92
+         #   @index.batch_update({
+         #     2 => {:id => '133', :content => 'yada yada yada'},
+         #     92 => {:id => '253', :content => 'bla bla bal'}
+         #   })
+         #
+         #   # will replace the documents with the +id+'s id:133 and id:253
+         #   # this is recommended as it guarantees no duplicate keys
+         #   @index.batch_update([
+         #     {:id => '133', :content => 'yada yada yada'},
+         #     {:id => '253', :content => 'bla bla bal'}
+         #   ])
+         #
+         # docs:: A Hash of id/document pairs. The set of documents to be updated
+         def batch_update(docs)
+           @dir.synchronize do
+             ids = nil
+             case docs
+             when Array
+               ids = docs.collect{|doc| doc[@id_field].to_s}
+               if ids.include?(nil)
+                 raise ArgumentError, "all documents must have an #{@id_field} field when doing a batch update"
+               end
+             when Hash
+               ids = docs.keys
+               docs = docs.values
+             else
+               raise ArgumentError, "must pass Hash or Array, not #{docs.class}"
+             end
+             batch_delete(ids)
+             ensure_writer_open()
+             docs.each {|new_doc| @writer << new_doc }
+             flush()
+           end
+         end
+
+
+         # Update all the documents returned by the query.
+         #
+         # query::   The query to find documents you wish to update. Can either be
+         #           a string (in which case it is parsed by the standard query
+         #           parser) or an actual query object.
+         # new_val:: The values we are updating. This can be a string in which case
+         #           the default field is updated, or it can be a hash, in which
+         #           case, all fields in the hash are merged into the old hash.
+         #           That is, the old fields are replaced by values in the new hash
+         #           if they exist.
+         #
+         # === Example
+         #
+         #   index << {:id => "26", :title => "Babylon", :artist => "David Grey"}
+         #   index << {:id => "29", :title => "My Oh My", :artist => "David Grey"}
+         #
+         #   # correct the artist's name
+         #   index.query_update('artist:"David Grey"', {:artist => "David Gray"})
+         #
+         #   index["26"]
+         #   #=> {:id => "26", :title => "Babylon", :artist => "David Gray"}
+         #   index["29"]
+         #   #=> {:id => "29", :title => "My Oh My", :artist => "David Gray"}
+         #
+         def query_update(query, new_val)
+           @dir.synchronize do
+             ensure_writer_open()
+             ensure_searcher_open()
+             docs_to_add = []
+             query = do_process_query(query)
+             @searcher.search_each(query, :limit => :all) do |id, score|
+               document = @searcher[id].load
+               if new_val.is_a?(Hash)
+                 document.merge!(new_val)
+               elsif new_val.is_a?(String) or new_val.is_a?(Symbol)
+                 document[@default_input_field] = new_val.to_s
+               end
+               docs_to_add << document
+               @reader.delete(id)
+             end
+             ensure_writer_open()
+             docs_to_add.each {|doc| @writer << doc }
+             flush() if @auto_flush
+           end
+         end
+
+         # Returns true if any documents have been deleted since the index was last
+         # flushed.
+         def has_deletions?()
+           @dir.synchronize do
+             ensure_reader_open()
+             return @reader.has_deletions?
+           end
+         end
+
+         # Flushes all writes to the index. This will not optimize the index but it
+         # will make sure that all writes are written to it.
+         #
+         # NOTE: this is not necessary if you are only using this class. All writes
+         # will automatically flush when you perform an operation that reads the
+         # index.
+         def flush()
+           @dir.synchronize do
+             if @reader
+               if @searcher
+                 @searcher.close
+                 @searcher = nil
+               end
+               @reader.commit
+             elsif @writer
+               @writer.close
+               @writer = nil
+             end
+           end
+         end
+         alias :commit :flush
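With :auto_flush left at its default of false, other processes only see writes after an explicit flush; within this class a read operation reopens the reader automatically, as the NOTE above says. A minimal sketch:

    index << {:id => '1', :content => 'first draft'}
    index << {:id => '2', :content => 'second draft'}
    index.flush   # aliased as commit; both documents are now visible to other readers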
+
+         # optimizes the index. This should only be called when the index will no
+         # longer be updated very often, but will be read a lot.
+         def optimize()
+           @dir.synchronize do
+             ensure_writer_open()
+             @writer.optimize()
+             @writer.close()
+             @writer = nil
+           end
+         end
+
+         # returns the number of documents in the index
+         def size()
+           @dir.synchronize do
+             ensure_reader_open()
+             return @reader.num_docs()
+           end
+         end
+
+         # Merges all segments from an index or an array of indexes into this
+         # index. You can pass a single Index::Index, IndexReader,
+         # Store::Directory or an array of any single one of these.
+         #
+         # This may be used to parallelize batch indexing. A large document
+         # collection can be broken into sub-collections. Each sub-collection can
+         # be indexed in parallel, on a different thread, process or machine and
+         # perhaps all in memory. The complete index can then be created by
+         # merging sub-collection indexes with this method.
+         #
+         # After this completes, the index is optimized.
+         def add_indexes(indexes)
+           @dir.synchronize do
+             ensure_writer_open()
+             indexes = [indexes].flatten # make sure we have an array
+             return if indexes.size == 0 # nothing to do
+             if indexes[0].is_a?(Index)
+               indexes.delete(self) # don't merge with self
+               indexes = indexes.map {|index| index.reader }
+             elsif indexes[0].is_a?(Ferret::Store::Directory)
+               indexes.delete(@dir) # don't merge with self
+               indexes = indexes.map {|dir| IndexReader.new(dir) }
+             elsif indexes[0].is_a?(IndexReader)
+               indexes.delete(@reader) # don't merge with self
+             else
+               raise ArgumentError, "Unknown index type when trying to merge indexes"
+             end
+             ensure_writer_open
+             @writer.add_readers(indexes)
+           end
+         end
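A sketch of the merge workflow described above: two sub-indexes built independently (here in memory, with made-up documents) and merged into one:

    part_a = Isomorfeus::Ferret::I.new
    part_b = Isomorfeus::Ferret::I.new
    part_a << {:id => '1', :content => 'first half of the collection'}
    part_b << {:id => '2', :content => 'second half of the collection'}

    combined = Isomorfeus::Ferret::I.new
    combined.add_indexes([part_a, part_b])
    combined.size   #=> 2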
+
+         # This is a simple utility method for saving an in memory or RAM index to
+         # the file system. The same thing can be achieved by using the
+         # Index::Index#add_indexes method and you will have more options when
+         # creating the new index, however this is a simple way to turn a RAM index
+         # into a file system index.
+         #
+         # directory:: This can either be a Store::Directory object or a String
+         #             representing the path to the directory where you would
+         #             like to store the index.
+         #
+         # create::    True if you'd like to create the directory if it doesn't
+         #             exist or copy over an existing directory. False if you'd
+         #             like to merge with the existing directory. This defaults to
+         #             true.
+         def persist(directory, create = true)
+           synchronize do
+             close_all()
+             old_dir = @dir
+             if directory.is_a?(String)
+               @dir = FSDirectory.new(directory, create)
+             elsif directory.is_a?(Ferret::Store::Directory)
+               @dir = directory
+             end
+             @dir.extend(MonitorMixin) unless @dir.kind_of? MonitorMixin
+             @options[:dir] = @dir
+             @options[:create_if_missing] = true
+             add_indexes([old_dir])
+           end
+         end
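Persisting an in-memory index to disk, as described above (the path is hypothetical):

    ram_index = Isomorfeus::Ferret::I.new          # backed by a RAMDirectory
    ram_index << {:id => '1', :content => 'kept in memory so far'}
    ram_index.persist('/path/to/index')            # now backed by an FSDirectory on disk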
+
+         def to_s
+           buf = ""
+           (0...(size)).each do |i|
+             buf << self[i].to_s + "\n" if not deleted?(i)
+           end
+           buf
+         end
+
+         # Returns an Explanation that describes how +doc+ scored against
+         # +query+.
+         #
+         # This is intended to be used in developing Similarity implementations,
+         # and, for good performance, should not be displayed with every hit.
+         # Computing an explanation is as expensive as executing the query over the
+         # entire index.
+         def explain(query, doc)
+           @dir.synchronize do
+             ensure_searcher_open()
+             query = do_process_query(query)
+
+             return @searcher.explain(query, doc)
+           end
+         end
+
+         # Turn a query string into a Query object with the Index's QueryParser
+         def process_query(query)
+           @dir.synchronize do
+             ensure_searcher_open()
+             return do_process_query(query)
+           end
+         end
+
+         # Returns the field_infos object so that you can add new fields to the
+         # index.
+         def field_infos
+           @dir.synchronize do
+             ensure_writer_open()
+             return @writer.field_infos
+           end
+         end
+
+
+         protected
+         def ensure_writer_open()
+           raise "tried to use a closed index" if not @open
+           return if @writer
+           if @reader
+             @searcher.close if @searcher
+             @reader.close
+             @reader = nil
+             @searcher = nil
+           end
+           @writer = IndexWriter.new(@options)
+         end
+
+         # returns the new reader if one is opened
+         def ensure_reader_open(get_latest = true)
+           raise "tried to use a closed index" if not @open
+           if @reader
+             if get_latest
+               latest = false
+               begin
+                 latest = @reader.latest?
+               rescue Lock::LockError
+                 sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
+                 latest = @reader.latest?
+               end
+               if not latest
+                 @searcher.close if @searcher
+                 @reader.close
+                 return @reader = IndexReader.new(@dir)
+               end
+             end
+           else
+             if @writer
+               @writer.close
+               @writer = nil
+             end
+             return @reader = IndexReader.new(@dir)
+           end
+           return false
+         end
+
+         def ensure_searcher_open()
+           raise "tried to use a closed index" if not @open
+           if ensure_reader_open() or not @searcher
+             @searcher = Searcher.new(@reader)
+           end
+         end
+
+         private
+         def do_process_query(query)
+           if query.is_a?(String)
+             if @qp.nil?
+               @qp = Ferret::QueryParser.new(@options)
+             end
+             # we need to set this every time, in case a new field has been added
+             @qp.fields = @reader.fields unless options[:all_fields] || options[:fields]
+             @qp.tokenized_fields = @reader.tokenized_fields unless options[:tokenized_fields]
+             query = @qp.parse(query)
+           end
+           return query
+         end
+
+         def do_search(query, options)
+           ensure_searcher_open()
+           query = do_process_query(query)
+
+           return @searcher.search(query, options)
+         end
+
+         def close_all()
+           @dir.synchronize do
+             @searcher.close if @searcher
+             @reader.close if @reader
+             @writer.close if @writer
+             @reader = nil
+             @searcher = nil
+             @writer = nil
+           end
+         end
+
+         # If +docs+ is a Hash or an Array then a batch delete will be performed.
+         # If +docs+ is an Array then it will be considered an array of +id+'s. If
+         # it is a Hash, then its keys will be used instead as the Array of
+         # document +id+'s. If the +id+ is an Integer then it is considered a
+         # Ferret document number and the corresponding document will be deleted.
+         # If the +id+ is a String or a Symbol then the +id+ will be considered a
+         # term and the documents that contain that term in the +:id_field+ will
+         # be deleted.
+         #
+         # docs:: An Array of docs to be deleted, or a Hash (in which case the keys
+         #        are used)
+         def batch_delete(docs)
+           docs = docs.keys if docs.is_a?(Hash)
+           raise ArgumentError, "must pass Array or Hash" unless docs.is_a? Array
+           ids = []
+           terms = []
+           docs.each do |doc|
+             case doc
+             when String then terms << doc
+             when Symbol then terms << doc.to_s
+             when Integer then ids << doc
+             else
+               raise ArgumentError, "Cannot delete for arg of type #{doc.class}"
+             end
+           end
+           if ids.size > 0
+             ensure_reader_open
+             ids.each {|id| @reader.delete(id)}
+           end
+           if terms.size > 0
+             ensure_writer_open()
+             @writer.delete(@id_field, terms)
+           end
+           return self
+         end
+
+       end
+     end
+
+     I = Index::Index
+   end
+ end
+