ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,312 +0,0 @@
1
- require 'racc/parser'
2
- module Ferret
3
- # = QueryParser
4
- #
5
- # The Ferret::QueryParser is used to parse Ferret Query Language (FQL) into
6
- # a Ferret Query. FQL is described below.
7
- #
8
- # == Ferret Query Language
9
- #
10
- # === Preamble
11
- #
12
- # The following characters are special characters in FQL;
13
- #
14
- # :, (, ), [, ], {, }, !, +, ", ~, ^, -, |, <, >, =, *, ?, \
15
- #
16
- # If you want to use one of these characters in one of your terms you need
17
- # to escape it with a \ character. \ escapes itself. The exception to this
18
- # rule is within Phrases which are strings surrounded by double quotes (and
19
- # will be explained further below in the section on PhraseQueries). In
20
- # Phrases, only ", | and <> have special meaning and need to be escaped if
21
- # you want the literal value. <> is escaped \<\>.
22
- #
23
- # In the following examples I have only written the query string. This would
24
- # be parsed like;
25
- #
26
- # query = query_parser.parse("pet:(dog AND cat)")
27
- # puts query # => "+pet:dog +pet:cat"
28
- #
29
- # === TermQuery
30
- #
31
- # A term query is the most basic query of all and is what most of the other
32
- # queries are built upon. The term consists of a single word. eg;
33
- #
34
- # 'term'
35
- #
36
- # Note that the analyzer will be run on the term and if it splits the term
37
- # in two then it will be turned into a phrase query. For example, with the
38
- # plain Ferret::Analysis::Analyzer, the following;
39
- #
40
- # 'dave12balmain'
41
- #
42
- # is equivalent to;
43
- #
44
- # '"dave balmain"'
45
- #
46
- # Which we will explain now...
47
- #
48
- # === PhraseQuery
49
- #
50
- # A phrase query is a string of terms surrounded by double quotes. For
51
- # example you could write;
52
- #
53
- # '"quick brown fox"'
54
- #
55
- # But if a "fast" fox is just as good as a quick one you could use the |
56
- # character to specify alternate terms.
57
- #
58
- # '"quick|speedy|fast brown fox"'
59
- #
60
- # What if we don't care what colour the fox is. We can use the <> to specify
61
- # a place setter. eg;
62
- #
63
- # '"quick|speedy|fast <> fox"'
64
- #
65
- # This will match any word in between quick and fox. Alternatively we could
66
- # set the "slop" for the phrase which allows a certain variation in the
67
- # match of the phrase. The slop for a phrase is an integer indicating how
68
- # many positions you are allowed to move the terms to get a match. Read more
69
- # about the slop factor in Ferret::Search::PhraseQuery. To set the slop
70
- # factor for a phrase you can type;
71
- #
72
- # '"big house"~2'
73
- #
74
- # This would match "big house", "big red house", "big red brick house" and
75
- # even "house big". That's right, you don't need to have the terms in order
76
- # if you allow some slop in your phrases. (See Ferret::Search::Spans if you
77
- # need a phrase type query with ordered terms.)
78
- #
79
- # These basic queries will be run on the default field which is set when you
80
- # create the query_parser. But what if you want to search a different field.
81
- # You'll be needing a ...
82
- #
83
- # === FieldQuery
84
- #
85
- # A field query is any field prefixed by <fieldname>:. For example, to
86
- # search for all instances of the term "ski" in field "sport", you'd write;
87
- #
88
- # 'sport:ski'
89
- # Or we can apply a field to phrase;
90
- #
91
- # 'sport:"skiing is fun"'
92
- #
93
- # Now we have a few types of queries, we'll be needing to glue them together
94
- # with a ...
95
- #
96
- # === BooleanQuery
97
- #
98
- # There are a couple of ways of writing boolean queries. Firstly you can
99
- # specify which terms are required, optional or required not to exist (not).
100
- #
101
- # * '+' or "REQ" can be used to indicate a required query. "REQ" must be
102
- # surrounded by white space.
103
- # * '-', '!' or "NOT" are used to indicate a query that is required to be
104
- # false. "NOT" must be surrounded by white space.
105
- # * all other queries are optional if the above symbols are used.
106
- #
107
- # Some examples;
108
- #
109
- # '+sport:ski -sport:snowboard sport:toboggan'
110
- # '+ingredient:chocolate +ingredient:strawberries -ingredient:wheat'
111
- #
112
- # You may also use the boolean operators "AND", "&&", "OR" and "||". eg;
113
- #
114
- # 'sport:ski AND NOT sport:snowboard OR sport:toboggan'
115
- # 'ingredient:chocolate AND ingredient:strawberries AND NOT ingredient:wheat'
116
- #
117
- # You can set the default operator when you create the query parser.
118
- #
119
- # === RangeQuery
120
- #
121
- # A range query finds all documents with terms between the two query terms.
122
- # This can be very useful in particular for dates. eg;
123
- #
124
- # 'date:[20050725 20050905]' # all dates >= 20050725 and <= 20050905
125
- # 'date:[20050725 20050905}' # all dates >= 20050725 and < 20050905
126
- # 'date:{20050725 20050905]' # all dates > 20050725 and <= 20050905
127
- # 'date:{20050725 20050905}' # all dates > 20050725 and < 20050905
128
- #
129
- # You can also do open ended queries like this;
130
- #
131
- # 'date:[20050725>' # all dates >= 20050725
132
- # 'date:{20050725>' # all dates > 20050725
133
- # 'date:<20050905]' # all dates <= 20050905
134
- # 'date:<20050905}' # all dates < 20050905
135
- #
136
- # Or like this;
137
- #
138
- # 'date: >= 20050725'
139
- # 'date: > 20050725'
140
- # 'date: <= 20050905'
141
- # 'date: < 20050905'
142
- #
143
- # If you prefer the above style you could use a boolean query like this;
144
- #
145
- # 'date:( >= 20050725 AND <= 20050905)'
146
- #
147
- # But the RangeQuery-only solution shown first will be faster.
148
- #
149
- # === WildQuery
150
- #
151
- # A wild query is a query using the pattern matching characters * and ?. *
152
- # matches 0 or more characters while ? matches a single character. This type
153
- # of query can be really useful for matching hierarchical categories for
154
- # example. Let's say we had this structure;
155
- #
156
- # /sport/skiing
157
- # /sport/cycling
158
- # /coding1/ruby
159
- # /coding1/c
160
- # /coding2/python
161
- # /coding2/perl
162
- #
163
- # If you wanted all categories with programming languages you could use the
164
- # query;
165
- #
166
- # 'category:/coding?/*'
167
- #
168
- # Note that this query can be quite expensive if not used carefully. In the
169
- # example above there would be no problem but you should be careful not to use
170
- # the wild characters at the beginning of the query as it'll have to iterate
171
- # through every term in that field. Having said that, some fields like the
172
- # category field above will only have a small number of distinct fields so
173
- # this could be ok.
174
- #
175
- # === FuzzyQuery
176
- #
177
- # This is like the sloppy phrase query above, except you are now adding slop
178
- # to a term. Basically it measures the Levenshtein distance between two
179
- # terms and if the value is below the slop threshold the term is a match.
180
- # This time though the slop must be a float between 0 and 1.0, 1.0 being a
181
- # perfect match and 0 being far from a match. The default is set to 0.5 so
182
- # you don't need to give a slop value if you don't want to. You can set the
183
- # default in the Ferret::Search::FuzzyQuery class. Here are a couple of
184
- # examples;
185
- #
186
- # 'content:ferret~'
187
- # 'content:Ostralya~0.4'
188
- #
189
- # Note that this query can be quite expensive. If you'd like to use this
190
- # query, you may want to set a minimum prefix length in the FuzzyQuery
191
- # class. This can substantially reduce the number of terms that the query
192
- # will iterate over.
193
- #
194
- # Well, that's it for the query language. Next we have...
195
- #
196
- # == Extending the Query Parser
197
- #
198
- # The query parser has a number of methods which you may want to subclass if
199
- # you are interested in extending the query parser.
200
- #
201
- # get_term_query:: Called for each term in the query. You may want
202
- # to discard all but the first token instead of
203
- # doing a phrase query.
204
- #
205
- # get_fuzzy_query:: These are expensive. You could set the default
206
- # prefix or perhaps disallow these all together by
207
- # raising an exception.
208
- #
209
- # get_range_query:: You'll probably want to leave this as is.
210
- #
211
- # get_phrase_query:: This method is passed an array of terms or
212
- # perhaps an array of arrays of terms in the case
213
- # of a multi-term phrase query as well as the slop
214
- # and it returns a phrase query. Perhaps you'd
215
- # like to use a span query instead of the standard
216
- # phrase query to ensure the order of the terms
217
- # remains intact.
218
- #
219
- # get_normal_phrase_query:: Called for phrases without any multi-terms. This
220
- # method is called by the standard
221
- # get_phrase_query.
222
- #
223
- # get_multi_phrase_query:: Called for phrases with multi-terms. This method
224
- # is called by the standard get_phrase_query.
225
- #
226
- # get_boolean_query:: Called with an array of clauses.
227
- #
228
- class QueryParser < Racc::Parser
229
- include Ferret::Search
230
- include Ferret::Index
231
-
232
- # Create a new QueryParser.
233
- #
234
- # default_field:: all queries without a specified field are run on
235
- # this field.
236
- #
237
- # options:: the following options exist and should be passed in as a
238
- # hash. eg;
239
- #
240
- # qp = QueryParser.new("*", { :analyzer => WhiteSpaceAnalyzer.new(),
241
- # :wild_lower => true})
242
- #
243
- # === Options
244
- #
245
- # analyzer:: The analyzer is used to break phrases up into
246
- # terms and to turn terms into tokens recognized in
247
- # the index. Analysis::Analyzer is the default
248
- # occur_default:: Set to either BooleanClause::Occur::SHOULD
249
- # (default) or BooleanClause::Occur::MUST to specify
250
- # the default Occur operator.
251
- # wild_lower:: Set to false if you don't want the terms in fuzzy
252
- # and wild queries to be set to lower case. You
253
- # should do this if your analyzer doesn't downcase.
254
- # The default is true.
255
- # default_slop:: Set the default slop for phrase queries. This
256
- # defaults to 0.
257
- # handle_parse_errors:: Set this to true if you want the QueryParser to
258
- # degrade gracefully on errors. If the query parser
259
- # fails to parse this query, it will try to parse it
260
- # as a straight boolean query on the default field
261
- # ignoring all query punctuation. If this fails, it
262
- # will return an empty TermQuery. If you use this
263
- # and you need to know why your query isn't working
264
- # you can use the Query#to_s method on the query
265
- # returned to see what is happening to your query.
266
- # This defaults to false, in which case a
267
- # QueryParseException is thrown.
268
- def initialize(default_field = "", options = {})
269
- end
270
-
271
- # parses a string into a Ferret::Search::Query. The string needs to be
272
- # parseable FQL.
273
- def parse(str)
274
- end
275
-
276
- # Set to false if you don't want the terms in fuzzy and wild queries to be
277
- # set to lower case. You should do this if your analyzer doesn't downcase.
278
- def wild_lower=()
279
- end
280
-
281
- # Returns the value of wild_lower. See #wild_lower=.
282
- def wild_lower?()
283
- end
284
-
285
- # Processes the query string escaping all special characters within
286
- # phrases and making sure that double quotes and brackets are matching.
287
- # This method will be called by the parse method so you should override it
288
- # if you'd like to do your own query string cleaning.
289
- def clean_string(str)
290
- end
291
-
292
- # The exception thrown when there is an error parsing the query string.
293
- # This also holds the Racc::ParseError that was thrown in case you want to
294
- # investigate why a query won't parse.
295
- class QueryParseException < Exception
296
- attr_reader :parse_error
297
-
298
- # Create a new QueryParseException
299
- #
300
- # error:: An error string describing the query that failed
301
- # parse_error:: The actual parse error that was thrown by Racc. It is a
302
- # Racc::ParseError object.
303
- def initialize(error, parse_error)
304
- super(error)
305
- @parse_error = parse_error
306
- end
307
- end
308
- end
309
-
310
- end
311
-
312
- require 'ferret/query_parser/query_parser.tab.rb'
@@ -1,928 +0,0 @@
1
- #
2
- # DO NOT MODIFY!!!!
3
- # This file is automatically generated by racc 1.4.4
4
- # from racc grammer file "lib/ferret/query_parser/query_parser.y".
5
- #
6
-
7
- require 'racc/parser'
8
-
9
-
10
- module Ferret
11
-
12
- class QueryParser < Racc::Parser
13
-
14
- module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id07e7308361', 'lib/ferret/query_parser/query_parser.y', 126
15
- attr_accessor :default_field, :fields, :handle_parse_errors
16
-
17
- def initialize(default_field = "*", options = {})
18
- @yydebug = true
19
- if default_field.is_a?(String) and default_field.index("|")
20
- default_field = default_field.split("|")
21
- end
22
- @field = @default_field = default_field
23
- @analyzer = options[:analyzer] || Analysis::StandardAnalyzer.new
24
- @wild_lower = options[:wild_lower].nil? ? true : options[:wild_lower]
25
- @occur_default = options[:occur_default] || BooleanClause::Occur::SHOULD
26
- @default_slop = options[:default_slop] || 0
27
- @fields = options[:fields]||[]
28
- @handle_parse_errors = options[:handle_parse_errors] || false
29
- end
30
-
31
- RESERVED = {
32
- 'AND' => :AND,
33
- '&&' => :AND,
34
- 'OR' => :OR,
35
- '||' => :OR,
36
- 'NOT' => :NOT,
37
- '!' => :NOT,
38
- '-' => :NOT,
39
- 'REQ' => :REQ,
40
- '+' => :REQ
41
- }
42
-
43
- ECHR = %q,:()\[\]{}!+"~^\-\|<>\=\*\?,
44
- EWCHR = %q,:()\[\]{}!+"~^\-\|<>\=,
45
-
46
- def parse(str)
47
- orig_str = str
48
- str = clean_string(str)
49
- str.strip!
50
- @q = []
51
-
52
- until str.empty? do
53
- case str
54
- when /\A\s+/
55
- ;
56
- when /\A([#{EWCHR}]|[*?](?=:))/
57
- @q.push [ RESERVED[$&]||$&, $& ]
58
- when /\A(\&\&|\|\|)/
59
- @q.push [ RESERVED[$&], $& ]
60
- when /\A(\\[#{ECHR}]|[^\s#{ECHR}])*[?*](\\[#{EWCHR}]|[^\s#{EWCHR}])*/
61
- str = $'
62
- unescaped = $&.gsub(/\\(?!\\)/,"")
63
- @q.push [ :WILD_STRING, unescaped ]
64
- next
65
- when /\A(\\[#{ECHR}]|[^\s#{ECHR}])+/
66
- symbol = RESERVED[$&]
67
- if symbol
68
- @q.push [ symbol, $& ]
69
- else
70
- str = $'
71
- unescaped = $&.gsub(/\\(?!\\)/,"")
72
- @q.push [ :WORD, unescaped ]
73
- next
74
- end
75
- else
76
- raise RuntimeError, "shouldn't happen"
77
- end
78
- str = $'
79
- end
80
- if @q.empty?
81
- return TermQuery.new(Term.new(@default_field, ""))
82
- end
83
-
84
- @q.push([ false, '$' ])
85
-
86
- query = nil
87
- begin
88
- query = do_parse
89
- rescue Racc::ParseError => e
90
- if @handle_parse_errors
91
- @field = @default_field
92
- query = _get_bad_query(orig_str)
93
- else
94
- raise QueryParseException.new("Could not parse #{str}", e)
95
- end
96
- end
97
- return query
98
- end
99
-
100
- def next_token
101
- @q.shift
102
- end
103
-
104
- PHRASE_CHARS = [?<, ?>, ?|, ?"] # these chars have meaning within phrases
105
- def clean_string(str)
106
- escape_chars = ECHR.gsub(/\\/,"").unpack("c*")
107
- pb = nil
108
- br_stack = []
109
- quote_open = false
110
- # leave a little extra
111
- new_str = []
112
-
113
- str.each_byte do |b|
114
- # ignore escaped characters
115
- if pb == ?\\
116
- if quote_open and PHRASE_CHARS.index(b)
117
- new_str << ?\\ # this was left off the first time through
118
- end
119
-
120
- new_str << b
121
- pb = (b == ?\\ ? ?: : b) # \\ has escaped itself so does nothing more
122
- next
123
- end
124
- case b
125
- when ?\\
126
- new_str << b if !quote_open # We do our own escaping below
127
- when ?"
128
- quote_open = !quote_open
129
- new_str << b
130
- when ?(
131
- if !quote_open
132
- br_stack << b
133
- else
134
- new_str << ?\\
135
- end
136
- new_str << b
137
- when ?)
138
- if !quote_open
139
- if br_stack.size == 0
140
- new_str.unshift(?()
141
- else
142
- br_stack.pop
143
- end
144
- else
145
- new_str << ?\\
146
- end
147
- new_str << b
148
- when ?>
149
- if quote_open
150
- if pb == ?<
151
- new_str.delete_at(-2)
152
- else
153
- new_str << ?\\
154
- end
155
- end
156
- new_str << b
157
- else
158
- if quote_open
159
- if escape_chars.index(b) and b != ?|
160
- new_str << ?\\
161
- end
162
- end
163
- new_str << b
164
- end
165
- pb = b
166
- end
167
- new_str << ?" if quote_open
168
- br_stack.each { |b| new_str << ?) }
169
- return new_str.pack("c*")
170
- end
171
-
172
- def get_bad_query(field, str)
173
- get_term_query(field, str) || BooleanQuery.new()
174
- end
175
-
176
- def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
177
- RangeQuery.new(field, start_word, end_word, inc_upper, inc_lower)
178
- end
179
-
180
- def get_term_query(field, word)
181
- tokens = []
182
- stream = @analyzer.token_stream(field, word)
183
- while token = stream.next
184
- tokens << token
185
- end
186
- if tokens.length == 0
187
- return nil
188
- elsif tokens.length == 1
189
- return TermQuery.new(Term.new(field, tokens[0].text))
190
- else
191
- pq = PhraseQuery.new()
192
- tokens.each do |token|
193
- pq.add(Term.new(field, token.text), nil, token.pos_inc)
194
- end
195
- return pq
196
- end
197
- end
198
-
199
- def get_fuzzy_query(field, word, min_sim = nil)
200
- tokens = []
201
- stream = @analyzer.token_stream(field, word)
202
- if token = stream.next # only makes sense to look at one term for fuzzy
203
- if min_sim
204
- return FuzzyQuery.new(Term.new(field, token.text), min_sim.to_f)
205
- else
206
- return FuzzyQuery.new(Term.new(field, token.text))
207
- end
208
- else
209
- return TermQuery.new(Term.new(field, ""))
210
- end
211
- end
212
-
213
- def get_wild_query(field, regexp)
214
- if (regexp =~ /^([^?*]*)\*$/)
215
- return PrefixQuery.new(Term.new(field, $1))
216
- else
217
- return WildcardQuery.new(Term.new(field, regexp))
218
- end
219
- end
220
-
221
- def add_multi_word(words, word)
222
- last_word = words[-1]
223
- if not last_word.is_a?(Array)
224
- last_word = words[-1] = [words[-1]]
225
- end
226
- last_word << word
227
- return words
228
- end
229
-
230
- def get_normal_phrase_query(field, positions)
231
- pq = PhraseQuery.new()
232
- pq.slop = @default_slop
233
- pos_inc = 0
234
-
235
- positions.each do |position|
236
- if position.nil?
237
- pos_inc += 1
238
- next
239
- end
240
- stream = @analyzer.token_stream(field, position)
241
- tokens = []
242
- while token = stream.next
243
- tokens << token
244
- end
245
- tokens.each do |token|
246
- pq.add(Term.new(field, token.text), nil,
247
- token.pos_inc + pos_inc)
248
- pos_inc = 0
249
- end
250
- end
251
- return pq
252
- end
253
-
254
- def get_multi_phrase_query(field, positions)
255
- mpq = MultiPhraseQuery.new()
256
- mpq.slop = @default_slop
257
- pos_inc = 0
258
-
259
- positions.each do |position|
260
- if position.nil?
261
- pos_inc += 1
262
- next
263
- end
264
- if position.is_a?(Array)
265
- position.compact! # it doesn't make sense to have an empty spot here
266
- terms = []
267
- position.each do |word|
268
- stream = @analyzer.token_stream(field, word)
269
- if token = stream.next # only put one term per word
270
- terms << Term.new(field, token.text)
271
- end
272
- end
273
- mpq.add(terms, nil, pos_inc + 1) # must go at least one forward
274
- pos_inc = 0
275
- else
276
- stream = @analyzer.token_stream(field, position)
277
- tokens = []
278
- while token = stream.next
279
- tokens << token
280
- end
281
- tokens.each do |token|
282
- mpq.add([Term.new(field, token.text)], nil,
283
- token.pos_inc + pos_inc)
284
- pos_inc = 0
285
- end
286
- end
287
- end
288
- return mpq
289
- end
290
-
291
- def get_phrase_query(positions, slop = nil)
292
- if positions.size == 1
293
- if positions[0].is_a?(Array)
294
- clauses = positions[0].map { |word|
295
- BooleanClause.new(_get_term_query(word), BooleanClause::Occur::SHOULD)
296
- }
297
- return get_boolean_query(clauses)
298
- else
299
- return _get_term_query(positions[0])
300
- end
301
- end
302
-
303
- multi_phrase = false
304
- positions.each do |position|
305
- if position.is_a?(Array)
306
- position.compact!
307
- if position.size > 1
308
- multi_phrase = true
309
- end
310
- end
311
- end
312
-
313
- return do_multiple_fields() do |field|
314
- q = nil
315
- if not multi_phrase
316
- q = get_normal_phrase_query(field, positions.flatten)
317
- else
318
- q = get_multi_phrase_query(field, positions)
319
- end
320
- q.slop = slop if slop
321
- next q
322
- end
323
- end
324
-
325
- def add_and_clause(clauses, clause)
326
- (clauses||=[]).compact!
327
- if (clauses.length == 1)
328
- last_cl = clauses[0]
329
- last_cl.occur = BooleanClause::Occur::MUST if not last_cl.prohibited?
330
- end
331
-
332
- return if clause.nil? # incase a query got destroyed by the analyzer
333
-
334
- clause.occur = BooleanClause::Occur::MUST if not clause.prohibited?
335
- clauses << clause
336
- end
337
-
338
- def add_or_clause(clauses, clause)
339
- clauses << clause
340
- end
341
-
342
- def add_default_clause(clauses, clause)
343
- if @occur_default == BooleanClause::Occur::MUST
344
- add_and_clause(clauses, clause)
345
- else
346
- add_or_clause(clauses, clause)
347
- end
348
- end
349
-
350
- def get_boolean_query(clauses)
351
- # possible that we got all nil clauses so check
352
- bq = BooleanQuery.new()
353
- return bq if clauses.nil?
354
- clauses.compact!
355
- return bq if clauses.size == 0
356
-
357
- if clauses.size == 1 and not clauses[0].prohibited?
358
- return clauses[0].query
359
- end
360
- clauses.each {|clause| bq << clause }
361
- return bq
362
- end
363
-
364
- def get_boolean_clause(query, occur)
365
- return nil if query.nil?
366
- return BooleanClause.new(query, occur)
367
- end
368
-
369
- def do_multiple_fields()
370
- # set @field to all fields if @field is the multi-field operator
371
- @field = @fields if @field.is_a?(String) and @field == "*"
372
- if @field.is_a?(String)
373
- return yield(@field)
374
- elsif @field.size == 1
375
- return yield(@field[0])
376
- else
377
- bq = BooleanQuery.new()
378
- @field.each do |field|
379
- q = yield(field)
380
- bq << BooleanClause.new(q) if q
381
- end
382
- return (bq.clauses.size == 0) ? nil : bq
383
- end
384
- end
385
-
386
- def method_missing(meth, *args)
387
- if meth.to_s =~ /_(get_[a-z_]+_query)/
388
- do_multiple_fields() do |field|
389
- send($1, *([field] + args))
390
- end
391
- else
392
- raise NoMethodError.new("No such method #{meth} in #{self.class}", meth, args)
393
- end
394
- end
395
-
396
- def QueryParser.parse(query, default_field = "*", options = {})
397
- qp = QueryParser.new(default_field, options)
398
- return qp.parse(query)
399
- end
400
-
401
- ..end lib/ferret/query_parser/query_parser.y modeval..id07e7308361
402
-
403
- ##### racc 1.4.4 generates ###
404
-
405
- racc_reduce_table = [
406
- 0, 0, :racc_error,
407
- 1, 26, :_reduce_1,
408
- 1, 27, :_reduce_2,
409
- 3, 27, :_reduce_3,
410
- 3, 27, :_reduce_4,
411
- 2, 27, :_reduce_5,
412
- 2, 28, :_reduce_6,
413
- 2, 28, :_reduce_7,
414
- 1, 28, :_reduce_8,
415
- 1, 29, :_reduce_none,
416
- 3, 29, :_reduce_10,
417
- 1, 30, :_reduce_none,
418
- 3, 30, :_reduce_12,
419
- 1, 30, :_reduce_none,
420
- 1, 30, :_reduce_none,
421
- 1, 30, :_reduce_none,
422
- 1, 30, :_reduce_none,
423
- 1, 31, :_reduce_17,
424
- 3, 31, :_reduce_18,
425
- 2, 31, :_reduce_19,
426
- 1, 35, :_reduce_20,
427
- 0, 37, :_reduce_21,
428
- 4, 32, :_reduce_22,
429
- 0, 38, :_reduce_23,
430
- 0, 39, :_reduce_24,
431
- 5, 32, :_reduce_25,
432
- 1, 36, :_reduce_26,
433
- 3, 36, :_reduce_27,
434
- 3, 33, :_reduce_28,
435
- 5, 33, :_reduce_29,
436
- 2, 33, :_reduce_30,
437
- 4, 33, :_reduce_31,
438
- 1, 40, :_reduce_32,
439
- 2, 40, :_reduce_33,
440
- 3, 40, :_reduce_34,
441
- 3, 40, :_reduce_35,
442
- 4, 34, :_reduce_36,
443
- 4, 34, :_reduce_37,
444
- 4, 34, :_reduce_38,
445
- 4, 34, :_reduce_39,
446
- 3, 34, :_reduce_40,
447
- 3, 34, :_reduce_41,
448
- 3, 34, :_reduce_42,
449
- 3, 34, :_reduce_43,
450
- 2, 34, :_reduce_44,
451
- 3, 34, :_reduce_45,
452
- 3, 34, :_reduce_46,
453
- 2, 34, :_reduce_47 ]
454
-
455
- racc_reduce_n = 48
456
-
457
- racc_shift_n = 78
458
-
459
- racc_action_table = [
460
- 8, 10, 67, 66, 75, 74, 50, 21, 2, 40,
461
- 25, 7, 9, 38, 13, 15, 17, 19, 8, 10,
462
- 3, 53, 46, 39, 26, 21, 2, 37, -26, 7,
463
- 9, 45, 13, 15, 17, 19, 8, 10, 3, 43,
464
- 64, 49, -26, 21, 2, 60, 59, 7, 9, 63,
465
- 13, 15, 17, 19, 58, 57, 3, 8, 10, 31,
466
- 33, 54, 55, 56, 21, 2, 44, 48, 7, 9,
467
- 61, 13, 15, 17, 19, 36, 62, 3, 8, 10,
468
- 31, 33, 34, 42, 65, 21, 2, 41, 30, 7,
469
- 9, 70, 13, 15, 17, 19, 8, 10, 3, 71,
470
- 72, 73, 24, 21, 2, 77, nil, 7, 9, nil,
471
- 13, 15, 17, 19, 21, 2, 3, nil, 7, 9,
472
- nil, 13, 15, 17, 19, 21, 2, 3, nil, 7,
473
- 9, nil, 13, 15, 17, 19, 21, 2, 3, nil,
474
- 7, 9, nil, 13, 15, 17, 19, 21, 2, 3,
475
- nil, 7, 9, nil, 13, 15, 17, 19, nil, nil,
476
- 3 ]
477
-
478
- racc_action_check = [
479
- 0, 0, 46, 46, 64, 64, 30, 0, 0, 17,
480
- 6, 0, 0, 15, 0, 0, 0, 0, 2, 2,
481
- 0, 34, 24, 17, 6, 2, 2, 15, 21, 2,
482
- 2, 24, 2, 2, 2, 2, 33, 33, 2, 21,
483
- 42, 28, 21, 33, 33, 38, 38, 33, 33, 42,
484
- 33, 33, 33, 33, 37, 35, 33, 23, 23, 23,
485
- 23, 35, 35, 35, 23, 23, 23, 26, 23, 23,
486
- 39, 23, 23, 23, 23, 13, 41, 23, 12, 12,
487
- 12, 12, 13, 19, 43, 12, 12, 18, 11, 12,
488
- 12, 53, 12, 12, 12, 12, 31, 31, 12, 54,
489
- 55, 56, 3, 31, 31, 72, nil, 31, 31, nil,
490
- 31, 31, 31, 31, 8, 8, 31, nil, 8, 8,
491
- nil, 8, 8, 8, 8, 25, 25, 8, nil, 25,
492
- 25, nil, 25, 25, 25, 25, 10, 10, 25, nil,
493
- 10, 10, nil, 10, 10, 10, 10, 49, 49, 10,
494
- nil, 49, 49, nil, 49, 49, 49, 49, nil, nil,
495
- 49 ]
496
-
497
- racc_action_pointer = [
498
- -3, nil, 15, 92, nil, nil, 8, nil, 104, nil,
499
- 126, 88, 75, 65, nil, 3, nil, -1, 78, 73,
500
- nil, 26, nil, 54, 12, 115, 57, nil, 39, nil,
501
- 6, 93, nil, 33, 8, 45, nil, 44, 24, 60,
502
- nil, 66, 30, 74, nil, nil, -19, nil, nil, 137,
503
- nil, nil, nil, 81, 89, 87, 82, nil, nil, nil,
504
- nil, nil, nil, nil, -17, nil, nil, nil, nil, nil,
505
- nil, nil, 95, nil, nil, nil, nil, nil ]
506
-
507
- racc_action_default = [
508
- -48, -14, -48, -48, -15, -16, -48, -20, -48, -23,
509
- -48, -48, -1, -48, -2, -48, -8, -48, -9, -48,
510
- -11, -17, -13, -48, -48, -48, -48, -6, -48, -7,
511
- -48, -48, -5, -48, -30, -48, -32, -48, -44, -48,
512
- -47, -48, -48, -19, -12, -43, -48, -21, -27, -48,
513
- 78, -3, -4, -48, -48, -28, -48, -33, -45, -40,
514
- -41, -46, -10, -42, -48, -18, -39, -38, -22, -24,
515
- -31, -35, -48, -34, -37, -36, -25, -29 ]
516
-
517
- racc_goto_table = [
518
- 47, 32, 12, 27, 23, 29, 11, 68, 28, 76,
519
- 35, nil, 32, nil, nil, nil, nil, nil, nil, nil,
520
- 51, nil, 52, nil, 69 ]
521
-
522
- racc_goto_check = [
523
- 5, 3, 2, 4, 2, 4, 1, 12, 13, 14,
524
- 15, nil, 3, nil, nil, nil, nil, nil, nil, nil,
525
- 3, nil, 3, nil, 5 ]
526
-
527
- racc_goto_pointer = [
528
- nil, 6, 2, -11, -5, -25, nil, nil, nil, nil,
529
- nil, nil, -40, -1, -60, -3 ]
530
-
531
- racc_goto_default = [
532
- nil, nil, nil, 14, 16, 18, 20, 22, 1, 4,
533
- 5, 6, nil, nil, nil, nil ]
534
-
535
- racc_token_table = {
536
- false => 0,
537
- Object.new => 1,
538
- ":" => 2,
539
- :REQ => 3,
540
- :NOT => 4,
541
- :AND => 5,
542
- :OR => 6,
543
- :HIGH => 7,
544
- :LOW => 8,
545
- "^" => 9,
546
- :WORD => 10,
547
- "(" => 11,
548
- ")" => 12,
549
- "~" => 13,
550
- :WILD_STRING => 14,
551
- "*" => 15,
552
- "|" => 16,
553
- "\"" => 17,
554
- "<" => 18,
555
- ">" => 19,
556
- "[" => 20,
557
- "]" => 21,
558
- "}" => 22,
559
- "{" => 23,
560
- "=" => 24 }
561
-
562
- racc_use_result_var = false
563
-
564
- racc_nt_base = 25
565
-
566
- Racc_arg = [
567
- racc_action_table,
568
- racc_action_check,
569
- racc_action_default,
570
- racc_action_pointer,
571
- racc_goto_table,
572
- racc_goto_check,
573
- racc_goto_default,
574
- racc_goto_pointer,
575
- racc_nt_base,
576
- racc_reduce_table,
577
- racc_token_table,
578
- racc_shift_n,
579
- racc_reduce_n,
580
- racc_use_result_var ]
581
-
582
- Racc_token_to_s_table = [
583
- '$end',
584
- 'error',
585
- '":"',
586
- 'REQ',
587
- 'NOT',
588
- 'AND',
589
- 'OR',
590
- 'HIGH',
591
- 'LOW',
592
- '"^"',
593
- 'WORD',
594
- '"("',
595
- '")"',
596
- '"~"',
597
- 'WILD_STRING',
598
- '"*"',
599
- '"|"',
600
- '"\""',
601
- '"<"',
602
- '">"',
603
- '"["',
604
- '"]"',
605
- '"}"',
606
- '"{"',
607
- '"="',
608
- '$start',
609
- 'top_query',
610
- 'bool_query',
611
- 'bool_clause',
612
- 'boosted_query',
613
- 'query',
614
- 'term_query',
615
- 'field_query',
616
- 'phrase_query',
617
- 'range_query',
618
- 'wild_query',
619
- 'field',
620
- '@1',
621
- '@2',
622
- '@3',
623
- 'phrase_words']
624
-
625
- Racc_debug_parser = false
626
-
627
- ##### racc system variables end #####
628
-
629
- # reduce 0 omitted
630
-
631
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 19
632
- def _reduce_1( val, _values)
633
- get_boolean_query(val[0])
634
- end
635
- .,.,
636
-
637
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 24
638
- def _reduce_2( val, _values)
639
- [val[0]]
640
- end
641
- .,.,
642
-
643
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 28
644
- def _reduce_3( val, _values)
645
- add_and_clause(val[0], val[2])
646
- end
647
- .,.,
648
-
649
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 32
650
- def _reduce_4( val, _values)
651
- add_or_clause(val[0], val[2])
652
- end
653
- .,.,
654
-
655
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 36
656
- def _reduce_5( val, _values)
657
- add_default_clause(val[0], val[1])
658
- end
659
- .,.,
660
-
661
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 41
662
- def _reduce_6( val, _values)
663
- get_boolean_clause(val[1], BooleanClause::Occur::MUST)
664
- end
665
- .,.,
666
-
667
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 45
668
- def _reduce_7( val, _values)
669
- get_boolean_clause(val[1], BooleanClause::Occur::MUST_NOT)
670
- end
671
- .,.,
672
-
673
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 49
674
- def _reduce_8( val, _values)
675
- get_boolean_clause(val[0], BooleanClause::Occur::SHOULD)
676
- end
677
- .,.,
678
-
679
- # reduce 9 omitted
680
-
681
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 51
682
- def _reduce_10( val, _values)
683
- val[0].boost = val[2].to_f; return val[0]
684
- end
685
- .,.,
686
-
687
- # reduce 11 omitted
688
-
689
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 58
690
- def _reduce_12( val, _values)
691
- get_boolean_query(val[1])
692
- end
693
- .,.,
694
-
695
- # reduce 13 omitted
696
-
697
- # reduce 14 omitted
698
-
699
- # reduce 15 omitted
700
-
701
- # reduce 16 omitted
702
-
703
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 67
704
- def _reduce_17( val, _values)
705
- _get_term_query(val[0])
706
- end
707
- .,.,
708
-
709
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 71
710
- def _reduce_18( val, _values)
711
- _get_fuzzy_query(val[0], val[2])
712
- end
713
- .,.,
714
-
715
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 75
716
- def _reduce_19( val, _values)
717
- _get_fuzzy_query(val[0])
718
- end
719
- .,.,
720
-
721
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 80
722
- def _reduce_20( val, _values)
723
- _get_wild_query(val[0])
724
- end
725
- .,.,
726
-
727
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 81
728
- def _reduce_21( val, _values)
729
- @field = @default_field
730
- end
731
- .,.,
732
-
733
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 85
734
- def _reduce_22( val, _values)
735
- val[2]
736
- end
737
- .,.,
738
-
739
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 85
740
- def _reduce_23( val, _values)
741
- @field = "*"
742
- end
743
- .,.,
744
-
745
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 85
746
- def _reduce_24( val, _values)
747
- @field = @default_field
748
- end
749
- .,.,
750
-
751
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 89
752
- def _reduce_25( val, _values)
753
- val[3]
754
- end
755
- .,.,
756
-
757
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 90
758
- def _reduce_26( val, _values)
759
- @field = [val[0]]
760
- end
761
- .,.,
762
-
763
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 91
764
- def _reduce_27( val, _values)
765
- @field = val[0] += [val[2]]
766
- end
767
- .,.,
768
-
769
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 97
770
- def _reduce_28( val, _values)
771
- get_phrase_query(val[1])
772
- end
773
- .,.,
774
-
775
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 101
776
- def _reduce_29( val, _values)
777
- get_phrase_query(val[1], val[4].to_i)
778
- end
779
- .,.,
780
-
781
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 101
782
- def _reduce_30( val, _values)
783
- nil
784
- end
785
- .,.,
786
-
787
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 102
788
- def _reduce_31( val, _values)
789
- nil
790
- end
791
- .,.,
792
-
793
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 104
794
- def _reduce_32( val, _values)
795
- [val[0]]
796
- end
797
- .,.,
798
-
799
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 105
800
- def _reduce_33( val, _values)
801
- val[0] << val[1]
802
- end
803
- .,.,
804
-
805
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 106
806
- def _reduce_34( val, _values)
807
- val[0] << nil
808
- end
809
- .,.,
810
-
811
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 107
812
- def _reduce_35( val, _values)
813
- add_multi_word(val[0], val[2])
814
- end
815
- .,.,
816
-
817
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 109
818
- def _reduce_36( val, _values)
819
- _get_range_query(val[1], val[2], true, true)
820
- end
821
- .,.,
822
-
823
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 110
824
- def _reduce_37( val, _values)
825
- _get_range_query(val[1], val[2], true, false)
826
- end
827
- .,.,
828
-
829
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 111
830
- def _reduce_38( val, _values)
831
- _get_range_query(val[1], val[2], false, true)
832
- end
833
- .,.,
834
-
835
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 112
836
- def _reduce_39( val, _values)
837
- _get_range_query(val[1], val[2], false, false)
838
- end
839
- .,.,
840
-
841
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 113
842
- def _reduce_40( val, _values)
843
- _get_range_query(nil, val[1], false, false)
844
- end
845
- .,.,
846
-
847
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 114
848
- def _reduce_41( val, _values)
849
- _get_range_query(nil, val[1], false, true)
850
- end
851
- .,.,
852
-
853
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 115
854
- def _reduce_42( val, _values)
855
- _get_range_query(val[1], nil, true, false)
856
- end
857
- .,.,
858
-
859
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 116
860
- def _reduce_43( val, _values)
861
- _get_range_query(val[1], nil, false, false)
862
- end
863
- .,.,
864
-
865
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 117
866
- def _reduce_44( val, _values)
867
- _get_range_query(nil, val[1], false, false)
868
- end
869
- .,.,
870
-
871
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 118
872
- def _reduce_45( val, _values)
873
- _get_range_query(nil, val[2], false, true)
874
- end
875
- .,.,
876
-
877
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 119
878
- def _reduce_46( val, _values)
879
- _get_range_query(val[2], nil, true, false)
880
- end
881
- .,.,
882
-
883
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 120
884
- def _reduce_47( val, _values)
885
- _get_range_query(val[1], nil, false, false)
886
- end
887
- .,.,
888
-
889
- def _reduce_none( val, _values)
890
- val[0]
891
- end
892
-
893
- end # class QueryParser
894
-
895
- end # module Ferret
896
-
897
-
898
- if __FILE__ == $0
899
- $:.unshift File.join(File.dirname(__FILE__), '..')
900
- $:.unshift File.join(File.dirname(__FILE__), '../..')
901
- require 'utils'
902
- require 'analysis'
903
- require 'document'
904
- require 'store'
905
- require 'index'
906
- require 'search'
907
-
908
- include Ferret::Search
909
- include Ferret::Index
910
-
911
- st = "\033[7m"
912
- en = "\033[m"
913
-
914
- parser = Ferret::QueryParser.new("default",
915
- :fields => ["f1", "f2", "f3"],
916
- :analyzer => Ferret::Analysis::StandardAnalyzer.new,
917
- :handle_parse_errors => true)
918
-
919
- $stdin.each do |line|
920
- query = parser.parse(line)
921
- if query
922
- puts "#{query.class}"
923
- puts query.to_s(parser.default_field)
924
- else
925
- puts "No query was returned"
926
- end
927
- end
928
- end