ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,312 +0,0 @@
1
- require 'racc/parser'
2
- module Ferret
3
- # = QueryParser
4
- #
5
- # The Ferret::QueryParser is used to parse Ferret Query Language (FQL) into
6
- # a Ferret Query. FQL is described below.
7
- #
8
- # == Ferret Query Language
9
- #
10
- # === Preamble
11
- #
12
- # The following characters are special characters in FQL;
13
- #
14
- # :, (, ), [, ], {, }, !, +, ", ~, ^, -, |, <, >, =, *, ?, \
15
- #
16
- # If you want to use one of these characters in one of your terms you need
17
- # to escape it with a \ character. \ escapes itself. The exception to this
18
- # rule is within Phrases which are strings surrounded by double quotes (and
19
- # will be explained further below in the section on PhraseQueries). In
20
- # Phrases, only ", | and <> have special meaning and need to be escaped if
21
- # you want the literal value. <> is escaped \<\>.
22
- #
23
- # In the following examples I have only written the query string. This would
24
- # be parsed like;
25
- #
26
- # query = query_parser.parse("pet:(dog AND cat)")
27
- # puts query # => "+pet:dog +pet:cat"
28
- #
29
- # === TermQuery
30
- #
31
- # A term query is the most basic query of all and is what most of the other
32
- # queries are built upon. The term consists of a single word. eg;
33
- #
34
- # 'term'
35
- #
36
- # Note that the analyzer will be run on the term and if it splits the term
37
- # in two then it will be turned into a phrase query. For example, with the
38
- # plain Ferret::Analysis::Analyzer, the following;
39
- #
40
- # 'dave12balmain'
41
- #
42
- # is equivalent to;
43
- #
44
- # '"dave balmain"'
45
- #
46
- # Which we will explain now...
47
- #
48
- # === PhraseQuery
49
- #
50
- # A phrase query is a string of terms surrounded by double quotes. For
51
- # example you could write;
52
- #
53
- # '"quick brown fox"'
54
- #
55
- # But if a "fast" fox is just as good as a quick one you could use the |
56
- # character to specify alternate terms.
57
- #
58
- # '"quick|speedy|fast brown fox"'
59
- #
60
- # What if we don't care what colour the fox is. We can use the <> to specify
61
- # a place setter. eg;
62
- #
63
- # '"quick|speedy|fast <> fox"'
64
- #
65
- # This will match any word in between quick and fox. Alternatively we could
66
- # set the "slop" for the phrase which allows a certain variation in the
67
- # match of the phrase. The slop for a phrase is an integer indicating how
68
- # many positions you are allowed to move the terms to get a match. Read more
69
- # about the slop factor in Ferret::Search::PhraseQuery. To set the slop
70
- # factor for a phrase you can type;
71
- #
72
- # '"big house"~2'
73
- #
74
- # This would match "big house", "big red house", "big red brick house" and
75
- # even "house big". That's right, you don't need to have the terms in order
76
- # if you allow some slop in your phrases. (See Ferret::Search::Spans if you
77
- # need a phrase type query with ordered terms.)
78
- #
79
- # These basic queries will be run on the default field which is set when you
80
- # create the query_parser. But what if you want to search a different field.
81
- # You'll be needing a ...
82
- #
83
- # === FieldQuery
84
- #
85
- # A field query is any field prefixed by <fieldname>:. For example, to
86
- # search for all instances of the term "ski" in field "sport", you'd write;
87
- #
88
- # 'sport:ski'
89
- # Or we can apply a field to phrase;
90
- #
91
- # 'sport:"skiing is fun"'
92
- #
93
- # Now we have a few types of queries, we'll be needing to glue them together
94
- # with a ...
95
- #
96
- # === BooleanQuery
97
- #
98
- # There are a couple of ways of writing boolean queries. Firstly you can
99
- # specify which terms are required, optional or required not to exist (not).
100
- #
101
- # * '+' or "REQ" can be used to indicate a required query. "REQ" must be
102
- # surrounded by white space.
103
- # * '-', '!' or "NOT" are used to indicate query that is required to be
104
- # false. "NOT" must be surrounded by white space.
105
- # * all other queries are optional if the above symbols are used.
106
- #
107
- # Some examples;
108
- #
109
- # '+sport:ski -sport:snowboard sport:toboggan'
110
- # '+ingredient:chocolate +ingredient:strawberries -ingredient:wheat'
111
- #
112
- # You may also use the boolean operators "AND", "&&", "OR" and "||". eg;
113
- #
114
- # 'sport:ski AND NOT sport:snowboard OR sport:toboggan'
115
- # 'ingredient:chocolate AND ingredient:strawberries AND NOT ingredient:wheat'
116
- #
117
- # You can set the default operator when you create the query parser.
118
- #
119
- # === RangeQuery
120
- #
121
- # A range query finds all documents with terms between the two query terms.
122
- # This can be very useful in particular for dates. eg;
123
- #
124
- # 'date:[20050725 20050905]' # all dates >= 20050725 and <= 20050905
125
- # 'date:[20050725 20050905}' # all dates >= 20050725 and < 20050905
126
- # 'date:{20050725 20050905]' # all dates > 20050725 and <= 20050905
127
- # 'date:{20050725 20050905}' # all dates > 20050725 and < 20050905
128
- #
129
- # You can also do open ended queries like this;
130
- #
131
- # 'date:[20050725>' # all dates >= 20050725
132
- # 'date:{20050725>' # all dates > 20050725
133
- # 'date:<20050905]' # all dates <= 20050905
134
- # 'date:<20050905}' # all dates < 20050905
135
- #
136
- # Or like this;
137
- #
138
- # 'date: >= 20050725'
139
- # 'date: > 20050725'
140
- # 'date: <= 20050905'
141
- # 'date: < 20050905'
142
- #
143
- # If you prefer the above style you could use a boolean query but like this;
144
- #
145
- # 'date:( >= 20050725 AND <= 20050905)'
146
- #
147
- # But the range-query-only solution shown first will be faster.
148
- #
149
- # === WildQuery
150
- #
151
- # A wild query is a query using the pattern matching characters * and ?. *
152
- # matches 0 or more characters while ? matches a single character. This type
153
- # of query can be really useful for matching hierarchical categories for
154
- # example. Let's say we had this structure;
155
- #
156
- # /sport/skiing
157
- # /sport/cycling
158
- # /coding1/ruby
159
- # /coding1/c
160
- # /coding2/python
161
- # /coding2/perl
162
- #
163
- # If you wanted all categories with programming languages you could use the
164
- # query;
165
- #
166
- # 'category:/coding?/*'
167
- #
168
- # Note that this query can be quite expensive if not used carefully. In the
169
- # example above there would be no problem but you should be careful not to use
170
- # the wild characters at the beginning of the query as it'll have to iterate
171
- # through every term in that field. Having said that, some fields like the
172
- # category field above will only have a small number of distinct fields so
173
- # this could be ok.
174
- #
175
- # === FuzzyQuery
176
- #
177
- # This is like the sloppy phrase query above, except you are now adding slop
178
- # to a term. Basically it measures the Levenshtein distance between two
179
- # terms and if the value is below the slop threshold the term is a match.
180
- # This time though the slop must be a float between 0 and 1.0, 1.0 being a
181
- # perfect match and 0 being far from a match. The default is set to 0.5 so
182
- # you don't need to give a slop value if you don't want to. You can set the
183
- # default in the Ferret::Search::FuzzyQuery class. Here are a couple of
184
- # examples;
185
- #
186
- # 'content:ferret~'
187
- # 'content:Ostralya~0.4'
188
- #
189
- # Note that this query can be quite expensive. If you'd like to use this
190
- # query, you may want to set a minimum prefix length in the FuzzyQuery
191
- # class. This can substantially reduce the number of terms that the query
192
- # will iterate over.
193
- #
194
- # Well, that's it for the query language. Next we have...
195
- #
196
- # == Extending the Query Parser
197
- #
198
- # The query parser has a number of methods which you may want to subclass if
199
- # you are interested in extending the query parser.
200
- #
201
- # get_term_query:: Called for each term in the query. You may want
202
- # to discard all but the first token instead of
203
- # doing a phrase query.
204
- #
205
- # get_fuzzy_query:: These are expensive. You could set the default
206
- # prefix or perhaps disallow these altogether by
207
- # raising an exception.
208
- #
209
- # get_range_query:: You'll probably want to leave this as is.
210
- #
211
- # get_phrase_query:: This method is passed an array of terms or
212
- # perhaps an array of arrays of terms in the case
213
- # of a multi-term phrase query as well as the slop
214
- # and it returns a phrase query. Perhaps you'd
215
- # like to use a span query instead of the standard
216
- # phrase query to ensure the order of the terms
217
- # remains intact.
218
- #
219
- # get_normal_phrase_query:: Called for phrases without any multi-terms. This
220
- # method is called by the standard
221
- # get_phrase_query.
222
- #
223
- # get_multi_phrase_query:: Called for phrases with multi-terms. This method
224
- # is called by the standard get_phrase_query.
225
- #
226
- # get_boolean_query:: Called with an array of clauses.
227
- #
228
- class QueryParser < Racc::Parser
229
- include Ferret::Search
230
- include Ferret::Index
231
-
232
- # Create a new QueryParser.
233
- #
234
- # default_field:: all queries without a specified query string are run on
235
- # this field.
236
- #
237
- # options:: the following options exist and should be passed in as a
238
- # hash. eg;
239
- #
240
- # qp = QueryParser.new("*", { :analyzer => WhiteSpaceAnalyzer.new(),
241
- # :wild_lower => true})
242
- #
243
- # === Options
244
- #
245
- # analyzer:: The analyzer is used to break phrases up into
246
- # terms and to turn terms into tokens recognized in
247
- # the index. Analysis::Analyzer is the default
248
- # occur_default:: Set to either BooleanClause::Occur::SHOULD
249
- # (default) or BooleanClause::Occur::MUST to specify
250
- # the default Occur operator.
251
- # wild_lower:: Set to false if you don't want the terms in fuzzy
252
- # and wild queries to be set to lower case. You
253
- # should do this if your analyzer doesn't downcase.
254
- # The default is true.
255
- # default_slop:: Set the default slop for phrase queries. This
256
- # defaults to 0.
257
- # handle_parse_errors:: Set this to true if you want the QueryParser to
258
- # degrade gracefully on errors. If the query parser
259
- # fails to parse this query, it will try to parse it
260
- # as a straight boolean query on the default field
261
- # ignoring all query punctuation. If this fails, it
262
- # will return an empty TermQuery. If you use this
263
- # and you need to know why your query isn't working
264
- # you can use the Query#to_s method on the query
265
- # returned to see what is happening to your query.
266
- # This defaults to false, in which case a
267
- # QueryParseException is thrown.
268
- def initialize(default_field = "", options = {})
269
- end
270
-
271
- # parses a string into a Ferret::Search::Query. The string needs to be
272
- # parseable FQL.
273
- def parse(str)
274
- end
275
-
276
- # Set to false if you don't want the terms in fuzzy and wild queries to be
277
- # set to lower case. You should do this if your analyzer doesn't downcase.
278
- def wild_lower=()
279
- end
280
-
281
- # Returns the value of wild_lower. See #wild_lower=.
282
- def wild_lower?()
283
- end
284
-
285
- # Processes the query string escaping all special characters within
286
- # phrases and making sure that double quotes and brackets are matching.
287
- # This method will be called by the parse method so you should override it
288
- # if you'd like to do your own query string cleaning.
289
- def clean_string(str)
290
- end
291
-
292
- # The exception thrown when there is an error parsing the query string.
293
- # This also holds the Racc::ParseError that was thrown in case you want to
294
- # investigate why a query won't parse.
295
- class QueryParseException < Exception
296
- attr_reader :parse_error
297
-
298
- # Create a new QueryParseException
299
- #
300
- # error:: An error string describing the query that failed
301
- # parse_error:: The actual parse error that was thrown by Racc. It is a
302
- # Racc::ParseError object.
303
- def initialize(error, parse_error)
304
- super(error)
305
- @parse_error = parse_error
306
- end
307
- end
308
- end
309
-
310
- end
311
-
312
- require 'ferret/query_parser/query_parser.tab.rb'
@@ -1,928 +0,0 @@
1
- #
2
- # DO NOT MODIFY!!!!
3
- # This file is automatically generated by racc 1.4.4
4
- # from racc grammer file "lib/ferret/query_parser/query_parser.y".
5
- #
6
-
7
- require 'racc/parser'
8
-
9
-
10
- module Ferret
11
-
12
- class QueryParser < Racc::Parser
13
-
14
- module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id07e7308361', 'lib/ferret/query_parser/query_parser.y', 126
15
- attr_accessor :default_field, :fields, :handle_parse_errors
16
-
17
- def initialize(default_field = "*", options = {})
18
- @yydebug = true
19
- if default_field.is_a?(String) and default_field.index("|")
20
- default_field = default_field.split("|")
21
- end
22
- @field = @default_field = default_field
23
- @analyzer = options[:analyzer] || Analysis::StandardAnalyzer.new
24
- @wild_lower = options[:wild_lower].nil? ? true : options[:wild_lower]
25
- @occur_default = options[:occur_default] || BooleanClause::Occur::SHOULD
26
- @default_slop = options[:default_slop] || 0
27
- @fields = options[:fields]||[]
28
- @handle_parse_errors = options[:handle_parse_errors] || false
29
- end
30
-
31
- RESERVED = {
32
- 'AND' => :AND,
33
- '&&' => :AND,
34
- 'OR' => :OR,
35
- '||' => :OR,
36
- 'NOT' => :NOT,
37
- '!' => :NOT,
38
- '-' => :NOT,
39
- 'REQ' => :REQ,
40
- '+' => :REQ
41
- }
42
-
43
- ECHR = %q,:()\[\]{}!+"~^\-\|<>\=\*\?,
44
- EWCHR = %q,:()\[\]{}!+"~^\-\|<>\=,
45
-
46
- def parse(str)
47
- orig_str = str
48
- str = clean_string(str)
49
- str.strip!
50
- @q = []
51
-
52
- until str.empty? do
53
- case str
54
- when /\A\s+/
55
- ;
56
- when /\A([#{EWCHR}]|[*?](?=:))/
57
- @q.push [ RESERVED[$&]||$&, $& ]
58
- when /\A(\&\&|\|\|)/
59
- @q.push [ RESERVED[$&], $& ]
60
- when /\A(\\[#{ECHR}]|[^\s#{ECHR}])*[?*](\\[#{EWCHR}]|[^\s#{EWCHR}])*/
61
- str = $'
62
- unescaped = $&.gsub(/\\(?!\\)/,"")
63
- @q.push [ :WILD_STRING, unescaped ]
64
- next
65
- when /\A(\\[#{ECHR}]|[^\s#{ECHR}])+/
66
- symbol = RESERVED[$&]
67
- if symbol
68
- @q.push [ symbol, $& ]
69
- else
70
- str = $'
71
- unescaped = $&.gsub(/\\(?!\\)/,"")
72
- @q.push [ :WORD, unescaped ]
73
- next
74
- end
75
- else
76
- raise RuntimeError, "shouldn't happen"
77
- end
78
- str = $'
79
- end
80
- if @q.empty?
81
- return TermQuery.new(Term.new(@default_field, ""))
82
- end
83
-
84
- @q.push([ false, '$' ])
85
-
86
- query = nil
87
- begin
88
- query = do_parse
89
- rescue Racc::ParseError => e
90
- if @handle_parse_errors
91
- @field = @default_field
92
- query = _get_bad_query(orig_str)
93
- else
94
- raise QueryParseException.new("Could not parse #{str}", e)
95
- end
96
- end
97
- return query
98
- end
99
-
100
- def next_token
101
- @q.shift
102
- end
103
-
104
- PHRASE_CHARS = [?<, ?>, ?|, ?"] # these chars have meaning within phrases
105
- def clean_string(str)
106
- escape_chars = ECHR.gsub(/\\/,"").unpack("c*")
107
- pb = nil
108
- br_stack = []
109
- quote_open = false
110
- # leave a little extra
111
- new_str = []
112
-
113
- str.each_byte do |b|
114
- # ignore escaped characters
115
- if pb == ?\\
116
- if quote_open and PHRASE_CHARS.index(b)
117
- new_str << ?\\ # this was left off the first time through
118
- end
119
-
120
- new_str << b
121
- pb = (b == ?\\ ? ?: : b) # \\ has escaped itself so does nothing more
122
- next
123
- end
124
- case b
125
- when ?\\
126
- new_str << b if !quote_open # We do our own escaping below
127
- when ?"
128
- quote_open = !quote_open
129
- new_str << b
130
- when ?(
131
- if !quote_open
132
- br_stack << b
133
- else
134
- new_str << ?\\
135
- end
136
- new_str << b
137
- when ?)
138
- if !quote_open
139
- if br_stack.size == 0
140
- new_str.unshift(?()
141
- else
142
- br_stack.pop
143
- end
144
- else
145
- new_str << ?\\
146
- end
147
- new_str << b
148
- when ?>
149
- if quote_open
150
- if pb == ?<
151
- new_str.delete_at(-2)
152
- else
153
- new_str << ?\\
154
- end
155
- end
156
- new_str << b
157
- else
158
- if quote_open
159
- if escape_chars.index(b) and b != ?|
160
- new_str << ?\\
161
- end
162
- end
163
- new_str << b
164
- end
165
- pb = b
166
- end
167
- new_str << ?" if quote_open
168
- br_stack.each { |b| new_str << ?) }
169
- return new_str.pack("c*")
170
- end
171
-
172
- def get_bad_query(field, str)
173
- get_term_query(field, str) || BooleanQuery.new()
174
- end
175
-
176
- def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
177
- RangeQuery.new(field, start_word, end_word, inc_upper, inc_lower)
178
- end
179
-
180
- def get_term_query(field, word)
181
- tokens = []
182
- stream = @analyzer.token_stream(field, word)
183
- while token = stream.next
184
- tokens << token
185
- end
186
- if tokens.length == 0
187
- return nil
188
- elsif tokens.length == 1
189
- return TermQuery.new(Term.new(field, tokens[0].text))
190
- else
191
- pq = PhraseQuery.new()
192
- tokens.each do |token|
193
- pq.add(Term.new(field, token.text), nil, token.pos_inc)
194
- end
195
- return pq
196
- end
197
- end
198
-
199
- def get_fuzzy_query(field, word, min_sim = nil)
200
- tokens = []
201
- stream = @analyzer.token_stream(field, word)
202
- if token = stream.next # only makes sense to look at one term for fuzzy
203
- if min_sim
204
- return FuzzyQuery.new(Term.new(field, token.text), min_sim.to_f)
205
- else
206
- return FuzzyQuery.new(Term.new(field, token.text))
207
- end
208
- else
209
- return TermQuery.new(Term.new(field, ""))
210
- end
211
- end
212
-
213
- def get_wild_query(field, regexp)
214
- if (regexp =~ /^([^?*]*)\*$/)
215
- return PrefixQuery.new(Term.new(field, $1))
216
- else
217
- return WildcardQuery.new(Term.new(field, regexp))
218
- end
219
- end
220
-
221
- def add_multi_word(words, word)
222
- last_word = words[-1]
223
- if not last_word.is_a?(Array)
224
- last_word = words[-1] = [words[-1]]
225
- end
226
- last_word << word
227
- return words
228
- end
229
-
230
- def get_normal_phrase_query(field, positions)
231
- pq = PhraseQuery.new()
232
- pq.slop = @default_slop
233
- pos_inc = 0
234
-
235
- positions.each do |position|
236
- if position.nil?
237
- pos_inc += 1
238
- next
239
- end
240
- stream = @analyzer.token_stream(field, position)
241
- tokens = []
242
- while token = stream.next
243
- tokens << token
244
- end
245
- tokens.each do |token|
246
- pq.add(Term.new(field, token.text), nil,
247
- token.pos_inc + pos_inc)
248
- pos_inc = 0
249
- end
250
- end
251
- return pq
252
- end
253
-
254
- def get_multi_phrase_query(field, positions)
255
- mpq = MultiPhraseQuery.new()
256
- mpq.slop = @default_slop
257
- pos_inc = 0
258
-
259
- positions.each do |position|
260
- if position.nil?
261
- pos_inc += 1
262
- next
263
- end
264
- if position.is_a?(Array)
265
- position.compact! # it doesn't make sense to have an empty spot here
266
- terms = []
267
- position.each do |word|
268
- stream = @analyzer.token_stream(field, word)
269
- if token = stream.next # only put one term per word
270
- terms << Term.new(field, token.text)
271
- end
272
- end
273
- mpq.add(terms, nil, pos_inc + 1) # must go at least one forward
274
- pos_inc = 0
275
- else
276
- stream = @analyzer.token_stream(field, position)
277
- tokens = []
278
- while token = stream.next
279
- tokens << token
280
- end
281
- tokens.each do |token|
282
- mpq.add([Term.new(field, token.text)], nil,
283
- token.pos_inc + pos_inc)
284
- pos_inc = 0
285
- end
286
- end
287
- end
288
- return mpq
289
- end
290
-
291
- def get_phrase_query(positions, slop = nil)
292
- if positions.size == 1
293
- if positions[0].is_a?(Array)
294
- clauses = positions[0].map { |word|
295
- BooleanClause.new(_get_term_query(word), BooleanClause::Occur::SHOULD)
296
- }
297
- return get_boolean_query(clauses)
298
- else
299
- return _get_term_query(positions[0])
300
- end
301
- end
302
-
303
- multi_phrase = false
304
- positions.each do |position|
305
- if position.is_a?(Array)
306
- position.compact!
307
- if position.size > 1
308
- multi_phrase = true
309
- end
310
- end
311
- end
312
-
313
- return do_multiple_fields() do |field|
314
- q = nil
315
- if not multi_phrase
316
- q = get_normal_phrase_query(field, positions.flatten)
317
- else
318
- q = get_multi_phrase_query(field, positions)
319
- end
320
- q.slop = slop if slop
321
- next q
322
- end
323
- end
324
-
325
- def add_and_clause(clauses, clause)
326
- (clauses||=[]).compact!
327
- if (clauses.length == 1)
328
- last_cl = clauses[0]
329
- last_cl.occur = BooleanClause::Occur::MUST if not last_cl.prohibited?
330
- end
331
-
332
- return if clause.nil? # incase a query got destroyed by the analyzer
333
-
334
- clause.occur = BooleanClause::Occur::MUST if not clause.prohibited?
335
- clauses << clause
336
- end
337
-
338
- def add_or_clause(clauses, clause)
339
- clauses << clause
340
- end
341
-
342
- def add_default_clause(clauses, clause)
343
- if @occur_default == BooleanClause::Occur::MUST
344
- add_and_clause(clauses, clause)
345
- else
346
- add_or_clause(clauses, clause)
347
- end
348
- end
349
-
350
- def get_boolean_query(clauses)
351
- # possible that we got all nil clauses so check
352
- bq = BooleanQuery.new()
353
- return bq if clauses.nil?
354
- clauses.compact!
355
- return bq if clauses.size == 0
356
-
357
- if clauses.size == 1 and not clauses[0].prohibited?
358
- return clauses[0].query
359
- end
360
- clauses.each {|clause| bq << clause }
361
- return bq
362
- end
363
-
364
- def get_boolean_clause(query, occur)
365
- return nil if query.nil?
366
- return BooleanClause.new(query, occur)
367
- end
368
-
369
- def do_multiple_fields()
370
- # set @field to all fields if @field is the multi-field operator
371
- @field = @fields if @field.is_a?(String) and @field == "*"
372
- if @field.is_a?(String)
373
- return yield(@field)
374
- elsif @field.size == 1
375
- return yield(@field[0])
376
- else
377
- bq = BooleanQuery.new()
378
- @field.each do |field|
379
- q = yield(field)
380
- bq << BooleanClause.new(q) if q
381
- end
382
- return (bq.clauses.size == 0) ? nil : bq
383
- end
384
- end
385
-
386
- def method_missing(meth, *args)
387
- if meth.to_s =~ /_(get_[a-z_]+_query)/
388
- do_multiple_fields() do |field|
389
- send($1, *([field] + args))
390
- end
391
- else
392
- raise NoMethodError.new("No such method #{meth} in #{self.class}", meth, args)
393
- end
394
- end
395
-
396
- def QueryParser.parse(query, default_field = "*", options = {})
397
- qp = QueryParser.new(default_field, options)
398
- return qp.parse(query)
399
- end
400
-
401
- ..end lib/ferret/query_parser/query_parser.y modeval..id07e7308361
402
-
403
- ##### racc 1.4.4 generates ###
404
-
405
- racc_reduce_table = [
406
- 0, 0, :racc_error,
407
- 1, 26, :_reduce_1,
408
- 1, 27, :_reduce_2,
409
- 3, 27, :_reduce_3,
410
- 3, 27, :_reduce_4,
411
- 2, 27, :_reduce_5,
412
- 2, 28, :_reduce_6,
413
- 2, 28, :_reduce_7,
414
- 1, 28, :_reduce_8,
415
- 1, 29, :_reduce_none,
416
- 3, 29, :_reduce_10,
417
- 1, 30, :_reduce_none,
418
- 3, 30, :_reduce_12,
419
- 1, 30, :_reduce_none,
420
- 1, 30, :_reduce_none,
421
- 1, 30, :_reduce_none,
422
- 1, 30, :_reduce_none,
423
- 1, 31, :_reduce_17,
424
- 3, 31, :_reduce_18,
425
- 2, 31, :_reduce_19,
426
- 1, 35, :_reduce_20,
427
- 0, 37, :_reduce_21,
428
- 4, 32, :_reduce_22,
429
- 0, 38, :_reduce_23,
430
- 0, 39, :_reduce_24,
431
- 5, 32, :_reduce_25,
432
- 1, 36, :_reduce_26,
433
- 3, 36, :_reduce_27,
434
- 3, 33, :_reduce_28,
435
- 5, 33, :_reduce_29,
436
- 2, 33, :_reduce_30,
437
- 4, 33, :_reduce_31,
438
- 1, 40, :_reduce_32,
439
- 2, 40, :_reduce_33,
440
- 3, 40, :_reduce_34,
441
- 3, 40, :_reduce_35,
442
- 4, 34, :_reduce_36,
443
- 4, 34, :_reduce_37,
444
- 4, 34, :_reduce_38,
445
- 4, 34, :_reduce_39,
446
- 3, 34, :_reduce_40,
447
- 3, 34, :_reduce_41,
448
- 3, 34, :_reduce_42,
449
- 3, 34, :_reduce_43,
450
- 2, 34, :_reduce_44,
451
- 3, 34, :_reduce_45,
452
- 3, 34, :_reduce_46,
453
- 2, 34, :_reduce_47 ]
454
-
455
- racc_reduce_n = 48
456
-
457
- racc_shift_n = 78
458
-
459
- racc_action_table = [
460
- 8, 10, 67, 66, 75, 74, 50, 21, 2, 40,
461
- 25, 7, 9, 38, 13, 15, 17, 19, 8, 10,
462
- 3, 53, 46, 39, 26, 21, 2, 37, -26, 7,
463
- 9, 45, 13, 15, 17, 19, 8, 10, 3, 43,
464
- 64, 49, -26, 21, 2, 60, 59, 7, 9, 63,
465
- 13, 15, 17, 19, 58, 57, 3, 8, 10, 31,
466
- 33, 54, 55, 56, 21, 2, 44, 48, 7, 9,
467
- 61, 13, 15, 17, 19, 36, 62, 3, 8, 10,
468
- 31, 33, 34, 42, 65, 21, 2, 41, 30, 7,
469
- 9, 70, 13, 15, 17, 19, 8, 10, 3, 71,
470
- 72, 73, 24, 21, 2, 77, nil, 7, 9, nil,
471
- 13, 15, 17, 19, 21, 2, 3, nil, 7, 9,
472
- nil, 13, 15, 17, 19, 21, 2, 3, nil, 7,
473
- 9, nil, 13, 15, 17, 19, 21, 2, 3, nil,
474
- 7, 9, nil, 13, 15, 17, 19, 21, 2, 3,
475
- nil, 7, 9, nil, 13, 15, 17, 19, nil, nil,
476
- 3 ]
477
-
478
- racc_action_check = [
479
- 0, 0, 46, 46, 64, 64, 30, 0, 0, 17,
480
- 6, 0, 0, 15, 0, 0, 0, 0, 2, 2,
481
- 0, 34, 24, 17, 6, 2, 2, 15, 21, 2,
482
- 2, 24, 2, 2, 2, 2, 33, 33, 2, 21,
483
- 42, 28, 21, 33, 33, 38, 38, 33, 33, 42,
484
- 33, 33, 33, 33, 37, 35, 33, 23, 23, 23,
485
- 23, 35, 35, 35, 23, 23, 23, 26, 23, 23,
486
- 39, 23, 23, 23, 23, 13, 41, 23, 12, 12,
487
- 12, 12, 13, 19, 43, 12, 12, 18, 11, 12,
488
- 12, 53, 12, 12, 12, 12, 31, 31, 12, 54,
489
- 55, 56, 3, 31, 31, 72, nil, 31, 31, nil,
490
- 31, 31, 31, 31, 8, 8, 31, nil, 8, 8,
491
- nil, 8, 8, 8, 8, 25, 25, 8, nil, 25,
492
- 25, nil, 25, 25, 25, 25, 10, 10, 25, nil,
493
- 10, 10, nil, 10, 10, 10, 10, 49, 49, 10,
494
- nil, 49, 49, nil, 49, 49, 49, 49, nil, nil,
495
- 49 ]
496
-
497
- racc_action_pointer = [
498
- -3, nil, 15, 92, nil, nil, 8, nil, 104, nil,
499
- 126, 88, 75, 65, nil, 3, nil, -1, 78, 73,
500
- nil, 26, nil, 54, 12, 115, 57, nil, 39, nil,
501
- 6, 93, nil, 33, 8, 45, nil, 44, 24, 60,
502
- nil, 66, 30, 74, nil, nil, -19, nil, nil, 137,
503
- nil, nil, nil, 81, 89, 87, 82, nil, nil, nil,
504
- nil, nil, nil, nil, -17, nil, nil, nil, nil, nil,
505
- nil, nil, 95, nil, nil, nil, nil, nil ]
506
-
507
- racc_action_default = [
508
- -48, -14, -48, -48, -15, -16, -48, -20, -48, -23,
509
- -48, -48, -1, -48, -2, -48, -8, -48, -9, -48,
510
- -11, -17, -13, -48, -48, -48, -48, -6, -48, -7,
511
- -48, -48, -5, -48, -30, -48, -32, -48, -44, -48,
512
- -47, -48, -48, -19, -12, -43, -48, -21, -27, -48,
513
- 78, -3, -4, -48, -48, -28, -48, -33, -45, -40,
514
- -41, -46, -10, -42, -48, -18, -39, -38, -22, -24,
515
- -31, -35, -48, -34, -37, -36, -25, -29 ]
516
-
517
- racc_goto_table = [
518
- 47, 32, 12, 27, 23, 29, 11, 68, 28, 76,
519
- 35, nil, 32, nil, nil, nil, nil, nil, nil, nil,
520
- 51, nil, 52, nil, 69 ]
521
-
522
- racc_goto_check = [
523
- 5, 3, 2, 4, 2, 4, 1, 12, 13, 14,
524
- 15, nil, 3, nil, nil, nil, nil, nil, nil, nil,
525
- 3, nil, 3, nil, 5 ]
526
-
527
- racc_goto_pointer = [
528
- nil, 6, 2, -11, -5, -25, nil, nil, nil, nil,
529
- nil, nil, -40, -1, -60, -3 ]
530
-
531
- racc_goto_default = [
532
- nil, nil, nil, 14, 16, 18, 20, 22, 1, 4,
533
- 5, 6, nil, nil, nil, nil ]
534
-
535
- racc_token_table = {
536
- false => 0,
537
- Object.new => 1,
538
- ":" => 2,
539
- :REQ => 3,
540
- :NOT => 4,
541
- :AND => 5,
542
- :OR => 6,
543
- :HIGH => 7,
544
- :LOW => 8,
545
- "^" => 9,
546
- :WORD => 10,
547
- "(" => 11,
548
- ")" => 12,
549
- "~" => 13,
550
- :WILD_STRING => 14,
551
- "*" => 15,
552
- "|" => 16,
553
- "\"" => 17,
554
- "<" => 18,
555
- ">" => 19,
556
- "[" => 20,
557
- "]" => 21,
558
- "}" => 22,
559
- "{" => 23,
560
- "=" => 24 }
561
-
562
- racc_use_result_var = false
563
-
564
- racc_nt_base = 25
565
-
566
- Racc_arg = [
567
- racc_action_table,
568
- racc_action_check,
569
- racc_action_default,
570
- racc_action_pointer,
571
- racc_goto_table,
572
- racc_goto_check,
573
- racc_goto_default,
574
- racc_goto_pointer,
575
- racc_nt_base,
576
- racc_reduce_table,
577
- racc_token_table,
578
- racc_shift_n,
579
- racc_reduce_n,
580
- racc_use_result_var ]
581
-
582
- Racc_token_to_s_table = [
583
- '$end',
584
- 'error',
585
- '":"',
586
- 'REQ',
587
- 'NOT',
588
- 'AND',
589
- 'OR',
590
- 'HIGH',
591
- 'LOW',
592
- '"^"',
593
- 'WORD',
594
- '"("',
595
- '")"',
596
- '"~"',
597
- 'WILD_STRING',
598
- '"*"',
599
- '"|"',
600
- '"\""',
601
- '"<"',
602
- '">"',
603
- '"["',
604
- '"]"',
605
- '"}"',
606
- '"{"',
607
- '"="',
608
- '$start',
609
- 'top_query',
610
- 'bool_query',
611
- 'bool_clause',
612
- 'boosted_query',
613
- 'query',
614
- 'term_query',
615
- 'field_query',
616
- 'phrase_query',
617
- 'range_query',
618
- 'wild_query',
619
- 'field',
620
- '@1',
621
- '@2',
622
- '@3',
623
- 'phrase_words']
624
-
625
- Racc_debug_parser = false
626
-
627
- ##### racc system variables end #####
628
-
629
- # reduce 0 omitted
630
-
631
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 19
632
- def _reduce_1( val, _values)
633
- get_boolean_query(val[0])
634
- end
635
- .,.,
636
-
637
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 24
638
- def _reduce_2( val, _values)
639
- [val[0]]
640
- end
641
- .,.,
642
-
643
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 28
644
- def _reduce_3( val, _values)
645
- add_and_clause(val[0], val[2])
646
- end
647
- .,.,
648
-
649
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 32
650
- def _reduce_4( val, _values)
651
- add_or_clause(val[0], val[2])
652
- end
653
- .,.,
654
-
655
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 36
656
- def _reduce_5( val, _values)
657
- add_default_clause(val[0], val[1])
658
- end
659
- .,.,
660
-
661
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 41
662
- def _reduce_6( val, _values)
663
- get_boolean_clause(val[1], BooleanClause::Occur::MUST)
664
- end
665
- .,.,
666
-
667
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 45
668
- def _reduce_7( val, _values)
669
- get_boolean_clause(val[1], BooleanClause::Occur::MUST_NOT)
670
- end
671
- .,.,
672
-
673
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 49
674
- def _reduce_8( val, _values)
675
- get_boolean_clause(val[0], BooleanClause::Occur::SHOULD)
676
- end
677
- .,.,
678
-
679
- # reduce 9 omitted
680
-
681
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 51
682
- def _reduce_10( val, _values)
683
- val[0].boost = val[2].to_f; return val[0]
684
- end
685
- .,.,
686
-
687
- # reduce 11 omitted
688
-
689
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 58
690
- def _reduce_12( val, _values)
691
- get_boolean_query(val[1])
692
- end
693
- .,.,
694
-
695
- # reduce 13 omitted
696
-
697
- # reduce 14 omitted
698
-
699
- # reduce 15 omitted
700
-
701
- # reduce 16 omitted
702
-
703
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 67
704
- def _reduce_17( val, _values)
705
- _get_term_query(val[0])
706
- end
707
- .,.,
708
-
709
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 71
710
- def _reduce_18( val, _values)
711
- _get_fuzzy_query(val[0], val[2])
712
- end
713
- .,.,
714
-
715
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 75
716
- def _reduce_19( val, _values)
717
- _get_fuzzy_query(val[0])
718
- end
719
- .,.,
720
-
721
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 80
722
- def _reduce_20( val, _values)
723
- _get_wild_query(val[0])
724
- end
725
- .,.,
726
-
727
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 81
728
- def _reduce_21( val, _values)
729
- @field = @default_field
730
- end
731
- .,.,
732
-
733
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 85
734
- def _reduce_22( val, _values)
735
- val[2]
736
- end
737
- .,.,
738
-
739
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 85
740
- def _reduce_23( val, _values)
741
- @field = "*"
742
- end
743
- .,.,
744
-
745
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 85
746
- def _reduce_24( val, _values)
747
- @field = @default_field
748
- end
749
- .,.,
750
-
751
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 89
752
- def _reduce_25( val, _values)
753
- val[3]
754
- end
755
- .,.,
756
-
757
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 90
758
- def _reduce_26( val, _values)
759
- @field = [val[0]]
760
- end
761
- .,.,
762
-
763
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 91
764
- def _reduce_27( val, _values)
765
- @field = val[0] += [val[2]]
766
- end
767
- .,.,
768
-
769
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 97
770
- def _reduce_28( val, _values)
771
- get_phrase_query(val[1])
772
- end
773
- .,.,
774
-
775
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 101
776
- def _reduce_29( val, _values)
777
- get_phrase_query(val[1], val[4].to_i)
778
- end
779
- .,.,
780
-
781
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 101
782
- def _reduce_30( val, _values)
783
- nil
784
- end
785
- .,.,
786
-
787
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 102
788
- def _reduce_31( val, _values)
789
- nil
790
- end
791
- .,.,
792
-
793
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 104
794
- def _reduce_32( val, _values)
795
- [val[0]]
796
- end
797
- .,.,
798
-
799
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 105
800
- def _reduce_33( val, _values)
801
- val[0] << val[1]
802
- end
803
- .,.,
804
-
805
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 106
806
- def _reduce_34( val, _values)
807
- val[0] << nil
808
- end
809
- .,.,
810
-
811
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 107
812
- def _reduce_35( val, _values)
813
- add_multi_word(val[0], val[2])
814
- end
815
- .,.,
816
-
817
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 109
818
- def _reduce_36( val, _values)
819
- _get_range_query(val[1], val[2], true, true)
820
- end
821
- .,.,
822
-
823
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 110
824
- def _reduce_37( val, _values)
825
- _get_range_query(val[1], val[2], true, false)
826
- end
827
- .,.,
828
-
829
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 111
830
- def _reduce_38( val, _values)
831
- _get_range_query(val[1], val[2], false, true)
832
- end
833
- .,.,
834
-
835
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 112
836
- def _reduce_39( val, _values)
837
- _get_range_query(val[1], val[2], false, false)
838
- end
839
- .,.,
840
-
841
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 113
842
- def _reduce_40( val, _values)
843
- _get_range_query(nil, val[1], false, false)
844
- end
845
- .,.,
846
-
847
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 114
848
- def _reduce_41( val, _values)
849
- _get_range_query(nil, val[1], false, true)
850
- end
851
- .,.,
852
-
853
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 115
854
- def _reduce_42( val, _values)
855
- _get_range_query(val[1], nil, true, false)
856
- end
857
- .,.,
858
-
859
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 116
860
- def _reduce_43( val, _values)
861
- _get_range_query(val[1], nil, false, false)
862
- end
863
- .,.,
864
-
865
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 117
866
- def _reduce_44( val, _values)
867
- _get_range_query(nil, val[1], false, false)
868
- end
869
- .,.,
870
-
871
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 118
872
- def _reduce_45( val, _values)
873
- _get_range_query(nil, val[2], false, true)
874
- end
875
- .,.,
876
-
877
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 119
878
- def _reduce_46( val, _values)
879
- _get_range_query(val[2], nil, true, false)
880
- end
881
- .,.,
882
-
883
- module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 120
884
- def _reduce_47( val, _values)
885
- _get_range_query(val[1], nil, false, false)
886
- end
887
- .,.,
888
-
889
- def _reduce_none( val, _values)
890
- val[0]
891
- end
892
-
893
- end # class QueryParser
894
-
895
- end # module Ferret
896
-
897
-
898
- if __FILE__ == $0
899
- $:.unshift File.join(File.dirname(__FILE__), '..')
900
- $:.unshift File.join(File.dirname(__FILE__), '../..')
901
- require 'utils'
902
- require 'analysis'
903
- require 'document'
904
- require 'store'
905
- require 'index'
906
- require 'search'
907
-
908
- include Ferret::Search
909
- include Ferret::Index
910
-
911
- st = "\033[7m"
912
- en = "\033[m"
913
-
914
- parser = Ferret::QueryParser.new("default",
915
- :fields => ["f1", "f2", "f3"],
916
- :analyzer => Ferret::Analysis::StandardAnalyzer.new,
917
- :handle_parse_errors => true)
918
-
919
- $stdin.each do |line|
920
- query = parser.parse(line)
921
- if query
922
- puts "#{query.class}"
923
- puts query.to_s(parser.default_field)
924
- else
925
- puts "No query was returned"
926
- end
927
- end
928
- end