ferret 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -1
- data/TODO +4 -1
- data/TUTORIAL +9 -1
- data/ext/ferret.c +28 -3
- data/ext/ferret.h +24 -24
- data/ext/index_io.c +13 -28
- data/ext/ram_directory.c +11 -11
- data/ext/segment_merge_queue.c +2 -2
- data/ext/string_helper.c +1 -1
- data/ext/term.c +19 -13
- data/ext/term_buffer.c +3 -3
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +8 -0
- data/lib/ferret/analysis/tokenizers.rb +6 -6
- data/lib/ferret/index/index.rb +120 -2
- data/lib/ferret/index/index_writer.rb +7 -4
- data/lib/ferret/index/multi_reader.rb +1 -1
- data/lib/ferret/index/segment_reader.rb +1 -1
- data/lib/ferret/query_parser.rb +25 -17
- data/lib/ferret/query_parser/query_parser.tab.rb +292 -208
- data/lib/ferret/search/range_query.rb +2 -2
- data/test/test_helper.rb +7 -0
- data/test/unit/index/tc_index.rb +10 -5
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +4 -1
- data/test/unit/query_parser/tc_query_parser.rb +43 -15
- data/test/unit/store/tm_store.rb +2 -2
- data/test/unit/ts_analysis.rb +1 -15
- data/test/unit/ts_document.rb +2 -4
- data/test/unit/ts_index.rb +2 -18
- data/test/unit/ts_query_parser.rb +2 -3
- data/test/unit/ts_search.rb +2 -10
- data/test/unit/ts_store.rb +1 -5
- data/test/unit/ts_utils.rb +1 -9
- metadata +2 -2
data/lib/ferret/analysis/token.rb
CHANGED
@@ -75,5 +75,13 @@ module Ferret::Analysis
       end
       @position_increment = pos_inc
     end
+
+    # Returns a string representation of the token with all the attributes.
+    def to_s
+      buf = "#{term_text}:#{start_offset}->#{end_offset}"
+      buf << "(pos_inc=#{@position_increment})" if (@position_increment != 1)
+      buf << "(type=#{@type})" if (@type != "word")
+      buf
+    end
   end
 end
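The Token#to_s added above is mainly useful for inspecting what an analyzer emits. A minimal sketch of the output it produces, assuming the Token constructor takes (term_text, start_offset, end_offset, pos_inc) as in the rest of Ferret 0.1.x (check token.rb for the exact signature):

    require 'ferret'
    include Ferret::Analysis

    # Assumed constructor order: text, start offset, end offset, position increment.
    token = Token.new("ski", 0, 3, 2)
    puts token.to_s   # => "ski:0->3(pos_inc=2)", per the format string added above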
data/lib/ferret/analysis/tokenizers.rb
CHANGED
@@ -20,16 +20,16 @@ module Ferret::Analysis
   # An abstract base class for simple regular expression oriented
   # tokenizers. Very powerful tokenizers can be created using this class as
   # can be seen from the StandardTokenizer class. Bellow is an example of a
-  # simple implementation of a LetterTokenizer using an
+  # simple implementation of a LetterTokenizer using an RegExpTokenizer.
   # Basically, a token is a sequence of alphabetic characters separated by
   # one or more non-alphabetic characters.
   #
-  #   class LetterTokenizer <
+  #   class LetterTokenizer < RegExpTokenizer
   #     def token_re()
   #       /[a-zA-Z]+/
   #     end
   #   end
-  class
+  class RegExpTokenizer < Tokenizer
 
     # Initialize with an IO implementing input such as a file.
     #
@@ -76,10 +76,10 @@ module Ferret::Analysis
   # A LetterTokenizer is a tokenizer that divides text at non-letters.
   # That's to say, it defines tokens as maximal strings of adjacent letters,
   # as defined by the regular expression _/[a-zA-Z]+/_.
-  class LetterTokenizer <
+  class LetterTokenizer < RegExpTokenizer
     protected
       # Collects only characters which satisfy the regular expression
+      # _/[a-zA-Z]+/_.
       def token_re()
        /[a-zA-Z]+/
      end
@@ -97,7 +97,7 @@ module Ferret::Analysis
 
   # A WhiteSpaceTokenizer is a tokenizer that divides text at whiteSpace.
   # Adjacent sequences of non-WhiteSpace characters form tokens.
-  class WhiteSpaceTokenizer <
+  class WhiteSpaceTokenizer < RegExpTokenizer
     protected
       # Collects only characters which are not spaces tabs or carraige returns
       def token_re()
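The comment at the top of this file already shows the recipe; as a further illustration, a hypothetical tokenizer built the same way, overriding only token_re (NumberTokenizer is a made-up name for this sketch, not part of the gem):

    require 'ferret'

    module Ferret::Analysis
      # Hypothetical: emits maximal runs of digits as tokens; everything else
      # acts as a separator -- the same recipe as LetterTokenizer above.
      class NumberTokenizer < RegExpTokenizer
        protected
        def token_re()
          /[0-9]+/
        end
      end
    end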
data/lib/ferret/index/index.rb
CHANGED
@@ -1,9 +1,81 @@
 module Ferret::Index
+  # This is a simplified interface to the index. See the TUTORIAL for more
+  # information on how to use this class.
   class Index
     include Ferret::Store
     include Ferret::Search
     include Ferret::Document
 
+    # If you create an Index without any options, it'll simply create an index
+    # in memory. But this class is highly configurable and every option that
+    # you can supply to IndexWriter and QueryParser, you can also set here.
+    #
+    # === Options
+    #
+    # path::                  A string representing the path to the index
+    #                         directory. If you are creating the index for the
+    #                         first time the directory will be created if it's
+    #                         missing. You should not choose a directory which
+    #                         contains other files.
+    # create_if_missing::     Create the index if no index is found in the
+    #                         specified directory. Otherwise, use the existing
+    #                         index. This defaults to true and has no effect on
+    #                         in memory indexes.
+    # create::                Creates the index, even if one already exists.
+    #                         That means any existing index will be deleted.
+    #                         This option defaults to false and has no effect
+    #                         for in memory indexes. It is probably better to
+    #                         use the create_if_missing option.
+    # default_field::         This specifies the default field that will be
+    #                         used when you add a simple string to the index
+    #                         using #add_document. This will also be used for
+    #                         default_search_field unless you set it
+    #                         explicitly.
+    # default_search_field::  This specifies the field or fields that will be
+    #                         searched by the query parser. You can use a
+    #                         string to specify one field, eg, "title". Or you
+    #                         can specify multiple fields with a String -
+    #                         "title|content" - or with an Array - ["title",
+    #                         "content"]. This defaults to the value passed in
+    #                         for default_field. If default_field is nil then
+    #                         the default is "*" which signifies all fields in
+    #                         the index.
+    # analyzer::              Sets the default analyzer for the index. This is
+    #                         used by both the IndexWriter and the QueryParser
+    #                         to tokenize the input. The default is the
+    #                         StandardAnalyzer.
+    # dir::                   This is an Ferret::Store::Directory object. This
+    #                         can be useful if you have an already existing
+    #                         in-memory index which you'd like to read with
+    #                         this class. If you want to create a new index,
+    #                         you are better off passing in a path.
+    # close_dir::             This specifies whether you would this class to
+    #                         close the index directory when this class is
+    #                         closed. This only has any meaning when you pass
+    #                         in a directory object in the *dir* option, in
+    #                         which case it defaults to false. Otherwise it is
+    #                         always true.
+    # occur_default::         Set to either BooleanClause::Occur::SHOULD
+    #                         (default) or BooleanClause::Occur::MUST to
+    #                         specify the default Occur operator.
+    # wild_lower::            Set to false if you don't want the terms in fuzzy
+    #                         and wild queries to be set to lower case. You
+    #                         should do this if your analyzer doesn't downcase.
+    #                         The default is true.
+    # default_slop::          Set the default slop for phrase queries. This
+    #                         defaults to 0.
+    #
+    # Some examples;
+    #
+    #   index = Index::Index.new(:analyzer => WhiteSpaceAnalyzer.new())
+    #
+    #   index = Index::Index.new(:path => '/path/to/index',
+    #                            :create_if_missing => false)
+    #
+    #   index = Index::Index.new(:dir => directory,
+    #                            :close_dir => false
+    #                            :default_slop => 2)
+    #
     def initialize(options = {})
       if options[:path]
         options[:create_if_missing] = true if options[:create_if_missing].nil?
@@ -23,10 +95,13 @@ module Ferret::Index
       @reader = nil
       @options.delete(:create) # only want to create the first time if at all
       @close_dir = @options.delete(:close_dir) || false # we'll hold this here
+      @default_search_field = (@options[:default_search_field] || \
+                               @options[:default_field] || "*")
       @default_field = @options[:default_field] || ""
       @open = true
     end
 
+    # Closes this index by closing its associated reader and writer objects.
     def close
       if not @open
         raise "tried to close an already closed directory"
@@ -63,6 +138,47 @@ module Ferret::Index
     # the local analyzer if provided. If the document contains more than
     # IndexWriter::MAX_FIELD_LENGTH terms for a given field, the remainder are
     # discarded.
+    #
+    # There are three ways to add a document to the index.
+    # To add a document you can simply add a string or an array of strings.
+    # This will store all the strings in the "" (ie empty string) field
+    # (unless you specify the default_field when you create the index).
+    #
+    #   index << "This is a new document to be indexed"
+    #   index << ["And here", "is another", "new document", "to be indexed"]
+    #
+    # But these are pretty simple documents. If this is all you want to index you
+    # could probably just use SimpleSearch. So let's give our documents some fields;
+    #
+    #   index << {:title => "Programming Ruby", :content => "blah blah blah"}
+    #   index << {:title => "Programming Ruby", :content => "yada yada yada"}
+    #
+    # Or if you are indexing data stored in a database, you'll probably want to
+    # store the id;
+    #
+    #   index << {:id => row.id, :title => row.title, :date => row.date}
+    #
+    # The methods above while store all of the input data as well tokenizing and
+    # indexing it. Sometimes we won't want to tokenize (divide the string into
+    # tokens) the data. For example, we might want to leave the title as a complete
+    # string and only allow searchs for that complete string. Sometimes we won't
+    # want to store the data as it's already stored in the database so it'll be a
+    # waste to store it in the index. Or perhaps we are doing without a database and
+    # using Ferret to store all of our data, in which case we might not want to
+    # index it. For example, if we are storing images in the index, we won't want to
+    # index them. All of this can be done using Ferret's Ferret::Document module.
+    # eg;
+    #
+    #   include Ferret::Document
+    #   doc = Document.new
+    #   doc << Field.new("id", row.id, Field::Store::NO, Field::Index::UNTOKENIZED)
+    #   doc << Field.new("title", row.title, Field::Store::YES, Field::Index::UNTOKENIZED)
+    #   doc << Field.new("data", row.data, Field::Store::YES, Field::Index::TOKENIZED)
+    #   doc << Field.new("image", row.image, Field::Store::YES, Field::Index::NO)
+    #   index << doc
+    #
+    # You can also compress the data that you are storing or store term vectors with
+    # the data. Read more about this in Ferret::Document::Field.
     def add_document(doc, analyzer = nil)
       ensure_writer_open()
       fdoc = nil
@@ -104,14 +220,16 @@ module Ferret::Index
     # num_docs::  The number of results returned. Default is 10
     # sort::      an array of SortFields describing how to sort the results.
     def search(query, options = {})
+      ensure_searcher_open()
       if query.is_a?(String)
         if @qp.nil?
-          @qp = Ferret::QueryParser.new(@
+          @qp = Ferret::QueryParser.new(@default_search_field, @options)
         end
+        # we need to set this ever time, in case a new field has been added
+        @qp.fields = @reader.get_field_names.to_a
         query = @qp.parse(query)
       end
 
-      ensure_searcher_open()
       return @searcher.search(query, options)
     end
 
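Putting the documentation above together, a short sketch of the round trip this class now supports (field names are illustrative, and the search result is simply whatever the underlying searcher returns, as in the code above):

    require 'ferret'

    index = Ferret::Index::Index.new(:default_field => "content")
    index << {:title => "Programming Ruby", :content => "blah blah blah"}
    index << {:title => "Programming Python", :content => "yada yada yada"}

    # A plain string goes through the QueryParser built from
    # @default_search_field, so unqualified terms hit the default field.
    hits = index.search("yada")
    # Field-qualified terms work too, e.g. index.search("title:python"),
    # assuming the analyzer downcases terms as the default StandardAnalyzer does.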
data/lib/ferret/index/index_writer.rb
CHANGED
@@ -53,12 +53,15 @@ module Index
     # +dir+, replacing the index already there, if any.
     # NOTE:: all options are passed in a hash.
     #
-    # dir::
-    # analyzer::
-    # create::
-    #
+    # dir::                the index directory
+    # analyzer::           the analyzer to use. Defaults to StandardAnalyzer.
+    # create::             +true+ to create the index or overwrite the existing
+    #                      one +false+ to append to the existing index
     # create_if_missing::  +true+ to create the index if it's missing
     #                      +false+ to throw an IOError if it's missing
+    # close_dir::          This specifies whether you would this class to close
+    #                      the index directory when this class is closed. The
+    #                      default is false.
     def initialize(dir, options = {})
       super()
       create = options[:create]||false
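A quick sketch of passing the options documented above (the RAMDirectory constructor is an assumption here, used only for brevity; any Ferret::Store::Directory works as the dir argument):

    require 'ferret'
    include Ferret::Index

    dir = Ferret::Store::RAMDirectory.new   # assumed in-memory directory
    writer = IndexWriter.new(dir,
                             :analyzer          => Ferret::Analysis::StandardAnalyzer.new,
                             :create_if_missing => true,
                             :close_dir         => true)  # writer closes dir when closed
    # ... add documents through the writer ...
    writer.close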
data/lib/ferret/index/multi_reader.rb
CHANGED
@@ -178,7 +178,7 @@ module Ferret
     end
 
     # See IndexReader#get_field_names
-    def get_field_names(field_option)
+    def get_field_names(field_option = IndexReader::FieldOption::ALL)
       # maintain a unique set of field names
       field_set = Set.new
       @sub_readers.each do |reader|
data/lib/ferret/index/segment_reader.rb
CHANGED
@@ -188,7 +188,7 @@ module Ferret::Index
     end
 
     # See IndexReader#get_field_names
-    def get_field_names(field_option)
+    def get_field_names(field_option = IndexReader::FieldOption::ALL)
       field_set = Set.new
       @field_infos.each do |fi|
         if (field_option == IndexReader::FieldOption::ALL)
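With the default argument added in both readers above, callers can simply omit the option; a one-line sketch (reader construction elided):

    # These two calls are now equivalent and return the full field-name set:
    reader.get_field_names
    reader.get_field_names(IndexReader::FieldOption::ALL)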
data/lib/ferret/query_parser.rb
CHANGED
@@ -106,12 +106,12 @@ module Ferret
   #
   # Some examples;
   #
-  #   '+sport:ski -sport:snowboard sport:
+  #   '+sport:ski -sport:snowboard sport:toboggan'
   #   '+ingredient:chocolate +ingredient:strawberries -ingredient:wheat'
   #
   # You may also use the boolean operators "AND", "&&", "OR" and "||". eg;
   #
-  #   'sport:ski AND NOT sport:snowboard OR sport:
+  #   'sport:ski AND NOT sport:snowboard OR sport:toboggan'
   #   'ingredient:chocolate AND ingredient:strawberries AND NOT ingredient:wheat'
   #
   # You can set the default operator when you create the query parse.
@@ -128,10 +128,10 @@ module Ferret
   #
   # You can also do open ended queries like this;
   #
-  #   'date:[20050725
-  #   'date:{20050725
-  #   'date
-  #   'date
+  #   'date:[20050725>'    # all dates >= 20050725
+  #   'date:{20050725>'    # all dates > 20050725
+  #   'date:<20050905]'    # all dates <= 20050905
+  #   'date:<20050905}'    # all dates < 20050905
   #
   # Or like this;
   #
@@ -233,18 +233,26 @@ module Ferret
   #
   # default_field:: all queries without a specified query string are run on
   #                 this field.
-  # options:: the following options exist;
   #
-  #
-  #
-  #
-  #
-  #
-  #
-  #
-  #
-  #
-  #
+  # options:: the following options exist and should be passed in as a
+  #           hash. eg;
+  #
+  #     qp = QueryParser.new("*", { :analyzer => WhiteSpaceAnalyzer.new(),
+  #                                  :wild_lower => true})
+  #
+  # === Options
+  #
+  # analyzer::      The analyzer is used to break phrases up into terms and
+  #                 to turn terms in tokens recognized in the index.
+  #                 Analysis::Analyzer is the default
+  # occur_default:: Set to either BooleanClause::Occur::SHOULD (default)
+  #                 or BooleanClause::Occur::MUST to specify the default
+  #                 Occur operator.
+  # wild_lower::    Set to false if you don't want the terms in fuzzy and
+  #                 wild queries to be set to lower case. You should do this
+  #                 if your analyzer doesn't downcase. The default is true.
+  # default_slop::  Set the default slop for phrase queries. This defaults
+  #                 to 0.
   def initialize(default_field = "", options = {})
   end
 
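To make the options and the new multi-field behaviour concrete, a short sketch (field names are illustrative; the pipe-separated default_field form is the same one the Index class documents above):

    require 'ferret'

    parser = Ferret::QueryParser.new("title|content",
                                     :analyzer     => Ferret::Analysis::StandardAnalyzer.new,
                                     :wild_lower   => true,
                                     :default_slop => 1)

    # Unqualified terms are looked up in both title and content.
    query = parser.parse('ruby')
    # Range syntax from the examples above: all dates >= 20050725.
    date_query = parser.parse('date:[20050725>')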
data/lib/ferret/query_parser/query_parser.tab.rb
CHANGED
@@ -1,7 +1,7 @@
 #
 # DO NOT MODIFY!!!!
 # This file is automatically generated by racc 1.4.4
-# from racc grammer file "
+# from racc grammer file "query_parser.y".
 #
 
 require 'racc/parser'
@@ -11,8 +11,8 @@ module Ferret
 
 class QueryParser < Racc::Parser
 
-module_eval <<'..end
-  attr_accessor :default_field
+module_eval <<'..end query_parser.y modeval..id2aa118b008', 'query_parser.y', 126
+  attr_accessor :default_field, :fields
 
   # true if you want to downcase wild card queries. This is set to try by
   # default.
@@ -21,13 +21,17 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
   def wild_lower?() @wild_lower end
 
 
-  def initialize(default_field = "", options = {})
+  def initialize(default_field = "*", options = {})
     @yydebug = true
+    if default_field.is_a?(String) and default_field.index("|")
+      default_field = default_field.split("|")
+    end
     @field = @default_field = default_field
     @analyzer = options[:analyzer] || Analysis::Analyzer.new
     @wild_lower = options[:wild_lower].nil? ? true : options[:wild_lower]
     @occur_default = options[:occur_default] || BooleanClause::Occur::SHOULD
     @default_slop = options[:default_slop] || 0
+    @fields = options[:fields]||[]
   end
 
   RESERVED = {
@@ -156,43 +160,47 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
     return new_str.pack("c*")
   end
 
-  def get_range_query(start_word, end_word, inc_upper, inc_lower)
+  def get_range_query(field, start_word, end_word, inc_upper, inc_lower)
+    RangeQuery.new(field, start_word, end_word, inc_upper, inc_lower)
   end
 
-  def get_term_query(word)
+  def get_term_query(field, word)
     tokens = []
-    stream = @analyzer.token_stream(
+    stream = @analyzer.token_stream(field, word)
     while token = stream.next
       tokens << token
     end
     if tokens.length == 0
       return nil
     elsif tokens.length == 1
-      return TermQuery.new(Term.new(
+      return TermQuery.new(Term.new(field, tokens[0].term_text))
     else
       pq = PhraseQuery.new()
       tokens.each do |token|
-        pq.add(Term.new(
+        pq.add(Term.new(field, token.term_text), nil, token.position_increment)
       end
       return pq
     end
   end
 
-  def get_fuzzy_query(word, min_sim = nil)
+  def get_fuzzy_query(field, word, min_sim = nil)
     tokens = []
-    stream = @analyzer.token_stream(
+    stream = @analyzer.token_stream(field, word)
    if token = stream.next # only makes sense to look at one term for fuzzy
      if min_sim
-        return FuzzyQuery.new(Term.new(
+        return FuzzyQuery.new(Term.new(field, token.term_text), min_sim.to_f)
      else
-        return FuzzyQuery.new(Term.new(
+        return FuzzyQuery.new(Term.new(field, token.term_text))
      end
    else
      return nil
    end
   end
 
+  def get_wild_query(field, regexp)
+    WildcardQuery.new(Term.new(field, regexp))
+  end
+
   def add_multi_word(words, word)
     last_word = words[-1]
     if not last_word.is_a?(Array)
@@ -202,7 +210,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
     return words
   end
 
-  def get_normal_phrase_query(positions)
+  def get_normal_phrase_query(field, positions)
     pq = PhraseQuery.new()
     pq.slop = @default_slop
     pos_inc = 0
@@ -212,13 +220,13 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
         pos_inc += 1
         next
       end
-      stream = @analyzer.token_stream(
+      stream = @analyzer.token_stream(field, position)
       tokens = []
      while token = stream.next
        tokens << token
      end
      tokens.each do |token|
-        pq.add(Term.new(
+        pq.add(Term.new(field, token.term_text), nil,
               token.position_increment + pos_inc)
        pos_inc = 0
      end
@@ -226,7 +234,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
     return pq
   end
 
-  def get_multi_phrase_query(positions)
+  def get_multi_phrase_query(field, positions)
     mpq = MultiPhraseQuery.new()
     mpq.slop = @default_slop
     pos_inc = 0
@@ -240,21 +248,21 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
         position.compact! # it doesn't make sense to have an empty spot here
         terms = []
         position.each do |word|
-          stream = @analyzer.token_stream(
+          stream = @analyzer.token_stream(field, word)
           if token = stream.next # only put one term per word
-            terms << Term.new(
+            terms << Term.new(field, token.term_text)
          end
        end
        mpq.add(terms, nil, pos_inc + 1) # must go at least one forward
        pos_inc = 0
      else
-        stream = @analyzer.token_stream(
+        stream = @analyzer.token_stream(field, position)
        tokens = []
        while token = stream.next
          tokens << token
        end
        tokens.each do |token|
-          mpq.add([Term.new(
+          mpq.add([Term.new(field, token.term_text)], nil,
                  token.position_increment + pos_inc)
          pos_inc = 0
        end
@@ -265,7 +273,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
 
   def get_phrase_query(positions, slop = nil)
     if positions.size == 1 and not positions[0].is_a?(Array)
-      return
+      return _get_term_query(positions[0])
     end
 
     multi_phrase = false
@@ -278,14 +286,16 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
       end
     end
 
+    return do_multiple_fields() do |field|
+      q = nil
+      if not multi_phrase
+        q = get_normal_phrase_query(field, positions.flatten)
+      else
+        q = get_multi_phrase_query(field, positions)
+      end
+      q.slop = slop if slop
+      next q
     end
-    q.slop = slop if slop
-    return q
   end
 
   def add_and_clause(clauses, clause)
@@ -323,7 +333,7 @@ module_eval <<'..end lib/ferret/query_parser/query_parser.y modeval..id4dd1430e5
       return clauses[0].query
     end
     bq = BooleanQuery.new()
-    clauses.each {|clause|
+    clauses.each {|clause| bq << clause }
     return bq
   end
 
|
|
332
342
|
return BooleanClause.new(query, occur)
|
333
343
|
end
|
334
344
|
|
335
|
-
|
345
|
+
def do_multiple_fields()
|
346
|
+
# set @field to all fields if @field is the multi-field operator
|
347
|
+
@field = @fields if @field.is_a?(String) and @field == "*"
|
348
|
+
if @field.is_a?(String)
|
349
|
+
return yield(@field)
|
350
|
+
elsif @field.size == 1
|
351
|
+
return yield(@field[0])
|
352
|
+
else
|
353
|
+
bq = BooleanQuery.new()
|
354
|
+
@field.each do |field|
|
355
|
+
q = yield(field)
|
356
|
+
bq << BooleanClause.new(q) if q
|
357
|
+
end
|
358
|
+
return bq
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
def method_missing(meth, *args)
|
363
|
+
if meth.to_s =~ /_(get_[a-z_]+_query)/
|
364
|
+
do_multiple_fields() do |field|
|
365
|
+
send($1, *([field] + args))
|
366
|
+
end
|
367
|
+
else
|
368
|
+
raise NoMethodError.new("No such method #{meth} in #{self.class}", meth, args)
|
369
|
+
end
|
370
|
+
end
|
371
|
+
|
372
|
+
|
373
|
+
..end query_parser.y modeval..id2aa118b008
|
336
374
|
|
337
375
|
##### racc 1.4.4 generates ###
|
338
376
|
|
339
377
|
racc_reduce_table = [
|
340
378
|
0, 0, :racc_error,
|
341
|
-
1,
|
342
|
-
1,
|
343
|
-
3,
|
344
|
-
3,
|
345
|
-
2,
|
346
|
-
2,
|
347
|
-
2,
|
348
|
-
1,
|
379
|
+
1, 26, :_reduce_1,
|
380
|
+
1, 27, :_reduce_2,
|
381
|
+
3, 27, :_reduce_3,
|
382
|
+
3, 27, :_reduce_4,
|
383
|
+
2, 27, :_reduce_5,
|
384
|
+
2, 28, :_reduce_6,
|
385
|
+
2, 28, :_reduce_7,
|
386
|
+
1, 28, :_reduce_8,
|
387
|
+
1, 30, :_reduce_none,
|
388
|
+
3, 30, :_reduce_10,
|
389
|
+
1, 29, :_reduce_none,
|
390
|
+
3, 29, :_reduce_12,
|
391
|
+
1, 29, :_reduce_none,
|
392
|
+
1, 29, :_reduce_none,
|
393
|
+
1, 29, :_reduce_none,
|
349
394
|
1, 29, :_reduce_none,
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
1,
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
3, 30, :_reduce_18,
|
359
|
-
2, 30, :_reduce_19,
|
360
|
-
1, 34, :_reduce_20,
|
361
|
-
0, 36, :_reduce_21,
|
362
|
-
4, 31, :_reduce_22,
|
363
|
-
1, 35, :_reduce_23,
|
364
|
-
3, 32, :_reduce_24,
|
395
|
+
1, 31, :_reduce_17,
|
396
|
+
3, 31, :_reduce_18,
|
397
|
+
2, 31, :_reduce_19,
|
398
|
+
1, 35, :_reduce_20,
|
399
|
+
0, 37, :_reduce_21,
|
400
|
+
4, 32, :_reduce_22,
|
401
|
+
0, 38, :_reduce_23,
|
402
|
+
0, 39, :_reduce_24,
|
365
403
|
5, 32, :_reduce_25,
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
3,
|
382
|
-
3,
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
404
|
+
1, 36, :_reduce_26,
|
405
|
+
3, 36, :_reduce_27,
|
406
|
+
3, 33, :_reduce_28,
|
407
|
+
5, 33, :_reduce_29,
|
408
|
+
2, 33, :_reduce_30,
|
409
|
+
4, 33, :_reduce_31,
|
410
|
+
1, 40, :_reduce_32,
|
411
|
+
2, 40, :_reduce_33,
|
412
|
+
3, 40, :_reduce_34,
|
413
|
+
3, 40, :_reduce_35,
|
414
|
+
4, 34, :_reduce_36,
|
415
|
+
4, 34, :_reduce_37,
|
416
|
+
4, 34, :_reduce_38,
|
417
|
+
4, 34, :_reduce_39,
|
418
|
+
3, 34, :_reduce_40,
|
419
|
+
3, 34, :_reduce_41,
|
420
|
+
3, 34, :_reduce_42,
|
421
|
+
3, 34, :_reduce_43,
|
422
|
+
2, 34, :_reduce_44,
|
423
|
+
3, 34, :_reduce_45,
|
424
|
+
3, 34, :_reduce_46,
|
425
|
+
2, 34, :_reduce_47 ]
|
426
|
+
|
427
|
+
racc_reduce_n = 48
|
428
|
+
|
429
|
+
racc_shift_n = 78
|
388
430
|
|
389
431
|
racc_action_table = [
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
432
|
+
8, 10, 60, 59, 75, 74, 50, 21, 2, 25,
|
433
|
+
-26, 7, 9, 41, 13, 15, 17, 19, 8, 10,
|
434
|
+
3, 43, 64, 26, -26, 21, 2, 40, 38, 7,
|
435
|
+
9, 63, 13, 15, 17, 19, 8, 10, 3, 36,
|
436
|
+
46, 53, 37, 21, 2, 49, 34, 7, 9, 45,
|
437
|
+
13, 15, 17, 19, 58, 57, 3, 8, 10, 31,
|
438
|
+
33, 54, 55, 56, 21, 2, 44, 48, 7, 9,
|
439
|
+
61, 13, 15, 17, 19, 67, 66, 3, 8, 10,
|
440
|
+
31, 33, 62, 42, 65, 21, 2, 39, 30, 7,
|
441
|
+
9, 70, 13, 15, 17, 19, 8, 10, 3, 71,
|
442
|
+
72, 73, 24, 21, 2, 77, nil, 7, 9, nil,
|
443
|
+
13, 15, 17, 19, 21, 2, 3, nil, 7, 9,
|
444
|
+
nil, 13, 15, 17, 19, 21, 2, 3, nil, 7,
|
445
|
+
9, nil, 13, 15, 17, 19, 21, 2, 3, nil,
|
446
|
+
7, 9, nil, 13, 15, 17, 19, 21, 2, 3,
|
447
|
+
nil, 7, 9, nil, 13, 15, 17, 19, nil, nil,
|
448
|
+
3 ]
|
405
449
|
|
406
450
|
racc_action_check = [
|
407
|
-
0, 0,
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
25, nil,
|
421
|
-
|
451
|
+
0, 0, 38, 38, 64, 64, 30, 0, 0, 6,
|
452
|
+
21, 0, 0, 17, 0, 0, 0, 0, 2, 2,
|
453
|
+
0, 21, 42, 6, 21, 2, 2, 17, 15, 2,
|
454
|
+
2, 42, 2, 2, 2, 2, 33, 33, 2, 13,
|
455
|
+
24, 34, 15, 33, 33, 28, 13, 33, 33, 24,
|
456
|
+
33, 33, 33, 33, 37, 35, 33, 23, 23, 23,
|
457
|
+
23, 35, 35, 35, 23, 23, 23, 26, 23, 23,
|
458
|
+
39, 23, 23, 23, 23, 46, 46, 23, 12, 12,
|
459
|
+
12, 12, 40, 19, 43, 12, 12, 16, 11, 12,
|
460
|
+
12, 53, 12, 12, 12, 12, 31, 31, 12, 54,
|
461
|
+
55, 56, 3, 31, 31, 72, nil, 31, 31, nil,
|
462
|
+
31, 31, 31, 31, 49, 49, 31, nil, 49, 49,
|
463
|
+
nil, 49, 49, 49, 49, 25, 25, 49, nil, 25,
|
464
|
+
25, nil, 25, 25, 25, 25, 8, 8, 25, nil,
|
465
|
+
8, 8, nil, 8, 8, 8, 8, 10, 10, 8,
|
466
|
+
nil, 10, 10, nil, 10, 10, 10, 10, nil, nil,
|
467
|
+
10 ]
|
422
468
|
|
423
469
|
racc_action_pointer = [
|
424
|
-
-3,
|
425
|
-
|
426
|
-
nil,
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
nil,
|
431
|
-
nil, nil, nil ]
|
470
|
+
-3, nil, 15, 92, nil, nil, 7, nil, 126, nil,
|
471
|
+
137, 88, 75, 29, nil, 18, 78, 3, nil, 73,
|
472
|
+
nil, 8, nil, 54, 30, 115, 57, nil, 43, nil,
|
473
|
+
6, 93, nil, 33, 28, 45, nil, 44, -19, 60,
|
474
|
+
72, nil, 12, 74, nil, nil, 54, nil, nil, 104,
|
475
|
+
nil, nil, nil, 81, 89, 87, 82, nil, nil, nil,
|
476
|
+
nil, nil, nil, nil, -17, nil, nil, nil, nil, nil,
|
477
|
+
nil, nil, 95, nil, nil, nil, nil, nil ]
|
432
478
|
|
433
479
|
racc_action_default = [
|
434
|
-
-
|
435
|
-
-
|
436
|
-
-
|
437
|
-
-
|
438
|
-
-
|
439
|
-
|
440
|
-
-
|
441
|
-
-
|
480
|
+
-48, -14, -48, -48, -15, -16, -48, -20, -48, -23,
|
481
|
+
-48, -48, -1, -48, -2, -48, -9, -48, -8, -48,
|
482
|
+
-11, -17, -13, -48, -48, -48, -48, -6, -48, -7,
|
483
|
+
-48, -48, -5, -48, -30, -48, -32, -48, -44, -48,
|
484
|
+
-48, -47, -48, -19, -12, -43, -48, -21, -27, -48,
|
485
|
+
78, -3, -4, -48, -48, -28, -48, -33, -45, -40,
|
486
|
+
-41, -10, -46, -42, -48, -18, -39, -38, -22, -24,
|
487
|
+
-31, -35, -48, -34, -37, -36, -25, -29 ]
|
442
488
|
|
443
489
|
racc_goto_table = [
|
444
|
-
32,
|
445
|
-
nil, nil,
|
446
|
-
nil, nil,
|
490
|
+
27, 32, 29, 12, 68, 23, 11, 28, 76, 35,
|
491
|
+
nil, nil, 32, nil, nil, nil, nil, 47, nil, nil,
|
492
|
+
51, nil, 52, nil, nil, nil, nil, nil, nil, nil,
|
493
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
494
|
+
nil, 69 ]
|
447
495
|
|
448
496
|
racc_goto_check = [
|
449
|
-
3, 4, 2,
|
450
|
-
nil, nil,
|
451
|
-
|
497
|
+
4, 3, 4, 2, 12, 2, 1, 13, 14, 15,
|
498
|
+
nil, nil, 3, nil, nil, nil, nil, 4, nil, nil,
|
499
|
+
3, nil, 3, nil, nil, nil, nil, nil, nil, nil,
|
500
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
501
|
+
nil, 4 ]
|
452
502
|
|
453
503
|
racc_goto_pointer = [
|
454
|
-
nil,
|
455
|
-
nil, nil, -
|
504
|
+
nil, 6, 3, -11, -8, nil, nil, nil, nil, nil,
|
505
|
+
nil, nil, -43, -2, -61, -4 ]
|
456
506
|
|
457
507
|
racc_goto_default = [
|
458
|
-
nil, nil, nil,
|
459
|
-
|
508
|
+
nil, nil, nil, 14, 16, 18, 20, 22, 1, 4,
|
509
|
+
5, 6, nil, nil, nil, nil ]
|
460
510
|
|
461
511
|
racc_token_table = {
|
462
512
|
false => 0,
|
@@ -474,19 +524,20 @@ racc_token_table = {
   ")" => 12,
   "~" => 13,
   :WILD_STRING => 14,
-  "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
-  "
+  "*" => 15,
+  "|" => 16,
+  "\"" => 17,
+  "<" => 18,
+  ">" => 19,
+  "[" => 20,
+  "]" => 21,
+  "}" => 22,
+  "{" => 23,
+  "=" => 24 }
 
 racc_use_result_var = false
 
-racc_nt_base =
+racc_nt_base = 25
 
 Racc_arg = [
   racc_action_table,
@@ -520,10 +571,11 @@ Racc_token_to_s_table = [
 '")"',
 '"~"',
 'WILD_STRING',
+'"*"',
+'"|"',
 '"\""',
 '"<"',
 '">"',
-'"|"',
 '"["',
 '"]"',
 '"}"',
@@ -540,8 +592,10 @@ Racc_token_to_s_table = [
 'phrase_query',
 'range_query',
 'wild_query',
-'
+'field',
 '@1',
+'@2',
+'@3',
 'phrase_words']
 
 Racc_debug_parser = false
@@ -550,49 +604,49 @@ Racc_debug_parser = false
 
 # reduce 0 omitted
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 19
 def _reduce_1( val, _values)
   get_boolean_query(val[0])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 24
 def _reduce_2( val, _values)
   [val[0]]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 28
 def _reduce_3( val, _values)
   add_and_clause(val[0], val[2])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 32
 def _reduce_4( val, _values)
   add_or_clause(val[0], val[2])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 36
 def _reduce_5( val, _values)
   add_default_clause(val[0], val[1])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 41
 def _reduce_6( val, _values)
   get_boolean_clause(val[1], BooleanClause::Occur::MUST)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 45
 def _reduce_7( val, _values)
   get_boolean_clause(val[1], BooleanClause::Occur::MUST_NOT)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 49
 def _reduce_8( val, _values)
   get_boolean_clause(val[0], BooleanClause::Occur::SHOULD)
 end
@@ -600,7 +654,7 @@ module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 49
 
 # reduce 9 omitted
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 51
 def _reduce_10( val, _values)
   val[0].boost = val[2].to_f; return val[0]
 end
@@ -608,7 +662,7 @@ module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 51
 
 # reduce 11 omitted
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 58
 def _reduce_12( val, _values)
   get_boolean_query(val[1])
 end
@@ -622,165 +676,189 @@ module_eval <<'.,.,', 'lib/ferret/query_parser/query_parser.y', 58
 
 # reduce 16 omitted
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 67
 def _reduce_17( val, _values)
+  _get_term_query(val[0])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 71
 def _reduce_18( val, _values)
+  _get_fuzzy_query(val[0], val[2])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 75
 def _reduce_19( val, _values)
+  _get_fuzzy_query(val[0])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 80
 def _reduce_20( val, _values)
+  _get_wild_query(val[0])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 81
 def _reduce_21( val, _values)
   @field = @default_field
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 85
 def _reduce_22( val, _values)
   val[2]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 85
 def _reduce_23( val, _values)
+  @field = "*"
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 85
 def _reduce_24( val, _values)
+  @field = @default_field
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 89
 def _reduce_25( val, _values)
+  val[3]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 90
 def _reduce_26( val, _values)
+  @field = [val[0]]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 91
 def _reduce_27( val, _values)
+  @field = val[0] += [val[2]]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 97
 def _reduce_28( val, _values)
+  get_phrase_query(val[1])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 101
 def _reduce_29( val, _values)
+  get_phrase_query(val[1], val[4].to_i)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 101
 def _reduce_30( val, _values)
+  nil
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 102
 def _reduce_31( val, _values)
+  nil
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 104
 def _reduce_32( val, _values)
+  [val[0]]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 105
 def _reduce_33( val, _values)
+  val[0] << val[1]
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 106
 def _reduce_34( val, _values)
+  val[0] << nil
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 107
 def _reduce_35( val, _values)
+  add_multi_word(val[0], val[2])
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 109
 def _reduce_36( val, _values)
+  _get_range_query(val[1], val[2], true, true)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 110
 def _reduce_37( val, _values)
+  _get_range_query(val[1], val[2], true, false)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 111
 def _reduce_38( val, _values)
+  _get_range_query(val[1], val[2], false, true)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 112
 def _reduce_39( val, _values)
+  _get_range_query(val[1], val[2], false, false)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 113
 def _reduce_40( val, _values)
+  _get_range_query(nil, val[1], false, false)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 114
 def _reduce_41( val, _values)
+  _get_range_query(nil, val[1], false, true)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 115
 def _reduce_42( val, _values)
+  _get_range_query(val[1], nil, true, false)
 end
 .,.,
 
-module_eval <<'.,.,', '
+module_eval <<'.,.,', 'query_parser.y', 116
 def _reduce_43( val, _values)
+  _get_range_query(val[1], nil, false, false)
+end
+.,.,
+
+module_eval <<'.,.,', 'query_parser.y', 117
+def _reduce_44( val, _values)
+  _get_range_query(nil, val[1], false, false)
+end
+.,.,
+
+module_eval <<'.,.,', 'query_parser.y', 118
+def _reduce_45( val, _values)
+  _get_range_query(nil, val[2], false, true)
+end
+.,.,
+
+module_eval <<'.,.,', 'query_parser.y', 119
+def _reduce_46( val, _values)
+  _get_range_query(val[2], nil, true, false)
+end
+.,.,
+
+module_eval <<'.,.,', 'query_parser.y', 120
+def _reduce_47( val, _values)
+  _get_range_query(val[1], nil, false, false)
 end
 .,.,
 
@@ -795,6 +873,7 @@ end # module Ferret
 
 if __FILE__ == $0
   $:.unshift File.join(File.dirname(__FILE__), '..')
+  $:.unshift File.join(File.dirname(__FILE__), '../..')
   require 'utils'
   require 'analysis'
   require 'document'
@@ -802,10 +881,15 @@ if __FILE__ == $0
   require 'index'
   require 'search'
 
+  include Ferret::Search
+  include Ferret::Index
+
   st = "\033[7m"
   en = "\033[m"
 
-  parser = Ferret::QueryParser.new("default"
+  parser = Ferret::QueryParser.new("default",
+              :fields => ["f1", "f2", "f3"],
+              :analyzer => Ferret::Analysis::StandardAnalyzer.new)
 
   $stdin.each do |line|
     query = parser.parse(line)