ferret 0.10.5 → 0.10.6
Sign up to get free protection for your applications and to get access to all the features.
- data/TUTORIAL +95 -70
- data/ext/q_multi_term.c +5 -3
- data/ext/q_parser.c +60 -57
- data/ext/r_qparser.c +1 -0
- data/ext/r_search.c +35 -2
- data/lib/ferret/index.rb +1 -1
- data/lib/ferret_version.rb +1 -1
- metadata +2 -2
data/TUTORIAL
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
= Quick Introduction to Ferret
|
2
2
|
|
3
3
|
The simplest way to use Ferret is through the Ferret::Index::Index class.
|
4
|
-
|
4
|
+
This is now aliased by Ferret::I for quick and easy access. Start by including
|
5
|
+
the Ferret module.
|
5
6
|
|
6
7
|
require 'ferret'
|
7
8
|
include Ferret
|
@@ -41,32 +42,32 @@ could probably just use SimpleSearch. So let's give our documents some fields;
|
|
41
42
|
index << {:title => "Programming Ruby", :content => "blah blah blah"}
|
42
43
|
index << {:title => "Programming Ruby", :content => "yada yada yada"}
|
43
44
|
|
44
|
-
|
45
|
-
|
45
|
+
Note the way that all field-names are Symbols. Although Strings will work,
|
46
|
+
this is a best-practice in Ferret. Or if you are indexing data stored in a
|
47
|
+
database, you'll probably want to store the id;
|
46
48
|
|
47
49
|
index << {:id => row.id, :title => row.title, :date => row.date}
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
include Ferret::Document
|
61
|
-
doc = Document.new
|
62
|
-
doc << Field.new("id", row.id, Field::Store::NO, Field::Index::UNTOKENIZED)
|
63
|
-
doc << Field.new("title", row.title, Field::Store::YES, Field::Index::UNTOKENIZED)
|
64
|
-
doc << Field.new("data", row.data, Field::Store::YES, Field::Index::TOKENIZED)
|
65
|
-
doc << Field.new("image", row.image, Field::Store::YES, Field::Index::NO)
|
66
|
-
index << doc
|
51
|
+
So far we have been storing and tokenizing all of the input data along with
|
52
|
+
term vectors. If we want to change this we need to change the way we set up the
|
53
|
+
index. You must create a FieldInfos object describing the index:
|
54
|
+
|
55
|
+
field_infos = FieldInfos.new(:store => :no,
|
56
|
+
:index => :untokenized_omit_norms,
|
57
|
+
:term_vector => :no)
|
58
|
+
|
59
|
+
The values that you set FieldInfos to have will be used by default by all
|
60
|
+
fields. If you want to change the properties for specific fields, you need to
|
61
|
+
add a FieldInfo to field_infos.
|
67
62
|
|
68
|
-
|
69
|
-
|
63
|
+
field_infos.add_field(:title, :store => :yes, :index => :yes, :boost => 10.0)
|
64
|
+
field_infos.add_field(:content, :store => :yes,
|
65
|
+
:index => :yes,
|
66
|
+
:term_vector => :with_positions_offsets)
|
67
|
+
|
68
|
+
If you need to add a field to an already open index you do so like this:
|
69
|
+
|
70
|
+
index.field_infos.add_field(:new_field, :store => :yes)
|
70
71
|
|
71
72
|
=== Searching
|
72
73
|
|
@@ -76,23 +77,23 @@ Index#search_each. The first method returns a Ferret::Index::TopDocs object.
|
|
76
77
|
The second we'll show here. Let's say we wanted to find all documents with the
|
77
78
|
phrase "quick brown fox" in the content field. We'd write;
|
78
79
|
|
79
|
-
index.search_each('content:"quick brown fox"') do |
|
80
|
-
puts "Document #{
|
80
|
+
index.search_each('content:"quick brown fox"') do |id, score|
|
81
|
+
puts "Document #{id} found with a score of #{score}"
|
81
82
|
end
|
82
83
|
|
83
84
|
But "fast" has a pretty similar meaning to "quick" and we don't mind if the
|
84
85
|
fox is a little red. Also, the phrase could be in the title so we'll search
|
85
86
|
there as well. So we could expand our search like this;
|
86
87
|
|
87
|
-
index.search_each('title|content:"quick|fast brown|red fox"') do |
|
88
|
-
puts "Document #{
|
88
|
+
index.search_each('title|content:"quick|fast brown|red fox"') do |id, score|
|
89
|
+
puts "Document #{id} found with a score of #{score}"
|
89
90
|
end
|
90
91
|
|
91
92
|
What if we want to find all documents entered on or after 5th of September,
|
92
93
|
2005 with the words "ruby" or "rails" in any field? We could type something like;
|
93
94
|
|
94
|
-
index.search_each('date:( >= 20050905) *:(ruby OR rails)') do |
|
95
|
-
puts "Document #{
|
95
|
+
index.search_each('date:( >= 20050905) *:(ruby OR rails)') do |id, score|
|
96
|
+
puts "Document #{index[id][:title]} found with a score of #{score}"
|
96
97
|
end
|
97
98
|
|
98
99
|
Ferret has quite a complex query language. To find out more about Ferret's
|
@@ -100,40 +101,72 @@ query language, see Ferret::QueryParser. You can also construct even more
|
|
100
101
|
complex queries like Ferret::Search::Spans by hand. See Ferret::Search::Query
|
101
102
|
for more information.
|
102
103
|
|
104
|
+
=== Highlighting
|
105
|
+
|
106
|
+
Ferret now has a super-fast highlighting method. See
|
107
|
+
Ferret::Index::Index#highlight. Here is an example of how you would use it
|
108
|
+
when printing to the console:
|
109
|
+
|
110
|
+
index.search_each('date:( >= 20050905) content:(ruby OR rails)') do |id, score|
|
111
|
+
puts "Document #{index[id][:title]} found with a score of #{score}"
|
112
|
+
highlights = index.highlight("content:(ruby OR rails)", id,
|
113
|
+
:field => :content,
|
114
|
+
:pre_tag => "\033[36m",
|
115
|
+
:post_tag => "\033[m")
|
116
|
+
puts highlights
|
117
|
+
end
|
118
|
+
|
119
|
+
And if you want to highlight a whole document, set :excerpt_length to :all:
|
120
|
+
|
121
|
+
puts index.highlight(query, doc_id,
|
122
|
+
:field => :content,
|
123
|
+
:pre_tag => "\033[36m",
|
124
|
+
:post_tag => "\033[m",
|
125
|
+
:excerpt_length => :all)
|
126
|
+
|
103
127
|
=== Accessing Documents
|
104
128
|
|
105
|
-
You may have noticed that when we run a search we only get the document
|
129
|
+
You may have noticed that when we run a search we only get the document id
|
106
130
|
back. By itself this isn't much use to us. Getting the data from the index is
|
107
|
-
very straightforward. For example if we want the title field form the 3rd
|
131
|
+
very straightforward. For example if we want the :title field from the 3rd
|
108
132
|
document type;
|
109
133
|
|
110
|
-
index[2][
|
134
|
+
index[2][:title]
|
135
|
+
|
136
|
+
Documents are lazy loading so if you try this:
|
111
137
|
|
112
|
-
|
138
|
+
puts index[2]
|
113
139
|
|
114
|
-
|
115
|
-
|
140
|
+
You will always get an empty Hash. To load all fields, call the load method:
|
141
|
+
|
142
|
+
puts index[2].load
|
143
|
+
|
144
|
+
NOTE: documents are indexed from 0. You can also use array-like index
|
145
|
+
parameters to access index. For example
|
146
|
+
|
147
|
+
index[1..4]
|
148
|
+
index[10, 10]
|
149
|
+
index[-5]
|
150
|
+
|
151
|
+
The default field is :id (although you can change this with index's
|
152
|
+
:default_create_field parameter);
|
116
153
|
|
117
154
|
index << "This is a document"
|
118
|
-
index[0][
|
155
|
+
index[0][:id]
|
119
156
|
|
120
157
|
Let's go back to the database example above. If we store all of our documents
|
121
158
|
with an id then we can access that field using the id. As long as we called
|
122
|
-
our id field
|
123
|
-
|
124
|
-
id = "89721347"
|
125
|
-
index[id]["title"]
|
126
|
-
|
127
|
-
If however we called our id field "key" we'll have to do this;
|
159
|
+
our id field :id we can do this
|
128
160
|
|
129
|
-
|
130
|
-
index[id]["title"]
|
161
|
+
index["89721347"]["title"]
|
131
162
|
|
132
163
|
Pretty simple huh? You should note though that if there are more than one
|
133
164
|
document with the same *id* or *key* then only the first one will be returned
|
134
|
-
so it is probably better that you ensure the key is unique somehow.
|
135
|
-
|
136
|
-
|
165
|
+
so it is probably better that you ensure the key is unique somehow. By setting
|
166
|
+
Index's :key attribute to :id, Ferret will do this automatically for you. It
|
167
|
+
can even handle multiple field primary keys. For example, you could set
|
168
|
+
:key to [:id, :model] and Ferret would keep the documents unique for that pair
|
169
|
+
of fields.
|
137
170
|
|
138
171
|
=== Modifying and Deleting Documents
|
139
172
|
|
@@ -147,35 +180,33 @@ document;
|
|
147
180
|
|
148
181
|
index << {:title => "Programing Rbuy", :content => "blah blah blah"}
|
149
182
|
doc_id = nil
|
150
|
-
index.
|
151
|
-
return unless
|
152
|
-
doc = index[
|
153
|
-
index.delete(
|
183
|
+
index.search_each('title:"Programing Rbuy"') {|id, score| doc_id = id}
|
184
|
+
return unless doc_id
|
185
|
+
doc = index[doc_id]
|
186
|
+
index.delete(doc_id)
|
154
187
|
|
155
|
-
# modify doc
|
156
|
-
doc[
|
188
|
+
# modify doc. It is just a Hash after all
|
189
|
+
doc[:title] = "Programming Ruby"
|
157
190
|
|
158
191
|
index << doc
|
159
192
|
|
160
|
-
|
161
|
-
|
162
|
-
|
193
|
+
If you set the :key parameter as described in the last section there is no
|
194
|
+
need to delete the document. It will be automatically deleted when you add
|
195
|
+
another document with the same key.
|
196
|
+
|
197
|
+
Also, we can use the id field, as above, to delete documents. This time though
|
198
|
+
every document that matches the id will be deleted. Again, it is probably a
|
199
|
+
good idea if you somehow ensure that your *ids* are kept unique.
|
163
200
|
|
164
201
|
id = "23453422"
|
165
202
|
index.delete(id)
|
166
203
|
|
167
|
-
Or;
|
168
|
-
|
169
|
-
id = Index::Term.new("key", "23452345")
|
170
|
-
index.delete(id)
|
171
|
-
|
172
204
|
=== Onwards
|
173
205
|
|
174
206
|
This is just a small sampling of what Ferret allows you to do. Ferret, like
|
175
207
|
Lucene, is designed to be extended, and allows you to construct your own query
|
176
|
-
types, analyzers, and so on.
|
177
|
-
|
178
|
-
your own. For now you can look in the following places for more documentation;
|
208
|
+
types, analyzers, and so on. Going onwards you should check out the following
|
209
|
+
documentation:
|
179
210
|
|
180
211
|
* Ferret::Analysis: for more information on how the data is processed when it
|
181
212
|
is tokenized. There are a number of things you can do with your data such as
|
@@ -188,12 +219,6 @@ your own. For now you can look in the following places for more documentation;
|
|
188
219
|
your own. You may however want to take advantage of the sorting or filtering
|
189
220
|
abilities of Ferret to present your data the best way you see fit.
|
190
221
|
|
191
|
-
* Ferret::Document: to find out how to create documents. This part of Ferret
|
192
|
-
is relatively straightforward. The main thing that we haven't gone into here
|
193
|
-
is the use of term vectors. These allow you to store and retrieve the
|
194
|
-
positions and offsets of the data which can be very useful in document
|
195
|
-
comparison amoung other things. == More information
|
196
|
-
|
197
222
|
* Ferret::QueryParser: if you want to find out more about what you can do with
|
198
223
|
Ferret's Query Parser, this is the place to look. The query parser is one
|
199
224
|
area that could use a bit of work so please send your suggestions.
|
data/ext/q_multi_term.c
CHANGED
@@ -474,6 +474,7 @@ Explanation *multi_tw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
474
474
|
static Weight *multi_tw_new(Query *query, Searcher *searcher)
|
475
475
|
{
|
476
476
|
int i;
|
477
|
+
int doc_freq = 0;
|
477
478
|
Weight *self = w_new(Weight, query);
|
478
479
|
const char *field = MTQ(query)->field;
|
479
480
|
PriorityQueue *bt_pq = MTQ(query)->boosted_terms;
|
@@ -487,10 +488,11 @@ static Weight *multi_tw_new(Query *query, Searcher *searcher)
|
|
487
488
|
self->idf = 0.0;
|
488
489
|
|
489
490
|
for (i = bt_pq->size; i > 0; i--) {
|
490
|
-
|
491
|
-
|
492
|
-
searcher);
|
491
|
+
doc_freq += searcher->doc_freq(searcher, field,
|
492
|
+
((BoostedTerm *)bt_pq->heap[i])->term);
|
493
493
|
}
|
494
|
+
self->idf += sim_idf(self->similarity, doc_freq,
|
495
|
+
searcher->max_doc(searcher));
|
494
496
|
|
495
497
|
return self;
|
496
498
|
}
|
data/ext/q_parser.c
CHANGED
@@ -102,6 +102,9 @@ typedef struct BCArray {
|
|
102
102
|
BooleanClause **clauses;
|
103
103
|
} BCArray;
|
104
104
|
|
105
|
+
float qp_default_fuzzy_min_sim = 0.5;
|
106
|
+
int qp_default_fuzzy_pre_len = 0;
|
107
|
+
|
105
108
|
|
106
109
|
|
107
110
|
/* Enabling traces. */
|
@@ -123,7 +126,7 @@ typedef struct BCArray {
|
|
123
126
|
#endif
|
124
127
|
|
125
128
|
#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
|
126
|
-
#line
|
129
|
+
#line 26 "src/q_parser.y"
|
127
130
|
typedef union YYSTYPE {
|
128
131
|
Query *query;
|
129
132
|
BooleanClause *bcls;
|
@@ -133,7 +136,7 @@ typedef union YYSTYPE {
|
|
133
136
|
char *str;
|
134
137
|
} YYSTYPE;
|
135
138
|
/* Line 196 of yacc.c. */
|
136
|
-
#line
|
139
|
+
#line 140 "y.tab.c"
|
137
140
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
138
141
|
# define YYSTYPE_IS_DECLARED 1
|
139
142
|
# define YYSTYPE_IS_TRIVIAL 1
|
@@ -142,7 +145,7 @@ typedef union YYSTYPE {
|
|
142
145
|
|
143
146
|
|
144
147
|
/* Copy the second part of user declarations. */
|
145
|
-
#line
|
148
|
+
#line 34 "src/q_parser.y"
|
146
149
|
|
147
150
|
static int yylex(YYSTYPE *lvalp, QParser *qp);
|
148
151
|
static int yyerror(QParser *qp, char const *msg);
|
@@ -197,7 +200,7 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
|
|
197
200
|
|
198
201
|
|
199
202
|
/* Line 219 of yacc.c. */
|
200
|
-
#line
|
203
|
+
#line 204 "y.tab.c"
|
201
204
|
|
202
205
|
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
|
203
206
|
# define YYSIZE_T __SIZE_TYPE__
|
@@ -436,12 +439,12 @@ static const yysigned_char yyrhs[] =
|
|
436
439
|
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
|
437
440
|
static const unsigned char yyrline[] =
|
438
441
|
{
|
439
|
-
0,
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
442
|
+
0, 102, 102, 103, 105, 106, 107, 108, 110, 111,
|
443
|
+
112, 114, 115, 117, 118, 119, 120, 121, 122, 124,
|
444
|
+
125, 126, 128, 130, 130, 132, 132, 132, 135, 136,
|
445
|
+
138, 139, 140, 141, 143, 144, 145, 146, 147, 149,
|
446
|
+
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
447
|
+
160
|
445
448
|
};
|
446
449
|
#endif
|
447
450
|
|
@@ -1249,217 +1252,217 @@ yyreduce:
|
|
1249
1252
|
switch (yyn)
|
1250
1253
|
{
|
1251
1254
|
case 2:
|
1252
|
-
#line
|
1255
|
+
#line 102 "src/q_parser.y"
|
1253
1256
|
{ qp->result = (yyval.query) = NULL; }
|
1254
1257
|
break;
|
1255
1258
|
|
1256
1259
|
case 3:
|
1257
|
-
#line
|
1260
|
+
#line 103 "src/q_parser.y"
|
1258
1261
|
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
|
1259
1262
|
break;
|
1260
1263
|
|
1261
1264
|
case 4:
|
1262
|
-
#line
|
1265
|
+
#line 105 "src/q_parser.y"
|
1263
1266
|
{ (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
|
1264
1267
|
break;
|
1265
1268
|
|
1266
1269
|
case 5:
|
1267
|
-
#line
|
1270
|
+
#line 106 "src/q_parser.y"
|
1268
1271
|
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1269
1272
|
break;
|
1270
1273
|
|
1271
1274
|
case 6:
|
1272
|
-
#line
|
1275
|
+
#line 107 "src/q_parser.y"
|
1273
1276
|
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1274
1277
|
break;
|
1275
1278
|
|
1276
1279
|
case 7:
|
1277
|
-
#line
|
1280
|
+
#line 108 "src/q_parser.y"
|
1278
1281
|
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
|
1279
1282
|
break;
|
1280
1283
|
|
1281
1284
|
case 8:
|
1282
|
-
#line
|
1285
|
+
#line 110 "src/q_parser.y"
|
1283
1286
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
|
1284
1287
|
break;
|
1285
1288
|
|
1286
1289
|
case 9:
|
1287
|
-
#line
|
1290
|
+
#line 111 "src/q_parser.y"
|
1288
1291
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
|
1289
1292
|
break;
|
1290
1293
|
|
1291
1294
|
case 10:
|
1292
|
-
#line
|
1295
|
+
#line 112 "src/q_parser.y"
|
1293
1296
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
|
1294
1297
|
break;
|
1295
1298
|
|
1296
1299
|
case 12:
|
1297
|
-
#line
|
1300
|
+
#line 115 "src/q_parser.y"
|
1298
1301
|
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
|
1299
1302
|
break;
|
1300
1303
|
|
1301
1304
|
case 14:
|
1302
|
-
#line
|
1305
|
+
#line 118 "src/q_parser.y"
|
1303
1306
|
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
|
1304
1307
|
break;
|
1305
1308
|
|
1306
1309
|
case 19:
|
1307
|
-
#line
|
1310
|
+
#line 124 "src/q_parser.y"
|
1308
1311
|
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
|
1309
1312
|
break;
|
1310
1313
|
|
1311
1314
|
case 20:
|
1312
|
-
#line
|
1315
|
+
#line 125 "src/q_parser.y"
|
1313
1316
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
|
1314
1317
|
break;
|
1315
1318
|
|
1316
1319
|
case 21:
|
1317
|
-
#line
|
1320
|
+
#line 126 "src/q_parser.y"
|
1318
1321
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
|
1319
1322
|
break;
|
1320
1323
|
|
1321
1324
|
case 22:
|
1322
|
-
#line
|
1325
|
+
#line 128 "src/q_parser.y"
|
1323
1326
|
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
|
1324
1327
|
break;
|
1325
1328
|
|
1326
1329
|
case 23:
|
1327
|
-
#line
|
1330
|
+
#line 130 "src/q_parser.y"
|
1328
1331
|
{ qp->fields = qp->def_fields; }
|
1329
1332
|
break;
|
1330
1333
|
|
1331
1334
|
case 24:
|
1332
|
-
#line
|
1335
|
+
#line 131 "src/q_parser.y"
|
1333
1336
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1334
1337
|
break;
|
1335
1338
|
|
1336
1339
|
case 25:
|
1337
|
-
#line
|
1340
|
+
#line 132 "src/q_parser.y"
|
1338
1341
|
{ qp->fields = qp->all_fields; }
|
1339
1342
|
break;
|
1340
1343
|
|
1341
1344
|
case 26:
|
1342
|
-
#line
|
1345
|
+
#line 132 "src/q_parser.y"
|
1343
1346
|
{qp->fields = qp->def_fields;}
|
1344
1347
|
break;
|
1345
1348
|
|
1346
1349
|
case 27:
|
1347
|
-
#line
|
1350
|
+
#line 133 "src/q_parser.y"
|
1348
1351
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1349
1352
|
break;
|
1350
1353
|
|
1351
1354
|
case 28:
|
1352
|
-
#line
|
1355
|
+
#line 135 "src/q_parser.y"
|
1353
1356
|
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1354
1357
|
break;
|
1355
1358
|
|
1356
1359
|
case 29:
|
1357
|
-
#line
|
1360
|
+
#line 136 "src/q_parser.y"
|
1358
1361
|
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1359
1362
|
break;
|
1360
1363
|
|
1361
1364
|
case 30:
|
1362
|
-
#line
|
1365
|
+
#line 138 "src/q_parser.y"
|
1363
1366
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1364
1367
|
break;
|
1365
1368
|
|
1366
1369
|
case 31:
|
1367
|
-
#line
|
1370
|
+
#line 139 "src/q_parser.y"
|
1368
1371
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1369
1372
|
break;
|
1370
1373
|
|
1371
1374
|
case 32:
|
1372
|
-
#line
|
1375
|
+
#line 140 "src/q_parser.y"
|
1373
1376
|
{ (yyval.query) = NULL; }
|
1374
1377
|
break;
|
1375
1378
|
|
1376
1379
|
case 33:
|
1377
|
-
#line
|
1380
|
+
#line 141 "src/q_parser.y"
|
1378
1381
|
{ (yyval.query) = NULL; }
|
1379
1382
|
break;
|
1380
1383
|
|
1381
1384
|
case 34:
|
1382
|
-
#line
|
1385
|
+
#line 143 "src/q_parser.y"
|
1383
1386
|
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1384
1387
|
break;
|
1385
1388
|
|
1386
1389
|
case 35:
|
1387
|
-
#line
|
1390
|
+
#line 144 "src/q_parser.y"
|
1388
1391
|
{ (yyval.phrase) = ph_first_word(NULL); }
|
1389
1392
|
break;
|
1390
1393
|
|
1391
1394
|
case 36:
|
1392
|
-
#line
|
1395
|
+
#line 145 "src/q_parser.y"
|
1393
1396
|
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1394
1397
|
break;
|
1395
1398
|
|
1396
1399
|
case 37:
|
1397
|
-
#line
|
1400
|
+
#line 146 "src/q_parser.y"
|
1398
1401
|
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1399
1402
|
break;
|
1400
1403
|
|
1401
1404
|
case 38:
|
1402
|
-
#line
|
1405
|
+
#line 147 "src/q_parser.y"
|
1403
1406
|
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1404
1407
|
break;
|
1405
1408
|
|
1406
1409
|
case 39:
|
1407
|
-
#line
|
1410
|
+
#line 149 "src/q_parser.y"
|
1408
1411
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
|
1409
1412
|
break;
|
1410
1413
|
|
1411
1414
|
case 40:
|
1412
|
-
#line
|
1415
|
+
#line 150 "src/q_parser.y"
|
1413
1416
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
|
1414
1417
|
break;
|
1415
1418
|
|
1416
1419
|
case 41:
|
1417
|
-
#line
|
1420
|
+
#line 151 "src/q_parser.y"
|
1418
1421
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
|
1419
1422
|
break;
|
1420
1423
|
|
1421
1424
|
case 42:
|
1422
|
-
#line
|
1425
|
+
#line 152 "src/q_parser.y"
|
1423
1426
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
|
1424
1427
|
break;
|
1425
1428
|
|
1426
1429
|
case 43:
|
1427
|
-
#line
|
1430
|
+
#line 153 "src/q_parser.y"
|
1428
1431
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, false)); }
|
1429
1432
|
break;
|
1430
1433
|
|
1431
1434
|
case 44:
|
1432
|
-
#line
|
1435
|
+
#line 154 "src/q_parser.y"
|
1433
1436
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, true)); }
|
1434
1437
|
break;
|
1435
1438
|
|
1436
1439
|
case 45:
|
1437
|
-
#line
|
1440
|
+
#line 155 "src/q_parser.y"
|
1438
1441
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,true, false)); }
|
1439
1442
|
break;
|
1440
1443
|
|
1441
1444
|
case 46:
|
1442
|
-
#line
|
1445
|
+
#line 156 "src/q_parser.y"
|
1443
1446
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,false, false)); }
|
1444
1447
|
break;
|
1445
1448
|
|
1446
1449
|
case 47:
|
1447
|
-
#line
|
1450
|
+
#line 157 "src/q_parser.y"
|
1448
1451
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, false)); }
|
1449
1452
|
break;
|
1450
1453
|
|
1451
1454
|
case 48:
|
1452
|
-
#line
|
1455
|
+
#line 158 "src/q_parser.y"
|
1453
1456
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, true)); }
|
1454
1457
|
break;
|
1455
1458
|
|
1456
1459
|
case 49:
|
1457
|
-
#line
|
1460
|
+
#line 159 "src/q_parser.y"
|
1458
1461
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,true, false)); }
|
1459
1462
|
break;
|
1460
1463
|
|
1461
1464
|
case 50:
|
1462
|
-
#line
|
1465
|
+
#line 160 "src/q_parser.y"
|
1463
1466
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,false, false)); }
|
1464
1467
|
break;
|
1465
1468
|
|
@@ -1468,7 +1471,7 @@ yyreduce:
|
|
1468
1471
|
}
|
1469
1472
|
|
1470
1473
|
/* Line 1126 of yacc.c. */
|
1471
|
-
#line
|
1474
|
+
#line 1475 "y.tab.c"
|
1472
1475
|
|
1473
1476
|
yyvsp -= yylen;
|
1474
1477
|
yyssp -= yylen;
|
@@ -1736,7 +1739,7 @@ yyreturn:
|
|
1736
1739
|
}
|
1737
1740
|
|
1738
1741
|
|
1739
|
-
#line
|
1742
|
+
#line 162 "src/q_parser.y"
|
1740
1743
|
|
1741
1744
|
|
1742
1745
|
const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
|
@@ -2009,11 +2012,11 @@ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
|
|
2009
2012
|
}
|
2010
2013
|
else {
|
2011
2014
|
/* it only makes sense to find one term in a fuzzy query */
|
2012
|
-
float slop =
|
2015
|
+
float slop = qp_default_fuzzy_min_sim;
|
2013
2016
|
if (slop_str) {
|
2014
2017
|
sscanf(slop_str, "%f", &slop);
|
2015
2018
|
}
|
2016
|
-
q = fuzq_new_conf(field, token->text, slop,
|
2019
|
+
q = fuzq_new_conf(field, token->text, slop, qp_default_fuzzy_pre_len,
|
2017
2020
|
qp->max_clauses);
|
2018
2021
|
}
|
2019
2022
|
return q;
|
data/ext/r_qparser.c
CHANGED
data/ext/r_search.c
CHANGED
@@ -1240,6 +1240,32 @@ frt_fq_init(int argc, VALUE *argv, VALUE self)
|
|
1240
1240
|
return self;
|
1241
1241
|
}
|
1242
1242
|
|
1243
|
+
/*
|
1244
|
+
* call-seq:
|
1245
|
+
* FuzzyQuery.prefix_length -> prefix_length
|
1246
|
+
*
|
1247
|
+
* Get the +:prefix_length+ for the query.
|
1248
|
+
*/
|
1249
|
+
static VALUE
|
1250
|
+
frt_fq_pre_len(VALUE self)
|
1251
|
+
{
|
1252
|
+
GET_Q();
|
1253
|
+
return INT2FIX(((FuzzyQuery *)q)->pre_len);
|
1254
|
+
}
|
1255
|
+
|
1256
|
+
/*
|
1257
|
+
* call-seq:
|
1258
|
+
* FuzzyQuery.min_similarity -> min_similarity
|
1259
|
+
*
|
1260
|
+
* Get the +:min_similarity+ for the query.
|
1261
|
+
*/
|
1262
|
+
static VALUE
|
1263
|
+
frt_fq_min_sim(VALUE self)
|
1264
|
+
{
|
1265
|
+
GET_Q();
|
1266
|
+
return rb_float_new((double)((FuzzyQuery *)q)->min_sim);
|
1267
|
+
}
|
1268
|
+
|
1243
1269
|
/*
|
1244
1270
|
* call-seq:
|
1245
1271
|
* FuzzyQuery.default_min_similarity -> number
|
@@ -1252,6 +1278,7 @@ frt_fq_get_dms(VALUE self)
|
|
1252
1278
|
return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
|
1253
1279
|
}
|
1254
1280
|
|
1281
|
+
extern float qp_default_fuzzy_min_sim;
|
1255
1282
|
/*
|
1256
1283
|
* call-seq:
|
1257
1284
|
* FuzzyQuery.default_min_similarity = min_sim -> min_sim
|
@@ -1269,6 +1296,7 @@ frt_fq_set_dms(VALUE self, VALUE val)
|
|
1269
1296
|
rb_raise(rb_eArgError,
|
1270
1297
|
"%f < 0.0. :min_similarity must be > 0.0", min_sim);
|
1271
1298
|
}
|
1299
|
+
qp_default_fuzzy_min_sim = (float)min_sim;
|
1272
1300
|
rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val, Qfalse);
|
1273
1301
|
return val;
|
1274
1302
|
}
|
@@ -1285,6 +1313,7 @@ frt_fq_get_dpl(VALUE self)
|
|
1285
1313
|
return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
|
1286
1314
|
}
|
1287
1315
|
|
1316
|
+
extern int qp_default_fuzzy_pre_len;
|
1288
1317
|
/*
|
1289
1318
|
* call-seq:
|
1290
1319
|
* FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
|
@@ -1294,15 +1323,17 @@ frt_fq_get_dpl(VALUE self)
|
|
1294
1323
|
static VALUE
|
1295
1324
|
frt_fq_set_dpl(VALUE self, VALUE val)
|
1296
1325
|
{
|
1297
|
-
int pre_len =
|
1326
|
+
int pre_len = FIX2INT(val);
|
1298
1327
|
if (pre_len < 0) {
|
1299
1328
|
rb_raise(rb_eArgError,
|
1300
1329
|
"%d < 0. :prefix_length must be >= 0", pre_len);
|
1301
1330
|
}
|
1331
|
+
qp_default_fuzzy_pre_len = pre_len;
|
1302
1332
|
rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val, Qfalse);
|
1303
1333
|
return val;
|
1304
1334
|
}
|
1305
1335
|
|
1336
|
+
|
1306
1337
|
/****************************************************************************
|
1307
1338
|
*
|
1308
1339
|
* MatchAllQuery Methods
|
@@ -3159,7 +3190,9 @@ Init_FuzzyQuery(void)
|
|
3159
3190
|
rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
|
3160
3191
|
frt_fq_set_dpl, 1);
|
3161
3192
|
|
3162
|
-
rb_define_method(cFuzzyQuery, "initialize",
|
3193
|
+
rb_define_method(cFuzzyQuery, "initialize", frt_fq_init, -1);
|
3194
|
+
rb_define_method(cFuzzyQuery, "prefix_length", frt_fq_pre_len, 0);
|
3195
|
+
rb_define_method(cFuzzyQuery, "min_similarity", frt_fq_min_sim, 0);
|
3163
3196
|
}
|
3164
3197
|
|
3165
3198
|
/*
|
data/lib/ferret/index.rb
CHANGED
@@ -684,7 +684,7 @@ module Ferret::Index
|
|
684
684
|
@qp = Ferret::QueryParser.new(@options)
|
685
685
|
end
|
686
686
|
# we need to set this every time, in case a new field has been added
|
687
|
-
@qp.fields = @reader.field_names
|
687
|
+
@qp.fields = @reader.field_names unless options[:all_fields]
|
688
688
|
query = @qp.parse(query)
|
689
689
|
end
|
690
690
|
return query
|
data/lib/ferret_version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.6
|
7
|
+
date: 2006-09-21 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|