ferret 0.10.5 → 0.10.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TUTORIAL +95 -70
- data/ext/q_multi_term.c +5 -3
- data/ext/q_parser.c +60 -57
- data/ext/r_qparser.c +1 -0
- data/ext/r_search.c +35 -2
- data/lib/ferret/index.rb +1 -1
- data/lib/ferret_version.rb +1 -1
- metadata +2 -2
data/TUTORIAL
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
= Quick Introduction to Ferret
|
2
2
|
|
3
3
|
The simplest way to use Ferret is through the Ferret::Index::Index class.
|
4
|
-
|
4
|
+
This is now aliased by Ferret::I for quick and easy access. Start by including
|
5
|
+
the Ferret module.
|
5
6
|
|
6
7
|
require 'ferret'
|
7
8
|
include Ferret
|
@@ -41,32 +42,32 @@ could probably just use SimpleSearch. So let's give our documents some fields;
|
|
41
42
|
index << {:title => "Programming Ruby", :content => "blah blah blah"}
|
42
43
|
index << {:title => "Programming Ruby", :content => "yada yada yada"}
|
43
44
|
|
44
|
-
|
45
|
-
|
45
|
+
Note the way that all field-names are Symbols. Although Strings will work,
|
46
|
+
this is a best-practice in Ferret. Or if you are indexing data stored in a
|
47
|
+
database, you'll probably want to store the id;
|
46
48
|
|
47
49
|
index << {:id => row.id, :title => row.title, :date => row.date}
|
48
50
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
include Ferret::Document
|
61
|
-
doc = Document.new
|
62
|
-
doc << Field.new("id", row.id, Field::Store::NO, Field::Index::UNTOKENIZED)
|
63
|
-
doc << Field.new("title", row.title, Field::Store::YES, Field::Index::UNTOKENIZED)
|
64
|
-
doc << Field.new("data", row.data, Field::Store::YES, Field::Index::TOKENIZED)
|
65
|
-
doc << Field.new("image", row.image, Field::Store::YES, Field::Index::NO)
|
66
|
-
index << doc
|
51
|
+
So far we have been storing and tokenizing all of the input data along with
|
52
|
+
term vectors. If we want to change this we need to change the way we set up the
|
53
|
+
index. You must create a FieldInfos object describing the index:
|
54
|
+
|
55
|
+
field_infos = FieldInfos.new(:store => :no,
|
56
|
+
:index => :untokenized_omit_norms,
|
57
|
+
:term_vector => :no)
|
58
|
+
|
59
|
+
The values that you set FieldInfos to have will be used by default by all
|
60
|
+
fields. If you want to change the properties for specific fields, you need to
|
61
|
+
add a FieldInfo to field_infos.
|
67
62
|
|
68
|
-
|
69
|
-
|
63
|
+
field_infos.add_field(:title, :store => :yes, :index => :yes, :boost => 10.0)
|
64
|
+
field_infos.add_field(:content, :store => :yes,
|
65
|
+
:index => :yes,
|
66
|
+
:term_vector => :with_positions_offsets)
|
67
|
+
|
68
|
+
If you need to add a field to an already open index you do so like this:
|
69
|
+
|
70
|
+
index.field_infos.add_field(:new_field, :store => :yes)
|
70
71
|
|
71
72
|
=== Searching
|
72
73
|
|
@@ -76,23 +77,23 @@ Index#search_each. The first method returns a Ferret::Index::TopDocs object.
|
|
76
77
|
The second we'll show here. Let's say we wanted to find all documents with the
|
77
78
|
phrase "quick brown fox" in the content field. We'd write;
|
78
79
|
|
79
|
-
index.search_each('content:"quick brown fox"') do |
|
80
|
-
puts "Document #{
|
80
|
+
index.search_each('content:"quick brown fox"') do |id, score|
|
81
|
+
puts "Document #{id} found with a score of #{score}"
|
81
82
|
end
|
82
83
|
|
83
84
|
But "fast" has a pretty similar meaning to "quick" and we don't mind if the
|
84
85
|
fox is a little red. Also, the phrase could be in the title so we'll search
|
85
86
|
there as well. So we could expand our search like this;
|
86
87
|
|
87
|
-
index.search_each('title|content:"quick|fast brown|red fox"') do |
|
88
|
-
puts "Document #{
|
88
|
+
index.search_each('title|content:"quick|fast brown|red fox"') do |id, score|
|
89
|
+
puts "Document #{id} found with a score of #{score}"
|
89
90
|
end
|
90
91
|
|
91
92
|
What if we want to find all documents entered on or after 5th of September,
|
92
93
|
2005 with the words "ruby" or "rails" in any field. We could type something like;
|
93
94
|
|
94
|
-
index.search_each('date:( >= 20050905) *:(ruby OR rails)') do |
|
95
|
-
puts "Document #{
|
95
|
+
index.search_each('date:( >= 20050905) *:(ruby OR rails)') do |id, score|
|
96
|
+
puts "Document #{index[id][:title]} found with a score of #{score}"
|
96
97
|
end
|
97
98
|
|
98
99
|
Ferret has quite a complex query language. To find out more about Ferret's
|
@@ -100,40 +101,72 @@ query language, see Ferret::QueryParser. You can also construct even more
|
|
100
101
|
complex queries like Ferret::Search::Spans by hand. See Ferret::Search::Query
|
101
102
|
for more information.
|
102
103
|
|
104
|
+
=== Highlighting
|
105
|
+
|
106
|
+
Ferret now has a super-fast highlighting method. See
|
107
|
+
Ferret::Index::Index#highlight. Here is an example of how you would use it
|
108
|
+
when printing to the console:
|
109
|
+
|
110
|
+
index.search_each('date:( >= 20050905) content:(ruby OR rails)') do |id, score|
|
111
|
+
puts "Document #{index[id][:title]} found with a score of #{score}"
|
112
|
+
highlights = index.highlight("content:(ruby OR rails)", id,
|
113
|
+
:field => :content,
|
114
|
+
:pre_tag => "\033[36m",
|
115
|
+
:post_tag => "\033[m")
|
116
|
+
puts highlights
|
117
|
+
end
|
118
|
+
|
119
|
+
And if you want to highlight a whole document, set :excerpt_length to :all:
|
120
|
+
|
121
|
+
puts index.highlight(query, doc_id,
|
122
|
+
:field => :content,
|
123
|
+
:pre_tag => "\033[36m",
|
124
|
+
:post_tag => "\033[m",
|
125
|
+
:excerpt_length => :all)
|
126
|
+
|
103
127
|
=== Accessing Documents
|
104
128
|
|
105
|
-
You may have noticed that when we run a search we only get the document
|
129
|
+
You may have noticed that when we run a search we only get the document id
|
106
130
|
back. By itself this isn't much use to us. Getting the data from the index is
|
107
|
-
very straightforward. For example if we want the title field form the 3rd
|
131
|
+
very straightforward. For example if we want the :title field from the 3rd
|
108
132
|
document type;
|
109
133
|
|
110
|
-
index[2][
|
134
|
+
index[2][:title]
|
135
|
+
|
136
|
+
Documents are lazy loading so if you try this:
|
111
137
|
|
112
|
-
|
138
|
+
puts index[2]
|
113
139
|
|
114
|
-
|
115
|
-
|
140
|
+
You will always get an empty Hash. To load all fields, call the load method:
|
141
|
+
|
142
|
+
puts index[2].load
|
143
|
+
|
144
|
+
NOTE: documents are indexed from 0. You can also use array-like index
|
145
|
+
parameters to access index. For example
|
146
|
+
|
147
|
+
index[1..4]
|
148
|
+
index[10, 10]
|
149
|
+
index[-5]
|
150
|
+
|
151
|
+
The default field is :id (although you can change this with index's
|
152
|
+
:default_create_field parameter);
|
116
153
|
|
117
154
|
index << "This is a document"
|
118
|
-
index[0][
|
155
|
+
index[0][:id]
|
119
156
|
|
120
157
|
Let's go back to the database example above. If we store all of our documents
|
121
158
|
with an id then we can access that field using the id. As long as we called
|
122
|
-
our id field
|
123
|
-
|
124
|
-
id = "89721347"
|
125
|
-
index[id]["title"]
|
126
|
-
|
127
|
-
If however we called our id field "key" we'll have to do this;
|
159
|
+
our id field :id we can do this
|
128
160
|
|
129
|
-
|
130
|
-
index[id]["title"]
|
161
|
+
index["89721347"]["title"]
|
131
162
|
|
132
163
|
Pretty simple huh? You should note though that if there are more than one
|
133
164
|
document with the same *id* or *key* then only the first one will be returned
|
134
|
-
so it is probably better that you ensure the key is unique somehow.
|
135
|
-
|
136
|
-
|
165
|
+
so it is probably better that you ensure the key is unique somehow. By setting
|
166
|
+
Index's :key attribute to :id, Ferret will do this automatically for you. It
|
167
|
+
can even handle multiple field primary keys. For example, you could set
|
168
|
+
:key to [:id, :model] and Ferret would keep the documents unique for that pair
|
169
|
+
of fields.
|
137
170
|
|
138
171
|
=== Modifying and Deleting Documents
|
139
172
|
|
@@ -147,35 +180,33 @@ document;
|
|
147
180
|
|
148
181
|
index << {:title => "Programing Rbuy", :content => "blah blah blah"}
|
149
182
|
doc_id = nil
|
150
|
-
index.
|
151
|
-
return unless
|
152
|
-
doc = index[
|
153
|
-
index.delete(
|
183
|
+
index.search_each('title:"Programing Rbuy"') {|id, score| doc_id = id}
|
184
|
+
return unless doc_id
|
185
|
+
doc = index[doc_id]
|
186
|
+
index.delete(doc_id)
|
154
187
|
|
155
|
-
# modify doc
|
156
|
-
doc[
|
188
|
+
# modify doc. It is just a Hash after all
|
189
|
+
doc[:title] = "Programming Ruby"
|
157
190
|
|
158
191
|
index << doc
|
159
192
|
|
160
|
-
|
161
|
-
|
162
|
-
|
193
|
+
If you set the :key parameter as described in the last section there is no
|
194
|
+
need to delete the document. It will be automatically deleted when you add
|
195
|
+
another document with the same key.
|
196
|
+
|
197
|
+
Also, we can use the id field, as above, to delete documents. This time though
|
198
|
+
every document that matches the id will be deleted. Again, it is probably a
|
199
|
+
good idea if you somehow ensure that your *ids* are kept unique.
|
163
200
|
|
164
201
|
id = "23453422"
|
165
202
|
index.delete(id)
|
166
203
|
|
167
|
-
Or;
|
168
|
-
|
169
|
-
id = Index::Term.new("key", "23452345")
|
170
|
-
index.delete(id)
|
171
|
-
|
172
204
|
=== Onwards
|
173
205
|
|
174
206
|
This is just a small sampling of what Ferret allows you to do. Ferret, like
|
175
207
|
Lucene, is designed to be extended, and allows you to construct your own query
|
176
|
-
types, analyzers, and so on.
|
177
|
-
|
178
|
-
your own. For now you can look in the following places for more documentation;
|
208
|
+
types, analyzers, and so on. Going onwards you should check out the following
|
209
|
+
documentation:
|
179
210
|
|
180
211
|
* Ferret::Analysis: for more information on how the data is processed when it
|
181
212
|
is tokenized. There are a number of things you can do with your data such as
|
@@ -188,12 +219,6 @@ your own. For now you can look in the following places for more documentation;
|
|
188
219
|
your own. You may however want to take advantage of the sorting or filtering
|
189
220
|
abilities of Ferret to present your data the best way you see fit.
|
190
221
|
|
191
|
-
* Ferret::Document: to find out how to create documents. This part of Ferret
|
192
|
-
is relatively straightforward. The main thing that we haven't gone into here
|
193
|
-
is the use of term vectors. These allow you to store and retrieve the
|
194
|
-
positions and offsets of the data which can be very useful in document
|
195
|
-
comparison amoung other things. == More information
|
196
|
-
|
197
222
|
* Ferret::QueryParser: if you want to find out more about what you can do with
|
198
223
|
Ferret's Query Parser, this is the place to look. The query parser is one
|
199
224
|
area that could use a bit of work so please send your suggestions.
|
data/ext/q_multi_term.c
CHANGED
@@ -474,6 +474,7 @@ Explanation *multi_tw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
474
474
|
static Weight *multi_tw_new(Query *query, Searcher *searcher)
|
475
475
|
{
|
476
476
|
int i;
|
477
|
+
int doc_freq = 0;
|
477
478
|
Weight *self = w_new(Weight, query);
|
478
479
|
const char *field = MTQ(query)->field;
|
479
480
|
PriorityQueue *bt_pq = MTQ(query)->boosted_terms;
|
@@ -487,10 +488,11 @@ static Weight *multi_tw_new(Query *query, Searcher *searcher)
|
|
487
488
|
self->idf = 0.0;
|
488
489
|
|
489
490
|
for (i = bt_pq->size; i > 0; i--) {
|
490
|
-
|
491
|
-
|
492
|
-
searcher);
|
491
|
+
doc_freq += searcher->doc_freq(searcher, field,
|
492
|
+
((BoostedTerm *)bt_pq->heap[i])->term);
|
493
493
|
}
|
494
|
+
self->idf += sim_idf(self->similarity, doc_freq,
|
495
|
+
searcher->max_doc(searcher));
|
494
496
|
|
495
497
|
return self;
|
496
498
|
}
|
data/ext/q_parser.c
CHANGED
@@ -102,6 +102,9 @@ typedef struct BCArray {
|
|
102
102
|
BooleanClause **clauses;
|
103
103
|
} BCArray;
|
104
104
|
|
105
|
+
float qp_default_fuzzy_min_sim = 0.5;
|
106
|
+
int qp_default_fuzzy_pre_len = 0;
|
107
|
+
|
105
108
|
|
106
109
|
|
107
110
|
/* Enabling traces. */
|
@@ -123,7 +126,7 @@ typedef struct BCArray {
|
|
123
126
|
#endif
|
124
127
|
|
125
128
|
#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED)
|
126
|
-
#line
|
129
|
+
#line 26 "src/q_parser.y"
|
127
130
|
typedef union YYSTYPE {
|
128
131
|
Query *query;
|
129
132
|
BooleanClause *bcls;
|
@@ -133,7 +136,7 @@ typedef union YYSTYPE {
|
|
133
136
|
char *str;
|
134
137
|
} YYSTYPE;
|
135
138
|
/* Line 196 of yacc.c. */
|
136
|
-
#line
|
139
|
+
#line 140 "y.tab.c"
|
137
140
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
138
141
|
# define YYSTYPE_IS_DECLARED 1
|
139
142
|
# define YYSTYPE_IS_TRIVIAL 1
|
@@ -142,7 +145,7 @@ typedef union YYSTYPE {
|
|
142
145
|
|
143
146
|
|
144
147
|
/* Copy the second part of user declarations. */
|
145
|
-
#line
|
148
|
+
#line 34 "src/q_parser.y"
|
146
149
|
|
147
150
|
static int yylex(YYSTYPE *lvalp, QParser *qp);
|
148
151
|
static int yyerror(QParser *qp, char const *msg);
|
@@ -197,7 +200,7 @@ static Query *get_r_q(QParser *qp, char *field, char *from, char *to,
|
|
197
200
|
|
198
201
|
|
199
202
|
/* Line 219 of yacc.c. */
|
200
|
-
#line
|
203
|
+
#line 204 "y.tab.c"
|
201
204
|
|
202
205
|
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
|
203
206
|
# define YYSIZE_T __SIZE_TYPE__
|
@@ -436,12 +439,12 @@ static const yysigned_char yyrhs[] =
|
|
436
439
|
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
|
437
440
|
static const unsigned char yyrline[] =
|
438
441
|
{
|
439
|
-
0,
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
442
|
+
0, 102, 102, 103, 105, 106, 107, 108, 110, 111,
|
443
|
+
112, 114, 115, 117, 118, 119, 120, 121, 122, 124,
|
444
|
+
125, 126, 128, 130, 130, 132, 132, 132, 135, 136,
|
445
|
+
138, 139, 140, 141, 143, 144, 145, 146, 147, 149,
|
446
|
+
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
447
|
+
160
|
445
448
|
};
|
446
449
|
#endif
|
447
450
|
|
@@ -1249,217 +1252,217 @@ yyreduce:
|
|
1249
1252
|
switch (yyn)
|
1250
1253
|
{
|
1251
1254
|
case 2:
|
1252
|
-
#line
|
1255
|
+
#line 102 "src/q_parser.y"
|
1253
1256
|
{ qp->result = (yyval.query) = NULL; }
|
1254
1257
|
break;
|
1255
1258
|
|
1256
1259
|
case 3:
|
1257
|
-
#line
|
1260
|
+
#line 103 "src/q_parser.y"
|
1258
1261
|
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
|
1259
1262
|
break;
|
1260
1263
|
|
1261
1264
|
case 4:
|
1262
|
-
#line
|
1265
|
+
#line 105 "src/q_parser.y"
|
1263
1266
|
{ (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
|
1264
1267
|
break;
|
1265
1268
|
|
1266
1269
|
case 5:
|
1267
|
-
#line
|
1270
|
+
#line 106 "src/q_parser.y"
|
1268
1271
|
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1269
1272
|
break;
|
1270
1273
|
|
1271
1274
|
case 6:
|
1272
|
-
#line
|
1275
|
+
#line 107 "src/q_parser.y"
|
1273
1276
|
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1274
1277
|
break;
|
1275
1278
|
|
1276
1279
|
case 7:
|
1277
|
-
#line
|
1280
|
+
#line 108 "src/q_parser.y"
|
1278
1281
|
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
|
1279
1282
|
break;
|
1280
1283
|
|
1281
1284
|
case 8:
|
1282
|
-
#line
|
1285
|
+
#line 110 "src/q_parser.y"
|
1283
1286
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
|
1284
1287
|
break;
|
1285
1288
|
|
1286
1289
|
case 9:
|
1287
|
-
#line
|
1290
|
+
#line 111 "src/q_parser.y"
|
1288
1291
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
|
1289
1292
|
break;
|
1290
1293
|
|
1291
1294
|
case 10:
|
1292
|
-
#line
|
1295
|
+
#line 112 "src/q_parser.y"
|
1293
1296
|
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
|
1294
1297
|
break;
|
1295
1298
|
|
1296
1299
|
case 12:
|
1297
|
-
#line
|
1300
|
+
#line 115 "src/q_parser.y"
|
1298
1301
|
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
|
1299
1302
|
break;
|
1300
1303
|
|
1301
1304
|
case 14:
|
1302
|
-
#line
|
1305
|
+
#line 118 "src/q_parser.y"
|
1303
1306
|
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
|
1304
1307
|
break;
|
1305
1308
|
|
1306
1309
|
case 19:
|
1307
|
-
#line
|
1310
|
+
#line 124 "src/q_parser.y"
|
1308
1311
|
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
|
1309
1312
|
break;
|
1310
1313
|
|
1311
1314
|
case 20:
|
1312
|
-
#line
|
1315
|
+
#line 125 "src/q_parser.y"
|
1313
1316
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
|
1314
1317
|
break;
|
1315
1318
|
|
1316
1319
|
case 21:
|
1317
|
-
#line
|
1320
|
+
#line 126 "src/q_parser.y"
|
1318
1321
|
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
|
1319
1322
|
break;
|
1320
1323
|
|
1321
1324
|
case 22:
|
1322
|
-
#line
|
1325
|
+
#line 128 "src/q_parser.y"
|
1323
1326
|
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
|
1324
1327
|
break;
|
1325
1328
|
|
1326
1329
|
case 23:
|
1327
|
-
#line
|
1330
|
+
#line 130 "src/q_parser.y"
|
1328
1331
|
{ qp->fields = qp->def_fields; }
|
1329
1332
|
break;
|
1330
1333
|
|
1331
1334
|
case 24:
|
1332
|
-
#line
|
1335
|
+
#line 131 "src/q_parser.y"
|
1333
1336
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1334
1337
|
break;
|
1335
1338
|
|
1336
1339
|
case 25:
|
1337
|
-
#line
|
1340
|
+
#line 132 "src/q_parser.y"
|
1338
1341
|
{ qp->fields = qp->all_fields; }
|
1339
1342
|
break;
|
1340
1343
|
|
1341
1344
|
case 26:
|
1342
|
-
#line
|
1345
|
+
#line 132 "src/q_parser.y"
|
1343
1346
|
{qp->fields = qp->def_fields;}
|
1344
1347
|
break;
|
1345
1348
|
|
1346
1349
|
case 27:
|
1347
|
-
#line
|
1350
|
+
#line 133 "src/q_parser.y"
|
1348
1351
|
{ (yyval.query) = (yyvsp[-1].query); }
|
1349
1352
|
break;
|
1350
1353
|
|
1351
1354
|
case 28:
|
1352
|
-
#line
|
1355
|
+
#line 135 "src/q_parser.y"
|
1353
1356
|
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1354
1357
|
break;
|
1355
1358
|
|
1356
1359
|
case 29:
|
1357
|
-
#line
|
1360
|
+
#line 136 "src/q_parser.y"
|
1358
1361
|
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1359
1362
|
break;
|
1360
1363
|
|
1361
1364
|
case 30:
|
1362
|
-
#line
|
1365
|
+
#line 138 "src/q_parser.y"
|
1363
1366
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1364
1367
|
break;
|
1365
1368
|
|
1366
1369
|
case 31:
|
1367
|
-
#line
|
1370
|
+
#line 139 "src/q_parser.y"
|
1368
1371
|
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1369
1372
|
break;
|
1370
1373
|
|
1371
1374
|
case 32:
|
1372
|
-
#line
|
1375
|
+
#line 140 "src/q_parser.y"
|
1373
1376
|
{ (yyval.query) = NULL; }
|
1374
1377
|
break;
|
1375
1378
|
|
1376
1379
|
case 33:
|
1377
|
-
#line
|
1380
|
+
#line 141 "src/q_parser.y"
|
1378
1381
|
{ (yyval.query) = NULL; }
|
1379
1382
|
break;
|
1380
1383
|
|
1381
1384
|
case 34:
|
1382
|
-
#line
|
1385
|
+
#line 143 "src/q_parser.y"
|
1383
1386
|
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1384
1387
|
break;
|
1385
1388
|
|
1386
1389
|
case 35:
|
1387
|
-
#line
|
1390
|
+
#line 144 "src/q_parser.y"
|
1388
1391
|
{ (yyval.phrase) = ph_first_word(NULL); }
|
1389
1392
|
break;
|
1390
1393
|
|
1391
1394
|
case 36:
|
1392
|
-
#line
|
1395
|
+
#line 145 "src/q_parser.y"
|
1393
1396
|
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1394
1397
|
break;
|
1395
1398
|
|
1396
1399
|
case 37:
|
1397
|
-
#line
|
1400
|
+
#line 146 "src/q_parser.y"
|
1398
1401
|
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1399
1402
|
break;
|
1400
1403
|
|
1401
1404
|
case 38:
|
1402
|
-
#line
|
1405
|
+
#line 147 "src/q_parser.y"
|
1403
1406
|
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1404
1407
|
break;
|
1405
1408
|
|
1406
1409
|
case 39:
|
1407
|
-
#line
|
1410
|
+
#line 149 "src/q_parser.y"
|
1408
1411
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
|
1409
1412
|
break;
|
1410
1413
|
|
1411
1414
|
case 40:
|
1412
|
-
#line
|
1415
|
+
#line 150 "src/q_parser.y"
|
1413
1416
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
|
1414
1417
|
break;
|
1415
1418
|
|
1416
1419
|
case 41:
|
1417
|
-
#line
|
1420
|
+
#line 151 "src/q_parser.y"
|
1418
1421
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
|
1419
1422
|
break;
|
1420
1423
|
|
1421
1424
|
case 42:
|
1422
|
-
#line
|
1425
|
+
#line 152 "src/q_parser.y"
|
1423
1426
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
|
1424
1427
|
break;
|
1425
1428
|
|
1426
1429
|
case 43:
|
1427
|
-
#line
|
1430
|
+
#line 153 "src/q_parser.y"
|
1428
1431
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, false)); }
|
1429
1432
|
break;
|
1430
1433
|
|
1431
1434
|
case 44:
|
1432
|
-
#line
|
1435
|
+
#line 154 "src/q_parser.y"
|
1433
1436
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[-1].str), false, true)); }
|
1434
1437
|
break;
|
1435
1438
|
|
1436
1439
|
case 45:
|
1437
|
-
#line
|
1440
|
+
#line 155 "src/q_parser.y"
|
1438
1441
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,true, false)); }
|
1439
1442
|
break;
|
1440
1443
|
|
1441
1444
|
case 46:
|
1442
|
-
#line
|
1445
|
+
#line 156 "src/q_parser.y"
|
1443
1446
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[-1].str), NULL,false, false)); }
|
1444
1447
|
break;
|
1445
1448
|
|
1446
1449
|
case 47:
|
1447
|
-
#line
|
1450
|
+
#line 157 "src/q_parser.y"
|
1448
1451
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, false)); }
|
1449
1452
|
break;
|
1450
1453
|
|
1451
1454
|
case 48:
|
1452
|
-
#line
|
1455
|
+
#line 158 "src/q_parser.y"
|
1453
1456
|
{ FLDS((yyval.query), get_r_q(qp, field, NULL,(yyvsp[0].str), false, true)); }
|
1454
1457
|
break;
|
1455
1458
|
|
1456
1459
|
case 49:
|
1457
|
-
#line
|
1460
|
+
#line 159 "src/q_parser.y"
|
1458
1461
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,true, false)); }
|
1459
1462
|
break;
|
1460
1463
|
|
1461
1464
|
case 50:
|
1462
|
-
#line
|
1465
|
+
#line 160 "src/q_parser.y"
|
1463
1466
|
{ FLDS((yyval.query), get_r_q(qp, field, (yyvsp[0].str), NULL,false, false)); }
|
1464
1467
|
break;
|
1465
1468
|
|
@@ -1468,7 +1471,7 @@ yyreduce:
|
|
1468
1471
|
}
|
1469
1472
|
|
1470
1473
|
/* Line 1126 of yacc.c. */
|
1471
|
-
#line
|
1474
|
+
#line 1475 "y.tab.c"
|
1472
1475
|
|
1473
1476
|
yyvsp -= yylen;
|
1474
1477
|
yyssp -= yylen;
|
@@ -1736,7 +1739,7 @@ yyreturn:
|
|
1736
1739
|
}
|
1737
1740
|
|
1738
1741
|
|
1739
|
-
#line
|
1742
|
+
#line 162 "src/q_parser.y"
|
1740
1743
|
|
1741
1744
|
|
1742
1745
|
const char *special_char = "&:()[]{}!\"~^|<>=*?+-";
|
@@ -2009,11 +2012,11 @@ static Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
|
|
2009
2012
|
}
|
2010
2013
|
else {
|
2011
2014
|
/* it only makes sense to find one term in a fuzzy query */
|
2012
|
-
float slop =
|
2015
|
+
float slop = qp_default_fuzzy_min_sim;
|
2013
2016
|
if (slop_str) {
|
2014
2017
|
sscanf(slop_str, "%f", &slop);
|
2015
2018
|
}
|
2016
|
-
q = fuzq_new_conf(field, token->text, slop,
|
2019
|
+
q = fuzq_new_conf(field, token->text, slop, qp_default_fuzzy_pre_len,
|
2017
2020
|
qp->max_clauses);
|
2018
2021
|
}
|
2019
2022
|
return q;
|
data/ext/r_qparser.c
CHANGED
data/ext/r_search.c
CHANGED
@@ -1240,6 +1240,32 @@ frt_fq_init(int argc, VALUE *argv, VALUE self)
|
|
1240
1240
|
return self;
|
1241
1241
|
}
|
1242
1242
|
|
1243
|
+
/*
|
1244
|
+
* call-seq:
|
1245
|
+
* FuzzyQuery.prefix_length -> prefix_length
|
1246
|
+
*
|
1247
|
+
* Get the +:prefix_length+ for the query.
|
1248
|
+
*/
|
1249
|
+
static VALUE
|
1250
|
+
frt_fq_pre_len(VALUE self)
|
1251
|
+
{
|
1252
|
+
GET_Q();
|
1253
|
+
return INT2FIX(((FuzzyQuery *)q)->pre_len);
|
1254
|
+
}
|
1255
|
+
|
1256
|
+
/*
|
1257
|
+
* call-seq:
|
1258
|
+
* FuzzyQuery.min_similarity -> min_similarity
|
1259
|
+
*
|
1260
|
+
* Get the +:min_similarity+ for the query.
|
1261
|
+
*/
|
1262
|
+
static VALUE
|
1263
|
+
frt_fq_min_sim(VALUE self)
|
1264
|
+
{
|
1265
|
+
GET_Q();
|
1266
|
+
return rb_float_new((double)((FuzzyQuery *)q)->min_sim);
|
1267
|
+
}
|
1268
|
+
|
1243
1269
|
/*
|
1244
1270
|
* call-seq:
|
1245
1271
|
* FuzzyQuery.default_min_similarity -> number
|
@@ -1252,6 +1278,7 @@ frt_fq_get_dms(VALUE self)
|
|
1252
1278
|
return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
|
1253
1279
|
}
|
1254
1280
|
|
1281
|
+
extern float qp_default_fuzzy_min_sim;
|
1255
1282
|
/*
|
1256
1283
|
* call-seq:
|
1257
1284
|
* FuzzyQuery.default_min_similarity = min_sim -> min_sim
|
@@ -1269,6 +1296,7 @@ frt_fq_set_dms(VALUE self, VALUE val)
|
|
1269
1296
|
rb_raise(rb_eArgError,
|
1270
1297
|
"%f < 0.0. :min_similarity must be > 0.0", min_sim);
|
1271
1298
|
}
|
1299
|
+
qp_default_fuzzy_min_sim = (float)min_sim;
|
1272
1300
|
rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val, Qfalse);
|
1273
1301
|
return val;
|
1274
1302
|
}
|
@@ -1285,6 +1313,7 @@ frt_fq_get_dpl(VALUE self)
|
|
1285
1313
|
return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
|
1286
1314
|
}
|
1287
1315
|
|
1316
|
+
extern int qp_default_fuzzy_pre_len;
|
1288
1317
|
/*
|
1289
1318
|
* call-seq:
|
1290
1319
|
* FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
|
@@ -1294,15 +1323,17 @@ frt_fq_get_dpl(VALUE self)
|
|
1294
1323
|
static VALUE
|
1295
1324
|
frt_fq_set_dpl(VALUE self, VALUE val)
|
1296
1325
|
{
|
1297
|
-
int pre_len =
|
1326
|
+
int pre_len = FIX2INT(val);
|
1298
1327
|
if (pre_len < 0) {
|
1299
1328
|
rb_raise(rb_eArgError,
|
1300
1329
|
"%d < 0. :prefix_length must be >= 0", pre_len);
|
1301
1330
|
}
|
1331
|
+
qp_default_fuzzy_pre_len = pre_len;
|
1302
1332
|
rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val, Qfalse);
|
1303
1333
|
return val;
|
1304
1334
|
}
|
1305
1335
|
|
1336
|
+
|
1306
1337
|
/****************************************************************************
|
1307
1338
|
*
|
1308
1339
|
* MatchAllQuery Methods
|
@@ -3159,7 +3190,9 @@ Init_FuzzyQuery(void)
|
|
3159
3190
|
rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
|
3160
3191
|
frt_fq_set_dpl, 1);
|
3161
3192
|
|
3162
|
-
rb_define_method(cFuzzyQuery, "initialize",
|
3193
|
+
rb_define_method(cFuzzyQuery, "initialize", frt_fq_init, -1);
|
3194
|
+
rb_define_method(cFuzzyQuery, "prefix_length", frt_fq_pre_len, 0);
|
3195
|
+
rb_define_method(cFuzzyQuery, "min_similarity", frt_fq_min_sim, 0);
|
3163
3196
|
}
|
3164
3197
|
|
3165
3198
|
/*
|
data/lib/ferret/index.rb
CHANGED
@@ -684,7 +684,7 @@ module Ferret::Index
|
|
684
684
|
@qp = Ferret::QueryParser.new(@options)
|
685
685
|
end
|
686
686
|
# we need to set this every time, in case a new field has been added
|
687
|
-
@qp.fields = @reader.field_names
|
687
|
+
@qp.fields = @reader.field_names unless options[:all_fields]
|
688
688
|
query = @qp.parse(query)
|
689
689
|
end
|
690
690
|
return query
|
data/lib/ferret_version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.6
|
7
|
+
date: 2006-09-21 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|