ferret 0.11.4 → 0.11.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. data/Rakefile +1 -0
  2. data/TUTORIAL +3 -3
  3. data/ext/analysis.c +12 -9
  4. data/ext/array.c +10 -10
  5. data/ext/array.h +8 -1
  6. data/ext/bitvector.c +2 -2
  7. data/ext/except.c +1 -1
  8. data/ext/ferret.c +2 -2
  9. data/ext/ferret.h +1 -1
  10. data/ext/fs_store.c +13 -2
  11. data/ext/global.c +4 -4
  12. data/ext/global.h +6 -0
  13. data/ext/hash.c +1 -1
  14. data/ext/helper.c +1 -1
  15. data/ext/helper.h +1 -1
  16. data/ext/index.c +48 -22
  17. data/ext/index.h +17 -16
  18. data/ext/mempool.c +4 -1
  19. data/ext/mempool.h +1 -1
  20. data/ext/multimapper.c +2 -2
  21. data/ext/q_fuzzy.c +2 -2
  22. data/ext/q_multi_term.c +2 -2
  23. data/ext/q_parser.c +39 -8
  24. data/ext/q_range.c +32 -1
  25. data/ext/r_analysis.c +66 -28
  26. data/ext/r_index.c +18 -19
  27. data/ext/r_qparser.c +21 -6
  28. data/ext/r_search.c +74 -49
  29. data/ext/r_store.c +1 -1
  30. data/ext/r_utils.c +17 -17
  31. data/ext/search.c +10 -5
  32. data/ext/search.h +3 -1
  33. data/ext/sort.c +2 -2
  34. data/ext/stopwords.c +23 -34
  35. data/ext/store.c +9 -9
  36. data/ext/store.h +5 -4
  37. data/lib/ferret/document.rb +2 -2
  38. data/lib/ferret/field_infos.rb +37 -35
  39. data/lib/ferret/index.rb +16 -6
  40. data/lib/ferret/number_tools.rb +2 -2
  41. data/lib/ferret_version.rb +1 -1
  42. data/test/unit/analysis/tc_token_stream.rb +40 -0
  43. data/test/unit/index/tc_index.rb +64 -101
  44. data/test/unit/index/tc_index_reader.rb +13 -0
  45. data/test/unit/largefile/tc_largefile.rb +46 -0
  46. data/test/unit/query_parser/tc_query_parser.rb +17 -1
  47. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  48. data/test/unit/search/tm_searcher.rb +27 -1
  49. data/test/unit/ts_largefile.rb +4 -0
  50. metadata +147 -144
@@ -235,10 +235,13 @@ module SearcherTests
235
235
  dir = Ferret::Store::RAMDirectory.new
236
236
  iw = Ferret::Index::IndexWriter.new(:dir => dir,
237
237
  :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new())
238
+ long_text = "big " + "between " * 2000 + 'house'
238
239
  [
239
240
  {:field => "the words we are searching for are one and two also " +
240
241
  "sometimes looking for them as a phrase like this; one " +
241
- "two lets see how it goes"}
242
+ "two lets see how it goes"},
243
+ {:long => 'before ' * 1000 + long_text + ' after' * 1000},
244
+ {:dates => '20070505 20071230 20060920 20081111'},
242
245
  ].each {|doc| iw << doc }
243
246
  iw.close
244
247
 
@@ -354,5 +357,28 @@ module SearcherTests
354
357
  assert_equal(2, highlights.size)
355
358
  assert_equal("<b>the words</b>...", highlights[0])
356
359
  assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
360
+
361
+ # {:dates => '20070505, 20071230, 20060920, 20081111'},
362
+ [
363
+ [RangeQuery.new(:dates, :>= => '20081111'),
364
+ '20070505 20071230 20060920 <b>20081111</b>'],
365
+ [RangeQuery.new(:dates, :>= => '20070101'),
366
+ '<b>20070505</b> <b>20071230</b> 20060920 <b>20081111</b>'],
367
+ [PrefixQuery.new(:dates, '2007'),
368
+ '<b>20070505</b> <b>20071230</b> 20060920 20081111'],
369
+ ].each do |query, expected|
370
+ assert_equal([expected],
371
+ searcher.highlight(query, 2, :dates))
372
+ end
373
+
374
+ #q = PhraseQuery.new(:long) << 'big' << 'house'
375
+ #q.slop = 4000
376
+ #highlights = searcher.highlight(q, 1, :long,
377
+ # :excerpt_length => 400,
378
+ # :num_excerpts => 2)
379
+ #assert_equal(1, highlights.size)
380
+ #puts highlights[0]
381
+ #assert_equal("<b>the words</b>...", highlights[0])
382
+ #assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
357
383
  end
358
384
  end
@@ -0,0 +1,4 @@
1
+ if ENV['FERRET_DEV']
2
+ require File.join(File.dirname(__FILE__), "../test_helper.rb")
3
+ load_test_dir('unit/largefile')
4
+ end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.11.4
7
- date: 2007-04-07 00:00:00 +10:00
6
+ version: 0.11.5
7
+ date: 2007-11-17 00:00:00 +11:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -30,194 +30,197 @@ authors:
30
30
  - David Balmain
31
31
  files:
32
32
  - setup.rb
33
- - CHANGELOG
34
- - Rakefile
35
- - TUTORIAL
36
33
  - TODO
37
- - MIT-LICENSE
34
+ - TUTORIAL
35
+ - Rakefile
36
+ - CHANGELOG
38
37
  - README
39
- - ext/q_multi_term.c
40
- - ext/r_qparser.c
41
- - ext/r_utils.c
42
- - ext/r_analysis.c
43
- - ext/r_search.c
44
- - ext/ferret.c
38
+ - MIT-LICENSE
39
+ - ext/lang.h
40
+ - ext/q_filtered_query.c
41
+ - ext/stem_ISO_8859_1_spanish.h
42
+ - ext/stem_ISO_8859_1_spanish.c
43
+ - ext/global.c
44
+ - ext/stem_UTF_8_italian.c
45
+ - ext/modules.h
46
+ - ext/stem_UTF_8_portuguese.c
47
+ - ext/stem_ISO_8859_1_dutch.c
48
+ - ext/store.h
49
+ - ext/stem_UTF_8_spanish.h
50
+ - ext/array.c
51
+ - ext/stem_ISO_8859_1_finnish.c
52
+ - ext/stem_ISO_8859_1_finnish.h
53
+ - ext/stem_UTF_8_finnish.h
54
+ - ext/stem_ISO_8859_1_italian.h
55
+ - ext/analysis.h
56
+ - ext/q_wildcard.c
57
+ - ext/except.h
58
+ - ext/stem_UTF_8_english.c
59
+ - ext/stem_ISO_8859_1_german.h
60
+ - ext/api.c
45
61
  - ext/r_index.c
46
- - ext/ferret.h
47
- - ext/r_store.c
48
62
  - ext/hashset.c
63
+ - ext/stem_ISO_8859_1_german.c
64
+ - ext/stem_ISO_8859_1_swedish.h
65
+ - ext/utilities.c
66
+ - ext/stem_UTF_8_spanish.c
67
+ - ext/stem_UTF_8_dutch.h
68
+ - ext/stem_ISO_8859_1_danish.c
49
69
  - ext/q_match_all.c
50
- - ext/bitvector.c
51
- - ext/mempool.c
52
- - ext/index.c
70
+ - ext/stem_UTF_8_german.c
53
71
  - ext/compound_io.c
54
- - ext/q_wildcard.c
55
- - ext/priorityqueue.c
56
- - ext/q_range.c
72
+ - ext/hash.h
57
73
  - ext/q_prefix.c
58
- - ext/array.c
74
+ - ext/stopwords.c
75
+ - ext/stem_ISO_8859_1_italian.c
76
+ - ext/bitvector.h
77
+ - ext/stem_ISO_8859_1_dutch.h
78
+ - ext/bitvector.c
79
+ - ext/r_analysis.c
80
+ - ext/mempool.h
81
+ - ext/config.h
82
+ - ext/stem_UTF_8_german.h
83
+ - ext/stem_UTF_8_porter.c
84
+ - ext/stem_ISO_8859_1_norwegian.c
85
+ - ext/threading.h
86
+ - ext/stem_UTF_8_norwegian.c
87
+ - ext/stem_ISO_8859_1_swedish.c
88
+ - ext/stem_UTF_8_portuguese.h
89
+ - ext/search.h
59
90
  - ext/hash.c
91
+ - ext/stem_UTF_8_swedish.c
60
92
  - ext/fs_store.c
61
- - ext/multimapper.c
62
- - ext/similarity.c
63
- - ext/except.c
64
- - ext/ram_store.c
65
- - ext/q_boolean.c
66
- - ext/q_phrase.c
67
- - ext/global.c
68
- - ext/q_filtered_query.c
69
- - ext/q_const_score.c
93
+ - ext/stem_UTF_8_french.h
94
+ - ext/stem_UTF_8_finnish.c
95
+ - ext/stem_UTF_8_danish.c
96
+ - ext/stem_UTF_8_porter.h
97
+ - ext/ferret.c
70
98
  - ext/term_vectors.c
71
- - ext/stopwords.c
72
- - ext/search.c
73
- - ext/q_fuzzy.c
74
99
  - ext/posh.c
75
- - ext/q_parser.c
76
- - ext/document.c
100
+ - ext/stem_ISO_8859_1_norwegian.h
77
101
  - ext/helper.c
78
- - ext/q_span.c
102
+ - ext/mempool.c
103
+ - ext/r_utils.c
104
+ - ext/document.c
105
+ - ext/stem_KOI8_R_russian.c
106
+ - ext/stem_KOI8_R_russian.h
107
+ - ext/r_search.c
108
+ - ext/libstemmer.c
109
+ - ext/api.h
110
+ - ext/stem_ISO_8859_1_portuguese.c
111
+ - ext/r_qparser.c
112
+ - ext/multimapper.h
113
+ - ext/stem_ISO_8859_1_french.c
114
+ - ext/stem_UTF_8_danish.h
115
+ - ext/stem_ISO_8859_1_portuguese.h
116
+ - ext/hashset.h
117
+ - ext/q_parser.c
79
118
  - ext/analysis.c
80
- - ext/filter.c
119
+ - ext/r_store.c
120
+ - ext/stem_UTF_8_swedish.h
121
+ - ext/stem_UTF_8_italian.h
122
+ - ext/q_phrase.c
81
123
  - ext/q_term.c
82
- - ext/sort.c
83
- - ext/store.c
84
- - ext/hashset.h
85
- - ext/store.h
86
- - ext/multimapper.h
87
- - ext/global.h
88
- - ext/bitvector.h
89
- - ext/win32.h
90
- - ext/config.h
91
- - ext/search.h
92
- - ext/threading.h
124
+ - ext/priorityqueue.h
125
+ - ext/libstemmer.h
126
+ - ext/filter.c
127
+ - ext/q_span.c
128
+ - ext/multimapper.c
93
129
  - ext/index.h
94
- - ext/mempool.h
95
- - ext/posh.h
96
- - ext/document.h
97
- - ext/array.h
98
130
  - ext/helper.h
99
- - ext/lang.h
100
- - ext/hash.h
101
- - ext/analysis.h
102
- - ext/priorityqueue.h
103
131
  - ext/similarity.h
104
- - ext/except.h
105
- - ext/stem_ISO_8859_1_italian.c
106
- - ext/stem_UTF_8_portuguese.c
107
- - ext/stem_UTF_8_portuguese.h
108
132
  - ext/stem_UTF_8_french.c
109
- - ext/stem_UTF_8_spanish.c
110
- - ext/stem_UTF_8_dutch.c
111
- - ext/stem_ISO_8859_1_italian.h
112
- - ext/stem_UTF_8_german.c
113
- - ext/stem_UTF_8_french.h
114
- - ext/stem_UTF_8_spanish.h
115
- - ext/stem_ISO_8859_1_english.c
116
- - ext/stem_ISO_8859_1_norwegian.c
117
- - ext/stem_UTF_8_porter.c
118
- - ext/stem_UTF_8_dutch.h
119
- - ext/stem_UTF_8_german.h
120
- - ext/stem_ISO_8859_1_english.h
121
- - ext/stem_ISO_8859_1_norwegian.h
122
- - ext/stem_UTF_8_porter.h
123
- - ext/stem_ISO_8859_1_portuguese.c
133
+ - ext/header.h
134
+ - ext/index.c
135
+ - ext/posh.h
136
+ - ext/document.h
137
+ - ext/similarity.c
124
138
  - ext/stem_UTF_8_russian.c
125
- - ext/stem_ISO_8859_1_spanish.c
126
- - ext/stem_ISO_8859_1_french.c
127
- - ext/stem_ISO_8859_1_portuguese.h
128
- - ext/stem_ISO_8859_1_dutch.c
129
- - ext/stem_UTF_8_russian.h
130
- - ext/stem_KOI8_R_russian.c
131
- - ext/stem_ISO_8859_1_german.c
132
- - ext/stem_ISO_8859_1_spanish.h
133
- - ext/stem_ISO_8859_1_french.h
139
+ - ext/win32.h
140
+ - ext/q_fuzzy.c
141
+ - ext/search.c
134
142
  - ext/stem_ISO_8859_1_porter.c
135
- - ext/stem_ISO_8859_1_dutch.h
136
- - ext/stem_UTF_8_finnish.c
137
- - ext/stem_KOI8_R_russian.h
138
- - ext/stem_ISO_8859_1_german.h
143
+ - ext/stem_UTF_8_russian.h
144
+ - ext/ram_store.c
139
145
  - ext/stem_ISO_8859_1_porter.h
140
- - ext/stem_UTF_8_finnish.h
141
- - ext/stem_UTF_8_danish.c
142
- - ext/stem_UTF_8_swedish.c
143
- - ext/stem_UTF_8_danish.h
144
- - ext/stem_UTF_8_swedish.h
145
- - ext/stem_ISO_8859_1_finnish.c
146
- - ext/stem_UTF_8_italian.c
147
- - ext/stem_ISO_8859_1_finnish.h
148
- - ext/stem_UTF_8_italian.h
149
- - ext/stem_ISO_8859_1_swedish.c
150
- - ext/stem_ISO_8859_1_danish.c
151
- - ext/stem_UTF_8_english.c
152
- - ext/stem_UTF_8_norwegian.c
153
- - ext/stem_ISO_8859_1_swedish.h
146
+ - ext/except.c
147
+ - ext/q_range.c
148
+ - ext/sort.c
149
+ - ext/q_multi_term.c
150
+ - ext/q_const_score.c
151
+ - ext/q_boolean.c
152
+ - ext/stem_UTF_8_norwegian.h
153
+ - ext/priorityqueue.c
154
+ - ext/array.h
154
155
  - ext/stem_ISO_8859_1_danish.h
156
+ - ext/store.c
155
157
  - ext/stem_UTF_8_english.h
156
- - ext/stem_UTF_8_norwegian.h
157
- - ext/utilities.c
158
- - ext/api.h
159
- - ext/api.c
160
- - ext/header.h
161
- - ext/libstemmer.c
162
- - ext/modules.h
163
- - ext/libstemmer.h
164
- - ext/inc/threading.h
158
+ - ext/stem_ISO_8859_1_english.h
159
+ - ext/stem_ISO_8859_1_french.h
160
+ - ext/stem_ISO_8859_1_english.c
161
+ - ext/ferret.h
162
+ - ext/global.h
163
+ - ext/stem_UTF_8_dutch.c
165
164
  - ext/inc/lang.h
166
- - lib/ferret.rb
165
+ - ext/inc/threading.h
167
166
  - lib/ferret_version.rb
168
167
  - lib/ferret/field_infos.rb
168
+ - lib/ferret/browser/webrick.rb
169
+ - lib/ferret/browser.rb
169
170
  - lib/ferret/document.rb
170
171
  - lib/ferret/index.rb
171
- - lib/ferret/browser.rb
172
172
  - lib/ferret/number_tools.rb
173
- - lib/ferret/browser/webrick.rb
174
- - lib/ferret/browser/views/layout.rhtml
175
- - lib/ferret/browser/views/term/index.rhtml
176
- - lib/ferret/browser/views/term/termdocs.rhtml
173
+ - lib/ferret.rb
177
174
  - lib/ferret/browser/views/home/index.rhtml
178
175
  - lib/ferret/browser/views/help/index.rhtml
179
176
  - lib/ferret/browser/views/document/list.rhtml
180
177
  - lib/ferret/browser/views/document/show.rhtml
178
+ - lib/ferret/browser/views/term/index.rhtml
179
+ - lib/ferret/browser/views/term/termdocs.rhtml
181
180
  - lib/ferret/browser/views/error/index.rhtml
181
+ - lib/ferret/browser/views/layout.rhtml
182
182
  - lib/ferret/browser/views/term-vector/index.rhtml
183
183
  - lib/ferret/browser/s/style.css
184
184
  - lib/ferret/browser/s/global.js
185
- - test/test_helper.rb
186
- - test/test_all.rb
187
- - test/unit/tc_document.rb
185
+ - test/unit/index/th_doc.rb
186
+ - test/unit/index/tc_index_writer.rb
187
+ - test/unit/index/tc_index.rb
188
+ - test/unit/index/tc_index_reader.rb
188
189
  - test/unit/ts_index.rb
189
- - test/unit/ts_store.rb
190
- - test/unit/ts_analysis.rb
191
- - test/unit/ts_utils.rb
190
+ - test/unit/tc_document.rb
192
191
  - test/unit/ts_query_parser.rb
193
- - test/unit/ts_search.rb
194
- - test/unit/utils/tc_number_tools.rb
195
192
  - test/unit/utils/tc_bit_vector.rb
196
193
  - test/unit/utils/tc_priority_queue.rb
194
+ - test/unit/utils/tc_number_tools.rb
195
+ - test/unit/ts_analysis.rb
197
196
  - test/unit/query_parser/tc_query_parser.rb
198
- - test/unit/analysis/tc_token_stream.rb
199
- - test/unit/analysis/tc_analyzer.rb
200
- - test/unit/search/tc_sort_field.rb
201
- - test/unit/search/tc_sort.rb
202
197
  - test/unit/search/tc_search_and_sort.rb
203
- - test/unit/search/tc_fuzzy_query.rb
198
+ - test/unit/search/tc_sort.rb
204
199
  - test/unit/search/tc_index_searcher.rb
205
- - test/unit/search/tc_spans.rb
200
+ - test/unit/search/tc_sort_field.rb
201
+ - test/unit/search/tm_searcher.rb
202
+ - test/unit/search/tc_fuzzy_query.rb
206
203
  - test/unit/search/tc_filter.rb
204
+ - test/unit/search/tc_multiple_search_requests.rb
205
+ - test/unit/search/tc_spans.rb
207
206
  - test/unit/search/tc_multi_searcher.rb
208
- - test/unit/search/tm_searcher.rb
209
- - test/unit/index/tc_index.rb
210
- - test/unit/index/th_doc.rb
211
- - test/unit/index/tc_index_reader.rb
212
- - test/unit/index/tc_index_writer.rb
213
- - test/unit/store/tc_ram_store.rb
214
- - test/unit/store/tm_store_lock.rb
207
+ - test/unit/ts_search.rb
208
+ - test/unit/ts_store.rb
215
209
  - test/unit/store/tc_fs_store.rb
216
210
  - test/unit/store/tm_store.rb
211
+ - test/unit/store/tm_store_lock.rb
212
+ - test/unit/store/tc_ram_store.rb
213
+ - test/unit/ts_utils.rb
214
+ - test/unit/analysis/tc_token_stream.rb
215
+ - test/unit/analysis/tc_analyzer.rb
216
+ - test/unit/ts_largefile.rb
217
+ - test/unit/largefile/tc_largefile.rb
218
+ - test/test_all.rb
217
219
  - test/threading/thread_safety_index_test.rb
218
- - test/threading/thread_safety_test.rb
219
- - test/threading/thread_safety_read_write_test.rb
220
220
  - test/threading/number_to_spoken.rb
221
+ - test/threading/thread_safety_read_write_test.rb
222
+ - test/threading/thread_safety_test.rb
223
+ - test/test_helper.rb
221
224
  test_files: []
222
225
 
223
226
  rdoc_options:
@@ -233,11 +236,11 @@ extra_rdoc_files:
233
236
  - TODO
234
237
  - TUTORIAL
235
238
  - MIT-LICENSE
236
- - ext/r_qparser.c
237
- - ext/r_utils.c
239
+ - ext/r_index.c
238
240
  - ext/r_analysis.c
241
+ - ext/r_utils.c
239
242
  - ext/r_search.c
240
- - ext/r_index.c
243
+ - ext/r_qparser.c
241
244
  - ext/r_store.c
242
245
  - ext/ferret.c
243
246
  executables: