ferret 0.11.4 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. data/Rakefile +1 -0
  2. data/TUTORIAL +3 -3
  3. data/ext/analysis.c +12 -9
  4. data/ext/array.c +10 -10
  5. data/ext/array.h +8 -1
  6. data/ext/bitvector.c +2 -2
  7. data/ext/except.c +1 -1
  8. data/ext/ferret.c +2 -2
  9. data/ext/ferret.h +1 -1
  10. data/ext/fs_store.c +13 -2
  11. data/ext/global.c +4 -4
  12. data/ext/global.h +6 -0
  13. data/ext/hash.c +1 -1
  14. data/ext/helper.c +1 -1
  15. data/ext/helper.h +1 -1
  16. data/ext/index.c +48 -22
  17. data/ext/index.h +17 -16
  18. data/ext/mempool.c +4 -1
  19. data/ext/mempool.h +1 -1
  20. data/ext/multimapper.c +2 -2
  21. data/ext/q_fuzzy.c +2 -2
  22. data/ext/q_multi_term.c +2 -2
  23. data/ext/q_parser.c +39 -8
  24. data/ext/q_range.c +32 -1
  25. data/ext/r_analysis.c +66 -28
  26. data/ext/r_index.c +18 -19
  27. data/ext/r_qparser.c +21 -6
  28. data/ext/r_search.c +74 -49
  29. data/ext/r_store.c +1 -1
  30. data/ext/r_utils.c +17 -17
  31. data/ext/search.c +10 -5
  32. data/ext/search.h +3 -1
  33. data/ext/sort.c +2 -2
  34. data/ext/stopwords.c +23 -34
  35. data/ext/store.c +9 -9
  36. data/ext/store.h +5 -4
  37. data/lib/ferret/document.rb +2 -2
  38. data/lib/ferret/field_infos.rb +37 -35
  39. data/lib/ferret/index.rb +16 -6
  40. data/lib/ferret/number_tools.rb +2 -2
  41. data/lib/ferret_version.rb +1 -1
  42. data/test/unit/analysis/tc_token_stream.rb +40 -0
  43. data/test/unit/index/tc_index.rb +64 -101
  44. data/test/unit/index/tc_index_reader.rb +13 -0
  45. data/test/unit/largefile/tc_largefile.rb +46 -0
  46. data/test/unit/query_parser/tc_query_parser.rb +17 -1
  47. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  48. data/test/unit/search/tm_searcher.rb +27 -1
  49. data/test/unit/ts_largefile.rb +4 -0
  50. metadata +147 -144
@@ -235,10 +235,13 @@ module SearcherTests
235
235
  dir = Ferret::Store::RAMDirectory.new
236
236
  iw = Ferret::Index::IndexWriter.new(:dir => dir,
237
237
  :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new())
238
+ long_text = "big " + "between " * 2000 + 'house'
238
239
  [
239
240
  {:field => "the words we are searching for are one and two also " +
240
241
  "sometimes looking for them as a phrase like this; one " +
241
- "two lets see how it goes"}
242
+ "two lets see how it goes"},
243
+ {:long => 'before ' * 1000 + long_text + ' after' * 1000},
244
+ {:dates => '20070505 20071230 20060920 20081111'},
242
245
  ].each {|doc| iw << doc }
243
246
  iw.close
244
247
 
@@ -354,5 +357,28 @@ module SearcherTests
354
357
  assert_equal(2, highlights.size)
355
358
  assert_equal("<b>the words</b>...", highlights[0])
356
359
  assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
360
+
361
+ # {:dates => '20070505, 20071230, 20060920, 20081111'},
362
+ [
363
+ [RangeQuery.new(:dates, :>= => '20081111'),
364
+ '20070505 20071230 20060920 <b>20081111</b>'],
365
+ [RangeQuery.new(:dates, :>= => '20070101'),
366
+ '<b>20070505</b> <b>20071230</b> 20060920 <b>20081111</b>'],
367
+ [PrefixQuery.new(:dates, '2007'),
368
+ '<b>20070505</b> <b>20071230</b> 20060920 20081111'],
369
+ ].each do |query, expected|
370
+ assert_equal([expected],
371
+ searcher.highlight(query, 2, :dates))
372
+ end
373
+
374
+ #q = PhraseQuery.new(:long) << 'big' << 'house'
375
+ #q.slop = 4000
376
+ #highlights = searcher.highlight(q, 1, :long,
377
+ # :excerpt_length => 400,
378
+ # :num_excerpts => 2)
379
+ #assert_equal(1, highlights.size)
380
+ #puts highlights[0]
381
+ #assert_equal("<b>the words</b>...", highlights[0])
382
+ #assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
357
383
  end
358
384
  end
@@ -0,0 +1,4 @@
1
+ if ENV['FERRET_DEV']
2
+ require File.join(File.dirname(__FILE__), "../test_helper.rb")
3
+ load_test_dir('unit/largefile')
4
+ end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.11.4
7
- date: 2007-04-07 00:00:00 +10:00
6
+ version: 0.11.5
7
+ date: 2007-11-17 00:00:00 +11:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib
@@ -30,194 +30,197 @@ authors:
30
30
  - David Balmain
31
31
  files:
32
32
  - setup.rb
33
- - CHANGELOG
34
- - Rakefile
35
- - TUTORIAL
36
33
  - TODO
37
- - MIT-LICENSE
34
+ - TUTORIAL
35
+ - Rakefile
36
+ - CHANGELOG
38
37
  - README
39
- - ext/q_multi_term.c
40
- - ext/r_qparser.c
41
- - ext/r_utils.c
42
- - ext/r_analysis.c
43
- - ext/r_search.c
44
- - ext/ferret.c
38
+ - MIT-LICENSE
39
+ - ext/lang.h
40
+ - ext/q_filtered_query.c
41
+ - ext/stem_ISO_8859_1_spanish.h
42
+ - ext/stem_ISO_8859_1_spanish.c
43
+ - ext/global.c
44
+ - ext/stem_UTF_8_italian.c
45
+ - ext/modules.h
46
+ - ext/stem_UTF_8_portuguese.c
47
+ - ext/stem_ISO_8859_1_dutch.c
48
+ - ext/store.h
49
+ - ext/stem_UTF_8_spanish.h
50
+ - ext/array.c
51
+ - ext/stem_ISO_8859_1_finnish.c
52
+ - ext/stem_ISO_8859_1_finnish.h
53
+ - ext/stem_UTF_8_finnish.h
54
+ - ext/stem_ISO_8859_1_italian.h
55
+ - ext/analysis.h
56
+ - ext/q_wildcard.c
57
+ - ext/except.h
58
+ - ext/stem_UTF_8_english.c
59
+ - ext/stem_ISO_8859_1_german.h
60
+ - ext/api.c
45
61
  - ext/r_index.c
46
- - ext/ferret.h
47
- - ext/r_store.c
48
62
  - ext/hashset.c
63
+ - ext/stem_ISO_8859_1_german.c
64
+ - ext/stem_ISO_8859_1_swedish.h
65
+ - ext/utilities.c
66
+ - ext/stem_UTF_8_spanish.c
67
+ - ext/stem_UTF_8_dutch.h
68
+ - ext/stem_ISO_8859_1_danish.c
49
69
  - ext/q_match_all.c
50
- - ext/bitvector.c
51
- - ext/mempool.c
52
- - ext/index.c
70
+ - ext/stem_UTF_8_german.c
53
71
  - ext/compound_io.c
54
- - ext/q_wildcard.c
55
- - ext/priorityqueue.c
56
- - ext/q_range.c
72
+ - ext/hash.h
57
73
  - ext/q_prefix.c
58
- - ext/array.c
74
+ - ext/stopwords.c
75
+ - ext/stem_ISO_8859_1_italian.c
76
+ - ext/bitvector.h
77
+ - ext/stem_ISO_8859_1_dutch.h
78
+ - ext/bitvector.c
79
+ - ext/r_analysis.c
80
+ - ext/mempool.h
81
+ - ext/config.h
82
+ - ext/stem_UTF_8_german.h
83
+ - ext/stem_UTF_8_porter.c
84
+ - ext/stem_ISO_8859_1_norwegian.c
85
+ - ext/threading.h
86
+ - ext/stem_UTF_8_norwegian.c
87
+ - ext/stem_ISO_8859_1_swedish.c
88
+ - ext/stem_UTF_8_portuguese.h
89
+ - ext/search.h
59
90
  - ext/hash.c
91
+ - ext/stem_UTF_8_swedish.c
60
92
  - ext/fs_store.c
61
- - ext/multimapper.c
62
- - ext/similarity.c
63
- - ext/except.c
64
- - ext/ram_store.c
65
- - ext/q_boolean.c
66
- - ext/q_phrase.c
67
- - ext/global.c
68
- - ext/q_filtered_query.c
69
- - ext/q_const_score.c
93
+ - ext/stem_UTF_8_french.h
94
+ - ext/stem_UTF_8_finnish.c
95
+ - ext/stem_UTF_8_danish.c
96
+ - ext/stem_UTF_8_porter.h
97
+ - ext/ferret.c
70
98
  - ext/term_vectors.c
71
- - ext/stopwords.c
72
- - ext/search.c
73
- - ext/q_fuzzy.c
74
99
  - ext/posh.c
75
- - ext/q_parser.c
76
- - ext/document.c
100
+ - ext/stem_ISO_8859_1_norwegian.h
77
101
  - ext/helper.c
78
- - ext/q_span.c
102
+ - ext/mempool.c
103
+ - ext/r_utils.c
104
+ - ext/document.c
105
+ - ext/stem_KOI8_R_russian.c
106
+ - ext/stem_KOI8_R_russian.h
107
+ - ext/r_search.c
108
+ - ext/libstemmer.c
109
+ - ext/api.h
110
+ - ext/stem_ISO_8859_1_portuguese.c
111
+ - ext/r_qparser.c
112
+ - ext/multimapper.h
113
+ - ext/stem_ISO_8859_1_french.c
114
+ - ext/stem_UTF_8_danish.h
115
+ - ext/stem_ISO_8859_1_portuguese.h
116
+ - ext/hashset.h
117
+ - ext/q_parser.c
79
118
  - ext/analysis.c
80
- - ext/filter.c
119
+ - ext/r_store.c
120
+ - ext/stem_UTF_8_swedish.h
121
+ - ext/stem_UTF_8_italian.h
122
+ - ext/q_phrase.c
81
123
  - ext/q_term.c
82
- - ext/sort.c
83
- - ext/store.c
84
- - ext/hashset.h
85
- - ext/store.h
86
- - ext/multimapper.h
87
- - ext/global.h
88
- - ext/bitvector.h
89
- - ext/win32.h
90
- - ext/config.h
91
- - ext/search.h
92
- - ext/threading.h
124
+ - ext/priorityqueue.h
125
+ - ext/libstemmer.h
126
+ - ext/filter.c
127
+ - ext/q_span.c
128
+ - ext/multimapper.c
93
129
  - ext/index.h
94
- - ext/mempool.h
95
- - ext/posh.h
96
- - ext/document.h
97
- - ext/array.h
98
130
  - ext/helper.h
99
- - ext/lang.h
100
- - ext/hash.h
101
- - ext/analysis.h
102
- - ext/priorityqueue.h
103
131
  - ext/similarity.h
104
- - ext/except.h
105
- - ext/stem_ISO_8859_1_italian.c
106
- - ext/stem_UTF_8_portuguese.c
107
- - ext/stem_UTF_8_portuguese.h
108
132
  - ext/stem_UTF_8_french.c
109
- - ext/stem_UTF_8_spanish.c
110
- - ext/stem_UTF_8_dutch.c
111
- - ext/stem_ISO_8859_1_italian.h
112
- - ext/stem_UTF_8_german.c
113
- - ext/stem_UTF_8_french.h
114
- - ext/stem_UTF_8_spanish.h
115
- - ext/stem_ISO_8859_1_english.c
116
- - ext/stem_ISO_8859_1_norwegian.c
117
- - ext/stem_UTF_8_porter.c
118
- - ext/stem_UTF_8_dutch.h
119
- - ext/stem_UTF_8_german.h
120
- - ext/stem_ISO_8859_1_english.h
121
- - ext/stem_ISO_8859_1_norwegian.h
122
- - ext/stem_UTF_8_porter.h
123
- - ext/stem_ISO_8859_1_portuguese.c
133
+ - ext/header.h
134
+ - ext/index.c
135
+ - ext/posh.h
136
+ - ext/document.h
137
+ - ext/similarity.c
124
138
  - ext/stem_UTF_8_russian.c
125
- - ext/stem_ISO_8859_1_spanish.c
126
- - ext/stem_ISO_8859_1_french.c
127
- - ext/stem_ISO_8859_1_portuguese.h
128
- - ext/stem_ISO_8859_1_dutch.c
129
- - ext/stem_UTF_8_russian.h
130
- - ext/stem_KOI8_R_russian.c
131
- - ext/stem_ISO_8859_1_german.c
132
- - ext/stem_ISO_8859_1_spanish.h
133
- - ext/stem_ISO_8859_1_french.h
139
+ - ext/win32.h
140
+ - ext/q_fuzzy.c
141
+ - ext/search.c
134
142
  - ext/stem_ISO_8859_1_porter.c
135
- - ext/stem_ISO_8859_1_dutch.h
136
- - ext/stem_UTF_8_finnish.c
137
- - ext/stem_KOI8_R_russian.h
138
- - ext/stem_ISO_8859_1_german.h
143
+ - ext/stem_UTF_8_russian.h
144
+ - ext/ram_store.c
139
145
  - ext/stem_ISO_8859_1_porter.h
140
- - ext/stem_UTF_8_finnish.h
141
- - ext/stem_UTF_8_danish.c
142
- - ext/stem_UTF_8_swedish.c
143
- - ext/stem_UTF_8_danish.h
144
- - ext/stem_UTF_8_swedish.h
145
- - ext/stem_ISO_8859_1_finnish.c
146
- - ext/stem_UTF_8_italian.c
147
- - ext/stem_ISO_8859_1_finnish.h
148
- - ext/stem_UTF_8_italian.h
149
- - ext/stem_ISO_8859_1_swedish.c
150
- - ext/stem_ISO_8859_1_danish.c
151
- - ext/stem_UTF_8_english.c
152
- - ext/stem_UTF_8_norwegian.c
153
- - ext/stem_ISO_8859_1_swedish.h
146
+ - ext/except.c
147
+ - ext/q_range.c
148
+ - ext/sort.c
149
+ - ext/q_multi_term.c
150
+ - ext/q_const_score.c
151
+ - ext/q_boolean.c
152
+ - ext/stem_UTF_8_norwegian.h
153
+ - ext/priorityqueue.c
154
+ - ext/array.h
154
155
  - ext/stem_ISO_8859_1_danish.h
156
+ - ext/store.c
155
157
  - ext/stem_UTF_8_english.h
156
- - ext/stem_UTF_8_norwegian.h
157
- - ext/utilities.c
158
- - ext/api.h
159
- - ext/api.c
160
- - ext/header.h
161
- - ext/libstemmer.c
162
- - ext/modules.h
163
- - ext/libstemmer.h
164
- - ext/inc/threading.h
158
+ - ext/stem_ISO_8859_1_english.h
159
+ - ext/stem_ISO_8859_1_french.h
160
+ - ext/stem_ISO_8859_1_english.c
161
+ - ext/ferret.h
162
+ - ext/global.h
163
+ - ext/stem_UTF_8_dutch.c
165
164
  - ext/inc/lang.h
166
- - lib/ferret.rb
165
+ - ext/inc/threading.h
167
166
  - lib/ferret_version.rb
168
167
  - lib/ferret/field_infos.rb
168
+ - lib/ferret/browser/webrick.rb
169
+ - lib/ferret/browser.rb
169
170
  - lib/ferret/document.rb
170
171
  - lib/ferret/index.rb
171
- - lib/ferret/browser.rb
172
172
  - lib/ferret/number_tools.rb
173
- - lib/ferret/browser/webrick.rb
174
- - lib/ferret/browser/views/layout.rhtml
175
- - lib/ferret/browser/views/term/index.rhtml
176
- - lib/ferret/browser/views/term/termdocs.rhtml
173
+ - lib/ferret.rb
177
174
  - lib/ferret/browser/views/home/index.rhtml
178
175
  - lib/ferret/browser/views/help/index.rhtml
179
176
  - lib/ferret/browser/views/document/list.rhtml
180
177
  - lib/ferret/browser/views/document/show.rhtml
178
+ - lib/ferret/browser/views/term/index.rhtml
179
+ - lib/ferret/browser/views/term/termdocs.rhtml
181
180
  - lib/ferret/browser/views/error/index.rhtml
181
+ - lib/ferret/browser/views/layout.rhtml
182
182
  - lib/ferret/browser/views/term-vector/index.rhtml
183
183
  - lib/ferret/browser/s/style.css
184
184
  - lib/ferret/browser/s/global.js
185
- - test/test_helper.rb
186
- - test/test_all.rb
187
- - test/unit/tc_document.rb
185
+ - test/unit/index/th_doc.rb
186
+ - test/unit/index/tc_index_writer.rb
187
+ - test/unit/index/tc_index.rb
188
+ - test/unit/index/tc_index_reader.rb
188
189
  - test/unit/ts_index.rb
189
- - test/unit/ts_store.rb
190
- - test/unit/ts_analysis.rb
191
- - test/unit/ts_utils.rb
190
+ - test/unit/tc_document.rb
192
191
  - test/unit/ts_query_parser.rb
193
- - test/unit/ts_search.rb
194
- - test/unit/utils/tc_number_tools.rb
195
192
  - test/unit/utils/tc_bit_vector.rb
196
193
  - test/unit/utils/tc_priority_queue.rb
194
+ - test/unit/utils/tc_number_tools.rb
195
+ - test/unit/ts_analysis.rb
197
196
  - test/unit/query_parser/tc_query_parser.rb
198
- - test/unit/analysis/tc_token_stream.rb
199
- - test/unit/analysis/tc_analyzer.rb
200
- - test/unit/search/tc_sort_field.rb
201
- - test/unit/search/tc_sort.rb
202
197
  - test/unit/search/tc_search_and_sort.rb
203
- - test/unit/search/tc_fuzzy_query.rb
198
+ - test/unit/search/tc_sort.rb
204
199
  - test/unit/search/tc_index_searcher.rb
205
- - test/unit/search/tc_spans.rb
200
+ - test/unit/search/tc_sort_field.rb
201
+ - test/unit/search/tm_searcher.rb
202
+ - test/unit/search/tc_fuzzy_query.rb
206
203
  - test/unit/search/tc_filter.rb
204
+ - test/unit/search/tc_multiple_search_requests.rb
205
+ - test/unit/search/tc_spans.rb
207
206
  - test/unit/search/tc_multi_searcher.rb
208
- - test/unit/search/tm_searcher.rb
209
- - test/unit/index/tc_index.rb
210
- - test/unit/index/th_doc.rb
211
- - test/unit/index/tc_index_reader.rb
212
- - test/unit/index/tc_index_writer.rb
213
- - test/unit/store/tc_ram_store.rb
214
- - test/unit/store/tm_store_lock.rb
207
+ - test/unit/ts_search.rb
208
+ - test/unit/ts_store.rb
215
209
  - test/unit/store/tc_fs_store.rb
216
210
  - test/unit/store/tm_store.rb
211
+ - test/unit/store/tm_store_lock.rb
212
+ - test/unit/store/tc_ram_store.rb
213
+ - test/unit/ts_utils.rb
214
+ - test/unit/analysis/tc_token_stream.rb
215
+ - test/unit/analysis/tc_analyzer.rb
216
+ - test/unit/ts_largefile.rb
217
+ - test/unit/largefile/tc_largefile.rb
218
+ - test/test_all.rb
217
219
  - test/threading/thread_safety_index_test.rb
218
- - test/threading/thread_safety_test.rb
219
- - test/threading/thread_safety_read_write_test.rb
220
220
  - test/threading/number_to_spoken.rb
221
+ - test/threading/thread_safety_read_write_test.rb
222
+ - test/threading/thread_safety_test.rb
223
+ - test/test_helper.rb
221
224
  test_files: []
222
225
 
223
226
  rdoc_options:
@@ -233,11 +236,11 @@ extra_rdoc_files:
233
236
  - TODO
234
237
  - TUTORIAL
235
238
  - MIT-LICENSE
236
- - ext/r_qparser.c
237
- - ext/r_utils.c
239
+ - ext/r_index.c
238
240
  - ext/r_analysis.c
241
+ - ext/r_utils.c
239
242
  - ext/r_search.c
240
- - ext/r_index.c
243
+ - ext/r_qparser.c
241
244
  - ext/r_store.c
242
245
  - ext/ferret.c
243
246
  executables: