gonzui 1.2-x86-mswin32-60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS.txt +9 -0
- data/History.txt +5539 -0
- data/Manifest.txt +115 -0
- data/PostInstall.txt +17 -0
- data/README.rdoc +149 -0
- data/Rakefile +28 -0
- data/bin/gonzui-db +167 -0
- data/bin/gonzui-import +177 -0
- data/bin/gonzui-remove +58 -0
- data/bin/gonzui-search +68 -0
- data/bin/gonzui-server +176 -0
- data/bin/gonzui-update +53 -0
- data/data/gonzui/catalog/catalog.ja +80 -0
- data/data/gonzui/doc/favicon.ico +0 -0
- data/data/gonzui/doc/folder.png +0 -0
- data/data/gonzui/doc/gonzui.css +279 -0
- data/data/gonzui/doc/gonzui.js +111 -0
- data/data/gonzui/doc/text.png +0 -0
- data/data/gonzuirc.sample +29 -0
- data/ext/autopack/autopack.c +88 -0
- data/ext/autopack/extconf.rb +3 -0
- data/ext/delta/delta.c +147 -0
- data/ext/delta/extconf.rb +5 -0
- data/ext/texttokenizer/extconf.rb +5 -0
- data/ext/texttokenizer/texttokenizer.c +93 -0
- data/ext/xmlformatter/extconf.rb +5 -0
- data/ext/xmlformatter/xmlformatter.c +207 -0
- data/lib/gonzui.rb +59 -0
- data/lib/gonzui/apt.rb +193 -0
- data/lib/gonzui/autopack.so +0 -0
- data/lib/gonzui/bdbdbm.rb +118 -0
- data/lib/gonzui/cmdapp.rb +14 -0
- data/lib/gonzui/cmdapp/app.rb +175 -0
- data/lib/gonzui/cmdapp/search.rb +134 -0
- data/lib/gonzui/config.rb +117 -0
- data/lib/gonzui/content.rb +19 -0
- data/lib/gonzui/dbm.rb +673 -0
- data/lib/gonzui/deindexer.rb +162 -0
- data/lib/gonzui/delta.rb +49 -0
- data/lib/gonzui/delta.so +0 -0
- data/lib/gonzui/extractor.rb +347 -0
- data/lib/gonzui/fetcher.rb +309 -0
- data/lib/gonzui/gettext.rb +144 -0
- data/lib/gonzui/importer.rb +84 -0
- data/lib/gonzui/indexer.rb +316 -0
- data/lib/gonzui/info.rb +80 -0
- data/lib/gonzui/license.rb +100 -0
- data/lib/gonzui/logger.rb +48 -0
- data/lib/gonzui/monitor.rb +177 -0
- data/lib/gonzui/progressbar.rb +235 -0
- data/lib/gonzui/remover.rb +38 -0
- data/lib/gonzui/searcher.rb +330 -0
- data/lib/gonzui/searchquery.rb +235 -0
- data/lib/gonzui/searchresult.rb +111 -0
- data/lib/gonzui/texttokenizer.so +0 -0
- data/lib/gonzui/updater.rb +254 -0
- data/lib/gonzui/util.rb +415 -0
- data/lib/gonzui/vcs.rb +128 -0
- data/lib/gonzui/webapp.rb +25 -0
- data/lib/gonzui/webapp/advsearch.rb +123 -0
- data/lib/gonzui/webapp/filehandler.rb +24 -0
- data/lib/gonzui/webapp/jsfeed.rb +61 -0
- data/lib/gonzui/webapp/markup.rb +445 -0
- data/lib/gonzui/webapp/search.rb +269 -0
- data/lib/gonzui/webapp/servlet.rb +319 -0
- data/lib/gonzui/webapp/snippet.rb +155 -0
- data/lib/gonzui/webapp/source.rb +37 -0
- data/lib/gonzui/webapp/stat.rb +137 -0
- data/lib/gonzui/webapp/top.rb +63 -0
- data/lib/gonzui/webapp/uri.rb +140 -0
- data/lib/gonzui/webapp/webrick.rb +48 -0
- data/lib/gonzui/webapp/xmlformatter.so +0 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/autopack.rake +43 -0
- data/tasks/extconf/delta.rake +43 -0
- data/tasks/extconf/texttokenizer.rake +43 -0
- data/tasks/extconf/xmlformatter.rake +43 -0
- data/test/_external_tools.rb +13 -0
- data/test/_test-util.rb +142 -0
- data/test/foo/Makefile.foo +66 -0
- data/test/foo/bar.c +5 -0
- data/test/foo/bar.h +6 -0
- data/test/foo/foo.c +25 -0
- data/test/foo/foo.spec +33 -0
- data/test/test_apt.rb +42 -0
- data/test/test_autopack_extn.rb +7 -0
- data/test/test_bdbdbm.rb +79 -0
- data/test/test_cmdapp-app.rb +35 -0
- data/test/test_cmdapp-search.rb +99 -0
- data/test/test_config.rb +28 -0
- data/test/test_content.rb +15 -0
- data/test/test_dbm.rb +171 -0
- data/test/test_deindexer.rb +50 -0
- data/test/test_delta.rb +66 -0
- data/test/test_extractor.rb +78 -0
- data/test/test_fetcher.rb +75 -0
- data/test/test_gettext.rb +50 -0
- data/test/test_gonzui.rb +11 -0
- data/test/test_helper.rb +10 -0
- data/test/test_importer.rb +56 -0
- data/test/test_indexer.rb +37 -0
- data/test/test_info.rb +82 -0
- data/test/test_license.rb +49 -0
- data/test/test_logger.rb +60 -0
- data/test/test_monitor.rb +23 -0
- data/test/test_searcher.rb +37 -0
- data/test/test_searchquery.rb +27 -0
- data/test/test_searchresult.rb +43 -0
- data/test/test_texttokenizer.rb +47 -0
- data/test/test_updater.rb +95 -0
- data/test/test_util.rb +149 -0
- data/test/test_vcs.rb +61 -0
- data/test/test_webapp-markup.rb +42 -0
- data/test/test_webapp-util.rb +19 -0
- data/test/test_webapp-xmlformatter.rb +19 -0
- metadata +292 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# remover.rb - remove contents from gonzui.db
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# Error raised by Remover, e.g. when the package to remove is not in the DB.
class RemoverError < GonzuiError; end
|
14
|
+
|
15
|
+
class Remover < AbstractUpdater
|
16
|
+
private
|
17
|
+
# Returns the past-tense verb naming the task this updater performs.
def do_task_name
  return "removed"
end
|
20
|
+
|
21
|
+
public
|
22
|
+
# Removes every content that belongs to +package_name+ from the database.
#
# Each path registered for the package is de-indexed in turn while a
# progress bar is advanced; the counter of processed packages is then
# incremented.
#
# Raises RemoverError when the package is not present in the database.
def remove_package(package_name)
  unless @dbm.has_package?(package_name)
    raise RemoverError.new("#{package_name}: package not found")
  end

  total = @dbm.get_ncontents_in_package(package_name)
  progress = make_progress_bar(package_name, total)

  id = @dbm.get_package_id(package_name)
  @dbm.get_path_ids(id).each do |path_id|
    deindex_content(@dbm.get_path(path_id))
    progress.inc
  end
  progress.finish
  @npackages += 1
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,330 @@
|
|
1
|
+
#
|
2
|
+
# searcher.rb - searcher implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# FIXME: It's not efficient. It's better to use a data
|
14
|
+
# structure like a priority queue to handle the
|
15
|
+
# list-of-list to achieve better performance.
|
16
|
+
class PhraseFinder
|
17
|
+
include Util
|
18
|
+
|
19
|
+
# Looks up each word of the phrase and loads its occurrence list for the
# file identified by +path_id+.
#
# dbm::     database handle answering get_word_id / get_all_word_info
# path_id:: ID of the file to search
# words::   the words of the phrase, in order
def initialize(dbm, path_id, words)
  @word_ids = []
  @list_of_list = []

  words.each do |word|
    id = dbm.get_word_id(word)
    assert_non_nil(id)
    occurrences = dbm.get_all_word_info(path_id, id)
    @word_ids << id
    @list_of_list << occurrences
  end
  # Remembered so that #each can compute where the phrase ends.
  @last_word = words.last
end
|
32
|
+
|
33
|
+
# Tells whether the phrase starts at index +i+ of +info_list+: the entries
# from +i+ on must carry the phrase's word IDs in order, with consecutive
# sequence numbers.
def match?(info_list, i)
  prev_seqno = nil
  @word_ids.each_with_index do |word_id, offset|
    pos = i + offset
    return false if pos >= info_list.length
    info = info_list[pos]
    return false if word_id != info.word_id
    return false if !prev_seqno.nil? && (prev_seqno + 1) != info.seqno
    prev_seqno = info.seqno
  end
  true
end
|
46
|
+
|
47
|
+
public
|
48
|
+
# Yields an Occurrence for every place where the words of the phrase
# appear consecutively (by sequence number) in the file.
#
# The occurrence length is derived from byte offsets (+byteno+), so the
# size of the last word is measured in bytes: String#bytesize is used when
# available, falling back to String#length on Rubies that lack it (where
# length already counts bytes). Using the character count would make a
# phrase ending in a multi-byte word come out too short.
def each
  prev = nil
  info_list = @list_of_list.flatten.sort_by {|info|
    info.seqno
  }.find_all {|info|
    # Keep only the first entry for each sequence number.
    fresh = info.seqno != prev
    prev = info.seqno
    fresh
  }
  info_list.length.times {|i|
    next unless match?(info_list, i)
    first = info_list[i]
    last = info_list[i + @word_ids.length - 1]
    last_word_bytes = if @last_word.respond_to?(:bytesize)
                        @last_word.bytesize
                      else
                        @last_word.length
                      end
    length = last.byteno + last_word_bytes - first.byteno
    yield(Occurrence.new(first.byteno, first.lineno, length))
  }
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Error describing a malformed search query.
class QueryError < GonzuiError; end
|
70
|
+
# Raised inside Searcher when a lookup yields nothing; Searcher#search
# rescues it and answers with an empty result.
class NotFoundError < GonzuiError; end
|
71
|
+
|
72
|
+
class Searcher
|
73
|
+
include Util
|
74
|
+
|
75
|
+
# dbm::              database handle used for every lookup
# search_query::     the parsed query to evaluate
# at_most_nresults:: maximum number of result items to collect
def initialize(dbm, search_query, at_most_nresults)
  @dbm, @search_query, @at_most_nresults = dbm, search_query, at_most_nresults

  # If "all" is specified, both IDs become nil. No problem.
  @target_format_id = @dbm.get_format_id(@search_query.format)
  @target_license_id = @dbm.get_license_id(@search_query.license)
end
|
83
|
+
|
84
|
+
# Returns the ID of +word+; raises NotFoundError when the word is unknown
# to the database.
def find_word_id(word)
  @dbm.get_word_id(word) || raise(NotFoundError.new)
end
|
89
|
+
|
90
|
+
# Returns the ID of the package called +package_name+; raises
# NotFoundError when there is no such package.
def find_package_id(package_name)
  @dbm.get_package_id(package_name) || raise(NotFoundError.new)
end
|
95
|
+
|
96
|
+
# Returns the ID of the package owning +path_id+. The ID is passed
# through assert_non_nil before being returned.
def find_package_id_from_path_id(path_id)
  owner_id = @dbm.get_package_id_from_path_id(path_id)
  assert_non_nil(owner_id)
  owner_id
end
|
101
|
+
|
102
|
+
# Keeps the packages for which the ID list obtained by sending +get_ids+
# to the database includes +target_id+. When +target_id+ is nil or false
# no filtering takes place and +package_ids+ is returned as is.
def filter_package_ids_by_property(package_ids, target_id, get_ids)
  return package_ids unless target_id
  package_ids.select {|package_id|
    @dbm.send(get_ids, package_id).include?(target_id)
  }
end
|
111
|
+
|
112
|
+
# Restricts +package_ids+ to the packages matching the target format.
def filter_package_ids_by_format(package_ids)
  getter = :get_format_ids_from_package_id
  filter_package_ids_by_property(package_ids, @target_format_id, getter)
end
|
116
|
+
|
117
|
+
# Restricts +package_ids+ to the packages matching the target license.
def filter_package_ids_by_license(package_ids)
  getter = :get_license_ids_from_package_id
  filter_package_ids_by_property(package_ids, @target_license_id, getter)
end
|
121
|
+
|
122
|
+
# Applies the format filter and then the license filter to +package_ids+.
def filter_package_ids(package_ids)
  filter_package_ids_by_license(filter_package_ids_by_format(package_ids))
end
|
127
|
+
|
128
|
+
# Keeps the paths for which the ID obtained by sending +get_id+ to the
# database equals +target_id+. When +target_id+ is nil or false no
# filtering takes place and +path_ids+ is returned as is.
def filter_path_ids_by_property(path_ids, target_id, get_id)
  return path_ids unless target_id
  path_ids.select {|path_id| @dbm.send(get_id, path_id) == target_id }
end
|
137
|
+
|
138
|
+
# Restricts +path_ids+ to the files whose format is the target format.
def filter_path_ids_by_format(path_ids)
  getter = :get_format_id_from_path_id
  filter_path_ids_by_property(path_ids, @target_format_id, getter)
end
|
142
|
+
|
143
|
+
# Restricts +path_ids+ to the files whose license is the target license.
def filter_path_ids_by_license(path_ids)
  getter = :get_license_id_from_path_id
  filter_path_ids_by_property(path_ids, @target_license_id, getter)
end
|
147
|
+
|
148
|
+
# Applies the format filter and then the license filter to +path_ids+.
def filter_path_ids(path_ids)
  filter_path_ids_by_license(filter_path_ids_by_format(path_ids))
end
|
153
|
+
|
154
|
+
# Intersects, over all words of the query, the ID lists produced by
# +get_proc+ (called with a word ID) and narrowed by +filter_proc+.
#
# The scan stops as soon as the intersection becomes empty, so later
# words are not looked up. Raises NotFoundError when the query has no
# words at all (or, via find_word_id, when a word is unknown).
def find_ids(get_proc, filter_proc)
  common = nil
  @search_query.words.each do |word|
    candidates = filter_proc.call(get_proc.call(find_word_id(word)))
    common = common.nil? ? candidates : (common & candidates)
    break if common.empty?
  end
  raise NotFoundError.new if common.nil?
  common
end
|
166
|
+
|
167
|
+
# Returns the IDs of the packages that contain every word of the query
# and pass the format/license filters.
def find_package_ids
  lookup = lambda do |word_id|
    @dbm.get_package_ids(word_id)
  end
  narrow = lambda do |ids|
    filter_package_ids(ids)
  end
  find_ids(lookup, narrow)
end
|
172
|
+
|
173
|
+
# Returns the ID of +path+; raises NotFoundError when the path is not in
# the database.
def find_path_id(path)
  @dbm.get_path_id(path) || raise(NotFoundError.new)
end
|
178
|
+
|
179
|
+
# Returns the IDs of the files of +package_id+ that contain every word of
# the query and pass the format/license filters.
def find_path_ids(package_id)
  lookup = lambda do |word_id|
    @dbm.get_path_ids_from_package_and_word_id(package_id, word_id)
  end
  narrow = lambda do |ids|
    filter_path_ids(ids)
  end
  find_ids(lookup, narrow)
end
|
186
|
+
|
187
|
+
# A query item without a property matches any occurrence; otherwise the
# decision is left to info.match?(property).
def match_target?(info, property)
  return true unless property
  info.match?(property)
end
|
194
|
+
|
195
|
+
# Tells whether collecting occurrences should stop after the first one:
# true for :exact, false for :all and :find_one_extra. Any other option
# is reported through assert_not_reached.
def break_needed?(option)
  case option
  when :exact
    true
  when :all, :find_one_extra
    false
  else
    assert_not_reached
    false
  end
end
|
206
|
+
|
207
|
+
# FIXME: It's too complicated
|
208
|
+
# Builds the ResultItem for the file +path_id+ by collecting occurrences
# of every item of the query.
#
# option:: :all            - collect every occurrence
#          :exact          - stop after the first occurrence of each item
#          :find_one_extra - keep one occurrence per item and, if a second
#                            one exists, only flag the item via
#                            has_more_in_path
#
# Returns nil as soon as some query item has no occurrence in the file.
def get_result_item(path_id, option)
  item = ResultItem.new(@dbm.get_package_id_from_path_id(path_id), path_id)
  one_extra = (option == :find_one_extra)

  @search_query.each do |qitem|
    nfound = 0
    if qitem.phrase?
      PhraseFinder.new(@dbm, path_id, qitem.value).each do |occ|
        if one_extra && nfound >= 1
          item.has_more_in_path
          break
        end
        item.push(occ)
        nfound += 1
        break if break_needed?(option)
      end
    else
      word_id = find_word_id(qitem.value)
      @dbm.find_word_info(path_id, word_id) do |info|
        # Skip occurrences of the wrong kind (e.g. not a fundef).
        next unless match_target?(info, qitem.property)
        occ = Occurrence.new(info.byteno, info.lineno, qitem.value.length)
        if one_extra && nfound >= 1
          item.has_more_in_path
          break
        end
        item.push(occ)
        nfound += 1
        break if break_needed?(option)
      end
    end
    return nil if nfound == 0
  end
  item
end
|
242
|
+
|
243
|
+
# Searches a single file and returns a SearchResult holding its one item.
# The item is flagged with has_more_in_path when it carries more
# occurrences than the query has items. Raises NotFoundError when the
# file does not match the query.
def search_with_path_internal(path_id)
  result = SearchResult.new
  hit = get_result_item(path_id, :all)
  raise NotFoundError.new if hit.nil?
  if hit.list.length > @search_query.length
    hit.has_more_in_path
  end
  result.push(hit)
  result
end
|
251
|
+
|
252
|
+
# Searches only the file named by the query's path.
def search_with_path
  search_with_path_internal(find_path_id(@search_query.path))
end
|
256
|
+
|
257
|
+
# Searches the files of one package, collecting at most @at_most_nresults
# items (limit_exceeded is set on the result when that cap is hit).
#
# When exactly one file matched and it has further occurrences, the
# search is redone on that file alone so that all of them are listed.
def search_with_package_internal(package_id)
  result = SearchResult.new
  find_path_ids(package_id).each do |path_id|
    hit = get_result_item(path_id, :find_one_extra)
    next if hit.nil?
    result.push(hit)
    if result.length >= @at_most_nresults
      result.limit_exceeded = true
      break
    end
  end

  single_file = (result.length == 1 && result.first.has_more?)
  return result unless single_file
  search_with_path_internal(result.first.path_id)
end
|
275
|
+
|
276
|
+
# Searches only the package named by the query.
def search_with_package
  search_with_package_internal(find_package_id(@search_query.package))
end
|
280
|
+
|
281
|
+
# Searches all packages. Each matching package contributes its first
# matching file, flagged with has_more_in_package when a second matching
# file exists. Collection stops at @at_most_nresults items
# (limit_exceeded is then set on the result).
#
# When only one package ends up in the result, the search is redone
# inside that package to list its files.
def search_without_scope
  result = SearchResult.new
  find_package_ids.each do |package_id|
    hits = []
    find_path_ids(package_id).each do |path_id|
      hit = get_result_item(path_id, :find_one_extra)
      next if hit.nil?
      hits.push(hit)
      # Two hits are enough to know that the package has more.
      break if hits.length >= 2
    end
    next if hits.empty?

    representative = hits.first
    representative.has_more_in_package if hits.length > 1
    result.push(representative)
    if result.length >= @at_most_nresults
      result.limit_exceeded = true
      break
    end
  end

  return result unless result.length == 1
  search_with_package_internal(result.first.package_id)
end
|
308
|
+
|
309
|
+
public
|
310
|
+
# Runs the query: restricted to a file when the query has a path, to a
# package when it has a package, and over everything otherwise.
#
# On NotFoundError the query is re-tokenized once with tokenize_all; if
# that reports a change the search is retried a single time. In every
# other failure case an empty SearchResult is returned.
def search
  # Must live outside the begin block: retry re-runs only that block, so
  # the flag survives and bounds the retries to one.
  retried = false
  begin
    if @search_query.path
      return search_with_path
    elsif @search_query.package
      return search_with_package
    else
      return search_without_scope
    end
  rescue NotFoundError
    if !retried && @search_query.tokenize_all
      retried = true
      retry
    end
    return SearchResult.new
  end
end
|
329
|
+
end
|
330
|
+
end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
#
|
2
|
+
# searchquery.rb - search query implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# One item of a search query.
#
# property:: symbol restricting the kind of occurrence, or nil
# value::    the word (String) or, for a phrase, the list of its words
# phrase_p:: true when the item is a phrase
QueryItem = Struct.new(:property, :value, :phrase_p) do
  # Renders the item in query syntax: an optional "property:" prefix
  # followed by the word, or by the double-quoted words of the phrase.
  def to_s
    text = ""
    text << property.to_s << ":" if property
    text << (phrase? ? sprintf('"%s"', value.join(" ")) : value)
    text
  end

  # True only when phrase_p is exactly true.
  def phrase?
    phrase_p == true
  end
end
|
30
|
+
|
31
|
+
class SearchQuery
|
32
|
+
include Enumerable
|
33
|
+
include GetText
|
34
|
+
|
35
|
+
# Parses +query_string+ right away.
#
# config::       provides max_words, the cap on the number of query words
# query_string:: the raw query as typed by the user
# options::      hash; :format and :license are read while parsing
def initialize(config, query_string, options = {})
  @query_string = query_string
  @options = options
  @max_words = config.max_words

  @package = @path = @format = @license = @error = nil

  @items = []
  @words = []
  @ignored_words = []
  @nwords = 0

  parse_query_string
end
|
53
|
+
# path is the only attribute that can be reassigned from outside.
attr_accessor :path
attr_reader :package, :format, :license, :ignored_words, :words, :options
|
60
|
+
|
61
|
+
private
|
62
|
+
# Property names accepted as a "name:" prefix in a query: the two scope
# properties plus every type known to LangScan (:fundef, :funcall, etc.).
KnownProperties = [:path, :package]
LangScan::Type.each_group do |group|
  group.each do |type_info|
    KnownProperties << type_info.type
  end
end
|
72
|
+
|
73
|
+
# Splits @query_string into items.
#
# Each token is either a double-quoted phrase or a run of non-blank
# characters, optionally preceded by a known "property:" prefix.
# "package:" and "path:" set the search scope instead of adding an item;
# giving either of them twice, or both together, records a QueryError in
# @error (raised later by #validate). @format and @license are taken from
# the options, and the flat word list is rebuilt at the end.
def parse_query_string
  kp = KnownProperties.join("|")
  # A quoted phrase ends at the NEXT double quote. A greedy "(.+)" would
  # run to the last quote of the query and merge separate phrases such as
  # "foo bar" "baz qux" into one.
  pattern = /((?:#{kp}):)?(?:"([^"]+)"|(\S+))/
  @query_string.scan(pattern) {|prefix, quoted, bare|
    phrase_p = !quoted.nil?
    text = quoted || bare
    # prefix still carries the trailing colon.
    property = prefix ? prefix.chop.intern : nil
    case property
    when :package
      @error = QueryError.new(N_("package: duplicated.")) if @package
      @package = text
    when :path
      @error = QueryError.new(N_("path: duplicated.")) if @path
      @path = text
    else
      add_item(property, text, phrase_p)
    end
  }
  @format = @options[:format]
  @license = @options[:license]
  if @package and @path
    message = N_("package: and path: cannot be specified together.")
    @error = QueryError.new(message)
  end
  make_words
end
|
103
|
+
|
104
|
+
# Rebuilds @words as the flat list of the words of all items.
def make_words
  values = @items.collect {|item| item.value }
  @words = values.flatten
end
|
107
|
+
|
108
|
+
# Tokenizes +text+ and adds the resulting words as one phrase item.
# Words beyond the @max_words budget go to @ignored_words instead; no
# item is added when none of the words could be accepted.
def add_item_for_phrase(property, text)
  accepted = []
  TextTokenizer.each_word(text) do |word, unused|
    if @nwords >= @max_words
      @ignored_words.push(word)
    else
      @nwords += 1
      accepted.push(word)
    end
  end
  @items.push(QueryItem.new(property, accepted, true)) unless accepted.empty?
end
|
123
|
+
|
124
|
+
# Adds +text+ as a single-word item, or records it in @ignored_words when
# the @max_words budget is already used up.
def add_item_for_single_word(property, text)
  if @nwords >= @max_words
    @ignored_words.push(text)
  else
    @nwords += 1
    @items.push(QueryItem.new(property, text, false))
  end
end
|
133
|
+
|
134
|
+
# Adds +text+ to the query. Quoted text and text containing non-ASCII
# characters is handled as a phrase, anything else as a single word.
def add_item(property, text, phrase_p)
  as_phrase = phrase_p || has_multi_byte_char?(text)
  return add_item_for_phrase(property, text) if as_phrase
  add_item_for_single_word(property, text)
end
|
141
|
+
|
142
|
+
# Returns a MatchData (truthy) when +text+ contains a character outside
# the 7-bit ASCII range, nil otherwise.
def has_multi_byte_char?(text)
  non_ascii = /[^\x00-\x7f]/u
  non_ascii.match(text)
end
|
145
|
+
|
146
|
+
# Forgets all items and ignored words and restarts the word count.
def reset
  @items = []
  @ignored_words = []
  @nwords = 0
end
|
151
|
+
|
152
|
+
public
|
153
|
+
# Truthy when the query consists of nothing but a path: scope.
def path_only?
  @items.empty? && @path && @package.nil?
end
|
156
|
+
|
157
|
+
# Truthy (the package name itself) when the query consists of nothing but
# a package: scope.
def package_only?
  @items.empty? && @path.nil? && @package
end
|
160
|
+
|
161
|
+
# The query string exactly as it was given to the constructor.
def string
  return @query_string
end
|
164
|
+
|
165
|
+
# The query rebuilt from its items (each rendered with to_s), separated
# by single spaces.
def simplified_string
  rendered = @items.map {|query_item| query_item.to_s }
  rendered.join(" ")
end
|
168
|
+
|
169
|
+
# The values of all items, each converted with to_s, separated by single
# spaces; property prefixes are left out.
# NOTE(review): for phrase items the value is an Array, whose to_s output
# depends on the Ruby version -- confirm this is what callers expect.
def string_without_properties
  rendered = @items.map {|query_item| query_item.value.to_s }
  rendered.join(" ")
end
|
172
|
+
|
173
|
+
# True when the query has neither items nor a package: / path: scope.
def empty?
  @items.empty? && @package.nil? && @path.nil?
end
|
176
|
+
|
177
|
+
# The first item of the query, or nil when there is none.
def first
  return @items.first
end
|
180
|
+
|
181
|
+
# The last item of the query, or nil when there is none.
def last
  return @items.last
end
|
184
|
+
|
185
|
+
# Yields every item of the query in order.
def each
  @items.each do |query_item|
    yield(query_item)
  end
end
|
188
|
+
|
189
|
+
# The number of items in the query.
def length
  return @items.length
end
|
192
|
+
|
193
|
+
# Returns the values of the items for which the block is truthy, or the
# values of all items when no block is given.
# (Note that this differs from the usual Enumerable#collect.)
def collect
  chosen = @items.find_all {|item| block_given? ? yield(item) : true }
  chosen.map {|item| item.value }
end
|
204
|
+
|
205
|
+
# The values of the single-word (non-phrase) items.
def keywords
  collect {|query_item| !query_item.phrase? }
end
|
208
|
+
|
209
|
+
# The values (word lists) of the phrase items.
def phrases
  collect {|query_item| query_item.phrase? }
end
|
212
|
+
|
213
|
+
def tokenize_all
|
214
|
+
original_items = @items.clone
|
215
|
+
original_nwords = @nwords
|
216
|
+
reset
|
217
|
+
original_items.each {|item|
|
218
|
+
if item.phrase?
|
219
|
+
value = item.value.join(" ")
|
220
|
+
add_item_for_phrase(item.property, value)
|
221
|
+
else
|
222
|
+
add_item_for_phrase(item.property, item.value)
|
223
|
+
end
|
224
|
+
}
|
225
|
+
make_words
|
226
|
+
|
227
|
+
modified = if @nwords != original_nwords then true else false end
|
228
|
+
return modified
|
229
|
+
end
|
230
|
+
|
231
|
+
# Raises the error recorded while parsing the query, if there is one.
def validate
  return unless @error
  raise @error
end
|
234
|
+
end
|
235
|
+
end
|