gonzui 1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS.txt +9 -0
- data/History.txt +5539 -0
- data/Manifest.txt +115 -0
- data/PostInstall.txt +17 -0
- data/README.rdoc +149 -0
- data/Rakefile +28 -0
- data/bin/gonzui-db +167 -0
- data/bin/gonzui-import +177 -0
- data/bin/gonzui-remove +58 -0
- data/bin/gonzui-search +68 -0
- data/bin/gonzui-server +176 -0
- data/bin/gonzui-update +53 -0
- data/data/gonzui/catalog/catalog.ja +80 -0
- data/data/gonzui/doc/favicon.ico +0 -0
- data/data/gonzui/doc/folder.png +0 -0
- data/data/gonzui/doc/gonzui.css +279 -0
- data/data/gonzui/doc/gonzui.js +111 -0
- data/data/gonzui/doc/text.png +0 -0
- data/data/gonzuirc.sample +29 -0
- data/ext/autopack/autopack.c +88 -0
- data/ext/autopack/extconf.rb +3 -0
- data/ext/delta/delta.c +147 -0
- data/ext/delta/extconf.rb +5 -0
- data/ext/texttokenizer/extconf.rb +5 -0
- data/ext/texttokenizer/texttokenizer.c +93 -0
- data/ext/xmlformatter/extconf.rb +5 -0
- data/ext/xmlformatter/xmlformatter.c +207 -0
- data/lib/gonzui.rb +59 -0
- data/lib/gonzui/apt.rb +193 -0
- data/lib/gonzui/bdbdbm.rb +118 -0
- data/lib/gonzui/cmdapp.rb +14 -0
- data/lib/gonzui/cmdapp/app.rb +175 -0
- data/lib/gonzui/cmdapp/search.rb +134 -0
- data/lib/gonzui/config.rb +117 -0
- data/lib/gonzui/content.rb +19 -0
- data/lib/gonzui/dbm.rb +673 -0
- data/lib/gonzui/deindexer.rb +162 -0
- data/lib/gonzui/delta.rb +49 -0
- data/lib/gonzui/extractor.rb +347 -0
- data/lib/gonzui/fetcher.rb +309 -0
- data/lib/gonzui/gettext.rb +144 -0
- data/lib/gonzui/importer.rb +84 -0
- data/lib/gonzui/indexer.rb +316 -0
- data/lib/gonzui/info.rb +80 -0
- data/lib/gonzui/license.rb +100 -0
- data/lib/gonzui/logger.rb +48 -0
- data/lib/gonzui/monitor.rb +177 -0
- data/lib/gonzui/progressbar.rb +235 -0
- data/lib/gonzui/remover.rb +38 -0
- data/lib/gonzui/searcher.rb +330 -0
- data/lib/gonzui/searchquery.rb +235 -0
- data/lib/gonzui/searchresult.rb +111 -0
- data/lib/gonzui/updater.rb +254 -0
- data/lib/gonzui/util.rb +415 -0
- data/lib/gonzui/vcs.rb +128 -0
- data/lib/gonzui/webapp.rb +25 -0
- data/lib/gonzui/webapp/advsearch.rb +123 -0
- data/lib/gonzui/webapp/filehandler.rb +24 -0
- data/lib/gonzui/webapp/jsfeed.rb +61 -0
- data/lib/gonzui/webapp/markup.rb +445 -0
- data/lib/gonzui/webapp/search.rb +269 -0
- data/lib/gonzui/webapp/servlet.rb +319 -0
- data/lib/gonzui/webapp/snippet.rb +155 -0
- data/lib/gonzui/webapp/source.rb +37 -0
- data/lib/gonzui/webapp/stat.rb +137 -0
- data/lib/gonzui/webapp/top.rb +63 -0
- data/lib/gonzui/webapp/uri.rb +140 -0
- data/lib/gonzui/webapp/webrick.rb +48 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/makemanifest.rb +21 -0
- data/tasks/extconf.rake +13 -0
- data/tasks/extconf/autopack.rake +43 -0
- data/tasks/extconf/delta.rake +43 -0
- data/tasks/extconf/texttokenizer.rake +43 -0
- data/tasks/extconf/xmlformatter.rake +43 -0
- data/test/_external_tools.rb +13 -0
- data/test/_test-util.rb +142 -0
- data/test/foo/Makefile.foo +66 -0
- data/test/foo/bar.c +5 -0
- data/test/foo/bar.h +6 -0
- data/test/foo/foo.c +25 -0
- data/test/foo/foo.spec +33 -0
- data/test/test_apt.rb +42 -0
- data/test/test_autopack_extn.rb +7 -0
- data/test/test_bdbdbm.rb +79 -0
- data/test/test_cmdapp-app.rb +35 -0
- data/test/test_cmdapp-search.rb +99 -0
- data/test/test_config.rb +28 -0
- data/test/test_content.rb +15 -0
- data/test/test_dbm.rb +171 -0
- data/test/test_deindexer.rb +50 -0
- data/test/test_delta.rb +66 -0
- data/test/test_extractor.rb +78 -0
- data/test/test_fetcher.rb +75 -0
- data/test/test_gettext.rb +50 -0
- data/test/test_gonzui.rb +11 -0
- data/test/test_helper.rb +10 -0
- data/test/test_importer.rb +56 -0
- data/test/test_indexer.rb +37 -0
- data/test/test_info.rb +82 -0
- data/test/test_license.rb +49 -0
- data/test/test_logger.rb +60 -0
- data/test/test_monitor.rb +23 -0
- data/test/test_searcher.rb +37 -0
- data/test/test_searchquery.rb +27 -0
- data/test/test_searchresult.rb +43 -0
- data/test/test_texttokenizer.rb +47 -0
- data/test/test_updater.rb +95 -0
- data/test/test_util.rb +149 -0
- data/test/test_vcs.rb +61 -0
- data/test/test_webapp-markup.rb +42 -0
- data/test/test_webapp-util.rb +19 -0
- data/test/test_webapp-xmlformatter.rb +19 -0
- metadata +291 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
#
|
2
|
+
# remover.rb - remove contents from gonzui.db
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# Raised by Remover#remove_package when the named package is not present
# in the database.
class RemoverError < GonzuiError; end
|
14
|
+
|
15
|
+
# Updater that removes whole packages from the gonzui database.
class Remover < AbstractUpdater
  # Deindexes every path registered for +package_name+, advancing a
  # progress bar once per path, then bumps the processed-package counter.
  #
  # package_name:: name of the package to remove
  #
  # Raises RemoverError if the database does not know the package.
  def remove_package(package_name)
    unless @dbm.has_package?(package_name)
      raise RemoverError, "#{package_name}: package not found"
    end

    total = @dbm.get_ncontents_in_package(package_name)
    progress = make_progress_bar(package_name, total)
    package_id = @dbm.get_package_id(package_name)
    @dbm.get_path_ids(package_id).each do |path_id|
      deindex_content(@dbm.get_path(path_id))
      progress.inc
    end
    progress.finish
    @npackages += 1
  end

  private

  # Word describing what this updater did.
  # NOTE(review): presumably consumed by AbstractUpdater for its summary
  # message -- confirm against the base class.
  def do_task_name
    "removed"
  end
end
|
38
|
+
end
|
@@ -0,0 +1,330 @@
|
|
1
|
+
#
|
2
|
+
# searcher.rb - searcher implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# FIXME: It's not efficient. It's better to use a data
|
14
|
+
# structure like a priority queue to handle the
|
15
|
+
# list-of-list to achieve better performance.
|
16
|
+
# Locates occurrences of a phrase (a run of consecutive words) inside a
# single indexed path.
class PhraseFinder
  include Util

  # dbm::     database handle; must answer get_word_id and get_all_word_info
  # path_id:: id of the path to look in
  # words::   the words of the phrase, in order
  #
  # Every word must already have an id in the database (asserted).
  def initialize(dbm, path_id, words)
    @word_ids = []
    @list_of_list = []

    words.each do |word|
      id = dbm.get_word_id(word)
      assert_non_nil(id)
      occurrences = dbm.get_all_word_info(path_id, id)
      @word_ids << id
      @list_of_list << occurrences
    end
    @last_word = words.last
  end

  # True when the entries of +info_list+ starting at index +i+ carry the
  # phrase's word ids in order, with strictly consecutive sequence numbers.
  def match?(info_list, i)
    previous = nil
    @word_ids.each_with_index do |word_id, offset|
      position = i + offset
      return false if position >= info_list.length
      info = info_list[position]
      return false if word_id != info.word_id
      return false if !previous.nil? && (previous + 1) != info.seqno
      previous = info.seqno
    end
    true
  end

  public

  # Yields an Occurrence (byte offset, line number, length) for every
  # place where the whole phrase appears.
  def each
    last_seen = nil
    ordered = @list_of_list.flatten.sort_by {|info| info.seqno }
    # Keep only the first entry of each run sharing a sequence number.
    merged = ordered.find_all do |info|
      fresh = (info.seqno != last_seen)
      last_seen = info.seqno
      fresh
    end

    phrase_size = @word_ids.length
    merged.length.times do |start|
      next unless match?(merged, start)
      head = merged[start]
      tail = merged[start + phrase_size - 1]
      # NOTE(review): String#length counts characters on Ruby >= 1.9 while
      # byteno looks like a byte offset -- confirm the unit for multi-byte
      # words.
      span = tail.byteno + @last_word.length - head.byteno
      occ = Occurrence.new(head.byteno, head.lineno, span)
      yield(occ)
    end
  end
end
|
68
|
+
|
69
|
+
# Signals a malformed search query (for example duplicated or conflicting
# package:/path: scopes); SearchQuery records one and raises it from
# SearchQuery#validate.
class QueryError < GonzuiError; end
|
70
|
+
# Raised by Searcher lookups when a word, package or path is missing from
# the database, or when no ids match; Searcher#search rescues it.
class NotFoundError < GonzuiError; end
|
71
|
+
|
72
|
+
# Runs a SearchQuery against the database and builds a SearchResult.
class Searcher
  include Util

  # dbm::              database handle used for every lookup
  # search_query::     the parsed query to execute
  # at_most_nresults:: maximum number of result items to collect
  def initialize(dbm, search_query, at_most_nresults)
    @dbm = dbm
    @search_query = search_query
    @at_most_nresults = at_most_nresults
    # If "all" is specified, both IDs become nil. No problem.
    @target_format_id = @dbm.get_format_id(@search_query.format)
    @target_license_id = @dbm.get_license_id(@search_query.license)
  end

  # Id of +word+; raises NotFoundError when the database has none.
  def find_word_id(word)
    @dbm.get_word_id(word) || raise(NotFoundError)
  end

  # Id of +package_name+; raises NotFoundError when the database has none.
  def find_package_id(package_name)
    @dbm.get_package_id(package_name) || raise(NotFoundError)
  end

  # Id of the package owning +path_id+ (asserted to be non-nil).
  def find_package_id_from_path_id(path_id)
    owner_id = @dbm.get_package_id_from_path_id(path_id)
    assert_non_nil(owner_id)
    owner_id
  end

  # Keeps the package ids whose id list, fetched by sending +get_ids+ to
  # the database, includes +target_id+. A nil/false +target_id+ disables
  # the filter.
  def filter_package_ids_by_property(package_ids, target_id, get_ids)
    return package_ids unless target_id
    package_ids.find_all do |package_id|
      @dbm.send(get_ids, package_id).include?(target_id)
    end
  end

  # Package filter on the query's target format.
  def filter_package_ids_by_format(package_ids)
    filter_package_ids_by_property(package_ids, @target_format_id,
                                   :get_format_ids_from_package_id)
  end

  # Package filter on the query's target license.
  def filter_package_ids_by_license(package_ids)
    filter_package_ids_by_property(package_ids, @target_license_id,
                                   :get_license_ids_from_package_id)
  end

  # Applies the format filter, then the license filter.
  def filter_package_ids(package_ids)
    filter_package_ids_by_license(filter_package_ids_by_format(package_ids))
  end

  # Keeps the path ids whose single id, fetched by sending +get_id+ to the
  # database, equals +target_id+. A nil/false +target_id+ disables the
  # filter.
  def filter_path_ids_by_property(path_ids, target_id, get_id)
    return path_ids unless target_id
    path_ids.find_all do |path_id|
      @dbm.send(get_id, path_id) == target_id
    end
  end

  # Path filter on the query's target format.
  def filter_path_ids_by_format(path_ids)
    filter_path_ids_by_property(path_ids, @target_format_id,
                                :get_format_id_from_path_id)
  end

  # Path filter on the query's target license.
  def filter_path_ids_by_license(path_ids)
    filter_path_ids_by_property(path_ids, @target_license_id,
                                :get_license_id_from_path_id)
  end

  # Applies the format filter, then the license filter.
  def filter_path_ids(path_ids)
    filter_path_ids_by_license(filter_path_ids_by_format(path_ids))
  end

  # Intersects, over every query word, the ids produced by +get_proc+
  # (called with the word id) and narrowed by +filter_proc+. Stops early
  # once the intersection is empty. Raises NotFoundError when the query
  # has no words at all, or when a word is unknown.
  def find_ids(get_proc, filter_proc)
    common = nil
    @search_query.words.each do |word|
      candidates = filter_proc.call(get_proc.call(find_word_id(word)))
      common = common.nil? ? candidates : (common & candidates)
      break if common.empty?
    end
    raise NotFoundError if common.nil?
    common
  end

  # Ids of the packages containing every query word (after filtering).
  def find_package_ids
    fetch = lambda {|word_id| @dbm.get_package_ids(word_id) }
    narrow = lambda {|ids| filter_package_ids(ids) }
    find_ids(fetch, narrow)
  end

  # Id of +path+; raises NotFoundError when the database has none.
  def find_path_id(path)
    @dbm.get_path_id(path) || raise(NotFoundError)
  end

  # Ids of the paths in +package_id+ containing every query word (after
  # filtering).
  def find_path_ids(package_id)
    fetch = lambda do |word_id|
      @dbm.get_path_ids_from_package_and_word_id(package_id, word_id)
    end
    narrow = lambda {|ids| filter_path_ids(ids) }
    find_ids(fetch, narrow)
  end

  # True when no property is requested or +info+ matches it.
  def match_target?(info, property)
    return true unless property
    info.match?(property)
  end

  # Whether collection should stop after the first hit for +option+:
  # only :exact stops; :all and :find_one_extra keep going. Any other
  # option trips assert_not_reached.
  def break_needed?(option)
    case option
    when :all, :find_one_extra
      false
    when :exact
      true
    else
      assert_not_reached
      false
    end
  end

  # FIXME: It's too complicated
  #
  # Builds the ResultItem for +path_id+ by collecting occurrences of every
  # query item. Returns nil as soon as one query item has no occurrence in
  # the path. With :find_one_extra only the first occurrence per query
  # item is kept and a second one merely flags the item via
  # has_more_in_path.
  def get_result_item(path_id, option)
    package_id = @dbm.get_package_id_from_path_id(path_id)
    item = ResultItem.new(package_id, path_id)
    @search_query.each do |qitem|
      hits = 0
      if qitem.phrase?
        PhraseFinder.new(@dbm, path_id, qitem.value).each do |occ|
          if option == :find_one_extra && hits >= 1
            item.has_more_in_path
            break
          end
          item.push(occ)
          hits += 1
          break if break_needed?(option)
        end
      else
        word_id = find_word_id(qitem.value)
        @dbm.find_word_info(path_id, word_id) do |info|
          next unless match_target?(info, qitem.property)
          occ = Occurrence.new(info.byteno, info.lineno, qitem.value.length)
          if option == :find_one_extra && hits >= 1
            item.has_more_in_path
            break
          end
          item.push(occ)
          hits += 1
          break if break_needed?(option)
        end
      end
      return nil if hits == 0
    end
    item
  end

  # Result holding every occurrence inside one path; raises NotFoundError
  # when the path does not satisfy the query.
  def search_with_path_internal(path_id)
    result = SearchResult.new
    item = get_result_item(path_id, :all)
    raise NotFoundError if item.nil?
    item.has_more_in_path if item.list.length > @search_query.length
    result.push(item)
    result
  end

  # Search restricted to the path named in the query.
  def search_with_path
    search_with_path_internal(find_path_id(@search_query.path))
  end

  # One result item per matching path of +package_id+, capped at
  # @at_most_nresults. A lone item that has more hits is expanded into a
  # full single-path search.
  def search_with_package_internal(package_id)
    result = SearchResult.new
    find_path_ids(package_id).each do |path_id|
      item = get_result_item(path_id, :find_one_extra)
      next if item.nil?
      result.push(item)
      if result.length >= @at_most_nresults
        result.limit_exceeded = true
        break
      end
    end
    if result.length == 1 && result.first.has_more?
      return search_with_path_internal(result.first.path_id)
    end
    result
  end

  # Search restricted to the package named in the query.
  def search_with_package
    search_with_package_internal(find_package_id(@search_query.package))
  end

  # One result item per matching package, capped at @at_most_nresults.
  # A lone matching package is expanded into a per-package search.
  def search_without_scope
    result = SearchResult.new
    find_package_ids.each do |package_id|
      found = []
      find_path_ids(package_id).each do |path_id|
        item = get_result_item(path_id, :find_one_extra)
        next if item.nil?
        found.push(item)
        # Two items are enough to know the package has more to show.
        break if found.length >= 2
      end
      next if found.empty?
      representative = found.first
      representative.has_more_in_package if found.length > 1
      result.push(representative)
      if result.length >= @at_most_nresults
        result.limit_exceeded = true
        break
      end
    end
    if result.length == 1
      return search_with_package_internal(result.first.package_id)
    end
    result
  end

  public

  # Executes the query, scoped by path, by package, or unscoped. On
  # NotFoundError the query is re-tokenized once (tokenize_all) and, if
  # that changed it, the search is retried; otherwise an empty
  # SearchResult is returned.
  def search
    already_retried = false
    begin
      if @search_query.path
        search_with_path
      elsif @search_query.package
        search_with_package
      else
        search_without_scope
      end
    rescue NotFoundError
      if !already_retried && @search_query.tokenize_all
        already_retried = true
        retry
      end
      SearchResult.new
    end
  end
end
|
330
|
+
end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
#
|
2
|
+
# searchquery.rb - search query implementation
|
3
|
+
#
|
4
|
+
# Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
|
5
|
+
# All rights reserved.
|
6
|
+
# This is free software with ABSOLUTELY NO WARRANTY.
|
7
|
+
#
|
8
|
+
# You can redistribute it and/or modify it under the terms of
|
9
|
+
# the GNU General Public License version 2.
|
10
|
+
#
|
11
|
+
|
12
|
+
module Gonzui
|
13
|
+
# One element of a parsed query.
#
# property:: optional qualifier (rendered as a "property:" prefix), or nil
# value::    a String for a single word, an Array of words for a phrase
# phrase_p:: true when the item is a phrase
QueryItem = Struct.new(:property, :value, :phrase_p)
class QueryItem
  # Renders the item back into query syntax: an optional "property:"
  # prefix followed by the bare word, or by the phrase words joined with
  # spaces and wrapped in double quotes.
  #
  # The result is assembled by interpolation rather than by appending to a
  # "" literal, so it keeps working when string literals are frozen.
  def to_s
    prefix = property ? "#{property}:" : ""
    body = phrase? ? sprintf('"%s"', value.join(" ")) : value
    "#{prefix}#{body}"
  end

  # True only when phrase_p is exactly +true+.
  def phrase?
    phrase_p == true
  end
end
|
30
|
+
|
31
|
+
# A search query parsed from its textual form into QueryItems plus an
# optional package: or path: scope. Words beyond config.max_words are not
# searched; they are collected in ignored_words instead.
class SearchQuery
  include Enumerable
  include GetText

  # config::       object answering +max_words+ (cap on searched words)
  # query_string:: the raw query text
  # options::      hash; :format and :license are read from it
  #
  # Parsing problems are not raised here; they are stored and raised by
  # #validate.
  def initialize(config, query_string, options = {})
    @query_string = query_string
    @options = options
    @items = []

    @package = nil
    @path = nil
    @format = nil
    @license = nil
    @error = nil

    @words = []
    @ignored_words = []

    @max_words = config.max_words
    @nwords = 0
    parse_query_string
  end
  attr_accessor :path
  attr_reader :package
  attr_reader :format
  attr_reader :license
  attr_reader :ignored_words
  attr_reader :words
  attr_reader :options

  private
  # Property names accepted as a "name:" prefix in a query.
  KnownProperties = []
  [:path, :package].each {|property|
    KnownProperties.push(property)
  }
  # :fundef, :funcall, etc.
  LangScan::Type.each_group {|group|
    group.each {|type_info|
      KnownProperties.push(type_info.type)
    }
  }

  # Splits the query string into optionally prefixed, quoted or bare parts
  # and records them as the package/path scope or as query items.
  def parse_query_string
    kp = KnownProperties.join("|")
    # A quoted phrase is the shortest "..." span whose closing quote is
    # followed by whitespace or the end of the string, so that
    # '"foo bar" baz "qux"' yields two separate phrases (a greedy match
    # would merge them into one) while quotes embedded in a phrase are
    # still allowed. The second alternative accepts a closing quote glued
    # to following text, e.g. '"foo"bar'.
    pattern = /((?:#{kp}):)?(?:"(.+?)"(?=\s|\z)|"(.+?)"|(\S+))/
    @query_string.scan(pattern) {|prefix, delimited, glued, bare|
      quoted = (delimited or glued)
      phrase_p = !quoted.nil?
      text = (quoted or bare)
      if prefix
        property = prefix.chop.intern
        case property
        when :package
          @error = QueryError.new(N_("package: duplicated.")) if @package
          @package = text
        when :path
          @error = QueryError.new(N_("path: duplicated.")) if @path
          @path = text
        else
          add_item(property, text, phrase_p)
        end
      else
        add_item(nil, text, phrase_p)
      end
    }
    @format = @options[:format]
    @license = @options[:license]
    if @package and @path
      message = N_("package: and path: cannot be specified together.")
      @error = QueryError.new(message)
    end
    make_words
  end

  # Rebuilds the flat word list from the current items.
  def make_words
    @words = @items.map {|i| i.value }.flatten
  end

  # Tokenizes +text+ and appends one phrase item holding the words that
  # fit under the word cap; the remaining words go to @ignored_words.
  # Nothing is appended when no word fits.
  def add_item_for_phrase(property, text)
    value = []
    TextTokenizer.each_word(text) {|word, unused|
      if @nwords < @max_words
        @nwords += 1
        value.push(word)
      else
        @ignored_words.push(word)
      end
    }
    unless value.empty?
      item = QueryItem.new(property, value, true)
      @items.push(item)
    end
  end

  # Appends +text+ as a single-word item, or records it as ignored when
  # the word cap has been reached.
  def add_item_for_single_word(property, text)
    if @nwords < @max_words
      @nwords += 1
      item = QueryItem.new(property, text, false)
      @items.push(item)
    else
      @ignored_words.push(text)
    end
  end

  # Quoted text and text containing non-ASCII characters are handled as
  # phrases; everything else is a single word.
  def add_item(property, text, phrase_p)
    if phrase_p or has_multi_byte_char?(text)
      add_item_for_phrase(property, text)
    else
      add_item_for_single_word(property, text)
    end
  end

  # Truthy (a MatchData) when +text+ contains a character outside
  # \x00-\x7f, nil otherwise.
  def has_multi_byte_char?(text)
    /[^\x00-\x7f]/u.match(text)
  end

  # Clears the items, the ignored words and the word counter.
  def reset
    @ignored_words = []
    @items = []
    @nwords = 0
  end

  public
  # Truthy when the query consists of a path: scope and nothing else.
  def path_only?
    @items.empty? and @path and @package.nil?
  end

  # Truthy when the query consists of a package: scope and nothing else.
  def package_only?
    @items.empty? and @path.nil? and @package
  end

  # The raw query text as given to the constructor.
  def string
    @query_string
  end

  # The items rendered back into query syntax, separated by spaces.
  def simplified_string
    @items.map {|item| item.to_s }.join(" ")
  end

  # The item values (via to_s) separated by spaces, without prefixes.
  def string_without_properties
    @items.map {|item| item.value.to_s }.join(" ")
  end

  # True when there are no items and no package:/path: scope.
  def empty?
    @items.empty? and @package.nil? and @path.nil?
  end

  # First query item, or nil.
  def first
    @items.first
  end

  # Last query item, or nil.
  def last
    @items.last
  end

  # Yields each QueryItem in query order.
  def each
    @items.each {|item| yield(item) }
  end

  # Number of query items.
  def length
    @items.length
  end

  # Returns the values of the items for which the block is truthy (all
  # items when no block is given). Note that this replaces
  # Enumerable#collect with select-then-map behaviour.
  def collect
    @items.find_all {|item|
      if block_given?
        yield(item)
      else
        true
      end
    }.map {|item|
      item.value
    }
  end

  # Values of the single-word items.
  def keywords
    collect {|item| not item.phrase? }
  end

  # Values (word arrays) of the phrase items.
  def phrases
    collect {|item| item.phrase? }
  end

  # Re-adds every item through the tokenizer, so single words are split
  # into tokens as well. Returns true when the number of counted words
  # changed, false otherwise.
  def tokenize_all
    original_items = @items.clone
    original_nwords = @nwords
    reset
    original_items.each {|item|
      if item.phrase?
        value = item.value.join(" ")
        add_item_for_phrase(item.property, value)
      else
        add_item_for_phrase(item.property, item.value)
      end
    }
    make_words

    return @nwords != original_nwords
  end

  # Raises the QueryError recorded while parsing, if any.
  def validate
    raise @error if @error
  end
end
|
235
|
+
end
|