gonzui 1.2-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. data/AUTHORS.txt +9 -0
  2. data/History.txt +5539 -0
  3. data/Manifest.txt +115 -0
  4. data/PostInstall.txt +17 -0
  5. data/README.rdoc +149 -0
  6. data/Rakefile +28 -0
  7. data/bin/gonzui-db +167 -0
  8. data/bin/gonzui-import +177 -0
  9. data/bin/gonzui-remove +58 -0
  10. data/bin/gonzui-search +68 -0
  11. data/bin/gonzui-server +176 -0
  12. data/bin/gonzui-update +53 -0
  13. data/data/gonzui/catalog/catalog.ja +80 -0
  14. data/data/gonzui/doc/favicon.ico +0 -0
  15. data/data/gonzui/doc/folder.png +0 -0
  16. data/data/gonzui/doc/gonzui.css +279 -0
  17. data/data/gonzui/doc/gonzui.js +111 -0
  18. data/data/gonzui/doc/text.png +0 -0
  19. data/data/gonzuirc.sample +29 -0
  20. data/ext/autopack/autopack.c +88 -0
  21. data/ext/autopack/extconf.rb +3 -0
  22. data/ext/delta/delta.c +147 -0
  23. data/ext/delta/extconf.rb +5 -0
  24. data/ext/texttokenizer/extconf.rb +5 -0
  25. data/ext/texttokenizer/texttokenizer.c +93 -0
  26. data/ext/xmlformatter/extconf.rb +5 -0
  27. data/ext/xmlformatter/xmlformatter.c +207 -0
  28. data/lib/gonzui.rb +59 -0
  29. data/lib/gonzui/apt.rb +193 -0
  30. data/lib/gonzui/autopack.so +0 -0
  31. data/lib/gonzui/bdbdbm.rb +118 -0
  32. data/lib/gonzui/cmdapp.rb +14 -0
  33. data/lib/gonzui/cmdapp/app.rb +175 -0
  34. data/lib/gonzui/cmdapp/search.rb +134 -0
  35. data/lib/gonzui/config.rb +117 -0
  36. data/lib/gonzui/content.rb +19 -0
  37. data/lib/gonzui/dbm.rb +673 -0
  38. data/lib/gonzui/deindexer.rb +162 -0
  39. data/lib/gonzui/delta.rb +49 -0
  40. data/lib/gonzui/delta.so +0 -0
  41. data/lib/gonzui/extractor.rb +347 -0
  42. data/lib/gonzui/fetcher.rb +309 -0
  43. data/lib/gonzui/gettext.rb +144 -0
  44. data/lib/gonzui/importer.rb +84 -0
  45. data/lib/gonzui/indexer.rb +316 -0
  46. data/lib/gonzui/info.rb +80 -0
  47. data/lib/gonzui/license.rb +100 -0
  48. data/lib/gonzui/logger.rb +48 -0
  49. data/lib/gonzui/monitor.rb +177 -0
  50. data/lib/gonzui/progressbar.rb +235 -0
  51. data/lib/gonzui/remover.rb +38 -0
  52. data/lib/gonzui/searcher.rb +330 -0
  53. data/lib/gonzui/searchquery.rb +235 -0
  54. data/lib/gonzui/searchresult.rb +111 -0
  55. data/lib/gonzui/texttokenizer.so +0 -0
  56. data/lib/gonzui/updater.rb +254 -0
  57. data/lib/gonzui/util.rb +415 -0
  58. data/lib/gonzui/vcs.rb +128 -0
  59. data/lib/gonzui/webapp.rb +25 -0
  60. data/lib/gonzui/webapp/advsearch.rb +123 -0
  61. data/lib/gonzui/webapp/filehandler.rb +24 -0
  62. data/lib/gonzui/webapp/jsfeed.rb +61 -0
  63. data/lib/gonzui/webapp/markup.rb +445 -0
  64. data/lib/gonzui/webapp/search.rb +269 -0
  65. data/lib/gonzui/webapp/servlet.rb +319 -0
  66. data/lib/gonzui/webapp/snippet.rb +155 -0
  67. data/lib/gonzui/webapp/source.rb +37 -0
  68. data/lib/gonzui/webapp/stat.rb +137 -0
  69. data/lib/gonzui/webapp/top.rb +63 -0
  70. data/lib/gonzui/webapp/uri.rb +140 -0
  71. data/lib/gonzui/webapp/webrick.rb +48 -0
  72. data/lib/gonzui/webapp/xmlformatter.so +0 -0
  73. data/script/console +10 -0
  74. data/script/destroy +14 -0
  75. data/script/generate +14 -0
  76. data/script/makemanifest.rb +21 -0
  77. data/tasks/extconf.rake +13 -0
  78. data/tasks/extconf/autopack.rake +43 -0
  79. data/tasks/extconf/delta.rake +43 -0
  80. data/tasks/extconf/texttokenizer.rake +43 -0
  81. data/tasks/extconf/xmlformatter.rake +43 -0
  82. data/test/_external_tools.rb +13 -0
  83. data/test/_test-util.rb +142 -0
  84. data/test/foo/Makefile.foo +66 -0
  85. data/test/foo/bar.c +5 -0
  86. data/test/foo/bar.h +6 -0
  87. data/test/foo/foo.c +25 -0
  88. data/test/foo/foo.spec +33 -0
  89. data/test/test_apt.rb +42 -0
  90. data/test/test_autopack_extn.rb +7 -0
  91. data/test/test_bdbdbm.rb +79 -0
  92. data/test/test_cmdapp-app.rb +35 -0
  93. data/test/test_cmdapp-search.rb +99 -0
  94. data/test/test_config.rb +28 -0
  95. data/test/test_content.rb +15 -0
  96. data/test/test_dbm.rb +171 -0
  97. data/test/test_deindexer.rb +50 -0
  98. data/test/test_delta.rb +66 -0
  99. data/test/test_extractor.rb +78 -0
  100. data/test/test_fetcher.rb +75 -0
  101. data/test/test_gettext.rb +50 -0
  102. data/test/test_gonzui.rb +11 -0
  103. data/test/test_helper.rb +10 -0
  104. data/test/test_importer.rb +56 -0
  105. data/test/test_indexer.rb +37 -0
  106. data/test/test_info.rb +82 -0
  107. data/test/test_license.rb +49 -0
  108. data/test/test_logger.rb +60 -0
  109. data/test/test_monitor.rb +23 -0
  110. data/test/test_searcher.rb +37 -0
  111. data/test/test_searchquery.rb +27 -0
  112. data/test/test_searchresult.rb +43 -0
  113. data/test/test_texttokenizer.rb +47 -0
  114. data/test/test_updater.rb +95 -0
  115. data/test/test_util.rb +149 -0
  116. data/test/test_vcs.rb +61 -0
  117. data/test/test_webapp-markup.rb +42 -0
  118. data/test/test_webapp-util.rb +19 -0
  119. data/test/test_webapp-xmlformatter.rb +19 -0
  120. metadata +292 -0
@@ -0,0 +1,38 @@
1
+ #
2
+ # remover.rb - remove contents from gonzui.db
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# Error raised by Remover, e.g. when the package to remove is not
# registered in the database.
class RemoverError < GonzuiError
end
14
+
15
# Updater that removes every content belonging to a package from the
# database held in @dbm.
#
# NOTE(review): @dbm, @npackages, make_progress_bar and deindex_content
# are not defined in this class; presumably they come from
# AbstractUpdater -- confirm there.
class Remover < AbstractUpdater
  private

  # Label describing what this updater did.
  def do_task_name
    return "removed"
  end

  public

  # Deindexes every path recorded for +package_name+ and advances a
  # progress bar once per path, then counts the package as processed.
  #
  # Raises RemoverError when @dbm does not know the package.
  def remove_package(package_name)
    unless @dbm.has_package?(package_name)
      raise RemoverError, "#{package_name}: package not found"
    end
    total = @dbm.get_ncontents_in_package(package_name)

    bar = make_progress_bar(package_name, total)
    id_of_package = @dbm.get_package_id(package_name)
    @dbm.get_path_ids(id_of_package).each do |path_id|
      deindex_content(@dbm.get_path(path_id))
      bar.inc
    end
    bar.finish
    @npackages += 1
  end
end
38
+ end
@@ -0,0 +1,330 @@
1
+ #
2
+ # searcher.rb - searcher implementation
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# Finds the places in one path where a sequence of words occurs
# consecutively (i.e. as a phrase).
#
# FIXME: It's not efficient. It's better to use a data
# structure like a priority queue to handle the
# list-of-list to achieve better performance.
class PhraseFinder
  include Util

  # dbm     - object answering get_word_id(word) and
  #           get_all_word_info(path_id, word_id)
  # path_id - ID of the path to look into
  # words   - the words of the phrase, in order
  #
  # Every word must already have a word ID (checked with assert_non_nil).
  def initialize(dbm, path_id, words)
    @word_ids = []
    @list_of_list = []

    words.each {|word|
      word_id = dbm.get_word_id(word)
      assert_non_nil(word_id)
      info_list = dbm.get_all_word_info(path_id, word_id)
      @word_ids.push(word_id)
      @list_of_list.push(info_list)
    }
    @last_word = words.last
  end

  # Returns true when the entries of +info_list+ starting at index +i+
  # carry the phrase's word IDs in order with consecutive seqno values.
  def match?(info_list, i)
    prev_seqno = nil
    @word_ids.each_with_index {|word_id, offset|
      return false unless i + offset < info_list.length
      info = info_list[i + offset]
      return false unless word_id == info.word_id
      # Words of a phrase must directly follow one another.
      return false unless prev_seqno.nil? || (prev_seqno + 1) == info.seqno
      prev_seqno = info.seqno
    }
    return true
  end

  public
  # Yields an Occurrence (start byteno, lineno, length) for every
  # position in the path where the whole phrase matches.
  def each
    # Merge the per-word lists into one list ordered by seqno. A word
    # repeated in the phrase contributes its list more than once, so
    # entries sharing a seqno are collapsed to a single entry.
    prev = nil
    info_list = @list_of_list.flatten.sort_by {|info|
      info.seqno
    }.find_all {|info|
      fresh = info.seqno != prev
      prev = info.seqno
      fresh
    }
    info_list.length.times {|i|
      next unless match?(info_list, i)
      first = info_list[i]
      last = info_list[i + @word_ids.length - 1]
      # The span runs from the start of the first word to the end of
      # the last one. byteno is taken to be a byte offset, so the last
      # word has to be measured in bytes as well; String#length counts
      # characters on Ruby >= 1.9 and would cut multi-byte words short.
      length = last.byteno + byte_length(@last_word) - first.byteno
      yield(Occurrence.new(first.byteno, first.lineno, length))
    }
  end

  private
  # Size of +string+ in bytes. Falls back to String#length on Rubies
  # that have no String#bytesize (there, length already counts bytes).
  def byte_length(string)
    if string.respond_to?(:bytesize)
      string.bytesize
    else
      string.length
    end
  end
end
68
+
69
# Error describing a problem with a search query.
class QueryError < GonzuiError
end

# Raised by Searcher lookups when a word, package or path is unknown
# or nothing matches; Searcher#search rescues it.
class NotFoundError < GonzuiError
end
71
+
72
# Runs a search query against the database and collects the matches
# into a SearchResult.
class Searcher
  include Util

  # dbm              - database object used for every lookup
  # search_query     - query object answering words, each, length, path,
  #                    package, format, license and tokenize_all
  # at_most_nresults - number of result items after which collecting stops
  def initialize(dbm, search_query, at_most_nresults)
    @dbm = dbm
    @search_query = search_query
    @at_most_nresults = at_most_nresults
    # If "all" is specified, both IDs become nil. No problem.
    @target_format_id = @dbm.get_format_id(@search_query.format)
    @target_license_id = @dbm.get_license_id(@search_query.license)
  end

  # Returns the ID of +word+; raises NotFoundError when it is unknown.
  def find_word_id(word)
    word_id = @dbm.get_word_id(word)
    raise NotFoundError unless word_id
    return word_id
  end

  # Returns the ID of +package_name+; raises NotFoundError when unknown.
  def find_package_id(package_name)
    package_id = @dbm.get_package_id(package_name)
    raise NotFoundError unless package_id
    return package_id
  end

  # Returns the package ID owning +path_id+ (asserted to be non-nil).
  def find_package_id_from_path_id(path_id)
    package_id = @dbm.get_package_id_from_path_id(path_id)
    assert_non_nil(package_id)
    return package_id
  end

  # Keeps the package IDs for which @dbm.send(get_ids, package_id)
  # includes +target_id+. A nil +target_id+ disables the filter.
  def filter_package_ids_by_property(package_ids, target_id, get_ids)
    return package_ids unless target_id
    return package_ids.find_all {|package_id|
      property_ids = @dbm.send(get_ids, package_id)
      property_ids.include?(target_id)
    }
  end

  # Restricts +package_ids+ to packages containing the target format.
  def filter_package_ids_by_format(package_ids)
    filter_package_ids_by_property(package_ids, @target_format_id,
                                   :get_format_ids_from_package_id)
  end

  # Restricts +package_ids+ to packages containing the target license.
  def filter_package_ids_by_license(package_ids)
    filter_package_ids_by_property(package_ids, @target_license_id,
                                   :get_license_ids_from_package_id)
  end

  # Applies the format filter, then the license filter.
  def filter_package_ids(package_ids)
    package_ids = filter_package_ids_by_format(package_ids)
    package_ids = filter_package_ids_by_license(package_ids)
    return package_ids
  end

  # Keeps the path IDs for which @dbm.send(get_id, path_id) equals
  # +target_id+. A nil +target_id+ disables the filter.
  def filter_path_ids_by_property(path_ids, target_id, get_id)
    return path_ids unless target_id
    return path_ids.find_all {|path_id|
      property_id = @dbm.send(get_id, path_id)
      property_id == target_id
    }
  end

  # Restricts +path_ids+ to paths of the target format.
  def filter_path_ids_by_format(path_ids)
    filter_path_ids_by_property(path_ids, @target_format_id,
                                :get_format_id_from_path_id)
  end

  # Restricts +path_ids+ to paths of the target license.
  def filter_path_ids_by_license(path_ids)
    filter_path_ids_by_property(path_ids, @target_license_id,
                                :get_license_id_from_path_id)
  end

  # Applies the format filter, then the license filter.
  def filter_path_ids(path_ids)
    path_ids = filter_path_ids_by_format(path_ids)
    path_ids = filter_path_ids_by_license(path_ids)
    return path_ids
  end

  # Intersects, over all query words, the filtered ID lists produced by
  # +get_proc+ (word_id -> ids) and +filter_proc+ (ids -> ids).
  #
  # Raises NotFoundError when a word is unknown or the query has no words.
  def find_ids(get_proc, filter_proc)
    ids = nil
    @search_query.words.each {|word|
      word_id = find_word_id(word)
      candidates = filter_proc.call(get_proc.call(word_id))
      ids = if ids.nil? then candidates else ids & candidates end
      # Nothing can be added to an empty intersection.
      break if ids.empty?
    }
    raise NotFoundError if ids.nil?
    return ids
  end

  # Returns the IDs of the packages containing every query word.
  def find_package_ids
    get_proc = lambda {|word_id| @dbm.get_package_ids(word_id) }
    filter_proc = lambda {|ids| filter_package_ids(ids) }
    return find_ids(get_proc, filter_proc)
  end

  # Returns the ID of +path+; raises NotFoundError when it is unknown.
  def find_path_id(path)
    path_id = @dbm.get_path_id(path)
    raise NotFoundError unless path_id
    return path_id
  end

  # Returns the IDs of the paths in +package_id+ containing every
  # query word.
  def find_path_ids(package_id)
    get_proc = lambda {|word_id|
      @dbm.get_path_ids_from_package_and_word_id(package_id, word_id)
    }
    filter_proc = lambda {|ids| filter_path_ids(ids) }
    return find_ids(get_proc, filter_proc)
  end

  # True when no +property+ is requested or +info+ matches it.
  def match_target?(info, property)
    return true unless property
    return info.match?(property)
  end

  # Tells whether scanning should stop right after an occurrence has
  # been recorded: true for :exact, false for :all and :find_one_extra.
  # Any other option hits assert_not_reached.
  def break_needed?(option)
    case option
    when :all, :find_one_extra
      return false
    when :exact
      return true
    else
      assert_not_reached
    end
    return false
  end

  # Builds the ResultItem for +path_id+ by recording the occurrences of
  # every query item. Returns nil as soon as one query item has no
  # occurrence in the path.
  #
  # option - :all            record every occurrence
  #          :exact          record only the first occurrence per item
  #          :find_one_extra record one occurrence per item and flag
  #                          the item when a further one exists
  def get_result_item(path_id, option)
    package_id = @dbm.get_package_id_from_path_id(path_id)
    item = ResultItem.new(package_id, path_id)
    @search_query.each {|qitem|
      nfound = 0
      each_occurrence(path_id, qitem) {|occ|
        if option == :find_one_extra and nfound >= 1
          item.has_more_in_path
          break
        end
        item.push(occ)
        nfound += 1
        break if break_needed?(option)
      }
      return nil if nfound == 0
    }
    return item
  end

  # Searches a single path, recording all occurrences. Raises
  # NotFoundError when some query item does not occur in it.
  def search_with_path_internal(path_id)
    result = SearchResult.new
    item = get_result_item(path_id, :all)
    raise NotFoundError if item.nil?
    item.has_more_in_path if item.list.length > @search_query.length
    result.push(item)
    return result
  end

  # Searches the path named by the query.
  def search_with_path
    path_id = find_path_id(@search_query.path)
    return search_with_path_internal(path_id)
  end

  # Searches the paths of one package, collecting at most
  # @at_most_nresults items. When the only hit has further occurrences,
  # that path is searched again in full instead.
  def search_with_package_internal(package_id)
    result = SearchResult.new
    find_path_ids(package_id).each {|path_id|
      item = get_result_item(path_id, :find_one_extra)
      next if item.nil?
      result.push(item)
      if result.length >= @at_most_nresults
        result.limit_exceeded = true
        break
      end
    }
    if result.length == 1 and result.first.has_more?
      return search_with_path_internal(result.first.path_id)
    else
      return result
    end
  end

  # Searches the package named by the query.
  def search_with_package
    package_id = find_package_id(@search_query.package)
    return search_with_package_internal(package_id)
  end

  # Searches all packages, keeping one item per package (flagged when a
  # second matching path exists) up to @at_most_nresults items. When
  # exactly one package is hit, that package is searched in detail.
  def search_without_scope
    result = SearchResult.new
    find_package_ids.each {|package_id|
      list = []
      find_path_ids(package_id).each {|path_id|
        item = get_result_item(path_id, :find_one_extra)
        next if item.nil?
        list.push(item)
        # Two hits are enough to know the package has more than one.
        break if list.length >= 2
      }
      next if list.empty?
      item = list.first
      item.has_more_in_package if list.length > 1
      result.push(item)
      if result.length >= @at_most_nresults
        result.limit_exceeded = true
        break
      end
    }
    if result.length == 1
      return search_with_package_internal(result.first.package_id)
    else
      return result
    end
  end

  public
  # Runs the search in the scope given by the query (path, package or
  # none). On NotFoundError the query is re-tokenized once via
  # tokenize_all and, if that changed it, the search is retried;
  # otherwise an empty SearchResult is returned.
  def search
    retried = false
    begin
      result = if @search_query.path
                 search_with_path
               elsif @search_query.package
                 search_with_package
               else
                 search_without_scope
               end
      return result
    rescue NotFoundError
      if retried == false and @search_query.tokenize_all
        retried = true
        retry
      end
      return SearchResult.new
    end
  end

  private
  # Yields an Occurrence for each place +qitem+ occurs in +path_id+:
  # phrase items are located with PhraseFinder, single words through
  # @dbm.find_word_info restricted to entries matching the item's
  # property. Raises NotFoundError when a single word is unknown.
  def each_occurrence(path_id, qitem)
    if qitem.phrase?
      finder = PhraseFinder.new(@dbm, path_id, qitem.value)
      finder.each {|occ| yield(occ) }
    else
      word_id = find_word_id(qitem.value)
      @dbm.find_word_info(path_id, word_id) {|info|
        next unless match_target?(info, qitem.property)
        yield(Occurrence.new(info.byteno, info.lineno, qitem.value.length))
      }
    end
  end
end
330
+ end
@@ -0,0 +1,235 @@
1
+ #
2
+ # searchquery.rb - search query implementation
3
+ #
4
+ # Copyright (C) 2004-2005 Satoru Takabayashi <satoru@namazu.org>
5
+ # All rights reserved.
6
+ # This is free software with ABSOLUTELY NO WARRANTY.
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the GNU General Public License version 2.
10
+ #
11
+
12
+ module Gonzui
13
# One unit of a search query.
#
# property - optional qualifier written before the value as "property:"
#            (nil when absent)
# value    - a String for a single word, an Array of words for a phrase
# phrase_p - true when the item is a phrase
QueryItem = Struct.new(:property, :value, :phrase_p)
class QueryItem
  # Renders the item in query syntax: an optional "property:" prefix
  # followed by the word, or by the phrase's words joined with spaces
  # and wrapped in double quotes.
  def to_s
    string = ""
    # Interpolate instead of appending to property.to_s: String#to_s
    # returns the receiver itself, so appending ":" would modify a
    # String property in place on every call.
    string << "#{self.property}:" if self.property
    if phrase?
      string << sprintf('"%s"', self.value.join(" "))
    else
      string << self.value
    end
    return string
  end

  # True only when phrase_p is exactly true.
  def phrase?
    self.phrase_p == true
  end
end
30
+
31
# A parsed search query: a list of QueryItem objects plus an optional
# package: or path: scope and the format/license taken from the options.
class SearchQuery
  include Enumerable
  include GetText

  # config       - object answering max_words (upper bound on the number
  #                of words accepted; the rest go to ignored_words)
  # query_string - the raw query text
  # options      - Hash; :format and :license are read from it
  #
  # Parsing problems are not raised here; they are kept and raised by
  # #validate.
  def initialize(config, query_string, options = {})
    @query_string = query_string
    @options = options
    @items = []

    @package = nil
    @path = nil
    @format = nil
    @license = nil
    @error = nil

    @words = []
    @ignored_words = []

    @max_words = config.max_words
    @nwords = 0
    parse_query_string
  end
  attr_accessor :path
  attr_reader :package
  attr_reader :format
  attr_reader :license
  attr_reader :ignored_words
  attr_reader :words
  attr_reader :options

  private
  # Names accepted as a "name:" prefix in a query.
  KnownProperties = []
  [:path, :package].each {|property|
    KnownProperties.push(property)
  }
  # :fundef, :funcall, etc.
  LangScan::Type.each_group {|group|
    group.each {|type_info|
      KnownProperties.push(type_info.type)
    }
  }

  # Splits @query_string into parts of the form [property:]word or
  # [property:]"quoted phrase" and stores them as scope (package:/path:)
  # or as query items. Conflicts are recorded in @error.
  def parse_query_string
    kp = KnownProperties.join("|")
    # The quoted part is matched lazily so that every pair of double
    # quotes forms its own phrase; a greedy match would swallow
    # everything between the first and the last quote of the query.
    parts = @query_string.scan(/((?:#{kp}):)?(?:"(.+?)"|(\S+))/)
    parts.each {|prefix, quoted, bare|
      phrase_p = !quoted.nil?
      text = (quoted || bare)
      if prefix
        property = prefix.chop.intern
        case property
        when :package
          @error = QueryError.new(N_("package: duplicated.")) if @package
          @package = text
        when :path
          @error = QueryError.new(N_("path: duplicated.")) if @path
          @path = text
        else
          add_item(property, text, phrase_p)
        end
      else
        add_item(nil, text, phrase_p)
      end
    }
    @format = @options[:format]
    @license = @options[:license]
    if @package and @path
      message = N_("package: and path: cannot be specified together.")
      @error = QueryError.new(message)
    end
    make_words
  end

  # Rebuilds @words as the flat list of all words of all items.
  def make_words
    @words = @items.map {|i| i.value }.flatten
  end

  # Tokenizes +text+ with TextTokenizer and adds the resulting words as
  # one phrase item. Words beyond @max_words go to @ignored_words; no
  # item is added when no word was accepted.
  def add_item_for_phrase(property, text)
    value = []
    TextTokenizer.each_word(text) {|word, unused|
      if @nwords < @max_words
        @nwords += 1
        value.push(word)
      else
        @ignored_words.push(word)
      end
    }
    unless value.empty?
      item = QueryItem.new(property, value, true)
      @items.push(item)
    end
  end

  # Adds +text+ as a single-word item, or to @ignored_words when
  # @max_words has been reached.
  def add_item_for_single_word(property, text)
    if @nwords < @max_words
      @nwords += 1
      item = QueryItem.new(property, text, false)
      @items.push(item)
    else
      @ignored_words.push(text)
    end
  end

  # Adds +text+ as a phrase when it was quoted or contains a non-ASCII
  # character, otherwise as a single word.
  def add_item(property, text, phrase_p)
    if phrase_p or has_multi_byte_char?(text)
      add_item_for_phrase(property, text)
    else
      add_item_for_single_word(property, text)
    end
  end

  # Truthy (a MatchData) when +text+ contains a character outside
  # \x00-\x7f, nil otherwise.
  def has_multi_byte_char?(text)
    /[^\x00-\x7f]/u.match(text)
  end

  # Forgets all items and ignored words and resets the word count.
  def reset
    @ignored_words = []
    @items = []
    @nwords = 0
  end

  public
  # Truthy when the query consists of a path: scope only.
  def path_only?
    @items.empty? and @path and @package.nil?
  end

  # Truthy when the query consists of a package: scope only.
  def package_only?
    @items.empty? and @path.nil? and @package
  end

  # The raw query text.
  def string
    @query_string
  end

  # The items rendered back to query syntax, separated by spaces.
  def simplified_string
    @items.map {|item| item.to_s }.join(" ")
  end

  # The item values separated by spaces, without property prefixes.
  # The words of a phrase are concatenated without a separator, which
  # is what Array#to_s produced on Ruby 1.8; on Ruby >= 1.9 Array#to_s
  # is inspect and would emit text like ["a", "b"] instead.
  def string_without_properties
    @items.map {|item|
      if item.value.is_a?(Array)
        item.value.join
      else
        item.value.to_s
      end
    }.join(" ")
  end

  # True when there are neither items nor a package:/path: scope.
  def empty?
    @items.empty? and @package.nil? and @path.nil?
  end

  # The first query item (nil when there is none).
  def first
    @items.first
  end

  # The last query item (nil when there is none).
  def last
    @items.last
  end

  # Yields every query item in order.
  def each
    @items.each {|item| yield(item) }
  end

  # Number of query items.
  def length
    @items.length
  end

  # Returns the values of the items for which the block is truthy, or
  # of all items when no block is given.
  # Note: this replaces Enumerable#collect with different semantics.
  def collect
    @items.find_all {|item|
      if block_given?
        yield(item)
      else
        true
      end
    }.map {|item|
      item.value
    }
  end

  # Values of the single-word items.
  def keywords
    collect {|item| not item.phrase? }
  end

  # Values (word arrays) of the phrase items.
  def phrases
    collect {|item| item.phrase? }
  end

  # Re-adds every item through the tokenizer as a phrase and rebuilds
  # the word list. Returns true when the number of accepted words
  # changed, false otherwise.
  def tokenize_all
    original_items = @items.clone
    original_nwords = @nwords
    reset
    original_items.each {|item|
      if item.phrase?
        value = item.value.join(" ")
        add_item_for_phrase(item.property, value)
      else
        add_item_for_phrase(item.property, item.value)
      end
    }
    make_words

    return @nwords != original_nwords
  end

  # Raises the QueryError recorded while parsing, if any.
  def validate
    raise @error if @error
  end
end
235
+ end