search_do 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +1 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +63 -0
- data/Rakefile +47 -0
- data/TESTING +6 -0
- data/VERSION +1 -0
- data/examples/he_search.rb +13 -0
- data/examples/person.rb +20 -0
- data/init.rb +1 -0
- data/lib/estraier_admin.rb +47 -0
- data/lib/search_do/backends/hyper_estraier/estraier_pure_extention.rb +61 -0
- data/lib/search_do/backends/hyper_estraier.rb +213 -0
- data/lib/search_do/backends.rb +17 -0
- data/lib/search_do/dirty_tracking/bridge.rb +22 -0
- data/lib/search_do/dirty_tracking/self_made.rb +36 -0
- data/lib/search_do/dirty_tracking.rb +15 -0
- data/lib/search_do/indexer.rb +65 -0
- data/lib/search_do/utils.rb +11 -0
- data/lib/search_do.rb +330 -0
- data/lib/vendor/estraierpure.rb +1025 -0
- data/lib/vendor/overview +100 -0
- data/recipes/mode_maintenance.rb +52 -0
- data/spec/backends/hyper_estraier_spec.rb +220 -0
- data/spec/backends/result_document_spec.rb +26 -0
- data/spec/dirty_tracking/bridge_spec.rb +33 -0
- data/spec/estraier_admin_spec.rb +26 -0
- data/spec/fixtures/stories.yml +27 -0
- data/spec/indexer_spec.rb +59 -0
- data/spec/search_do_spec.rb +335 -0
- data/spec/setup_test_model.rb +38 -0
- data/spec/spec_helper.rb +52 -0
- data/tasks/acts_as_searchable_tasks.rake +70 -0
- metadata +95 -0
@@ -0,0 +1,1025 @@
|
|
1
|
+
#--
|
2
|
+
# Ruby interface of Hyper Estraier
|
3
|
+
# Copyright (C) 2004-2006 Mikio Hirabayashi
|
4
|
+
# All rights reserved.
|
5
|
+
# This file is part of Hyper Estraier.
|
6
|
+
# Redistribution and use in source and binary forms, with or without modification, are
|
7
|
+
# permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# * Redistributions of source code must retain the above copyright notice, this list of
|
10
|
+
# conditions and the following disclaimer.
|
11
|
+
# * Redistributions in binary form must reproduce the above copyright notice, this list of
|
12
|
+
# conditions and the following disclaimer in the documentation and/or other materials
|
13
|
+
# provided with the distribution.
|
14
|
+
# * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
|
15
|
+
# endorse or promote products derived from this software without specific prior written
|
16
|
+
# permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
19
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
21
|
+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
24
|
+
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
25
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
26
|
+
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
|
+
#++
|
28
|
+
#:include:overview
|
29
|
+
|
30
|
+
|
31
|
+
require "uri"
|
32
|
+
require "socket"
|
33
|
+
require "stringio"
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
#
|
38
|
+
# Module for the namespace of Hyper Estraier
|
39
|
+
#
|
40
|
+
module EstraierPure
|
41
|
+
#----------------------------------------------------------------
|
42
|
+
#++ Abstraction of document.
|
43
|
+
#----------------------------------------------------------------
|
44
|
+
class Document
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Add an attribute.
  # `name' specifies the name of an attribute.
  # `value' specifies the value of the attribute.  If it is `nil', the attribute is removed.
  # Runs of whitespace in both strings are collapsed to one space and the ends are trimmed.
  # The return value is always `nil'.
  def add_attr(name, value)
    if $DEBUG
      Utility::check_types(value.nil? ? { name=>String } : { name=>String, value=>String })
    end
    name = squash_spaces(name)
    if value.nil?
      # documented contract: a nil value removes the attribute
      @attrs.delete(name)
    else
      @attrs[name] = squash_spaces(value)
    end
    nil
  end
  # Add a sentence of text.
  # `text' specifies a sentence of text.  A sentence that is empty after whitespace
  # normalization is ignored.
  # The return value is always `nil'.
  def add_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = squash_spaces(text)
    # 0 is truthy in Ruby, so the emptiness test must be explicit
    @dtexts.push(text) unless text.empty?
    nil
  end
  # Add a hidden sentence.
  # `text' specifies a hidden sentence.  A sentence that is empty after whitespace
  # normalization is ignored.
  # The return value is always `nil'.
  def add_hidden_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = squash_spaces(text)
    @htexts.push(text) unless text.empty?
    nil
  end
  # Attach keywords.
  # `kwords' specifies a map object of keywords.  Keys of the map should be keywords of the
  # document and values should be their scores in decimal string.
  # The return value is always `nil'.
  def set_keywords(kwords)
    Utility::check_types({ kwords=>Hash }) if $DEBUG
    @kwords = kwords
    nil
  end
  # Get the ID number.
  # The return value is the ID number of the document object.  If the object has never been
  # registered, -1 is returned.
  def id()
    @id
  end
  # Get a list of attribute names of a document object.
  # The return value is a sorted list object of attribute names.
  def attr_names()
    @attrs.keys.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  # Get a list of sentences of the text.
  # The return value is a list object of sentences of the text.
  def texts()
    @dtexts
  end
  # Concatenate sentences of the text of a document object.
  # The return value is the sentences joined by single spaces.
  def cat_texts()
    @dtexts.join(" ")
  end
  # Dump draft data of a document object.
  # The layout is: one "name=value" line per attribute (sorted by name), an optional
  # "%VECTOR" line carrying the keywords, a blank line, one line per sentence and one
  # tab-prefixed line per hidden sentence.
  # The return value is draft data.
  def dump_draft()
    buf = StringIO::new
    @attrs.keys.sort.each do |key|
      buf.printf("%s=%s\n", key, @attrs[key])
    end
    if @kwords
      buf.printf("%%VECTOR")
      @kwords.each do |key, value|
        buf.printf("\t%s\t%s", key, value)
      end
      buf.printf("\n")
    end
    buf.printf("\n")
    @dtexts.each { |text| buf.printf("%s\n", text) }
    @htexts.each { |text| buf.printf("\t%s\n", text) }
    buf.string
  end
  # Get attached keywords.
  # The return value is a map object of keywords and their scores in decimal string.  If no
  # keyword is attached, `nil' is returned.
  def keywords()
    @kwords
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a document object.
  # `draft' specifies a string of draft data (the format written by `dump_draft').
  def initialize(draft = "")
    Utility::check_types({ draft=>String }) if $DEBUG
    @id = -1
    @attrs = {}
    @dtexts = []
    @htexts = []
    @kwords = nil
    lines = draft.split(/\n/)
    num = parse_draft_header(lines)
    parse_draft_body(lines, num)
  end
  # Collapse every run of whitespace in `str' into one space and trim both ends.
  def squash_spaces(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
  # Read attribute and "%" control lines up to the first empty line.
  # The return value is the index of the first line of the body part.
  def parse_draft_header(lines)
    num = 0
    while num < lines.length
      line = lines[num]
      num += 1
      break if line.empty?
      if line =~ /^%/
        parse_vector_line(line) if line =~ /^%VECTOR\t/
        next
      end
      line = squash_spaces(line)
      if idx = line.index("=")
        @attrs[line[0...idx]] = line[(idx + 1)...line.length]
      end
    end
    num
  end
  # Store the tab separated keyword/score pairs of a "%VECTOR" line.
  def parse_vector_line(line)
    @kwords = {} unless @kwords
    fields = line.split(/\t/)
    i = 1
    # a trailing keyword without a score is ignored
    while i < fields.length - 1
      @kwords[fields[i]] = fields[i + 1]
      i += 2
    end
  end
  # Read the body part: tab-prefixed lines are hidden sentences, others are sentences.
  def parse_draft_body(lines, num)
    while num < lines.length
      line = lines[num]
      num += 1
      next if line.empty?
      # line[0, 1] is a one-character string on every Ruby version, unlike line[0]
      if line[0, 1] == "\t"
        @htexts.push(line[1...line.length]) if line.length > 1
      else
        @dtexts.push(line)
      end
    end
  end
end
|
206
|
+
#----------------------------------------------------------------
|
207
|
+
#++ Abstraction of search condition.
|
208
|
+
#----------------------------------------------------------------
|
209
|
+
class Condition
  #--------------------------------
  # public constants
  #--------------------------------
  public
  # option: check every N-gram key
  SURE = 1 << 0
  # option: check N-gram keys skipping by one
  USUAL = 1 << 1
  # option: check N-gram keys skipping by two
  FAST = 1 << 2
  # option: check N-gram keys skipping by three
  AGITO = 1 << 3
  # option: without TF-IDF tuning
  NOIDF = 1 << 4
  # option: with the simplified phrase
  SIMPLE = 1 << 10
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Set the search phrase.
  # `phrase' specifies a search phrase.  Runs of whitespace are collapsed to one space.
  # The return value is always `nil'.
  def set_phrase(phrase)
    Utility::check_types({ phrase=>String }) if $DEBUG
    @phrase = squash_spaces(phrase)
    nil
  end
  # Add an expression for an attribute.
  # `expr' specifies an expression for an attribute.
  # The return value is always `nil'.
  def add_attr(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @attrs.push(squash_spaces(expr))
    nil
  end
  # Set the order of a condition object.
  # `expr' specifies an expression for the order.  By default, the order is by score descending.
  # The return value is always `nil'.
  def set_order(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @order = squash_spaces(expr)
    nil
  end
  # Set the maximum number of retrieval.
  # `max' specifies the maximum number of retrieval.  Negative values are ignored.  By
  # default, the number of retrieval is not limited.
  # The return value is always `nil'.
  def set_max(max)
    Utility::check_types({ max=>Integer }) if $DEBUG
    @max = max if max >= 0
    nil
  end
  # Set the number of skipped documents.
  # `skip' specifies the number of documents to be skipped in the search result.  Negative
  # values are ignored.
  # The return value is always `nil'.
  def set_skip(skip)
    Utility::check_types({ skip=>Integer }) if $DEBUG
    @skip = skip if skip >= 0
    nil
  end
  # Set options of retrieval.
  # `options' specifies options: `Condition::SURE' specifies that it checks every N-gram
  # key, `Condition::USUAL', which is the default, specifies that it checks N-gram keys
  # with skipping one key, `Condition::FAST' skips two keys, `Condition::AGITO'
  # skips three keys, `Condition::NOIDF' specifies not to perform TF-IDF tuning,
  # `Condition::SIMPLE' specifies to use simplified phrase.  Each option can be specified at
  # the same time by bitwise or.  If keys are skipped, though search speed is improved, the
  # relevance ratio grows less.  Options accumulate across calls.
  # The return value is always `nil'.
  def set_options(options)
    Utility::check_types({ options=>Integer }) if $DEBUG
    @options |= options
    nil
  end
  # Get the search phrase.
  # The return value is the search phrase.
  def phrase()
    @phrase
  end
  # Get expressions for attributes.
  # The return value is expressions for attributes.
  def attrs()
    @attrs
  end
  # Get the order expression.
  # The return value is the order expression.
  def order()
    @order
  end
  # Get the maximum number of retrieval.
  # The return value is the maximum number of retrieval (-1 until `set_max' is called).
  def max()
    @max
  end
  # Get the number of skipped documents.
  # The return value is the number of documents to be skipped in the search result.
  def skip()
    @skip
  end
  # Get options of retrieval.
  # The return value is options by bitwise or.
  def options()
    @options
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a search condition object.
  def initialize()
    @phrase = nil
    @attrs = []
    @order = nil
    @max = -1
    @skip = 0
    @options = 0
  end
  # Collapse every run of whitespace in `str' into one space and trim both ends.
  def squash_spaces(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
end
|
335
|
+
#----------------------------------------------------------------
|
336
|
+
#++ Abstraction of document in result set.
|
337
|
+
#----------------------------------------------------------------
|
338
|
+
class ResultDocument
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The URI of the result document object.
  attr_reader :uri
  # The snippet of the result document object, as a string of tab separated values.  Each
  # line is a string to be shown.  Though most lines have only one field, some lines have
  # two fields.  If the second field exists, the first field is to be shown highlighted,
  # and the second field means its normalized form.
  attr_reader :snippet
  # The serialized keywords of the result document object, as a string of tab separated
  # values.  Keywords and their scores come alternately.
  attr_reader :keywords
  # Get a list of attribute names.
  # The return value is a sorted list object of attribute names.
  def attr_names
    names = @attrs.keys
    names.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    if $DEBUG
      Utility::check_types({ name=>String })
    end
    @attrs[name]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a result document object from its URI, attribute map, snippet and keywords.
  def initialize(uri, attrs, snippet, keywords)
    if $DEBUG
      Utility::check_types({ uri=>String, attrs=>Hash,
                             snippet=>String, keywords=>String })
    end
    @uri, @attrs, @snippet, @keywords = uri, attrs, snippet, keywords
  end
end
|
387
|
+
#----------------------------------------------------------------
|
388
|
+
#++ Abstraction of result set from node.
|
389
|
+
#----------------------------------------------------------------
|
390
|
+
class NodeResult
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Get the number of documents.
  # The return value is the number of documents held by this result.
  def doc_num
    @docs.length
  end
  # Get a result document object.
  # `index' specifies the zero-based position of a document in the result.
  # The return value is a result document object or `nil' if the index is out of bounds.
  def get_doc(index)
    if $DEBUG
      Utility::check_types({ index=>Integer })
    end
    in_range = index >= 0 && index < @docs.length
    in_range ? @docs[index] : nil
  end
  # Get the value of hint information.
  # `key' specifies the key of a hint.  "VERSION", "NODE", "HIT", "HINT#n", "DOCNUM", "WORDNUM",
  # "TIME", "LINK#n", and "VIEW" are provided for keys.
  # The return value is the hint or `nil' if the key does not exist.
  def hint(key)
    if $DEBUG
      Utility::check_types({ key=>String })
    end
    @hints[key]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a node result object from a list of result documents and a map of hints.
  def initialize(docs, hints)
    if $DEBUG
      Utility::check_types({ docs=>Array, hints=>Hash })
    end
    @docs, @hints = docs, hints
  end
end
|
426
|
+
#----------------------------------------------------------------
|
427
|
+
#++ Abstraction of connection to P2P node.
|
428
|
+
#----------------------------------------------------------------
|
429
|
+
class Node
|
430
|
+
#--------------------------------
|
431
|
+
# public methods
|
432
|
+
#--------------------------------
|
433
|
+
public
|
434
|
+
# Set the URL of a node server.
|
435
|
+
# `url' specifies the URL of a node.
|
436
|
+
# The return value is always `nil'.
|
437
|
+
def set_url(url)
  # Remembers the node URL that the request methods append their command path to.
  if $DEBUG
    Utility::check_types({ url=>String })
  end
  @url = url
  return nil
end
|
442
|
+
# Set the proxy information.
|
443
|
+
# `host' specifies the host name of a proxy server.
|
444
|
+
# `port' specifies the port number of the proxy server.
|
445
|
+
# The return value is always `nil'.
|
446
|
+
def set_proxy(host, port)
  # Stores the proxy host and port that are handed to Utility::shuttle_url on each request.
  if $DEBUG
    Utility::check_types({ host=>String, port=>Integer })
  end
  @pxhost, @pxport = host, port
  return nil
end
|
452
|
+
# Set timeout of a connection.
|
453
|
+
# `sec' specifies timeout of the connection in seconds.
|
454
|
+
# The return value is always `nil'.
|
455
|
+
def set_timeout(sec)
  # Stores the timeout (seconds) that is handed to Utility::shuttle_url on each request.
  if $DEBUG
    Utility::check_types({ sec=>Integer })
  end
  @timeout = sec
  return nil
end
|
460
|
+
# Set the authentication information.
|
461
|
+
# `name' specifies the name of authentication.
|
462
|
+
# `password' specifies the password of the authentication.
|
463
|
+
# The return value is always `nil'.
|
464
|
+
def set_auth(name, password)
  # Stores "name:password"; the request methods encode it with Utility::base_encode for
  # their "Authorization: Basic" header.
  if $DEBUG
    Utility::check_types({ name=>String, password=>String })
  end
  credentials = name + ":" + password
  @auth = credentials
  return nil
end
|
469
|
+
# Get the status code of the last request.
|
470
|
+
# The return value is the status code of the last request. -1 means failure of connection.
|
471
|
+
def status
  # Status code recorded by the most recent request method; they reset it to -1 before
  # contacting the node.
  return @status
end
|
474
|
+
# Add a document.
|
475
|
+
# `doc' specifies a document object. The document object should have the URI attribute.
|
476
|
+
# The return value is true if success, else it is false.
|
477
|
+
def put_doc(doc)
  # Sends the draft of `doc' to "<node URL>/put_doc" and records the status code.
  # Returns true only when that code is 200; false without any request when no URL is set.
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/put_doc"
  headers = [ "Content-Type: text/x-estraier-draft" ]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  draft = doc.dump_draft
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
|
489
|
+
# Remove a document.
|
490
|
+
# `id' specifies the ID number of a registered document.
|
491
|
+
# The return value is true if success, else it is false.
|
492
|
+
def out_doc(id)
  # Sends "id=<id>" to "<node URL>/out_doc" and records the status code.
  # Returns true only when that code is 200; false without any request when no URL is set.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/out_doc"
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  query = "id=" + id.to_s
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, query, nil, nil)
  @status == 200
end
|
504
|
+
# Remove a document specified by URI.
|
505
|
+
# `uri' specifies the URI of a registered document.
|
506
|
+
# The return value is true if success, else it is false.
|
507
|
+
def out_doc_by_uri(uri)
  # Sends "uri=<escaped uri>" to "<node URL>/out_doc" and records the status code.
  # Returns true only when that code is 200; false without any request when no URL is set.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return false unless @url
  turl = @url + "/out_doc"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-component escaping also covers "&", "=" and "+", which would otherwise split or
  # alter the parameter inside the form body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status == 200
end
|
519
|
+
# Edit attributes of a document.
|
520
|
+
# `doc' specifies a document object.
|
521
|
+
# The return value is true if success, else it is false.
|
522
|
+
def edit_doc(doc)
  # Sends the draft of `doc' to "<node URL>/edit_doc" and records the status code.
  # Returns true only when that code is 200; false without any request when no URL is set.
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/edit_doc"
  headers = [ "Content-Type: text/x-estraier-draft" ]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  draft = doc.dump_draft
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
|
534
|
+
# Retrieve a document.
|
535
|
+
# `id' specifies the ID number of a registered document.
|
536
|
+
# The return value is a document object. On error, `nil' is returned.
|
537
|
+
def get_doc(id)
  # Sends "id=<id>" to "<node URL>/get_doc" and builds a Document from the response body.
  # Returns nil when no URL is set or the status code is not 200.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc"
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  query = "id=" + id.to_s
  response = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, query, nil, response)
  @status == 200 ? Document::new(response.string) : nil
end
|
551
|
+
# Retrieve a document.
|
552
|
+
# `uri' specifies the URI of a registered document.
|
553
|
+
# The return value is a document object. On error, `nil' is returned.
|
554
|
+
def get_doc_by_uri(uri)
  # Sends "uri=<escaped uri>" to "<node URL>/get_doc" and builds a Document from the
  # response body.  Returns nil when no URL is set or the status code is not 200.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  turl = @url + "/get_doc"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-component escaping also covers "&", "=" and "+", which would otherwise split or
  # alter the parameter inside the form body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  return nil if @status != 200
  Document::new(resbody.string)
end
|
568
|
+
# Retrieve the value of an attribute of a document.
|
569
|
+
# `id' specifies the ID number of a registered document.
|
570
|
+
# `name' specifies the name of an attribute.
|
571
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
572
|
+
def get_doc_attr(id, name)
  # Sends "id=<id>&attr=<escaped name>" to "<node URL>/get_doc_attr" and returns the
  # response body without its trailing newline.  Returns nil when no URL is set or the
  # status code is not 200.
  Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  turl = @url + "/get_doc_attr"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-component escaping also covers "&", "=" and "+", which would otherwise split or
  # alter the parameter inside the form body.
  reqbody = "id=" + id.to_s + "&attr=" + URI.encode_www_form_component(name)
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  return nil if @status != 200
  resbody.string.chomp
end
|
586
|
+
# Retrieve the value of an attribute of a document specified by URI.
|
587
|
+
# `uri' specifies the URI of a registered document.
|
588
|
+
# `name' specifies the name of an attribute.
|
589
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
590
|
+
def get_doc_attr_by_uri(uri, name)
  # Sends "uri=<escaped uri>&attr=<escaped name>" to "<node URL>/get_doc_attr" and returns
  # the response body without its trailing newline.  Returns nil when no URL is set or the
  # status code is not 200.
  Utility::check_types({ uri=>String, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  turl = @url + "/get_doc_attr"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-component escaping also covers "&", "=" and "+", which would otherwise split or
  # alter the parameters inside the form body.
  reqbody = "uri=" + URI.encode_www_form_component(uri) +
    "&attr=" + URI.encode_www_form_component(name)
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  return nil if @status != 200
  resbody.string.chomp
end
|
604
|
+
# Extract keywords of a document.
|
605
|
+
# `id' specifies the ID number of a registered document.
|
606
|
+
# The return value is a hash object of keywords and their scores in decimal string or `nil'
|
607
|
+
# on error.
|
608
|
+
def etch_doc(id)
  # Sends "id=<id>" to "<node URL>/etch_doc" and parses the response as one
  # "keyword<TAB>score" pair per line.  Returns the pairs as a hash, or nil when no URL is
  # set or the status code is not 200.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/etch_doc"
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  query = "id=" + id.to_s
  response = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, query, nil, response)
  return nil unless @status == 200
  scores = {}
  response.string.split(/\n/).each do |row|
    fields = row.split(/\t/)
    # rows without both a keyword and a score are ignored
    scores[fields[0]] = fields[1] if fields.length >= 2
  end
  scores
end
|
629
|
+
# Extract keywords of a document specified by URI.
|
630
|
+
# `uri' specifies the URI of a registered document.
|
631
|
+
# The return value is a hash object of keywords and their scores in decimal string or `nil'
|
632
|
+
# on error.
|
633
|
+
def etch_doc_by_uri(uri)
  # Sends "uri=<escaped uri>" to "<node URL>/etch_doc" and parses the response as one
  # "keyword<TAB>score" pair per line.  Returns the pairs as a hash, or nil when no URL is
  # set or the status code is not 200.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  turl = @url + "/etch_doc"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-component escaping also covers "&", "=" and "+", which would otherwise split or
  # alter the parameter inside the form body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  return nil if @status != 200
  kwords = {}
  resbody.string.split(/\n/).each do |line|
    pair = line.split(/\t/)
    # rows without both a keyword and a score are ignored
    next if pair.length < 2
    kwords[pair[0]] = pair[1]
  end
  kwords
end
|
654
|
+
# Get the ID of a document specified by URI.
|
655
|
+
# `uri' specifies the URI of a registered document.
|
656
|
+
# The return value is the ID of the document. On error, -1 is returned.
|
657
|
+
def uri_to_id(uri)
  # Sends "uri=<escaped uri>" to "<node URL>/uri_to_id" and returns the response body
  # converted to an Integer.  Returns -1 when no URL is set or the status code is not 200,
  # so every failure path yields the same documented value.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return -1 unless @url
  turl = @url + "/uri_to_id"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-component escaping also covers "&", "=" and "+", which would otherwise split or
  # alter the parameter inside the form body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  return -1 if @status != 200
  resbody.string.chomp.to_i
end
|
671
|
+
# Get the name.
|
672
|
+
# The return value is the name. On error, `nil' is returned.
|
673
|
+
def name
  # Calls set_info first when no name has been cached yet.
  set_info unless @name
  return @name
end
|
677
|
+
# Get the label.
|
678
|
+
# The return value is the label. On error, `nil' is returned.
|
679
|
+
def label
  # Calls set_info first when no label has been cached yet.
  set_info unless @label
  return @label
end
|
683
|
+
# Get the number of documents.
|
684
|
+
# The return value is the number of documents. On error, -1 is returned.
|
685
|
+
def doc_num
  # Calls set_info first while the cached count is still negative.
  if @dnum < 0
    set_info
  end
  return @dnum
end
|
689
|
+
# Get the number of unique words.
|
690
|
+
# The return value is the number of unique words. On error, -1 is returned.
|
691
|
+
def word_num
  # Calls set_info first while the cached count is still negative.
  if @wnum < 0
    set_info
  end
  return @wnum
end
|
695
|
+
# Get the size of the database.
|
696
|
+
# The return value is the size of the database. On error, -1.0 is returned.
|
697
|
+
def size
  # Calls set_info first while the cached size is still negative.
  if @size < 0.0
    set_info
  end
  return @size
end
|
701
|
+
# Search documents corresponding a condition.
|
702
|
+
# `cond' specifies a condition object.
|
703
|
+
# `depth' specifies the depth of meta search.
|
704
|
+
# The return value is a node result object. On error, `nil' is returned.
|
705
|
+
def search(cond, depth)
  # Sends the serialized condition to "<node url>/search" and parses the
  # reply.  @status holds the HTTP status of the request afterwards, or -1
  # when the request could not be performed.
  Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  turl = @url + "/search"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  reqbody = Utility::cond_to_query(cond, depth, @wwidth, @hwidth, @awidth)
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  return nil if @status != 200
  lines = resbody.string.split(/\n/)
  return nil if lines.empty?
  docs = []
  hints = {}
  nres = NodeResult::new(docs, hints)
  # The first line is the border string that separates the parts of the
  # reply; the border followed by ":END" closes the whole reply.
  border = lines[0]
  isend = false
  lnum = 1
  # Part 1: "key<TAB>value" hint lines, up to the next border line.
  while lnum < lines.length
    line = lines[lnum]
    lnum += 1
    if line.start_with?(border)
      isend = true if line[border.length..-1] == ":END"
      break
    end
    key, tab, value = line.partition("\t")
    hints[key] = value unless tab.empty?
  end
  # Following parts: one document each, spanning lines[snum...(lnum - 1)]
  # at the moment a border line has just been consumed.
  snum = lnum
  while !isend && lnum < lines.length
    line = lines[lnum]
    lnum += 1
    next unless line.start_with?(border)
    rdattrs = {}
    rdvector = ""
    rlnum = snum
    # Header of the part: "name=value" attributes and "%"-prefixed control
    # lines, terminated by the first blank line.
    while rlnum < lnum - 1
      rdline = lines[rlnum].strip
      rlnum += 1
      break if rdline.empty?
      if rdline.start_with?("%")
        lidx = rdline.index("\t")
        rdvector = rdline[(lidx + 1)..-1] if lidx && rdline.include?("%VECTOR")
      else
        key, sep, value = rdline.partition("=")
        rdattrs[key] = value unless sep.empty?
      end
    end
    # Everything after the blank line is the snippet, kept line by line.
    rdsnippet = lines[rlnum...(lnum - 1)].map { |sline| sline + "\n" }.join
    rduri = rdattrs["@uri"]
    # A part without a "@uri" attribute is not reported as a document.
    docs.push(ResultDocument::new(rduri, rdattrs, rdsnippet, rdvector)) if rduri
    snum = lnum
    isend = true if line[border.length..-1] == ":END"
  end
  # A reply that never reached the ":END" border is treated as an error.
  isend ? nres : nil
end
|
784
|
+
# Set width of snippet in the result.
|
785
|
+
# `wwidth' specifies whole width of a snippet. By default, it is 480. If it is 0, no
|
786
|
+
# snippet is sent. If it is negative, whole body text is sent instead of snippet.
|
787
|
+
# `hwidth' specifies width of strings picked up from the beginning of the text. By default,
|
788
|
+
# it is 96. If it is negative, the current setting is not changed.
|
789
|
+
# `awidth' specifies width of strings picked up around each highlighted word. By default,
|
790
|
+
# it is 96. If it is negative, the current setting is not changed.
|
791
|
+
def set_snippet_width(wwidth, hwidth, awidth)
  # `wwidth' is always stored, because zero and negative values are
  # meaningful for it; a negative `hwidth' or `awidth' keeps the current
  # setting.
  @wwidth = wwidth
  @hwidth = hwidth if hwidth >= 0
  @awidth = awidth if awidth >= 0
end
|
796
|
+
# Manage a user account of a node.
|
797
|
+
# `name' specifies the name of a user.
|
798
|
+
# `mode' specifies the operation mode. 0 means to delete the account. 1 means to set the
|
799
|
+
# account as an administrator. 2 means to set the account as a guest.
|
800
|
+
# The return value is true if success, else it is false.
|
801
|
+
def set_user(name, mode)
  # Posts "name=...&mode=..." to "<node url>/_set_user".  @status holds the
  # HTTP status of the request afterwards, or -1 when it was not performed.
  Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  turl = @url + "/_set_user"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The name is form-encoded so that "&", "=", "+" or spaces inside it
  # cannot be mistaken for field separators of the request body.
  reqbody = "name=" + URI.encode_www_form_component(name) + "&mode=" + mode.to_s
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status == 200
end
|
813
|
+
# Manage a link of a node.
|
814
|
+
# `url' specifies the URL of the target node of a link.
|
815
|
+
# `label' specifies the label of the link.
|
816
|
+
# `credit' specifies the credit of the link. If it is negative, the link is removed.
|
817
|
+
# The return value is true if success, else it is false.
|
818
|
+
def set_link(url, label, credit)
  # Posts "url=...&label=...[&credit=...]" to "<node url>/_set_link".
  # @status holds the HTTP status of the request afterwards, or -1 when it
  # was not performed.
  Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  turl = @url + "/_set_link"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Both values are form-encoded: a label or URL containing "&", "=", "+"
  # or spaces would otherwise corrupt the request body.
  reqbody = "url=" + URI.encode_www_form_component(url) +
            "&label=" + URI.encode_www_form_component(label)
  # The credit field is left out for a negative credit, which is how the
  # removal of the link is requested.
  reqbody += "&credit=" + credit.to_s if credit >= 0
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status == 200
end
|
831
|
+
#--------------------------------
|
832
|
+
# private methods
|
833
|
+
#--------------------------------
|
834
|
+
private
|
835
|
+
# Create a node connection object.
|
836
|
+
def initialize
  # Connection settings: no URL, proxy, timeout or credentials yet.  The
  # negative port and timeout mean "not used".
  @url = nil
  @pxhost = nil
  @pxport = -1
  @timeout = -1
  @auth = nil
  # Cached node information.  nil and negative values mark fields that have
  # not been fetched yet; the accessors call set_info when they see them.
  @name = nil
  @label = nil
  @dnum = -1
  @wnum = -1
  @size = -1.0
  # Default snippet widths: whole width, head width, around-word width.
  @wwidth = 480
  @hwidth = 96
  @awidth = 96
  # Status of the last request; -1 until a request has been answered.
  @status = -1
end
|
852
|
+
# Set information of the node.
|
853
|
+
def set_info
  # Requests "<node url>/inform" and caches the five tab-separated fields of
  # the first reply line: name, label, number of documents, number of words
  # and size.  On any failure the cached values are left untouched.  @status
  # holds the HTTP status afterwards, or -1 when no request was performed.
  @status = -1
  return unless @url
  turl = @url + "/inform"
  reqheads = []
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  resbody = StringIO::new
  @status = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, nil, nil, resbody)
  return if @status != 200
  first = resbody.string.chomp.split(/\n/).first
  return unless first
  elems = first.chomp.split(/\t/)
  # Anything but exactly five fields is treated as a malformed reply.
  return if elems.length != 5
  @name = elems[0]
  @label = elems[1]
  @dnum = elems[2].to_i
  @wnum = elems[3].to_i
  @size = elems[4].to_f
end
|
873
|
+
end
|
874
|
+
#:stopdoc:
|
875
|
+
#
|
876
|
+
# Module for utility
|
877
|
+
#
|
878
|
+
module Utility
  # Every method below is callable as `Utility::method_name'.
  module_function

  # Check types of arguments.
  # `types' specifies a hash object whose keys are arguments and values are class objects.
  # `nil' arguments are always accepted.
  # If there is an invalid object, an ArgumentError is raised.
  # Note that the arguments are hash keys, so two equal arguments share one entry; only the
  # class given last is checked for them and the reported position follows the hash order.
  def check_types(types)
    types.each_with_index do |(arg, klass), idx|
      next if arg.nil? || arg.kind_of?(klass)
      raise ArgumentError, "Argument##{idx + 1} should be a kind of #{klass}"
    end
  end

  # Perform an interaction of a URL.
  # `url' specifies a URL.  Only the "http" scheme is supported.
  # `pxhost' specifies the host name of a proxy.  If it is `nil', it is not used.
  # `pxport' specifies the port number of the proxy.
  # `outsec' specifies timeout in seconds.  If it is negative, it is not used.
  # `reqheads' specifies a list object of extension headers.  If it is `nil', it is not used.
  # `reqbody' specifies the entity body of request.  If it is `nil', "GET" method is used,
  # else "POST" method is used and the query part of the URL is not sent.
  # `resheads' specifies a list object into which headers of response are stored.  If it is
  # `nil' it is not used.
  # `resbody' specifies stream object into which the entity body of response is stored.  If it
  # is `nil', it is not used.
  # The return value is the status code of the response or -1 on error.
  def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
    # `normalize' returns a new object; it lower-cases scheme and host and turns an empty
    # path into "/", which keeps the request line valid for URLs such as "http://host:1978".
    uri = URI::parse(url).normalize
    return -1 if uri.scheme != "http" || !uri.host || uri.port < 1
    if pxhost
      # Through a proxy the connection goes to the proxy and the request line carries the
      # absolute URL of the target.
      host = pxhost
      port = pxport
      query = "http://" + uri.host + ":" + uri.port.to_s + uri.path
    else
      host = uri.host
      port = uri.port
      query = uri.path
    end
    query += "?" + uri.query if uri.query && !reqbody
    # The request head is assembled as plain text.  It must never be used as a printf
    # format, because "%" is common in URLs.
    head = (reqbody ? "POST " : "GET ") + query + " HTTP/1.0\r\n"
    head << "Host: #{uri.host}:#{uri.port}\r\n"
    head << "Connection: close\r\n"
    head << "User-Agent: HyperEstraierForRuby/1.0.0\r\n"
    reqheads.each { |line| head << "#{line}\r\n" } if reqheads
    # Content-Length counts bytes, not characters.
    head << "Content-Length: #{reqbody.bytesize}\r\n" if reqbody
    head << "\r\n"
    status = -1
    # The exchange runs in its own thread so that it can be abandoned on timeout.
    th = Thread::start do
      sock = nil
      begin
        sock = TCPSocket.open(host, port)
        sock.write(head)
        sock.write(reqbody) if reqbody
        status_line = sock.gets.to_s.chomp
        # Split on runs of spaces: "HTTP/1.0 200 OK" -> ["HTTP/1.0", "200", "OK"].
        elems = status_line.split(/ +/)
        if elems.length >= 2 && elems[0] =~ /^HTTP/
          code = elems[1].to_i
          resheads.push(status_line) if resheads
          # Header lines, including the blank line that ends them.
          loop do
            line = sock.gets
            raise EOFError, "response ended inside the header section" unless line
            line = line.chomp
            resheads.push(line) if resheads
            break if line.empty?
          end
          while (buf = sock.read(8192))
            resbody.write(buf) if resbody
          end
          # Published only after the whole response has been read without error.
          status = code
        end
      rescue StandardError
        # Any failure of the exchange is reported as -1 by leaving `status' untouched.
      ensure
        sock.close if sock
      end
    end
    if outsec >= 0
      unless th.join(outsec)
        th.exit
        th.join
        return -1
      end
    else
      th.join
    end
    status
  rescue
    -1
  end

  # Serialize a condition object into a query string.
  # `cond' specifies a condition object.
  # `depth' specifies depth of meta search.
  # `wwidth' specifies whole width of a snippet.
  # `hwidth' specifies width of strings picked up from the beginning of the text.
  # `awidth' specifies width of strings picked up around each highlighted word.
  # The return value is the serialized string.  Values are form-encoded, so that "&", "=",
  # "+" and spaces inside a phrase or an attribute expression do not break the query.
  def cond_to_query(cond, depth, wwidth, hwidth, awidth)
    params = []
    params.push("phrase=" + URI.encode_www_form_component(cond.phrase)) if cond.phrase
    cond.attrs.each_with_index do |attr, idx|
      params.push("attr#{idx + 1}=" + URI.encode_www_form_component(attr))
    end
    params.push("order=" + URI.encode_www_form_component(cond.order)) if cond.order
    # A non-positive maximum means "no limit", which is sent as 2**30.
    params.push("max=" + (cond.max > 0 ? cond.max : 1 << 30).to_s)
    params.push("options=" + cond.options.to_s) if cond.options > 0
    params.push("depth=" + depth.to_s) if depth > 0
    params.push("wwidth=" + wwidth.to_s)
    params.push("hwidth=" + hwidth.to_s)
    params.push("awidth=" + awidth.to_s)
    params.push("skip=" + cond.skip.to_s)
    params.join("&")
  end

  # Encode a byte sequence with Base64 encoding.
  # `data' specifies a string object.
  # The return value is the encoded string, without the line breaks that `pack' inserts.
  def base_encode(data)
    [data].pack("m").delete(" \n")
  end
end
|
1021
|
+
end
|
1022
|
+
|
1023
|
+
|
1024
|
+
|
1025
|
+
# END OF FILE
|