acts_as_searchable 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/MIT-LICENSE +20 -0
- data/README +32 -0
- data/Rakefile +186 -0
- data/TODO +0 -0
- data/init.rb +1 -0
- data/install.rb +1 -0
- data/lib/acts_as_searchable.rb +360 -0
- data/lib/vendor/estraierpure.rb +1025 -0
- data/lib/vendor/overview +100 -0
- data/rdoc/classes/ActiveRecord/Acts/Searchable.html +182 -0
- data/rdoc/classes/ActiveRecord/Acts/Searchable/ActMethods.html +233 -0
- data/rdoc/classes/ActiveRecord/Acts/Searchable/ClassMethods.html +387 -0
- data/rdoc/classes/EstraierPure/Condition.html +619 -0
- data/rdoc/classes/EstraierPure/Document.html +551 -0
- data/rdoc/classes/EstraierPure/Node.html +1172 -0
- data/rdoc/classes/EstraierPure/NodeResult.html +248 -0
- data/rdoc/classes/EstraierPure/ResultDocument.html +307 -0
- data/rdoc/created.rid +1 -0
- data/rdoc/files/README.html +165 -0
- data/rdoc/files/lib/acts_as_searchable_rb.html +140 -0
- data/rdoc/files/lib/vendor/estraierpure_rb.html +253 -0
- data/rdoc/fr_class_index.html +34 -0
- data/rdoc/fr_file_index.html +29 -0
- data/rdoc/fr_method_index.html +97 -0
- data/rdoc/index.html +24 -0
- data/rdoc/rdoc-style.css +208 -0
- data/tasks/acts_as_searchable_tasks.rake +21 -0
- data/test/abstract_unit.rb +34 -0
- data/test/acts_as_searchable_test.rb +153 -0
- data/test/database.yml +3 -0
- data/test/fixtures/article.rb +5 -0
- data/test/fixtures/articles.yml +23 -0
- data/test/fixtures/comment.rb +4 -0
- data/test/fixtures/comments.yml +4 -0
- data/test/schema.rb +14 -0
- metadata +96 -0
@@ -0,0 +1,1025 @@
|
|
1
|
+
#--
|
2
|
+
# Ruby interface of Hyper Estraier
|
3
|
+
# Copyright (C) 2004-2006 Mikio Hirabayashi
|
4
|
+
# All rights reserved.
|
5
|
+
# This file is part of Hyper Estraier.
|
6
|
+
# Redistribution and use in source and binary forms, with or without modification, are
|
7
|
+
# permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# * Redistributions of source code must retain the above copyright notice, this list of
|
10
|
+
# conditions and the following disclaimer.
|
11
|
+
# * Redistributions in binary form must reproduce the above copyright notice, this list of
|
12
|
+
# conditions and the following disclaimer in the documentation and/or other materials
|
13
|
+
# provided with the distribution.
|
14
|
+
# * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
|
15
|
+
# endorse or promote products derived from this software without specific prior written
|
16
|
+
# permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
19
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
21
|
+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
24
|
+
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
25
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
26
|
+
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
|
+
#++
|
28
|
+
#:include:overview
|
29
|
+
|
30
|
+
|
31
|
+
require "uri"
|
32
|
+
require "socket"
|
33
|
+
require "stringio"
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
#
|
38
|
+
# Module for the namespace of Hyper Estraier
|
39
|
+
#
|
40
|
+
module EstraierPure
|
41
|
+
#----------------------------------------------------------------
|
42
|
+
#++ Abstraction of document.
|
43
|
+
#----------------------------------------------------------------
|
44
|
+
class Document
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Add an attribute.
  # `name' specifies the name of an attribute.
  # `value' specifies the value of the attribute.  If it is `nil', the attribute is removed.
  # The return value is always `nil'.
  def add_attr(name, value)
    if $DEBUG
      expected = { name=>String }
      expected[value] = String unless value.nil?
      Utility::check_types(expected)
    end
    name = normalize_space(name)
    if value.nil?
      # A nil value means "remove", as documented above.
      @attrs.delete(name)
    else
      @attrs[name] = normalize_space(value)
    end
    nil
  end
  # Add a sentence of text.
  # `text' specifies a sentence of text.  Sentences that are empty after whitespace
  # normalization are ignored.
  # The return value is always `nil'.
  def add_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize_space(text)
    # 0 is truthy in Ruby, so the emptiness test must be explicit.
    @dtexts.push(text) unless text.empty?
    nil
  end
  # Add a hidden sentence.
  # `text' specifies a hidden sentence.  Sentences that are empty after whitespace
  # normalization are ignored.
  # The return value is always `nil'.
  def add_hidden_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize_space(text)
    @htexts.push(text) unless text.empty?
    nil
  end
  # Attach keywords.
  # `kwords' specifies a map object of keywords.  Keys of the map should be keywords of the
  # document and values should be their scores in decimal string.
  # The return value is always `nil'.
  def set_keywords(kwords)
    Utility::check_types({ kwords=>Hash }) if $DEBUG
    @kwords = kwords
    nil
  end
  # Get the ID number.
  # The return value is the ID number of the document object.  This class initializes it
  # to -1 and never changes it.
  def id()
    @id
  end
  # Get a list of attribute names of a document object.
  # The return value is a sorted list object of attribute names.
  def attr_names()
    @attrs.keys.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  # Get a list of sentences of the text.
  # The return value is a list object of sentences of the text.
  def texts()
    @dtexts
  end
  # Concatenate sentences of the text of a document object.
  # The return value is the sentences joined by single spaces.
  def cat_texts()
    @dtexts.join(" ")
  end
  # Dump draft data of a document object.
  # The return value is draft data: sorted `name=value' attribute lines, an optional
  # `%VECTOR' keyword line, an empty line, the visible sentences, and finally the hidden
  # sentences, each prefixed by a tab.
  def dump_draft()
    buf = StringIO::new
    @attrs.keys.sort.each do |key|
      buf.printf("%s=%s\n", key, @attrs[key])
    end
    if @kwords
      buf.printf("%%VECTOR")
      @kwords.each do |key, value|
        buf.printf("\t%s\t%s", key, value)
      end
      buf.printf("\n")
    end
    # The empty line separates the header from the text part.
    buf.printf("\n")
    @dtexts.each { |text| buf.printf("%s\n", text) }
    @htexts.each { |text| buf.printf("\t%s\n", text) }
    buf.string
  end
  # Get attached keywords.
  # The return value is a map object of keywords and their scores in decimal string.  If no
  # keyword is attached, `nil' is returned.
  def keywords()
    @kwords
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a document object.
  # `draft' specifies a string of draft data.
  def initialize(draft = "")
    Utility::check_types({ draft=>String }) if $DEBUG
    @id = -1
    @attrs = {}
    @dtexts = []
    @htexts = []
    @kwords = nil
    lines = draft.split(/\n/)
    num = 0
    # Header part: attributes and control lines, terminated by the first empty line.
    while num < lines.length
      line = lines[num]
      num += 1
      break if line.empty?
      if line =~ /^%/
        if line =~ /^%VECTOR\t/
          @kwords ||= {}
          fields = line.split(/\t/)
          # Fields after the tag come in keyword/score pairs.
          i = 1
          while i < fields.length - 1
            @kwords[fields[i]] = fields[i+1]
            i += 2
          end
        end
        next
      end
      line = normalize_space(line)
      if idx = line.index("=")
        @attrs[line[0...idx]] = line[idx+1...line.length]
      end
    end
    # Text part: a leading tab marks a hidden sentence.
    while num < lines.length
      line = lines[num]
      # Advance first so that skipping a line can never stall the loop.
      num += 1
      next if line.empty?
      # line[0, 1] yields a one-character string on every Ruby version, whereas
      # line[0] is an Integer on 1.8 and a String on 1.9 and later.
      if line[0, 1] == "\t"
        @htexts.push(line[1...line.length]) if line.length > 1
      else
        @dtexts.push(line)
      end
    end
  end
  # Collapse every run of whitespace into one space and trim both ends.
  def normalize_space(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
end
|
206
|
+
#----------------------------------------------------------------
|
207
|
+
#++ Abstraction of search condition.
|
208
|
+
#----------------------------------------------------------------
|
209
|
+
class Condition
  #--------------------------------
  # public constants
  #--------------------------------
  public
  # option: check every N-gram key
  SURE = 1 << 0
  # option: check N-gram keys skipping by one
  USUAL = 1 << 1
  # option: check N-gram keys skipping by two
  FAST = 1 << 2
  # option: check N-gram keys skipping by three
  AGITO = 1 << 3
  # option: without TF-IDF tuning
  NOIDF = 1 << 4
  # option: with the simplified phrase
  SIMPLE = 1 << 10
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The search phrase, or `nil' if none has been set.
  attr_reader :phrase
  # The list of attribute expressions.
  attr_reader :attrs
  # The order expression, or `nil' if none has been set.
  attr_reader :order
  # The maximum number of retrieval.  It starts as -1.
  attr_reader :max
  # The number of documents to be skipped in the search result.  It starts as 0.
  attr_reader :skip
  # The options of retrieval combined by bitwise or.
  attr_reader :options
  # Set the search phrase.
  # `phrase' specifies a search phrase.  Runs of whitespace are collapsed into one space.
  # The return value is always `nil'.
  def set_phrase(phrase)
    Utility::check_types({ phrase=>String }) if $DEBUG
    @phrase = phrase.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
    nil
  end
  # Add an expression for an attribute.
  # `expr' specifies an expression for an attribute.
  # The return value is always `nil'.
  def add_attr(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @attrs.push(expr.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" "))
    nil
  end
  # Set the order of a condition object.
  # `expr' specifies an expression for the order.
  # The return value is always `nil'.
  def set_order(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @order = expr.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
    nil
  end
  # Set the maximum number of retrieval.
  # `max' specifies the maximum number of retrieval.  Negative values are ignored.
  # The return value is always `nil'.
  def set_max(max)
    Utility::check_types({ max=>Integer }) if $DEBUG
    @max = max unless max < 0
    nil
  end
  # Set the number of skipped documents.
  # `skip' specifies the number of documents to be skipped in the search result.  Negative
  # values are ignored.
  # The return value is always `nil'.
  def set_skip(skip)
    Utility::check_types({ skip=>Integer }) if $DEBUG
    @skip = skip unless skip < 0
    nil
  end
  # Set options of retrieval.
  # `options' specifies options: `Condition::SURE' specifies that it checks every N-gram
  # key, `Condition::USUAL' specifies that it checks N-gram keys with skipping one key,
  # `Condition::FAST' skips two keys, `Condition::AGITO' skips three keys,
  # `Condition::NOIDF' specifies not to perform TF-IDF tuning, `Condition::SIMPLE'
  # specifies to use simplified phrase.  Options are accumulated: each call is combined
  # with the previously set options by bitwise or.
  # The return value is always `nil'.
  def set_options(options)
    Utility::check_types({ options=>Integer }) if $DEBUG
    @options = @options | options
    nil
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a search condition object with no phrase, no attribute expressions, no order,
  # a maximum of -1, no skipped documents and no options.
  def initialize()
    @phrase = nil
    @order = nil
    @attrs = []
    @max = -1
    @skip = 0
    @options = 0
  end
end
|
335
|
+
#----------------------------------------------------------------
|
336
|
+
#++ Abstraction of document in result set.
|
337
|
+
#----------------------------------------------------------------
|
338
|
+
class ResultDocument
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The URI of the result document object.
  attr_reader :uri
  # The snippet of the result document object, as passed to the constructor.
  attr_reader :snippet
  # The serialized keywords of the result document object, as passed to the constructor.
  attr_reader :keywords
  # Get a list of attribute names.
  # The return value is a sorted list object of attribute names.
  def attr_names()
    names = @attrs.keys
    names.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a result document object.
  # `uri' specifies the URI, `attrs' a map of attribute names and values, `snippet' the
  # snippet string and `keywords' the serialized keywords.  All four are stored as given.
  def initialize(uri, attrs, snippet, keywords)
    if $DEBUG
      Utility::check_types({ uri=>String, attrs=>Hash,
                             snippet=>String, keywords=>String })
    end
    @uri, @attrs = uri, attrs
    @snippet, @keywords = snippet, keywords
  end
end
|
387
|
+
#----------------------------------------------------------------
|
388
|
+
#++ Abstraction of result set from node.
|
389
|
+
#----------------------------------------------------------------
|
390
|
+
class NodeResult
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Get the number of documents.
  # The return value is the number of documents held by this result.
  def doc_num()
    @docs.length
  end
  # Get a result document object.
  # `index' specifies the index of a document, starting at 0.
  # The return value is a result document object or `nil' if the index is out of bounds.
  def get_doc(index)
    Utility::check_types({ index=>Integer }) if $DEBUG
    # Negative indexes are rejected explicitly; Array#[] would count them from the end.
    if index >= 0 && index < @docs.length
      @docs[index]
    else
      nil
    end
  end
  # Get the value of hint information.
  # `key' specifies the key of a hint.  "VERSION", "NODE", "HIT", "HINT#n", "DOCNUM", "WORDNUM",
  # "TIME", "LINK#n", and "VIEW" are provided for keys.
  # The return value is the hint or `nil' if the key does not exist.
  def hint(key)
    Utility::check_types({ key=>String }) if $DEBUG
    @hints[key]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a node result object.
  # `docs' specifies a list of result documents and `hints' a map of hint keys and values.
  def initialize(docs, hints)
    Utility::check_types({ docs=>Array, hints=>Hash }) if $DEBUG
    @docs, @hints = docs, hints
  end
end
|
426
|
+
#----------------------------------------------------------------
|
427
|
+
#++ Abstraction of connection to P2P node.
|
428
|
+
#----------------------------------------------------------------
|
429
|
+
class Node
|
430
|
+
#--------------------------------
|
431
|
+
# public methods
|
432
|
+
#--------------------------------
|
433
|
+
public
|
434
|
+
# Set the URL of a node server.
|
435
|
+
# `url' specifies the URL of a node.
|
436
|
+
# The return value is always `nil'.
|
437
|
+
def set_url(url)
  # Type checking only runs when the interpreter is in debug mode.
  Utility::check_types({ url=>String }) if $DEBUG
  # Stored as given; request methods append their command path to it.
  @url = url
  return nil
end
|
442
|
+
# Set the proxy information.
|
443
|
+
# `host' specifies the host name of a proxy server.
|
444
|
+
# `port' specifies the port number of the proxy server.
|
445
|
+
# The return value is always `nil'.
|
446
|
+
def set_proxy(host, port)
  Utility::check_types({ host=>String, port=>Integer }) if $DEBUG
  # Both values are handed to Utility::shuttle_url by the request methods.
  @pxhost, @pxport = host, port
  return nil
end
|
452
|
+
# Set timeout of a connection.
|
453
|
+
# `sec' specifies timeout of the connection in seconds.
|
454
|
+
# The return value is always `nil'.
|
455
|
+
def set_timeout(sec)
  Utility::check_types({ sec=>Integer }) if $DEBUG
  # Handed to Utility::shuttle_url by the request methods.
  @timeout = sec
  return nil
end
|
460
|
+
# Set the authentication information.
|
461
|
+
# `name' specifies the name of authentication.
|
462
|
+
# `password' specifies the password of the authentication.
|
463
|
+
# The return value is always `nil'.
|
464
|
+
def set_auth(name, password)
  Utility::check_types({ name=>String, password=>String }) if $DEBUG
  # Kept as "name:password"; request methods encode it into a Basic Authorization header.
  @auth = "#{name}:#{password}"
  return nil
end
|
469
|
+
# Get the status code of the last request.
|
470
|
+
# The return value is the status code of the last request. -1 means failure of connection.
|
471
|
+
def status()
  # Request methods reset this to -1 before talking to the server and then store the
  # value returned by Utility::shuttle_url.
  return @status
end
|
474
|
+
# Add a document.
|
475
|
+
# `doc' specifies a document object. The document object should have the URI attribute.
|
476
|
+
# The return value is true if success, else it is false.
|
477
|
+
def put_doc(doc)
  Utility::check_types({ doc=>Document }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return false unless @url
  headers = [ "Content-Type: text/x-estraier-draft" ]
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The request body is the draft form of the document.
  @status = Utility::shuttle_url(@url + "/put_doc", @pxhost, @pxport, @timeout,
                                 headers, doc.dump_draft, nil, nil)
  @status == 200
end
|
489
|
+
# Remove a document.
|
490
|
+
# `id' specifies the ID number of a registered document.
|
491
|
+
# The return value is true if success, else it is false.
|
492
|
+
def out_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return false unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  @status = Utility::shuttle_url(@url + "/out_doc", @pxhost, @pxport, @timeout,
                                 headers, "id=" + id.to_s, nil, nil)
  @status == 200
end
|
504
|
+
# Remove a document specified by URI.
|
505
|
+
# `uri' specifies the URI of a registered document.
|
506
|
+
# The return value is true if success, else it is false.
|
507
|
+
def out_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return false if !@url
  turl = @url + "/out_doc"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-encode the value so that `&', `=' and `+' inside the URI cannot be mistaken
  # for parameter separators in the request body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status = rv
  rv == 200
end
|
519
|
+
# Edit attributes of a document.
|
520
|
+
# `doc' specifies a document object.
|
521
|
+
# The return value is true if success, else it is false.
|
522
|
+
def edit_doc(doc)
  Utility::check_types({ doc=>Document }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return false unless @url
  headers = [ "Content-Type: text/x-estraier-draft" ]
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The request body is the draft form of the document.
  @status = Utility::shuttle_url(@url + "/edit_doc", @pxhost, @pxport, @timeout,
                                 headers, doc.dump_draft, nil, nil)
  @status == 200
end
|
534
|
+
# Retrieve a document.
|
535
|
+
# `id' specifies the ID number of a registered document.
|
536
|
+
# The return value is a document object. On error, `nil' is returned.
|
537
|
+
def get_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # shuttle_url writes the response body into this buffer.
  response = StringIO::new
  @status = Utility::shuttle_url(@url + "/get_doc", @pxhost, @pxport, @timeout,
                                 headers, "id=" + id.to_s, nil, response)
  # The response body is draft data, which the Document constructor parses.
  @status == 200 ? Document::new(response.string) : nil
end
|
551
|
+
# Retrieve a document specified by URI.
|
552
|
+
# `uri' specifies the URI of a registered document.
|
553
|
+
# The return value is a document object. On error, `nil' is returned.
|
554
|
+
def get_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return nil if !@url
  turl = @url + "/get_doc"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-encode the value so that `&', `=' and `+' inside the URI cannot be mistaken
  # for parameter separators in the request body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  # shuttle_url writes the response body into this buffer.
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  return nil if rv != 200
  # The response body is draft data, which the Document constructor parses.
  Document::new(resbody.string)
end
|
568
|
+
# Retrieve the value of an attribute of a document.
|
569
|
+
# `id' specifies the ID number of a registered document.
|
570
|
+
# `name' specifies the name of an attribute.
|
571
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
572
|
+
def get_doc_attr(id, name)
  Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return nil if !@url
  turl = @url + "/get_doc_attr"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-encode the attribute name so that `&', `=' and `+' inside it cannot be mistaken
  # for parameter separators in the request body.
  reqbody = "id=" + id.to_s + "&attr=" + URI.encode_www_form_component(name)
  # shuttle_url writes the response body into this buffer.
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  return nil if rv != 200
  # Drop the trailing line terminator of the response.
  resbody.string.chomp
end
|
586
|
+
# Retrieve the value of an attribute of a document specified by URI.
|
587
|
+
# `uri' specifies the URI of a registered document.
|
588
|
+
# `name' specifies the name of an attribute.
|
589
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
590
|
+
def get_doc_attr_by_uri(uri, name)
  Utility::check_types({ uri=>String, name=>String }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return nil if !@url
  turl = @url + "/get_doc_attr"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-encode both values so that `&', `=' and `+' inside them cannot be mistaken
  # for parameter separators in the request body.
  reqbody = "uri=" + URI.encode_www_form_component(uri) +
    "&attr=" + URI.encode_www_form_component(name)
  # shuttle_url writes the response body into this buffer.
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  return nil if rv != 200
  # Drop the trailing line terminator of the response.
  resbody.string.chomp
end
|
604
|
+
# Extract keywords of a document.
|
605
|
+
# `id' specifies the ID number of a registered document.
|
606
|
+
# The return value is a hash object of keywords and their scores in decimal string or `nil'
|
607
|
+
# on error.
|
608
|
+
def etch_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # shuttle_url writes the response body into this buffer.
  response = StringIO::new
  @status = Utility::shuttle_url(@url + "/etch_doc", @pxhost, @pxport, @timeout,
                                 headers, "id=" + id.to_s, nil, response)
  return nil unless @status == 200
  # Each response line holds a keyword and its score separated by a tab; lines with
  # fewer than two fields are ignored.
  scores = {}
  response.string.split(/\n/).each do |entry|
    fields = entry.split(/\t/)
    scores[fields[0]] = fields[1] if fields.length >= 2
  end
  scores
end
|
629
|
+
# Extract keywords of a document specified by URI.
|
630
|
+
# `uri' specifies the URI of a registered document.
|
631
|
+
# The return value is a hash object of keywords and their scores in decimal string or `nil'
|
632
|
+
# on error.
|
633
|
+
def etch_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return nil if !@url
  turl = @url + "/etch_doc"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-encode the value so that `&', `=' and `+' inside the URI cannot be mistaken
  # for parameter separators in the request body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  # shuttle_url writes the response body into this buffer.
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  return nil if rv != 200
  # Each response line holds a keyword and its score separated by a tab; lines with
  # fewer than two fields are ignored.
  kwords = {}
  resbody.string.split(/\n/).each do |line|
    pair = line.split(/\t/)
    next if pair.length < 2
    kwords[pair[0]] = pair[1]
  end
  kwords
end
|
654
|
+
# Get the ID of a document specified by URI.
|
655
|
+
# `uri' specifies the URI of a registered document.
|
656
|
+
# The return value is the ID of the document. On error, -1 is returned.
|
657
|
+
def uri_to_id(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  # Report failure (-1) unless a request is actually made.
  @status = -1
  return -1 if !@url
  turl = @url + "/uri_to_id"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Form-encode the value so that `&', `=' and `+' inside the URI cannot be mistaken
  # for parameter separators in the request body.
  reqbody = "uri=" + URI.encode_www_form_component(uri)
  # shuttle_url writes the response body into this buffer.
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  # Every failure path yields -1, matching the missing-URL case above.
  return -1 if rv != 200
  # Return an Integer so the result can be passed to the methods that take an ID number.
  resbody.string.chomp.to_i
end
|
671
|
+
# Get the name.
|
672
|
+
# The return value is the name. On error, `nil' is returned.
|
673
|
+
def name()
  # Call set_info only while no name is cached.
  set_info unless @name
  return @name
end
|
677
|
+
# Get the label.
|
678
|
+
# The return value is the label. On error, `nil' is returned.
|
679
|
+
def label()
  # Call set_info only while no label is cached.
  set_info unless @label
  return @label
end
|
683
|
+
# Get the number of documents.
|
684
|
+
# The return value is the number of documents. On error, -1 is returned.
|
685
|
+
def doc_num()
  # A negative count means "not known yet"; call set_info in that case.
  unknown = @dnum < 0
  set_info if unknown
  return @dnum
end
|
689
|
+
# Get the number of unique words.
|
690
|
+
# The return value is the number of unique words. On error, -1 is returned.
|
691
|
+
def word_num()
  # A negative count means "not known yet"; call set_info in that case.
  unknown = @wnum < 0
  set_info if unknown
  return @wnum
end
|
695
|
+
# Get the size of the database.
|
696
|
+
# The return value is the size of the database. On error, -1.0 is returned.
|
697
|
+
def size()
  # A negative size means "not known yet"; call set_info in that case.
  unknown = @size < 0.0
  set_info if unknown
  return @size
end
|
701
|
+
# Search documents corresponding a condition.
|
702
|
+
# `cond' specifies a condition object.
|
703
|
+
# `depth' specifies the depth of meta search.
|
704
|
+
# The return value is a node result object. On error, `nil' is returned.
|
705
|
+
def search(cond, depth)
  Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/search"
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  query = Utility::cond_to_query(cond, depth, @wwidth, @hwidth, @awidth)
  response = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, query, nil, response)
  return nil unless @status == 200
  lines = response.string.split(/\n/)
  return nil if lines.empty?
  # The result object is created first and shares `docs' and `hints', which
  # are filled in place while the response is parsed below.
  docs = []
  hints = {}
  result = NodeResult::new(docs, hints)
  # The first line is the border string that separates the sections; the
  # border followed by ":END" marks the end of the response.
  border = lines.first
  finished = false
  cursor = 1
  # First section: tab separated hints, up to the next border line.
  while cursor < lines.length
    line = lines[cursor]
    cursor += 1
    if line.start_with?(border)
      finished = (line[border.length..-1] == ":END")
      break
    end
    key, tab, value = line.partition("\t")
    hints[key] = value unless tab.empty?
  end
  # Following sections: one document each, closed by a border line.
  section_start = cursor
  until finished || cursor >= lines.length
    line = lines[cursor]
    cursor += 1
    next unless line.start_with?(border)
    if cursor > section_start
      section = lines[section_start...(cursor - 1)]
      attrs = {}
      vector = ""
      snippet = StringIO::new
      # Header part: "name=value" attributes and "%" control lines, ended by
      # the first blank line (which is consumed as well).
      header_len = 0
      section.each do |raw|
        header_len += 1
        entry = raw.strip
        break if entry.empty?
        if entry =~ /^%/
          tab_at = entry.index("\t")
          vector = entry[(tab_at + 1)..-1] if tab_at && entry =~ /%VECTOR/
        else
          attr_key, eq, attr_value = entry.partition("=")
          attrs[attr_key] = attr_value unless eq.empty?
        end
      end
      # Everything after the header is the snippet, kept line by line.
      section.drop(header_len).each { |raw| snippet.printf("%s\n", raw) }
      uri = attrs["@uri"]
      docs.push(ResultDocument::new(uri, attrs, snippet.string, vector)) if uri
    end
    section_start = cursor
    finished = true if line[border.length..-1] == ":END"
  end
  # A response without the closing ":END" border is treated as an error.
  finished ? result : nil
end
|
784
|
+
# Set width of snippet in the result.
|
785
|
+
# `wwidth' specifies whole width of a snippet. By default, it is 480. If it is 0, no
|
786
|
+
# snippet is sent. If it is negative, whole body text is sent instead of snippet.
|
787
|
+
# `hwidth' specifies width of strings picked up from the beginning of the text. By default,
|
788
|
+
# it is 96. If it is negative, the current setting is not changed.
|
789
|
+
# `awidth' specifies width of strings picked up around each highlighted word. By default,
|
790
|
+
# it is 96. If it is negative, the current setting is not changed.
|
791
|
+
def set_snippet_width(wwidth, hwidth, awidth)
  # The whole width is always taken over; 0 and negative values are meaningful.
  @wwidth = wwidth
  # The other two widths are only replaced by non-negative values.
  if hwidth >= 0
    @hwidth = hwidth
  end
  if awidth >= 0
    @awidth = awidth
  end
end
|
796
|
+
# Manage a user account of a node.
|
797
|
+
# `name' specifies the name of a user.
|
798
|
+
# `mode' specifies the operation mode. 0 means to delete the account. 1 means to set the
|
799
|
+
# account as an administrator. 2 means to set the account as a guest.
|
800
|
+
# The return value is true if success, else it is false.
|
801
|
+
def set_user(name, mode)
  Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
  # -1 is reported as the status until a response has actually been received.
  @status = -1
  return false if !@url
  turl = @url + "/_set_user"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The name is escaped as a form component so that reserved characters such
  # as "&", "=" and "+" cannot break the body into different parameters.
  reqbody = "name=" + URI::encode_www_form_component(name) + "&mode=" + mode.to_s
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status = rv
  rv == 200
end
|
813
|
+
# Manage a link of a node.
|
814
|
+
# `url' specifies the URL of the target node of a link.
|
815
|
+
# `label' specifies the label of the link.
|
816
|
+
# `credit' specifies the credit of the link. If it is negative, the link is removed.
|
817
|
+
# The return value is true if success, else it is false.
|
818
|
+
def set_link(url, label, credit)
  Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
  # -1 is reported as the status until a response has actually been received.
  @status = -1
  return false if !@url
  turl = @url + "/_set_link"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Both values are escaped as form components: a target URL normally contains
  # "?", "&" or "=", and a label is free text, so either one could otherwise
  # split the body into unintended parameters.
  reqbody = "url=" + URI::encode_www_form_component(url) +
    "&label=" + URI::encode_www_form_component(label)
  # A negative credit is not sent at all, which requests removal of the link.
  reqbody += "&credit=" + credit.to_s if credit >= 0
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status = rv
  rv == 200
end
|
831
|
+
#--------------------------------
|
832
|
+
# private methods
|
833
|
+
#--------------------------------
|
834
|
+
private
|
835
|
+
# Create a node connection object.
|
836
|
+
def initialize()
  # Connection settings: nothing is configured yet; -1 means "not used".
  @url, @pxhost, @auth = nil, nil, nil
  @pxport, @timeout = -1, -1
  # Cached node information: nil or a negative value means "not fetched yet".
  @name, @label = nil, nil
  @dnum, @wnum = -1, -1
  @size = -1.0
  # Default snippet widths sent with every search request.
  @wwidth = 480
  @hwidth = 96
  @awidth = 96
  # Status code of the last response; -1 until a request has succeeded.
  @status = -1
end
|
852
|
+
# Set information of the node.
|
853
|
+
def set_info()
  @status = -1
  return unless @url
  endpoint = @url + "/inform"
  headers = []
  headers.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  response = StringIO::new
  code = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, nil, nil, response)
  @status = code
  return unless code == 200
  # Only the first line of the response is used.
  summary = response.string.chomp.split(/\n/).first
  return unless summary
  # It must hold exactly five tab separated fields:
  # name, label, number of documents, number of words and size.
  fields = summary.chomp.split(/\t/)
  return unless fields.length == 5
  @name, @label = fields[0], fields[1]
  @dnum, @wnum = fields[2].to_i, fields[3].to_i
  @size = fields[4].to_f
end
|
873
|
+
end
|
874
|
+
#:stopdoc:
|
875
|
+
#
|
876
|
+
# Module for utility
|
877
|
+
#
|
878
|
+
module Utility
  public
  # Check types of arguments.
  # `types' specifies a hash object whose keys are arguments and values are class objects.
  # Because the arguments are the hash keys, arguments that are equal to each other collapse
  # into a single entry and are checked only once.
  # If there is an invalid object, an ArgumentError is raised.  `nil' is always accepted.
  def check_types(types)
    position = 0
    types.each do |value, klass|
      position += 1
      next if value.nil? || value.kind_of?(klass)
      raise ArgumentError::new("Argument#" + position.to_s +
                               " should be a kind of " + klass.to_s)
    end
  end
  module_function :check_types
  # Perform an interaction of a URL.
  # `url' specifies a URL.  Only the "http" scheme is supported.
  # `pxhost' specifies the host name of a proxy.  If it is `nil', it is not used.
  # `pxport' specifies the port number of the proxy.
  # `outsec' specifies timeout in seconds.  If it is negative, it is not used.
  # `reqheads' specifies a list object of extension headers.  If it is `nil', it is not used.
  # `reqbody' specifies the entity body of the request.  If it is `nil', the "GET" method is
  # used, otherwise the "POST" method is used.
  # `resheads' specifies a list object into which headers of response is stored.  If it is
  # `nil' it is not used.
  # `resbody' specifies stream object into which the entity body of response is stored.  If it
  # is `nil', it is not used.
  # The return value is the status code of the response or -1 on error.
  def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
    begin
      status = -1
      # The whole exchange runs in a worker thread so that it can be abandoned on timeout.
      th = Thread::start do
        # `normalize' returns a new object, so its result has to be kept.  It also turns an
        # empty path into "/", which keeps the request line well-formed.
        target = URI::parse(url).normalize
        Thread::current.exit if target.scheme != "http" || !target.host || target.port < 1
        if pxhost
          # A proxy is given the absolute URL in the request line.
          host = pxhost
          port = pxport
          query = "http://" + target.host + ":" + target.port.to_s + target.path
        else
          host = target.host
          port = target.port
          query = target.path
        end
        query += "?" + target.query if target.query && !reqbody
        sock = nil
        begin
          sock = TCPSocket.open(host, port)
          # The request line is written verbatim: it must never be used as a format string
          # because an escaped path or query contains "%".
          sock.write((reqbody ? "POST " : "GET ") + query + " HTTP/1.0\r\n")
          sock.printf("Host: %s:%d\r\n", target.host, target.port)
          sock.write("Connection: close\r\n")
          sock.write("User-Agent: HyperEstraierForRuby/1.0.0\r\n")
          if reqheads
            reqheads.each do |head|
              sock.write(head.to_s + "\r\n")
            end
          end
          # Content-Length counts bytes, not characters.
          sock.printf("Content-Length: %d\r\n", reqbody.bytesize) if reqbody
          sock.write("\r\n")
          sock.write(reqbody) if reqbody
          line = sock.gets.chomp
          code = parse_status_line(line)
          Thread::current.exit if !code
          status = code
          resheads.push(line) if resheads
          # Read the headers up to and including the empty line that ends them.
          loop do
            line = sock.gets.chomp
            resheads.push(line) if resheads
            break if line.length < 1
          end
          while buf = sock.read(8192)
            resbody.write(buf) if resbody
          end
        ensure
          sock.close if sock
        end
      end
      if outsec >= 0
        unless th.join(outsec)
          # Timed out: stop the worker and wait until its socket has been closed.
          th.exit
          th.join
          return -1
        end
      else
        th.join
      end
      return status
    rescue
      # Any failure, including one raised inside the worker thread, is reported as -1.
      return -1
    end
  end
  module_function :shuttle_url
  # Extract the status code from the status line of a response.
  # `line' specifies the status line without the line terminator.
  # The return value is the status code, or `nil' if the line does not start with "HTTP" or
  # has less than three fields separated by spaces.
  def parse_status_line(line)
    # One or more spaces separate the fields.  A pattern that can match the empty string
    # would split the line into single characters.
    elems = line.split(/ +/)
    return nil if elems.length < 3 || !(elems[0] =~ /^HTTP/)
    elems[1].to_i
  end
  module_function :parse_status_line
  private_class_method :parse_status_line
  # Serialize a condition object into a query string.
  # `cond' specifies a condition object.
  # `depth' specifies depth of meta search.
  # `wwidth' specifies whole width of a snippet.
  # `hwidth' specifies width of strings picked up from the beginning of the text.
  # `awidth' specifies width of strings picked up around each highlighted word.
  # The return value is the serialized string.
  def cond_to_query(cond, depth, wwidth, hwidth, awidth)
    params = []
    # Free text values are escaped as form components so that "&", "=" and "+" inside a
    # phrase or an attribute expression cannot be mistaken for parameter syntax.
    params.push("phrase=" + URI::encode_www_form_component(cond.phrase)) if cond.phrase
    cond.attrs.each_with_index do |expr, i|
      params.push("attr" + (i + 1).to_s + "=" + URI::encode_www_form_component(expr))
    end
    params.push("order=" + URI::encode_www_form_component(cond.order)) if cond.order
    # A non-positive maximum means "no limit", which is sent as a very large number.
    params.push("max=" + (cond.max > 0 ? cond.max : (1 << 30)).to_s)
    params.push("options=" + cond.options.to_s) if cond.options > 0
    params.push("depth=" + depth.to_s) if depth > 0
    params.push("wwidth=" + wwidth.to_s)
    params.push("hwidth=" + hwidth.to_s)
    params.push("awidth=" + awidth.to_s)
    params.push("skip=" + cond.skip.to_s)
    params.join("&")
  end
  module_function :cond_to_query
  # Encode a byte sequence with Base64 encoding.
  # `data' specifies a string object.
  # The return value is the encoded string, without any line break.
  def base_encode(data)
    [data].pack("m").gsub(/[ \n]/, "")
  end
  module_function :base_encode
end
|
1021
|
+
end
|
1022
|
+
|
1023
|
+
|
1024
|
+
|
1025
|
+
# END OF FILE
|