search_do 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1025 @@
1
+ #--
2
+ # Ruby interface of Hyper Estraier
3
+ # Copyright (C) 2004-2006 Mikio Hirabayashi
4
+ # All rights reserved.
5
+ # This file is part of Hyper Estraier.
6
+ # Redistribution and use in source and binary forms, with or without modification, are
7
+ # permitted provided that the following conditions are met:
8
+ #
9
+ # * Redistributions of source code must retain the above copyright notice, this list of
10
+ # conditions and the following disclaimer.
11
+ # * Redistributions in binary form must reproduce the above copyright notice, this list of
12
+ # conditions and the following disclaimer in the documentation and/or other materials
13
+ # provided with the distribution.
14
+ # * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
15
+ # endorse or promote products derived from this software without specific prior written
16
+ # permission.
17
+ #
18
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
19
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
26
+ # OF THE POSSIBILITY OF SUCH DAMAGE.
27
+ #++
28
+ #:include:overview
29
+
30
+
31
+ require "uri"
32
+ require "socket"
33
+ require "stringio"
34
+
35
+
36
+
37
+ #
38
+ # Module for the namespace of Hyper Estraier
39
+ #
40
+ module EstraierPure
41
+ #----------------------------------------------------------------
42
+ #++ Abstraction of document.
43
+ #----------------------------------------------------------------
44
class Document
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Add an attribute.
  # `name' specifies the name of an attribute.
  # `value' specifies the value of the attribute.  If it is `nil', the attribute is removed.
  # Runs of whitespace in both the name and the value are collapsed into a single space.
  # The return value is always `nil'.
  def add_attr(name, value)
    Utility::check_types({ name=>String, value=>String }) if $DEBUG
    name = normalize(name)
    if value.nil?
      # Documented contract: a nil value removes the attribute.
      @attrs.delete(name)
    else
      @attrs[name] = normalize(value)
    end
    nil
  end
  # Add a sentence of text.
  # `text' specifies a sentence of text.  A sentence which is empty after whitespace
  # normalization is ignored.
  # The return value is always `nil'.
  def add_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize(text)
    # 0 is truthy in Ruby, so emptiness has to be tested explicitly.
    @dtexts.push(text) unless text.empty?
    nil
  end
  # Add a hidden sentence.
  # `text' specifies a hidden sentence.  A sentence which is empty after whitespace
  # normalization is ignored.
  # The return value is always `nil'.
  def add_hidden_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize(text)
    @htexts.push(text) unless text.empty?
    nil
  end
  # Attach keywords.
  # `kwords' specifies a map object of keywords.  Keys of the map should be keywords of the
  # document and values should be their scores in decimal string.
  # The return value is always `nil'.
  def set_keywords(kwords)
    Utility::check_types({ kwords=>Hash }) if $DEBUG
    @kwords = kwords
    nil
  end
  # Get the ID number.
  # The return value is the ID number of the document object.  If the object has never been
  # registered, -1 is returned.
  # NOTE(review): nothing in this class assigns @id after construction, so this currently
  # always yields -1 -- confirm whether it should be derived from an attribute.
  def id()
    @id
  end
  # Get a list of attribute names of a document object.
  # The return value is a sorted list object of attribute names.
  def attr_names()
    @attrs.keys.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  # Get a list of sentences of the text.
  # The return value is a list object of sentences of the text.
  def texts()
    @dtexts
  end
  # Concatenate sentences of the text of a document object.
  # The return value is the sentences joined by a single space.
  def cat_texts()
    @dtexts.join(" ")
  end
  # Dump draft data of a document object.
  # The draft consists of `name=value' lines sorted by name, an optional `%VECTOR' line
  # holding the keywords, an empty line, the sentences of the text, and finally the hidden
  # sentences, each prefixed with a tab.
  # The return value is draft data.
  def dump_draft()
    buf = StringIO::new
    @attrs.keys.sort.each do |key|
      buf << "#{key}=#{@attrs[key]}\n"
    end
    if @kwords
      buf << "%VECTOR"
      @kwords.each do |key, value|
        buf << "\t#{key}\t#{value}"
      end
      buf << "\n"
    end
    buf << "\n"
    @dtexts.each { |text| buf << "#{text}\n" }
    @htexts.each { |text| buf << "\t#{text}\n" }
    buf.string
  end
  # Get attached keywords.
  # The return value is a map object of keywords and their scores in decimal string.  If no
  # keyword is attached, `nil' is returned.
  def keywords()
    @kwords
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a document object.
  # `draft' specifies a string of draft data.  The part up to the first empty line is read as
  # attributes and `%' control lines; the rest is read as text, where a line starting with a
  # tab is a hidden sentence.
  def initialize(draft = "")
    Utility::check_types({ draft=>String }) if $DEBUG
    @id = -1
    @attrs = {}
    @dtexts = []
    @htexts = []
    @kwords = nil
    lines = draft.split(/\n/)
    num = 0
    # Header part: attributes and control lines, terminated by an empty line.
    while num < lines.length
      line = lines[num]
      num += 1
      break if line.empty?
      if line =~ /^%/
        if line =~ /^%VECTOR\t/
          @kwords ||= {}
          # Fields after the tag come as keyword/score pairs; a dangling keyword is dropped.
          (line.split(/\t/)[1..-1] || []).each_slice(2) do |key, value|
            @kwords[key] = value if value
          end
        end
        next
      end
      line = normalize(line)
      if idx = line.index("=")
        @attrs[line[0...idx]] = line[(idx + 1)..-1]
      end
    end
    # Body part: visible sentences and tab-prefixed hidden sentences.
    (lines[num..-1] || []).each do |line|
      next if line.empty?
      # String#[] yields a String on Ruby >= 1.9, so compare against "\t", not 0x9.
      if line[0, 1] == "\t"
        @htexts.push(line[1..-1]) if line.length > 1
      else
        @dtexts.push(line)
      end
    end
  end
  # Collapse every run of whitespace in `str' into one space and trim both ends.
  def normalize(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
end
206
+ #----------------------------------------------------------------
207
+ #++ Abstraction of search condition.
208
+ #----------------------------------------------------------------
209
class Condition
  #--------------------------------
  # public constants
  #--------------------------------
  public
  # option: check every N-gram key
  SURE = 1 << 0
  # option: check N-gram keys skipping by one
  USUAL = 1 << 1
  # option: check N-gram keys skipping by two
  FAST = 1 << 2
  # option: check N-gram keys skipping by three
  AGITO = 1 << 3
  # option: without TF-IDF tuning
  NOIDF = 1 << 4
  # option: with the simplified phrase
  SIMPLE = 1 << 10
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The search phrase, or `nil' if none has been set.
  attr_reader :phrase
  # The list of expressions for attributes.
  attr_reader :attrs
  # The order expression, or `nil' if none has been set.
  attr_reader :order
  # The maximum number of retrieval.  It is -1 until `set_max' is called.
  attr_reader :max
  # The number of documents to be skipped in the search result.
  attr_reader :skip
  # The options of retrieval, combined by bitwise or.
  attr_reader :options
  # Set the search phrase.
  # `phrase' specifies a search phrase.  Runs of whitespace are collapsed into one space.
  # The return value is always `nil'.
  def set_phrase(phrase)
    Utility::check_types({ phrase=>String }) if $DEBUG
    @phrase = normalize(phrase)
    nil
  end
  # Add an expression for an attribute.
  # `expr' specifies an expression for an attribute.
  # The return value is always `nil'.
  def add_attr(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @attrs.push(normalize(expr))
    nil
  end
  # Set the order of a condition object.
  # `expr' specifies an expression for the order.  By default, the order is by score descending.
  # The return value is always `nil'.
  def set_order(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @order = normalize(expr)
    nil
  end
  # Set the maximum number of retrieval.
  # `max' specifies the maximum number of retrieval.  By default, the number of retrieval is
  # not limited.  A negative value is ignored and leaves the current setting unchanged.
  # The return value is always `nil'.
  def set_max(max)
    Utility::check_types({ max=>Integer }) if $DEBUG
    @max = max if max >= 0
    nil
  end
  # Set the number of skipped documents.
  # `skip' specifies the number of documents to be skipped in the search result.  A negative
  # value is ignored and leaves the current setting unchanged.
  # The return value is always `nil'.
  def set_skip(skip)
    Utility::check_types({ skip=>Integer }) if $DEBUG
    @skip = skip if skip >= 0
    nil
  end
  # Set options of retrieval.
  # `options' specifies options: `Condition::SURE' specifies that it checks every N-gram
  # key, `Condition::USUAL', which is the default, specifies that it checks N-gram keys
  # with skipping one key, `Condition::FAST' skips two keys, `Condition::AGITO'
  # skips three keys, `Condition::NOIDF' specifies not to perform TF-IDF tuning,
  # `Condition::SIMPLE' specifies to use simplified phrase.  Each option can be specified at
  # the same time by bitwise or.  If keys are skipped, though search speed is improved, the
  # relevance ratio grows less.  The given bits are added to the options already set.
  # The return value is always `nil'.
  def set_options(options)
    Utility::check_types({ options=>Integer }) if $DEBUG
    @options |= options
    nil
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a search condition object.
  def initialize()
    @phrase = nil
    @attrs = []
    @order = nil
    @max = -1
    @skip = 0
    @options = 0
  end
  # Collapse every run of whitespace in `str' into one space and trim both ends.
  def normalize(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
end
335
+ #----------------------------------------------------------------
336
+ #++ Abstraction of document in result set.
337
+ #----------------------------------------------------------------
338
class ResultDocument
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The URI of the result document object.
  attr_reader :uri
  # The snippet of the result document object, as a string of tab separated values.  Each
  # line is a string to be shown.  Though most lines have only one field, some lines have two
  # fields.  If the second field exists, the first field is to be shown with highlighted, and
  # the second field means its normalized form.
  attr_reader :snippet
  # The serialized keywords of the result document object, as a string of tab separated
  # values.  Keywords and their scores come alternately.
  attr_reader :keywords
  # Get a list of attribute names.
  # The return value is a sorted list object of attribute names.
  def attr_names()
    names = @attrs.keys
    names.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a result document object.
  # `uri' specifies the URI, `attrs' a hash of attributes, `snippet' the snippet string and
  # `keywords' the serialized keywords.
  def initialize(uri, attrs, snippet, keywords)
    if $DEBUG
      Utility::check_types({ uri=>String, attrs=>Hash,
                             snippet=>String, keywords=>String })
    end
    @uri, @attrs, @snippet, @keywords = uri, attrs, snippet, keywords
  end
end
387
+ #----------------------------------------------------------------
388
+ #++ Abstraction of result set from node.
389
+ #----------------------------------------------------------------
390
class NodeResult
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Get the number of documents.
  # The return value is the number of documents.
  def doc_num()
    @docs.length
  end
  # Get a result document object.
  # `index' specifies the zero-based index of a document in the result.
  # The return value is a result document object or `nil' if the index is out of bounds.
  def get_doc(index)
    Utility::check_types({ index=>Integer }) if $DEBUG
    # Negative indexes are rejected explicitly so that they do not count from the end.
    return nil unless index.between?(0, @docs.length - 1)
    @docs[index]
  end
  # Get the value of hint information.
  # `key' specifies the key of a hint.  "VERSION", "NODE", "HIT", "HINT#n", "DOCNUM", "WORDNUM",
  # "TIME", "LINK#n", and "VIEW" are provided for keys.
  # The return value is the hint or `nil' if the key does not exist.
  def hint(key)
    Utility::check_types({ key=>String }) if $DEBUG
    @hints[key]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a node result object.
  # `docs' specifies a list of result document objects and `hints' a hash of hint information.
  def initialize(docs, hints)
    Utility::check_types({ docs=>Array, hints=>Hash }) if $DEBUG
    @docs = docs
    @hints = hints
  end
end
426
+ #----------------------------------------------------------------
427
+ #++ Abstraction of connection to P2P node.
428
+ #----------------------------------------------------------------
429
class Node
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Set the URL of a node server.
  # `url' specifies the URL of a node.
  # The return value is always `nil'.
  def set_url(url)
    Utility::check_types({ url=>String }) if $DEBUG
    @url = url
    nil
  end
  # Set the proxy information.
  # `host' specifies the host name of a proxy server.
  # `port' specifies the port number of the proxy server.
  # The return value is always `nil'.
  def set_proxy(host, port)
    Utility::check_types({ host=>String, port=>Integer }) if $DEBUG
    @pxhost = host
    @pxport = port
    nil
  end
  # Set timeout of a connection.
  # `sec' specifies timeout of the connection in seconds.
  # The return value is always `nil'.
  def set_timeout(sec)
    Utility::check_types({ sec=>Integer }) if $DEBUG
    @timeout = sec
    nil
  end
  # Set the authentication information.
  # `name' specifies the name of authentication.
  # `password' specifies the password of the authentication.
  # The return value is always `nil'.
  def set_auth(name, password)
    Utility::check_types({ name=>String, password=>String }) if $DEBUG
    @auth = name + ":" + password
    nil
  end
  # Get the status code of the last request.
  # The return value is the status code of the last request.  -1 means failure of connection.
  def status()
    @status
  end
  # Add a document.
  # `doc' specifies a document object.  The document object should have the URI attribute.
  # The return value is true if success, else it is false.
  def put_doc(doc)
    Utility::check_types({ doc=>Document }) if $DEBUG
    shuttle("/put_doc", doc.dump_draft, nil, "text/x-estraier-draft") == 200
  end
  # Remove a document.
  # `id' specifies the ID number of a registered document.
  # The return value is true if success, else it is false.
  def out_doc(id)
    Utility::check_types({ id=>Integer }) if $DEBUG
    shuttle("/out_doc", "id=" + id.to_s) == 200
  end
  # Remove a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is true if success, else it is false.
  def out_doc_by_uri(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    shuttle("/out_doc", "uri=" + escape(uri)) == 200
  end
  # Edit attributes of a document.
  # `doc' specifies a document object.
  # The return value is true if success, else it is false.
  def edit_doc(doc)
    Utility::check_types({ doc=>Document }) if $DEBUG
    shuttle("/edit_doc", doc.dump_draft, nil, "text/x-estraier-draft") == 200
  end
  # Retrieve a document.
  # `id' specifies the ID number of a registered document.
  # The return value is a document object.  On error, `nil' is returned.
  def get_doc(id)
    Utility::check_types({ id=>Integer }) if $DEBUG
    draft = fetch("/get_doc", "id=" + id.to_s)
    draft && Document::new(draft)
  end
  # Retrieve a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is a document object.  On error, `nil' is returned.
  def get_doc_by_uri(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    draft = fetch("/get_doc", "uri=" + escape(uri))
    draft && Document::new(draft)
  end
  # Retrieve the value of an attribute of a document.
  # `id' specifies the ID number of a registered document.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def get_doc_attr(id, name)
    Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
    value = fetch("/get_doc_attr", "id=" + id.to_s + "&attr=" + escape(name))
    value && value.chomp
  end
  # Retrieve the value of an attribute of a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def get_doc_attr_by_uri(uri, name)
    Utility::check_types({ uri=>String, name=>String }) if $DEBUG
    value = fetch("/get_doc_attr", "uri=" + escape(uri) + "&attr=" + escape(name))
    value && value.chomp
  end
  # Extract keywords of a document.
  # `id' specifies the ID number of a registered document.
  # The return value is a hash object of keywords and their scores in decimal string or `nil'
  # on error.
  def etch_doc(id)
    Utility::check_types({ id=>Integer }) if $DEBUG
    parse_keywords(fetch("/etch_doc", "id=" + id.to_s))
  end
  # Extract keywords of a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is a hash object of keywords and their scores in decimal string or `nil'
  # on error.
  def etch_doc_by_uri(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    parse_keywords(fetch("/etch_doc", "uri=" + escape(uri)))
  end
  # Get the ID of a document specified by URI.
  # `uri' specifies the URI of a registered document.
  # The return value is the ID of the document as an integer.  On error, -1 is returned.
  def uri_to_id(uri)
    Utility::check_types({ uri=>String }) if $DEBUG
    body = fetch("/uri_to_id", "uri=" + escape(uri))
    # Every failure path yields -1, as documented, and the ID is numeric so that it can be
    # passed straight to the methods taking an ID.
    body ? body.chomp.to_i : -1
  end
  # Get the name.
  # The return value is the name.  On error, `nil' is returned.
  def name()
    set_info unless @name
    @name
  end
  # Get the label.
  # The return value is the label.  On error, `nil' is returned.
  def label()
    set_info unless @label
    @label
  end
  # Get the number of documents.
  # The return value is the number of documents.  On error, -1 is returned.
  def doc_num()
    set_info if @dnum < 0
    @dnum
  end
  # Get the number of unique words.
  # The return value is the number of unique words.  On error, -1 is returned.
  def word_num()
    set_info if @wnum < 0
    @wnum
  end
  # Get the size of the database.
  # The return value is the size of the database.  On error, -1.0 is returned.
  def size()
    set_info if @size < 0.0
    @size
  end
  # Search documents corresponding a condition.
  # `cond' specifies a condition object.
  # `depth' specifies the depth of meta search.
  # The return value is a node result object.  On error, `nil' is returned.
  def search(cond, depth)
    Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
    @status = -1
    # Checked before building the query so that nothing is computed without a target node.
    return nil unless @url
    body = fetch("/search", Utility::cond_to_query(cond, depth, @wwidth, @hwidth, @awidth))
    body && parse_search_result(body.split(/\n/))
  end
  # Set width of snippet in the result.
  # `wwidth' specifies whole width of a snippet.  By default, it is 480.  If it is 0, no
  # snippet is sent.  If it is negative, whole body text is sent instead of snippet.
  # `hwidth' specifies width of strings picked up from the beginning of the text.  By default,
  # it is 96.  If it is negative, the current setting is not changed.
  # `awidth' specifies width of strings picked up around each highlighted word.  By default,
  # it is 96.  If it is negative, the current setting is not changed.
  def set_snippet_width(wwidth, hwidth, awidth)
    @wwidth = wwidth
    @hwidth = hwidth if hwidth >= 0
    @awidth = awidth if awidth >= 0
  end
  # Manage a user account of a node.
  # `name' specifies the name of a user.
  # `mode' specifies the operation mode.  0 means to delete the account.  1 means to set the
  # account as an administrator.  2 means to set the account as a guest.
  # The return value is true if success, else it is false.
  def set_user(name, mode)
    Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
    shuttle("/_set_user", "name=" + escape(name) + "&mode=" + mode.to_s) == 200
  end
  # Manage a link of a node.
  # `url' specifies the URL of the target node of a link.
  # `label' specifies the label of the link.
  # `credit' specifies the credit of the link.  If it is negative, the link is removed.
  # The return value is true if success, else it is false.
  def set_link(url, label, credit)
    Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
    reqbody = "url=" + escape(url) + "&label=" + escape(label)
    # A negative credit is signalled by leaving the parameter out of the request.
    reqbody += "&credit=" + credit.to_s if credit >= 0
    shuttle("/_set_link", reqbody) == 200
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a node connection object.
  def initialize()
    @url = nil
    @pxhost = nil
    @pxport = -1
    @timeout = -1
    @auth = nil
    @name = nil
    @label = nil
    @dnum = -1
    @wnum = -1
    @size = -1.0
    @wwidth = 480
    @hwidth = 96
    @awidth = 96
    @status = -1
  end
  # Set information of the node.
  # Requests "/inform" and, when the first line of the reply has exactly five tab separated
  # fields, stores them as name, label, document number, word number and size.
  def set_info()
    body = fetch("/inform", nil, nil)
    return unless body
    lines = body.chomp.split(/\n/)
    return if lines.length < 1
    elems = lines[0].chomp.split(/\t/)
    return if elems.length != 5
    @name = elems[0]
    @label = elems[1]
    @dnum = elems[2].to_i
    @wnum = elems[3].to_i
    @size = elems[4].to_f
  end
  # Escape `str' for use as a value in an "application/x-www-form-urlencoded" body.
  # Reserved characters such as "&" and "=" are percent-encoded so that they cannot break the
  # parameter structure; a space is sent as "%20" rather than "+".
  def escape(str)
    URI.encode_www_form_component(str).gsub("+", "%20")
  end
  # Send one request to the node and record the status code.
  # `path' is appended to the node URL.  `reqbody' is the entity body, or `nil' for none.
  # `resbody' is a stream receiving the response body, or `nil'.  `ctype' is the value of the
  # Content-Type header, or `nil' to send none.
  # The return value is the status code, or -1 if no URL has been set.
  def shuttle(path, reqbody, resbody = nil, ctype = "application/x-www-form-urlencoded")
    @status = -1
    return -1 unless @url
    reqheads = []
    reqheads.push("Content-Type: " + ctype) if ctype
    reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    @status = Utility::shuttle_url(@url + path, @pxhost, @pxport, @timeout,
                                   reqheads, reqbody, nil, resbody)
  end
  # Send one request and collect the response body.
  # The return value is the response body, or `nil' unless the status code is 200.
  def fetch(path, reqbody, ctype = "application/x-www-form-urlencoded")
    resbody = StringIO::new
    return nil unless shuttle(path, reqbody, resbody, ctype) == 200
    resbody.string
  end
  # Parse lines of "keyword<TAB>score" into a hash.  Lines without a tab are skipped.
  # The return value is the hash, or `nil' if `body' is `nil'.
  def parse_keywords(body)
    return nil unless body
    kwords = {}
    body.split(/\n/).each do |line|
      pair = line.split(/\t/)
      next if pair.length < 2
      kwords[pair[0]] = pair[1]
    end
    kwords
  end
  # Parse the lines of a search response.
  # The first line is the border string.  Hint lines follow up to the next border line, then
  # come document parts each terminated by a border line; the border followed by ":END"
  # closes the response.
  # The return value is a node result object, or `nil' if the closing border is missing.
  def parse_search_result(lines)
    return nil if lines.length < 1
    docs = []
    hints = {}
    nres = NodeResult::new(docs, hints)
    border = lines[0]
    isend = false
    lnum = 1
    while lnum < lines.length
      line = lines[lnum]
      lnum += 1
      if line.length >= border.length && line.index(border) == 0
        isend = true if line[border.length...line.length] == ":END"
        break
      end
      lidx = line.index("\t")
      hints[line[0...lidx]] = line[(lidx + 1)...line.length] if lidx
    end
    snum = lnum
    while !isend && lnum < lines.length
      line = lines[lnum]
      lnum += 1
      next unless line.length >= border.length && line.index(border) == 0
      # lines[snum...(lnum - 1)] is the part between the previous border and this one.
      rdoc = parse_result_document(lines, snum, lnum - 1)
      docs.push(rdoc) if rdoc
      snum = lnum
      isend = true if line[border.length...line.length] == ":END"
    end
    isend ? nres : nil
  end
  # Build a result document from lines[first...last].
  # Lines up to the first empty one are attributes or `%' control lines; the remaining lines
  # form the snippet.
  # The return value is a result document object, or `nil' if the part has no "@uri"
  # attribute.
  def parse_result_document(lines, first, last)
    rdattrs = {}
    rdvector = ""
    rlnum = first
    while rlnum < last
      rdline = lines[rlnum].strip
      rlnum += 1
      break if rdline.length < 1
      if rdline =~ /^%/
        lidx = rdline.index("\t")
        rdvector = rdline[(lidx + 1)...rdline.length] if rdline =~ /%VECTOR/ && lidx
      else
        lidx = rdline.index("=")
        rdattrs[rdline[0...lidx]] = rdline[(lidx + 1)...rdline.length] if lidx
      end
    end
    sb = StringIO::new
    while rlnum < last
      sb << lines[rlnum] << "\n"
      rlnum += 1
    end
    rduri = rdattrs["@uri"]
    return nil unless rduri
    ResultDocument::new(rduri, rdattrs, sb.string, rdvector)
  end
end
874
+ #:stopdoc:
875
+ #
876
+ # Module for utility
877
+ #
878
module Utility
  public
  # Check types of arguments.
  # `types' specifies a hash object whose keys are arguments and values are class objects.
  # An argument which is `nil' is always accepted.
  # If there is an invalid object, an ArgumentError is raised.
  # Because the arguments themselves are the hash keys, arguments that are equal to each
  # other share one entry, and the reported number is the position of the entry in the hash.
  def check_types(types)
    types.each_with_index do |(arg, klass), idx|
      next if arg.nil? || arg.kind_of?(klass)
      raise ArgumentError, "Argument#" + (idx + 1).to_s + " should be a kind of " + klass.to_s
    end
  end
  module_function :check_types
  # Perform an interaction of a URL.
  # `url' specifies a URL.  Only the "http" scheme is supported.
  # `pxhost' specifies the host name of a proxy.  If it is `nil', it is not used.
  # `pxport' specifies the port number of the proxy.
  # `outsec' specifies timeout in seconds.  If it is negative, it is not used.
  # `reqheads' specifies a list object of extension headers.  If it is `nil', it is not used.
  # `reqbody' specifies the entity body of the request.  If it is `nil', "GET" method is
  # used, else "POST" method is used.
  # `resheads' specifies a list object into which headers of the response are stored.  The
  # status line comes first and the empty line terminating the headers comes last.  If it is
  # `nil', it is not used.
  # `resbody' specifies a stream object into which the entity body of the response is stored.
  # If it is `nil', it is not used.
  # The return value is the status code of the response or -1 on error.
  def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
    begin
      status = -1
      # The exchange runs in a worker thread so that the caller can give up after `outsec'.
      worker = Thread::start do
        sock = nil
        begin
          # `normalize' returns a new object; among other things it turns an empty path
          # into "/" so that the request line is always well-formed.
          uri = URI::parse(url).normalize
          Thread::current.exit if uri.scheme != "http" || !uri.host || uri.port < 1
          if pxhost
            # A proxy is given the absolute URL as the request target.
            host = pxhost
            port = pxport
            target = "http://" + uri.host + ":" + uri.port.to_s + uri.path
          else
            host = uri.host
            port = uri.port
            target = uri.path
          end
          target += "?" + uri.query if uri.query && !reqbody
          sock = TCPSocket.open(host, port)
          # The request is assembled as plain text and written as is.  It must not be used
          # as a format string because the target may contain `%' escapes.
          head = (reqbody ? "POST " : "GET ") + target + " HTTP/1.0\r\n"
          head << "Host: " << uri.host << ":" << uri.port.to_s << "\r\n"
          head << "Connection: close\r\n"
          head << "User-Agent: HyperEstraierForRuby/1.0.0\r\n"
          reqheads.each { |field| head << field.to_s << "\r\n" } if reqheads
          # Content-Length counts bytes, not characters.
          head << "Content-Length: " << reqbody.bytesize.to_s << "\r\n" if reqbody
          head << "\r\n"
          sock.write(head)
          sock.write(reqbody) if reqbody
          line = sock.gets
          raise EOFError, "no status line" unless line
          line = line.chomp
          # Split on runs of spaces.  A pattern that can match the empty string would cut
          # the line into single characters.
          elems = line.split(/ +/)
          Thread::current.exit if elems.length < 3 || elems[0] !~ /\AHTTP/
          status = elems[1].to_i
          resheads.push(line) if resheads
          loop do
            line = sock.gets
            raise EOFError, "truncated headers" unless line
            line = line.chomp
            resheads.push(line) if resheads
            break if line.empty?
          end
          while (chunk = sock.read(8192))
            resbody.write(chunk) if resbody
          end
        rescue StandardError
          # Any failure of the exchange is reported as -1, even if a status was parsed.
          status = -1
        ensure
          sock.close if sock
        end
      end
      if outsec >= 0
        unless worker.join(outsec)
          # Timed out: stop the worker and wait until its socket has been closed.
          worker.exit
          worker.join
          return -1
        end
      else
        worker.join
      end
      return status
    rescue
      return -1
    end
  end
  module_function :shuttle_url
  # Serialize a condition object into a query string.
  # `cond' specifies a condition object.
  # `depth' specifies depth of meta search.
  # `wwidth' specifies whole width of a snippet.
  # `hwidth' specifies width of strings picked up from the beginning of the text.
  # `awidth' specifies width of strings picked up around each highlighted word.
  # The return value is the serialized string.  Values are form-encoded so that characters
  # such as `&', `=' and `+' inside a phrase or an attribute expression survive.
  def cond_to_query(cond, depth, wwidth, hwidth, awidth)
    params = []
    params << "phrase=" + URI::encode_www_form_component(cond.phrase) if cond.phrase
    cond.attrs.each_with_index do |expr, idx|
      params << "attr" + (idx + 1).to_s + "=" + URI::encode_www_form_component(expr)
    end
    params << "order=" + URI::encode_www_form_component(cond.order) if cond.order
    # A maximum which is not positive is sent as a very large number instead.
    params << "max=" + (cond.max > 0 ? cond.max : (1 << 30)).to_s
    params << "options=" + cond.options.to_s if cond.options > 0
    params << "depth=" + depth.to_s if depth > 0
    params << "wwidth=" + wwidth.to_s
    params << "hwidth=" + hwidth.to_s
    params << "awidth=" + awidth.to_s
    params << "skip=" + cond.skip.to_s
    params.join("&")
  end
  module_function :cond_to_query
  # Encode a byte sequence with Base64 encoding.
  # `data' specifies a string object.
  # The return value is the encoded string, without any line break.
  def base_encode(data)
    # pack("m") inserts line feeds into long output; they are removed here.
    [data].pack("m").delete(" \n")
  end
  module_function :base_encode
end
1021
+ end
1022
+
1023
+
1024
+
1025
+ # END OF FILE