acts_as_estraier_doc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in acts_as_estraier_doc.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Takatoshi MORIYAMA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # ActsAsEstraierDoc
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'acts_as_estraier_doc'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install acts_as_estraier_doc
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/acts_as_estraier_doc/version', __FILE__)
3
+
4
# Gem specification for acts_as_estraier_doc.
Gem::Specification.new do |gem|
  gem.authors       = ["Takatoshi MORIYAMA"]
  gem.email         = ["hawk@at-exit.com"]
  gem.description   = %q{Acts as EstraierDoc}
  gem.summary       = %q{Acts as EstraierDoc}
  gem.homepage      = ""

  # `git ls-files` prints one path per line, so split on the input record
  # separator ($/, "\n" by default). The output record separator ($\) is nil
  # by default, which makes String#split fall back to splitting on any
  # whitespace and breaks paths that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "acts_as_estraier_doc"
  gem.require_paths = ['lib', 'vendor']
  gem.version       = ActsAsEstraierDoc::VERSION

  gem.add_runtime_dependency 'activerecord', '~> 2.0'
end
@@ -0,0 +1,205 @@
1
+ require 'estraierpure_ext'
2
+ require 'rexml/document'
3
+
4
# Mixin that indexes ActiveRecord models in a Hyper Estraier node and lets
# them be searched through it. Including the module only adds the
# `acts_as_estraier_doc' class macro; a model opts in by calling that macro.
module ActsAsEstraierDoc
  # Expose the `acts_as_estraier_doc' macro on the including class.
  def self.included(base)
    base.extend ActMethods
  end

  module ActMethods
    # Turn the calling model into an Estraier-indexed document.
    # `options' is merged over the defaults (:condition_options, :depth) and
    # must contain a :node Hash; this method reads its :node, :host, :port,
    # :user and :pass keys. :host defaults to 'localhost' and :port to 1978.
    # Raises ArgumentError when the :node Hash is missing.
    def acts_as_estraier_doc(options = {})
      self.extend ClassMethods
      send :include, ActsAsEstraierDoc::InstanceMethods
      send :alias_method_chain, :to_xml, :estdoc
      send :alias_method_chain, :to_json, :estdoc
      send :attr_accessor, :estdoc
      send :attr_accessor, :skip_update_est_index

      cattr_accessor :configuration, :estraier_conn

      self.configuration = {
        :condition_options => EstraierPure::Condition::SIMPLE,
        :depth => 0,
      }
      self.configuration.update(options) if options.is_a? Hash

      # Fail with a clear message instead of a NoMethodError on nil.
      node = self.configuration[:node]
      unless node.is_a?(Hash)
        raise ArgumentError, 'acts_as_estraier_doc requires a :node option (Hash)'
      end
      node[:host] = 'localhost' unless node.include? :host
      node[:port] = 1978 unless node.include? :port

      self.estraier_conn = EstraierPure::Node::new
      self.estraier_conn.set_url("http://#{node[:host]}:#{node[:port]}/node/#{node[:node]}")
      # set_auth concatenates both values, so nil credentials would raise;
      # only authenticate when a user has actually been configured.
      self.estraier_conn.set_auth(node[:user].to_s, node[:pass].to_s) if node[:user]

      after_save :update_est_index
      before_destroy :remove_est_index
    end
  end

  module ClassMethods
    HINT_KEYS = ['HIT', 'DOCNUM', 'WORDNUM', 'TIME']

    # Search the node for `phrase' and load the matching records.
    # Recognised `options': :condition_options (one flag or an array of
    # flags), :attributes, :limit, :offset, :order, :snippet_wwidth,
    # :snippet_hwidth, :snippet_awidth, :depth, :include and :debug.
    # Returns { :records => [...], :info => {...} }. Records are returned in
    # the order reported by the node and carry the matching result document
    # in `estdoc'. Index entries whose record no longer exists are removed
    # from the node. Raises RuntimeError when the search request fails.
    # NOTE(review): hits are not filtered by record_class, so a node shared
    # between several models may return documents of other classes - confirm.
    def est_search(phrase, options = {})
      cond = EstraierPure::Condition::new
      flags = options.include?(:condition_options) ? options[:condition_options] : self.configuration[:condition_options]
      # Array() accepts a single flag as well as a list (Integer#to_a is gone in Ruby 1.9+).
      cond.set_options(Array(flags).inject(0) { |bits, flag| bits | flag })
      cond.set_phrase(phrase.to_s)
      Array(options[:attributes]).each { |attribute| cond.add_attr(attribute) } if options.include? :attributes
      cond.set_max(options[:limit]) if options.include? :limit
      cond.set_skip(options[:offset]) if options.include? :offset
      cond.set_order(options[:order]) if options.include? :order
      wwidth = options.include?(:snippet_wwidth) ? options[:snippet_wwidth] : 480
      hwidth = options.include?(:snippet_hwidth) ? options[:snippet_hwidth] : -1
      awidth = options.include?(:snippet_awidth) ? options[:snippet_awidth] : -1
      self.estraier_conn.set_snippet_width(wwidth, hwidth, awidth)
      Rails.logger.info cond.inspect if options[:debug]

      # Fall back to the configured depth, which was previously ignored.
      depth = options.include?(:depth) ? options[:depth] : (self.configuration[:depth] || 0)
      rs = self.estraier_conn.search(cond, depth)
      unless rs
        raise "[EstDoc] search failed (status: #{self.estraier_conn.status})"
      end

      docs = {}
      ids = []
      rs.each do |doc|
        record_id = doc.attr('record_id').to_i
        docs[record_id] = doc
        ids << record_id
      end

      found = {}
      self.find(:all, :conditions => {:id => ids}, :include => options[:include]).each do |record|
        found[record.id] = record
      end

      result = {:records => [], :info => {}}
      # Walk the ids in the node's order so ranking / :order is preserved;
      # the database returns the rows in an unspecified order.
      ids.uniq.each do |id|
        record = found[id]
        if record
          record.estdoc = docs[id]
          result[:records] << record
        else
          Rails.logger.info "[EstDoc] Remove orphaned index #{id}"
          self.estraier_conn.out_doc docs[id].attr('@id')
        end
      end
      HINT_KEYS.each { |key| result[:info][key.downcase.to_sym] = rs.hint key }
      result
    end

    # Re-index every record of the model in batches of 50, inside one
    # database transaction. Returns the number of records counted.
    def indexing!
      batch_size = 50
      self.transaction do
        count = self.count
        ((count + batch_size - 1) / batch_size).times do |page|
          # A stable order keeps LIMIT/OFFSET pages from overlapping or skipping rows.
          self.find(:all, :order => primary_key, :limit => batch_size, :offset => batch_size * page).each do |record|
            record.update_est_index
          end
        end
        count
      end
    end
  end

  module InstanceMethods
    # Build the Estraier document for this record from est_uri, est_title
    # and est_texts, plus est_attributes and est_hidden_texts when the model
    # defines them. nil values are skipped; other values are sent as strings.
    def to_estdoc
      doc = EstraierPure::Document::new
      doc.add_attr('@uri', self.est_uri.to_s)
      doc.add_attr('@title', self.est_title.to_s)
      doc.add_attr('record_id', self.id.to_s)
      doc.add_attr('record_class', self.class.to_s)
      if respond_to? :est_attributes
        est_attributes.each do |name, value|
          doc.add_attr(name.to_s, value.to_s) unless value.nil?
        end
      end
      if respond_to? :est_hidden_texts
        Array(est_hidden_texts).each do |value|
          doc.add_hidden_text(value.to_s) unless value.nil?
        end
      end
      Array(est_texts).each do |value|
        doc.add_text(value.to_s) unless value.nil?
      end
      doc
    end

    # to_xml with an <estraier> section inserted before the closing root tag.
    # Pass :with_pseudo_attributes => true to include the '#'-prefixed
    # attributes as well. Attribute values are XML-escaped.
    def to_xml_with_estdoc(*args)
      xml = to_xml_without_estdoc(*args).split("\n")
      options = args.extract_options!
      xml_foot = xml.pop
      doc = estdoc || _estdoc
      xml << ' <estraier>'
      if options[:with_pseudo_attributes]
        xml << ' <pseudo-attributes>'
        doc.attr_names.grep(/^#/).each do |name|
          xml << _est_xml_element(name.sub('#', ''), doc.attr(name))
        end
        xml << ' </pseudo-attributes>'
      end
      xml << ' <system-attributes>'
      doc.attr_names.grep(/^@/).each do |name|
        xml << _est_xml_element(name.sub('@', ''), doc.attr(name))
      end
      xml << ' </system-attributes>'
      xml << ' <attributes>'
      doc.attr_names.grep(/^[^@#]/).each do |name|
        next if name == 'record_id' || name == 'record_class'
        xml << _est_xml_element(name, doc.attr(name))
      end
      xml << ' </attributes>'
      xml << ' </estraier>'
      xml << xml_foot
      xml.join("\n")
    end

    # to_json with an 'estraier' member holding the document attributes,
    # grouped the same way as in to_xml_with_estdoc.
    def to_json_with_estdoc(*args)
      obj = ActiveSupport::JSON.decode(to_json_without_estdoc(*args))
      options = args.extract_options!
      doc = estdoc || _estdoc
      est = obj['estraier'] = {}
      if options[:with_pseudo_attributes]
        est['pseudo-attributes'] = {}
        doc.attr_names.grep(/^#/).each do |name|
          est['pseudo-attributes'][name.sub('#', '')] = doc.attr(name)
        end
      end
      est['system-attributes'] = {}
      doc.attr_names.grep(/^@/).each do |name|
        est['system-attributes'][name.sub('@', '')] = doc.attr(name)
      end
      est['attributes'] = {}
      doc.attr_names.grep(/^[^@#]/).each do |name|
        next if name == 'record_id' || name == 'record_class'
        est['attributes'][name] = doc.attr(name)
      end
      ActiveSupport::JSON.encode(obj)
    end

    # Replace this record's index entry (after_save callback).
    # Does nothing when skip_update_est_index is set.
    def update_est_index
      return if self.skip_update_est_index
      raise '[EstDoc] cannot index an unsaved record' if new_record?
      begin
        remove_est_index
      rescue
        # Best effort: the record may simply not be indexed yet.
      end
      add_est_index
    end

    # Register this record in the node unless the model's est_no_index is
    # truthy. Raises RuntimeError carrying the node status on failure.
    def add_est_index
      raise '[EstDoc] cannot index an unsaved record' if new_record?
      return if respond_to?(:est_no_index) && est_no_index
      raise self.estraier_conn.status.to_s unless self.estraier_conn.put_doc(to_estdoc)
    end

    # Remove this record's document from the node (before_destroy callback).
    # Raises RuntimeError carrying the node status on failure.
    def remove_est_index
      raise '[EstDoc] cannot unindex an unsaved record' if new_record?
      raise self.estraier_conn.status.to_s unless self.estraier_conn.out_doc(est_id)
    end

    # The '@id' attribute of this record's document in the node.
    def est_id
      raise '[EstDoc] an unsaved record has no index entry' if new_record?
      _estdoc.attr('@id')
    end

    private
    # Look up this record's result document by record_id and record_class.
    def _estdoc
      self.class.est_search('', :attributes => ["record_id NUMEQ #{self.id.to_s}", "record_class STREQ #{self.class.to_s}"])[:records][0].estdoc
    end

    # One XML element line; the value is escaped so that '&', '<' and '>'
    # in attribute values cannot break the generated document.
    def _est_xml_element(tag, value)
      " <#{tag}>#{REXML::Text.normalize(value.to_s)}</#{tag}>"
    end
  end
end
204
+
205
+ ActiveRecord::Base.send :include, ActsAsEstraierDoc
@@ -0,0 +1,3 @@
1
+ module ActsAsEstraierDoc
2
+ VERSION = '0.0.1'
3
+ end
@@ -0,0 +1,13 @@
1
+ require 'estraierpure'
2
+
3
module EstraierPure
  # Reopens NodeResult so that a search result can be walked with the
  # Enumerable methods (each, map, to_a, ...).
  class NodeResult
    include Enumerable

    # Yield every result document in index order, via doc_num and get_doc.
    def each
      (0...self.doc_num).each do |index|
        yield self.get_doc(index)
      end
    end
  end
end
@@ -0,0 +1,1193 @@
1
+ #--
2
+ # Ruby interface of Hyper Estraier
3
+ # Copyright (C) 2004-2007 Mikio Hirabayashi
4
+ # All rights reserved.
5
+ # This file is part of Hyper Estraier.
6
+ # Redistribution and use in source and binary forms, with or without modification, are
7
+ # permitted provided that the following conditions are met:
8
+ #
9
+ # * Redistributions of source code must retain the above copyright notice, this list of
10
+ # conditions and the following disclaimer.
11
+ # * Redistributions in binary form must reproduce the above copyright notice, this list of
12
+ # conditions and the following disclaimer in the documentation and/or other materials
13
+ # provided with the distribution.
14
+ # * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
15
+ # endorse or promote products derived from this software without specific prior written
16
+ # permission.
17
+ #
18
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
19
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
26
+ # OF THE POSSIBILITY OF SUCH DAMAGE.
27
+ #++
28
+ #:include:overview
29
+
30
+
31
+ require "uri"
32
+ require "cgi"
33
+ require "socket"
34
+ require "stringio"
35
+
36
+
37
+
38
+ #
39
+ # Module for the namespace of Hyper Estraier
40
+ #
41
+ module EstraierPure
42
+ #----------------------------------------------------------------
43
+ #++ Abstraction of document.
44
+ #----------------------------------------------------------------
45
# Abstraction of document: attributes, text sentences, hidden sentences,
# optional keywords and an optional substitute score, convertible to and
# from the draft text format.
class Document
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Add an attribute.
  # `name' specifies the name of an attribute.
  # `value' specifies the value of the attribute.  If it is `nil', the attribute is removed.
  # The return value is always `nil'.
  def add_attr(name, value)
    name = normalize_space(name)
    if value.nil?
      # Documented contract: a nil value removes the attribute.
      @attrs.delete(name)
    else
      Utility::check_types({ name=>String, value=>String }) if $DEBUG
      @attrs[name] = normalize_space(value)
    end
    nil
  end
  # Add a sentence of text.
  # `text' specifies a sentence of text.  A sentence that is empty after
  # whitespace normalization is ignored.
  # The return value is always `nil'.
  def add_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize_space(text)
    # `text.length' is always truthy in Ruby (0 is not false); test emptiness.
    @dtexts.push(text) unless text.empty?
    nil
  end
  # Add a hidden sentence.
  # `text' specifies a hidden sentence.  A sentence that is empty after
  # whitespace normalization is ignored.
  # The return value is always `nil'.
  def add_hidden_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize_space(text)
    @htexts.push(text) unless text.empty?
    nil
  end
  # Attach keywords.
  # `kwords' specifies a map object of keywords.  Keys of the map should be keywords of the
  # document and values should be their scores in decimal string.
  # The return value is always `nil'.
  def set_keywords(kwords)
    Utility::check_types({ kwords=>Hash }) if $DEBUG
    @kwords = kwords
    nil
  end
  # Set the substitute score.
  # `score' specifies the substitute score.  If it is negative, the substitute score setting is
  # nullified.
  # The return value is always `nil'.
  def set_score(score)
    Utility::check_types({ score=>Integer }) if $DEBUG
    @score = score
    nil
  end
  # Get the ID number.
  # The return value is the ID number of the document object.  It is
  # initialized to -1 and is not changed by any method of this class.
  def id()
    @id
  end
  # Get an array of attribute names of a document object.
  # The return value is a sorted array object of attribute names.
  def attr_names()
    @attrs.keys.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  # Get an array of sentences of the text.
  # The return value is an array object of sentences of the text.
  def texts()
    @dtexts
  end
  # Concatenate sentences of the text of a document object.
  # The return value is the sentences joined by single spaces.
  def cat_texts()
    @dtexts.join(" ")
  end
  # Dump draft data of a document object.
  # The return value is draft data: sorted "name=value" attribute lines,
  # optional %VECTOR and %SCORE lines, an empty line, the text sentences and
  # finally the hidden sentences, each prefixed by a tab.
  def dump_draft()
    buf = StringIO::new
    @attrs.keys.sort.each do |key|
      buf << "#{key}=#{@attrs[key]}\n"
    end
    if @kwords
      buf << "%VECTOR"
      @kwords.each do |key, value|
        buf << "\t#{key}\t#{value}"
      end
      buf << "\n"
    end
    buf << format("%%SCORE\t%d\n", @score) if @score >= 0
    buf << "\n"
    @dtexts.each { |text| buf << "#{text}\n" }
    @htexts.each { |text| buf << "\t#{text}\n" }
    buf.string
  end
  # Get attached keywords.
  # The return value is a map object of keywords and their scores in decimal string.  If no
  # keyword is attached, `nil' is returned.
  def keywords()
    @kwords
  end
  # Get the substitute score.
  # The return value is the substitute score or -1 if it is not set.
  def score()
    @score < 0 ? -1 : @score
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a document object.
  # `draft' specifies a string of draft data.  Lines up to the first empty
  # line are the header (attributes, %VECTOR, %SCORE); the remaining lines
  # are sentences, where a leading tab marks a hidden sentence.
  def initialize(draft = "")
    Utility::check_types({ draft=>String }) if $DEBUG
    @id = -1
    @attrs = {}
    @dtexts = []
    @htexts = []
    @kwords = nil
    @score = -1
    lines = draft.split(/\n/, -1)
    num = 0
    # Header section.
    while num < lines.length
      line = lines[num]
      num += 1
      break if line.length < 1
      if line =~ /^%/
        if line =~ /^%VECTOR\t/
          @kwords = {} unless @kwords
          fields = line.split(/\t/)
          # Fields after the directive are keyword/score pairs; a dangling
          # keyword without a score is ignored.
          1.step(fields.length - 2, 2) do |i|
            @kwords[fields[i]] = fields[i + 1]
          end
        elsif line =~ /^%SCORE\t/
          @score = line.split(/\t/)[1].to_i
        end
        next
      end
      line = normalize_space(line)
      if idx = line.index("=")
        @attrs[line[0...idx]] = line[idx+1...line.length]
      end
    end
    # Body section.
    while num < lines.length
      line = lines[num]
      num += 1
      next unless line.length > 0
      # line[0] is an Integer only on Ruby 1.8; compare a one-character
      # slice so that the tab test works on every Ruby version.
      if line[0, 1] == "\t"
        @htexts.push(line[1...line.length]) if line.length > 1
      else
        @dtexts.push(line)
      end
    end
  end
  # Collapse every run of whitespace into one space and trim both ends.
  def normalize_space(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip
  end
end
228
+ #----------------------------------------------------------------
229
+ #++ Abstraction of search condition.
230
+ #----------------------------------------------------------------
231
# Abstraction of search condition: phrase, attribute expressions, order,
# paging, option flags, auxiliary-index threshold, distinct attribute and
# meta-search mask.
class Condition
  #--------------------------------
  # public constants
  #--------------------------------
  public
  # option: check every N-gram key
  SURE = 1 << 0
  # option: check N-gram keys skipping by one
  USUAL = 1 << 1
  # option: check N-gram keys skipping by two
  FAST = 1 << 2
  # option: check N-gram keys skipping by three
  AGITO = 1 << 3
  # option: without TF-IDF tuning
  NOIDF = 1 << 4
  # option: with the simplified phrase
  SIMPLE = 1 << 10
  # option: with the rough phrase
  ROUGH = 1 << 11
  # option: with the union phrase
  UNION = 1 << 15
  # option: with the intersection phrase
  ISECT = 1 << 16
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Readers: the search phrase, the attribute expressions, the order
  # expression, the maximum number of retrieval, the number of skipped
  # documents, the options combined by bitwise or, the minimum hits to adopt
  # the auxiliary index, the distinct attribute name and the meta search mask.
  attr_reader :phrase, :attrs, :order, :max, :skip, :options, :auxiliary, :distinct, :mask
  # Set the search phrase.
  # `phrase' specifies a search phrase; whitespace is normalized.
  # The return value is always `nil'.
  def set_phrase(phrase)
    Utility::check_types({ phrase=>String }) if $DEBUG
    @phrase = tidy(phrase)
    nil
  end
  # Add an expression for an attribute.
  # `expr' specifies an expression for an attribute; whitespace is normalized.
  # The return value is always `nil'.
  def add_attr(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @attrs << tidy(expr)
    nil
  end
  # Set the order of a condition object.
  # `expr' specifies an expression for the order; whitespace is normalized.
  # The return value is always `nil'.
  def set_order(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @order = tidy(expr)
    nil
  end
  # Set the maximum number of retrieval.
  # `max' specifies the maximum number of retrieval.  A negative value is ignored.
  # The return value is always `nil'.
  def set_max(max)
    Utility::check_types({ max=>Integer }) if $DEBUG
    @max = max unless max < 0
    nil
  end
  # Set the number of skipped documents.
  # `skip' specifies the number of documents to be skipped in the search result.  A negative
  # value is ignored.
  # The return value is always `nil'.
  def set_skip(skip)
    Utility::check_types({ skip=>Integer }) if $DEBUG
    @skip = skip unless skip < 0
    nil
  end
  # Set options of retrieval.
  # `options' specifies options, one or more of `Condition::SURE', `Condition::USUAL',
  # `Condition::FAST', `Condition::AGITO', `Condition::NOIDF', `Condition::SIMPLE',
  # `Condition::ROUGH', `Condition::UNION' and `Condition::ISECT' combined by bitwise or.
  # The given bits are added to the options that are already set.
  # The return value is always `nil'.
  def set_options(options)
    Utility::check_types({ options=>Integer }) if $DEBUG
    @options = @options | options
    nil
  end
  # Set permission to adopt result of the auxiliary index.
  # `min' specifies the minimum hits to adopt result of the auxiliary index.  The initial
  # value is 32.
  # The return value is always `nil'.
  def set_auxiliary(min)
    Utility::check_types({ min=>Integer }) if $DEBUG
    @auxiliary = min
    nil
  end
  # Set the attribute distinction filter.
  # `name' specifies the name of an attribute to be distinct; whitespace is normalized.
  # The return value is always `nil'.
  def set_distinct(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @distinct = tidy(name)
    nil
  end
  # Set the mask of targets of meta search.
  # `mask' specifies a masking number.
  # The return value is always `nil'.
  def set_mask(mask)
    Utility::check_types({ mask=>Integer }) if $DEBUG
    @mask = mask
    nil
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a search condition object with no phrase, no attribute
  # expressions, no order, unlimited retrieval (-1), no skipping, no
  # options, an auxiliary threshold of 32, no distinct filter and mask 0.
  def initialize()
    @phrase, @order, @distinct = nil, nil, nil
    @attrs = []
    @max = -1
    @skip = 0
    @options = 0
    @auxiliary = 32
    @mask = 0
  end
  # Collapse every run of whitespace into one space and trim both ends.
  def tidy(str)
    str.gsub(/[ \t\r\n\v\f]+/, " ").strip
  end
end
410
+ #----------------------------------------------------------------
411
+ #++ Abstraction of document in result set.
412
+ #----------------------------------------------------------------
413
# Abstraction of document in result set: a URI, an attribute map, a snippet
# and serialized keywords, all fixed at construction time.
class ResultDocument
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Readers for the values given to the constructor: the URI of the result
  # document, its snippet string and its serialized keywords string.
  attr_reader :uri, :snippet, :keywords
  # Get an array of attribute names.
  # The return value is a sorted array object of attribute names.
  def attr_names()
    names = @attrs.keys
    names.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a result document object.
  # `uri', `attrs', `snippet' and `keywords' are stored as given.
  def initialize(uri, attrs, snippet, keywords)
    if $DEBUG
      Utility::check_types({ uri=>String, attrs=>Hash,
                             snippet=>String, keywords=>String })
    end
    @uri, @attrs = uri, attrs
    @snippet, @keywords = snippet, keywords
  end
end
462
+ #----------------------------------------------------------------
463
+ #++ Abstraction of result set from node.
464
+ #----------------------------------------------------------------
465
# Abstraction of result set from node: an array of result documents plus a
# map of hint information.
class NodeResult
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Get the number of documents.
  # The return value is the number of documents.
  def doc_num()
    @docs.size
  end
  # Get a result document.
  # `index' specifies the position of the document in the result set.
  # The return value is a result document object or `nil' if the index is out of bounds.
  def get_doc(index)
    Utility::check_types({ index=>Integer }) if $DEBUG
    index.between?(0, @docs.length - 1) ? @docs[index] : nil
  end
  # Get the value of hint information.
  # `key' specifies the key of a hint.
  # The return value is the hint or `nil' if the key does not exist.
  def hint(key)
    Utility::check_types({ key=>String }) if $DEBUG
    @hints[key]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a node result object.
  # `docs' is the array of result documents and `hints' the map of hints.
  def initialize(docs, hints)
    Utility::check_types({ docs=>Array, hints=>Hash }) if $DEBUG
    @docs, @hints = docs, hints
  end
end
501
+ #----------------------------------------------------------------
502
+ #++ Abstraction of connection to P2P node.
503
+ #----------------------------------------------------------------
504
+ class Node
505
+ #--------------------------------
506
+ # public methods
507
+ #--------------------------------
508
+ public
509
+ # Set the URL of a node server.
510
+ # `url' specifies the URL of a node.
511
+ # The return value is always `nil'.
512
def set_url(url)
  # Remember the node URL that later requests are built from.
  if $DEBUG
    Utility::check_types({ url=>String })
  end
  @url = url
  return nil
end
517
+ # Set the proxy information.
518
+ # `host' specifies the host name of a proxy server.
519
+ # `port' specifies the port number of the proxy server.
520
+ # The return value is always `nil'.
521
def set_proxy(host, port)
  # Remember the proxy host name and port number for later requests.
  if $DEBUG
    Utility::check_types({ host=>String, port=>Integer })
  end
  @pxhost, @pxport = host, port
  return nil
end
527
+ # Set timeout of a connection.
528
+ # `sec' specifies timeout of the connection in seconds.
529
+ # The return value is always `nil'.
530
def set_timeout(sec)
  # Remember the connection timeout, in seconds, for later requests.
  if $DEBUG
    Utility::check_types({ sec=>Integer })
  end
  @timeout = sec
  return nil
end
535
+ # Set the authentication information.
536
+ # `name' specifies the name of authentication.
537
+ # `password' specifies the password of the authentication.
538
+ # The return value is always `nil'.
539
def set_auth(name, password)
  # Store the credentials as "name:password"; the request methods send them
  # in a Basic Authorization header.
  if $DEBUG
    Utility::check_types({ name=>String, password=>String })
  end
  credentials = name + ":"
  credentials += password
  @auth = credentials
  return nil
end
544
+ # Get the status code of the last request.
545
+ # The return value is the status code of the last request. -1 means failure of connection.
546
def status()
  # Status code recorded by the last request method (-1 until one succeeds in connecting).
  return @status
end
549
+ # Synchronize updating contents of the database.
550
+ # The return value is true if success, else it is false.
551
def sync()
  # Post an empty form to "<url>/sync"; true only when the node answers 200.
  # The status is reset to -1 first and stays -1 when no URL has been set.
  @status = -1
  if @url
    target = @url + "/sync"
    headers = [ "Content-Type: application/x-www-form-urlencoded" ]
    headers << "Authorization: Basic " + Utility::base_encode(@auth) if @auth
    @status = Utility::shuttle_url(target, @pxhost, @pxport, @timeout, headers, "", nil, nil)
    @status == 200
  else
    false
  end
end
561
+ # Optimize the database.
562
+ # The return value is true if success, else it is false.
563
def optimize()
  # Post an empty form to "<url>/optimize"; true only when the node answers 200.
  # The status is reset to -1 first and stays -1 when no URL has been set.
  @status = -1
  if @url
    target = @url + "/optimize"
    headers = [ "Content-Type: application/x-www-form-urlencoded" ]
    headers << "Authorization: Basic " + Utility::base_encode(@auth) if @auth
    @status = Utility::shuttle_url(target, @pxhost, @pxport, @timeout, headers, "", nil, nil)
    @status == 200
  else
    false
  end
end
573
+ # Add a document.
574
+ # `doc' specifies a document object. The document object should have the URI attribute.
575
+ # The return value is true if success, else it is false.
576
def put_doc(doc)
  # Post the document's draft to "<url>/put_doc"; true only when the node answers 200.
  # The status is reset to -1 first and stays -1 when no URL has been set.
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  if @url
    target = @url + "/put_doc"
    headers = [ "Content-Type: text/x-estraier-draft" ]
    headers << "Authorization: Basic " + Utility::base_encode(@auth) if @auth
    draft = doc.dump_draft
    @status = Utility::shuttle_url(target, @pxhost, @pxport, @timeout, headers, draft, nil, nil)
    @status == 200
  else
    false
  end
end
588
+ # Remove a document.
589
+ # `id' specifies the ID number of a registered document.
590
+ # The return value is true if success, else it is false.
591
def out_doc(id)
  # Post "id=<id>" to "<url>/out_doc"; true only when the node answers 200.
  # The status is reset to -1 first and stays -1 when no URL has been set.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  if @url
    target = @url + "/out_doc"
    headers = [ "Content-Type: application/x-www-form-urlencoded" ]
    headers << "Authorization: Basic " + Utility::base_encode(@auth) if @auth
    form = "id=#{id}"
    @status = Utility::shuttle_url(target, @pxhost, @pxport, @timeout, headers, form, nil, nil)
    @status == 200
  else
    false
  end
end
603
+ # Remove a document specified by URI.
604
+ # `uri' specifies the URI of a registered document.
605
+ # The return value is true if success, else it is false.
606
def out_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/out_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The document is addressed by its URI, form-encoded.
  form = "uri=" + CGI::escape(uri)
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, nil)
  @status == 200
end
618
+ # Edit attributes of a document.
619
+ # `doc' specifies a document object.
620
+ # The return value is true if success, else it is false.
621
def edit_doc(doc)
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/edit_doc"
  headers = ["Content-Type: text/x-estraier-draft"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The document is sent in its draft serialization.
  draft = doc.dump_draft
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
633
+ # Retrieve a document.
634
+ # `id' specifies the ID number of a registered document.
635
+ # The return value is a document object. On error, `nil' is returned.
636
def get_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  # The response body is collected here and handed to Document as a draft.
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  Document::new(sink.string)
end
650
+ # Retrieve a document specified by URI.
651
+ # `uri' specifies the URI of a registered document.
652
+ # The return value is a document object. On error, `nil' is returned.
653
def get_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri)
  # The response body is collected here and handed to Document as a draft.
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  Document::new(sink.string)
end
667
+ # Retrieve the value of an attribute of a document.
668
+ # `id' specifies the ID number of a registered document.
669
+ # `name' specifies the name of an attribute.
670
+ # The return value is the value of the attribute or `nil' if it does not exist.
671
def get_doc_attr(id, name)
  Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc_attr"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s + "&attr=" + CGI::escape(name)
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  # The value is the response body minus its trailing line terminator.
  sink.string.chomp
end
685
+ # Retrieve the value of an attribute of a document specified by URI.
686
+ # `uri' specifies the URI of a registered document.
687
+ # `name' specifies the name of an attribute.
688
+ # The return value is the value of the attribute or `nil' if it does not exist.
689
def get_doc_attr_by_uri(uri, name)
  Utility::check_types({ uri=>String, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc_attr"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri) + "&attr=" + CGI::escape(name)
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  # The value is the response body minus its trailing line terminator.
  sink.string.chomp
end
703
+ # Extract keywords of a document.
704
+ # `id' specifies the ID number of a registered document.
705
+ # The return value is a hash object of keywords and their scores in decimal string or `nil'
706
+ # on error.
707
def etch_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/etch_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  # Each response line is "keyword<TAB>score"; lines with fewer than two
  # fields are ignored and extra fields are dropped.
  keywords = {}
  sink.string.split(/\n/, -1).each do |row|
    word, score = row.split(/\t/)
    keywords[word] = score if score
  end
  keywords
end
728
+ # Extract keywords of a document specified by URI.
729
+ # `uri' specifies the URI of a registered document.
730
+ # The return value is a hash object of keywords and their scores in decimal string or `nil'
731
+ # on error.
732
def etch_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/etch_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri)
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  # Each response line is "keyword<TAB>score"; lines with fewer than two
  # fields are ignored and extra fields are dropped.
  keywords = {}
  sink.string.split(/\n/, -1).each do |row|
    word, score = row.split(/\t/)
    keywords[word] = score if score
  end
  keywords
end
753
+ # Get the ID of a document specified by URI.
754
+ # `uri' specifies the URI of a registered document.
755
+ # The return value is the ID of the document. On error, -1 is returned.
756
def uri_to_id(uri)
  # Looks up the numeric ID registered for `uri'.
  # Returns the ID as an Integer, or -1 on any failure: no node URL set,
  # a non-200 response, or a response body that is not a decimal number.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return -1 unless @url
  turl = @url + "/uri_to_id"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  reqbody = "uri=" + CGI::escape(uri)
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  # Use the same -1 error value as the missing-URL guard above (not nil).
  return -1 if rv != 200
  # Convert the textual ID to an Integer so it can be passed to the
  # ID-based methods, which type-check for Integer when $DEBUG is set.
  text = resbody.string.strip
  text =~ /\A\d+\z/ ? text.to_i : -1
end
770
+ # Get the name.
771
+ # The return value is the name. On error, `nil' is returned.
772
def name
  # Fetch the node information on first use; later calls reuse the value.
  set_info unless @name
  @name
end
776
+ # Get the label.
777
+ # The return value is the label. On error, `nil' is returned.
778
def label
  # Fetch the node information on first use; later calls reuse the value.
  set_info unless @label
  @label
end
782
+ # Get the number of documents.
783
+ # The return value is the number of documents. On error, -1 is returned.
784
def doc_num
  # A negative count means "not fetched yet" (the constructor stores -1).
  if @dnum < 0
    set_info
  end
  @dnum
end
788
+ # Get the number of unique words.
789
+ # The return value is the number of unique words. On error, -1 is returned.
790
def word_num
  # A negative count means "not fetched yet" (the constructor stores -1).
  if @wnum < 0
    set_info
  end
  @wnum
end
794
+ # Get the size of the database.
795
+ # The return value is the size of the database. On error, -1.0 is returned.
796
def size
  # A negative size means "not fetched yet" (the constructor stores -1.0).
  if @size < 0.0
    set_info
  end
  @size
end
800
+ # Get the usage ratio of the cache.
801
+ # The return value is the usage ratio of the cache. On error, -1.0 is returned.
802
def cache_usage
  @status = -1
  return -1.0 unless @url
  endpoint = @url + "/cacheusage"
  headers = []
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  sink = StringIO::new
  # A nil request body is passed here, unlike the form-posting methods.
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, nil, nil, sink)
  return -1.0 unless @status == 200
  sink.string.strip.to_f
end
814
+ # Get an array of names of administrators.
815
+ # The return value is an array object of names of administrators. On error, `nil' is
816
+ # returned.
817
def admins
  # Fetch the node information on first use; later calls reuse the list.
  set_info if !@admins
  @admins
end
821
+ # Get an array of names of users.
822
+ # The return value is an array object of names of users. On error, `nil' is returned.
823
def users
  # Fetch the node information on first use; later calls reuse the list.
  set_info if !@users
  @users
end
827
+ # Get an array of expressions of links.
828
+ # The return value is an array object of expressions of links. Each element is a TSV string
829
+ # and has three fields of the URL, the label, and the score. On error, `nil' is returned.
830
def links
  # Fetch the node information on first use; later calls reuse the list.
  set_info if !@links
  @links
end
834
+ # Search for documents corresponding to a condition.
835
+ # `cond' specifies a condition object.
836
+ # `depth' specifies the depth of meta search.
837
+ # The return value is a node result object. On error, `nil' is returned.
838
def search(cond, depth)
  Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/search"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = Utility::cond_to_query(cond, depth, @wwidth, @hwidth, @awidth)
  sink = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  lines = sink.string.split(/\n/, -1)
  return nil if lines.empty?
  docs = []
  hints = {}
  result = NodeResult::new(docs, hints)
  # The first line is the border string that separates the parts of the
  # response; a border line followed by ":END" terminates the response.
  border = lines.first
  finished = false
  pos = 1
  # Hint section: "key<TAB>value" lines up to the next border line.
  while pos < lines.length
    line = lines[pos]
    pos += 1
    if line.start_with?(border)
      finished = true if line[border.length..-1] == ":END"
      break
    end
    key, tab, value = line.partition("\t")
    hints[key] = value unless tab.empty?
  end
  # Document parts: lines from `first' up to the next border form one part.
  first = pos
  while !finished && pos < lines.length
    line = lines[pos]
    pos += 1
    next unless line.start_with?(border)
    stop = pos - 1
    attrs = {}
    vector = ""
    cursor = first
    # Header of the part: "name=value" attributes and "%" control lines,
    # ended by the first blank line.
    while cursor < stop
      entry = lines[cursor].strip
      cursor += 1
      break if entry.empty?
      if entry =~ /^%/
        tab_at = entry.index("\t")
        vector = entry[(tab_at + 1)..-1] if entry =~ /%VECTOR/ && tab_at
      else
        key, eq, value = entry.partition("=")
        attrs[key] = value unless eq.empty?
      end
    end
    # Whatever remains before the border is the snippet, one line each.
    snippet = StringIO::new
    lines[cursor...stop].each { |text| snippet.printf("%s\n", text) }
    uri = attrs["@uri"]
    # Parts without an "@uri" attribute are dropped.
    docs.push(ResultDocument::new(uri, attrs, snippet.string, vector)) if uri
    first = pos
    finished = true if line[border.length..-1] == ":END"
  end
  # A response that never reached the ":END" marker counts as an error.
  return nil unless finished
  result
end
917
+ # Set width of snippet in the result.
918
+ # `wwidth' specifies whole width of a snippet. By default, it is 480. If it is 0, no
919
+ # snippet is sent. If it is negative, whole body text is sent instead of snippet.
920
+ # `hwidth' specifies width of strings picked up from the beginning of the text. By default,
921
+ # it is 96. If it is negative, the current setting is not changed.
922
+ # `awidth' specifies width of strings picked up around each highlighted word. By default,
923
+ # it is 96. If it is negative, the current setting is not changed.
924
def set_snippet_width(wwidth, hwidth, awidth)
  # The whole width is always taken as given.
  @wwidth = wwidth
  # The head and around widths are only replaced by non-negative values.
  if hwidth >= 0
    @hwidth = hwidth
  end
  if awidth >= 0
    @awidth = awidth
  end
end
929
+ # Manage a user account of a node.
930
+ # `name' specifies the name of a user.
931
+ # `mode' specifies the operation mode. 0 means to delete the account. 1 means to set the
932
+ # account as an administrator. 2 means to set the account as a guest.
933
+ # The return value is true if success, else it is false.
934
def set_user(name, mode)
  Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/_set_user"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The user name is form-encoded; the mode is sent as a plain number.
  form = "name=" + CGI::escape(name) + "&mode=" + mode.to_s
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, nil)
  @status == 200
end
946
+ # Manage a link of a node.
947
+ # `url' specifies the URL of the target node of a link.
948
+ # `label' specifies the label of the link.
949
+ # `credit' specifies the credit of the link. If it is negative, the link is removed.
950
+ # The return value is true if success, else it is false.
951
def set_link(url, label, credit)
  # Creates, updates or removes a link from this node to the node at `url'.
  # `label' is the label of the link; `credit' is its credit, and a
  # negative credit omits the parameter from the request.
  # Returns true when the server answers 200, false otherwise.
  Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  turl = @url + "/_set_link"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # Both values are form-encoded: an unescaped label containing "&", "=",
  # spaces or non-ASCII text would corrupt the urlencoded body.
  reqbody = "url=" + CGI::escape(url) + "&label=" + CGI::escape(label)
  reqbody += "&credit=" + credit.to_s if credit >= 0
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status = rv
  rv == 200
end
964
+ #--------------------------------
965
+ # private methods
966
+ #--------------------------------
967
+ private
968
+ # Create a node connection object.
969
def initialize
  # Defaults, listed in the order in which the variables are created.
  {
    :@url => nil,      # node URL; request methods bail out while it is nil
    :@pxhost => nil,   # proxy host
    :@pxport => -1,    # proxy port
    :@timeout => -1,   # timeout handed to Utility::shuttle_url
    :@auth => nil,     # credentials sent as HTTP basic authorization
    :@name => nil,
    :@label => nil,
    :@dnum => -1,      # negative numbers mean "not fetched yet"
    :@wnum => -1,
    :@size => -1.0,
    :@admins => nil,
    :@users => nil,
    :@links => nil,
    :@wwidth => 480,   # snippet widths
    :@hwidth => 96,
    :@awidth => 96,
    :@status => -1     # status code of the last request
  }.each do |ivar, default|
    instance_variable_set(ivar, default)
  end
end
988
+ # Set information of the node.
989
def set_info
  # Fetches "<node URL>/inform" and caches the node information in
  # @name, @label, @dnum, @wnum, @size, @admins, @users and @links.
  # On any failure it returns early and leaves the remaining values
  # untouched; @status records the outcome of the request.
  @status = -1
  return unless @url
  turl = @url + "/inform"
  reqheads = []
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, nil, nil, resbody)
  @status = rv
  return if rv != 200
  lines = resbody.string.split(/\n/, -1)
  return if lines.empty?
  # The first line carries five tab-separated fields:
  # name, label, document count, word count and database size.
  elems = lines[0].chomp.split(/\t/)
  return if elems.length != 5
  @name = elems[0]
  @label = elems[1]
  @dnum = elems[2].to_i
  @wnum = elems[3].to_i
  @size = elems[4].to_f
  return if lines.length < 2
  # Three blank-line separated sections follow: administrators, users, links.
  # Every access to `lines' is bounds-checked, so a response that stops
  # early yields empty sections instead of raising on a nil line.
  lnum = 1
  sections = Array.new(3) do
    lnum += 1 if lnum < lines.length && lines[lnum].empty?
    section = []
    while lnum < lines.length && !lines[lnum].empty?
      section.push(lines[lnum])
      lnum += 1
    end
    section
  end
  @admins = sections[0]
  @users = sections[1]
  # Link lines must have exactly three tab-separated fields; others are skipped.
  @links = sections[2].select { |line| line.split(/\t/).length == 3 }
  nil
end
1035
+ end
1036
+ #:stopdoc:
1037
+ #
1038
+ # Module for utility
1039
+ #
1040
module Utility
  # Every method below is exposed as a module function (Utility::name).
  module_function

  # Check types of arguments.
  # `types' specifies a hash object whose keys are arguments and values are class objects.
  # A `nil' argument is always accepted.
  # If there is an invalid object, an ArgumentError is raised; the number in its
  # message is the 1-based position of the offending entry in the hash.
  def check_types(types)
    types.each_with_index do |(value, klass), index|
      next if value.nil? || value.kind_of?(klass)
      raise ArgumentError, format("Argument#%d should be a kind of %s", index + 1, klass)
    end
  end

  # Perform an interaction of a URL.
  # `url' specifies a URL. Only the "http" scheme is handled.
  # `pxhost' specifies the host name of a proxy. If it is `nil', it is not used.
  # `pxport' specifies the port number of the proxy.
  # `outsec' specifies timeout in seconds. If it is negative, it is not used.
  # `reqheads' specifies an array object of extension headers. If it is `nil', it is not used.
  # `reqbody' specifies the entity body of request. If it is `nil', "GET" method is used,
  # otherwise "POST" is used.
  # `resheads' specifies an array object into which headers of response are stored. If it is
  # `nil' it is not used.
  # `resbody' specifies stream object into which the entity body of response is stored. If it
  # is `nil', it is not used.
  # The return value is the status code of the response or -1 on error.
  def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
    status = -1
    th = Thread::start do
      # Failures are re-raised by `join' below and mapped to -1, so the
      # interpreter's own per-thread error report would only be noise.
      me = Thread::current
      me.report_on_exception = false if me.respond_to?(:report_on_exception=)
      # Use the normalized form (lower-cased host, "/" for an empty path).
      uri = URI::parse(url).normalize
      me.exit if uri.scheme != "http" || !uri.host || uri.port < 1
      if pxhost
        # Through a proxy, connect to the proxy and request the absolute URL.
        host = pxhost
        port = pxport
        query = "http://" + uri.host + ":" + uri.port.to_s + uri.path
      else
        host = uri.host
        port = uri.port
        query = uri.path
      end
      query += "?" + uri.query if uri.query && !reqbody
      sock = nil
      begin
        sock = TCPSocket.open(host, port)
        # The request is written verbatim. It must never be used as a printf
        # format string, because URLs routinely contain "%" escapes.
        sock.write((reqbody ? "POST " : "GET ") + query + " HTTP/1.0\r\n")
        sock.write("Host: #{uri.host}:#{uri.port}\r\n")
        sock.write("Connection: close\r\n")
        sock.write("User-Agent: HyperEstraierForRuby/1.0.0\r\n")
        reqheads.each { |head| sock.write(head.to_s + "\r\n") } if reqheads
        # Content-Length counts bytes, not characters.
        sock.write("Content-Length: #{reqbody.bytesize}\r\n") if reqbody
        sock.write("\r\n")
        sock.write(reqbody) if reqbody
        line = sock.gets
        me.exit unless line
        line = line.chomp
        # Split on runs of spaces. A pattern that can match the empty string
        # would split the line into single characters.
        elems = line.split(/ +/)
        me.exit if elems.length < 3 || !(elems[0] =~ /^HTTP/)
        status = elems[1].to_i
        resheads.push(line) if resheads
        # Headers run up to and including the first empty line; an early end
        # of stream simply ends the header section.
        while (line = sock.gets)
          line = line.chomp
          resheads.push(line) if resheads
          break if line.empty?
        end
        while (buf = sock.read(8192))
          resbody.write(buf) if resbody
        end
      ensure
        sock.close if sock
      end
    end
    if outsec >= 0
      unless th.join(outsec)
        # Timed out: stop the worker and report a failure.
        th.exit
        th.join
        return -1
      end
    else
      th.join
    end
    status
  rescue
    -1
  end

  # Serialize a condition object into a query string.
  # `cond' specifies a condition object.
  # `depth' specifies depth of meta search.
  # `wwidth' specifies whole width of a snippet.
  # `hwidth' specifies width of strings picked up from the beginning of the text.
  # `awidth' specifies width of strings picked up around each highlighted word.
  # The return value is the serialized string.
  def cond_to_query(cond, depth, wwidth, hwidth, awidth)
    params = []
    params << "phrase=" + CGI::escape(cond.phrase) if cond.phrase
    cond.attrs.each_with_index do |expr, index|
      params << "attr" + (index + 1).to_s + "=" + CGI::escape(expr)
    end
    params << "order=" + CGI::escape(cond.order) if cond.order
    # A negative maximum is sent as 1 << 30.
    params << "max=" + (cond.max >= 0 ? cond.max : 1 << 30).to_s
    params << "options=" + cond.options.to_s if cond.options > 0
    params << "auxiliary=" + cond.auxiliary.to_s
    params << "distinct=" + CGI::escape(cond.distinct) if cond.distinct
    params << "depth=" + depth.to_s if depth > 0
    params << "wwidth=" + wwidth.to_s
    params << "hwidth=" + hwidth.to_s
    params << "awidth=" + awidth.to_s
    params << "skip=" + cond.skip.to_s
    params << "mask=" + cond.mask.to_s
    params.join("&")
  end

  # Encode a byte sequence with Base64 encoding.
  # `data' specifies a string object.
  # The return value is the encoded string, without any line breaks.
  def base_encode(data)
    [data].pack("m0")
  end
end
1189
+ end
1190
+
1191
+
1192
+
1193
+ # END OF FILE