acts_as_estraier_doc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in acts_as_estraier_doc.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Takatoshi MORIYAMA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # ActsAsEstraierDoc
2
+
3
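+ Adds an `acts_as_estraier_doc` macro to ActiveRecord models so that saved records are indexed in, and can be searched through, a Hyper Estraier node server.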
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'acts_as_estraier_doc'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install acts_as_estraier_doc
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/acts_as_estraier_doc/version', __FILE__)
3
+
4
# Gem specification for acts_as_estraier_doc.
Gem::Specification.new do |gem|
  gem.authors       = ["Takatoshi MORIYAMA"]
  gem.email         = ["hawk@at-exit.com"]
  gem.description   = %q{Acts as EstraierDoc}
  gem.summary       = %q{Acts as EstraierDoc}
  gem.homepage      = ""

  # Split the file list on newlines. The previous `split($\)` passed the
  # output record separator, which is nil by default, so String#split fell
  # back to splitting on any whitespace and broke paths containing spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "acts_as_estraier_doc"
  # 'vendor' is on the load path in addition to 'lib'.
  gem.require_paths = ['lib', 'vendor']
  gem.version       = ActsAsEstraierDoc::VERSION

  gem.add_runtime_dependency 'activerecord', '~> 2.0'
end
@@ -0,0 +1,205 @@
1
+ require 'estraierpure_ext'
2
+ require 'rexml/document'
3
+
4
# Adds Hyper Estraier indexing and searching to ActiveRecord models.
#
# Including this module in a class extends it with ActMethods, which provides
# the `acts_as_estraier_doc` macro.
module ActsAsEstraierDoc
  def self.included(base)
    base.extend ActMethods
  end

  module ActMethods
    # Turns the calling model into an Estraier-indexed model.
    #
    # `options` is merged over the defaults (:condition_options, :depth).
    # `options[:node]` describes the node connection: :host (default
    # 'localhost'), :port (default 1978), :node, :user and :pass.
    #
    # Side effects: extends the model with ClassMethods, includes
    # InstanceMethods, chains to_xml / to_json, and registers the after_save
    # and before_destroy callbacks that keep the index in sync.
    def acts_as_estraier_doc(options = {})
      self.extend ClassMethods
      send :include, ActsAsEstraierDoc::InstanceMethods
      send :alias_method_chain, :to_xml, :estdoc
      send :alias_method_chain, :to_json, :estdoc
      send :attr_accessor, :estdoc
      send :attr_accessor, :skip_update_est_index

      cattr_accessor :configuration, :estraier_conn

      self.configuration = {
        :condition_options => EstraierPure::Condition::SIMPLE,
        :depth => 0,
      }
      self.configuration.update(options) if options.is_a? Hash

      # A missing :node option used to fail with NoMethodError on nil.
      # Building a fresh hash also avoids mutating the caller's option hash.
      node = { :host => 'localhost', :port => 1978 }.merge(self.configuration[:node] || {})
      self.configuration[:node] = node

      self.estraier_conn = EstraierPure::Node::new
      self.estraier_conn.set_url("http://#{node[:host]}:#{node[:port]}/node/#{node[:node]}")
      # set_auth concatenates its arguments, so it is only called when
      # credentials are actually configured.
      self.estraier_conn.set_auth(node[:user].to_s, node[:pass].to_s) if node[:user]

      after_save :update_est_index
      before_destroy :remove_est_index
    end
  end

  module ClassMethods
    # Hint keys copied from the node result into result[:info].
    HINT_KEYS = ['HIT', 'DOCNUM', 'WORDNUM', 'TIME'].freeze

    # Number of records loaded per query by indexing!.
    INDEXING_BATCH_SIZE = 50

    # Searches the node and loads the matching records.
    #
    # `phrase` is the search phrase. Recognised `options`:
    # :condition_options (one flag or an array of flags, OR-ed together),
    # :attributes (one attribute expression or an array of them), :limit,
    # :offset, :order, :snippet_wwidth / :snippet_hwidth / :snippet_awidth,
    # :depth (falls back to the configured :depth), :include (passed to find)
    # and :debug (logs the condition).
    #
    # Returns {:records => [...], :info => {...}}. Records are in the order
    # the node returned them and carry the matching result document in
    # `estdoc`. Raises RuntimeError when the node returns no result.
    def est_search(phrase, options = {})
      cond = EstraierPure::Condition::new
      flags = options.include?(:condition_options) ? options[:condition_options] : self.configuration[:condition_options]
      # Array() accepts a single flag or a list; Integer#to_a is gone in Ruby 1.9+.
      cond.set_options(Array(flags).inject(0) { |acc, flag| acc | flag })
      cond.set_phrase(phrase.to_s)
      Array(options[:attributes]).each { |attribute| cond.add_attr(attribute) }
      cond.set_max(options[:limit]) if options.include? :limit
      cond.set_skip(options[:offset]) if options.include? :offset
      cond.set_order(options[:order]) if options.include? :order
      wwidth = options.fetch(:snippet_wwidth, 480)
      hwidth = options.fetch(:snippet_hwidth, -1)
      awidth = options.fetch(:snippet_awidth, -1)
      self.estraier_conn.set_snippet_width(wwidth, hwidth, awidth)
      Rails.logger.info cond.inspect if options[:debug]

      # The configured :depth used to be ignored in favour of a hard-coded 0.
      depth = options.include?(:depth) ? options[:depth] : (self.configuration[:depth] || 0)
      rs = self.estraier_conn.search(cond, depth)
      raise "[EstDoc] search failed (status: #{self.estraier_conn.status})" unless rs

      docs = {}
      ids = []
      rs.each do |doc|
        record_id = doc.attr('record_id').to_i
        docs[record_id] = doc
        ids << record_id
      end

      found = {}
      self.find(:all, :conditions => {:id => ids}, :include => options[:include]).each do |record|
        found[record.id] = record
      end

      # NOTE(review): the condition is not restricted by record_class here, so
      # on a node shared by several models this would drop other models'
      # documents as "orphans" -- confirm one node per model is intended.
      (ids - found.keys).each do |orphaned_id|
        Rails.logger.info "[EstDoc] Remove orphaned index #{orphaned_id}"
        self.estraier_conn.out_doc docs[orphaned_id].attr('@id')
      end

      # Keep the node's ranking: the database returns rows in arbitrary order.
      records = ids.uniq.map { |id| found[id] }.compact
      records.each { |record| record.estdoc = docs[record.id] }

      result = {:records => records, :info => {}}
      HINT_KEYS.each { |key| result[:info][key.downcase.to_sym] = rs.hint key }
      result
    end

    # Re-indexes every record of the model in batches inside one transaction.
    # Returns the record count taken before indexing started.
    def indexing!
      self.transaction do
        count = self.count
        ((count / INDEXING_BATCH_SIZE) + 1).times do |page|
          # An explicit order keeps LIMIT/OFFSET pages from overlapping.
          batch = self.find(:all, :limit => INDEXING_BATCH_SIZE,
                                  :offset => INDEXING_BATCH_SIZE * page,
                                  :order => primary_key)
          batch.each { |record| record.update_est_index }
        end
        count
      end
    end
  end

  module InstanceMethods
    # Characters escaped when attribute values are embedded in XML.
    XML_ESCAPES = {
      '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&apos;'
    }.freeze

    # Builds the EstraierPure::Document for this record from est_uri,
    # est_title, est_texts and, when defined, est_attributes and
    # est_hidden_texts. Values are converted to strings; nil values are skipped.
    def to_estdoc
      doc = EstraierPure::Document::new
      doc.add_attr('@uri', self.est_uri.to_s)
      doc.add_attr('@title', self.est_title.to_s)
      doc.add_attr('record_id', self.id.to_s)
      doc.add_attr('record_class', self.class.to_s)
      if respond_to? :est_attributes
        est_attributes.each do |name, value|
          next if value.nil?
          # Document#add_attr calls String methods on the value.
          doc.add_attr(name.to_s, value.to_s)
        end
      end
      if respond_to? :est_hidden_texts
        Array(est_hidden_texts).compact.each { |value| doc.add_hidden_text(value.to_s) }
      end
      Array(est_texts).compact.each { |value| doc.add_text(value.to_s) }
      doc
    end

    # to_xml with an <estraier> section inserted before the closing root tag.
    # Pass :with_pseudo_attributes => true to include attributes whose names
    # start with '#'. Attribute values are XML-escaped.
    def to_xml_with_estdoc(*args)
      xml = to_xml_without_estdoc(*args).split("\n")
      options = args.extract_options!
      xml_foot = xml.pop
      xml << ' <estraier>'
      doc = estdoc || _estdoc
      if options[:with_pseudo_attributes]
        xml << ' <pseudo-attributes>'
        doc.attr_names.grep(/^#/).each do |name|
          xml << _est_xml_element(name.sub('#', ''), doc.attr(name))
        end
        xml << ' </pseudo-attributes>'
      end
      xml << ' <system-attributes>'
      doc.attr_names.grep(/^@/).each do |name|
        xml << _est_xml_element(name.sub('@', ''), doc.attr(name))
      end
      xml << ' </system-attributes>'
      xml << ' <attributes>'
      doc.attr_names.grep(/^[^@#]/).each do |name|
        next if name == 'record_id' || name == 'record_class'
        xml << _est_xml_element(name, doc.attr(name))
      end
      xml << ' </attributes>'
      xml << ' </estraier>'
      xml << xml_foot
      xml.join("\n")
    end

    # to_json with an "estraier" object added; same sections as the XML form.
    def to_json_with_estdoc(*args)
      obj = ActiveSupport::JSON.decode(to_json_without_estdoc(*args))
      options = args.extract_options!
      doc = estdoc || _estdoc
      estraier = obj['estraier'] = {}
      if options[:with_pseudo_attributes]
        pseudo = estraier['pseudo-attributes'] = {}
        doc.attr_names.grep(/^#/).each { |name| pseudo[name.sub('#', '')] = doc.attr(name) }
      end
      system = estraier['system-attributes'] = {}
      doc.attr_names.grep(/^@/).each { |name| system[name.sub('@', '')] = doc.attr(name) }
      plain = estraier['attributes'] = {}
      doc.attr_names.grep(/^[^@#]/).each do |name|
        next if name == 'record_id' || name == 'record_class'
        plain[name] = doc.attr(name)
      end
      ActiveSupport::JSON.encode(obj)
    end

    # Replaces this record's index entry. Does nothing when
    # skip_update_est_index is set.
    def update_est_index
      return if self.skip_update_est_index
      raise "[EstDoc] cannot index an unsaved record" if new_record?
      begin
        remove_est_index
      rescue StandardError
        # Best effort: the record may simply not be indexed yet.
      end
      add_est_index
    end

    # Registers this record in the index unless est_no_index is defined and
    # truthy. Raises RuntimeError carrying the node status on failure.
    def add_est_index
      raise "[EstDoc] cannot index an unsaved record" if new_record?
      return if respond_to?(:est_no_index) && est_no_index
      raise self.estraier_conn.status.to_s unless self.estraier_conn.put_doc(to_estdoc)
    end

    # Removes this record from the index. Raises RuntimeError carrying the
    # node status on failure.
    def remove_est_index
      raise "[EstDoc] cannot remove the index of an unsaved record" if new_record?
      raise self.estraier_conn.status.to_s unless self.estraier_conn.out_doc(est_id)
    end

    # Returns the '@id' attribute of this record's document in the index.
    def est_id
      raise "[EstDoc] an unsaved record has no index id" if new_record?
      _estdoc.attr('@id')
    end

    private
    # Looks this record's document up in the index by record_id and
    # record_class. Raises RuntimeError when the record is not indexed.
    def _estdoc
      hit = self.class.est_search('', :attributes => ["record_id NUMEQ #{self.id.to_s}", "record_class STREQ #{self.class.to_s}"])[:records][0]
      raise "[EstDoc] #{self.class.to_s} #{self.id.to_s} is not in the index" unless hit
      hit.estdoc
    end

    # Renders one attribute as an XML element line with an escaped value.
    def _est_xml_element(name, value)
      " <#{name}>#{_est_xml_escape(value)}</#{name}>"
    end

    # Escapes the XML special characters in `value`.
    def _est_xml_escape(value)
      value.to_s.gsub(/[&<>"']/) { |char| XML_ESCAPES[char] }
    end
  end
end
204
+
205
+ ActiveRecord::Base.send :include, ActsAsEstraierDoc
@@ -0,0 +1,3 @@
1
module ActsAsEstraierDoc
  # Gem version; frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,13 @@
1
+ require 'estraierpure'
2
+
3
module EstraierPure
  # Re-opens NodeResult so that a result set can be used with the Enumerable
  # methods (map, select, to_a, ...).
  class NodeResult
    include Enumerable

    # Yields each result document, from index 0 up to doc_num - 1.
    # Returns self when a block is given, otherwise an enumerator over the
    # documents (previously a missing block raised LocalJumpError).
    def each
      return enum_for(:each) unless block_given?
      doc_num.times { |index| yield get_doc(index) }
      self
    end
  end
end
@@ -0,0 +1,1193 @@
1
+ #--
2
+ # Ruby interface of Hyper Estraier
3
+ # Copyright (C) 2004-2007 Mikio Hirabayashi
4
+ # All rights reserved.
5
+ # This file is part of Hyper Estraier.
6
+ # Redistribution and use in source and binary forms, with or without modification, are
7
+ # permitted provided that the following conditions are met:
8
+ #
9
+ # * Redistributions of source code must retain the above copyright notice, this list of
10
+ # conditions and the following disclaimer.
11
+ # * Redistributions in binary form must reproduce the above copyright notice, this list of
12
+ # conditions and the following disclaimer in the documentation and/or other materials
13
+ # provided with the distribution.
14
+ # * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
15
+ # endorse or promote products derived from this software without specific prior written
16
+ # permission.
17
+ #
18
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
19
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21
+ # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
+ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
26
+ # OF THE POSSIBILITY OF SUCH DAMAGE.
27
+ #++
28
+ #:include:overview
29
+
30
+
31
+ require "uri"
32
+ require "cgi"
33
+ require "socket"
34
+ require "stringio"
35
+
36
+
37
+
38
+ #
39
+ # Module for the namespace of Hyper Estraier
40
+ #
41
+ module EstraierPure
42
+ #----------------------------------------------------------------
43
+ #++ Abstraction of document.
44
+ #----------------------------------------------------------------
45
+ class Document
46
+ #--------------------------------
47
+ # public methods
48
+ #--------------------------------
49
+ public
50
+ # Add an attribute.
51
+ # `name' specifies the name of an attribute.
52
+ # `value' specifies the value of the attribute. If it is `nil', the attribute is removed.
53
+ # The return value is always `nil'.
54
+ def add_attr(name, value)
55
+ Utility::check_types({ name=>String, value=>String }) if $DEBUG
56
+ name = name.gsub(/[ \t\r\n\v\f]+/, " ")
57
+ name = name.strip.squeeze(" ")
58
+ value = value.gsub(/[ \t\r\n\v\f]+/, " ")
59
+ value = value.strip.squeeze(" ")
60
+ @attrs[name] = value
61
+ nil
62
+ end
63
+ # Add a sentence of text.
64
+ # `text' specifies a sentence of text.
65
+ # The return value is always `nil'.
66
+ def add_text(text)
67
+ Utility::check_types({ text=>String }) if $DEBUG
68
+ text = text.gsub(/[ \t\r\n\v\f]+/, " ")
69
+ text = text.strip.squeeze(" ")
70
+ @dtexts.push(text) if text.length
71
+ nil
72
+ end
73
+ # Add a hidden sentence.
74
+ # `text' specifies a hidden sentence.
75
+ # The return value is always `nil'.
76
+ def add_hidden_text(text)
77
+ Utility::check_types({ text=>String }) if $DEBUG
78
+ text = text.gsub(/[ \t\r\n\v\f]+/, " ")
79
+ text = text.strip.squeeze(" ")
80
+ @htexts.push(text) if text.length
81
+ nil
82
+ end
83
+ # Attach keywords.
84
+ # `kwords' specifies a map object of keywords. Keys of the map should be keywords of the
85
+ # document and values should be their scores in decimal string.
86
+ # The return value is always `nil'.
87
+ def set_keywords(kwords)
88
+ Utility::check_types({ kwords=>Hash }) if $DEBUG
89
+ @kwords = kwords
90
+ nil
91
+ end
92
+ # Set the substitute score.
93
+ # `score' specifies the substitute score. It it is negative, the substitute score setting is
94
+ # nullified.
95
+ # The return value is always `nil'.
96
+ def set_score(score)
97
+ Utility::check_types({ score=>Integer }) if $DEBUG
98
+ @score = score
99
+ nil
100
+ end
101
+ # Get the ID number.
102
+ # The return value is the ID number of the document object. If the object has never been
103
+ # registered, -1 is returned.
104
+ def id()
105
+ @id
106
+ end
107
+ # Get an array of attribute names of a document object.
108
+ # The return value is an array object of attribute names.
109
+ def attr_names()
110
+ @attrs.keys.sort
111
+ end
112
+ # Get the value of an attribute.
113
+ # `name' specifies the name of an attribute.
114
+ # The return value is the value of the attribute or `nil' if it does not exist.
115
+ def attr(name)
116
+ Utility::check_types({ name=>String }) if $DEBUG
117
+ @attrs[name]
118
+ end
119
+ # Get an array of sentences of the text.
120
+ # The return value is an array object of sentences of the text.
121
+ def texts()
122
+ @dtexts
123
+ end
124
+ # Concatenate sentences of the text of a document object.
125
+ # The return value is concatenated sentences.
126
+ def cat_texts()
127
+ buf = StringIO::new
128
+ for i in 0...@dtexts.length
129
+ buf.write(" ") if i > 0
130
+ buf.write(@dtexts[i])
131
+ end
132
+ buf.string
133
+ end
134
+ # Dump draft data of a document object.
135
+ # The return value is draft data.
136
+ def dump_draft()
137
+ buf = StringIO::new
138
+ keys = @attrs.keys.sort
139
+ for i in 0...keys.length
140
+ buf.printf("%s=%s\n", keys[i], @attrs[keys[i]])
141
+ end
142
+ if @kwords
143
+ buf.printf("%%VECTOR")
144
+ @kwords.each() do |key, value|
145
+ buf.printf("\t%s\t%s", key, value)
146
+ end
147
+ buf.printf("\n")
148
+ end
149
+ buf.printf("%%SCORE\t%d\n", @score) if @score >= 0
150
+ buf.printf("\n")
151
+ for i in 0...@dtexts.length
152
+ buf.printf("%s\n", @dtexts[i])
153
+ end
154
+ for i in 0...@htexts.length
155
+ buf.printf("\t%s\n", @htexts[i])
156
+ end
157
+ buf.string
158
+ end
159
+ # Get attached keywords.
160
+ # The return value is a map object of keywords and their scores in decimal string. If no
161
+ # keyword is attached, `nil' is returned.
162
+ def keywords()
163
+ @kwords
164
+ end
165
+ # Get the substitute score.
166
+ # The return value is the substitute score or -1 if it is not set.
167
+ def score()
168
+ return -1 if(@score < 0)
169
+ @score
170
+ end
171
+ #--------------------------------
172
+ # private methods
173
+ #--------------------------------
174
+ private
175
+ # Create a document object.
176
+ # `draft' specifies a string of draft data.
177
+ def initialize(draft = "")
178
+ Utility::check_types({ draft=>String }) if $DEBUG
179
+ @id = -1
180
+ @attrs = {}
181
+ @dtexts = []
182
+ @htexts = []
183
+ @kwords = nil
184
+ @score = -1
185
+ if draft.length
186
+ lines = draft.split(/\n/, -1)
187
+ num = 0
188
+ while num < lines.length
189
+ line = lines[num]
190
+ num += 1
191
+ break if line.length < 1
192
+ if line =~ /^%/
193
+ if line =~ /^%VECTOR\t/
194
+ @kwords = {} unless @kwords
195
+ fields = line.split(/\t/)
196
+ i = 1
197
+ while i < fields.length - 1
198
+ @kwords[fields[i]] = fields[i+1]
199
+ i += 2
200
+ end
201
+ elsif line =~ /^%SCORE\t/
202
+ fields = line.split(/\t/)
203
+ @score = fields[1].to_i;
204
+ end
205
+ next
206
+ end
207
+ line = line.gsub(/[ \t\r\n\v\f]+/, " ")
208
+ line = line.strip.squeeze(" ")
209
+ if idx = line.index("=")
210
+ key = line[0...idx]
211
+ value = line[idx+1...line.length]
212
+ @attrs[key] = value
213
+ end
214
+ end
215
+ while num < lines.length
216
+ line = lines[num]
217
+ num += 1
218
+ next unless line.length > 0
219
+ if line[0] == 0x9
220
+ @htexts.push(line[1...line.length]) if line.length > 1
221
+ else
222
+ @dtexts.push(line)
223
+ end
224
+ end
225
+ end
226
+ end
227
+ end
228
+ #----------------------------------------------------------------
229
+ #++ Abstraction of search condition.
230
+ #----------------------------------------------------------------
231
+ class Condition
232
+ #--------------------------------
233
+ # public constants
234
+ #--------------------------------
235
+ public
236
+ # option: check every N-gram key
237
+ SURE = 1 << 0
238
+ # option: check N-gram keys skipping by one
239
+ USUAL = 1 << 1
240
+ # option: check N-gram keys skipping by two
241
+ FAST = 1 << 2
242
+ # option: check N-gram keys skipping by three
243
+ AGITO = 1 << 3
244
+ # option: without TF-IDF tuning
245
+ NOIDF = 1 << 4
246
+ # option: with the simplified phrase
247
+ SIMPLE = 1 << 10
248
+ # option: with the rough phrase
249
+ ROUGH = 1 << 11
250
+ # option: with the union phrase
251
+ UNION = 1 << 15
252
+ # option: with the intersection phrase
253
+ ISECT = 1 << 16
254
+ #--------------------------------
255
+ # public methods
256
+ #--------------------------------
257
+ public
258
+ # Set the search phrase.
259
+ # `phrase' specifies a search phrase.
260
+ # The return value is always `nil'.
261
+ def set_phrase(phrase)
262
+ Utility::check_types({ phrase=>String }) if $DEBUG
263
+ phrase = phrase.gsub(/[ \t\r\n\v\f]+/, " ")
264
+ phrase = phrase.strip.squeeze(" ")
265
+ @phrase = phrase
266
+ nil
267
+ end
268
+ # Add an expression for an attribute.
269
+ # `expr' specifies an expression for an attribute.
270
+ # The return value is always `nil'.
271
+ def add_attr(expr)
272
+ Utility::check_types({ expr=>String }) if $DEBUG
273
+ expr = expr.gsub(/[ \t\r\n\v\f]+/, " ")
274
+ expr = expr.strip.squeeze(" ")
275
+ @attrs.push(expr)
276
+ nil
277
+ end
278
+ # Set the order of a condition object.
279
+ # `expr' specifies an expression for the order. By default, the order is by score descending.
280
+ # The return value is always `nil'.
281
+ def set_order(expr)
282
+ Utility::check_types({ expr=>String }) if $DEBUG
283
+ expr = expr.gsub(/[ \t\r\n\v\f]+/, " ")
284
+ expr = expr.strip.squeeze(" ")
285
+ @order = expr
286
+ nil
287
+ end
288
+ # Set the maximum number of retrieval.
289
+ # `max' specifies the maximum number of retrieval. By default, the number of retrieval is
290
+ # not limited.
291
+ # The return value is always `nil'.
292
+ def set_max(max)
293
+ Utility::check_types({ max=>Integer }) if $DEBUG
294
+ @max = max if max >= 0
295
+ nil
296
+ end
297
+ # Set the number of skipped documents.
298
+ # `skip' specifies the number of documents to be skipped in the search result.
299
+ # The return value is always `nil'.
300
+ def set_skip(skip)
301
+ Utility::check_types({ skip=>Integer }) if $DEBUG
302
+ @skip = skip if skip >= 0
303
+ nil
304
+ end
305
+ # Set options of retrieval.
306
+ # `options' specifies options: `Condition::SURE' specifies that it checks every N-gram
307
+ # key, `Condition::USU', which is the default, specifies that it checks N-gram keys
308
+ # with skipping one key, `Condition::FAST' skips two keys, `Condition::AGITO'
309
+ # skips three keys, `Condition::NOIDF' specifies not to perform TF-IDF tuning,
310
+ # `Condition::SIMPLE' specifies to use simplified phrase, `Condition::ROUGH' specifies to use
311
+ # rough phrase, `Condition.UNION' specifies to use union phrase, `Condition.ISECT' specifies
312
+ # to use intersection phrase. Each option can be specified at the same time by bitwise or.
313
+ # If keys are skipped, though search speed is improved, the relevance ratio grows less.
314
+ # The return value is always `nil'.
315
+ def set_options(options)
316
+ Utility::check_types({ options=>Integer }) if $DEBUG
317
+ @options |= options
318
+ nil
319
+ end
320
+ # Set permission to adopt result of the auxiliary index.
321
+ # `min' specifies the minimum hits to adopt result of the auxiliary index. If it is not more
322
+ # than 0, the auxiliary index is not used. By default, it is 32.
323
+ # The return value is always `nil'.
324
+ def set_auxiliary(min)
325
+ Utility::check_types({ min=>Integer }) if $DEBUG
326
+ @auxiliary = min
327
+ nil
328
+ end
329
+ # Set the attribute distinction filter.
330
+ # `name' specifies the name of an attribute to be distinct.
331
+ # The return value is always `nil'.
332
+ def set_distinct(name)
333
+ Utility::check_types({ name=>String }) if $DEBUG
334
+ name = name.gsub(/[ \t\r\n\v\f]+/, " ")
335
+ name = name.strip.squeeze(" ")
336
+ @distinct = name
337
+ nil
338
+ end
339
+ # Set the mask of targets of meta search.
340
+ # `mask' specifies a masking number. 1 means the first target, 2 means the second target, 4
341
+ # means the third target, and power values of 2 and their summation compose the mask.
342
+ # The return value is always `nil'.
343
+ def set_mask(mask)
344
+ Utility::check_types({ mask=>Integer }) if $DEBUG
345
+ @mask = mask
346
+ nil
347
+ end
348
+ # Get the search phrase.
349
+ # The return value is the search phrase.
350
+ def phrase()
351
+ @phrase
352
+ end
353
+ # Get expressions for attributes.
354
+ # The return value is expressions for attributes.
355
+ def attrs()
356
+ @attrs
357
+ end
358
+ # Get the order expression.
359
+ # The return value is the order expression.
360
+ def order()
361
+ @order
362
+ end
363
+ # Get the maximum number of retrieval.
364
+ # The return value is the maximum number of retrieval.
365
+ def max()
366
+ @max
367
+ end
368
+ # Get the number of skipped documents.
369
+ # The return value is the number of documents to be skipped in the search result.
370
+ def skip()
371
+ @skip
372
+ end
373
+ # Get options of retrieval.
374
+ # The return value is options by bitwise or.
375
+ def options()
376
+ @options
377
+ end
378
+ # Get permission to adopt result of the auxiliary index.
379
+ # The return value is permission to adopt result of the auxiliary index.
380
+ def auxiliary()
381
+ @auxiliary
382
+ end
383
+ # Get the attribute distinction filter.
384
+ # The return value is the name of the distinct attribute.
385
+ def distinct()
386
+ @distinct
387
+ end
388
+ # Get the mask of targets of meta search.
389
+ # The return value is the mask of targets of meta search.
390
+ def mask()
391
+ @mask
392
+ end
393
+ #--------------------------------
394
+ # private methods
395
+ #--------------------------------
396
+ private
397
+ # Create a search condition object.
398
+ def initialize()
399
+ @phrase = nil
400
+ @attrs = []
401
+ @order = nil
402
+ @max = -1
403
+ @skip = 0
404
+ @options = 0
405
+ @auxiliary = 32
406
+ @distinct = nil
407
+ @mask = 0
408
+ end
409
+ end
410
+ #----------------------------------------------------------------
411
+ #++ Abstraction of document in result set.
412
+ #----------------------------------------------------------------
413
+ class ResultDocument
414
+ #--------------------------------
415
+ # public methods
416
+ #--------------------------------
417
+ public
418
+ # Get the URI.
419
+ # The return value is the URI of the result document object.
420
+ def uri()
421
+ @uri
422
+ end
423
+ # Get an array of attribute names.
424
+ # The return value is an array object of attribute names.
425
+ def attr_names()
426
+ @attrs.keys.sort
427
+ end
428
+ # Get the value of an attribute.
429
+ # The return value is the value of the attribute or `nil' if it does not exist.
430
+ def attr(name)
431
+ Utility::check_types({ name=>String }) if $DEBUG
432
+ @attrs[name]
433
+ end
434
+ # Get the snippet of a result document object.
435
+ # The return value is a string of the snippet of the result document object. There are tab
436
+ # separated values. Each line is a string to be shown. Though most lines have only one
437
+ # field, some lines have two fields. If the second field exists, the first field is to be
438
+ # shown with highlighted, and the second field means its normalized form.
439
+ def snippet()
440
+ @snippet
441
+ end
442
+ # Get keywords.
443
+ # The return value is a string of serialized keywords of the result document object. There
444
+ # are tab separated values. Keywords and their scores come alternately.
445
+ def keywords()
446
+ @keywords
447
+ end
448
+ #--------------------------------
449
+ # private methods
450
+ #--------------------------------
451
+ private
452
+ # Create a result document object.
453
+ def initialize(uri, attrs, snippet, keywords)
454
+ Utility::check_types({ uri=>String, attrs=>Hash,
455
+ snippet=>String, keywords=>String }) if $DEBUG
456
+ @uri = uri
457
+ @attrs = attrs
458
+ @snippet = snippet
459
+ @keywords = keywords
460
+ end
461
+ end
462
+ #----------------------------------------------------------------
463
+ #++ Abstraction of result set from node.
464
+ #----------------------------------------------------------------
465
+ class NodeResult
466
+ #--------------------------------
467
+ # public methods
468
+ #--------------------------------
469
+ public
470
+ # Get the number of documents.
471
+ # The return value is the number of documents.
472
+ def doc_num()
473
+ @docs.length
474
+ end
475
+ # Get the value of hint information.
476
+ # The return value is a result document object or `nil' if the index is out of bounds.
477
+ def get_doc(index)
478
+ Utility::check_types({ index=>Integer }) if $DEBUG
479
+ return nil if index < 0 || index >= @docs.length
480
+ @docs[index]
481
+ end
482
+ # Get the value of hint information.
483
+ # `key' specifies the key of a hint. "VERSION", "NODE", "HIT", "HINT#n", "DOCNUM", "WORDNUM",
484
+ # "TIME", "TIME#n", "LINK#n", and "VIEW" are provided for keys.
485
+ # The return value is the hint or `nil' if the key does not exist.
486
+ def hint(key)
487
+ Utility::check_types({ key=>String }) if $DEBUG
488
+ @hints[key]
489
+ end
490
+ #--------------------------------
491
+ # private methods
492
+ #--------------------------------
493
+ private
494
+ # Create a node result object.
495
+ def initialize(docs, hints)
496
+ Utility::check_types({ docs=>Array, hints=>Hash }) if $DEBUG
497
+ @docs = docs
498
+ @hints = hints
499
+ end
500
+ end
501
+ #----------------------------------------------------------------
502
+ #++ Abstraction of connection to P2P node.
503
+ #----------------------------------------------------------------
504
+ class Node
505
+ #--------------------------------
506
+ # public methods
507
+ #--------------------------------
508
+ public
509
+ # Set the URL of a node server.
510
+ # `url' specifies the URL of a node.
511
+ # The return value is always `nil'.
512
+ def set_url(url)
513
+ Utility::check_types({ url=>String }) if $DEBUG
514
+ @url = url
515
+ nil
516
+ end
517
+ # Set the proxy information.
518
+ # `host' specifies the host name of a proxy server.
519
+ # `port' specifies the port number of the proxy server.
520
+ # The return value is always `nil'.
521
+ def set_proxy(host, port)
522
+ Utility::check_types({ host=>String, port=>Integer }) if $DEBUG
523
+ @pxhost = host
524
+ @pxport = port
525
+ nil
526
+ end
527
+ # Set timeout of a connection.
528
+ # `sec' specifies timeout of the connection in seconds.
529
+ # The return value is always `nil'.
530
+ def set_timeout(sec)
531
+ Utility::check_types({ sec=>Integer }) if $DEBUG
532
+ @timeout = sec
533
+ nil
534
+ end
535
+ # Set the authentication information.
536
+ # `name' specifies the name of authentication.
537
+ # `passwd' specifies the password of the authentication.
538
+ # The return value is always `nil'.
539
+ def set_auth(name, password)
540
+ Utility::check_types({ name=>String, password=>String }) if $DEBUG
541
+ @auth = name + ":" + password
542
+ nil
543
+ end
544
+ # Get the status code of the last request.
545
+ # The return value is the status code of the last request. -1 means failure of connection.
546
+ def status()
547
+ @status
548
+ end
549
+ # Synchronize updating contents of the database.
550
+ # The return value is true if success, else it is false.
551
+ def sync()
552
+ @status = -1
553
+ return false unless @url
554
+ turl = @url + "/sync"
555
+ reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
556
+ reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
557
+ rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, "", nil, nil)
558
+ @status = rv
559
+ rv == 200
560
+ end
561
+ # Optimize the database.
562
+ # The return value is true if success, else it is false.
563
+ def optimize()
564
+ @status = -1
565
+ return false unless @url
566
+ turl = @url + "/optimize"
567
+ reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
568
+ reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
569
+ rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, "", nil, nil)
570
+ @status = rv
571
+ rv == 200
572
+ end
573
# Add a document.
# `doc' is a document object; its `dump_draft' output is posted to the
# node's "/put_doc" endpoint as text/x-estraier-draft. The document should
# carry the URI attribute.
# Returns true when the server answers 200, otherwise false.
def put_doc(doc)
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/put_doc"
  headers = ["Content-Type: text/x-estraier-draft"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  draft = doc.dump_draft
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
588
# Remove a document.
# `id' is the ID number of a registered document; it is sent as the `id'
# form field to the node's "/out_doc" endpoint.
# Returns true when the server answers 200, otherwise false.
def out_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  if @url
    endpoint = @url + "/out_doc"
    headers = ["Content-Type: application/x-www-form-urlencoded"]
    headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    form = "id=" + id.to_s
    @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, nil)
    @status == 200
  else
    false
  end
end
603
# Remove a document specified by URI.
# `uri' is the URI of a registered document; it is URL-escaped and sent as
# the `uri' form field to the node's "/out_doc" endpoint.
# Returns true when the server answers 200, otherwise false.
def out_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return false if !@url
  endpoint = @url + "/out_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  form = "uri=" + CGI::escape(uri)
  code = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, nil)
  @status = code
  return code == 200
end
618
# Edit attributes of a document.
# `doc' is a document object; its `dump_draft' output is posted to the
# node's "/edit_doc" endpoint as text/x-estraier-draft.
# Returns true when the server answers 200, otherwise false.
def edit_doc(doc)
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  endpoint = @url + "/edit_doc"
  headers = ["Content-Type: text/x-estraier-draft"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  draft = doc.dump_draft
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
633
# Retrieve a document.
# `id' is the ID number of a registered document; it is sent as the `id'
# form field to the node's "/get_doc" endpoint.
# Returns a Document built from the response body, or `nil' when no URL is
# configured or the server does not answer 200.
def get_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  received = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, received)
  @status == 200 ? Document::new(received.string) : nil
end
650
# Retrieve a document specified by URI.
# `uri' is the URI of a registered document; it is URL-escaped and sent as
# the `uri' form field to the node's "/get_doc" endpoint.
# Returns a Document built from the response body, or `nil' when no URL is
# configured or the server does not answer 200.
def get_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  if @url
    endpoint = @url + "/get_doc"
    headers = ["Content-Type: application/x-www-form-urlencoded"]
    headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    form = "uri=" + CGI::escape(uri)
    received = StringIO::new
    code = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, received)
    @status = code
    return nil unless code == 200
    Document::new(received.string)
  end
end
667
# Retrieve the value of an attribute of a document.
# `id' is the ID number of a registered document.
# `name' is the name of an attribute; it is URL-escaped into the `attr'
# form field posted to the node's "/get_doc_attr" endpoint.
# Returns the response body without its trailing line break, or `nil' when
# no URL is configured or the server does not answer 200.
def get_doc_attr(id, name)
  Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/get_doc_attr"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s + "&attr=" + CGI::escape(name)
  received = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, received)
  @status == 200 ? received.string.chomp : nil
end
685
# Retrieve the value of an attribute of a document specified by URI.
# `uri' is the URI of a registered document.
# `name' is the name of an attribute.
# Both values are URL-escaped into the `uri' and `attr' form fields posted
# to the node's "/get_doc_attr" endpoint.
# Returns the response body without its trailing line break, or `nil' when
# no URL is configured or the server does not answer 200.
def get_doc_attr_by_uri(uri, name)
  Utility::check_types({ uri=>String, name=>String }) if $DEBUG
  @status = -1
  if @url
    endpoint = @url + "/get_doc_attr"
    headers = ["Content-Type: application/x-www-form-urlencoded"]
    headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    form = "uri=" + CGI::escape(uri) + "&attr=" + CGI::escape(name)
    received = StringIO::new
    code = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, received)
    @status = code
    return nil unless code == 200
    received.string.chomp
  end
end
703
# Extract keywords of a document.
# `id' is the ID number of a registered document; it is sent as the `id'
# form field to the node's "/etch_doc" endpoint.
# Returns a hash mapping each keyword to its score (kept as the decimal
# string the server sent), or `nil' when no URL is configured or the server
# does not answer 200. Response lines with fewer than two tab-separated
# fields are ignored.
def etch_doc(id)
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/etch_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  received = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, received)
  return nil unless @status == 200
  keywords = {}
  received.string.split(/\n/, -1).each do |row|
    fields = row.split(/\t/)
    keywords[fields[0]] = fields[1] if fields.length >= 2
  end
  keywords
end
728
# Extract keywords of a document specified by URI.
# `uri' is the URI of a registered document; it is URL-escaped and sent as
# the `uri' form field to the node's "/etch_doc" endpoint.
# Returns a hash mapping each keyword to its score (kept as the decimal
# string the server sent), or `nil' when no URL is configured or the server
# does not answer 200. Response lines with fewer than two tab-separated
# fields are ignored.
def etch_doc_by_uri(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil if !@url
  endpoint = @url + "/etch_doc"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  if @auth
    headers.push("Authorization: Basic " + Utility::base_encode(@auth))
  end
  form = "uri=" + CGI::escape(uri)
  received = StringIO::new
  code = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, received)
  @status = code
  return nil if code != 200
  keywords = {}
  received.string.split(/\n/, -1).each do |row|
    word, score = row.split(/\t/)
    next if score.nil?
    keywords[word] = score
  end
  keywords
end
753
# Get the ID of a document specified by URI.
# `uri' is the URI of a registered document; it is URL-escaped and sent as
# the `uri' form field to the node's "/uri_to_id" endpoint.
# Returns the ID of the document as an Integer. On every error path (no URL
# configured, a response other than 200, or a response body that is not a
# decimal number) -1 is returned, as documented.
def uri_to_id(uri)
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return -1 unless @url
  turl = @url + "/uri_to_id"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  reqbody = "uri=" + CGI::escape(uri)
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  # Failure used to yield nil here although the no-URL path above and the
  # documentation both use -1.
  return -1 if rv != 200
  # The ID is consumed as an Integer by get_doc/out_doc, so convert the
  # textual response instead of handing back the raw string.
  text = resbody.string.strip
  text =~ /\A\d+\z/ ? text.to_i : -1
end
770
# Get the name.
# The node information is fetched through `set_info' the first time the
# name is needed. Returns the name, or `nil' if it could not be obtained.
def name
  set_info unless @name
  return @name
end
776
# Get the label.
# The node information is fetched through `set_info' the first time the
# label is needed. Returns the label, or `nil' if it could not be obtained.
def label
  set_info unless @label
  return @label
end
782
# Get the number of documents.
# While the cached count is still negative, the node information is fetched
# through `set_info' first. Returns the number of documents; it stays -1
# when the information could not be obtained.
def doc_num
  if @dnum < 0
    set_info
  end
  return @dnum
end
788
# Get the number of unique words.
# While the cached count is still negative, the node information is fetched
# through `set_info' first. Returns the number of unique words; it stays -1
# when the information could not be obtained.
def word_num
  if @wnum < 0
    set_info
  end
  return @wnum
end
794
# Get the size of the database.
# While the cached size is still negative, the node information is fetched
# through `set_info' first. Returns the size of the database; it stays -1.0
# when the information could not be obtained.
def size
  if @size < 0.0
    set_info
  end
  return @size
end
800
# Get the usage ratio of the cache.
# Issues a request without a body (so "GET" is used) to the node's
# "/cacheusage" endpoint and parses the response body as a float.
# Returns the usage ratio, or -1.0 when no URL is configured or the server
# does not answer 200.
def cache_usage()
  @status = -1
  if @url
    endpoint = @url + "/cacheusage"
    headers = []
    headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    received = StringIO::new
    @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, nil, nil, received)
    @status == 200 ? received.string.strip.to_f : -1.0
  else
    -1.0
  end
end
814
# Get an array of names of administrators.
# The node information is fetched through `set_info' the first time the
# list is needed. Returns an array of administrator names, or `nil' if the
# information could not be obtained.
def admins
  set_info if !@admins
  return @admins
end
821
# Get an array of names of users.
# The node information is fetched through `set_info' the first time the
# list is needed. Returns an array of user names, or `nil' if the
# information could not be obtained.
def users
  set_info if !@users
  return @users
end
827
# Get an array of expressions of links.
# The node information is fetched through `set_info' the first time the
# list is needed. Each element is a TSV string with three fields: the URL,
# the label, and the score. Returns the array, or `nil' if the information
# could not be obtained.
def links
  set_info if !@links
  return @links
end
834
# Search for documents corresponding to a condition.
# `cond' is a condition object.
# `depth' is the depth of meta search.
# The condition is serialized with the configured snippet widths and posted
# to the node's "/search" endpoint. The first response line is the border
# string; a line beginning with the border closes the hint section and each
# document part, and the border followed by ":END" closes the whole result.
# Returns a node result object, or `nil' when no URL is configured, the
# server does not answer 200, the body is empty, or the ":END" border is
# never seen.
def search(cond, depth)
  Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  endpoint = @url + "/search"
  headers = ["Content-Type: application/x-www-form-urlencoded"]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  query = Utility::cond_to_query(cond, depth, @wwidth, @hwidth, @awidth)
  received = StringIO::new
  @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, query, nil, received)
  return nil unless @status == 200
  lines = received.string.split(/\n/, -1)
  return nil if lines.empty?
  docs = []
  hints = {}
  # The result object is created first and shares these two containers,
  # which are filled in below.
  result = NodeResult::new(docs, hints)
  border = lines[0]
  finished = false
  pos = 1
  # Hint section: "key<TAB>value" lines up to the first border line.
  while pos < lines.length
    line = lines[pos]
    pos += 1
    if line.start_with?(border)
      finished = true if line[border.length..-1] == ":END"
      break
    end
    key, tab, value = line.partition("\t")
    hints[key] = value unless tab.empty?
  end
  # Document parts: everything between two consecutive border lines.
  part_start = pos
  until finished || pos >= lines.length
    line = lines[pos]
    pos += 1
    next unless line.start_with?(border)
    part_end = pos - 1
    attrs = {}
    snippet = StringIO::new
    vector = ""
    cur = part_start
    # Attribute lines run up to the first blank line of the part.
    while cur < part_end
      meta = lines[cur].strip
      cur += 1
      break if meta.empty?
      if meta.start_with?("%")
        tab_at = meta.index("\t")
        vector = meta[(tab_at + 1)..-1] if meta.include?("%VECTOR") && tab_at
      else
        key, eq, value = meta.partition("=")
        attrs[key] = value unless eq.empty?
      end
    end
    # Whatever remains of the part is the snippet text.
    lines[cur...part_end].each { |text| snippet.printf("%s\n", text) }
    uri = attrs["@uri"]
    docs.push(ResultDocument::new(uri, attrs, snippet.string, vector)) if uri
    part_start = pos
    finished = true if line[border.length..-1] == ":END"
  end
  finished ? result : nil
end
917
# Set the widths of snippets in search results.
# `wwidth' is the whole width of a snippet. By default, it is 480. If it is
# 0, no snippet is sent. If it is negative, the whole body text is sent
# instead of a snippet. It is always stored.
# `hwidth' is the width of strings picked up from the beginning of the text.
# By default, it is 96. If it is negative, the current setting is kept.
# `awidth' is the width of strings picked up around each highlighted word.
# By default, it is 96. If it is negative, the current setting is kept.
def set_snippet_width(wwidth, hwidth, awidth)
  @wwidth = wwidth
  if hwidth >= 0
    @hwidth = hwidth
  end
  if awidth >= 0
    @awidth = awidth
  end
end
929
# Manage a user account of a node.
# `name' is the name of a user.
# `mode' is the operation mode. 0 means to delete the account. 1 means to
# set the account as an administrator. 2 means to set the account as a guest.
# The escaped name and the mode are posted as form fields to the node's
# "/_set_user" endpoint.
# Returns true when the server answers 200, otherwise false.
def set_user(name, mode)
  Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
  @status = -1
  if @url
    endpoint = @url + "/_set_user"
    headers = ["Content-Type: application/x-www-form-urlencoded"]
    headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
    form = "name=" + CGI::escape(name) + "&mode=" + mode.to_s
    @status = Utility::shuttle_url(endpoint, @pxhost, @pxport, @timeout, headers, form, nil, nil)
    @status == 200
  else
    false
  end
end
946
# Manage a link of a node.
# `url' is the URL of the target node of a link.
# `label' is the label of the link.
# `credit' is the credit of the link. If it is negative, no `credit' field
# is sent, which removes the link.
# The fields are posted to the node's "/_set_link" endpoint.
# Returns true when the server answers 200, otherwise false.
def set_link(url, label, credit)
  Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  turl = @url + "/_set_link"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The label is free text, so it must be escaped like every other form
  # value; sent raw, a "&", "=", "+" or "%" in it corrupts the request body.
  reqbody = "url=" + CGI::escape(url) + "&label=" + CGI::escape(label)
  reqbody += "&credit=" + credit.to_s if credit >= 0
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status = rv
  rv == 200
end
964
+ #--------------------------------
965
+ # private methods
966
+ #--------------------------------
967
+ private
968
# Create a node connection object with nothing configured yet.
# No URL, proxy host or credentials are set; the proxy port and timeout are
# -1. The cached node information starts as "unknown": `nil' for the name,
# label and the three lists, -1 for the counts and -1.0 for the size.
# Snippet widths start at 480 / 96 / 96 and the last status code is -1.
def initialize()
  # connection settings
  @url = @pxhost = @auth = nil
  @pxport = @timeout = -1
  # node information, filled in by set_info
  @name = @label = nil
  @dnum = @wnum = -1
  @size = -1.0
  @admins = @users = @links = nil
  # snippet widths used by search
  @wwidth = 480
  @hwidth = @awidth = 96
  @status = -1
end
988
# Set information of the node.
# Issues a request without a body to the node's "/inform" endpoint and
# caches the result in instance variables. The first response line must hold
# exactly five tab-separated fields: name, label, number of documents,
# number of words and size. It may be followed by three sections
# (administrators, users, links), each introduced by one blank line and
# ended by the next blank line or the end of the response. A section that is
# absent is stored as an empty array.
# Nothing is changed beyond @status when no URL is configured, the server
# does not answer 200, or the first line is malformed.
def set_info()
  @status = -1
  return unless @url
  turl = @url + "/inform"
  reqheads = []
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, nil, nil, resbody)
  @status = rv
  return if rv != 200
  lines = resbody.string.split(/\n/, -1)
  return if lines.length < 1
  elems = lines[0].chomp.split(/\t/)
  return if elems.length != 5
  @name = elems[0]
  @label = elems[1]
  @dnum = elems[2].to_i
  @wnum = elems[3].to_i
  @size = elems[4].to_f
  return if lines.length < 2
  # Read one section starting at `start': skip a single blank separator if
  # present, then collect lines up to the next blank line. Every index is
  # bounds-checked; the links section used to read lines[lnum] past the end
  # of the array and raise NoMethodError on short responses.
  take_section = lambda do |start|
    start += 1 if start < lines.length && lines[start].length < 1
    stop = start
    stop += 1 while stop < lines.length && lines[stop].length >= 1
    [lines[start...stop], stop]
  end
  @admins, lnum = take_section.call(1)
  @users, lnum = take_section.call(lnum)
  rows, lnum = take_section.call(lnum)
  # A link expression is only kept when it has URL, label and score fields.
  @links = rows.select { |row| row.split(/\t/).length == 3 }
end
1035
+ end
1036
+ #:stopdoc:
1037
+ #
1038
+ # Module for utility
1039
+ #
1040
module Utility
  # Check types of arguments.
  # `types' is a hash whose keys are the argument values and whose values
  # are the classes they are expected to be a kind of. `nil' is always
  # accepted. Raises ArgumentError naming the position of the first
  # offending entry.
  def check_types(types)
    position = 0
    types.each do |value, klass|
      position += 1
      next if value.nil? || value.kind_of?(klass)
      raise ArgumentError::new("Argument#" + position.to_s +
                               " should be a kind of " + klass.to_s)
    end
  end
  module_function :check_types

  # Perform an interaction of a URL over HTTP/1.0.
  # `url' is the target URL; only the "http" scheme is handled.
  # `pxhost' is the host name of a proxy. If it is `nil', no proxy is used.
  # `pxport' is the port number of the proxy.
  # `outsec' is the timeout in seconds. If it is negative, there is no timeout.
  # `reqheads' is an array of extension header lines. If it is `nil', it is
  # not used.
  # `reqbody' is the entity body of the request. If it is `nil', the "GET"
  # method is used, otherwise "POST".
  # `resheads' is an array into which the status line and header lines of
  # the response are pushed. If it is `nil', it is not used.
  # `resbody' is a stream object into which the entity body of the response
  # is written. If it is `nil', it is not used.
  # Returns the status code of the response, or -1 on any error or timeout.
  def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
    begin
      status = -1
      th = Thread::start do
        # Failures surface through `join' below and are mapped to -1, so the
        # interpreter's own per-thread error report would only be noise.
        if Thread::current.respond_to?(:report_on_exception=)
          Thread::current.report_on_exception = false
        end
        # `normalize' returns a new URI; its result used to be discarded.
        target = URI::parse(url).normalize
        Thread::current.exit if target.scheme != "http" || !target.host || target.port < 1
        if pxhost
          host = pxhost
          port = pxport
          query = "http://" + target.host + ":" + target.port.to_s + target.path
        else
          host = target.host
          port = target.port
          query = target.path
        end
        query += "?" + target.query if target.query && !reqbody
        sock = nil
        begin
          sock = TCPSocket.open(host, port)
          # The request target goes in as a printf argument, never as the
          # format itself: a "%" in the path or query would otherwise be
          # read as a format directive.
          sock.printf("%s %s HTTP/1.0\r\n", reqbody ? "POST" : "GET", query)
          sock.printf("Host: %s:%d\r\n", target.host, target.port)
          sock.printf("Connection: close\r\n")
          sock.printf("User-Agent: HyperEstraierForRuby/1.0.0\r\n")
          reqheads.each { |head| sock.printf("%s\r\n", head) } if reqheads
          # Content-Length counts bytes; `length' counts characters and
          # would truncate multibyte bodies.
          sock.printf("Content-Length: %d\r\n", reqbody.bytesize) if reqbody
          sock.printf("\r\n")
          sock.write(reqbody) if reqbody
          line = sock.gets
          Thread::current.exit unless line
          line = line.chomp
          # Split on runs of spaces. A pattern that can match the empty
          # string splits the line into single characters, which rejected
          # every response.
          elems = line.split(/ +/)
          Thread::current.exit if elems.length < 3 || !(elems[0] =~ /^HTTP/)
          status = elems[1].to_i
          resheads.push(line) if resheads
          # Header lines run up to and including the first empty line.
          while (line = sock.gets)
            line = line.chomp
            resheads.push(line) if resheads
            break if line.length < 1
          end
          while (buf = sock.read(8192))
            resbody.write(buf) if resbody
          end
        ensure
          sock.close if sock
        end
      end
      if outsec >= 0
        unless th.join(outsec)
          th.exit
          th.join
          return -1
        end
      else
        th.join
      end
      return status
    rescue
      return -1
    end
  end
  module_function :shuttle_url

  # Serialize a condition object into a query string.
  # `cond' is a condition object.
  # `depth' is the depth of meta search.
  # `wwidth' is the whole width of a snippet.
  # `hwidth' is the width of strings picked up from the beginning of the text.
  # `awidth' is the width of strings picked up around each highlighted word.
  # Returns the serialized string of "&"-joined form fields.
  def cond_to_query(cond, depth, wwidth, hwidth, awidth)
    fields = []
    fields << ("phrase=" + CGI::escape(cond.phrase)) if cond.phrase
    cond.attrs.length.times do |idx|
      fields << ("attr" + (idx + 1).to_s + "=" + CGI::escape(cond.attrs[idx]))
    end
    fields << ("order=" + CGI::escape(cond.order)) if cond.order
    # A negative maximum is sent as 1 << 30.
    fields << ("max=" + (cond.max >= 0 ? cond.max : 1 << 30).to_s)
    fields << ("options=" + cond.options.to_s) if cond.options > 0
    fields << ("auxiliary=" + cond.auxiliary.to_s)
    fields << ("distinct=" + CGI::escape(cond.distinct)) if cond.distinct
    fields << ("depth=" + depth.to_s) if depth > 0
    fields << ("wwidth=" + wwidth.to_s)
    fields << ("hwidth=" + hwidth.to_s)
    fields << ("awidth=" + awidth.to_s)
    fields << ("skip=" + cond.skip.to_s)
    fields << ("mask=" + cond.mask.to_s)
    fields.join("&")
  end
  module_function :cond_to_query

  # Encode a byte sequence with Base64 encoding.
  # `data' is a string object.
  # Returns the encoded string with the line breaks inserted by `pack'
  # removed, so it can be used inside a single header line.
  def base_encode(data)
    [data].pack("m").delete(" \n")
  end
  module_function :base_encode
end
1189
+ end
1190
+
1191
+
1192
+
1193
+ # END OF FILE