acts_as_estraier_doc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/acts_as_estraier_doc.gemspec +19 -0
- data/lib/acts_as_estraier_doc.rb +205 -0
- data/lib/acts_as_estraier_doc/version.rb +3 -0
- data/lib/estraierpure_ext.rb +13 -0
- data/vendor/estraierpure.rb +1193 -0
- data/vendor/overview +100 -0
- metadata +91 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Takatoshi MORIYAMA
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# ActsAsEstraierDoc
|
2
|
+
|
3
|
+
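Integrates ActiveRecord models with a Hyper Estraier full-text search node: records are indexed after save, removed from the index before destroy, and can be searched with `est_search`.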
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'acts_as_estraier_doc'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install acts_as_estraier_doc
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
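Call `acts_as_estraier_doc :node => { :node => 'mynode', :user => 'admin', :pass => 'secret' }` in an ActiveRecord model that defines `est_uri`, `est_title` and `est_texts` (host and port default to `localhost:1978`), then search with `Model.est_search('phrase', :limit => 10)`; the result is a hash with `:records` and `:info`.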
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/acts_as_estraier_doc/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for acts_as_estraier_doc.
Gem::Specification.new do |gem|
  gem.authors       = ["Takatoshi MORIYAMA"]
  gem.email         = ["hawk@at-exit.com"]
  gem.description   = %q{Acts as EstraierDoc}
  gem.summary       = %q{Acts as EstraierDoc}
  gem.homepage      = ""

  # Split the git listing on the input record separator (newline).  The
  # output record separator `$\` is nil unless someone sets it, which makes
  # String#split fall back to splitting on any whitespace and therefore
  # breaks paths that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "acts_as_estraier_doc"
  # 'vendor' is on the load path so the bundled estraierpure.rb can be required.
  gem.require_paths = ['lib', 'vendor']
  gem.version       = ActsAsEstraierDoc::VERSION

  gem.add_runtime_dependency 'activerecord', '~> 2.0'
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
require 'estraierpure_ext'
|
2
|
+
require 'rexml/document'
|
3
|
+
|
4
|
+
# Mixin that keeps ActiveRecord models in sync with a Hyper Estraier node
# and lets them be searched through it.
#
# A model opts in by calling +acts_as_estraier_doc+.  The model is expected to
# implement +est_uri+, +est_title+ and +est_texts+; it may also implement
# +est_attributes+, +est_hidden_texts+ and +est_no_index+.
module ActsAsEstraierDoc
  def self.included(base)
    base.extend ActMethods
  end

  module ActMethods
    # Enable Estraier indexing for the calling model.
    #
    # `options' is merged over the default configuration.  Recognised keys:
    # :condition_options (default Condition::SIMPLE), :depth (default 0) and
    # :node, a hash with :host (default 'localhost'), :port (default 1978),
    # :node, :user and :pass.
    def acts_as_estraier_doc(options = {})
      extend ClassMethods
      send :include, ActsAsEstraierDoc::InstanceMethods
      send :alias_method_chain, :to_xml, :estdoc
      send :alias_method_chain, :to_json, :estdoc
      send :attr_accessor, :estdoc
      send :attr_accessor, :skip_update_est_index

      cattr_accessor :configuration, :estraier_conn

      self.configuration = {
        :condition_options => EstraierPure::Condition::SIMPLE,
        :depth => 0
      }
      self.configuration.update(options) if options.is_a? Hash

      # Build the node settings in a fresh hash: a missing :node option no
      # longer raises, and the caller's hash is not modified.
      node = { :host => 'localhost', :port => 1978 }
      node.update(configuration[:node]) if configuration[:node].is_a?(Hash)
      configuration[:node] = node

      self.estraier_conn = EstraierPure::Node::new
      estraier_conn.set_url("http://#{node[:host]}:#{node[:port]}/node/#{node[:node]}")
      # Credentials are optional; set_auth concatenates strings, so only call
      # it when a user name was actually configured.
      estraier_conn.set_auth(node[:user].to_s, node[:pass].to_s) if node[:user]

      after_save :update_est_index
      before_destroy :remove_est_index
    end
  end

  module ClassMethods
    HINT_KEYS = ['HIT', 'DOCNUM', 'WORDNUM', 'TIME']
    # Number of records loaded per query by indexing!.
    INDEX_BATCH_SIZE = 50

    # Search the node and load the matching records.
    #
    # `phrase' is the search phrase.  `options' may contain
    # :condition_options (one flag or an array of flags), :attributes (one
    # expression or an array), :limit, :offset, :order, :depth, :include,
    # :snippet_wwidth, :snippet_hwidth, :snippet_awidth and :debug.
    #
    # Returns { :records => [...], :info => {...} }.  Records come back in the
    # order the node returned them and carry the matching document in
    # +estdoc+.  Index entries whose record no longer exists are removed.
    # Raises RuntimeError when the node returns no result set.
    #
    # NOTE(review): the query is not restricted by record_class, so a node
    # shared by several models could have foreign entries treated as orphans
    # -- confirm nodes are per-model.
    def est_search(phrase, options = {})
      cond = EstraierPure::Condition::new
      flags = options.include?(:condition_options) ? options[:condition_options] : configuration[:condition_options]
      cond.set_options(Array(flags).inject(0) { |bits, flag| bits | flag })
      cond.set_phrase(phrase.to_s)
      Array(options[:attributes]).each { |expr| cond.add_attr(expr) }
      cond.set_max(options[:limit]) if options.include? :limit
      cond.set_skip(options[:offset]) if options.include? :offset
      cond.set_order(options[:order]) if options.include? :order
      estraier_conn.set_snippet_width(options.fetch(:snippet_wwidth, 480),
                                      options.fetch(:snippet_hwidth, -1),
                                      options.fetch(:snippet_awidth, -1))
      Rails.logger.info cond.inspect if options[:debug]

      # An explicit :depth wins; otherwise use the configured default.
      depth = options.fetch(:depth, configuration[:depth] || 0)
      rs = estraier_conn.search(cond, depth)
      raise "[EstDoc] search failed (status: #{estraier_conn.status})" unless rs

      docs = {}
      ids = []
      rs.each do |doc|
        record_id = doc.attr('record_id').to_i
        ids << record_id unless docs.include?(record_id)
        docs[record_id] = doc
      end

      found = {}
      find(:all, :conditions => {:id => ids}, :include => options[:include]).each do |record|
        found[record.id] = record
      end

      (ids - found.keys).each do |orphaned_id|
        Rails.logger.info "[EstDoc] Remove orphaned index #{orphaned_id}"
        estraier_conn.out_doc docs[orphaned_id].attr('@id')
      end

      # Walk the ids in hit order so the node's ranking survives the SQL
      # round trip (the database returns rows in its own order).
      records = ids.map { |record_id| found[record_id] }.compact
      records.each { |record| record.estdoc = docs[record.id] }

      info = {}
      HINT_KEYS.each { |key| info[key.downcase.to_sym] = rs.hint(key) }
      { :records => records, :info => info }
    end

    # Re-index every record of the model, INDEX_BATCH_SIZE rows at a time.
    # Returns the number of records counted when the run started.
    def indexing!
      transaction do
        total = count
        offset = 0
        while offset < total
          # A fixed order keeps the LIMIT/OFFSET windows from overlapping.
          find(:all, :order => primary_key, :limit => INDEX_BATCH_SIZE, :offset => offset).each do |record|
            record.update_est_index
          end
          offset += INDEX_BATCH_SIZE
        end
        total
      end
    end
  end

  module InstanceMethods
    XML_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }

    # Build the Estraier document for this record from est_uri, est_title,
    # est_texts and, when defined, est_attributes and est_hidden_texts.
    def to_estdoc
      doc = EstraierPure::Document::new
      doc.add_attr('@uri', self.est_uri.to_s)
      doc.add_attr('@title', self.est_title.to_s)
      doc.add_attr('record_id', self.id.to_s)
      doc.add_attr('record_class', self.class.to_s)
      if respond_to? :est_attributes
        est_attributes.each do |name, value|
          # Attribute values must be strings; nil values are simply left out.
          doc.add_attr(name.to_s, value.to_s) unless value.nil?
        end
      end
      if respond_to? :est_hidden_texts
        Array(est_hidden_texts).each { |text| doc.add_hidden_text(text.to_s) }
      end
      Array(est_texts).each { |text| doc.add_text(text.to_s) }
      doc
    end

    # to_xml plus an <estraier> section inserted before the closing tag.
    # Pass :with_pseudo_attributes => true to include "#..." attributes.
    # When the record has no index entry the plain serialization is returned.
    def to_xml_with_estdoc(*args)
      base = to_xml_without_estdoc(*args)
      options = args.extract_options!
      doc = estdoc || _estdoc
      return base unless doc

      xml = base.split("\n")
      xml_foot = xml.pop
      xml << ' <estraier>'
      est_attribute_groups(doc, options[:with_pseudo_attributes]).each do |tag, pairs|
        xml << " <#{tag}>"
        pairs.each do |name, value|
          xml << " <#{name}>#{est_xml_escape(value)}</#{name}>"
        end
        xml << " </#{tag}>"
      end
      xml << ' </estraier>'
      xml << xml_foot
      xml.join("\n")
    end

    # to_json plus an "estraier" key holding the document attributes.
    # Pass :with_pseudo_attributes => true to include "#..." attributes.
    # When the record has no index entry the plain serialization is returned.
    def to_json_with_estdoc(*args)
      base = to_json_without_estdoc(*args)
      options = args.extract_options!
      doc = estdoc || _estdoc
      return base unless doc

      obj = ActiveSupport::JSON.decode(base)
      obj['estraier'] = {}
      est_attribute_groups(doc, options[:with_pseudo_attributes]).each do |tag, pairs|
        section = obj['estraier'][tag] = {}
        pairs.each { |name, value| section[name] = value }
      end
      ActiveSupport::JSON.encode(obj)
    end

    # Replace this record's index entry.  Does nothing when
    # skip_update_est_index is set.  Raises RuntimeError for unsaved records.
    def update_est_index
      return if self.skip_update_est_index
      raise 'cannot index an unsaved record' if new_record?
      begin
        remove_est_index
      rescue
        # Deliberate best effort: failing to drop the old entry must not
        # prevent the fresh document from being stored below.
      end
      add_est_index
    end

    # Store this record's document on the node unless est_no_index is truthy.
    # Raises RuntimeError (node status as message) when the node rejects it.
    def add_est_index
      raise 'cannot index an unsaved record' if new_record?
      return if respond_to?(:est_no_index) && est_no_index
      raise self.estraier_conn.status.to_s unless self.estraier_conn.put_doc(to_estdoc)
    end

    # Remove this record's document from the node.  A record without an index
    # entry is a no-op.  Never returns false, so it cannot halt the
    # before_destroy callback chain.
    # Raises RuntimeError (node status as message) when the node refuses.
    def remove_est_index
      raise 'cannot remove the index of an unsaved record' if new_record?
      doc_id = est_id
      return if doc_id.nil?
      raise self.estraier_conn.status.to_s unless self.estraier_conn.out_doc(doc_id)
    end

    # The node-side '@id' of this record's document, or nil when the record
    # is not indexed.
    def est_id
      raise 'unsaved records have no index entry' if new_record?
      doc = _estdoc
      doc && doc.attr('@id')
    end

    private
    # Look this record's document up on the node; nil when it is not indexed.
    def _estdoc
      hit = self.class.est_search('', :attributes => ["record_id NUMEQ #{self.id.to_s}", "record_class STREQ #{self.class.to_s}"])[:records].first
      hit && hit.estdoc
    end

    # Group document attributes for serialization.  Returns an ordered array
    # of [section_name, [[attribute_name, value], ...]] pairs; the bookkeeping
    # attributes record_id and record_class are left out.
    def est_attribute_groups(doc, with_pseudo)
      names = doc.attr_names
      groups = []
      if with_pseudo
        groups << ['pseudo-attributes', names.grep(/^#/).map { |name| [name.sub('#', ''), doc.attr(name)] }]
      end
      groups << ['system-attributes', names.grep(/^@/).map { |name| [name.sub('@', ''), doc.attr(name)] }]
      regular = names.grep(/^[^@#]/).reject { |name| name == 'record_id' || name == 'record_class' }
      groups << ['attributes', regular.map { |name| [name, doc.attr(name)] }]
      groups
    end

    # Escape the characters that would break XML element content.
    def est_xml_escape(value)
      value.to_s.gsub(/[&<>]/) { |char| XML_ESCAPES[char] }
    end
  end
end
|
204
|
+
|
205
|
+
ActiveRecord::Base.send :include, ActsAsEstraierDoc
|
@@ -0,0 +1,1193 @@
|
|
1
|
+
#--
|
2
|
+
# Ruby interface of Hyper Estraier
|
3
|
+
# Copyright (C) 2004-2007 Mikio Hirabayashi
|
4
|
+
# All rights reserved.
|
5
|
+
# This file is part of Hyper Estraier.
|
6
|
+
# Redistribution and use in source and binary forms, with or without modification, are
|
7
|
+
# permitted provided that the following conditions are met:
|
8
|
+
#
|
9
|
+
# * Redistributions of source code must retain the above copyright notice, this list of
|
10
|
+
# conditions and the following disclaimer.
|
11
|
+
# * Redistributions in binary form must reproduce the above copyright notice, this list of
|
12
|
+
# conditions and the following disclaimer in the documentation and/or other materials
|
13
|
+
# provided with the distribution.
|
14
|
+
# * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
|
15
|
+
# endorse or promote products derived from this software without specific prior written
|
16
|
+
# permission.
|
17
|
+
#
|
18
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
19
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
20
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
21
|
+
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
22
|
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
24
|
+
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
25
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
26
|
+
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
|
+
#++
|
28
|
+
#:include:overview
|
29
|
+
|
30
|
+
|
31
|
+
require "uri"
|
32
|
+
require "cgi"
|
33
|
+
require "socket"
|
34
|
+
require "stringio"
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
#
|
39
|
+
# Module for the namespace of Hyper Estraier
|
40
|
+
#
|
41
|
+
module EstraierPure
|
42
|
+
#----------------------------------------------------------------
|
43
|
+
#++ Abstraction of document.
|
44
|
+
#----------------------------------------------------------------
|
45
|
+
class Document
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Add an attribute.
  # `name' specifies the name of an attribute.
  # `value' specifies the value of the attribute.  If it is `nil', the attribute is removed.
  # The return value is always `nil'.
  def add_attr(name, value)
    if $DEBUG
      types = { name=>String }
      types[value] = String unless value.nil?
      Utility::check_types(types)
    end
    name = normalize_space(name)
    if value.nil?
      @attrs.delete(name)
    else
      @attrs[name] = normalize_space(value)
    end
    nil
  end
  # Add a sentence of text.
  # `text' specifies a sentence of text.  A sentence that is empty after whitespace
  # normalization is ignored.
  # The return value is always `nil'.
  def add_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize_space(text)
    @dtexts.push(text) unless text.empty?
    nil
  end
  # Add a hidden sentence.
  # `text' specifies a hidden sentence.  A sentence that is empty after whitespace
  # normalization is ignored.
  # The return value is always `nil'.
  def add_hidden_text(text)
    Utility::check_types({ text=>String }) if $DEBUG
    text = normalize_space(text)
    @htexts.push(text) unless text.empty?
    nil
  end
  # Attach keywords.
  # `kwords' specifies a map object of keywords.  Keys of the map should be keywords of the
  # document and values should be their scores in decimal string.
  # The return value is always `nil'.
  def set_keywords(kwords)
    Utility::check_types({ kwords=>Hash }) if $DEBUG
    @kwords = kwords
    nil
  end
  # Set the substitute score.
  # `score' specifies the substitute score.  If it is negative, the substitute score setting is
  # nullified.
  # The return value is always `nil'.
  def set_score(score)
    Utility::check_types({ score=>Integer }) if $DEBUG
    @score = score
    nil
  end
  # Get the ID number.
  # The return value is the ID number of the document object.  If the object has never been
  # registered, -1 is returned.
  def id()
    @id
  end
  # Get an array of attribute names of a document object.
  # The return value is an array object of attribute names, sorted.
  def attr_names()
    @attrs.keys.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  # Get an array of sentences of the text.
  # The return value is an array object of sentences of the text.
  def texts()
    @dtexts
  end
  # Concatenate sentences of the text of a document object.
  # The return value is concatenated sentences, separated by single spaces.
  def cat_texts()
    @dtexts.join(" ")
  end
  # Dump draft data of a document object.
  # The return value is draft data: "name=value" attribute lines, optional %VECTOR and %SCORE
  # lines, a blank line, then text lines; hidden text lines are prefixed with a tab.
  def dump_draft()
    out = []
    @attrs.keys.sort.each { |key| out << "#{key}=#{@attrs[key]}\n" }
    if @kwords
      out << "%VECTOR"
      @kwords.each { |key, value| out << "\t#{key}\t#{value}" }
      out << "\n"
    end
    out << format("%%SCORE\t%d\n", @score) if @score >= 0
    out << "\n"
    @dtexts.each { |text| out << "#{text}\n" }
    @htexts.each { |text| out << "\t#{text}\n" }
    out.join
  end
  # Get attached keywords.
  # The return value is a map object of keywords and their scores in decimal string.  If no
  # keyword is attached, `nil' is returned.
  def keywords()
    @kwords
  end
  # Get the substitute score.
  # The return value is the substitute score or -1 if it is not set.
  def score()
    @score < 0 ? -1 : @score
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a document object.
  # `draft' specifies a string of draft data.
  def initialize(draft = "")
    Utility::check_types({ draft=>String }) if $DEBUG
    @id = -1
    @attrs = {}
    @dtexts = []
    @htexts = []
    @kwords = nil
    @score = -1
    return if draft.empty?
    lines = draft.split(/\n/, -1)
    num = 0
    # Header section: attributes and %-directives, ended by the first blank line.
    while num < lines.length
      line = lines[num]
      num += 1
      break if line.length < 1
      if line =~ /^%/
        if line =~ /^%VECTOR\t/
          @kwords = {} unless @kwords
          fields = line.split(/\t/)
          i = 1
          while i < fields.length - 1
            @kwords[fields[i]] = fields[i+1]
            i += 2
          end
        elsif line =~ /^%SCORE\t/
          @score = line.split(/\t/)[1].to_i
        end
        next
      end
      line = normalize_space(line)
      if idx = line.index("=")
        @attrs[line[0...idx]] = line[idx+1...line.length]
      end
    end
    # Body section: a leading tab marks a hidden sentence.
    while num < lines.length
      line = lines[num]
      num += 1
      next if line.empty?
      # Compare a one-character substring: String#[] with a single index returns an
      # Integer on Ruby 1.8 but a String on later versions.
      if line[0, 1] == "\t"
        @htexts.push(line[1...line.length]) if line.length > 1
      else
        @dtexts.push(line)
      end
    end
  end
  # Collapse every run of whitespace into one space and trim both ends.
  def normalize_space(text)
    text.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
end
|
228
|
+
#----------------------------------------------------------------
|
229
|
+
#++ Abstraction of search condition.
|
230
|
+
#----------------------------------------------------------------
|
231
|
+
class Condition
  #--------------------------------
  # public constants
  #--------------------------------
  public
  # option: check every N-gram key
  SURE = 1 << 0
  # option: check N-gram keys skipping by one
  USUAL = 1 << 1
  # option: check N-gram keys skipping by two
  FAST = 1 << 2
  # option: check N-gram keys skipping by three
  AGITO = 1 << 3
  # option: without TF-IDF tuning
  NOIDF = 1 << 4
  # option: with the simplified phrase
  SIMPLE = 1 << 10
  # option: with the rough phrase
  ROUGH = 1 << 11
  # option: with the union phrase
  UNION = 1 << 15
  # option: with the intersection phrase
  ISECT = 1 << 16
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The search phrase, or `nil' if none was set.
  attr_reader :phrase
  # The array of expressions for attributes.
  attr_reader :attrs
  # The order expression, or `nil' if none was set.
  attr_reader :order
  # The maximum number of retrieval; -1 until one is set.
  attr_reader :max
  # The number of documents to be skipped in the search result.
  attr_reader :skip
  # The options of retrieval, combined by bitwise or.
  attr_reader :options
  # The minimum hits to adopt result of the auxiliary index.
  attr_reader :auxiliary
  # The name of the distinct attribute, or `nil' if none was set.
  attr_reader :distinct
  # The mask of targets of meta search.
  attr_reader :mask
  # Set the search phrase.
  # `phrase' specifies a search phrase.
  # The return value is always `nil'.
  def set_phrase(phrase)
    Utility::check_types({ phrase=>String }) if $DEBUG
    @phrase = normalize_space(phrase)
    nil
  end
  # Add an expression for an attribute.
  # `expr' specifies an expression for an attribute.
  # The return value is always `nil'.
  def add_attr(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @attrs.push(normalize_space(expr))
    nil
  end
  # Set the order of a condition object.
  # `expr' specifies an expression for the order.  By default, the order is by score descending.
  # The return value is always `nil'.
  def set_order(expr)
    Utility::check_types({ expr=>String }) if $DEBUG
    @order = normalize_space(expr)
    nil
  end
  # Set the maximum number of retrieval.
  # `max' specifies the maximum number of retrieval.  By default, the number of retrieval is
  # not limited.  A negative value is ignored.
  # The return value is always `nil'.
  def set_max(max)
    Utility::check_types({ max=>Integer }) if $DEBUG
    @max = max if max >= 0
    nil
  end
  # Set the number of skipped documents.
  # `skip' specifies the number of documents to be skipped in the search result.  A negative
  # value is ignored.
  # The return value is always `nil'.
  def set_skip(skip)
    Utility::check_types({ skip=>Integer }) if $DEBUG
    @skip = skip if skip >= 0
    nil
  end
  # Set options of retrieval.
  # `options' specifies options: `Condition::SURE' specifies that it checks every N-gram
  # key, `Condition::USUAL', which is the default, specifies that it checks N-gram keys
  # with skipping one key, `Condition::FAST' skips two keys, `Condition::AGITO'
  # skips three keys, `Condition::NOIDF' specifies not to perform TF-IDF tuning,
  # `Condition::SIMPLE' specifies to use simplified phrase, `Condition::ROUGH' specifies to use
  # rough phrase, `Condition::UNION' specifies to use union phrase, `Condition::ISECT' specifies
  # to use intersection phrase.  Each option can be specified at the same time by bitwise or.
  # If keys are skipped, though search speed is improved, the relevance ratio grows less.
  # The given bits are added to the options already set.
  # The return value is always `nil'.
  def set_options(options)
    Utility::check_types({ options=>Integer }) if $DEBUG
    @options |= options
    nil
  end
  # Set permission to adopt result of the auxiliary index.
  # `min' specifies the minimum hits to adopt result of the auxiliary index.  If it is not more
  # than 0, the auxiliary index is not used.  By default, it is 32.
  # The return value is always `nil'.
  def set_auxiliary(min)
    Utility::check_types({ min=>Integer }) if $DEBUG
    @auxiliary = min
    nil
  end
  # Set the attribute distinction filter.
  # `name' specifies the name of an attribute to be distinct.
  # The return value is always `nil'.
  def set_distinct(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @distinct = normalize_space(name)
    nil
  end
  # Set the mask of targets of meta search.
  # `mask' specifies a masking number.  1 means the first target, 2 means the second target, 4
  # means the third target, and power values of 2 and their summation compose the mask.
  # The return value is always `nil'.
  def set_mask(mask)
    Utility::check_types({ mask=>Integer }) if $DEBUG
    @mask = mask
    nil
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a search condition object.
  def initialize()
    @phrase = nil
    @attrs = []
    @order = nil
    @max = -1
    @skip = 0
    @options = 0
    @auxiliary = 32
    @distinct = nil
    @mask = 0
  end
  # Collapse every run of whitespace into one space and trim both ends.
  def normalize_space(text)
    text.gsub(/[ \t\r\n\v\f]+/, " ").strip.squeeze(" ")
  end
end
|
410
|
+
#----------------------------------------------------------------
|
411
|
+
#++ Abstraction of document in result set.
|
412
|
+
#----------------------------------------------------------------
|
413
|
+
class ResultDocument
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # The URI of the result document object.
  attr_reader :uri
  # The snippet of the result document object, as a string of tab separated values.  Each
  # line is a string to be shown.  Though most lines have only one field, some lines have
  # two fields.  If the second field exists, the first field is to be shown highlighted,
  # and the second field means its normalized form.
  attr_reader :snippet
  # The serialized keywords of the result document object, as a string of tab separated
  # values.  Keywords and their scores come alternately.
  attr_reader :keywords
  # Get an array of attribute names.
  # The return value is an array object of attribute names, sorted.
  def attr_names()
    @attrs.keys.sort
  end
  # Get the value of an attribute.
  # `name' specifies the name of an attribute.
  # The return value is the value of the attribute or `nil' if it does not exist.
  def attr(name)
    Utility::check_types({ name=>String }) if $DEBUG
    @attrs[name]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a result document object.
  # `uri' is the URI, `attrs' a map of attribute names to values, `snippet' the snippet
  # string and `keywords' the serialized keywords.
  def initialize(uri, attrs, snippet, keywords)
    Utility::check_types({ uri=>String, attrs=>Hash,
                           snippet=>String, keywords=>String }) if $DEBUG
    @uri = uri
    @attrs = attrs
    @snippet = snippet
    @keywords = keywords
  end
end
|
462
|
+
#----------------------------------------------------------------
|
463
|
+
#++ Abstraction of result set from node.
|
464
|
+
#----------------------------------------------------------------
|
465
|
+
class NodeResult
  #--------------------------------
  # public methods
  #--------------------------------
  public
  # Get the number of documents.
  # The return value is the number of documents.
  def doc_num()
    @docs.length
  end
  # Get a result document object.
  # `index' specifies the index of a document.
  # The return value is a result document object or `nil' if the index is out of bounds.
  # Negative indexes are out of bounds; they do not count from the end.
  def get_doc(index)
    Utility::check_types({ index=>Integer }) if $DEBUG
    index < 0 ? nil : @docs[index]
  end
  # Get the value of hint information.
  # `key' specifies the key of a hint.  "VERSION", "NODE", "HIT", "HINT#n", "DOCNUM", "WORDNUM",
  # "TIME", "TIME#n", "LINK#n", and "VIEW" are provided for keys.
  # The return value is the hint or `nil' if the key does not exist.
  def hint(key)
    Utility::check_types({ key=>String }) if $DEBUG
    @hints[key]
  end
  #--------------------------------
  # private methods
  #--------------------------------
  private
  # Create a node result object.
  # `docs' is an array of result document objects and `hints' a map of hint keys to values.
  def initialize(docs, hints)
    Utility::check_types({ docs=>Array, hints=>Hash }) if $DEBUG
    @docs = docs
    @hints = hints
  end
end
|
501
|
+
#----------------------------------------------------------------
|
502
|
+
#++ Abstraction of connection to P2P node.
|
503
|
+
#----------------------------------------------------------------
|
504
|
+
class Node
|
505
|
+
#--------------------------------
|
506
|
+
# public methods
|
507
|
+
#--------------------------------
|
508
|
+
public
|
509
|
+
# Remember the URL of the node server this object talks to.
# `url' specifies the URL of a node.
# The return value is always `nil'.
def set_url(url)
  if $DEBUG
    Utility::check_types({ url=>String })
  end
  @url = url
  return nil
end
|
517
|
+
# Set the proxy information.
|
518
|
+
# `host' specifies the host name of a proxy server.
|
519
|
+
# `port' specifies the port number of the proxy server.
|
520
|
+
# The return value is always `nil'.
|
521
|
+
def set_proxy(host, port)
  # Store the proxy host and port that are handed to Utility::shuttle_url.
  Utility::check_types({ host=>String, port=>Integer }) if $DEBUG
  @pxhost, @pxport = host, port
  return nil
end
|
527
|
+
# Set timeout of a connection.
|
528
|
+
# `sec' specifies timeout of the connection in seconds.
|
529
|
+
# The return value is always `nil'.
|
530
|
+
def set_timeout(sec)
  # Store the timeout (seconds) that is handed to Utility::shuttle_url.
  Utility::check_types({ sec=>Integer }) if $DEBUG
  @timeout = sec
  return nil
end
|
535
|
+
# Set the authentication information.
|
536
|
+
# `name' specifies the name of authentication.
|
537
|
+
# `password' specifies the password of the authentication.
|
538
|
+
# The return value is always `nil'.
|
539
|
+
def set_auth(name, password)
  # Keep the credentials as "name:password"; request methods Base64-encode
  # this string into a Basic Authorization header.
  Utility::check_types({ name=>String, password=>String }) if $DEBUG
  credentials = name + ":" + password
  @auth = credentials
  return nil
end
|
544
|
+
# Get the status code of the last request.
|
545
|
+
# The return value is the status code of the last request. -1 means failure of connection.
|
546
|
+
def status()
  # Plain reader for the status code recorded by the most recent request.
  return @status
end
|
549
|
+
# Synchronize updating contents of the database.
|
550
|
+
# The return value is true if success, else it is false.
|
551
|
+
def sync()
  # Send the "/sync" command; only a 200 reply counts as success.
  @status = -1
  return false unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The empty (non-nil) body makes shuttle_url issue a POST.
  @status = Utility::shuttle_url(@url + "/sync", @pxhost, @pxport, @timeout, headers, "", nil, nil)
  @status == 200
end
|
561
|
+
# Optimize the database.
|
562
|
+
# The return value is true if success, else it is false.
|
563
|
+
def optimize()
  # Send the "/optimize" command; only a 200 reply counts as success.
  @status = -1
  return false unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The empty (non-nil) body makes shuttle_url issue a POST.
  @status = Utility::shuttle_url(@url + "/optimize", @pxhost, @pxport, @timeout, headers, "", nil, nil)
  @status == 200
end
|
573
|
+
# Add a document.
|
574
|
+
# `doc' specifies a document object. The document object should have the URI attribute.
|
575
|
+
# The return value is true if success, else it is false.
|
576
|
+
def put_doc(doc)
  # Post the document's draft text to "/put_doc"; only a 200 reply counts as success.
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  headers = [ "Content-Type: text/x-estraier-draft" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  draft = doc.dump_draft
  @status = Utility::shuttle_url(@url + "/put_doc", @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
|
588
|
+
# Remove a document.
|
589
|
+
# `id' specifies the ID number of a registered document.
|
590
|
+
# The return value is true if success, else it is false.
|
591
|
+
def out_doc(id)
  # Post "id=<id>" to "/out_doc"; only a 200 reply counts as success.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  @status = Utility::shuttle_url(@url + "/out_doc", @pxhost, @pxport, @timeout, headers, form, nil, nil)
  @status == 200
end
|
603
|
+
# Remove a document specified by URI.
|
604
|
+
# `uri' specifies the URI of a registered document.
|
605
|
+
# The return value is true if success, else it is false.
|
606
|
+
def out_doc_by_uri(uri)
  # Post "uri=<escaped uri>" to "/out_doc"; only a 200 reply counts as success.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return false unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri)
  @status = Utility::shuttle_url(@url + "/out_doc", @pxhost, @pxport, @timeout, headers, form, nil, nil)
  @status == 200
end
|
618
|
+
# Edit attributes of a document.
|
619
|
+
# `doc' specifies a document object.
|
620
|
+
# The return value is true if success, else it is false.
|
621
|
+
def edit_doc(doc)
  # Post the document's draft text to "/edit_doc"; only a 200 reply counts as success.
  Utility::check_types({ doc=>Document }) if $DEBUG
  @status = -1
  return false unless @url
  headers = [ "Content-Type: text/x-estraier-draft" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  draft = doc.dump_draft
  @status = Utility::shuttle_url(@url + "/edit_doc", @pxhost, @pxport, @timeout, headers, draft, nil, nil)
  @status == 200
end
|
633
|
+
# Retrieve a document.
|
634
|
+
# `id' specifies the ID number of a registered document.
|
635
|
+
# The return value is a document object. On error, `nil' is returned.
|
636
|
+
def get_doc(id)
  # Post "id=<id>" to "/get_doc" and build a Document from the reply body.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/get_doc", @pxhost, @pxport, @timeout, headers, form, nil, sink)
  @status == 200 ? Document::new(sink.string) : nil
end
|
650
|
+
# Retrieve a document specified by URI.
|
651
|
+
# `uri' specifies the URI of a registered document.
|
652
|
+
# The return value is a document object. On error, `nil' is returned.
|
653
|
+
def get_doc_by_uri(uri)
  # Post "uri=<escaped uri>" to "/get_doc" and build a Document from the reply body.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri)
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/get_doc", @pxhost, @pxport, @timeout, headers, form, nil, sink)
  @status == 200 ? Document::new(sink.string) : nil
end
|
667
|
+
# Retrieve the value of an attribute of a document.
|
668
|
+
# `id' specifies the ID number of a registered document.
|
669
|
+
# `name' specifies the name of an attribute.
|
670
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
671
|
+
def get_doc_attr(id, name)
  # Post the ID and attribute name to "/get_doc_attr"; the reply body, minus
  # its trailing line break, is the attribute value.
  Utility::check_types({ id=>Integer, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s + "&attr=" + CGI::escape(name)
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/get_doc_attr", @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  sink.string.chomp
end
|
685
|
+
# Retrieve the value of an attribute of a document specified by URI.
|
686
|
+
# `uri' specifies the URI of a registered document.
|
687
|
+
# `name' specifies the name of an attribute.
|
688
|
+
# The return value is the value of the attribute or `nil' if it does not exist.
|
689
|
+
def get_doc_attr_by_uri(uri, name)
  # Post the URI and attribute name to "/get_doc_attr"; the reply body, minus
  # its trailing line break, is the attribute value.
  Utility::check_types({ uri=>String, name=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri) + "&attr=" + CGI::escape(name)
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/get_doc_attr", @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  sink.string.chomp
end
|
703
|
+
# Extract keywords of a document.
|
704
|
+
# `id' specifies the ID number of a registered document.
|
705
|
+
# The return value is a hash object of keywords and their scores in decimal string or `nil'
|
706
|
+
# on error.
|
707
|
+
def etch_doc(id)
  # Post "id=<id>" to "/etch_doc" and turn the tab-separated reply lines into
  # a keyword => score hash.
  Utility::check_types({ id=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "id=" + id.to_s
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/etch_doc", @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  sink.string.split(/\n/, -1).each_with_object({}) do |row, kwords|
    fields = row.split(/\t/)
    # Lines without at least a keyword and a score are skipped.
    kwords[fields[0]] = fields[1] if fields.length >= 2
  end
end
|
728
|
+
# Extract keywords of a document specified by URI.
|
729
|
+
# `uri' specifies the URI of a registered document.
|
730
|
+
# The return value is a hash object of keywords and their scores in decimal string or `nil'
|
731
|
+
# on error.
|
732
|
+
def etch_doc_by_uri(uri)
  # Post "uri=<escaped uri>" to "/etch_doc" and turn the tab-separated reply
  # lines into a keyword => score hash.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "uri=" + CGI::escape(uri)
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/etch_doc", @pxhost, @pxport, @timeout, headers, form, nil, sink)
  return nil unless @status == 200
  sink.string.split(/\n/, -1).each_with_object({}) do |row, kwords|
    fields = row.split(/\t/)
    # Lines without at least a keyword and a score are skipped.
    kwords[fields[0]] = fields[1] if fields.length >= 2
  end
end
|
753
|
+
# Get the ID of a document specified by URI.
|
754
|
+
# `uri' specifies the URI of a registered document.
|
755
|
+
# The return value is the ID of the document. On error, -1 is returned.
|
756
|
+
def uri_to_id(uri)
  # Post "uri=<escaped uri>" to "/uri_to_id" and return the document ID as an
  # Integer.  Every failure path returns -1 (no URL configured, a non-200
  # reply, or a reply body that is not a decimal number), matching the
  # documented contract and the early `return -1' below.
  Utility::check_types({ uri=>String }) if $DEBUG
  @status = -1
  return -1 unless @url
  turl = @url + "/uri_to_id"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  reqbody = "uri=" + CGI::escape(uri)
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, resbody)
  @status = rv
  return -1 if rv != 200
  text = resbody.string.strip
  # String#to_i would silently turn garbage into 0, so validate first.
  return -1 unless text =~ /\A\d+\z/
  text.to_i
end
|
770
|
+
# Get the name.
|
771
|
+
# The return value is the name. On error, `nil' is returned.
|
772
|
+
def name()
  # Fetch node information lazily: set_info is only called while @name is unset.
  set_info unless @name
  return @name
end
|
776
|
+
# Get the label.
|
777
|
+
# The return value is the label. On error, `nil' is returned.
|
778
|
+
def label()
  # Fetch node information lazily: set_info is only called while @label is unset.
  set_info unless @label
  return @label
end
|
782
|
+
# Get the number of documents.
|
783
|
+
# The return value is the number of documents. On error, -1 is returned.
|
784
|
+
def doc_num()
  # A negative @dnum means "not loaded yet", so node information is requested first.
  set_info if @dnum < 0
  return @dnum
end
|
788
|
+
# Get the number of unique words.
|
789
|
+
# The return value is the number of unique words. On error, -1 is returned.
|
790
|
+
def word_num()
  # A negative @wnum means "not loaded yet", so node information is requested first.
  set_info if @wnum < 0
  return @wnum
end
|
794
|
+
# Get the size of the database.
|
795
|
+
# The return value is the size of the database.  On error, -1.0 is returned.
|
796
|
+
def size()
  # A negative @size means "not loaded yet", so node information is requested first.
  set_info if @size < 0.0
  return @size
end
|
800
|
+
# Get the usage ratio of the cache.
|
801
|
+
# The return value is the usage ratio of the cache. On error, -1.0 is returned.
|
802
|
+
def cache_usage()
  # Request "/cacheusage" and parse the reply body as a float; -1.0 on any failure.
  @status = -1
  return -1.0 unless @url
  headers = []
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  sink = StringIO::new
  # A nil request body makes shuttle_url issue a GET.
  @status = Utility::shuttle_url(@url + "/cacheusage", @pxhost, @pxport, @timeout, headers, nil, nil, sink)
  return -1.0 unless @status == 200
  sink.string.strip.to_f
end
|
814
|
+
# Get an array of names of administrators.
|
815
|
+
# The return value is an array object of names of administrators. On error, `nil' is
|
816
|
+
# returned.
|
817
|
+
def admins()
  # Fetch node information lazily: set_info is only called while @admins is unset.
  set_info if !@admins
  return @admins
end
|
821
|
+
# Get an array of names of users.
|
822
|
+
# The return value is an array object of names of users. On error, `nil' is returned.
|
823
|
+
def users()
  # Fetch node information lazily: set_info is only called while @users is unset.
  set_info if !@users
  return @users
end
|
827
|
+
# Get an array of expressions of links.
|
828
|
+
# The return value is an array object of expressions of links. Each element is a TSV string
|
829
|
+
# and has three fields of the URL, the label, and the score. On error, `nil' is returned.
|
830
|
+
def links()
  # Fetch node information lazily: set_info is only called while @links is unset.
  set_info if !@links
  return @links
end
|
834
|
+
# Search for documents corresponding a condition.
|
835
|
+
# `cond' specifies a condition object.
|
836
|
+
# `depth' specifies the depth of meta search.
|
837
|
+
# The return value is a node result object. On error, `nil' is returned.
|
838
|
+
def search(cond, depth)
  # Post the serialized condition to "/search" and parse the multipart reply.
  # The first reply line is the border string.  Lines up to the next border
  # are tab-separated hints; every following part, closed by a border line,
  # is one result document.  A border line followed by ":END" finishes the
  # reply; a reply without it yields `nil'.
  Utility::check_types({ cond=>Condition, depth=>Integer }) if $DEBUG
  @status = -1
  return nil unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  query = Utility::cond_to_query(cond, depth, @wwidth, @hwidth, @awidth)
  sink = StringIO::new
  @status = Utility::shuttle_url(@url + "/search", @pxhost, @pxport, @timeout, headers, query, nil, sink)
  return nil unless @status == 200
  rows = sink.string.split(/\n/, -1)
  return nil if rows.empty?
  docs = []
  hints = {}
  result = NodeResult::new(docs, hints)
  border = rows[0]
  terminator = border + ":END"
  finished = false
  pos = 1
  # Hint section: "key<TAB>value" lines up to the first border line.
  while pos < rows.length
    row = rows[pos]
    pos += 1
    if row.start_with?(border)
      finished = (row == terminator)
      break
    end
    key, tab, value = row.partition("\t")
    hints[key] = value unless tab.empty?
  end
  # Document parts: each one spans from part_start up to the next border line.
  part_start = pos
  while !finished && pos < rows.length
    row = rows[pos]
    pos += 1
    next unless row.start_with?(border)
    part = rows[part_start...(pos - 1)]
    attrs = {}
    vector = ""
    snippet = StringIO::new
    idx = 0
    # Header lines ("name=value" attributes and "%..." directives) run up to
    # the first blank line; that blank line is consumed as well.
    while idx < part.length
      entry = part[idx].strip
      idx += 1
      break if entry.empty?
      if entry.start_with?("%")
        _tag, tab, rest = entry.partition("\t")
        vector = rest if entry.include?("%VECTOR") && !tab.empty?
      else
        key, eq, value = entry.partition("=")
        attrs[key] = value unless eq.empty?
      end
    end
    # Everything after the blank line is snippet text, kept verbatim.
    part[idx..-1].each { |text| snippet.printf("%s\n", text) }
    uri = attrs["@uri"]
    docs.push(ResultDocument::new(uri, attrs, snippet.string, vector)) if uri
    part_start = pos
    finished = true if row == terminator
  end
  finished ? result : nil
end
|
917
|
+
# Set width of snippet in the result.
|
918
|
+
# `wwidth' specifies whole width of a snippet. By default, it is 480. If it is 0, no
|
919
|
+
# snippet is sent. If it is negative, whole body text is sent instead of snippet.
|
920
|
+
# `hwidth' specifies width of strings picked up from the beginning of the text. By default,
|
921
|
+
# it is 96.  If it is negative, the current setting is not changed.
|
922
|
+
# `awidth' specifies width of strings picked up around each highlighted word. By default,
|
923
|
+
# it is 96. If it is negative, the current setting is not changed.
|
924
|
+
def set_snippet_width(wwidth, hwidth, awidth)
  # The whole width is always replaced; the head and around widths are only
  # replaced when the given value is not negative.
  @wwidth = wwidth
  @hwidth = hwidth unless hwidth < 0
  @awidth = awidth unless awidth < 0
end
|
929
|
+
# Manage a user account of a node.
|
930
|
+
# `name' specifies the name of a user.
|
931
|
+
# `mode' specifies the operation mode. 0 means to delete the account. 1 means to set the
|
932
|
+
# account as an administrator. 2 means to set the account as a guest.
|
933
|
+
# The return value is true if success, else it is false.
|
934
|
+
def set_user(name, mode)
  # Post the escaped user name and the mode to "/_set_user"; only a 200 reply
  # counts as success.
  Utility::check_types({ name=>String, mode=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  headers = [ "Content-Type: application/x-www-form-urlencoded" ]
  headers << ("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  form = "name=" + CGI::escape(name) + "&mode=" + mode.to_s
  @status = Utility::shuttle_url(@url + "/_set_user", @pxhost, @pxport, @timeout, headers, form, nil, nil)
  @status == 200
end
|
946
|
+
# Manage a link of a node.
|
947
|
+
# `url' specifies the URL of the target node of a link.
|
948
|
+
# `label' specifies the label of the link.
|
949
|
+
# `credit' specifies the credit of the link. If it is negative, the link is removed.
|
950
|
+
# The return value is true if success, else it is false.
|
951
|
+
def set_link(url, label, credit)
  # Post the link definition to "/_set_link"; only a 200 reply counts as
  # success.  The credit field is left out when `credit' is negative.
  Utility::check_types({ url=>String, label=>String, credit=>Integer }) if $DEBUG
  @status = -1
  return false unless @url
  turl = @url + "/_set_link"
  reqheads = [ "Content-Type: application/x-www-form-urlencoded" ]
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  # The label is free text, so it must be escaped like every other form
  # value; otherwise "&", "=", spaces or non-ASCII characters corrupt the body.
  reqbody = "url=" + CGI::escape(url) + "&label=" + CGI::escape(label)
  reqbody += "&credit=" + credit.to_s if credit >= 0
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, reqbody, nil, nil)
  @status = rv
  rv == 200
end
|
964
|
+
#--------------------------------
|
965
|
+
# private methods
|
966
|
+
#--------------------------------
|
967
|
+
private
|
968
|
+
# Create a node connection object.
|
969
|
+
def initialize()
  # Connection settings: nothing configured, no proxy, no timeout.
  @url = @pxhost = @auth = nil
  @pxport = @timeout = -1
  # Node information filled in by set_info: nil or a negative number means
  # "not loaded yet".
  @name = @label = nil
  @dnum = @wnum = -1
  @size = -1.0
  @admins = @users = @links = nil
  # Default snippet widths.
  @wwidth = 480
  @hwidth = @awidth = 96
  # Status of the last request; -1 until a request has been made.
  @status = -1
end
|
988
|
+
# Set information of the node.
|
989
|
+
def set_info()
  # Request "/inform" and cache the node information in instance variables.
  # The first reply line carries five tab-separated fields (name, label,
  # document count, word count, size).  Three line groups follow, separated
  # by blank lines: administrators, users and links.  Nothing is changed
  # when the request fails or the first line is malformed.  A reply that
  # ends early leaves the missing groups as empty arrays.
  @status = -1
  return unless @url
  turl = @url + "/inform"
  reqheads = []
  reqheads.push("Authorization: Basic " + Utility::base_encode(@auth)) if @auth
  resbody = StringIO::new
  rv = Utility::shuttle_url(turl, @pxhost, @pxport, @timeout, reqheads, nil, nil, resbody)
  @status = rv
  return if rv != 200
  lines = resbody.string.split(/\n/, -1)
  return if lines.length < 1
  elems = lines[0].chomp.split(/\t/)
  return if elems.length != 5
  @name = elems[0]
  @label = elems[1]
  @dnum = elems[2].to_i
  @wnum = elems[3].to_i
  @size = elems[4].to_f
  return if lines.length < 2
  lnum = 1
  groups = []
  3.times do
    # Skip the blank separator in front of the group.  The bounds check is
    # needed for every group: a truncated reply must not index past the end.
    lnum += 1 if lnum < lines.length && lines[lnum].length < 1
    group = []
    while lnum < lines.length && lines[lnum].length >= 1
      group.push(lines[lnum])
      lnum += 1
    end
    groups.push(group)
  end
  @admins = groups[0]
  @users = groups[1]
  # Only well-formed link lines (URL, label, score) are kept.
  @links = groups[2].select { |line| line.split(/\t/).length == 3 }
  nil
end
|
1035
|
+
end
|
1036
|
+
#:stopdoc:
|
1037
|
+
#
|
1038
|
+
# Module for utility
|
1039
|
+
#
|
1040
|
+
module Utility
|
1041
|
+
public
|
1042
|
+
# Check types of arguments
|
1043
|
+
# `types' specifies a hash object whose keys are arguments and values are class objects.
|
1044
|
+
# If there is an invalid object, an exception is thrown.
|
1045
|
+
def check_types(types)
  # Walk the argument => class pairs in order and raise on the first argument
  # that is neither nil nor a kind of its expected class.  The position in
  # the error message is 1-based.
  types.each_with_index do |(arg, klass), idx|
    next if arg.kind_of?(klass) || arg == nil
    raise ArgumentError::new("Argument#" + (idx + 1).to_s +
                             " should be a kind of " + klass.to_s)
  end
end
|
1055
|
+
module_function :check_types
|
1056
|
+
# Perform an interaction of a URL.
|
1057
|
+
# `url' specifies a URL.
|
1058
|
+
# `pxhost' specifies the host name of a proxy. If it is `nil', it is not used.
|
1059
|
+
# `pxport' specifies the port number of the proxy.
|
1060
|
+
# `outsec' specifies timeout in seconds. If it is negative, it is not used.
|
1061
|
+
# `reqheads' specifies an array object of extension headers. If it is `nil', it is not used.
|
1062
|
+
# `reqbody' specifies the entity body of the request.  If it is `nil', the "GET"
|
1063
|
+
# method is used.
|
1064
|
+
# `resheads' specifies an array object into which headers of response is stored. If it is
|
1065
|
+
# `nil' it is not used.
|
1066
|
+
# `resbody' specifies stream object into which the entity body of response is stored. If it
|
1067
|
+
# is `nil', it is not used.
|
1068
|
+
# The return value is the status code of the response or -1 on error.
|
1069
|
+
def shuttle_url(url, pxhost, pxport, outsec, reqheads, reqbody, resheads, resbody)
  # Perform one HTTP/1.0 exchange over a raw socket (POST when `reqbody' is
  # given, GET otherwise) and return the status code, or -1 on any failure.
  # The socket work runs in a worker thread so that `outsec' can bound it.
  status = -1
  # Parse before starting the worker: an invalid URL raises here and is
  # turned into -1 by the rescue clause below.  The normalized copy is used
  # so that an empty path becomes "/" and the scheme and host are lower-cased.
  target = URI::parse(url).normalize
  return -1 if target.scheme != "http" || !target.host || target.port < 1
  if pxhost
    # Through a proxy the request line carries the absolute URL.
    host = pxhost
    port = pxport
    query = "http://" + target.host + ":" + target.port.to_s + target.path
  else
    host = target.host
    port = target.port
    query = target.path
  end
  query += "?" + target.query if target.query && !reqbody
  worker = Thread::start do
    # Failures surface through `join' below; avoid duplicate stderr reports.
    if Thread::current.respond_to?(:report_on_exception=)
      Thread::current.report_on_exception = false
    end
    sock = nil
    begin
      sock = TCPSocket.open(host, port)
      # The head is assembled as plain strings and written verbatim.  It must
      # never be used as a printf format: "%xx" escapes in the path would be
      # read as format directives.
      head = [ (reqbody ? "POST " : "GET ") + query + " HTTP/1.0" ]
      head.push("Host: " + target.host + ":" + target.port.to_s)
      head.push("Connection: close")
      head.push("User-Agent: HyperEstraierForRuby/1.0.0")
      reqheads.each { |line| head.push(line.to_s) } if reqheads
      # Content-Length counts bytes, not characters.
      head.push("Content-Length: " + reqbody.bytesize.to_s) if reqbody
      sock.write(head.join("\r\n") + "\r\n\r\n")
      sock.write(reqbody) if reqbody
      line = sock.gets
      # The peer closed the connection without answering.
      Thread::current.exit unless line
      line = line.chomp
      # Split on runs of spaces; the reason phrase after the code is optional.
      elems = line.split(/ +/)
      Thread::current.exit if elems.length < 2 || !(elems[0] =~ /^HTTP/)
      status = elems[1].to_i
      resheads.push(line) if resheads
      # Read headers up to and including the blank separator line; an early
      # EOF simply ends the header section.
      while (line = sock.gets)
        line = line.chomp
        resheads.push(line) if resheads
        break if line.empty?
      end
      while (buf = sock.read(8192))
        resbody.write(buf) if resbody
      end
    ensure
      sock.close if sock
    end
  end
  if outsec && outsec >= 0
    unless worker.join(outsec)
      # Timed out: stop the worker and wait until its socket is closed.
      worker.exit
      worker.join
      return -1
    end
  else
    worker.join
  end
  status
rescue
  -1
end
|
1134
|
+
module_function :shuttle_url
|
1135
|
+
# Serialize a condition object into a query string.
|
1136
|
+
# `cond' specifies a condition object.
|
1137
|
+
# `depth' specifies depth of meta search.
|
1138
|
+
# `wwidth' specifies whole width of a snippet.
|
1139
|
+
# `hwidth' specifies width of strings picked up from the beginning of the text.
|
1140
|
+
# `awidth' specifies width of strings picked up around each highlighted word.
|
1141
|
+
# The return value is the serialized string.
|
1142
|
+
def cond_to_query(cond, depth, wwidth, hwidth, awidth)
  # Collect "name=value" pairs in a fixed order and join them with "&".
  # Optional fields (phrase, order, options, distinct, depth) are left out
  # when unset or not positive.
  params = []
  params.push("phrase=" + CGI::escape(cond.phrase)) if cond.phrase
  cond.attrs.each_with_index do |expr, idx|
    params.push("attr" + (idx + 1).to_s + "=" + CGI::escape(expr))
  end
  params.push("order=" + CGI::escape(cond.order)) if cond.order
  # A negative maximum is sent as 1 << 30.
  limit = cond.max >= 0 ? cond.max : (1 << 30)
  params.push("max=" + limit.to_s)
  params.push("options=" + cond.options.to_s) if cond.options > 0
  params.push("auxiliary=" + cond.auxiliary.to_s)
  params.push("distinct=" + CGI::escape(cond.distinct)) if cond.distinct
  params.push("depth=" + depth.to_s) if depth > 0
  params.push("wwidth=" + wwidth.to_s)
  params.push("hwidth=" + hwidth.to_s)
  params.push("awidth=" + awidth.to_s)
  params.push("skip=" + cond.skip.to_s)
  params.push("mask=" + cond.mask.to_s)
  params.join("&")
end
|
1180
|
+
module_function :cond_to_query
|
1181
|
+
# Encode a byte sequence with Base64 encoding.
|
1182
|
+
# `data' specifies a string object.
|
1183
|
+
# The return value is the encoded string.
|
1184
|
+
def base_encode(data)
  # Array#pack("m") wraps its Base64 output with line breaks; strip them
  # (and any spaces) so the result is a single token.
  encoded = [data].pack("m")
  encoded.delete(" \n")
end
|
1187
|
+
module_function :base_encode
|
1188
|
+
end
|
1189
|
+
end
|
1190
|
+
|
1191
|
+
|
1192
|
+
|
1193
|
+
# END OF FILE
|