xapian-rack 1.2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/xapian/rack/search.rb +169 -0
- data/lib/xapian/rack/version.rb +27 -0
- metadata +96 -0
@@ -0,0 +1,169 @@
|
|
1
|
+
# This file is part of the "Utopia Framework" project, and is licensed under the GNU AGPLv3.
|
2
|
+
# Copyright 2010 Samuel Williams. All rights reserved.
|
3
|
+
# See <utopia.rb> for licensing details.
|
4
|
+
|
5
|
+
require 'uri'
|
6
|
+
require 'pathname'
|
7
|
+
|
8
|
+
require 'logger'
|
9
|
+
|
10
|
+
require 'xapian/indexer'
|
11
|
+
require 'xapian/indexer/loaders/http'
|
12
|
+
require 'xapian/indexer/extractors/html'
|
13
|
+
|
14
|
+
module Xapian
|
15
|
+
module Rack
|
16
|
+
|
17
|
+
# Run +query+ against the search object that the middleware stored in the
# Rack environment under the 'xapian.search' key.
#
# env::     Rack environment hash.
# query::   query passed through unchanged to the search object's +find+.
# options:: options passed through unchanged to the search object's +find+.
#
# Returns whatever the search object's +find+ returns.
def self.find(env, query, options = {})
  env['xapian.search'].find(query, options)
end
|
21
|
+
|
22
|
+
# Look up the search object stored in the Rack environment.
#
# env:: Rack environment hash.
#
# Returns the value stored under 'xapian.search' (nil when the key is absent
# from a plain Hash).
def self.get(env)
  search = env['xapian.search']
  search
end
|
25
|
+
|
26
|
+
# Loader that resolves relative URIs by sending a synthetic GET request
# straight to a Rack application object.
#
# NOTE(review): StringIO is used below, but 'stringio' is not among the
# requires visible at the top of this file; presumably a dependency loads
# it - confirm.
class RelativeLoader
  # app::     object responding to +call(env)+ and returning
  #           [status, headers, body].
  # options:: +:logger+ - logger used for error reports; defaults to a new
  #           Logger writing to $stdout.
  def initialize(app, options = {})
    @app = app

    @logger = options[:logger] || Logger.new($stdout)
  end

  # Load +name+ from the wrapped application when it is a relative URI.
  #
  # Yields +status+, +headers+ (with downcased keys) and +body+, where
  # +body+ is a lambda that concatenates the response segments when called.
  # If the application raises, the error and its backtrace are logged and
  # <tt>500, {}, nil</tt> is yielded instead.
  #
  # Returns +true+ when +name+ was relative (and was therefore handled
  # here), +false+ otherwise.
  def call(name, &block)
    # Only relative URIs are handled by this loader.
    return false unless URI.parse(name).relative?

    # NOTE(review): +name+ is used verbatim as PATH_INFO and QUERY_STRING is
    # always empty, so a query string embedded in +name+ is not split out.
    env = {
      'HTTP_ACCEPT' => '*/*',
      'QUERY_STRING' => '',
      'REQUEST_METHOD' => 'GET',
      'PATH_INFO' => name,
      'rack.input' => StringIO.new,
    }

    begin
      status, header, content = @app.call(env)
    rescue => error
      @logger.error "Error requesting resource #{name}: #{error}"
      Array(error.backtrace).each { |line| @logger.error(line) }

      yield 500, {}, nil

      # There is no response to process after a failure. Without this
      # return, execution would fall through and call +each+ on a nil
      # header.
      return true
    end

    # The body is read lazily: nothing is consumed until the lambda is
    # called.
    # NOTE(review): +content+ is never closed, even if it responds to
    # +close+.
    body = lambda do
      buffer = "".dup
      content.each { |segment| buffer << segment }

      buffer
    end

    downcase_header = {}
    header.each { |key, value| downcase_header[key.downcase] = value }

    yield status, downcase_header, body

    true
  end
end
|
71
|
+
|
72
|
+
# Rack middleware that stores itself in the request environment under
# 'xapian.search' so downstream code can run queries, and that (unless
# disabled) re-indexes the configured roots from a background thread.
class Search
  # app::     the downstream Rack application.
  # options:: options hash; the keys read here are
  #           +:database+ (path handed to the Xapian database constructors),
  #           +:logger+   (defaults to a new Logger writing to $stderr),
  #           +:indexer+  (pass +:disabled+ to skip the background thread),
  #           +:roots+    (handed to #index by the background thread),
  #           +:refresh+  (seconds to sleep between index runs, default 3600).
  #           The whole hash is also forwarded to #index.
  def initialize(app, options = {})
    @app = app
    @database_path = options[:database]
    @database = nil

    @logger = options[:logger] || Logger.new($stderr)

    # Setup the controller. The relative loader shares this middleware's
    # logger so that its error reports end up in the same place.
    @controller = Xapian::Indexer::Controller.new

    @controller.loaders << RelativeLoader.new(@app, :logger => @logger)
    @controller.loaders << Xapian::Indexer::Loaders::HTTP.new
    @controller.extractors['text/html'] = Xapian::Indexer::Extractors::HTML.new

    # Setup the generator
    @generator = Xapian::TermGenerator.new

    unless options[:indexer] == :disabled
      @indexer = Thread.new do
        loop do
          begin
            @logger.info "Updating index in background..."
            index(options[:roots], options)
          rescue => error
            # A failed run is logged and retried after the next sleep.
            @logger.error "Error while updating index: #{error}"
            Array(error.backtrace).each { |line| @logger.error(line) }
          ensure
            @logger.info "Index update finished."
          end

          sleep options[:refresh] || 3600
        end
      end
    end
  end

  # Run +query+ against the database at the configured path.
  #
  # query::   query string handed to the Xapian query parser.
  # options:: +:flags+ (parser flags, default 0), +:start+ (offset of the
  #           first match, default 0), +:count+ (number of matches,
  #           default 10).
  #
  # Returns the match set produced by <tt>Xapian::Enquire#mset</tt>.
  def find(query, options = {})
    # The read-only database is opened on first use and kept; later calls
    # invoke +reopen+ on it (presumably to pick up index updates - confirm
    # against the Xapian API).
    if @database
      @database.reopen
    else
      @database = Xapian::Database.new(@database_path)
    end

    # Start an enquire session.
    enquire = Xapian::Enquire.new(@database)

    # Setup the query parser
    parser = Xapian::QueryParser.new
    parser.database = @database
    enquire.query = parser.parse_query(query, options[:flags] || 0)

    first = options[:start] || 0
    limit = options[:count] || 10

    enquire.mset(first, limit)
  end

  # Crawl +roots+ and update the index, then remove stale entries.
  #
  # roots::   starting point(s) handed to the spider.
  # options:: forwarded to <tt>spider.process</tt>; +:domains+ is also read
  #           here as the list of hosts that absolute links may point to.
  #
  # The writable database is always closed, even when indexing fails.
  def index(roots, options = {})
    writable_database = Xapian::WritableDatabase.new(@database_path, Xapian::DB_CREATE_OR_OPEN)

    begin
      spider = Xapian::Indexer::Spider.new(writable_database, @generator, @controller)

      spider.add(roots)

      # The block decides which links are followed: it returns the link to
      # keep it or nil to drop it.
      spider.process(options) do |link|
        begin
          uri = URI.parse(link)
        rescue URI::InvalidURIError
          # One malformed link must not abort the whole index run.
          @logger.warn "Skipping invalid link: #{link.inspect}"
          next nil
        end

        allowed_hosts = options[:domains]

        if uri.relative? || (allowed_hosts && allowed_hosts.include?(uri.host))
          link
        else
          nil
        end
      end

      spider.remove_old!
    ensure
      writable_database.close
    end
  end

  # The read-only database handle, or nil until #find has been called.
  attr_reader :database

  # Rack entry point: publish this object as env['xapian.search'] and pass
  # the request on to the downstream application.
  def call(env)
    env['xapian.search'] = self

    @app.call(env)
  end
end
|
167
|
+
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module Xapian
  module Rack
    # Version components of this gem and their dotted string form.
    module VERSION #:nodoc:
      MAJOR = 1
      MINOR = 2
      TINY = 3
      REV = 1

      # Dotted version string built from the four components above.
      STRING = [MAJOR, MINOR, TINY, REV].map { |part| part.to_s }.join('.')
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xapian-rack
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 65
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 2
|
9
|
+
- 3
|
10
|
+
- 1
|
11
|
+
version: 1.2.3.1
|
12
|
+
platform: ruby
|
13
|
+
authors:
|
14
|
+
- Samuel Williams
|
15
|
+
autorequire:
|
16
|
+
bindir: bin
|
17
|
+
cert_chain: []
|
18
|
+
|
19
|
+
date: 2010-12-19 00:00:00 +13:00
|
20
|
+
default_executable:
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: rack
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: xapian-indexer
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
description:
|
51
|
+
email: samuel.williams@oriontransfer.co.nz
|
52
|
+
executables: []
|
53
|
+
|
54
|
+
extensions: []
|
55
|
+
|
56
|
+
extra_rdoc_files: []
|
57
|
+
|
58
|
+
files:
|
59
|
+
- lib/xapian/rack/search.rb
|
60
|
+
- lib/xapian/rack/version.rb
|
61
|
+
has_rdoc: true
|
62
|
+
homepage: http://www.oriontransfer.co.nz/software/xapian
|
63
|
+
licenses: []
|
64
|
+
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 3
|
76
|
+
segments:
|
77
|
+
- 0
|
78
|
+
version: "0"
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
requirements: []
|
89
|
+
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 1.3.7
|
92
|
+
signing_key:
|
93
|
+
specification_version: 3
|
94
|
+
summary: Xapian-rack provides indexing and searching integration with Rack.
|
95
|
+
test_files: []
|
96
|
+
|