xapian-rack 1.2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ # This file is part of the "Utopia Framework" project, and is licensed under the GNU AGPLv3.
2
+ # Copyright 2010 Samuel Williams. All rights reserved.
3
+ # See <utopia.rb> for licensing details.
4
+
5
+ require 'uri'
6
+ require 'pathname'
7
+
8
+ require 'logger'
9
+
10
+ require 'xapian/indexer'
11
+ require 'xapian/indexer/loaders/http'
12
+ require 'xapian/indexer/extractors/html'
13
+
14
+ module Xapian
15
+ module Rack
16
+
17
# Runs +query+ through the search object stored under 'xapian.search' in
# the Rack environment and returns whatever that object's #find returns.
#
# env     - Rack environment hash holding the search object.
# query   - query passed through unchanged.
# options - options hash passed through unchanged (defaults to {}).
def self.find(env, query, options = {})
  env['xapian.search'].find(query, options)
end
21
+
22
# Looks up the search object stored under 'xapian.search' in the Rack
# environment. Yields nil when nothing has been stored under that key.
#
# env - Rack environment hash.
def self.get(env)
  env['xapian.search']
end
25
+
26
# Loader that resolves relative URIs by calling the wrapped Rack
# application directly with a synthetic GET request.
class RelativeLoader
  # app     - object responding to #call(env) with [status, headers, body].
  # options - :logger replaces the default Logger that writes to $stdout.
  def initialize(app, options = {})
    @app = app

    @logger = options[:logger] || Logger.new($stdout)
  end

  # Requests +name+ from the wrapped application when it is a relative URI.
  #
  # Yields exactly once for a relative +name+:
  #   * on success: status, a copy of the headers with lower-cased keys, and
  #     a lambda that concatenates the response body segments when called;
  #   * when the application raises: 500, {}, nil (the error and its
  #     backtrace are written to the logger).
  #
  # Returns true when +name+ was relative (and therefore handled here),
  # false when it was not, in which case nothing is yielded.
  def call(name, &block)
    return false unless URI.parse(name).relative?

    env = {
      'HTTP_ACCEPT' => '*/*',
      'QUERY_STRING' => '',
      'REQUEST_METHOD' => 'GET',
      'PATH_INFO' => name,
      'rack.input' => StringIO.new,
    }

    begin
      status, header, content = @app.call(env)
    rescue => error
      @logger.error "Error requesting resource #{name}: #{error}"
      Array(error.backtrace).each { |line| @logger.error(line) }

      yield 500, {}, nil

      # Stop here: there is no response to process, and continuing would
      # dereference the nil header and yield a second time.
      return true
    end

    # The body is read lazily so callers that only inspect the status or
    # headers never pay for iterating the response.
    body = lambda do
      buffer = ''.dup
      content.each { |segment| buffer << segment }

      buffer
    end

    downcase_header = {}
    header.each { |key, value| downcase_header[key.downcase] = value }

    yield status, downcase_header, body

    true
  end
end
71
+
72
# Rack middleware that stores itself in the request environment under
# 'xapian.search' and, unless disabled, re-indexes the configured roots
# from a background thread.
class Search
  # Seconds slept between background index passes when options[:refresh]
  # is not supplied.
  DEFAULT_REFRESH = 3600

  # Number of matches requested by #find when options[:count] is not supplied.
  DEFAULT_COUNT = 10

  # The read-only database handle; nil until the first call to #find.
  attr_reader :database

  # app     - the downstream Rack application.
  # options - :database  path handed to the Xapian database constructors
  #           :logger    Logger to use (defaults to one writing to $stderr)
  #           :indexer   pass :disabled to skip the background thread
  #           :roots     passed to #index by the background thread
  #           :refresh   seconds between background passes
  #           The whole hash is also forwarded to #index.
  def initialize(app, options = {})
    @app = app
    @database_path = options[:database]
    @database = nil

    # Resolve the logger before building collaborators so the relative
    # loader reports through the same logger instead of its own default.
    @logger = options[:logger] || Logger.new($stderr)

    # Setup the controller
    @controller = Xapian::Indexer::Controller.new

    @controller.loaders << RelativeLoader.new(@app, :logger => @logger)
    @controller.loaders << Xapian::Indexer::Loaders::HTTP.new
    @controller.extractors['text/html'] = Xapian::Indexer::Extractors::HTML.new

    # Setup the generator
    @generator = Xapian::TermGenerator.new

    @indexer = start_indexer(options) unless options[:indexer] == :disabled
  end

  # Parses +query+ and returns the match set produced by Xapian::Enquire#mset.
  #
  # query   - the query string given to the query parser.
  # options - :flags (default 0) passed to parse_query,
  #           :start (default 0) and :count (default DEFAULT_COUNT) passed to mset.
  def find(query, options = {})
    # Open the database on first use; afterwards call reopen on the cached handle.
    if @database
      @database.reopen
    else
      @database = Xapian::Database.new(@database_path)
    end

    # Start an enquire session.
    enquire = Xapian::Enquire.new(@database)

    # Setup the query parser
    parser = Xapian::QueryParser.new
    parser.database = @database
    enquire.query = parser.parse_query(query, options[:flags] || 0)

    enquire.mset(options[:start] || 0, options[:count] || DEFAULT_COUNT)
  end

  # Crawls +roots+ with a Xapian::Indexer::Spider writing into the database
  # at the configured path. The writable database is always closed, even
  # when the crawl raises.
  #
  # roots   - handed to Spider#add.
  # options - handed to Spider#process; :domains lists the hosts whose
  #           absolute links are passed back to the spider.
  def index(roots, options = {})
    writable_database = Xapian::WritableDatabase.new(@database_path, Xapian::DB_CREATE_OR_OPEN)

    begin
      spider = Xapian::Indexer::Spider.new(writable_database, @generator, @controller)

      spider.add(roots)

      # The block hands back relative links and links to listed domains,
      # and nil for everything else.
      # NOTE(review): presumably nil tells the spider to skip the link - confirm.
      spider.process(options) do |link|
        uri = URI.parse(link)

        if uri.relative?
          link
        elsif options[:domains] && options[:domains].include?(uri.host)
          link
        else
          nil
        end
      end

      spider.remove_old!
    ensure
      writable_database.close
    end
  end

  # Rack entry point: stores this object under 'xapian.search' and passes
  # the request on to the wrapped application.
  def call(env)
    env['xapian.search'] = self
    @app.call(env)
  end

  private

  # Starts and returns the thread that repeatedly runs #index and then
  # sleeps for options[:refresh] (or DEFAULT_REFRESH) seconds. Errors
  # raised by a pass are logged and do not end the loop.
  def start_indexer(options)
    Thread.new do
      loop do
        begin
          @logger.info "Updating index in background..."
          index(options[:roots], options)
        rescue => error
          @logger.error "Error while updating index: #{error}"
          Array(error.backtrace).each { |line| @logger.error(line) }
        ensure
          @logger.info "Index update finished."
        end

        sleep(options[:refresh] || DEFAULT_REFRESH)
      end
    end
  end
end
167
+
168
+ end
169
+ end
@@ -0,0 +1,27 @@
1
+ # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
module Xapian
  module Rack
    # Version components of the xapian-rack gem and their dotted form.
    module VERSION #:nodoc:
      MAJOR, MINOR, TINY, REV = 1, 2, 3, 1

      # Dotted version string, "MAJOR.MINOR.TINY.REV".
      STRING = "#{MAJOR}.#{MINOR}.#{TINY}.#{REV}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xapian-rack
3
+ version: !ruby/object:Gem::Version
4
+ hash: 65
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 2
9
+ - 3
10
+ - 1
11
+ version: 1.2.3.1
12
+ platform: ruby
13
+ authors:
14
+ - Samuel Williams
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2010-12-19 00:00:00 +13:00
20
+ default_executable:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: rack
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: xapian-indexer
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ description:
51
+ email: samuel.williams@oriontransfer.co.nz
52
+ executables: []
53
+
54
+ extensions: []
55
+
56
+ extra_rdoc_files: []
57
+
58
+ files:
59
+ - lib/xapian/rack/search.rb
60
+ - lib/xapian/rack/version.rb
61
+ has_rdoc: true
62
+ homepage: http://www.oriontransfer.co.nz/software/xapian
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options: []
67
+
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ requirements: []
89
+
90
+ rubyforge_project:
91
+ rubygems_version: 1.3.7
92
+ signing_key:
93
+ specification_version: 3
94
+ summary: Xapian-rack provides indexing and searching integration with Rack.
95
+ test_files: []
96
+