xapian-rack 1.2.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,169 @@
1
+ # This file is part of the "Utopia Framework" project, and is licensed under the GNU AGPLv3.
2
+ # Copyright 2010 Samuel Williams. All rights reserved.
3
+ # See <utopia.rb> for licensing details.
4
+
5
+ require 'uri'
6
+ require 'pathname'
7
+
8
+ require 'logger'
9
+
10
+ require 'xapian/indexer'
11
+ require 'xapian/indexer/loaders/http'
12
+ require 'xapian/indexer/extractors/html'
13
+
14
+ module Xapian
15
+ module Rack
16
+
17
# Runs +query+ through the search object stored in the Rack environment
# under 'xapian.search' and returns whatever that object's #find returns.
#
# env     - Rack environment hash; must contain a 'xapian.search' entry.
# query   - the query, passed through unchanged.
# options - options hash, passed through unchanged (defaults to {}).
def self.find(env, query, options = {})
  env['xapian.search'].find(query, options)
end
21
+
22
# Looks up the search object stored in the Rack environment under
# 'xapian.search'. Returns nil when the key is absent.
def self.get(env)
  env['xapian.search']
end
25
+
26
# Loader that serves relative URIs by sending a synthetic GET request
# directly to the wrapped Rack application.
class RelativeLoader
  # app     - the Rack application (anything responding to #call(env)).
  # options - optional settings; :logger replaces the default logger, which
  #           writes to $stdout.
  def initialize(app, options = {})
    @app = app

    @logger = options[:logger] || Logger.new($stdout)
  end

  # Tries to load +name+ from the wrapped application.
  #
  # When +name+ is a relative URI the block is yielded to exactly once with
  # (status, headers, body):
  #   * on success, headers have lower-cased keys and body is a lambda that
  #     returns the concatenated response body when called;
  #   * if the application raises, the error is logged and the block
  #     receives (500, {}, nil).
  #
  # Returns true when +name+ was relative (the request was attempted) and
  # false when it was not, in which case nothing is yielded.
  def call(name, &block)
    # Only relative URIs are handled here.
    return false unless URI.parse(name).relative?

    env = {
      'HTTP_ACCEPT' => '*/*',
      'QUERY_STRING' => '',
      'REQUEST_METHOD' => 'GET',
      'PATH_INFO' => name,
      'rack.input' => StringIO.new,
    }

    begin
      status, header, content = @app.call(env)
    rescue
      @logger.error "Error requesting resource #{name}: #{$!}"
      $!.backtrace.each { |line| @logger.error(line) }

      yield 500, {}, nil

      # Stop here: status/header/content were never assigned, so carrying on
      # would call #each on nil and yield a second time.
      return true
    end

    # Reading the body is deferred until the consumer actually asks for it.
    body = lambda do
      buffer = String.new
      content.each { |segment| buffer << segment }
      buffer
    end

    downcase_header = {}
    header.each { |key, value| downcase_header[key.downcase] = value }

    yield status, downcase_header, body

    true
  end
end
71
+
72
# Rack middleware that publishes itself as env['xapian.search'] for
# downstream applications and, unless disabled, rebuilds the Xapian index
# from a background thread.
class Search
  # The read-only Xapian database; nil until the first #find call.
  attr_reader :database

  # app     - the downstream Rack application.
  # options - settings hash:
  #   :database - path handed to Xapian::Database / Xapian::WritableDatabase.
  #   :logger   - logger to use (default: a Logger writing to $stderr).
  #   :indexer  - pass :disabled to skip starting the background indexer.
  #   :roots    - passed to #index by the background indexer.
  #   :refresh  - seconds to sleep between index runs (default 3600).
  #   The whole hash is also forwarded to #index.
  def initialize(app, options = {})
    @app = app
    @database_path = options[:database]
    @database = nil

    # Created first so the relative loader below reports through it too.
    @logger = options[:logger] || Logger.new($stderr)

    # Setup the controller
    @controller = Xapian::Indexer::Controller.new

    @controller.loaders << RelativeLoader.new(@app, :logger => @logger)
    @controller.loaders << Xapian::Indexer::Loaders::HTTP.new
    @controller.extractors['text/html'] = Xapian::Indexer::Extractors::HTML.new

    # Setup the generator
    @generator = Xapian::TermGenerator.new

    unless options[:indexer] == :disabled
      @indexer = Thread.new do
        loop do
          begin
            @logger.info "Updating index in background..."
            index(options[:roots], options)
          rescue
            # A failed run is logged and retried after the next sleep.
            @logger.error "Error while updating index: #{$!}"
            $!.backtrace.each { |line| @logger.error(line) }
          ensure
            @logger.info "Index update finished."
          end

          sleep(options[:refresh] || 3600)
        end
      end
    end
  end

  # Runs +query+ against the database and returns the resulting match set.
  #
  # query   - query string handed to Xapian::QueryParser#parse_query.
  # options - :flags (parser flags, default 0), :start (offset of the first
  #           match, default 0), :count (maximum matches, default 10).
  def find(query, options = {})
    # Open lazily on first use; afterwards reopen on every search.
    if @database
      @database.reopen
    else
      @database = Xapian::Database.new(@database_path)
    end

    # Start an enquire session.
    enquire = Xapian::Enquire.new(@database)

    # Setup the query parser
    parser = Xapian::QueryParser.new
    parser.database = @database
    enquire.query = parser.parse_query(query, options[:flags] || 0)

    first = options[:start] || 0
    limit = options[:count] || 10

    enquire.mset(first, limit)
  end

  # Crawls from +roots+ with a Xapian::Indexer::Spider, then calls
  # remove_old! on it. The writable database is always closed afterwards.
  #
  # roots   - passed to Spider#add.
  # options - forwarded to Spider#process; :domains lists the hosts whose
  #           absolute links are kept. Relative links are always kept.
  def index(roots, options = {})
    writable_database = Xapian::WritableDatabase.new(@database_path, Xapian::DB_CREATE_OR_OPEN)

    begin
      spider = Xapian::Indexer::Spider.new(writable_database, @generator, @controller)

      spider.add(roots)

      # The block returns the link to keep it, or nil otherwise.
      # NOTE(review): nil presumably tells the spider to skip the link -
      # confirm against Xapian::Indexer::Spider#process.
      spider.process(options) do |link|
        link if follow?(link, options[:domains])
      end

      spider.remove_old!
    ensure
      writable_database.close
    end
  end

  # Rack entry point: stores this object in the environment and delegates
  # to the downstream application.
  def call(env)
    env['xapian.search'] = self
    @app.call(env)
  end

  private

  # True when +link+ is relative or its host is listed in +domains+.
  # A link that cannot be parsed is rejected, so one malformed link does
  # not abort the whole index run.
  def follow?(link, domains)
    uri = URI.parse(link)

    return true if uri.relative?

    domains ? domains.include?(uri.host) : false
  rescue URI::InvalidURIError
    false
  end
end
167
+
168
+ end
169
+ end
@@ -0,0 +1,27 @@
1
+ # Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
module Xapian
  module Rack
    # Version components of the xapian-rack gem and their dotted string form.
    module VERSION #:nodoc:
      MAJOR, MINOR, TINY, REV = 1, 2, 3, 1

      STRING = "#{MAJOR}.#{MINOR}.#{TINY}.#{REV}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xapian-rack
3
+ version: !ruby/object:Gem::Version
4
+ hash: 65
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 2
9
+ - 3
10
+ - 1
11
+ version: 1.2.3.1
12
+ platform: ruby
13
+ authors:
14
+ - Samuel Williams
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2010-12-19 00:00:00 +13:00
20
+ default_executable:
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: rack
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: xapian-indexer
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ description:
51
+ email: samuel.williams@oriontransfer.co.nz
52
+ executables: []
53
+
54
+ extensions: []
55
+
56
+ extra_rdoc_files: []
57
+
58
+ files:
59
+ - lib/xapian/rack/search.rb
60
+ - lib/xapian/rack/version.rb
61
+ has_rdoc: true
62
+ homepage: http://www.oriontransfer.co.nz/software/xapian
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options: []
67
+
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ hash: 3
76
+ segments:
77
+ - 0
78
+ version: "0"
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ requirements: []
89
+
90
+ rubyforge_project:
91
+ rubygems_version: 1.3.7
92
+ signing_key:
93
+ specification_version: 3
94
+ summary: Xapian-rack provides indexing and searching integration with Rack.
95
+ test_files: []
96
+