xapian-indexer 1.2.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/xapian/indexer.rb +12 -0
- data/lib/xapian/indexer/extensions.rb +45 -0
- data/lib/xapian/indexer/extractors/html.rb +106 -0
- data/lib/xapian/indexer/loaders/http.rb +62 -0
- data/lib/xapian/indexer/resource.rb +165 -0
- data/lib/xapian/indexer/spider.rb +182 -0
- data/lib/xapian/indexer/version.rb +27 -0
- metadata +74 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require 'uri'
|
17
|
+
|
18
|
+
# Indexer-specific extensions to URI::Generic.
class URI::Generic
  # Whether the path component starts with a "/".
  #
  # Always returns true or false, and treats a missing (nil) path as not
  # absolute instead of raising.
  def absolute_path?
    path.to_s.start_with?('/')
  end

  # The inverse of #absolute_path?.
  def relative_path?
    !absolute_path?
  end

  # Behavior in 1.8.7 seems to be broken...?
  #
  # Normalises +oth+ (a URI object or a URI string) and returns the pair
  # [base, relative] for a merge: when +oth+ is absolute it is returned for
  # both positions, otherwise a copy of self is paired with +oth+.
  # NOTE(review): presumably this overrides the helper that URI#merge / URI#+
  # call in the standard library -- confirm against the targeted Ruby version.
  #
  # Raises ArgumentError when +oth+ is neither a URI nor a String.
  def merge0(oth)
    other =
      case oth
      when Generic
        oth
      when String
        URI.parse(oth)
      else
        raise ArgumentError, "bad argument(expected URI object or URI string)"
      end

    other.absolute? ? [other, other] : [dup, other]
  end
end
|
44
|
+
|
45
|
+
# puts URI.parse("/bob/dole") + URI.parse("http://www.lucidsystems.org")
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require 'nokogiri'
|
17
|
+
|
18
|
+
module Xapian
|
19
|
+
module Indexer
|
20
|
+
module Extractors
|
21
|
+
# Extracts indexable metadata from an HTML document: :description, :links,
# :title, :keywords and the text :content of the body.
class HTML
  # Elements removed from the document before the body text is extracted.
  JUNK_ELEMENTS = %w[script link meta style form].freeze

  # options[:logger] is used for warnings; defaults to a Logger on $stderr.
  def initialize(options = {})
    @options = options

    @logger = options[:logger] || Logger.new($stderr)
  end

  # Parses +data+ as HTML and returns a Hash of extracted metadata.
  #
  # resource:: object responding to #name (the URI the document came from)
  # status::   response status (unused here)
  # headers::  response headers (unused here)
  # data::     the raw HTML
  #
  # The result always contains :links (absolute-ised against the document's
  # base, fragments removed, de-duplicated). :description, :title, :keywords
  # and :content are only present when the document provides them.
  def call(resource, status, headers, data)
    html = Nokogiri::HTML.parse(data)
    result = {}

    # Extract description, falling back to the first paragraph.
    meta_description = html.css("meta[name='description']").first

    if meta_description
      result[:description] = meta_description['content']
    else
      first_paragraph = html.search("p").first
      result[:description] = first_paragraph.inner_text if first_paragraph
    end

    result[:links] = extract_links(html, base_uri(html, resource))

    # Extract title, falling back to the first heading.
    title_tag = html.at('html/head/title')
    h1_tag = html.search('h1').first

    if title_tag
      result[:title] = title_tag.inner_text
    elsif h1_tag
      result[:title] = h1_tag.inner_text
    end

    # Extract keywords. The standard meta name is "keywords"; the singular
    # form is still accepted for backward compatibility.
    meta_keywords = html.css("meta[name='keywords'], meta[name='keyword']").first
    result[:keywords] = meta_keywords['content'] if meta_keywords

    # Remove junk elements from the html
    JUNK_ELEMENTS.each { |tag| html.search(tag).remove }
    html.css('.noindex').remove

    # A document without a body (e.g. empty input) simply has no :content.
    body = html.at('html/body')
    result[:content] = body.inner_text if body

    return result
  end

  private

  # The URI that relative links are resolved against: the <base href> of the
  # document (itself resolved against the resource name) when present and
  # valid, otherwise the resource name.
  def base_uri(html, resource)
    document_uri = URI.parse(resource.name)

    base_tag = html.at('html/head/base')
    href = base_tag && base_tag['href']

    # A <base> tag may carry no href at all (e.g. only a target attribute).
    return document_uri if href.nil? || href.strip.empty?

    begin
      document_uri + href.strip
    rescue URI::Error => error
      @logger.warn "Ignoring invalid base href #{href.inspect}: #{error}"
      document_uri
    end
  end

  # Collects the targets of all <a href> elements as absolute URI strings
  # with fragments removed. Links that cannot be parsed are logged and
  # skipped so that one malformed href does not abort the whole extraction.
  def extract_links(html, base)
    links = []

    html.css('a').each do |anchor|
      href = anchor['href']

      # Anchors without an href (e.g. <a name="...">) are not links.
      next if href.nil?

      href = href.to_s.strip.gsub(/ /, '%20')

      begin
        link = base + href
      rescue URI::Error => error
        @logger.warn "Could not add link #{href.inspect}: #{error}"
        next
      end

      # Remove any fragment at the end of the URI.
      link.fragment = nil

      links << link.to_s
    end

    links.uniq
  end
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require 'net/http'
|
17
|
+
require 'xapian/indexer/version'
|
18
|
+
|
19
|
+
module Xapian
|
20
|
+
module Indexer
|
21
|
+
|
22
|
+
module Loaders
|
23
|
+
# Loader that retrieves resources over HTTP or HTTPS.
class HTTP
  UserAgent = "Xapian-Spider #{Xapian::Indexer::VERSION::STRING}"

  # options[:logger] is used for progress messages; defaults to a Logger
  # on $stderr.
  def initialize(options = {})
    @options = options

    @logger = options[:logger] || Logger.new($stderr)
  end

  # Issues a HEAD request for +name+ and yields
  # (status code as Integer, response header, body loader).
  #
  # The body loader is a lambda that performs the GET request and returns
  # the response body; it must be called inside the yielded block, while
  # the connection is still open.
  #
  # Returns true when +name+ is an http:// or https:// URI and the request
  # was made, false when this loader cannot handle +name+ (relative URIs
  # and other schemes such as mailto: or ftp:).
  def call(name, &block)
    uri = URI.parse(name)

    # URI::HTTPS is a subclass of URI::HTTP, so this accepts both.
    return false unless uri.is_a?(URI::HTTP) && uri.host

    @logger.info "Loading external URI: #{name.inspect}"

    connection = Net::HTTP.new(uri.host, uri.port)
    connection.use_ssl = true if uri.is_a?(URI::HTTPS)

    connection.start do |http|
      request_headers = {'User-Agent' => UserAgent}

      # request_uri keeps the query string and is "/" for an empty path,
      # neither of which uri.path provides.
      target = uri.request_uri

      head = http.request_head(target, request_headers)

      body = lambda do
        http.request_get(target, request_headers).body
      end

      yield head.code.to_i, head.header, body
    end

    return true
  end
end
|
56
|
+
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
@@ -0,0 +1,165 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require 'digest/md5'
|
17
|
+
|
18
|
+
module Xapian
|
19
|
+
module Indexer
|
20
|
+
|
21
|
+
# Coordinates loaders (which retrieve a resource by name) and extractors
# (which turn a response body into metadata, keyed by mime type).
class Controller
  # options[:logger] is used for warnings; defaults to a Logger on $stderr.
  def initialize(options = {})
    @extractors = {}
    @loaders = []

    @logger = options[:logger] || Logger.new($stderr)
  end

  # Array of loaders; each must respond to call(name) and yield
  # (status, header, load_body).
  attr_reader :loaders

  # Hash of mime type (String) => extractor; each extractor must respond to
  # call(resource, status, header, body) and return the metadata.
  attr_reader :extractors

  # Builds a new, unfetched resource for +name+ bound to this controller.
  def create(name)
    Resource.new(name, self)
  end

  # Tries each loader in turn for +resource+.
  #
  # * 2xx response with a supported content type: the body is loaded and
  #   extracted, (status, header, body, metadata) is yielded and true is
  #   returned.
  # * 2xx response with an unsupported or missing content type: a warning
  #   is logged and false is returned.
  # * 3xx response with a Location header: (status, header, nil, metadata)
  #   is yielded with the redirect target as the only link, and true is
  #   returned so the remaining loaders are not asked to fetch it again.
  # * Anything else: the next loader is tried; false is returned when no
  #   loader produced a result.
  def load(resource, &block)
    @loaders.each do |loader|
      loader.call(resource.name) do |status, header, load_body|
        if status >= 200 && status < 300
          # Process the page content
          mime_type = mime_type_for(header)
          extractor = @extractors[mime_type]

          unless extractor
            @logger.warn "Ignoring resource #{resource.name} because content-type #{mime_type} is not supported."
            return false
          end

          body = load_body.call
          metadata = extractor.call(resource, status, header, body)

          # Load the data into the resource
          yield status, header, body, metadata

          return true
        elsif status >= 300 && status < 400
          # Process the redirect. Some 3xx responses (e.g. 304) carry no
          # Location header and give us nothing to follow.
          redirect = header['location']
          next unless redirect

          location = URI.parse(resource.name) + redirect

          metadata = {
            :links => [location.to_s]
          }

          # This resource is not indexable, using nil for body
          yield status, header, nil, metadata

          return true
        end
      end
    end

    return false
  end

  # Serialises the resource's hash representation to YAML.
  def save(resource)
    YAML::dump(resource.to_hash)
  end

  # Rebuilds a resource from YAML produced by #save.
  # NOTE(review): YAML::load can instantiate arbitrary objects on some Ruby
  # versions; +data+ must only ever come from a trusted index.
  def recreate(data)
    values = YAML::load(data)
    Resource.new(values[:name], self, values)
  end

  private

  # The bare mime type from the content-type header (parameters such as
  # charset removed, surrounding whitespace stripped, lower-cased), or nil
  # when the header is absent.
  def mime_type_for(header)
    content_type = header['content-type']
    return nil unless content_type

    content_type.split(";").first.to_s.strip.downcase
  end
end
|
82
|
+
|
83
|
+
# Represents a resource that will be indexed
class Resource
  # Seconds a resource is considered fresh when the response does not
  # specify a max-age.
  DEFAULT_MAX_AGE = 3600

  # name::       identifier of the resource (used for loading and digests)
  # controller:: object whose #load performs the actual fetch
  # values::     optional previously saved state, with the keys produced
  #              by #to_hash
  def initialize(name, controller, values = {})
    @name = name
    @controller = controller

    @fetched_on = values[:fetched_on]
    @status = values[:status]
    @header = values[:header]
    @body = values[:body]
    @metadata = values[:metadata]
  end

  attr_reader :name
  attr_reader :status
  attr_reader :header
  attr_reader :body
  attr_reader :metadata

  # The state of the resource as a Hash, suitable for Resource.new's
  # +values+ argument.
  def to_hash
    {
      :fetched_on => @fetched_on,
      :name => @name,
      :status => @status,
      :header => @header,
      :body => @body,
      :metadata => @metadata
    }
  end

  # The data that will be indexed: the extracted content (or the raw body
  # when none was extracted) followed by title, description and keywords.
  # Returns an empty string when nothing is available.
  def content
    metadata = @metadata || {}

    [
      metadata[:content] || @body,
      metadata[:title],
      metadata[:description],
      metadata[:keywords]
    ].compact.join(" ")
  end

  # The links recorded in the metadata, or nil when there is no metadata.
  def links
    @metadata[:links] if @metadata
  end

  # Whether the resource, as fetched, can still be considered fresh at
  # time +at+. A resource that was never fetched is never fresh.
  #
  # The allowed age is the cache-control max-age (minus the age header when
  # present), or DEFAULT_MAX_AGE when no max-age was given.
  def fresh?(at = Time.now)
    return false unless fetched?

    header = @header || {}
    cache_control = header['cache-control'] || ""
    fetched_age = header['age'] || ""
    max_age = DEFAULT_MAX_AGE

    if (limit = cache_control.match(/max-age=([0-9]+)/))
      max_age = limit[1].to_i

      # The response had already aged in an upstream cache.
      if (elapsed = fetched_age.match(/([0-9]+)/))
        max_age -= elapsed[1].to_i
      end
    end

    age = at - @fetched_on

    # If the page is younger than the max_age the page can be considered fresh.
    return age < max_age
  end

  # Loads the resource through the controller and records the result.
  # Returns whatever the controller's #load returns.
  def fetch!
    @controller.load(self) do |status, header, body, metadata|
      @fetched_on = Time.now
      @status = status
      @header = header
      @body = body
      @metadata = metadata
    end
  end

  # Whether a fetch has been recorded for this resource.
  def fetched?
    !@fetched_on.nil?
  end

  # Whether the resource has a body (i.e. something that can be indexed).
  def content?
    !@body.nil?
  end

  # Term derived from the name: "Q" followed by the MD5 hex digest.
  def name_digest
    "Q" + Digest::MD5.hexdigest(@name)
  end
end
|
163
|
+
|
164
|
+
end
|
165
|
+
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require 'xapian'
|
17
|
+
require 'set'
|
18
|
+
|
19
|
+
module Xapian
|
20
|
+
module Indexer
|
21
|
+
# Represents a process which consumes resources into the database
# and follows links to related resources
class Spider
  # database = Xapian::Database.new(ARGV[0])
  #
  # database::   index the documents are read from and written to
  # generator::  receives each new document and the text to index
  # controller:: creates, saves and recreates resources
  # options::    :logger overrides the default Logger on $stdout
  def initialize(database, generator, controller, options = {})
    @database = database
    @generator = generator
    @controller = controller

    @links = []
    @touched = Set.new

    @logger = options[:logger] || Logger.new($stdout)
  end

  # NOTE(review): @resources is never assigned in the visible code, so this
  # reader returns nil; kept for interface compatibility.
  attr_reader :resources

  # Queues one root link (String) or several (Array) for processing.
  # Anything else is logged as an error and ignored.
  def add(root)
    case root
    when String
      @links << root
    when Array
      @links += root
    else
      @logger.error "Could not add roots #{root.inspect}!"
    end
  end

  # Pairs the resource about to be fetched for a link with whatever is
  # already stored in the database for it. Database lookups are done
  # lazily and memoised (false marks "not looked up yet").
  class Fetch
    def initialize(database, controller, link)
      @database = database
      @controller = controller

      @document = false
      @current_resource = controller.create(link)
      @archived_resource = false
    end

    attr_reader :database
    attr_reader :controller
    attr_reader :current_resource

    # The stored document for the current resource, or nil if there is none.
    def document
      if @document === false
        postlist = @database.postlist(@current_resource.name_digest)

        if postlist.size > 0
          @document = @database.document(postlist[0].docid)
        else
          @document = nil
        end
      end

      return @document
    end

    # The resource recreated from the stored document; false when no
    # document is stored.
    def archived_resource
      if @archived_resource === false
        if document
          @archived_resource = @controller.recreate(document.data)
        end
      end

      return @archived_resource
    end

    # Links of the freshly fetched resource if it was fetched, otherwise
    # those of the archived resource; nil when neither is available.
    def links
      if @current_resource.fetched?
        @current_resource.links
      elsif archived_resource
        archived_resource.links
      end
    end
  end

  # Processes the queued links breadth-first, indexing every resource that
  # is not already stored and fresh, then following its links.
  #
  # The optional block is called once with each discovered link and returns
  # the link to follow, or nil to drop it. Without a block every link is
  # followed.
  #
  # options[:count]:: stop once more than this many resources were processed
  # options[:depth]:: stop once more than this many levels were processed
  #
  # NOTE(review): both limits use a strict ">" comparison, so one extra
  # resource/level is processed -- confirm whether that is intended.
  #
  # Returns the number of resources processed, both on early exit and when
  # the queue runs empty.
  def process(options = {}, &block)
    count = 0
    depth = 0

    until @links.empty?
      new_links = []

      @links.each do |link|
        # Mark and sweep - don't review the same resource twice!
        next unless @touched.add?(link)

        # Create a new fetch from the database...
        fetch = Fetch.new(@database, @controller, link)
        resource = fetch.current_resource
        archived = fetch.archived_resource

        # Does it already exist in the current database (and fresh?)
        if archived && archived.fresh?
          @logger.info "Still fresh #{resource.name}..."
          outcome = true
        else
          outcome = index_resource(resource)
        end

        # Evaluate the caller's filter exactly once per discovered link.
        followed = follow(fetch.links, &block)

        @logger.warn "Links = #{followed.inspect}" if outcome == false

        new_links += followed

        count += 1

        if options[:count] && count > options[:count]
          # If we have to leave before finishing this breadth...
          @links += new_links
          return count
        end
      end

      @links = new_links

      depth += 1

      return count if options[:depth] && depth > options[:depth]
    end

    return count
  end

  # Deletes every stored document whose recreated resource is no longer fresh.
  def remove_old!
    postlist = @database.postlist("")

    postlist.each do |post|
      document = @database.document(post.docid)
      resource = @controller.recreate(document.data)

      unless resource.fresh?
        @logger.info "Removing expired index for #{resource.name}."
        @database.delete_document(post.docid)
      end
    end
  end

  private

  # Fetches +resource+ and, when it has content, stores it in the database.
  # Fetch errors are logged, not raised.
  #
  # Returns true when the resource was indexed, false when it was fetched
  # but has no indexable content, and nil when it could not be fetched.
  def index_resource(resource)
    begin
      @logger.info "Indexing #{resource.name}..."
      resource.fetch!
    rescue => error
      @logger.error "Could not fetch resource #{resource.name}: #{error}!"
      (error.backtrace || []).each { |line| @logger.error(line) }
    end

    # Did we fetch a resource and was it indexable?
    unless resource.fetched?
      @logger.warn "Could not fetch resource #{resource.name}!"
      return nil
    end

    unless resource.content?
      @logger.warn "Resource was not indexable #{resource.name}!"
      return false
    end

    doc = Xapian::Document.new
    doc.data = @controller.save(resource)
    doc.add_term(resource.name_digest)

    @generator.document = doc
    @generator.index_text(resource.content)
    @database.replace_document(resource.name_digest, doc)

    true
  end

  # Applies the caller's filter block to +links+ (nil is treated as no
  # links) and drops nil results; without a block the links pass through.
  def follow(links, &block)
    links ||= []

    return links.compact unless block

    links.map(&block).compact
  end
end
|
180
|
+
|
181
|
+
end
|
182
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Copyright (c) 2010 Samuel Williams. Released under the GNU GPLv3.
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module Xapian
  module Indexer
    # Version components of the gem and their dotted string form.
    module VERSION #:nodoc:
      MAJOR, MINOR, TINY, REV = 1, 2, 3, 1

      STRING = "#{MAJOR}.#{MINOR}.#{TINY}.#{REV}"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xapian-indexer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 65
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 2
|
9
|
+
- 3
|
10
|
+
- 1
|
11
|
+
version: 1.2.3.1
|
12
|
+
platform: ruby
|
13
|
+
authors:
|
14
|
+
- Samuel Williams
|
15
|
+
autorequire:
|
16
|
+
bindir: bin
|
17
|
+
cert_chain: []
|
18
|
+
|
19
|
+
date: 2010-12-19 00:00:00 +13:00
|
20
|
+
default_executable:
|
21
|
+
dependencies: []
|
22
|
+
|
23
|
+
description:
|
24
|
+
email: samuel.williams@oriontransfer.co.nz
|
25
|
+
executables: []
|
26
|
+
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files: []
|
30
|
+
|
31
|
+
files:
|
32
|
+
- lib/xapian/indexer/extensions.rb
|
33
|
+
- lib/xapian/indexer/extractors/html.rb
|
34
|
+
- lib/xapian/indexer/loaders/http.rb
|
35
|
+
- lib/xapian/indexer/resource.rb
|
36
|
+
- lib/xapian/indexer/spider.rb
|
37
|
+
- lib/xapian/indexer/version.rb
|
38
|
+
- lib/xapian/indexer.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://www.oriontransfer.co.nz/software/xapian
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
hash: 3
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
hash: 3
|
63
|
+
segments:
|
64
|
+
- 0
|
65
|
+
version: "0"
|
66
|
+
requirements: []
|
67
|
+
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.3.7
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: Xapian is a framework for fast full-text searching.
|
73
|
+
test_files: []
|
74
|
+
|