lucene 0.5.0.beta.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +147 -0
- data/CONTRIBUTORS +17 -0
- data/Gemfile +9 -0
- data/README.rdoc +274 -0
- data/lib/lucene.rb +15 -0
- data/lib/lucene/config.rb +145 -0
- data/lib/lucene/document.rb +96 -0
- data/lib/lucene/field_info.rb +144 -0
- data/lib/lucene/hits.rb +54 -0
- data/lib/lucene/index.rb +267 -0
- data/lib/lucene/index_info.rb +146 -0
- data/lib/lucene/index_searcher.rb +157 -0
- data/lib/lucene/jars.rb +5 -0
- data/lib/lucene/jars/lucene-core-2.9.1.jar +0 -0
- data/lib/lucene/query_dsl.rb +135 -0
- data/lib/lucene/transaction.rb +117 -0
- data/lib/lucene/version.rb +3 -0
- data/lucene.gemspec +23 -0
- metadata +93 -0
data/lib/lucene.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'logger'
|
2
|
+
$LUCENE_LOGGER = Logger.new(STDOUT)
|
3
|
+
$LUCENE_LOGGER.level = Logger::WARN
|
4
|
+
|
5
|
+
require 'lucene/config'
|
6
|
+
require 'lucene/document'
|
7
|
+
require 'lucene/field_info'
|
8
|
+
require 'lucene/hits'
|
9
|
+
require 'lucene/index'
|
10
|
+
require 'lucene/index_info'
|
11
|
+
require 'lucene/index_searcher'
|
12
|
+
require 'lucene/jars'
|
13
|
+
require 'lucene/query_dsl'
|
14
|
+
require 'lucene/transaction'
|
15
|
+
|
@@ -0,0 +1,145 @@
|
|
1
|
+
|
2
|
+
module Lucene
|
3
|
+
|
4
|
+
|
5
|
+
#
|
6
|
+
# Keeps configuration for lucene.
|
7
|
+
# Contains both common configuration for all lucene indexes as well
|
8
|
+
# as specific configuration for each index (TODO).
|
9
|
+
# This code is copied from merb-core/config.rb.
|
10
|
+
#
|
11
|
+
# Contains three default configurations (Config.defaults)
|
12
|
+
# * :store_on_file:: default false, which will only keep the index in memory
|
13
|
+
# * :id_field:: default :id
|
14
|
+
# * :storage_path:: where the index is kept on file system if stored as a file (instead of just in memory)
|
15
|
+
#
|
16
|
+
class Config
  class << self
    # Returns the hash of default config values for lucene.
    #
    # ==== Returns
    # Hash:: The defaults for the config.
    #
    def defaults
      @defaults ||= {
        :store_on_file => false,
        :id_field => :id,
        :storage_path => nil
      }
    end

    # Yields the configuration. The configuration is initialized with the
    # defaults first if it has not been set up yet.
    #
    # ==== Block parameters
    # c<Hash>:: The configuration parameters.
    #
    # ==== Examples
    #   Lucene::Config.use do |config|
    #     config[:store_on_file] = true
    #   end
    #
    # ==== Returns
    # nil
    #
    def use
      yield configuration
      nil
    end

    # Set the value of a config entry.
    #
    # ==== Parameters
    # key<Object>:: The key to set the parameter for.
    # val<Object>:: The value of the parameter.
    #
    def []=(key, val)
      configuration[key] = val
    end

    # Gets the value of a config entry.
    #
    # ==== Parameters
    # key<Object>:: The key of the config entry value we want
    #
    def [](key)
      configuration[key]
    end

    # Remove the value of a config entry.
    #
    # ==== Parameters
    # key<Object>:: The key of the parameter to delete.
    #
    # ==== Returns
    # Object:: The value of the removed entry.
    #
    def delete(key)
      configuration.delete(key)
    end

    # Remove all configuration. This can be useful for testing purpose.
    # Also calls IndexInfo.delete_all. The next access to the configuration
    # starts again from the defaults.
    #
    # ==== Returns
    # nil
    #
    def delete_all
      @configuration = nil
      IndexInfo.delete_all
      nil
    end

    # Retrieve the value of a config entry, returning the provided default if the key is not present
    #
    # ==== Parameters
    # key<Object>:: The key to retrieve the parameter for.
    # default<Object>::
    #   The default value to return if the parameter is not set.
    #
    # ==== Returns
    # Object:: The value of the configuration parameter or the default.
    #
    def fetch(key, default)
      configuration.fetch(key, default)
    end

    # Sets up the configuration, discarding any current values and
    # starting over from a copy of the defaults.
    #
    # ==== Returns
    # The configuration as a hash.
    #
    def setup
      @configuration = {}.merge!(defaults)
    end

    # Returns the configuration as a hash.
    #
    # ==== Returns
    # Hash:: The config as a hash.
    #
    def to_hash
      configuration
    end

    # Returns the config as YAML.
    #
    # ==== Returns
    # String:: The config as YAML.
    #
    def to_yaml
      # loaded lazily so yaml is only required when it is actually used
      require "yaml"
      configuration.to_yaml
    end

    private

    # The current configuration hash, created from the defaults on first
    # use so that no accessor ever sees an uninitialized configuration.
    def configuration
      @configuration ||= setup
    end
  end
end
|
144
|
+
|
145
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Lucene
|
2
|
+
|
3
|
+
#
|
4
|
+
# A document is like a record or row in a relational database.
|
5
|
+
# Contains the field infos which can be used for type conversions or
|
6
|
+
# specifying if the field should be stored or only searchable.
|
7
|
+
#
|
8
|
+
class Document

  attr_reader :id_field, :field_infos, :props

  #
  # Creates a document from the given field infos and a hash of properties.
  # Every property value is converted with the convert_to_ruby method of the
  # field info registered for its key.
  #
  def initialize(field_infos, props = {})
    @id_field = field_infos.id_field
    @field_infos = field_infos
    @props = {}

    props.each_pair do |key, value|
      converted = field_infos[key].convert_to_ruby(value)
      @props[key] = converted
      $LUCENE_LOGGER.debug{"FieldInfo #{key} type: #{field_infos[key][:type]}"}
      $LUCENE_LOGGER.debug{"Converted #{key} '#{value}' type: '#{value.class.to_s}' to '#{@props[key]}' type: '#{@props[key].class.to_s}'"}
    end
  end

  #
  # Returns the (converted) property value stored under the given key.
  #
  def [](key)
    @props[key]
  end

  #
  # Convert a java Document to a ruby Lucene::Document.
  # Only fields whose field info answers true to store? are read; a stored
  # field that is missing in the java document raises a StandardError.
  #
  def self.convert(field_infos, java_doc)
    stored = {}
    field_infos.each_pair do |key, field|
      next unless field.store?
      name = key.to_s
      java_field = java_doc.getField(name)
      raise StandardError.new("expected field '#{name}' to exist in document") if java_field.nil?
      stored[key] = java_field.stringValue
    end
    Document.new(field_infos, stored)
  end

  #
  # Returns the value of the id field, raising IdFieldMissingException
  # when the document has no value for it.
  #
  def id
    raise IdFieldMissingException.new("Missing id field: '#{@id_field}'") if self[@id_field].nil?
    @props[@id_field]
  end

  #
  # Two documents are equal when they have the same id.
  #
  def eql?(other)
    other.is_a?(Document) && id == other.id
  end

  def ==(other)
    eql?(other)
  end

  #
  # Hash value based on the id, consistent with eql?.
  #
  def hash
    id.hash
  end

  #
  # removes the document and adds it again
  #
  def update(index_writer)
    index_writer.updateDocument(java_key_term, java_document)
  end

  #
  # The lucene Term (id field name and id value, both as strings)
  # identifying this document.
  #
  def java_key_term
    org.apache.lucene.index.Term.new(@id_field.to_s, id.to_s)
  end

  #
  # Builds a java lucene Document from the properties. An Array value adds
  # one field per element; nil fields returned by the field info are skipped.
  #
  def java_document
    java_doc = org.apache.lucene.document.Document.new
    @props.each_pair do |key, value|
      info = @field_infos[key]
      # TODO value could be an array if value.kind_of? Enumerable
      values = value.kind_of?(Array) ? value : [value]
      values.each do |single|
        java_field = info.java_field(key, single)
        java_doc.add(java_field) unless java_field.nil?
      end
    end
    java_doc
  end

  def to_s
    listing = @props.map { |key, value| "'#{key}' = '#{value}' " }.join
    "Document [#{@id_field}='#{self[@id_field]}', #{listing}]"
  end
end
|
96
|
+
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module Lucene
|
4
|
+
class ConversionNotSupportedException < StandardError; end
|
5
|
+
|
6
|
+
#
# Describes how a single field is stored, indexed and converted between
# ruby values and the strings kept in the lucene index.
#
class FieldInfo
  DEFAULTS = {:store => false, :type => String, :analyzer => :standard}.freeze

  # Creates a field info from DEFAULTS overridden by the given values.
  def initialize(values = {})
    @info = DEFAULTS.dup
    @info.merge! values
    $LUCENE_LOGGER.debug{"new FieldInfo: #{@info.inspect}"}
  end

  # Returns a new FieldInfo built from the same settings.
  def dup
    FieldInfo.new(@info)
  end

  def [](key)
    @info[key]
  end

  def []=(key,value)
    @info[key] = value
  end

  #
  # Creates a java lucene Field for the given key and ruby value.
  # Returns nil when the value converts to nil (nothing to index).
  #
  def java_field(key, value)
    # convert the ruby value to a string that lucene can handle
    cvalue = convert_to_lucene(value)

    # check if this field should be indexed
    return nil if cvalue.nil?

    # decide if the field should be stored in the lucene index or not
    store = store? ? org.apache.lucene.document.Field::Store::YES : org.apache.lucene.document.Field::Store::NO

    # decide if it should be tokenized/analyzed by lucene
    token_type = tokenized? ? org.apache.lucene.document.Field::Index::ANALYZED : org.apache.lucene.document.Field::Index::NOT_ANALYZED
    $LUCENE_LOGGER.debug{"java_field store=#{store} key='#{key.to_s}' value='#{cvalue}' token_type=#{token_type}"}

    # create the new Field
    org.apache.lucene.document.Field.new(key.to_s, cvalue, store, token_type ) #org.apache.lucene.document.Field::Index::NO_NORMS)
  end

  #
  # Converts a value read from the index into a ruby object of the
  # configured :type. Arrays are converted element by element.
  # Dates are parsed from 'YYYYMMDD' and date times from 'YYYYMMDDhhmmss'.
  # Raises ConversionNotSupportedException for an unsupported type.
  #
  def convert_to_ruby(value)
    return value.collect { |v| convert_to_ruby(v) } if value.kind_of?(Array)

    # The type is matched by its class name: 'case' on the class itself would
    # test Class === value. Names are spelled out as strings because the
    # Fixnum and Bignum constants no longer exist in Ruby 3.2 and later.
    case @info[:type].to_s
    when 'NilClass' then "" # TODO, should we accept nil values in indexes ?
    when 'String' then value.to_s
    when 'Fixnum', 'Integer', 'Bignum' then value.to_i
    when 'Float' then value.to_f
    when 'Date'
      return value if value.kind_of? Date
      return nil if value.nil?
      Date.new(value[0..3].to_i, value[4..5].to_i, value[6..7].to_i)
    when 'DateTime'
      return value if value.kind_of? DateTime
      return nil if value.nil?
      year = value[0..3].to_i
      month = value[4..5].to_i
      day = value[6..7].to_i
      hour = value[8..9].to_i
      min = value[10..11].to_i
      sec = value[12..13].to_i
      DateTime.civil(year, month, day, hour, min, sec)
    else
      raise ConversionNotSupportedException.new("Can't convert key '#{value}' of with type '#{@info[:type].to_s}'")
    end
  end

  #
  # Converts a ruby value into the string stored in the lucene index.
  # Numbers are zero padded to a fixed width. Arrays are converted element
  # by element and nil is returned unchanged.
  #
  def convert_to_lucene(value)
    return value.collect { |v| convert_to_lucene(v) } if value.kind_of?(Array)
    return nil if value.nil?

    case @info[:type].to_s # otherwise it will match Class
    when 'Fixnum', 'Integer' then sprintf('%011d', value) # TODO: configurable
    when 'Float' then sprintf('%024.12f', value) # TODO: configurable
    when 'Bignum' then sprintf('%024d', value)
    when 'Date'
      t = Time.utc(value.year, value.month, value.day)
      d = t.to_i * 1000 # milliseconds since epoch
      org.apache.lucene.document.DateTools.timeToString(d,org.apache.lucene.document.DateTools::Resolution::DAY )
    when 'DateTime'
      # only utc times are supported
      t = Time.utc(value.year, value.month, value.day, value.hour, value.min, value.sec)
      d = t.to_i * 1000 # milliseconds since epoch
      org.apache.lucene.document.DateTools.timeToString(d,org.apache.lucene.document.DateTools::Resolution::SECOND )
    else value.to_s
    end
  end

  #
  # Creates a lucene query for the given key: a RangeQuery when value is a
  # Range (inclusive unless the range excludes its end), otherwise a
  # TermQuery. Returns nil when a non-range value converts to nil.
  #
  def convert_to_query(key,value)
    if value.kind_of?(Range)
      first_value = convert_to_lucene(value.first)
      last_value = convert_to_lucene(value.last)
      first = org.apache.lucene.index.Term.new(key.to_s, first_value)
      last = org.apache.lucene.index.Term.new(key.to_s, last_value)
      $LUCENE_LOGGER.debug{"convert_to_query: Range key '#{key.to_s}' #{first_value}' to '#{last_value}'"}
      org.apache.lucene.search.RangeQuery.new(first, last, !value.exclude_end?)
    else
      converted_value = convert_to_lucene(value)
      # no term can be built from a nil value
      return nil if converted_value.nil?
      term = org.apache.lucene.index.Term.new(key.to_s, converted_value)
      org.apache.lucene.search.TermQuery.new(term)
    end
  end

  def tokenized?
    @info[:tokenized]
  end

  def store?
    @info[:store]
  end

  #
  # True when other is a FieldInfo holding the same value for every
  # setting of this field info.
  #
  def eql?(other)
    return false unless other.kind_of?(FieldInfo)
    @info.each_pair do |key,value|
      return false if other[key] != value
    end
    return true
  end

  def ==(other)
    eql? other
  end

  def to_s
    infos = @info.keys.inject(""){|s, key| s << "#{key}=#{@info[key]} "}
    "FieldInfo(#{self.object_id.to_s}) [#{infos}]"
  end

end
|
143
|
+
end
|
144
|
+
|
data/lib/lucene/hits.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
module Lucene
|
2
|
+
|
3
|
+
|
4
|
+
#
|
5
|
+
# Contains the result as a collection of Documents from a lucene query.
|
6
|
+
# Is a wrapper for the Java org.apache.lucene.search.Hits class
|
7
|
+
#
|
8
|
+
class Hits
  include Enumerable

  #
  # field_infos:: passed on to Document.convert for every hit
  # hits:: the wrapped hits object; it is asked for doc(n), length and iterator
  #
  def initialize(field_infos, hits)
    @hits = hits
    @field_infos = field_infos
  end

  #
  # Returns the n:th hit document.
  #
  def [](n)
    doc = @hits.doc(n)
    Document.convert(@field_infos, doc)
  end

  #
  # Returns true if there are no hits
  #
  def empty?
    @hits.length == 0
  end

  #
  # Yields each hit converted to a Lucene::Document.
  # Returns an Enumerator when called without a block.
  #
  def each
    return enum_for(:each) unless block_given?

    iter = @hits.iterator
    # stops when the iterator is exhausted (or hands out a nil hit)
    while iter.hasNext && (hit = iter.next)
      yield Document.convert(@field_infos, hit.getDocument)
    end
  end

  #
  # The number of documents the query gave.
  #
  def size
    @hits.length
  end

  def to_s
    "Hits [size=#{size}]"
  end

end
|
54
|
+
end
|