lucene 0.5.0.beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +147 -0
- data/CONTRIBUTORS +17 -0
- data/Gemfile +9 -0
- data/README.rdoc +274 -0
- data/lib/lucene.rb +15 -0
- data/lib/lucene/config.rb +145 -0
- data/lib/lucene/document.rb +96 -0
- data/lib/lucene/field_info.rb +144 -0
- data/lib/lucene/hits.rb +54 -0
- data/lib/lucene/index.rb +267 -0
- data/lib/lucene/index_info.rb +146 -0
- data/lib/lucene/index_searcher.rb +157 -0
- data/lib/lucene/jars.rb +5 -0
- data/lib/lucene/jars/lucene-core-2.9.1.jar +0 -0
- data/lib/lucene/query_dsl.rb +135 -0
- data/lib/lucene/transaction.rb +117 -0
- data/lib/lucene/version.rb +3 -0
- data/lucene.gemspec +23 -0
- metadata +93 -0
data/lib/lucene.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'logger'
|
2
|
+
# Library-wide logger used by the Lucene classes for debug output.
# Writes to STDOUT and only emits messages of severity WARN or higher.
$LUCENE_LOGGER = Logger.new(STDOUT).tap { |log| log.level = Logger::WARN }
|
4
|
+
|
5
|
+
require 'lucene/config'
|
6
|
+
require 'lucene/document'
|
7
|
+
require 'lucene/field_info'
|
8
|
+
require 'lucene/hits'
|
9
|
+
require 'lucene/index'
|
10
|
+
require 'lucene/index_info'
|
11
|
+
require 'lucene/index_searcher'
|
12
|
+
require 'lucene/jars'
|
13
|
+
require 'lucene/query_dsl'
|
14
|
+
require 'lucene/transaction'
|
15
|
+
|
@@ -0,0 +1,145 @@
|
|
1
|
+
|
2
|
+
module Lucene
  #
  # Keeps configuration for lucene.
  # Contains both common configuration for all lucene indexes as well
  # as specific configuration for each index (TODO).
  # This code is copied from merb-core/config.rb.
  #
  # Contains three default configurations (Config.defaults)
  # * :store_on_file:: default false, which will only keep the index in memory
  # * :id_field:: default :id
  # * :storage_path:: where the index is kept on file system if stored as a file (instead of just in memory)
  #
  # The configuration hash is created lazily: the first reader or writer that
  # needs it seeds it with a copy of Config.defaults.
  #
  class Config
    class << self
      # Returns the hash of default config values for lucene.
      #
      # ==== Returns
      # Hash:: The defaults for the config.
      #
      def defaults
        @defaults ||= {
          :store_on_file => false,
          :id_field => :id,
          :storage_path => nil
        }
      end

      # Yields the configuration.
      #
      # The yielded hash already contains the defaults, so entries that the
      # block does not touch keep their default values.
      #
      # ==== Block parameters
      # c<Hash>:: The configuration parameters.
      #
      # ==== Examples
      #   Lucene::Config.use do |config|
      #     config[:store_on_file] = true
      #   end
      #
      # ==== Returns
      # nil
      #
      def use
        yield configuration
        nil
      end

      # Set the value of a config entry.
      #
      # ==== Parameters
      # key<Object>:: The key to set the parameter for.
      # val<Object>:: The value of the parameter.
      #
      def []=(key, val)
        configuration[key] = val
      end

      # Gets the value of a config entry.
      #
      # ==== Parameters
      # key<Object>:: The key of the config entry value we want
      #
      def [](key)
        configuration[key]
      end

      # Remove the value of a config entry.
      #
      # ==== Parameters
      # key<Object>:: The key of the parameter to delete.
      #
      # ==== Returns
      # Object:: The value of the removed entry (nil if the key was not set).
      #
      def delete(key)
        configuration.delete(key)
      end

      # Remove all configuration. This can be useful for testing purpose.
      # Also calls IndexInfo.delete_all.
      #
      # ==== Returns
      # nil
      #
      def delete_all
        @configuration = nil
        IndexInfo.delete_all
        # return nil as documented instead of leaking IndexInfo's return value
        nil
      end

      # Retrieve the value of a config entry, returning the provided default if the key is not present
      #
      # ==== Parameters
      # key<Object>:: The key to retrieve the parameter for.
      # default<Object>::
      #   The default value to return if the parameter is not set.
      #
      # ==== Returns
      # Object:: The value of the configuration parameter or the default.
      #
      def fetch(key, default)
        configuration.fetch(key, default)
      end

      # Sets up the configuration, discarding any current values and
      # starting again from a copy of the defaults.
      #
      # ==== Returns
      # The configuration as a hash.
      #
      def setup
        @configuration = defaults.dup
      end

      # Returns the configuration as a hash.
      #
      # ==== Returns
      # Hash:: The config as a hash.
      #
      def to_hash
        configuration
      end

      # Returns the config as YAML.
      #
      # ==== Returns
      # String:: The config as YAML.
      #
      def to_yaml
        require "yaml" # loaded on demand, only when YAML output is requested
        configuration.to_yaml
      end

      private

      # The live configuration hash, created from the defaults on first access.
      def configuration
        @configuration ||= setup
      end
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Lucene
  #
  # A document is like a record or row in a relational database.
  # It holds its property values together with the field infos, which are
  # used for type conversions and for deciding whether a field is stored
  # or only searchable.
  #
  class Document
    attr_reader :id_field, :field_infos, :props

    # field_infos:: object answering +id_field+ and <tt>[key]</tt>; each entry
    #               must answer +convert_to_ruby+.
    # props::       hash of field key => raw value; every value is converted
    #               with the matching field info before it is kept.
    def initialize(field_infos, props = {})
      @field_infos = field_infos
      @id_field = field_infos.id_field
      @props = {}

      props.each_pair do |key, value|
        @props[key] = field_infos[key].convert_to_ruby(value)
        $LUCENE_LOGGER.debug{"FieldInfo #{key} type: #{field_infos[key][:type]}"}
        $LUCENE_LOGGER.debug{"Converted #{key} '#{value}' type: '#{value.class.to_s}' to '#{@props[key]}' type: '#{@props[key].class.to_s}'"}
      end
    end

    # Returns the converted value kept for the given field key.
    def [](key)
      @props[key]
    end

    #
    # Convert a java Document to a ruby Lucene::Document.
    # Only fields whose field info answers true to +store?+ are read.
    # Raises StandardError when such a field is absent from the java document.
    #
    def self.convert(field_infos, java_doc)
      stored = {}
      field_infos.each_pair do |key, field|
        next unless field.store?

        java_field = java_doc.getField(key.to_s)
        raise StandardError.new("expected field '#{key.to_s}' to exist in document") if java_field.nil?

        stored[key] = java_field.stringValue
      end
      Document.new(field_infos, stored)
    end

    # Returns the value of the id field.
    # Raises IdFieldMissingException when that value is nil.
    def id
      raise IdFieldMissingException.new("Missing id field: '#{@id_field}'") if self[@id_field].nil?
      @props[@id_field]
    end

    # Two documents are equal when they have equal ids.
    def eql?(other)
      other.is_a?(Document) && id == other.id
    end

    def ==(other)
      eql?(other)
    end

    # Hash of the id, consistent with eql?.
    def hash
      id.hash
    end

    #
    # removes the document and adds it again
    #
    def update(index_writer)
      index_writer.updateDocument(java_key_term, java_document)
    end

    # The java Term (id field name, id value as string) identifying this document.
    def java_key_term
      org.apache.lucene.index.Term.new(@id_field.to_s, id.to_s)
    end

    # Builds a java Document with one java field per property value.
    # Array values add one field per element; a value for which the
    # field info returns no java field is left out.
    def java_document
      java_doc = org.apache.lucene.document.Document.new
      @props.each_pair do |key, value|
        field_info = @field_infos[key]
        # TODO value could be an array if value.kind_of? Enumerable
        values = value.kind_of?(Array) ? value : [value]
        values.each do |single|
          field = field_info.java_field(key, single)
          java_doc.add(field) unless field.nil?
        end
      end
      java_doc
    end

    def to_s
      pairs = @props.map { |key, value| "'#{key}' = '#{value}' " }.join
      "Document [#{@id_field}='#{self[@id_field]}', #{pairs}]"
    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module Lucene
  # Raised by FieldInfo#convert_to_ruby when the configured :type has no
  # known conversion.
  class ConversionNotSupportedException < StandardError; end

  #
  # Describes how one field is handled: whether it is stored, whether it is
  # tokenized, and which ruby type its values are converted to and from.
  #
  # Types are matched by their name (<tt>type.to_s</tt>), so the legacy
  # Fixnum/Bignum classes and the unified Integer class are all recognised
  # without referencing constants that newer rubies no longer define.
  #
  class FieldInfo
    DEFAULTS = {:store => false, :type => String, :analyzer => :standard}.freeze

    # values:: hash of settings merged on top of DEFAULTS.
    def initialize(values = {})
      @info = DEFAULTS.dup
      @info.merge! values
      $LUCENE_LOGGER.debug{"new FieldInfo: #{@info.inspect}"}
    end

    # Returns a new FieldInfo built from the same settings.
    def dup
      FieldInfo.new(@info)
    end

    def [](key)
      @info[key]
    end

    def []=(key,value)
      @info[key] = value
    end

    # Creates the java Field for the given key and ruby value.
    # Returns nil when the converted value is nil, i.e. nothing to index.
    def java_field(key, value)
      # convert the ruby value to a string that lucene can handle
      cvalue = convert_to_lucene(value)

      # check if this field should be indexed
      return nil if cvalue.nil?

      # decide if the field should be stored in the lucene index or not
      store = store? ? org.apache.lucene.document.Field::Store::YES : org.apache.lucene.document.Field::Store::NO

      # decide if it should be tokenized/analyzed by lucene
      token_type = tokenized? ? org.apache.lucene.document.Field::Index::ANALYZED : org.apache.lucene.document.Field::Index::NOT_ANALYZED
      $LUCENE_LOGGER.debug{"java_field store=#{store} key='#{key.to_s}' value='#{cvalue}' token_type=#{token_type}"}

      # create the new Field
      org.apache.lucene.document.Field.new(key.to_s, cvalue, store, token_type ) #org.apache.lucene.document.Field::Index::NO_NORMS)
    end

    # Converts a value (typically the string kept in the index) to the
    # ruby type configured in :type. Arrays are converted element by element.
    #
    # Date and DateTime strings are read by position: yyyymmdd, and for
    # DateTime additionally hhmmss.
    #
    # Raises ConversionNotSupportedException for an unknown :type.
    def convert_to_ruby(value)
      return value.collect{|v| convert_to_ruby(v)} if value.kind_of?(Array)

      case @info[:type].to_s
      when 'NilClass' then "" # TODO, should we accept nil values in indexes ?
      when 'String' then value.to_s
      when 'Fixnum', 'Integer', 'Bignum' then value.to_i
      when 'Float' then value.to_f
      when 'Date'
        return value if value.kind_of? Date
        return nil if value.nil?
        year = value[0..3].to_i
        month = value[4..5].to_i
        day = value[6..7].to_i
        Date.new year,month,day
      when 'DateTime'
        return value if value.kind_of? DateTime
        return nil if value.nil?
        year = value[0..3].to_i
        month = value[4..5].to_i
        day = value[6..7].to_i
        hour = value[8..9].to_i
        min = value[10..11].to_i
        sec = value[12..13].to_i
        DateTime.civil(year,month,day,hour,min,sec)
      else
        raise ConversionNotSupportedException.new("Can't convert value '#{value}' with type '#{@info[:type]}'")
      end
    end

    # Converts a ruby value to the string form kept in the index.
    # Arrays are converted element by element, nil stays nil.
    # Numbers are zero padded to a fixed width; types without a special
    # rule fall back to +to_s+.
    def convert_to_lucene(value)
      if (value.kind_of?(Array))
        value.collect{|v| convert_to_lucene(v)}
      elsif value.nil?
        value
      else
        case @info[:type].to_s # match on the name, otherwise it will match Class
        when 'Fixnum', 'Integer' then sprintf('%011d',value) # TODO: configurable
        when 'Float' then sprintf('%024.12f', value) # TODO: configurable
        when 'Bignum' then sprintf('%024d', value)
        when 'Date'
          t = Time.utc(value.year, value.month, value.day)
          d = t.to_i * 1000 # seconds -> milliseconds
          org.apache.lucene.document.DateTools.timeToString(d,org.apache.lucene.document.DateTools::Resolution::DAY )
        when 'DateTime'
          # only utc times are supported
          t = Time.utc(value.year, value.month, value.day, value.hour, value.min, value.sec)
          d = t.to_i * 1000 # seconds -> milliseconds
          org.apache.lucene.document.DateTools.timeToString(d,org.apache.lucene.document.DateTools::Resolution::SECOND )
        else value.to_s
        end
      end
    end

    # Builds a java query for the given key.
    # A Range becomes a RangeQuery (inclusive unless the range excludes its
    # end), any other value a TermQuery. Returns nil for a nil value.
    def convert_to_query(key,value)
      if (value.kind_of? Range)
        first_value = convert_to_lucene(value.first)
        last_value = convert_to_lucene(value.last)
        first = org.apache.lucene.index.Term.new(key.to_s, first_value)
        last = org.apache.lucene.index.Term.new(key.to_s, last_value)
        $LUCENE_LOGGER.debug{"convert_to_query: Range key '#{key.to_s}' '#{first_value}' to '#{last_value}'"}
        org.apache.lucene.search.RangeQuery.new(first, last, !value.exclude_end?)
      else
        converted_value = convert_to_lucene(value)
        # a nil value has never produced a query; keep that explicit
        return nil if converted_value.nil?
        term = org.apache.lucene.index.Term.new(key.to_s, converted_value)
        org.apache.lucene.search.TermQuery.new(term)
      end
    end

    def tokenized?
      @info[:tokenized]
    end

    def store?
      @info[:store]
    end

    # Two field infos are equal when every setting of either one has the
    # same value in the other (a missing setting counts as nil).
    def eql?(other)
      return false unless other.kind_of?(FieldInfo)
      covered_by?(other) && other.covered_by?(self)
    end

    def ==(other)
      eql? other
    end

    # Hash consistent with eql?: nil settings are ignored because eql?
    # treats them like missing ones.
    def hash
      @info.reject { |_key, value| value.nil? }.hash
    end

    def to_s
      infos = @info.keys.inject(""){|s, key| s << "#{key}=#{@info[key]} "}
      "FieldInfo(#{self.object_id.to_s}) [#{infos}]"
    end

    protected

    # True when each of this object's settings has the same value in other.
    def covered_by?(other)
      @info.all? { |key, value| other[key] == value }
    end
  end
end
|
144
|
+
|
data/lib/lucene/hits.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
module Lucene
  #
  # Contains the result as a collection of Documents from a lucene query.
  # Is a wrapper for the Java org.apache.lucene.search.Hits class
  #
  class Hits
    include Enumerable

    # field_infos:: passed on to Document.convert for every hit.
    # hits::        the wrapped hits object; must answer +doc+, +length+
    #               and +iterator+.
    def initialize(field_infos, hits)
      @hits = hits
      @field_infos = field_infos
    end

    #
    # Returns the n:th hit document.
    #
    def [](n)
      doc = @hits.doc(n)
      Document.convert(@field_infos, doc)
    end

    #
    # Returns true if there are no hits
    #
    def empty?
      @hits.length == 0
    end

    #
    # Yields each hit converted to a Document.
    # Without a block an Enumerator is returned, as callers of an
    # Enumerable expect, instead of failing on the first yield.
    #
    def each
      return enum_for(:each) unless block_given?

      iter = @hits.iterator
      # also stops early if the iterator hands back nil
      while iter.hasNext && (hit = iter.next)
        yield Document.convert(@field_infos, hit.getDocument)
      end
    end

    #
    # The number of documents the query gave.
    #
    def size
      @hits.length
    end

    def to_s
      "Hits [size=#{size}]"
    end
  end
end
|