lucene 0.5.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ require 'logger'
2
# Library-wide logger: writes to STDOUT and only reports warnings and above.
$LUCENE_LOGGER = Logger.new(STDOUT).tap { |logger| logger.level = Logger::WARN }
4
+
5
+ require 'lucene/config'
6
+ require 'lucene/document'
7
+ require 'lucene/field_info'
8
+ require 'lucene/hits'
9
+ require 'lucene/index'
10
+ require 'lucene/index_info'
11
+ require 'lucene/index_searcher'
12
+ require 'lucene/jars'
13
+ require 'lucene/query_dsl'
14
+ require 'lucene/transaction'
15
+
@@ -0,0 +1,145 @@
1
+
2
+ module Lucene
3
+
4
+
5
+ #
6
+ # Keeps configuration for lucene.
7
+ # Contains both common configuration for all lucene indexes as well
8
+ # as specific configuration for each index (TODO).
9
+ # This code is copied from merb-core/config.rb.
10
+ #
11
+ # Contains three default configurations (Config.defaults)
12
+ # * :store_on_file:: default false, which will only keep the index in memory
13
+ # * :id_field:: default :id
14
+ # * :storage_path:: where the index is kept on file system if stored as a file (instead of just in memory)
15
+ #
16
class Config
  class << self
    # Returns the hash of default config values for lucene.
    #
    # ==== Returns
    # Hash:: The defaults for the config.
    #
    def defaults
      @defaults ||= {
        :store_on_file => false,
        :id_field => :id,
        :storage_path => nil
      }
    end


    # Yields the configuration. If no configuration exists yet it is first
    # created from the defaults, so values not set in the block keep their
    # default.
    #
    # ==== Block parameters
    # c<Hash>:: The configuration parameters.
    #
    # ==== Examples
    #   Lucene::Config.use do |config|
    #     config[:store_on_file] = true
    #   end
    #
    # ==== Returns
    # nil
    #
    def use
      yield configuration
      nil
    end


    # Set the value of a config entry.
    #
    # ==== Parameters
    # key<Object>:: The key to set the parameter for.
    # val<Object>:: The value of the parameter.
    #
    def []=(key, val)
      configuration[key] = val
    end


    # Gets the value of a config entry.
    #
    # ==== Parameters
    # key<Object>:: The key of the config entry value we want
    #
    def [](key)
      configuration[key]
    end


    # Remove the value of a config entry.
    #
    # ==== Parameters
    # key<Object>:: The key of the parameter to delete.
    #
    # ==== Returns
    # Object:: The value of the removed entry, or nil if there was none.
    #
    def delete(key)
      configuration.delete(key)
    end


    # Remove all configuration. This can be useful for testing purpose.
    # The next access rebuilds the configuration from the defaults.
    # Also calls IndexInfo.delete_all.
    #
    # ==== Returns
    # Whatever IndexInfo.delete_all returns.
    #
    def delete_all
      @configuration = nil
      IndexInfo.delete_all
    end


    # Retrieve the value of a config entry, returning the provided default if the key is not present
    #
    # ==== Parameters
    # key<Object>:: The key to retrieve the parameter for.
    # default<Object>::
    #   The default value to return if the parameter is not set.
    #
    # ==== Returns
    # Object:: The value of the configuration parameter or the default.
    #
    def fetch(key, default)
      configuration.fetch(key, default)
    end

    # Sets up the configuration, discarding any existing values and
    # starting again from a copy of the defaults.
    #
    # ==== Returns
    # The configuration as a hash.
    #
    def setup
      @configuration = {}
      @configuration.merge!(defaults)
      @configuration
    end


    # Returns the configuration as a hash.
    #
    # ==== Returns
    # Hash:: The config as a hash.
    #
    def to_hash
      configuration
    end

    # Returns the config as YAML.
    #
    # ==== Returns
    # String:: The config as YAML.
    #
    def to_yaml
      require "yaml"
      configuration.to_yaml
    end

    private

    # The live configuration hash, created from the defaults on first
    # access. Every public accessor goes through here so that none of them
    # has to cope with an uninitialised (nil) configuration.
    def configuration
      @configuration ||= setup
    end
  end
end
144
+
145
+ end
@@ -0,0 +1,96 @@
1
+ module Lucene
2
+
3
+ #
4
+   # A document is like a record or row in a relational database.
5
+ # Contains the field infos which can be used for type conversions or
6
+ # specifying if the field should be stored or only searchable.
7
+ #
8
class Document

  attr_reader :id_field, :field_infos, :props

  # Builds a document from +props+. Each value is converted to its ruby
  # representation by the entry of +field_infos+ stored under the same key;
  # the name of the id field is taken from +field_infos+ as well.
  def initialize(field_infos, props = {})
    @id_field = field_infos.id_field
    @field_infos = field_infos
    @props = {}

    props.each_pair do |name, raw|
      @props[name] = field_infos[name].convert_to_ruby(raw)
      $LUCENE_LOGGER.debug{"FieldInfo #{name} type: #{field_infos[name][:type]}"}
      $LUCENE_LOGGER.debug{"Converted #{name} '#{raw}' type: '#{raw.class.to_s}' to '#{@props[name]}' type: '#{@props[name].class.to_s}'"}
    end
  end

  # Returns the converted value stored under +key+, or nil.
  def [](key)
    @props[key]
  end

  #
  # Convert a java Document to a ruby Lucene::Document.
  # Only fields marked as stored are read; a stored field that is absent
  # from the java document raises a StandardError.
  #
  def self.convert(field_infos, java_doc)
    stored = {}
    field_infos.each_pair do |name, info|
      next unless info.store?
      java_field = java_doc.getField(name.to_s)
      raise StandardError.new("expected field '#{name.to_s}' to exist in document") if java_field.nil?
      stored[name] = java_field.stringValue
    end
    Document.new(field_infos, stored)
  end

  # The value of the id field; raises IdFieldMissingException when unset.
  def id
    value = self[@id_field]
    raise IdFieldMissingException.new("Missing id field: '#{@id_field}'") if value.nil?
    value
  end

  # Two documents are equal when their ids are equal.
  def eql?(other)
    other.is_a?(Document) && id == other.id
  end

  def ==(other)
    eql?(other)
  end

  # Hash of the id, consistent with eql?.
  def hash
    id.hash
  end

  #
  # removes the document and adds it again
  #
  def update(index_writer)
    index_writer.updateDocument(java_key_term, java_document)
  end


  # The java Term (id field name, id value as string) identifying this document.
  def java_key_term
    org.apache.lucene.index.Term.new(@id_field.to_s, id.to_s)
  end

  # Builds the java Document. An Array value yields one java field per
  # element; values for which the field info returns no field are skipped.
  def java_document
    java_doc = org.apache.lucene.document.Document.new
    @props.each_pair do |name, value|
      info = @field_infos[name]
      # TODO value could be an array if value.kind_of? Enumerable
      values = value.kind_of?(Array) ? value : [value]
      values.each do |single|
        field = info.java_field(name, single)
        java_doc.add(field) unless field.nil?
      end
    end
    java_doc
  end

  def to_s
    pairs = @props.map { |key, value| "'#{key}' = '#{value}' " }.join
    "Document [#{@id_field}='#{self[@id_field]}', #{pairs}]"
  end
end
96
+ end
@@ -0,0 +1,144 @@
1
+ require 'date'
2
+
3
+ module Lucene
4
# Raised when a field is declared with a type that cannot be converted.
class ConversionNotSupportedException < StandardError; end

# Describes how a single field is stored, indexed and converted between its
# ruby value and the string representation kept in the lucene index.
#
# Types are matched by class name (@info[:type].to_s) rather than by
# constant. This keeps the conversions working on rubies where Fixnum and
# Bignum have been merged into Integer or removed altogether.
class FieldInfo
  DEFAULTS = {:store => false, :type => String, :analyzer => :standard}.freeze

  # values:: settings overriding DEFAULTS (e.g. :store, :type, :tokenized).
  def initialize(values = {})
    @info = DEFAULTS.dup
    @info.merge! values
    $LUCENE_LOGGER.debug{"new FieldInfo: #{@info.inspect}"}
  end

  # Returns a new FieldInfo with the same settings.
  def dup
    FieldInfo.new(@info)
  end

  def [](key)
    @info[key]
  end

  def []=(key,value)
    @info[key] = value
  end

  # Creates the java lucene Field for +key+ / +value+.
  # Returns nil when the value converts to nil, i.e. nothing should be indexed.
  def java_field(key, value)
    # convert the ruby value to a string that lucene can handle
    cvalue = convert_to_lucene(value)

    # check if this field should be indexed
    return nil if cvalue.nil?

    # decide if the field should be stored in the lucene index or not
    store = store? ? org.apache.lucene.document.Field::Store::YES : org.apache.lucene.document.Field::Store::NO

    # decide if it should be tokenized/analyzed by lucene
    token_type = tokenized? ? org.apache.lucene.document.Field::Index::ANALYZED : org.apache.lucene.document.Field::Index::NOT_ANALYZED
    $LUCENE_LOGGER.debug{"java_field store=#{store} key='#{key.to_s}' value='#{cvalue}' token_type=#{token_type}"}

    # create the new Field
    org.apache.lucene.document.Field.new(key.to_s, cvalue, store, token_type ) #org.apache.lucene.document.Field::Index::NO_NORMS)
  end


  # Converts a value read from the index into the ruby type of this field.
  # Arrays are converted element by element.
  #
  # Raises ConversionNotSupportedException for an unsupported field type.
  def convert_to_ruby(value)
    return value.collect{|v| convert_to_ruby(v)} if value.kind_of?(Array)

    case @info[:type].to_s
    when 'NilClass' then "" # TODO, should we accept nil values in indexes ?
    when 'String' then value.to_s
    when 'Fixnum', 'Bignum', 'Integer' then value.to_i
    when 'Float' then value.to_f
    when 'Date'
      return value if value.kind_of? Date
      return nil if value.nil?
      # stored as yyyymmdd
      Date.new(value[0..3].to_i, value[4..5].to_i, value[6..7].to_i)
    when 'DateTime'
      return value if value.kind_of? DateTime
      return nil if value.nil?
      # stored as yyyymmddHHMMSS
      DateTime.civil(value[0..3].to_i, value[4..5].to_i, value[6..7].to_i,
                     value[8..9].to_i, value[10..11].to_i, value[12..13].to_i)
    else
      raise ConversionNotSupportedException.new("Can't convert value '#{value}' with type '#{@info[:type]}'")
    end
  end

  # Converts a ruby value into the string stored in the index.
  # Numbers are zero padded to a fixed width; nil stays nil and arrays are
  # converted element by element.
  def convert_to_lucene(value)
    return value.collect{|v| convert_to_lucene(v)} if value.kind_of?(Array)
    return nil if value.nil?

    case @info[:type].to_s # otherwise it will match Class
    when 'Fixnum', 'Integer' then sprintf('%011d', value) # TODO: configurable
    when 'Float' then sprintf('%024.12f', value) # TODO: configurable
    when 'Bignum' then sprintf('%024d', value)
    when 'Date'
      t = Time.utc(value.year, value.month, value.day)
      d = t.to_i * 1000
      org.apache.lucene.document.DateTools.timeToString(d,org.apache.lucene.document.DateTools::Resolution::DAY )
    when 'DateTime'
      # only utc times are supported
      t = Time.utc(value.year, value.month, value.day, value.hour, value.min, value.sec)
      d = t.to_i * 1000
      org.apache.lucene.document.DateTools.timeToString(d,org.apache.lucene.document.DateTools::Resolution::SECOND )
    else value.to_s
    end
  end

  # Builds a java lucene query for +key+: a RangeQuery when +value+ is a
  # Range, otherwise a TermQuery.
  # Returns nil when a non-range value converts to nil.
  def convert_to_query(key,value)
    if value.kind_of?(Range)
      first_value = convert_to_lucene(value.first)
      last_value = convert_to_lucene(value.last)
      first = org.apache.lucene.index.Term.new(key.to_s, first_value)
      last = org.apache.lucene.index.Term.new(key.to_s, last_value)
      $LUCENE_LOGGER.debug{"convert_to_query: Range key '#{key.to_s}' '#{first_value}' to '#{last_value}'"}
      org.apache.lucene.search.RangeQuery.new(first, last, !value.exclude_end?)
    else
      converted_value = convert_to_lucene(value)
      # there is nothing to build a term from
      return nil if converted_value.nil?
      term = org.apache.lucene.index.Term.new(key.to_s, converted_value)
      org.apache.lucene.search.TermQuery.new(term)
    end
  end

  def tokenized?
    @info[:tokenized]
  end

  def store?
    @info[:store]
  end

  # True when +other+ is a FieldInfo holding the same value for every
  # setting of this one.
  # NOTE: only the receiver's keys are compared, so settings that exist
  # only in +other+ are ignored and the comparison is not symmetric.
  def eql?(other)
    return false unless other.kind_of?(FieldInfo)
    @info.all? { |key, value| other[key] == value }
  end

  def ==(other)
    eql? other
  end

  def to_s
    infos = @info.keys.inject(""){|s, key| s << "#{key}=#{@info[key]} "}
    "FieldInfo(#{self.object_id.to_s}) [#{infos}]"
  end


end
143
+ end
144
+
@@ -0,0 +1,54 @@
1
+ module Lucene
2
+
3
+
4
+ #
5
+ # Contains the result as a collection of Documents from a lucene query.
6
+ # Is a wrapper for the Java org.apache.lucene.search.Hits class
7
+ #
8
class Hits
  include Enumerable

  # field_infos:: passed to Document.convert for every hit
  # hits:: the wrapped java hits object
  def initialize(field_infos, hits)
    @hits = hits
    @field_infos = field_infos
  end


  #
  # Returns the n:th hit document.
  #
  def [](n)
    doc = @hits.doc(n)
    Document.convert(@field_infos, doc)
  end


  #
  # Returns true if there are no hits
  #
  def empty?
    @hits.length == 0
  end

  #
  # Yields every hit converted to a Document.
  # Returns an Enumerator when called without a block.
  #
  def each
    return enum_for(:each) unless block_given?

    iter = @hits.iterator
    while iter.hasNext
      hit = iter.next
      # stop at the first missing hit
      break unless hit
      yield Document.convert(@field_infos, hit.getDocument)
    end
  end


  #
  # The number of documents the query gave.
  #
  def size
    @hits.length
  end

  def to_s
    "Hits [size=#{size}]"
  end

end
54
+ end