lucene 0.5.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
require 'logger'

# Process-wide logger used by the Lucene classes for their debug output.
# It writes to STDOUT and only reports warnings and above by default.
#
# The logger is created only when no logger has been installed yet, so an
# application may assign its own logger to $LUCENE_LOGGER before this file is
# loaded (or at any time afterwards) without it being overwritten on load.
$LUCENE_LOGGER ||= begin
  logger = Logger.new(STDOUT)
  logger.level = Logger::WARN
  logger
end
4
+
5
+ require 'lucene/config'
6
+ require 'lucene/document'
7
+ require 'lucene/field_info'
8
+ require 'lucene/hits'
9
+ require 'lucene/index'
10
+ require 'lucene/index_info'
11
+ require 'lucene/index_searcher'
12
+ require 'lucene/jars'
13
+ require 'lucene/query_dsl'
14
+ require 'lucene/transaction'
15
+
@@ -0,0 +1,145 @@
1
+
2
module Lucene

  #
  # Keeps configuration for lucene.
  # Contains both common configuration for all lucene indexes as well
  # as specific configuration for each index (TODO).
  # This code is copied from merb-core/config.rb.
  #
  # Contains three default configurations (Config.defaults)
  # * :store_on_file:: default false, which will only keep the index in memory
  # * :id_field:: default :id
  # * :storage_path:: where the index is kept on file system if stored as a file (instead of just in memory)
  #
  # Every accessor goes through the same lazily initialised hash, so the
  # defaults are present no matter which method is called first.
  #
  class Config
    class << self
      # Returns the hash of default config values for lucene.
      #
      # ==== Returns
      # Hash:: The defaults for the config.
      #
      def defaults
        @defaults ||= {
          :store_on_file => false,
          :id_field      => :id,
          :storage_path  => nil
        }
      end

      # Yields the configuration.
      #
      # ==== Block parameters
      # c<Hash>:: The configuration parameters.
      #
      # ==== Examples
      #   Lucene::Config.use do |config|
      #     config[:store_on_file] = true
      #   end
      #
      # ==== Returns
      # nil
      #
      def use
        yield configuration
        nil
      end

      # Set the value of a config entry.
      #
      # ==== Parameters
      # key<Object>:: The key to set the parameter for.
      # val<Object>:: The value of the parameter.
      #
      def []=(key, val)
        configuration[key] = val
      end

      # Gets the value of a config entry.
      #
      # ==== Parameters
      # key<Object>:: The key of the config entry value we want
      #
      def [](key)
        configuration[key]
      end

      # Remove the value of a config entry.
      #
      # ==== Parameters
      # key<Object>:: The key of the parameter to delete.
      #
      # ==== Returns
      # Object:: The value of the removed entry, or nil if there was none.
      #
      def delete(key)
        configuration.delete(key)
      end

      # Remove all configuration. This can be useful for testing purpose.
      # The next access re-creates the configuration from the defaults.
      # Also calls IndexInfo.delete_all.
      #
      # ==== Returns
      # The result of IndexInfo.delete_all.
      #
      def delete_all
        @configuration = nil
        IndexInfo.delete_all
      end

      # Retrieve the value of a config entry, returning the provided default if the key is not present
      #
      # ==== Parameters
      # key<Object>:: The key to retrieve the parameter for.
      # default<Object>::
      #   The default value to return if the parameter is not set.
      #
      # ==== Returns
      # Object:: The value of the configuration parameter or the default.
      #
      def fetch(key, default)
        configuration.fetch(key, default)
      end

      # Sets up the configuration, discarding any current values and
      # starting again from a copy of the defaults.
      #
      # ==== Returns
      # The configuration as a hash.
      #
      def setup()
        @configuration = defaults.dup
      end

      # Returns the configuration as a hash.
      #
      # ==== Returns
      # Hash:: The config as a hash.
      #
      def to_hash
        configuration
      end

      # Returns the config as YAML.
      #
      # ==== Returns
      # String:: The config as YAML.
      #
      def to_yaml
        require "yaml" # loaded lazily, only needed when dumping the config
        configuration.to_yaml
      end

      private

      # The live configuration hash, created from the defaults on first use.
      def configuration
        @configuration ||= setup
      end
    end
  end

end
@@ -0,0 +1,96 @@
1
module Lucene

  #
  # A document is like a record or row in a relational database.
  # Contains the field infos which can be used for type conversions or
  # specifying if the field should be stored or only searchable.
  #
  # Two documents are considered equal when their id values are equal.
  #
  class Document

    attr_reader :id_field, :field_infos, :props

    #
    # Creates a document from the given properties. Each value is converted
    # with the convert_to_ruby method of the field info registered for its key.
    #
    # field_infos:: responds to id_field and [] (returning a field info per key)
    # props:: hash of field key => value
    #
    def initialize(field_infos, props = {})
      @id_field = field_infos.id_field
      @field_infos = field_infos

      @props = {}
      props.each_pair do |key, value|
        field_info = field_infos[key]
        @props[key] = field_info.convert_to_ruby(value)
        $LUCENE_LOGGER.debug{"FieldInfo #{key} type: #{field_info[:type]}"}
        $LUCENE_LOGGER.debug{"Converted #{key} '#{value}' type: '#{value.class.to_s}' to '#{@props[key]}' type: '#{@props[key].class.to_s}'"}
      end
    end

    #
    # Returns the (converted) value of the given field, or nil if not set.
    #
    def [](key)
      @props[key]
    end

    #
    # Convert a java Document to a ruby Lucene::Document
    #
    # Only fields whose field info answers true to store? are read.
    # Raises StandardError if such a field is missing in the java document.
    #
    def self.convert(field_infos, java_doc)
      fields = {}
      field_infos.each_pair do |key, field|
        next unless field.store?
        java_field = java_doc.getField(key.to_s)
        raise StandardError, "expected field '#{key.to_s}' to exist in document" if java_field.nil?
        fields[key] = java_field.stringValue
      end
      Document.new(field_infos, fields)
    end

    #
    # Returns the value of the id field.
    # Raises IdFieldMissingException if the document has no value for it.
    #
    def id
      raise IdFieldMissingException, "Missing id field: '#{@id_field}'" if self[@id_field].nil?
      @props[@id_field]
    end

    #
    # True if other is a Document with the same id.
    #
    def eql?(other)
      other.is_a?(Document) && id == other.id
    end

    def ==(other)
      eql?(other)
    end

    # Hash of the id, consistent with eql?
    def hash
      id.hash
    end

    #
    # removes the document and adds it again
    #
    def update(index_writer)
      index_writer.updateDocument(java_key_term, java_document)
    end

    #
    # The java Term (id field name, id value as string) identifying this document.
    #
    def java_key_term
      org.apache.lucene.index.Term.new(@id_field.to_s, id.to_s)
    end

    #
    # Builds a java Document from the properties. An Array value adds one
    # java field per element; values for which the field info returns no
    # java field are skipped.
    #
    def java_document
      java_doc = org.apache.lucene.document.Document.new
      @props.each_pair do |key, value|
        field_info = @field_infos[key]
        # TODO value could be an array if value.kind_of? Enumerable
        values = value.kind_of?(Array) ? value : [value]
        values.each do |single_value|
          field = field_info.java_field(key, single_value)
          java_doc.add(field) unless field.nil?
        end
      end
      java_doc
    end

    def to_s
      props_text = @props.map { |key, value| "'#{key}' = '#{value}' " }.join
      "Document [#{@id_field}='#{self[@id_field]}', #{props_text}]"
    end
  end
end
@@ -0,0 +1,144 @@
1
+ require 'date'
2
+
3
module Lucene
  # Raised by FieldInfo#convert_to_ruby when the configured :type is not supported.
  class ConversionNotSupportedException < StandardError; end

  #
  # Describes how a single field is handled: whether it is stored
  # (:store), whether it is tokenized (:tokenized), its ruby type (:type) and
  # its analyzer (:analyzer). Also converts values between ruby objects and
  # the string representation put into the lucene index.
  #
  # The :type is matched by its class name, so Fixnum/Bignum (older rubies)
  # and Integer are all treated as integer types.
  #
  class FieldInfo
    DEFAULTS = {:store => false, :type => String, :analyzer => :standard}.freeze

    # values:: hash of settings overriding DEFAULTS
    def initialize(values = {})
      @info = DEFAULTS.merge(values)
      $LUCENE_LOGGER.debug{"new FieldInfo: #{@info.inspect}"}
    end

    # Returns a new FieldInfo with a copy of the settings.
    def dup
      FieldInfo.new(@info)
    end

    def [](key)
      @info[key]
    end

    def []=(key,value)
      @info[key] = value
    end

    #
    # Creates the java lucene Field for the given key and ruby value.
    # Returns nil if the value converts to nil, i.e. should not be indexed.
    #
    def java_field(key, value)
      # convert the ruby value to a string that lucene can handle
      cvalue = convert_to_lucene(value)

      # check if this field should be indexed
      return nil if cvalue.nil?

      field_class = org.apache.lucene.document.Field

      # decide if the field should be stored in the lucene index or not
      store = store? ? field_class::Store::YES : field_class::Store::NO

      # decide if it should be tokenized/analyzed by lucene
      token_type = tokenized? ? field_class::Index::ANALYZED : field_class::Index::NOT_ANALYZED
      $LUCENE_LOGGER.debug{"java_field store=#{store} key='#{key.to_s}' value='#{cvalue}' token_type=#{token_type}"}

      # create the new Field
      field_class.new(key.to_s, cvalue, store, token_type) #org.apache.lucene.document.Field::Index::NO_NORMS)
    end

    #
    # Converts a value (typically the string read from the index) to the
    # configured ruby :type. Arrays are converted element by element.
    # Date and DateTime strings are read as yyyymmdd / yyyymmddHHMMSS.
    #
    # Raises ConversionNotSupportedException for an unsupported :type.
    #
    def convert_to_ruby(value)
      return value.collect { |v| convert_to_ruby(v) } if value.kind_of?(Array)

      case type_name
      when 'NilClass' then "" # TODO, should we accept nil values in indexes ?
      when 'String'   then value.to_s
      when 'Fixnum', 'Bignum', 'Integer' then value.to_i
      when 'Float'    then value.to_f
      when 'Date'
        return value if value.kind_of?(Date)
        return nil if value.nil?
        Date.new(value[0..3].to_i, value[4..5].to_i, value[6..7].to_i)
      when 'DateTime'
        return value if value.kind_of?(DateTime)
        return nil if value.nil?
        DateTime.civil(value[0..3].to_i, value[4..5].to_i, value[6..7].to_i,
                       value[8..9].to_i, value[10..11].to_i, value[12..13].to_i)
      else
        raise ConversionNotSupportedException, "Can't convert value '#{value}' to type '#{@info[:type]}'"
      end
    end

    #
    # Converts a ruby value to the string stored in the lucene index.
    # Numbers are zero padded to a fixed width. Arrays are converted element
    # by element and nil stays nil.
    #
    def convert_to_lucene(value)
      return value.collect { |v| convert_to_lucene(v) } if value.kind_of?(Array)
      return nil if value.nil?

      date_tools = nil
      case type_name # compare names, a case on the class itself would match Class
      when 'Fixnum', 'Integer' then sprintf('%011d', value)     # TODO: configurable
      when 'Float'             then sprintf('%024.12f', value)  # TODO: configurable
      when 'Bignum'            then sprintf('%024d', value)
      when 'Date'
        date_tools = org.apache.lucene.document.DateTools
        millis = Time.utc(value.year, value.month, value.day).to_i * 1000
        date_tools.timeToString(millis, date_tools::Resolution::DAY)
      when 'DateTime'
        # only utc times are supported
        date_tools = org.apache.lucene.document.DateTools
        millis = Time.utc(value.year, value.month, value.day, value.hour, value.min, value.sec).to_i * 1000
        date_tools.timeToString(millis, date_tools::Resolution::SECOND)
      else value.to_s
      end
    end

    #
    # Creates a java lucene query for this field: a RangeQuery for a Range
    # value (inclusive unless the range excludes its end), otherwise a
    # TermQuery. Returns nil when a non range value converts to nil.
    #
    def convert_to_query(key,value)
      if value.kind_of?(Range)
        first_value = convert_to_lucene(value.first)
        last_value  = convert_to_lucene(value.last)
        first = org.apache.lucene.index.Term.new(key.to_s, first_value)
        last  = org.apache.lucene.index.Term.new(key.to_s, last_value)
        $LUCENE_LOGGER.debug{"convert_to_query: Range key '#{key.to_s}' '#{first_value}' to '#{last_value}'"}
        org.apache.lucene.search.RangeQuery.new(first, last, !value.exclude_end?)
      else
        converted_value = convert_to_lucene(value)
        # no term query can be built for a value that is not indexed
        return nil if converted_value.nil?
        term = org.apache.lucene.index.Term.new(key.to_s, converted_value)
        org.apache.lucene.search.TermQuery.new(term)
      end
    end

    def tokenized?
      @info[:tokenized]
    end

    def store?
      @info[:store]
    end

    #
    # True if other is a FieldInfo with the same settings. The settings of
    # both sides are compared, a missing setting counts as nil.
    #
    def eql?(other)
      return false unless other.kind_of?(FieldInfo)
      (info.keys | other.info.keys).all? { |key| self[key] == other[key] }
    end

    def ==(other)
      eql? other
    end

    # Hash consistent with eql? (settings with a nil value are ignored).
    def hash
      @info.reject { |_key, value| value.nil? }.hash
    end

    def to_s
      infos = @info.map { |key, value| "#{key}=#{value} " }.join
      "FieldInfo(#{self.object_id.to_s}) [#{infos}]"
    end

    protected

    # The raw settings hash, exposed to other FieldInfo instances for eql?
    def info
      @info
    end

    private

    # Name of the configured :type, used to select the conversion.
    def type_name
      @info[:type].to_s
    end
  end
end
144
+
@@ -0,0 +1,54 @@
1
module Lucene

  #
  # Contains the result as a collection of Documents from a lucene query.
  # Is a wrapper for the Java org.apache.lucene.search.Hits class
  #
  # Each hit is converted to a Lucene::Document with Document.convert
  # when it is accessed.
  #
  class Hits
    include Enumerable

    # field_infos:: passed on to Document.convert for every hit
    # hits:: the wrapped java hits object
    def initialize(field_infos, hits)
      @hits = hits
      @field_infos = field_infos
    end

    #
    # Returns the n:th hit document.
    #
    def [](n)
      Document.convert(@field_infos, @hits.doc(n))
    end

    #
    # Returns true if there are no hits
    #
    def empty?
      size == 0
    end

    #
    # Yields each hit as a Lucene::Document.
    # Returns an enumerator when no block is given.
    #
    def each
      return enum_for(:each) unless block_given?

      iter = @hits.iterator
      while iter.hasNext
        hit = iter.next
        break unless hit # stop at the first missing hit
        yield Document.convert(@field_infos, hit.getDocument)
      end
    end

    #
    # The number of documents the query gave.
    #
    def size
      @hits.length
    end

    def to_s
      "Hits [size=#{size}]"
    end

  end
end