moonstone 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,61 @@
+ $:.unshift "#{here = File.dirname(__FILE__)}/lib"
+ require 'rake/gempackagetask'
+ require 'rake/rdoctask'
+
+ deps = %w{ rspec }
+
+ task(:install_gems) {
+   deps.each { |g|
+     system "jruby -S gem install #{g}"
+   }
+ }
+
+ spec = Gem::Specification.new { |s|
+   s.platform = Gem::Platform::RUBY
+
+   s.authors = "Matthew King", "Jason Rush", "Jay Donnell", "Dan Yoder"
+   s.email = "self@automatthew.com"
+   s.files = Dir["{lib,doc,bin,ext}/**/*"].delete_if { |f|
+     /\/rdoc(\/|$)/i.match f
+   } + %w(Rakefile)
+   s.require_path = 'lib'
+   s.has_rdoc = true
+   s.extra_rdoc_files = Dir['doc/*'].select(&File.method(:file?))
+   s.extensions << 'ext/extconf.rb' if File.exist? 'ext/extconf.rb'
+   Dir['bin/*'].map(&File.method(:basename)).map(&s.executables.method(:<<))
+
+   s.name = 'moonstone'
+   s.summary = "Moonstone Agile Search Framework"
+   deps.each &s.method(:add_dependency)
+   s.version = '0.6.0'
+ }
+
+ Rake::GemPackageTask.new(spec) { |pkg|
+   pkg.need_tar_bz2 = true
+ }
+
+ task(:uninstall) {
+   system "sudo jruby -S gem uninstall -aIx #{spec.name}"
+ }
+
+ task(:install => [:uninstall, :package]) {
+   g = "pkg/#{spec.name}-#{spec.version}.gem"
+   system "sudo jruby -S gem install --local #{g}"
+ }
+
+ task(:uninstall_no_sudo) {
+   system "jruby -S gem uninstall -aIx #{spec.name}"
+ }
+
+ task(:install_no_sudo => [:uninstall_no_sudo, :package]) {
+   g = "pkg/#{spec.name}-#{spec.version}.gem"
+   system "jruby -S gem install -l #{g}"
+ }
+
+ desc "run some tests"
+ task :test do
+   options = ENV['options']
+   files = FileList['test/**/*.rb'].exclude('test/helpers.rb')
+   puts cmd = "jruby #{options} -I lib -S spec -c #{ files.join(' ') }"
+   system cmd
+ end
data/lib/jar/lucene-core-2.4.0.jar ADDED
Binary file
data/lib/jar/lucene-spellchecker-2.4-dev.jar ADDED
Binary file
data/lib/lucene/analysis.rb ADDED
@@ -0,0 +1,58 @@
+ module Lucene
+   module Analysis
+     include_package "org.apache.lucene.analysis"
+
+     module Standard
+       include_package "org.apache.lucene.analysis.standard"
+       [
+         StandardAnalyzer,
+         StandardFilter,
+         StandardTokenizer,
+       ]
+     end
+     include Standard
+
+     TokenStream.module_eval do
+       include Enumerable
+       def each
+         token = Token.new
+         while token = self.next(token) do
+           yield token
+         end
+       end
+     end
+
+     Analyzer.module_eval do
+       def tokenize(field, text)
+         token_stream(field, java.io.StringReader.new(text)).map { |token| token.term_text }
+       end
+     end
+
+     # Biggie Smalls, Biggie Smalls, Biggie Smalls
+     # (naming the classes forces JRuby to resolve them as constants here)
+     [
+       CachingTokenFilter,
+       CharTokenizer,
+       ISOLatin1AccentFilter,
+       KeywordAnalyzer,
+       KeywordTokenizer,
+       LengthFilter,
+       LetterTokenizer,
+       LowerCaseFilter,
+       LowerCaseTokenizer,
+       PerFieldAnalyzerWrapper,
+       PorterStemFilter,
+       PorterStemmer,
+       SimpleAnalyzer,
+       SinkTokenizer,
+       StopAnalyzer,
+       StopFilter,
+       TeeTokenFilter,
+       Token,
+       TokenFilter,
+       Tokenizer,
+       WhitespaceAnalyzer,
+       WhitespaceTokenizer,
+       WordlistLoader
+     ]
+   end
+ end
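With these wrappers loaded, any Lucene analyzer gains a one-call tokenizer via the tokenize patch above. A minimal sketch (not part of the package), assuming JRuby and the bundled lucene-core 2.4 jar:

    require 'moonstone'  # loads the jars and the wrappers above

    analyzer = Lucene::Analysis::StandardAnalyzer.new
    analyzer.tokenize("body", "The Quick Brown Fox")
    # => ["quick", "brown", "fox"]  (StandardAnalyzer lowercases and drops stop words)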
data/lib/lucene/document.rb ADDED
@@ -0,0 +1,149 @@
+ module Lucene
+   module Document
+     include_package "org.apache.lucene.document"
+
+     # avoid naming problems with Lucene::Document::Document
+     Doc = Lucene::Document::Document
+
+     # I spit on final class
+     Doc.module_eval do
+       attr_accessor :score, :id, :tokens, :explanation
+
+       self::Field = Lucene::Document::Field
+
+       @@field_store = {
+         nil       => Field::Store::YES,
+         false     => Field::Store::NO,
+         :NO       => Field::Store::NO,
+         :no       => Field::Store::NO,
+         true      => Field::Store::YES,
+         :YES      => Field::Store::YES,
+         :yes      => Field::Store::YES,
+         :compress => Field::Store::COMPRESS,
+         :COMPRESS => Field::Store::COMPRESS
+       }
+       @@field_index = {
+         nil                    => Field::Index::ANALYZED,
+         false                  => Field::Index::NO,
+         :NO                    => Field::Index::NO,
+         :no                    => Field::Index::NO,
+         true                   => Field::Index::ANALYZED,
+         :analyzed              => Field::Index::ANALYZED,
+         :ANALYZED              => Field::Index::ANALYZED,
+         :not_analyzed          => Field::Index::NOT_ANALYZED,
+         :NOT_ANALYZED          => Field::Index::NOT_ANALYZED,
+         :analyzed_no_norms     => Field::Index::ANALYZED_NO_NORMS,
+         :ANALYZED_NO_NORMS     => Field::Index::ANALYZED_NO_NORMS,
+         :not_analyzed_no_norms => Field::Index::NOT_ANALYZED_NO_NORMS,
+         :NOT_ANALYZED_NO_NORMS => Field::Index::NOT_ANALYZED_NO_NORMS
+       }
+
+       @@field_term_vector = {
+         nil                     => Field::TermVector::NO,
+         :NO                     => Field::TermVector::NO,
+         :no                     => Field::TermVector::NO,
+         false                   => Field::TermVector::NO,
+         :YES                    => Field::TermVector::YES,
+         :yes                    => Field::TermVector::YES,
+         true                    => Field::TermVector::YES,
+         :WITH_POSITIONS         => Field::TermVector::WITH_POSITIONS,
+         :with_positions         => Field::TermVector::WITH_POSITIONS,
+         :WITH_OFFSETS           => Field::TermVector::WITH_OFFSETS,
+         :with_offsets           => Field::TermVector::WITH_OFFSETS,
+         :WITH_POSITIONS_OFFSETS => Field::TermVector::WITH_POSITIONS_OFFSETS,
+         :with_positions_offsets => Field::TermVector::WITH_POSITIONS_OFFSETS
+       }
+
+       def self.new
+         doc = super()
+         yield doc if block_given?
+         doc
+       end
+
+       def self.create(fields)
+         doc = self.new
+         fields.each { |field| doc.add_field(*field) }
+         doc
+       end
+
+       def add_field(name, value, options={})
+         field = if value.is_a? java.io.Reader
+           Field.new(name, value, @@field_term_vector[options[:term_vector]])
+         else
+           store = @@field_store[options[:store]]
+           index = @@field_index[options[:index]]
+           term_vector = @@field_term_vector[options[:term_vector]]
+           params = [name, value, store, index]
+           params << term_vector if term_vector
+           Field.new(*params)
+         end
+         add(field)
+       end
+
+       # specialty field adders
+       def stored(name, value)
+         add_field(name, value, :store => true, :index => false)
+       end
+
+       def analyzed(name, value)
+         add_field(name, value, :store => true, :index => :analyzed)
+       end
+
+       def unanalyzed(name, value)
+         add_field(name, value, :store => true, :index => :not_analyzed)
+       end
+
+       alias_method :[], :get
+
+       def get_all(field_name)
+         fields.select { |f| f.name == field_name }.map { |f| f.string_value }
+       end
+
+       def field_names
+         fields.map { |f| f.name }.uniq
+       end
+
+       alias_method :keys, :field_names
+
+       def to_hash
+         hash = {}
+         hash["id"] = @id if @id
+         hash["score"] = @score if @score
+         hash["explanation"] = @explanation.toString(1) if @explanation
+         fields = {}
+         hash["fields"] = fields
+         keys.each do |k|
+           values = self.get_all(k)
+           # fields[k] = values.size == 1 ? values.first : values
+           fields[k] = values
+         end
+         hash["tokens"] = @tokens if @tokens
+         hash
+       end
+
+       def to_json
+         to_hash.to_json
+       end
+
+     end
+
+     Field.module_eval do
+
+       alias_method :stored?,     :is_stored
+       alias_method :indexed?,    :is_indexed
+       alias_method :tokenized?,  :is_tokenized
+       alias_method :analyzed?,   :is_tokenized
+       alias_method :compressed?, :is_compressed
+
+       def unanalyzed?; indexed? && !analyzed?; end
+       def unindexed?;  stored? && !indexed?;  end
+
+     end
+
+     # Biggie Smalls, Biggie Smalls, Biggie Smalls
+     [
+       DateField,
+       DateTools
+     ]
+   end
+ end
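A quick illustration of the Doc conveniences above (an editorial sketch; the field names are invented):

    doc = Lucene::Document::Doc.create([
      ["title", "The Moonstone", { :store => true, :index => :analyzed }],
      ["sku",   "m-0001",        { :store => true, :index => :not_analyzed }]
    ])
    doc["title"]      # => "The Moonstone"  (via the [] alias for get)
    doc.field_names   # => ["title", "sku"]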
data/lib/lucene/function.rb ADDED
@@ -0,0 +1,9 @@
+ module Lucene
+   module Search
+     module Function
+       include_package 'org.apache.lucene.search.function'
+
+       [FieldScoreQuery, CustomScoreQuery]
+     end
+   end
+ end
data/lib/lucene/index.rb ADDED
@@ -0,0 +1,48 @@
+ module Lucene
+   module Index
+     include_package "org.apache.lucene.index"
+
+     IndexWriter.module_eval do
+       MaxFieldLength = self::MaxFieldLength
+
+       def self.open(*args)
+         args << MaxFieldLength::UNLIMITED unless args.last.is_a? MaxFieldLength
+         writer = new(*args)
+         begin
+           result = yield(writer)
+         ensure
+           writer.close
+         end
+         result
+       end
+
+       def add_documents(docs)
+         docs.each { |doc| add_document(doc) }
+       end
+     end
+
+     TermEnum.module_eval do
+       include Enumerable
+
+       def each
+         while self.next do
+           yield term
+         end
+       end
+
+       def for_field(field_name)
+         select { |t| t.field == field_name }.map { |t| t.text }
+       end
+
+     end
+
+     # Biggie Smalls, Biggie Smalls, Biggie Smalls
+     [
+       IndexReader,
+       Payload,
+       Term,
+     ]
+   end
+ end
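The block form of IndexWriter.open guarantees the writer is closed even if the block raises. A sketch (the analyzer and docs are assumed to exist):

    store = Lucene::Store::RAMDirectory.new
    Lucene::Index::IndexWriter.open(store, analyzer) do |writer|
      writer.add_documents(docs)
    end  # writer.close runs in the ensure clause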
data/lib/lucene/query_parser.rb ADDED
@@ -0,0 +1,14 @@
+ module Lucene
+   module QueryParser
+     include_package "org.apache.lucene.queryParser"
+
+     # avoid naming problems with Lucene::QueryParser::QueryParser
+     Parser = org.apache.lucene.queryParser.QueryParser
+
+     # Biggie Smalls, Biggie Smalls, Biggie Smalls
+     [
+       MultiFieldQueryParser,
+       Token
+     ]
+   end
+ end
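The Parser constant saves typing Lucene::QueryParser::QueryParser. A usage sketch (the field name and analyzer are assumed):

    parser = Lucene::QueryParser::Parser.new("body", analyzer)
    query  = parser.parse("quick AND fox")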
data/lib/lucene/search.rb ADDED
@@ -0,0 +1,195 @@
+ module Lucene
+   module Search
+     include_package "org.apache.lucene.search"
+
+     [ SortField, Sort ]
+
+     Hits.module_eval do
+       include Enumerable
+       def each
+         i = 0
+         while i < length do
+           yield doc(i)
+           i += 1
+         end
+       end
+
+       def to_a
+         map
+       end
+
+       alias_method :size, :length
+     end
+
+     TopDocs.module_eval do
+       attr_accessor :query
+       include Enumerable
+
+       def each(searcher=nil)
+         initialize_docs(searcher) if searcher && documents.empty?  # do we ever want to reinitialize the documents list?
+         documents.each { |doc| yield doc }
+       end
+
+       def initialize_docs(searcher)
+         @offset ||= 0
+         self.scoreDocs.each_with_index do |sd, i|
+           # for pagination, only init the docs at or past the offset
+           if i >= @offset
+             doc = searcher.doc(sd.doc)
+             doc.score = sd.score
+             doc.id = sd.doc
+             documents << doc
+           end
+         end
+       end
+
+       # record an offset; docs that precede it are skipped
+       def offset!(offset)
+         @offset = offset || 0
+         self
+       end
+
+       def offset
+         @offset ||= 0
+       end
+
+       def [](index)
+         documents[index]
+       end
+
+       def first
+         documents[0]
+       end
+
+       def last
+         to_a.last
+       end
+
+       def length
+         self.scoreDocs.length - (@offset || 0)
+       end
+
+       alias_method :size, :length
+
+       def empty?
+         self.length == 0
+       end
+
+       def to_hash
+         {
+           :query      => self.query,
+           :total_hits => self.totalHits,
+           :documents  => self.to_a
+         }
+       end
+
+       def to_json
+         to_hash.to_json
+       end
+
+       private
+
+       def documents
+         @documents ||= []
+       end
+     end
+
+     Hit.module_eval do
+       alias_method :[], :get
+     end
+
+     IndexSearcher.module_eval do
+       def self.open(*args)
+         searcher = new(*args)
+         begin
+           result = yield(searcher)
+         ensure
+           searcher.close
+         end
+         result
+       end
+     end
+
+     BooleanQuery.module_eval do
+
+       def self.and(*queries)
+         q = self.new
+         queries.each { |query| q.add(query, BooleanClause::Occur::MUST) }
+         q
+       end
+
+       def self.or(*queries)
+         q = self.new
+         queries.each { |query| q.add(query, BooleanClause::Occur::SHOULD) }
+         q
+       end
+
+       def self.not(*queries)
+         q = self.new
+         queries.each { |query| q.add(query, BooleanClause::Occur::MUST_NOT) }
+         q
+       end
+
+       def and(*queries)
+         queries.each { |query| add(query, BooleanClause::Occur::MUST) }
+         self
+       end
+
+       def or(*queries)
+         queries.each { |query| add(query, BooleanClause::Occur::SHOULD) }
+         self
+       end
+
+       def not(*queries)
+         queries.each { |query| add(query, BooleanClause::Occur::MUST_NOT) }
+         self
+       end
+
+     end
+
+     TermQuery.module_eval do
+
+       def self.new(*args)
+         term = args.first.is_a?(Lucene::Index::Term) ? args.first : Lucene::Index::Term.new(*args)
+         super(term)
+       end
+
+     end
+
+     module Spell
+       include_package 'org.apache.lucene.search.spell'
+       [PlainTextDictionary]
+     end
+
+     PhraseQuery.module_eval do
+       def self.create(field, phrase)
+         raise "I need an array" unless phrase.is_a? Array
+         query = self.new
+         phrase.each do |word|
+           query.add(Index::Term.new(field, word))
+         end
+         query
+       end
+     end
+
+     # Biggie Smalls, Biggie Smalls, Biggie Smalls
+     [
+       Explanation,
+       FilteredQuery,
+       FuzzyQuery,
+       HitIterator,
+       MultiPhraseQuery,
+       PrefixQuery,
+       Query,
+       RangeQuery,
+       ScoreDoc,
+       Searcher,
+       Similarity,
+       TopDocCollector,
+       TopFieldDocCollector,
+       TopFieldDocs,
+       Weight,
+       WildcardQuery
+     ]
+   end
+ end
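The BooleanQuery helpers compose clauses without touching BooleanClause::Occur directly, and the patched TermQuery.new builds the Term for you. A sketch with invented fields:

    red_round = Lucene::Search::BooleanQuery.and(
      Lucene::Search::TermQuery.new("color", "red"),
      Lucene::Search::TermQuery.new("shape", "round"))
    red_round.not(Lucene::Search::TermQuery.new("size", "small"))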
data/lib/lucene/store.rb ADDED
@@ -0,0 +1,12 @@
+ module Lucene
+   module Store
+     include_package "org.apache.lucene.store"
+
+     # Biggie Smalls, Biggie Smalls, Biggie Smalls
+     [
+       Directory,
+       FSDirectory,
+       RAMDirectory
+     ]
+   end
+ end
data/lib/moonstone/analyzer.rb ADDED
@@ -0,0 +1,23 @@
+ module Moonstone
+   class Analyzer < Lucene::Analysis::Analyzer
+     attr_accessor :filter_chain
+
+     # Moonstone::Analyzer.new(WhitespaceTokenizer, StandardFilter, StemFilter)
+     # FIXME: Why don't we explicitly require a tokenizer + *filters ?
+     def self.new(*classes)
+       analyzer = super()
+       analyzer.filter_chain = classes
+       analyzer
+     end
+
+     def tokenStream(field_name, reader)
+       tokenizer, *args = @filter_chain[0]
+       stream = tokenizer.new(reader, *args)
+       @filter_chain.slice(1..-1).each do |filter|
+         klass, *args = filter
+         stream = klass.new(stream, *args)
+       end
+       stream
+     end
+
+   end
+ end
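Each entry in the chain is a class, or an array of the form [klass, *constructor_args]. A sketch using stock Lucene classes (the LengthFilter bounds are arbitrary):

    include Lucene::Analysis
    analyzer = Moonstone::Analyzer.new(
      WhitespaceTokenizer,
      LowerCaseFilter,
      [LengthFilter, 2, 50])  # extra elements become constructor args

    analyzer.tokenize("body", "Red  Green-Blue")  # => ["red", "green-blue"]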
data/lib/moonstone/engine.rb ADDED
@@ -0,0 +1,186 @@
+ module Moonstone
+   class Engine
+     include Lucene::Index
+     include Lucene::Search
+
+     attr_reader :store, :similarity
+
+     # :store should be a String or some kind of Lucene::Store::Directory
+     def initialize(options = {})
+       @store = options[:store] || Lucene::Store::RAMDirectory.new
+       @inspect = options[:inspect]
+     end
+
+     # The source should be enumerable.
+     def index(source, optimize=true)
+       IndexWriter.open(@store, analyzer) do |writer|
+         writer.set_similarity(@similarity.new) if @similarity
+
+         source.each_with_index do |record, i|
+           doc = doc_from(record)
+           writer.add_document(doc) if doc
+           Moonstone::Logger.info "Indexed #{i+1} records" if (i+1) % 1000 == 0
+         end
+         writer.optimize if optimize
+         yield writer if block_given?  # for post-processing where you still need access to the writer
+       end
+       refresh_searcher
+     end
+
+     def stamp_metadata
+       metadata = Lucene::Document::Doc.new
+       metadata.add_field 'metadata', 'index', :index => :not_analyzed
+       metadata.add_field 'build_date', Date.today.strftime("%Y-%m-%d"), :index => false
+       metadata.add_field 'engine_name', self.class.name, :index => false
+       metadata.add_field 'engine_version', `git show-ref -s --abbrev HEAD`.chomp, :index => false
+       metadata.add_field 'query_conditions', ENV['query_conditions'].to_s, :index => false
+       writer do |w|
+         w.add_document(metadata)
+       end
+     end
+
+     def index_metadata
+       query = TermQuery.new 'metadata', 'index'
+       @index_metadata ||= search(query).last
+     end
+
+     def doc_count
+       @reader ||= IndexReader.open(@store)
+       @reader.max_doc
+     end
+
+     def document(id)
+       @reader ||= IndexReader.open(@store)
+       if id < @reader.max_doc
+         doc = @reader.document(id)
+         doc.tokens = tokens_for_doc(id)
+         doc.id = id
+         doc
+       end
+     end
+
+     # Adds docs to the index. The source must be an enumerable set of
+     # objects that doc_from can turn into a document.
+     def insert_documents(source, optimize=false)
+       index(source, optimize)
+       refresh_searcher
+     end
+
+     def insert_document(source, optimize=false)
+       insert_documents([source], optimize)
+     end
+
+     # docs must be an enumerable set of hashes with keys
+     # :field, :value, :document
+     # (where field and value combine to make a term matching documents to replace)
+     def update_documents(docs)
+       IndexWriter.open(@store, analyzer) do |writer|
+         writer.set_similarity(@similarity.new) if @similarity
+         docs.each do |doc|
+           raise "Invalid arguments" unless doc[:field] && doc[:value] && doc[:document]
+           term = Term.new(doc[:field], doc[:value])
+           document = doc_from(doc[:document])
+           writer.updateDocument(term, document)
+         end
+       end
+       refresh_searcher
+     end
+
+     def update_document(doc)
+       update_documents([doc])
+     end
+
+     # terms should be an enumerable set of hashes with keys
+     # :field and :value, which combine to make a term matching documents to delete
+     def delete_documents(terms)
+       IndexWriter.open(@store, analyzer) do |writer|
+         terms.each do |t|
+           term = Term.new(t[:field], t[:value])
+           writer.deleteDocuments(term)
+         end
+       end
+       refresh_searcher
+     end
+
+     def delete_document(term)
+       delete_documents([term])
+     end
+
+     # Takes any kind of input object parsable by your #create_query method. Quack.
+     # Options patterns (see javadoc for org.apache.lucene.search.Searcher).
+     # Returns a TopDocs object.
+     # Note that Hits is deprecated, so the versions of search() returning a
+     # Hits object are not implemented.
+     def search(input, options = {})
+       query = input.kind_of?(Lucene::Search::Query) ? input : create_query(input)
+       @searcher ||= IndexSearcher.new(@store)
+       top_docs = if (hit_collector = options[:hit_collector])
+         args = [ options[:filter], hit_collector ].compact
+         @searcher.search(query, *args)
+         hit_collector.topDocs
+       else
+         options[:limit] ||= 25
+         options[:offset] ||= 0
+         args = [ options[:filter], (options[:limit] + options[:offset]) ]  # always include both, even if nil
+         args << options[:sort] if options[:sort]
+         @searcher.search(query, *args).offset!(options[:offset])
+       end
+       top_docs.each(@searcher) do |doc|
+         doc.tokens = self.tokens_for_doc(doc) if inspect_mode?
+         yield doc if block_given?
+       end
+       top_docs
+     end
+
+     # Reopen the searcher (used when the index has changed).
+     def refresh_searcher
+       @searcher = IndexSearcher.new(@store) if @searcher  # if it's nil, it'll be lazily loaded
+     end
+
+     def close
+       @searcher.close if @searcher
+       @reader.close if @reader
+     end
+
+     # Returns an instance of the Analyzer class defined within
+     # this class's namespace.
+     def analyzer
+       @analyzer ||= self.class::Analyzer.new
+     end
+
+     # Opens an IndexWriter for the duration of the block.
+     #   engine.writer { |w| w.add_document(doc) }
+     def writer
+       IndexWriter.open(@store, self.class::Analyzer.new) do |writer|
+         writer.set_similarity(@similarity.new) if @similarity
+         yield writer
+       end
+     end
+
+     # Opens an IndexSearcher for the duration of the block.
+     #   engine.searcher { |s| s.search(query_object) }
+     def searcher
+       IndexSearcher.open(@store) do |searcher|
+         searcher.set_similarity(@similarity.new) if @similarity
+         yield searcher
+       end
+     end
+
+     # Opens an IndexReader for the duration of the block.
+     #   engine.reader { |r| r.terms }
+     def reader
+       reader = IndexReader.open(@store)
+       yield reader
+       reader.close
+     end
+
+     def parser(field, analyzer = nil)
+       @parser ||= {}
+       @parser[field.to_sym] ||= Lucene::QueryParser::Parser.new(field, analyzer || self.analyzer)
+     end
+
+     def inspect_mode?
+       @inspect
+     end
+
+   end
+ end
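How a concrete engine typically hangs together under these conventions (ArticleEngine and its fields are invented for illustration):

    class ArticleEngine < Moonstone::Engine
      # Engine#analyzer instantiates self.class::Analyzer
      class Analyzer < Moonstone::Analyzer
        def self.new
          super(Lucene::Analysis::WhitespaceTokenizer,
                Lucene::Analysis::LowerCaseFilter)
        end
      end

      # turn a source record into a Lucene document
      def doc_from(record)
        Lucene::Document::Doc.create([
          ["title", record[:title], { :store => true, :index => :analyzed }]
        ])
      end

      # turn user input into a Lucene query
      def create_query(input)
        parser("title").parse(input.to_s)
      end
    end

    engine = ArticleEngine.new  # no :store option, so an in-memory RAMDirectory
    engine.index([{ :title => "The Moonstone" }])
    engine.search("moonstone") { |doc| puts doc["title"] }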
data/lib/moonstone/filter.rb ADDED
@@ -0,0 +1,30 @@
+ module Moonstone
+   class Filter < Lucene::Analysis::TokenFilter
+
+     def initialize(stream)
+       if block_given?
+         self.class.module_eval do
+           define_method :process do |token|
+             yield token
+           end
+         end
+       end
+       super
+       @stream = stream
+     end
+
+     def next(token=nil)
+       if token = (token ? @stream.next(token) : @stream.next)
+         text = process(token.term_text)
+         # skip a token if its text is empty
+         if text.empty?
+           token = self.next(token)
+         else
+           token.term_text = text
+           token
+         end
+       end
+     end
+
+   end
+ end
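Subclasses need only a process method mapping term text to new text; returning an empty string drops the token. An invented example:

    class Downcaser < Moonstone::Filter
      def process(text)
        text.downcase
      end
    end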
data/lib/moonstone/filters/synonymer.rb ADDED
@@ -0,0 +1,26 @@
+ module Moonstone
+
+   module Filters
+
+     class Synonymer < Moonstone::QueuedFilter
+
+       def initialize(stream, synonym_hash)
+         @synonym_hash = synonym_hash
+         super(stream)
+       end
+
+       def process(text)
+         if syns = @synonym_hash[text]
+           if syns.is_a?(String)
+             [text, syns]
+           elsif syns.is_a?(Array)
+             [text].concat syns
+           end
+         else
+           text
+         end
+       end
+
+     end
+   end
+ end
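The synonym hash maps a term to one synonym (a String) or several (an Array); process then returns the original term plus its synonyms, and QueuedFilter (below) emits the extras as additional tokens. A sketch with an invented hash and an assumed upstream token stream:

    synonyms = { "couch" => "sofa", "car" => ["auto", "automobile"] }
    filter   = Moonstone::Filters::Synonymer.new(upstream, synonyms)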
data/lib/moonstone/index_inspection.rb ADDED
@@ -0,0 +1,38 @@
+ # Methods to assist in index analysis
+ module Moonstone
+   class Engine
+
+     # Returns a hash of tokens, keyed on field name, for the given doc.
+     # Doc can be either a Document or an integer document id.
+     # Note that if it is a Document, doc.id cannot be nil.
+     def tokens_for_doc(doc, fields = nil)
+       tokens = {}
+       self.reader do |reader|
+         unless doc.kind_of?(Lucene::Document::Doc)
+           doc_id = doc
+           doc = reader.document(doc)
+           doc.id = doc_id
+         end
+         fields = doc.keys if fields.nil?
+         fields.each do |field|
+           tokens[field] = []
+           tfv = reader.getTermFreqVector(doc.id, field)
+           if tfv && tfv.size > 0 && tfv.respond_to?(:getTermPositions)
+             tv = tfv.getTerms
+             tv.length.times do |i|
+               positions = tfv.getTermPositions(i) || []
+               positions.each { |pos| tokens[field][pos] = tv[i] }
+             end
+           end
+         end
+       end
+       tokens
+     end
+
+     # Helper; delegates to tokens_for_doc.
+     def tokens_for_field(doc, field)
+       tokens_for_doc(doc, [field])[field]
+     end
+
+   end
+ end
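The shape tokens_for_doc returns, sketched for an assumed engine whose fields were indexed with term vectors and positions:

    engine.tokens_for_doc(0)
    # => { "title" => ["the", "moonstone"] }  (term positions become array indices)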
data/lib/moonstone/multi_analyzer.rb ADDED
@@ -0,0 +1,25 @@
+ module Moonstone
+   class MultiAnalyzer < Lucene::Analysis::Analyzer
+     attr_accessor :fields
+
+     # Moonstone::MultiAnalyzer.new :name => [KeywordTokenizer, SynonymFilter],
+     #   :categories => [WhitespaceTokenizer, SynonymFilter, StemFilter]
+     def self.new(hash={})
+       analyzer = super()
+       analyzer.fields = hash
+       analyzer
+     end
+
+     def tokenStream(field_name, reader)
+       filter_chain = @fields[field_name.to_sym] || @fields[true]
+       tokenizer, *args = filter_chain[0]
+       stream = tokenizer.new(reader, *args)
+       filter_chain.slice(1..-1).each do |filter|
+         klass, *args = filter
+         stream = klass.new(stream, *args)
+       end
+       stream
+     end
+
+   end
+ end
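A per-field chain sketch (field names invented); the chain keyed on true serves as the fallback for any field not listed:

    include Lucene::Analysis
    analyzer = Moonstone::MultiAnalyzer.new(
      :name       => [KeywordTokenizer],
      :categories => [WhitespaceTokenizer, LowerCaseFilter],
      true        => [WhitespaceTokenizer])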
data/lib/moonstone/queued_filter.rb ADDED
@@ -0,0 +1,45 @@
+ module Moonstone
+
+   class QueuedFilter < Moonstone::Filter
+
+     def initialize(stream)
+       @buffer = []
+       super
+     end
+
+     def read_buffer(token=nil)
+       if item = @buffer.shift
+         if item.is_a? String
+           token ||= Lucene::Analysis::Token.new
+           token.term_text = item
+           token
+         else
+           raise "What have you done?"
+         end
+       end
+     end
+
+     def next(token=nil)
+       if t = read_buffer(token)
+         t
+       elsif token = (token ? @stream.next(token) : @stream.next)
+         results = process(token.term_text)
+         if results.is_a? Array
+           text = results.shift
+           results.each { |t| @buffer << t }
+         else
+           text = results
+         end
+         # skip a token if its text is empty
+         if text && text.empty?
+           token = self.next(token)
+         else
+           token.term_text = text
+           token
+         end
+       end
+     end
+
+   end
+
+ end
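When process returns an array, the first element replaces the current token and the rest are buffered, so one input token can expand into several. An invented splitter:

    class HyphenSplitter < Moonstone::QueuedFilter
      def process(text)
        text.include?("-") ? text.split("-") : text
      end
    end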
data/lib/moonstone/racker/basic_search.rb ADDED
@@ -0,0 +1,28 @@
+ require 'moonstone/racker'
+ module Moonstone
+   module Racker
+     module BasicSearch
+       include Moonstone::Racker
+
+       # GET /search.html?input=happiness
+       def html_GET_search(request)
+         results = search(request.params['input'], search_options(request))
+         results.join("\n<br>")
+       end
+
+       # GET /search.json?input=happiness
+       def json_GET_search(request)
+         results = search(request.params['input'], search_options(request))
+         results.to_json
+       end
+
+       # POST /search.json
+       def json_POST_search(request)
+         options = search_options(request)
+         data = request.env['rack.input'].read
+         JSON.parse(data).map { |input| search(input, options) }.to_json
+       end
+
+     end
+   end
+ end
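Mixed into an engine served under Rack (see racker.rb below), these routes answer requests such as (host and port assumed):

    curl 'http://localhost:9292/search.json?input=happiness&limit=10&offset=20'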
data/lib/moonstone/racker/local_search.rb ADDED
@@ -0,0 +1,33 @@
+ require 'moonstone/racker'
+ module Moonstone
+   module Racker
+     module LocalSearch
+       include Moonstone::Racker
+
+       def json_GET_search(request)
+         args = request.params.values_at('input', 'lat', 'lon')
+         options = search_options(request)
+         args << options
+         t = Time.now
+         results = search(*args).to_hash
+         results[:time] = Time.now - t
+         results.to_json
+       end
+
+       # JSON body should contain an array of 3-element arrays (topic, lat, lon):
+       #   curl -i -X POST -d '[ ["plumbers", "", ""], ["burgers", "", ""] ]' \
+       #     http://localhost:9292/search.json
+       def json_POST_search(request)
+         options = search_options(request)
+         data = request.env['rack.input'].read
+         JSON.parse(data).map do |input, lat, lon|
+           t = Time.now
+           results = search(input, lat, lon, options).to_hash
+           results[:time] = Time.now - t
+           results
+         end.to_json
+       end
+
+     end
+   end
+ end
data/lib/moonstone/racker.rb ADDED
@@ -0,0 +1,84 @@
+ require 'rack'
+ require 'json'
+ module Moonstone
+   # include me in a Moonstone::Engine, maybe?
+   module Racker
+
+     PathMatcher = %r{^/([\w_]+)\.([\w_]+)$}
+
+     def call(env)
+       request, response = Rack::Request.new(env), Rack::Response.new
+       # Determine (or possibly fake) an HTTP method
+       real = request.request_method.upcase
+       http_method = if (real == 'POST') && (fake = request.params['_method'])
+         fake.upcase
+       else
+         real
+       end
+       # Match against a very limited species of URI path.
+       whole, action, ext = request.path_info.match(PathMatcher).to_a
+       # Poor man's content negotiation
+       content_type = case ext
+       when 'json'
+         'application/json'
+       end
+       response['Content-Type'] = content_type if content_type
+       # Poor man's routing
+       method_name = action ? "#{ext || 'html'}_#{http_method}_#{action}" : nil
+       if method_name && respond_to?(method_name)
+         response.body = send(method_name, request).to_s
+       else
+         response.status, response.body = 404, "404"
+       end
+       response.finish
+     end
+
+     # helper for action methods
+     def search_options(request)
+       params = request.params
+       limit = params['limit']
+       offset = params['offset']
+       options = {}
+       options[:limit] = limit.to_i if limit
+       options[:offset] = offset.to_i if offset
+       options
+     end
+
+     def json_GET_engine_version(request)
+       { :name => self.class.name,
+         :version => `git show-ref -h -s --abbrev HEAD`.chomp.split.first
+       }.to_json
+     end
+
+     def json_GET_index_info(request)
+       md = index_metadata || {}
+       { :build_date => md["build_date"],
+         :build_engine => { :name => md["engine_name"],
+                            :version => md["engine_version"] },
+         :query_conditions => md["query_conditions"],
+         :doc_count => doc_count
+       }.to_json
+     end
+
+     def json_GET_document(request)
+       document(request.params['id'].to_i).to_json
+     end
+
+     def self.generate_rackup_file(engine, store)
+       rackup = <<RACKUP
+ options[:Port] = 9293
+ #{yield}
+ require 'moonstone/racker/local_search'
+ #{engine}.module_eval do
+   include Moonstone::Racker::LocalSearch
+ end
+ run #{engine}.new(:store => "#{File.expand_path store}")
+ RACKUP
+
+       File.open "#{File.dirname(store)}/config.ru", "w" do |f|
+         f.puts rackup
+       end
+     end
+
+   end
+ end
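A hand-written config.ru equivalent to what generate_rackup_file emits (the engine name and store path are invented); serve it with rackup:

    require 'moonstone'
    require 'moonstone/racker/local_search'

    ArticleEngine.module_eval do
      include Moonstone::Racker::LocalSearch
    end
    run ArticleEngine.new(:store => "/var/index/articles")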
data/lib/moonstone/tokenizer.rb ADDED
@@ -0,0 +1,19 @@
+ module Moonstone
+   class Tokenizer < Lucene::Analysis::Tokenizer
+
+     include Lucene::Analysis
+
+     def initialize(reader)
+       @reader = java.io.BufferedReader.new(reader)
+     end
+
+     # No, this is not terribly useful. Subclass me already.
+     # Emits one token per line; returns nil at end of input.
+     def next(token=nil)
+       token = (token ? token.clear : Token.new)
+       line = @reader.read_line
+       return nil unless line
+       token.set_term_text line
+       token.set_start_offset 1
+       token.set_end_offset 1
+       token
+     end
+
+   end
+ end
data/lib/moonstone.rb ADDED
@@ -0,0 +1,28 @@
+ require 'java'
+ require 'logger'
+ require 'json'
+ require 'date'
+
+ $:.unshift(here = File.dirname(__FILE__))
+
+ Dir["#{here}/jar/*.jar"].each { |jar| require jar }
+ require 'lucene/analysis'
+ require 'lucene/document'
+ require 'lucene/function'
+ require 'lucene/index'
+ require 'lucene/query_parser'
+ require 'lucene/search'
+ require 'lucene/store'
+
+ require 'moonstone/engine'
+ require 'moonstone/tokenizer'
+ require 'moonstone/filter'
+ require 'moonstone/queued_filter'
+ require 'moonstone/analyzer'
+ require 'moonstone/multi_analyzer'
+ require 'moonstone/index_inspection'
+
+ require 'moonstone/filters/synonymer'
+
+ require 'moonstone/racker'
+
+ Moonstone::Logger = Logger.new($stderr) unless defined? Moonstone::Logger
metadata ADDED
@@ -0,0 +1,91 @@
+ --- !ruby/object:Gem::Specification
+ extensions: []
+
+ homepage:
+ executables: []
+
+ version: !ruby/object:Gem::Version
+   version: 0.6.0
+ post_install_message:
+ date: 2009-06-16 07:00:00 +00:00
+ files:
+ - lib/jar
+ - lib/lucene
+ - lib/moonstone
+ - lib/moonstone.rb
+ - lib/jar/lucene-core-2.4.0.jar
+ - lib/jar/lucene-spellchecker-2.4-dev.jar
+ - lib/lucene/analysis.rb
+ - lib/lucene/document.rb
+ - lib/lucene/function.rb
+ - lib/lucene/index.rb
+ - lib/lucene/query_parser.rb
+ - lib/lucene/search.rb
+ - lib/lucene/store.rb
+ - lib/moonstone/analyzer.rb
+ - lib/moonstone/engine.rb
+ - lib/moonstone/filter.rb
+ - lib/moonstone/filters
+ - lib/moonstone/index_inspection.rb
+ - lib/moonstone/multi_analyzer.rb
+ - lib/moonstone/queued_filter.rb
+ - lib/moonstone/racker
+ - lib/moonstone/racker.rb
+ - lib/moonstone/tokenizer.rb
+ - lib/moonstone/filters/synonymer.rb
+ - lib/moonstone/racker/basic_search.rb
+ - lib/moonstone/racker/local_search.rb
+ - Rakefile
+ rubygems_version: 1.3.1
+ rdoc_options: []
+
+ signing_key:
+ cert_chain: []
+
+ name: moonstone
+ has_rdoc: true
+ platform: ruby
+ summary: Moonstone Agile Search Framework
+ default_executable:
+ bindir: bin
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   version:
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: "0"
+ required_ruby_version: !ruby/object:Gem::Requirement
+   version:
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: "0"
+ require_paths:
+ - lib
+ specification_version: 2
+ test_files: []
+
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   type: :runtime
+   name: rspec
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     version:
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: "0"
+ description:
+ email: self@automatthew.com
+ authors:
+ - Matthew King
+ - Jason Rush
+ - Jay Donnell
+ - Dan Yoder
+ extra_rdoc_files: []
+
+ requirements: []
+
+ rubyforge_project:
+ autorequire: