solrium 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/LICENSE.txt +712 -0
  2. data/README.markdown +51 -0
  3. data/Rakefile +24 -0
  4. data/VERSION.yml +4 -0
  5. data/bin/solr +9 -0
  6. data/lib/jars/jetty/ant-1.6.5.jar +0 -0
  7. data/lib/jars/jetty/commons-codec-1.3.jar +0 -0
  8. data/lib/jars/jetty/commons-fileupload-1.2.jar +0 -0
  9. data/lib/jars/jetty/jetty-7.0.0.pre5.jar +0 -0
  10. data/lib/jars/jetty/jetty-util-7.0.0.pre5.jar +0 -0
  11. data/lib/jars/jetty/jsp-2.1.jar +0 -0
  12. data/lib/jars/jetty/jsp-api-2.1.jar +0 -0
  13. data/lib/jars/jetty/servlet-api-3.0.pre4.jar +0 -0
  14. data/lib/jars/lucene/lucene-analyzers-2.4-dev.jar +0 -0
  15. data/lib/jars/lucene/lucene-core-2.4-dev.jar +0 -0
  16. data/lib/jars/lucene/lucene-highlighter-2.4-dev.jar +0 -0
  17. data/lib/jars/lucene/lucene-memory-2.4-dev.jar +0 -0
  18. data/lib/jars/lucene/lucene-queries-2.4-dev.jar +0 -0
  19. data/lib/jars/lucene/lucene-snowball-2.4-dev.jar +0 -0
  20. data/lib/jars/lucene/lucene-spellchecker-2.4-dev.jar +0 -0
  21. data/lib/jars/solr/apache-solr-common-1.3.0.jar +0 -0
  22. data/lib/jars/solr/apache-solr-core-1.3.0.jar +0 -0
  23. data/lib/jars/solr/commons-io-1.2.jar +0 -0
  24. data/lib/jars/solr/stax-1.2.0-dev.jar +0 -0
  25. data/lib/jars/solr/stax-api-1.0.jar +0 -0
  26. data/lib/jars/solr/stax-utils.jar +0 -0
  27. data/lib/jars/solr/xpp3-1.1.3.4.O.jar +0 -0
  28. data/lib/jetty.rb +83 -0
  29. data/lib/lucene.rb +33 -0
  30. data/lib/queries.rb +60 -0
  31. data/lib/remote.rb +22 -0
  32. data/lib/solr.rb +69 -0
  33. data/lib/solr_ext.rb +81 -0
  34. data/lib/webapp/admin/_info.jsp +110 -0
  35. data/lib/webapp/admin/action.jsp +116 -0
  36. data/lib/webapp/admin/analysis.jsp +456 -0
  37. data/lib/webapp/admin/analysis.xsl +179 -0
  38. data/lib/webapp/admin/distributiondump.jsp +158 -0
  39. data/lib/webapp/admin/favicon.ico +0 -0
  40. data/lib/webapp/admin/form.jsp +129 -0
  41. data/lib/webapp/admin/get-file.jsp +72 -0
  42. data/lib/webapp/admin/get-properties.jsp +24 -0
  43. data/lib/webapp/admin/header.jsp +41 -0
  44. data/lib/webapp/admin/index.jsp +154 -0
  45. data/lib/webapp/admin/jquery-1.2.3.min.js +32 -0
  46. data/lib/webapp/admin/logging.jsp +50 -0
  47. data/lib/webapp/admin/logging.xsl +91 -0
  48. data/lib/webapp/admin/meta.xsl +34 -0
  49. data/lib/webapp/admin/ping.jsp +65 -0
  50. data/lib/webapp/admin/ping.xsl +71 -0
  51. data/lib/webapp/admin/raw-schema.jsp +38 -0
  52. data/lib/webapp/admin/registry.jsp +107 -0
  53. data/lib/webapp/admin/registry.xsl +287 -0
  54. data/lib/webapp/admin/schema.jsp +661 -0
  55. data/lib/webapp/admin/solr-admin.css +206 -0
  56. data/lib/webapp/admin/solr-head.gif +0 -0
  57. data/lib/webapp/admin/solr-head.png +0 -0
  58. data/lib/webapp/admin/solr-lowercase.gif +0 -0
  59. data/lib/webapp/admin/solr-lowercase.png +0 -0
  60. data/lib/webapp/admin/stats.jsp +94 -0
  61. data/lib/webapp/admin/stats.xsl +220 -0
  62. data/lib/webapp/admin/tabular.xsl +141 -0
  63. data/lib/webapp/admin/threaddump.jsp +110 -0
  64. data/lib/webapp/admin/threaddump.xsl +103 -0
  65. data/lib/webapp/favicon.ico +0 -0
  66. data/lib/webapp/index.jsp +42 -0
  67. metadata +120 -0
data/README.markdown ADDED
@@ -0,0 +1,51 @@
1
+ # Solrium
2
+
3
+ A JRuby integration layer for Apache Solr/Lucene with a focus on usage from within irb.
4
+
5
+ ## Features
6
+
7
+ * embed solr directly into your app
8
+ * convenience methods for searching using ruby idioms
9
+ * jetty integration
10
+ * bundles all necessary jars (solr, lucene & jetty)
11
+
12
+ I mainly wrote it as a tool for quickly prototyping/debugging lucene queries and solr parser plugins.
13
+
14
+ Note: there's limited support for remote solr queries; searching will be done in-process using a [SolrIndexSearcher](http://lucene.apache.org/solr/api/org/apache/solr/search/SolrIndexSearcher.html), bypassing the HTTP layer. If you need to do remote queries in ruby, consider using something like [sunspot](http://github.com/outoftime/sunspot) instead.
15
+
16
+ ## Synopsis
17
+
18
+ ### Embedding solr into jirb
19
+
20
+ $ jirb -r lib/solr.rb
21
+ irb> solr = Solrium::Solr.new('/path/to/solr_home')
22
+ irb> solr.search("foo")
23
+ => [[Document: {"id"=>"1", "name_t"=>"Foomatic"}, 1.2112]]
24
+
25
+ ### Embedding jetty
26
+
27
+ $ jirb -r lib/solr.rb
28
+ irb> Solrium::Jetty.new('/path/to/solr_home').start(2000).join
29
+ 2009-05-14 13:47:24.444::INFO: jetty-7.0.0.pre5
30
+ ....
31
+ INFO: SolrDispatchFilter.init() done
32
+ 2009-05-14 13:47:25.336::INFO: Started SelectChannelConnector@0.0.0.0:2000
33
+
34
+ ## Credits
35
+
36
+ Lucene, Solr, Jetty are all licensed under Apache License Version 2.0.
37
+
38
+ ## Contact
39
+
40
+ Jan Berkel <jan.berkel@gmail.com>
41
+
42
+ ## License
43
+
44
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
45
+ You may obtain a copy of the License at
46
+
47
+ http://www.apache.org/licenses/LICENSE-2.0
48
+
49
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
50
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
51
+ and limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,24 @@
1
# Build tasks for the solrium gem: gem packaging through Jeweler (when it is
# installed) and a task that runs the tests under test/.
require 'rubygems'
require 'rake/testtask'

# Jeweler is optional: when it cannot be loaded only a hint is printed and the
# packaging tasks are simply not defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "solrium"
    # A real one-line summary instead of the "TODO" placeholder, which would
    # otherwise end up in the published gem metadata.
    gemspec.summary = "JRuby integration layer for Apache Solr/Lucene"
    gemspec.email = "jan.berkel@gmail.com"
    gemspec.homepage = "http://github.com/jberkel/solrium"
    gemspec.description = "A JRuby integration layer for Apache Solr/Lucene with a focus on irb integration."
    gemspec.authors = ["Jan Berkel"]
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# Runs every test/*_test.rb file with lib/ on the load path.
Rake::TestTask.new do |t|
  t.libs << "lib"
  t.test_files = FileList['test/*_test.rb']
  t.verbose = true
end
24
+
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 1
4
+ :patch: 0
data/bin/solr ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env jruby
# Starts an embedded Jetty server hosting Solr.
#
# Usage: solr [SOLR_HOME] [PORT]
#   SOLR_HOME  solr home directory (default: solr-instance)
#   PORT       port to listen on   (default: 8000)

require 'java'
#require 'solr'
require File.join(File.dirname(__FILE__), '..', 'lib', 'jetty')

if __FILE__ == $0
  # Both arguments are optional; without them the behaviour is the same as
  # the previous hard-coded 'solr-instance' / 8000.
  solr_home = ARGV[0] || 'solr-instance'
  port      = Integer(ARGV[1] || 8000)

  # start returns the Jetty server object; join is called on it, presumably
  # to keep the script alive while the server runs.
  Solrium::Jetty.new(solr_home).start(port).join
end
+ end
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/lib/jetty.rb ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ require 'java'
4
+
5
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'jetty', '*.jar')].each { |jar| require jar }
6
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'solr', '*.jar')].each { |jar| require jar }
7
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'lucene', '*.jar')].each { |jar| require jar }
8
+
9
module Solrium
  # Runs Solr inside an embedded Jetty server in the current JRuby process.
  class Jetty
    include_class 'org.mortbay.jetty.Server'
    include_class 'org.mortbay.jetty.servlet.Context'
    include_class 'org.mortbay.jetty.servlet.ServletHolder'
    include_class 'org.mortbay.jetty.servlet.DefaultServlet'
    include_class 'org.mortbay.jetty.handler.ResourceHandler'
    include_class 'org.mortbay.jetty.handler.MovedContextHandler'
    include_class 'org.mortbay.jetty.handler.HandlerList'
    include_class 'org.apache.solr.servlet.SolrServlet'
    include_class 'org.apache.solr.servlet.SolrUpdateServlet'
    include_class 'org.apache.jasper.servlet.JspServlet'
    include_class 'org.mortbay.thread.QueuedThreadPool'
    include_class 'org.mortbay.jetty.nio.SelectChannelConnector'
    include_class 'org.mortbay.jetty.Handler'

    # dir - the solr home directory. It is stored in the "solr.solr.home"
    #       Java system property, i.e. it is a process-wide setting.
    def initialize(dir='solr-instance')
      Java::JavaLang::System.setProperty("solr.solr.home", dir)
    end

    # Builds the server and the solr context, starts the server and returns it.
    #
    # port            - port the connector listens on
    # servlet_path    - context path the solr context is mounted at
    # admin_interface - when true the bundled admin webapp is served and a
    #                   MovedContextHandler for '/' pointing at
    #                   "<servlet_path>/admin" is added
    #
    # An at_exit hook stops the server again; errors while stopping are ignored.
    def start(port=8000, servlet_path='/solr', admin_interface=true)
      jetty = create_server(port)
      solr_context = create_solr_context(servlet_path, admin_interface)

      handlers = HandlerList.new
      handlers.add_handler(solr_context)
      if admin_interface
        MovedContextHandler.new(handlers, '/', "#{servlet_path}/admin")
      end

      jetty.set_handler(handlers)
      jetty.start

      at_exit { jetty.stop rescue nil }

      jetty
    end

    private

    # Creates a Server with a 5..50 thread pool and one connector on +port+.
    def create_server(port)
      jetty = Server.new

      pool = QueuedThreadPool.new
      pool.min_threads = 5
      pool.max_threads = 50
      jetty.set_thread_pool(pool)

      # use the more efficient NIO connector
      nio_connector = SelectChannelConnector.new
      nio_connector.port = port
      jetty.add_connector(nio_connector)

      jetty
    end

    # Creates the servlet context for solr mounted at +path+: default and JSP
    # servlets, the SolrDispatchFilter on every URL and an 'xsl' mime mapping.
    # With +admin_interface+ the ping JSP is exposed as /admin/ping and the
    # bundled webapp directory becomes the resource base.
    def create_solr_context(path, admin_interface=true)
      solr_context = Context.new(nil, path, Context::SESSIONS)
      solr_context.add_servlet(ServletHolder.new(DefaultServlet.new), "/")
      solr_context.add_servlet(ServletHolder.new(JspServlet.new), "*.jsp")
      solr_context.add_filter('org.apache.solr.servlet.SolrDispatchFilter', '/*', Handler::DEFAULT)
      solr_context.set_mime_types(Java::org.mortbay.jetty.MimeTypes.new)
      solr_context.mime_types.add_mime_mapping('xsl', 'application/xslt+xml')

      return solr_context unless admin_interface

      ping = ServletHolder.new(JspServlet.new)
      ping.set_forced_path('/admin/ping.jsp')
      solr_context.add_servlet(ping, '/admin/ping')
      solr_context.set_resource_base(File.join(File.dirname(__FILE__), 'webapp'))

      solr_context
    end
  end
end
data/lib/lucene.rb ADDED
@@ -0,0 +1,33 @@
1
+ require 'java'
2
+ require File.join(File.dirname(__FILE__), 'queries')
3
+
4
module Solrium

  # Plain lucene access: works on the index directory directly and does not
  # involve a solr core at all.
  class Lucene
    include Queries

    include_class 'org.apache.lucene.queryParser.QueryParser'
    include_class 'org.apache.lucene.analysis.standard.StandardAnalyzer'
    include_class 'org.apache.lucene.search.IndexSearcher'

    attr_accessor :index_path

    # solr_home - directory whose data/index subdirectory is used as the
    #             index path
    def initialize(solr_home = "solr-instance")
      @index_path = File.join(solr_home, 'data', 'index')
    end

    # Returns a new StandardAnalyzer on every call.
    def analyzer
      StandardAnalyzer.new
    end

    # Opens an IndexSearcher on the index path, hands it to the block and
    # closes it afterwards, also when the block raises.
    # Returns whatever the block returns.
    def with_searcher(&block)
      index_searcher = IndexSearcher.new(@index_path)
      begin
        block.call(index_searcher)
      ensure
        index_searcher.close
      end
    end
  end
end
data/lib/queries.rb ADDED
@@ -0,0 +1,60 @@
1
+ require 'java'
2
+
3
+ require File.join(File.dirname(__FILE__), 'solr_ext')
4
+
5
module Solrium
  # Query helpers mixed into the searcher wrappers.
  #
  # The including class has to provide +analyzer+ and +with_searcher+ (which
  # yields a searcher to its block); both are called from the methods below.
  module Queries

    include_class 'org.apache.lucene.queryParser.QueryParser'
    include_class 'org.apache.lucene.search.MultiPhraseQuery'
    include_class 'org.apache.lucene.search.WildcardTermEnum'

    include_class 'org.apache.lucene.search.BooleanQuery'
    include_class 'org.apache.lucene.search.WildcardQuery'
    include_class 'org.apache.lucene.search.TermQuery'

    # convenience search method
    #
    # q     - a lucene Query, or any object whose to_s is parsed as a query
    # max   - maximum number of hits to fetch
    # block - called with (document, score, searcher) for every hit; when
    #         omitted each hit becomes [document, score]
    #
    # Returns an array with one block result per hit.
    def search(q, max=10, &block)
      q = q.is_a?(org.apache.lucene.search.Query) ? q : parse(q.to_s)

      block ||= lambda do |doc, score, searcher|
        [doc, score]
      end

      with_searcher do |searcher|
        searcher.search(q, nil, max).scoreDocs.map do |sd|
          block.call(searcher.doc(sd.doc), sd.score, searcher)
        end
      end
    end

    # Parses the query string +q+ with the including class's analyzer;
    # +default+ is the field used for terms without an explicit field.
    def parse(q, default='default')
      QueryParser.new(default, analyzer).parse(q)
    end

    # a wildcard phrase query
    #
    # terms - the words of the phrase; the last one is treated as a prefix.
    #
    # No terms: an empty MultiPhraseQuery.
    # One term: a BooleanQuery matching the term itself or "<term>*".
    # Otherwise: a MultiPhraseQuery of the leading terms followed by every
    # indexed term matching "<last term>*"; an empty MultiPhraseQuery when
    # nothing in the index matches that prefix.
    def wildcard_pq(*terms)
      return MultiPhraseQuery.new if terms.empty?

      if terms.size == 1
        #only one term, do simple term+wildcard query
        should = org.apache.lucene.search.BooleanClause::Occur::SHOULD
        bq = BooleanQuery.new
        bq.add(TermQuery.new(Term[terms[0]]), should)
        bq.add(WildcardQuery.new(Term[terms[0] + "*"]), should)
        return bq
      end

      mpq = MultiPhraseQuery.new
      terms[0..-2].each { |t| mpq.add(Term[t]) }

      completions = with_searcher { |s| wildcard_completions(s.reader, terms[-1]) }
      return MultiPhraseQuery.new if completions.empty?

      # MultiPhraseQuery#add needs a Term[] for the alternatives at one position
      alternatives = java.lang.reflect.Array.newInstance(Term.java_class, completions.length)
      completions.each_with_index { |t, i| alternatives[i] = t }
      mpq.add(alternatives)
      mpq
    end

    private

    # Returns a ruby array with every term in +reader+ matching "<prefix>*".
    def wildcard_completions(reader, prefix)
      term_enum = WildcardTermEnum.new(reader, Term["#{prefix}*"])
      matches = []
      begin
        # A filtered term enum is already positioned on its first match once
        # constructed, so the current term is read BEFORE advancing. Advancing
        # first would silently drop the first completion. term is nil once
        # the enum is exhausted (or when there was no match at all).
        while (current = term_enum.term)
          matches << current
          break unless term_enum.next
        end
      ensure
        # release the underlying enum
        term_enum.close
      end
      matches
    end
  end
end
data/lib/remote.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
module Solrium
  # Small HTTP client for a solr server running on localhost.
  # Usable as Solrium::Remote.query(...) or as a mixin.
  module Remote
    extend self

    # Sends a select request to http://localhost:<port>/solr/select and
    # returns the response in solr's ruby format, evaluated into ruby objects.
    #
    # q      - the query string (sent as the 'q' parameter)
    # params - additional request parameters; keys and values may be any
    #          objects, they are converted with to_s and URL-escaped.
    #          The hash passed in is not modified.
    # port   - port of the solr server
    #
    # Raises an HTTP error (via Net::HTTPResponse#value) when the response
    # status is not 2xx.
    def query(q, params={}, port=8000)
      # merge into a copy so the caller's hash stays untouched
      request_params = params.merge('q' => q, 'wt' => 'ruby')

      # to_s first: CGI.escape only accepts strings (e.g. 'rows' => 10)
      query_string = request_params.map { |k, v|
        "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
      }.join("&")

      url = URI.parse("http://localhost:#{port}")
      res = Net::HTTP.start(url.host, url.port) do |http|
        http.get("/solr/select?" + query_string)
      end
      res.value

      # SECURITY: the response body is executed as ruby code. That is how the
      # 'wt=ruby' format is meant to be consumed, but it is only acceptable
      # because the request goes to a local, trusted solr server. Never point
      # this at a server you do not control.
      eval(res.body)
    end

  end
end
data/lib/solr.rb ADDED
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env jruby
2
+ require 'java'
3
+ require 'fileutils'
4
+
5
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'lucene', '*.jar')].each { |jar| require jar }
6
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'solr', '*.jar')].each { |jar| require jar }
7
+
8
+ require File.join(File.dirname(__FILE__), 'solr_ext')
9
+ require File.join(File.dirname(__FILE__), 'lucene')
10
+ require File.join(File.dirname(__FILE__), 'queries')
11
+ require File.join(File.dirname(__FILE__), 'remote')
12
+ require File.join(File.dirname(__FILE__), 'jetty')
13
+
14
module Solrium
  # Wraps a SolrCore that lives inside the current JRuby process.
  class Solr
    include Queries
    include Remote

    include_class 'org.apache.solr.core.SolrCore'
    include_class 'org.apache.solr.core.SolrConfig'

    attr_accessor :config, :core

    # solr_home - existing solr home directory; it is stored in the
    #             "solr.solr.home" Java system property.
    #
    # A block, when given, is called with the new object BEFORE the config
    # and the core are created. An at_exit hook closes the core, ignoring
    # errors.
    #
    # Raises RuntimeError when solr_home is not a directory.
    def initialize(solr_home='solr-instance')
      unless File.directory?(solr_home)
        raise "solr_home #{solr_home} not found"
      end

      Java::JavaLang::System.setProperty("solr.solr.home", solr_home)

      yield self if block_given?

      @config = SolrConfig.new
      @core = SolrCore.new('core', nil, @config, nil, nil)

      at_exit { @core.close rescue nil }
    end

    # Closes the wrapped core.
    def close
      core.close
    end

    # Returns the java.util.logging logger called +which+
    # (the empty default name is the root logger).
    def logger(which="")
      java.util.logging.Logger.getLogger(which)
    end

    # The analyzer of the core's schema.
    def analyzer
      core.schema.analyzer
    end

    # Fetches the core's searcher reference, passes the searcher to the block
    # and decrements the reference afterwards, also when the block raises.
    # Returns whatever the block returns.
    def with_searcher(&block)
      reference = core.searcher
      current = reference.get
      begin
        block.call(current)
      ensure
        reference.decref
      end
    end

    class << self
      # Path of the configuration template (solr-instance/conf) relative to
      # this file.
      def template_dir
        File.join(File.dirname(__FILE__), '..', 'solr-instance/conf')
      end

      # Copies the configuration template recursively into +dest+.
      # Raises RuntimeError when dest is not a directory.
      def init_solr_dir(dest)
        raise "#{dest} is not a dir" unless File.directory?(dest)
        FileUtils.cp_r template_dir, dest
      end
    end
  end
end
data/lib/solr_ext.rb ADDED
@@ -0,0 +1,81 @@
1
+ # make lucene/solr a bit more rubyish
2
# Ruby-friendly additions to a few lucene classes. The Java classes are
# imported into the Solrium namespace and then reopened.
module Solrium
  include_class 'org.apache.lucene.index.TermEnum'
  include_class 'org.apache.lucene.index.Term'
  include_class 'org.apache.lucene.document.Document'
  # (Field used to be imported twice; once is enough)
  include_class 'org.apache.lucene.document.Field'

  include_class 'org.apache.lucene.search.Query'
  include_class 'org.apache.lucene.search.BooleanQuery'

  #sugar, sugar... honey, honey.
  Query.class_eval do
    # a | b  => BooleanQuery in which both clauses are SHOULD
    def |(other)
      boolean_query(self, other, org.apache.lucene.search.BooleanClause::Occur::SHOULD)
    end

    # a & b  => BooleanQuery in which both clauses are MUST
    def &(other)
      boolean_query(self, other, org.apache.lucene.search.BooleanClause::Occur::MUST)
    end

    # Builds a BooleanQuery holding +a+ and +b+, both added with +occur+.
    def boolean_query(a,b,occur)
      bq = BooleanQuery.new
      bq.add(a, occur)
      bq.add(b, occur)
      bq
    end
  end

  Document.class_eval do
    # doc['name'] => the Field called 'name' (result of getField)
    def [](n)
      self.getField(n)
    end

    def inspect
      "Document: " + to_hash.inspect
    end

    # Field name => string value. When a name occurs more than once the
    # last field wins.
    def to_hash
      fields.inject({}) do |h, f|
        h[f.name] = f.stringValue()
        h
      end
    end
  end

  Field.class_eval do
    def inspect
      "#{name}: #{to_s}"
    end

    # The string value of the field.
    def to_s
      self.stringValue()
    end
  end

  # lets you use Term['foo'] ==> Term.new('default', foo)
  Term.class_eval do
    def self.[](t)
      new('default', t)
    end
  end

  #make TermEnum a bit friendlier to use
  class TermEnum
    include Enumerable

    # Yields each term of the enumeration.
    #
    # NOTE(review): this advances with next() BEFORE reading term(). That
    # looks right for an enum that starts in front of its first term, but an
    # enum that is already positioned on its first term when handed over
    # (filtered enums such as WildcardTermEnum appear to be) would lose that
    # first term here - confirm against the lucene docs before relying on it.
    def each
      while self.next()
        yield(self.term())
      end
    end

    # Returns the remaining terms as a Java Term[] array.
    def to_java_array
      elements = to_a
      array = java.lang.reflect.Array.newInstance(Term.java_class, elements.length)
      elements.each_with_index { |e,i| array[i] = e }
      array
    end
  end
end