solrium 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/LICENSE.txt +712 -0
  2. data/README.markdown +51 -0
  3. data/Rakefile +24 -0
  4. data/VERSION.yml +4 -0
  5. data/bin/solr +9 -0
  6. data/lib/jars/jetty/ant-1.6.5.jar +0 -0
  7. data/lib/jars/jetty/commons-codec-1.3.jar +0 -0
  8. data/lib/jars/jetty/commons-fileupload-1.2.jar +0 -0
  9. data/lib/jars/jetty/jetty-7.0.0.pre5.jar +0 -0
  10. data/lib/jars/jetty/jetty-util-7.0.0.pre5.jar +0 -0
  11. data/lib/jars/jetty/jsp-2.1.jar +0 -0
  12. data/lib/jars/jetty/jsp-api-2.1.jar +0 -0
  13. data/lib/jars/jetty/servlet-api-3.0.pre4.jar +0 -0
  14. data/lib/jars/lucene/lucene-analyzers-2.4-dev.jar +0 -0
  15. data/lib/jars/lucene/lucene-core-2.4-dev.jar +0 -0
  16. data/lib/jars/lucene/lucene-highlighter-2.4-dev.jar +0 -0
  17. data/lib/jars/lucene/lucene-memory-2.4-dev.jar +0 -0
  18. data/lib/jars/lucene/lucene-queries-2.4-dev.jar +0 -0
  19. data/lib/jars/lucene/lucene-snowball-2.4-dev.jar +0 -0
  20. data/lib/jars/lucene/lucene-spellchecker-2.4-dev.jar +0 -0
  21. data/lib/jars/solr/apache-solr-common-1.3.0.jar +0 -0
  22. data/lib/jars/solr/apache-solr-core-1.3.0.jar +0 -0
  23. data/lib/jars/solr/commons-io-1.2.jar +0 -0
  24. data/lib/jars/solr/stax-1.2.0-dev.jar +0 -0
  25. data/lib/jars/solr/stax-api-1.0.jar +0 -0
  26. data/lib/jars/solr/stax-utils.jar +0 -0
  27. data/lib/jars/solr/xpp3-1.1.3.4.O.jar +0 -0
  28. data/lib/jetty.rb +83 -0
  29. data/lib/lucene.rb +33 -0
  30. data/lib/queries.rb +60 -0
  31. data/lib/remote.rb +22 -0
  32. data/lib/solr.rb +69 -0
  33. data/lib/solr_ext.rb +81 -0
  34. data/lib/webapp/admin/_info.jsp +110 -0
  35. data/lib/webapp/admin/action.jsp +116 -0
  36. data/lib/webapp/admin/analysis.jsp +456 -0
  37. data/lib/webapp/admin/analysis.xsl +179 -0
  38. data/lib/webapp/admin/distributiondump.jsp +158 -0
  39. data/lib/webapp/admin/favicon.ico +0 -0
  40. data/lib/webapp/admin/form.jsp +129 -0
  41. data/lib/webapp/admin/get-file.jsp +72 -0
  42. data/lib/webapp/admin/get-properties.jsp +24 -0
  43. data/lib/webapp/admin/header.jsp +41 -0
  44. data/lib/webapp/admin/index.jsp +154 -0
  45. data/lib/webapp/admin/jquery-1.2.3.min.js +32 -0
  46. data/lib/webapp/admin/logging.jsp +50 -0
  47. data/lib/webapp/admin/logging.xsl +91 -0
  48. data/lib/webapp/admin/meta.xsl +34 -0
  49. data/lib/webapp/admin/ping.jsp +65 -0
  50. data/lib/webapp/admin/ping.xsl +71 -0
  51. data/lib/webapp/admin/raw-schema.jsp +38 -0
  52. data/lib/webapp/admin/registry.jsp +107 -0
  53. data/lib/webapp/admin/registry.xsl +287 -0
  54. data/lib/webapp/admin/schema.jsp +661 -0
  55. data/lib/webapp/admin/solr-admin.css +206 -0
  56. data/lib/webapp/admin/solr-head.gif +0 -0
  57. data/lib/webapp/admin/solr-head.png +0 -0
  58. data/lib/webapp/admin/solr-lowercase.gif +0 -0
  59. data/lib/webapp/admin/solr-lowercase.png +0 -0
  60. data/lib/webapp/admin/stats.jsp +94 -0
  61. data/lib/webapp/admin/stats.xsl +220 -0
  62. data/lib/webapp/admin/tabular.xsl +141 -0
  63. data/lib/webapp/admin/threaddump.jsp +110 -0
  64. data/lib/webapp/admin/threaddump.xsl +103 -0
  65. data/lib/webapp/favicon.ico +0 -0
  66. data/lib/webapp/index.jsp +42 -0
  67. metadata +120 -0
data/README.markdown ADDED
@@ -0,0 +1,51 @@
1
+ # Solrium
2
+
3
+ A JRuby integration layer for Apache Solr/Lucene with a focus on usage from within irb.
4
+
5
+ ## Features
6
+
7
+ * embed solr directly into your app
8
+ * convenience methods for searching using ruby idioms
9
+ * jetty integration
10
+ * bundles all necessary jars (solr, lucene & jetty)
11
+
12
+ I mainly wrote it as a tool for quickly prototyping/debugging lucene queries and solr parser plugins.
13
+
14
+ Note: there's only limited support for remote solr queries; searching will be done in-process using a [SolrIndexSearcher](http://lucene.apache.org/solr/api/org/apache/solr/search/SolrIndexSearcher.html), bypassing the HTTP layer. If you need to do remote queries in ruby, consider using something like [sunspot](http://github.com/outoftime/sunspot) instead.
15
+
16
+ ## Synopsis
17
+
18
+ ### Embedding solr into jirb
19
+
20
+ $ jirb -r lib/solr.rb
21
+ irb> solr = Solrium::Solr.new('/path/to/solr_home')
22
+ irb> solr.search("foo")
23
+ => [[Document: {"id"=>"1", "name_t"=>"Foomatic"}, 1.2112]]
24
+
25
+ ### Embedding jetty
26
+
27
+ $ jirb -r lib/solr.rb
28
+ irb> Solrium::Jetty.new('/path/to/solr_home').start(2000).join
29
+ 2009-05-14 13:47:24.444::INFO: jetty-7.0.0.pre5
30
+ ....
31
+ INFO: SolrDispatchFilter.init() done
32
+ 2009-05-14 13:47:25.336::INFO: Started SelectChannelConnector@0.0.0.0:2000
33
+
34
+ ## Credits
35
+
36
+ Lucene, Solr and Jetty are all licensed under the Apache License, Version 2.0.
37
+
38
+ ## Contact
39
+
40
+ Jan Berkel <jan.berkel@gmail.com>
41
+
42
+ ## License
43
+
44
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
45
+ You may obtain a copy of the License at
46
+
47
+ http://www.apache.org/licenses/LICENSE-2.0
48
+
49
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
50
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
51
+ and limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,24 @@
1
+ require 'rubygems'
2
+ require 'rake/testtask'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gemspec|
7
+ gemspec.name = "solrium"
8
+ gemspec.summary = "TODO"
9
+ gemspec.email = "jan.berkel@gmail.com"
10
+ gemspec.homepage = "http://github.com/jberkel/solrium"
11
+ gemspec.description = "A JRuby integration layer for Apache Solr/Lucene with a focus on irb integration."
12
+ gemspec.authors = ["Jan Berkel"]
13
+ end
14
+ rescue LoadError
15
+ puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
16
+ end
17
+
18
+
19
+ Rake::TestTask.new do |t|
20
+ t.libs << "lib"
21
+ t.test_files = FileList['test/*_test.rb']
22
+ t.verbose = true
23
+ end
24
+
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 1
4
+ :patch: 0
data/bin/solr ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ require 'java'
4
+ #require 'solr'
5
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'jetty')
6
+
7
+ if __FILE__ == $0
8
+ Solrium::Jetty.new('solr-instance').start(8000).join
9
+ end
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/lib/jetty.rb ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ require 'java'
4
+
5
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'jetty', '*.jar')].each { |jar| require jar }
6
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'solr', '*.jar')].each { |jar| require jar }
7
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'lucene', '*.jar')].each { |jar| require jar }
8
+
9
+ module Solrium
10
+ class Jetty
11
+ include_class 'org.mortbay.jetty.Server'
12
+ include_class 'org.mortbay.jetty.servlet.Context'
13
+ include_class 'org.mortbay.jetty.servlet.ServletHolder'
14
+ include_class 'org.mortbay.jetty.servlet.DefaultServlet'
15
+ include_class 'org.mortbay.jetty.handler.ResourceHandler'
16
+ include_class 'org.mortbay.jetty.handler.MovedContextHandler'
17
+ include_class 'org.mortbay.jetty.handler.HandlerList'
18
+ include_class 'org.apache.solr.servlet.SolrServlet'
19
+ include_class 'org.apache.solr.servlet.SolrUpdateServlet'
20
+ include_class 'org.apache.jasper.servlet.JspServlet'
21
+ include_class 'org.mortbay.thread.QueuedThreadPool'
22
+ include_class 'org.mortbay.jetty.nio.SelectChannelConnector'
23
+ include_class 'org.mortbay.jetty.Handler'
24
+
25
+
26
+ def initialize(dir='solr-instance')
27
+ Java::JavaLang::System.setProperty("solr.solr.home", dir)
28
+ end
29
+
30
+ def start(port=8000, servlet_path='/solr', admin_interface=true)
31
+ server = create_server(port)
32
+ context = create_solr_context(servlet_path, admin_interface)
33
+
34
+ hl = HandlerList.new
35
+ hl.add_handler(context)
36
+ MovedContextHandler.new(hl, '/', "#{servlet_path}/admin") if admin_interface
37
+
38
+ server.set_handler(hl)
39
+ server.start
40
+
41
+ at_exit do
42
+ server.stop rescue nil
43
+ end
44
+ server
45
+ end
46
+
47
+ private
48
+ def create_server(port)
49
+ server = Server.new
50
+
51
+ thread_pool = QueuedThreadPool.new
52
+ thread_pool.min_threads = 5
53
+ thread_pool.max_threads = 50
54
+ server.set_thread_pool(thread_pool)
55
+
56
+ #use the more efficient NIO connector
57
+ connector = SelectChannelConnector.new
58
+ connector.port = port
59
+
60
+ server.add_connector(connector)
61
+ server
62
+ end
63
+
64
+ def create_solr_context(path, admin_interface=true)
65
+ context = Context.new(nil, path, Context::SESSIONS)
66
+ context.add_servlet(ServletHolder.new(DefaultServlet.new), "/")
67
+ context.add_servlet(ServletHolder.new(JspServlet.new), "*.jsp")
68
+ context.add_filter('org.apache.solr.servlet.SolrDispatchFilter', '/*', Handler::DEFAULT)
69
+ context.set_mime_types(Java::org.mortbay.jetty.MimeTypes.new)
70
+ context.mime_types.add_mime_mapping('xsl', 'application/xslt+xml')
71
+
72
+ if admin_interface
73
+ jsp_servlet = ServletHolder.new(JspServlet.new)
74
+ jsp_servlet.set_forced_path('/admin/ping.jsp')
75
+
76
+ context.add_servlet(jsp_servlet, '/admin/ping')
77
+ context.set_resource_base(File.join(File.dirname(__FILE__), 'webapp'))
78
+ end
79
+
80
+ context
81
+ end
82
+ end
83
+ end
data/lib/lucene.rb ADDED
@@ -0,0 +1,33 @@
1
+ require 'java'
2
+ require File.join(File.dirname(__FILE__), 'queries')
3
+
4
+ module Solrium
5
+
6
+ # simple lucene integration, bypassing solr completely
7
+ class Lucene
8
+ include Queries
9
+
10
+ include_class 'org.apache.lucene.queryParser.QueryParser'
11
+ include_class 'org.apache.lucene.analysis.standard.StandardAnalyzer'
12
+ include_class 'org.apache.lucene.search.IndexSearcher'
13
+
14
+ attr_accessor :index_path
15
+
16
+ def initialize(solr_home = "solr-instance")
17
+ @index_path = File.join(solr_home, 'data', 'index')
18
+ end
19
+
20
+ def analyzer
21
+ StandardAnalyzer.new
22
+ end
23
+
24
+ def with_searcher(&block)
25
+ searcher = IndexSearcher.new(@index_path)
26
+ begin
27
+ block.call(searcher)
28
+ ensure
29
+ searcher.close
30
+ end
31
+ end
32
+ end
33
+ end
data/lib/queries.rb ADDED
@@ -0,0 +1,60 @@
1
+ require 'java'
2
+
3
+ require File.join(File.dirname(__FILE__), 'solr_ext')
4
+
5
+ module Solrium
6
+ module Queries
7
+
8
+ include_class 'org.apache.lucene.queryParser.QueryParser'
9
+ include_class 'org.apache.lucene.search.MultiPhraseQuery'
10
+ include_class 'org.apache.lucene.search.WildcardTermEnum'
11
+
12
+ include_class 'org.apache.lucene.search.BooleanQuery'
13
+ include_class 'org.apache.lucene.search.WildcardQuery'
14
+ include_class 'org.apache.lucene.search.TermQuery'
15
+
16
+
17
+ # convenience search method
18
+ def search(q, max=10, &block)
19
+ q = q.is_a?(org.apache.lucene.search.Query) ? q : parse(q.to_s)
20
+
21
+ block ||= lambda do |doc, score, searcher|
22
+ [doc, score]
23
+ end
24
+
25
+ with_searcher do |searcher|
26
+ searcher.search(q, nil, max).scoreDocs.map do |sd|
27
+ block.call(searcher.doc(sd.doc), sd.score, searcher)
28
+ end
29
+ end
30
+ end
31
+
32
+ def parse(q, default='default')
33
+ QueryParser.new(default, analyzer).parse(q)
34
+ end
35
+
36
+ #a wildcard phrase query
37
+ def wildcard_pq(*terms)
38
+ return MultiPhraseQuery.new if terms.empty?
39
+
40
+ if terms.size == 1
41
+ #only one term, do simple term+wildcard query
42
+ bq = BooleanQuery.new
43
+ bq.add(TermQuery.new(Term[terms[0]]), org.apache.lucene.search.BooleanClause::Occur::SHOULD)
44
+ bq.add(WildcardQuery.new(Term[terms[0] + "*"]), org.apache.lucene.search.BooleanClause::Occur::SHOULD)
45
+ return bq
46
+ end
47
+
48
+ mpq = MultiPhraseQuery.new
49
+
50
+ terms[0..-2].each { |t| mpq.add(Term[t]) }
51
+
52
+ with_searcher do |s|
53
+ completions = WildcardTermEnum.new(s.reader, Term["#{terms[-1]}*"]).to_java_array
54
+ return MultiPhraseQuery.new if completions.length == 0
55
+ mpq.add(completions)
56
+ end
57
+ mpq
58
+ end
59
+ end
60
+ end
data/lib/remote.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'cgi'
4
+
5
+ module Solrium
6
+ module Remote
7
+ extend self
8
+
9
+ def query(q, params={}, port=8000)
10
+ params['q'] = q
11
+ params['wt'] = 'ruby'
12
+
13
+ url = URI.parse("http://localhost:#{port}")
14
+ res = Net::HTTP.start(url.host, url.port) do |http|
15
+ http.get("/solr/select?" + params.map { |k,v| "#{k}=#{CGI.escape(v)}" }.join("&"))
16
+ end
17
+ res.value
18
+ eval(res.body)
19
+ end
20
+
21
+ end
22
+ end
data/lib/solr.rb ADDED
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env jruby
2
+ require 'java'
3
+ require 'fileutils'
4
+
5
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'lucene', '*.jar')].each { |jar| require jar }
6
+ Dir[File.join(File.dirname(__FILE__), 'jars', 'solr', '*.jar')].each { |jar| require jar }
7
+
8
+ require File.join(File.dirname(__FILE__), 'solr_ext')
9
+ require File.join(File.dirname(__FILE__), 'lucene')
10
+ require File.join(File.dirname(__FILE__), 'queries')
11
+ require File.join(File.dirname(__FILE__), 'remote')
12
+ require File.join(File.dirname(__FILE__), 'jetty')
13
+
14
+ module Solrium
15
+ #wrapper for SolrCore
16
+ class Solr
17
+ include Queries
18
+ include Remote
19
+
20
+ include_class 'org.apache.solr.core.SolrCore'
21
+ include_class 'org.apache.solr.core.SolrConfig'
22
+
23
+ attr_accessor :config, :core
24
+
25
+ def initialize(solr_home='solr-instance')
26
+ raise "solr_home #{solr_home} not found" unless File.directory?(solr_home)
27
+
28
+ Java::JavaLang::System.setProperty("solr.solr.home", solr_home)
29
+
30
+ yield self if block_given?
31
+
32
+ @config = SolrConfig.new
33
+ @core = SolrCore.new('core', nil, @config, nil, nil)
34
+
35
+ at_exit do
36
+ @core.close rescue nil
37
+ end
38
+ end
39
+
40
+ def close
41
+ core.close
42
+ end
43
+
44
+ def logger(which="")
45
+ java.util.logging.Logger.getLogger(which)
46
+ end
47
+
48
+ def analyzer
49
+ core.schema.analyzer
50
+ end
51
+
52
+ def with_searcher(&block)
53
+ searcher_ref = core.searcher
54
+ searcher = searcher_ref.get
55
+ begin
56
+ block.call(searcher)
57
+ ensure
58
+ searcher_ref.decref
59
+ end
60
+ end
61
+
62
+ def self.template_dir; File.join(File.dirname(__FILE__), '..', 'solr-instance/conf'); end
63
+
64
+ def self.init_solr_dir(dest)
65
+ raise "#{dest} is not a dir" unless File.directory?(dest)
66
+ FileUtils.cp_r template_dir, dest
67
+ end
68
+ end
69
+ end
data/lib/solr_ext.rb ADDED
@@ -0,0 +1,81 @@
1
+ # make lucene/solr a bit more rubyish
2
+ module Solrium
3
+ include_class 'org.apache.lucene.index.TermEnum'
4
+ include_class 'org.apache.lucene.index.Term'
5
+ include_class 'org.apache.lucene.document.Document'
6
+ include_class 'org.apache.lucene.document.Field'
7
+ include_class 'org.apache.lucene.document.Field'
8
+
9
+ include_class 'org.apache.lucene.search.Query'
10
+ include_class 'org.apache.lucene.search.BooleanQuery'
11
+
12
+ #sugar, sugar... honey, honey.
13
+ Query.class_eval do
14
+ def |(other)
15
+ boolean_query(self, other, org.apache.lucene.search.BooleanClause::Occur::SHOULD)
16
+ end
17
+
18
+ def &(other)
19
+ boolean_query(self, other, org.apache.lucene.search.BooleanClause::Occur::MUST)
20
+ end
21
+
22
+ def boolean_query(a,b,occur)
23
+ bq = BooleanQuery.new
24
+ bq.add(a, occur)
25
+ bq.add(b, occur)
26
+ bq
27
+ end
28
+ end
29
+
30
+ Document.class_eval do
31
+ def [](n)
32
+ self.getField(n)
33
+ end
34
+
35
+ def inspect
36
+ "Document: " + to_hash.inspect
37
+ end
38
+
39
+ def to_hash
40
+ fields.inject({}) do |h, f|
41
+ h[f.name] = f.stringValue()
42
+ h
43
+ end
44
+ end
45
+ end
46
+
47
+ Field.class_eval do
48
+ def inspect
49
+ "#{name}: #{to_s}"
50
+ end
51
+
52
+ def to_s
53
+ self.stringValue()
54
+ end
55
+ end
56
+
57
+ # lets you use Term['foo'] ==> Term.new('default', 'foo')
58
+ Term.class_eval do
59
+ def self.[](t)
60
+ new('default', t)
61
+ end
62
+ end
63
+
64
+ #make TermEnum a bit friendlier to use
65
+ class TermEnum
66
+ include Enumerable
67
+
68
+ def each
69
+ while self.next()
70
+ yield(self.term())
71
+ end
72
+ end
73
+
74
+ def to_java_array
75
+ elements = to_a
76
+ array = java.lang.reflect.Array.newInstance(Term.java_class, elements.length)
77
+ elements.each_with_index { |e,i| array[i] = e }
78
+ array
79
+ end
80
+ end
81
+ end