sunspot_cell 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Zheileman
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,59 @@
1
+ = Sunspot Cell (gem)
2
+
3
+ == Note by Zheileman
4
+
5
+ * This gem exists because the only gem I could find for the original project (https://github.com/springbok/sunspot_cell) was the one packaged by Kevin Motschiedler (https://github.com/motske/sunspot_cell) and it lacks the support to deal with S3 included in this other fork: https://github.com/chebyte/sunspot_cell
6
+
7
+
8
+ This gem adds Cell support (for indexing rich documents like pdf, docs, html, etc...) to Sunspot (developed against Sunspot 1.3.0).
9
+ It also supports Paperclip attachments and S3 storage.
10
+
11
+ The code is based on the patch included here:
12
+ http://outoftime.lighthouseapp.com/projects/20339/tickets/98-solr-cell
13
+
14
+ == Requirements
15
+
16
+ * Sunspot gem installed (>= 1.3.0)
17
+
18
+ * Solr Cell libraries (+dist/apache-solr-cell-1.4.X.jar+ and +contrib/extraction/lib/*.jar+ from the standard Solr distribution) placed in the +/solr/lib+ directory as created by the Sunspot gem, in development environment. Your production setup might vary.
19
+
20
+ * Adjustments to the Solr +schema.xml+:
21
+
22
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
23
+
24
+ and
25
+
26
+ <dynamicField name="*_attachment" stored="true" type="text" multiValued="true" indexed="true"/>
27
+ <dynamicField name="ignored_*" type="ignored"/>
28
+
29
+ === Install Plugin
30
+
31
+ Add the sunspot_rails and sunspot_cell gems to your Gemfile:
32
+
33
+ gem 'sunspot_rails', '~> 1.3.0'
34
+ gem 'sunspot_cell', :git => 'git://github.com/zheileman/sunspot_cell.git'
35
+
36
+ == Usage
37
+
38
+ class Doc
39
+ searchable do
40
+ text :title
41
+ attachment :file
42
+ end
43
+ end
44
+
45
+ == Paperclip & S3 Storage
46
+
47
+ require 'open-uri'
48
+
49
+ class Doc
50
+ searchable do
51
+ text :title
52
+ attachment :attached_file
53
+ end
54
+
55
+ private
56
+ def attached_file
57
+ URI.parse(remote_full_url)
58
+ end
59
+ end
@@ -0,0 +1,25 @@
1
require 'rubygems'
require 'rake'

# 'rake/rdoctask' is obsolete in Rake 10+ and fails at load time; RDoc ships
# the replacement task. Fall back to the legacy task on old toolchains only.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

desc 'Default: run unit tests.'
task :default => :test

# Define the :test task through RSpec when it is available; otherwise define
# a placeholder task that only explains what is missing.
begin
  require 'rspec'
  require 'rspec/core/rake_task'
  desc 'Run the unit tests'
  RSpec::Core::RakeTask.new(:test)
rescue LoadError
  task :test do
    STDERR.puts "You must have rspec 2.0 installed to run the tests"
  end
end

desc 'Generate documentation.'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.options << '--title' << 'Sunspot Cell support' << '--line-numbers' << '--inline-source' << '--main' << 'README.rdoc'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.2
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'sunspot_cell'
@@ -0,0 +1,49 @@
1
module Sunspot

  # Solr document that may carry a rich-file attachment. Documents with an
  # attachment are posted to the 'update/extract' handler (see #add) instead
  # of going through the regular XML update.
  class RichDocument < RSolr::Xml::Document
    include Enumerable

    # Tell whether any field of this document is a non-blank attachment field.
    #
    # ==== Returns
    #
    # Boolean:: true if at least one field name contains "_attachment" and
    #           its value is present
    #
    def contains_attachment?
      @fields.any? { |field| attachment_field?(field) }
    end

    # Post this document to the 'update/extract' handler.
    #
    # Attachment fields are mapped to the extracted content ('fmap.content').
    # A Hash value is passed by reference through 'stream.url' /
    # 'stream.contentType'; any other value is opened and its content is sent
    # as the request body. All remaining fields are sent as 'literal.*'
    # parameters, and field boosts as 'boost.*' parameters.
    #
    # ==== Parameters
    #
    # connection:: object responding to +send_and_receive+
    #
    # ==== Returns
    #
    # Whatever +connection.send_and_receive+ returns.
    #
    def add(connection)
      params = { :wt => :ruby }
      data = nil

      @fields.each do |f|
        if attachment_field?(f)
          params['fmap.content'] = f.name
          if f.value.is_a?(Hash)
            params['stream.url'] = f.value[:file]
            params['stream.contentType'] = f.value[:type]
          else
            data = read_attachment(f.value)
          end
        else
          param_name = "literal.#{f.name}"
          (params[param_name] ||= []) << f.value
        end
        params["boost.#{f.name}"] = f.attrs[:boost] if f.attrs[:boost]
      end

      connection.send_and_receive('update/extract',
        { :method => :post,
          :params => params,
          :data => data,
          :headers => {"Content-Type" => ""}
        })
    end

    private

    # True when +field+ is an attachment field that actually has a value.
    def attachment_field?(field)
      field.name.to_s.include?("_attachment") && field.value.present?
    end

    # Read the whole attachment and close the underlying handle afterwards
    # (the block form of +open+ closes it even if +read+ raises).
    # Indexing is best-effort: an unreadable source yields an empty body.
    #
    # NOTE(review): Kernel#open treats a String starting with "|" as a
    # command to run; the attachment value must come from a trusted source.
    def read_attachment(source)
      open(source) { |io| io.read }
    rescue StandardError
      ""
    end
  end
end
@@ -0,0 +1,17 @@
1
# Load the rich document class and every SunspotCell extension module.
require 'sunspot/rich_document'
require 'sunspot_cell/dsl/fields'
require 'sunspot_cell/dsl/standard_query'
require 'sunspot_cell/composite_setup'
require 'sunspot_cell/type'
require 'sunspot_cell/attribute_field'
require 'sunspot_cell/setup'
require 'sunspot_cell/field_factory'
require 'sunspot_cell/indexer'

# Mix each extension into the Sunspot class or module it augments.
# An array of pairs (not a Hash) keeps the order explicit on every Ruby.
[
  [Sunspot::DSL::Fields,        SunspotCell::DSL::Fields],
  [Sunspot::DSL::StandardQuery, SunspotCell::DSL::StandardQuery],
  [Sunspot::CompositeSetup,     SunspotCell::CompositeSetup],
  [Sunspot::Type,               SunspotCell::Type],
  [Sunspot::AttributeField,     SunspotCell::AttributeField],
  [Sunspot::Setup,              SunspotCell::Setup],
  [Sunspot::FieldFactory,       SunspotCell::FieldFactory],
  [Sunspot::Indexer,            SunspotCell::Indexer]
].each do |target, extension|
  target.send(:include, extension)
end
@@ -0,0 +1,12 @@
1
module SunspotCell
  # Mixin that exposes the +default_boost+ instance variable of the including
  # class through a public reader.
  module AttributeField

    # Hook run when the module is included: defines the reader on +base+.
    def self.included(base)
      base.send(:attr_reader, :default_boost)
    end

  end
end
@@ -0,0 +1,43 @@
1
module SunspotCell
  # Adds attachment-field aggregation to a composite setup, i.e. an object
  # that exposes its enclosed setups through +setups+.
  module CompositeSetup

    def self.included(base)
      base.send(:include, InstanceMethods)
    end

    module InstanceMethods

      # Collection of all attachment fields configured for any of the
      # enclosed setups. The result is memoized.
      #
      # === Returns
      #
      # Array:: Attachment fields configured for the enclosed setups
      #
      def all_attachment_fields
        @attachment_fields ||= attachment_fields_hash.values.map(&:to_a).flatten
      end

      private

      # Build (once) a hash of field names to attachment field objects,
      # containing all fields configured for any of the enclosed setups.
      #
      # ==== Returns
      #
      # Hash:: Field name => Set of attachment field objects
      #
      def attachment_fields_hash
        @attachment_fields_hash ||= begin
          fields_by_name = {}
          setups.each do |setup|
            setup.all_attachment_fields.each do |attachment_field|
              (fields_by_name[attachment_field.name] ||= Set.new) << attachment_field
            end
          end
          fields_by_name
        end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module SunspotCell
  module DSL
    # Adds the +attachment+ declaration to the field DSL.
    module Fields

      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods

        # Declare one or more attachment fields; the attachment is handed to
        # Solr for content extraction (Tika) and indexing.
        #
        # ==== Parameters
        #
        # names:: field names, optionally followed by an options Hash that is
        #         applied to every name
        #
        # ==== Returns
        #
        # Array:: the field names that were registered
        #
        def attachment(*names)
          shared_options = names.last.is_a?(Hash) ? names.pop : nil
          names.each do |field_name|
            @setup.add_attachment_field_factory(field_name, shared_options || {})
          end
        end

      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module SunspotCell
  module DSL
    # Replacement for the query DSL's +fulltext+ that also searches the
    # configured attachment fields.
    module StandardQuery

      def self.included(base)
        base.class_eval do
          include InstanceMethods
        end
      end

      module InstanceMethods

        # Add a fulltext (keyword) component to the query.
        #
        # Blank or missing keywords are ignored entirely. When no explicit
        # fields are given (neither via +:fields+ nor inside the block), all
        # text fields and all attachment fields of the setup are searched,
        # except those excluded through the block DSL.
        #
        # ==== Parameters
        #
        # keywords:: the search phrase
        # options:: Hash; the keys read (and removed) are
        #           +:fields+, +:minimum_match+, +:tie+, +:query_phrase_slop+
        #           and +:highlight+ (true, or field name(s))
        # block:: optional block evaluated against the fulltext DSL object
        #
        def fulltext(keywords, options = {}, &block)
          # Nothing to search for: bail out before touching the query, so the
          # default-field loops below never run against a missing query.
          return unless keywords && keywords.to_s !~ /^\s*$/

          fulltext_query = @query.add_fulltext(keywords)

          # Constants are qualified with Sunspot:: because this module lives
          # in the SunspotCell namespace, where bare Util/Fulltext do not
          # resolve.
          if field_names = options.delete(:fields)
            Sunspot::Util.Array(field_names).each do |field_name|
              @setup.text_fields(field_name).each do |field|
                fulltext_query.add_fulltext_field(field, field.default_boost)
              end
            end
          end
          if minimum_match = options.delete(:minimum_match)
            fulltext_query.minimum_match = minimum_match.to_i
          end
          if tie = options.delete(:tie)
            fulltext_query.tie = tie.to_f
          end
          if query_phrase_slop = options.delete(:query_phrase_slop)
            fulltext_query.query_phrase_slop = query_phrase_slop.to_i
          end
          if highlight_field_names = options.delete(:highlight)
            if highlight_field_names == true
              fulltext_query.add_highlight
            else
              highlight_fields = []
              Sunspot::Util.Array(highlight_field_names).each do |field_name|
                highlight_fields.concat(@setup.text_fields(field_name))
              end
              fulltext_query.add_highlight(highlight_fields)
            end
          end
          if block && fulltext_query
            fulltext_dsl = Sunspot::DSL::Fulltext.new(fulltext_query, @setup)
            Sunspot::Util.instance_eval_or_call(fulltext_dsl, &block)
          end

          # No explicit field selection: fall back to every text field, then
          # every attachment field, skipping excluded and already-added ones.
          if !field_names && (!fulltext_dsl || !fulltext_dsl.fields_added?)
            @setup.all_text_fields.each do |field|
              next if fulltext_query.has_fulltext_field?(field)
              next if fulltext_dsl && fulltext_dsl.exclude_fields.include?(field.name)
              fulltext_query.add_fulltext_field(field, field.default_boost)
            end
            @setup.all_attachment_fields.each do |attachment_text_field|
              next if fulltext_query.has_fulltext_field?(attachment_text_field)
              next if fulltext_dsl && fulltext_dsl.exclude_fields.include?(attachment_text_field.name)
              fulltext_query.add_fulltext_field(attachment_text_field, attachment_text_field.default_boost)
            end
          end
        end

      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module SunspotCell
  module FieldFactory

    # Field factory for attachments. It only records how to extract the
    # attachment from a model; #populate_document is intentionally empty.
    class Attachment
      # ==== Parameters
      #
      # name:: attribute to read from the model (used when no block is given)
      # block:: optional block used to extract the value instead
      #
      def initialize(name = nil, &block)
        # DataExtractor is qualified with Sunspot:: because a bare reference
        # does not resolve from inside the SunspotCell namespace.
        @data_extractor =
          if block
            Sunspot::DataExtractor::BlockExtractor.new(&block)
          else
            Sunspot::DataExtractor::AttributeExtractor.new(name)
          end
      end

      # No-op: this factory adds nothing to the document.
      def populate_document(document, model)
      end
    end

  end
end
@@ -0,0 +1,45 @@
1
module SunspotCell
  # Indexer overrides: documents carrying an attachment are sent one by one
  # through their own #add, all other documents go through the connection in
  # a single call.
  module Indexer

    def self.included(base)
      base.class_eval do

        # Send +documents+ (a single document or a collection) to Solr.
        # Plain documents are added first, in one batch; documents with an
        # attachment follow individually. On any failure @batch is reset
        # and the error is re-raised.
        def add_documents(documents)
          with_attachment, plain = Sunspot::Util.Array(documents).partition do |document|
            document.contains_attachment?
          end

          begin
            @connection.add(plain) unless plain.empty?
            unless with_attachment.empty?
              with_attachment.each { |document| document.add(@connection) }
            end
          rescue Exception => error
            @batch = nil
            raise error
          end
        end


        # Build the rich document for +model+, carrying its index id and the
        # names of its class and superclasses as the type.
        def document_for(model)
          index_id = Sunspot::Adapters::InstanceAdapter.adapt(model).index_id
          type_names = Sunspot::Util.superclasses_for(model.class).map { |clazz| clazz.name }
          Sunspot::RichDocument.new(:id => index_id, :type => type_names)
        end

      end
    end
  end
end
+ end
@@ -0,0 +1,76 @@
1
module SunspotCell
  # Extends a setup class with attachment field factories. The including
  # class must already define +initialize(clazz)+, +all_field_factories+,
  # +text_field_factories+ and +collection_from_inheritable_hash+.
  module Setup

    def self.included(base)
      base.class_eval do
        # initialize is always private, so method_defined? alone never sees
        # the alias; without the private check a second include would alias
        # the wrapper onto itself and recurse forever.
        unless method_defined?(:sunspot_initialize) || private_method_defined?(:sunspot_initialize)
          alias :sunspot_initialize :initialize
        end
        # Set up the attachment registries, then run the original initializer.
        def initialize(clazz)
          @attachment_field_factories = {}
          @attachment_field_factories_cache = {}
          sunspot_initialize(clazz)
        end

        alias :sunspot_all_field_factories :all_field_factories unless method_defined?(:sunspot_all_field_factories)
        # All field factories of the original setup plus the attachment ones.
        def all_field_factories
          all_field_factories = sunspot_all_field_factories
          all_field_factories.concat(attachment_field_factories)
          all_field_factories
        end

        # Add field_factories for fulltext search on attachments
        #
        # ==== Parameters
        #
        # name:: name of the attachment field
        # options:: Hash passed on to the field factory; +:stored+ is also
        #           read here
        # block:: optional block passed on to the field factory
        #
        def add_attachment_field_factory(name, options = {}, &block)
          field_factory = Sunspot::FieldFactory::Static.new(name, Sunspot::Type::AttachmentType.instance, options, &block)
          @attachment_field_factories[name] = field_factory
          @attachment_field_factories_cache[field_factory.name] = field_factory
          # The original appended (<<) to the factory itself here, which
          # raised NoMethodError for every :stored attachment.
          # NOTE(review): registering in @field_factories_cache is assumed to
          # mirror how Sunspot handles stored text fields - confirm against
          # the Sunspot version in use.
          if options[:stored] && @field_factories_cache
            @field_factories_cache[field_factory.name] = field_factory
          end
        end

        # Look up the text or attachment field registered under +field_name+.
        #
        # ==== Returns
        #
        # Array:: one-element array holding the built field
        #
        # ==== Raises
        #
        # Sunspot::UnrecognizedFieldError:: if no such field is configured
        #
        def text_fields(field_name)
          key = field_name.to_sym
          field_factory = @text_field_factories_cache[key] || @attachment_field_factories_cache[key]
          unless field_factory
            # Qualified: a bare UnrecognizedFieldError does not resolve from
            # the SunspotCell namespace and would surface as a NameError.
            raise(
              Sunspot::UnrecognizedFieldError,
              "No text field configured for #{@class_name} with name '#{field_name}'"
            )
          end
          [field_factory.build]
        end

        #
        # Return all text fields, followed by all attachment fields
        #
        def all_text_fields
          text_fields = text_field_factories.map { |text_field_factory| text_field_factory.build }
          text_fields.concat(all_attachment_fields)
        end

        #
        # Return all attachment fields, freshly built from their factories
        #
        def all_attachment_fields
          attachment_field_factories.map { |field_factory| field_factory.build }
        end

        # Get the attachment field_factories associated with this setup, as
        # collected by +collection_from_inheritable_hash+
        #
        # ==== Returns
        #
        # Array:: Collection of all attachment field_factories associated with this setup
        #
        def attachment_field_factories
          collection_from_inheritable_hash(:attachment_field_factories)
        end
      end
    end

  end
end
@@ -0,0 +1,19 @@
1
module SunspotCell
  module Type

    # Field type for attachments: values pass through unchanged and the
    # indexed field name gets an "_attachment" suffix.
    class AttachmentType < Sunspot::Type::AbstractType
      # Name under which the field is indexed.
      def indexed_name(name)
        "#{name}_attachment"
      end

      # Value to index: the value itself, or nil for nil/false.
      def to_indexed(value)
        value ? value : nil
      end

      # Stored text is returned as-is.
      def cast(text)
        text
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sunspot_cell
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - liZhang
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: sunspot
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.3.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.3.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: activerecord
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '2.2'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '2.2'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 2.0.0
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.0.0
62
+ description: This gem adds Cell support (for indexing rich documents) to Sunspot
63
+ email: zhangyicaocao@gmail.com
64
+ executables: []
65
+ extensions: []
66
+ extra_rdoc_files:
67
+ - LICENSE
68
+ - README.rdoc
69
+ files:
70
+ - LICENSE
71
+ - README.rdoc
72
+ - Rakefile
73
+ - VERSION
74
+ - init.rb
75
+ - lib/sunspot/rich_document.rb
76
+ - lib/sunspot_cell.rb
77
+ - lib/sunspot_cell/attribute_field.rb
78
+ - lib/sunspot_cell/composite_setup.rb
79
+ - lib/sunspot_cell/dsl/fields.rb
80
+ - lib/sunspot_cell/dsl/standard_query.rb
81
+ - lib/sunspot_cell/field_factory.rb
82
+ - lib/sunspot_cell/indexer.rb
83
+ - lib/sunspot_cell/setup.rb
84
+ - lib/sunspot_cell/type.rb
85
+ homepage: https://github.com/uudui/sunspot_cell
86
+ licenses:
87
+ - MIT
88
+ post_install_message:
89
+ rdoc_options:
90
+ - --charset=UTF-8
91
+ - --main
92
+ - README.rdoc
93
+ - LICENSE
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ requirements: []
109
+ rubyforge_project:
110
+ rubygems_version: 1.8.24
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: This gem adds Cell support (for indexing rich documents) to Sunspot
114
+ test_files: []