sunspot_cell 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Zheileman
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,59 @@
1
+ = Sunspot Cell (gem)
2
+
3
+ == Note by Zheileman
4
+
5
+ * This gem exists because the only packaged gem I could find for the original project (https://github.com/springbok/sunspot_cell) was the one by Kevin Motschiedler (https://github.com/motske/sunspot_cell), which lacks the S3 support included in this other fork: https://github.com/chebyte/sunspot_cell
6
+
7
+
8
+ This gem adds Cell support (for indexing rich documents like pdf, docs, html, etc...) to Sunspot (developed against Sunspot 1.3.0).
9
+ It also supports Paperclip attachments and S3 storage.
10
+
11
+ The code is based on the patch included here:
12
+ http://outoftime.lighthouseapp.com/projects/20339/tickets/98-solr-cell
13
+
14
+ == Requirements
15
+
16
+ * Sunspot gem installed (>= 1.3.0)
17
+
18
+ * Solr Cell libraries (+dist/apache-solr-cell-1.4.X.jar+ and +contrib/extraction/lib/*.jar+ from the standard Solr distribution) placed in the +/solr/lib+ directory as created by the Sunspot gem, in development environment. Your production setup might vary.
19
+
20
+ * Adjustments to the Solr +schema.xml+:
21
+
22
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
23
+
24
+ and
25
+
26
+ <dynamicField name="*_attachment" stored="true" type="text" multiValued="true" indexed="true"/>
27
+ <dynamicField name="ignored_*" type="ignored"/>
28
+
29
+ === Install Plugin
30
+
31
+ Add the sunspot_rails and sunspot_cell gems to your Gemfile:
32
+
33
+ gem 'sunspot_rails', '~> 1.3.0'
34
+ gem 'sunspot_cell', :git => 'git://github.com/zheileman/sunspot_cell.git'
35
+
36
+ == Usage
37
+
38
+ class Doc
39
+ searchable do
40
+ text :title
41
+ attachment :file
42
+ end
43
+ end
44
+
45
+ == Paperclip & S3 Storage
46
+
47
+ require 'open-uri'
48
+
49
+ class Doc
50
+ searchable do
51
+ text :title
52
+ attachment :attached_file
53
+ end
54
+
55
+ private
56
+ def attached_file
57
+ URI.parse(remote_full_url)
58
+ end
59
+ end
@@ -0,0 +1,25 @@
1
require 'rubygems'
require 'rake'

# Newer Rake releases no longer ship rake/rdoctask; the task class now lives in
# RDoc itself. Prefer rdoc/task and only fall back to the legacy file on
# installations that predate it, so loading this Rakefile never aborts on the
# documentation task alone.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

desc 'Default: run unit tests.'
task :default => :test

# Define the real spec task when RSpec 2 can be loaded; otherwise register a
# placeholder task that only explains why nothing ran.
begin
  require 'rspec'
  require 'rspec/core/rake_task'
  desc 'Run the unit tests'
  RSpec::Core::RakeTask.new(:test)
rescue LoadError
  task :test do
    STDERR.puts "You must have rspec 2.0 installed to run the tests"
  end
end

desc 'Generate documentation.'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.options << '--title' << 'Sunspot Cell support' << '--line-numbers' << '--inline-source' << '--main' << 'README.rdoc'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.2
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'sunspot_cell'
@@ -0,0 +1,49 @@
1
module Sunspot

  # An RSolr XML document that knows how to submit itself to Solr's
  # 'update/extract' handler when one of its fields carries an attachment.
  #
  # A field counts as an attachment when its name contains "_attachment" and
  # its value is non-blank. The blank check uses #present?, so an
  # ActiveSupport-style core extension must be loaded.
  class RichDocument < RSolr::Xml::Document
    include Enumerable

    # Whether any field of this document is a non-blank attachment field.
    #
    # ==== Returns
    #
    # Boolean:: true if at least one attachment field has a value
    #
    def contains_attachment?
      @fields.any? { |field| attachment_field?(field) }
    end

    # Post this document to the 'update/extract' handler of +connection+.
    #
    # Attachment fields are mapped through 'fmap.content'. A Hash value is
    # passed by reference ('stream.url' / 'stream.contentType', taken from its
    # :file and :type keys); any other value is read and sent as the request
    # body. All remaining fields become 'literal.<name>' parameters, and a
    # :boost attribute becomes 'boost.<name>'.
    #
    # ==== Parameters
    #
    # connection:: object responding to send_and_receive(path, options)
    #
    # ==== Returns
    #
    # Whatever connection.send_and_receive returns.
    #
    def add(connection)
      params = { :wt => :ruby }
      data = nil

      @fields.each do |f|
        if attachment_field?(f)
          params['fmap.content'] = f.name
          if f.value.is_a?(Hash)
            params['stream.url'] = f.value[:file]
            params['stream.contentType'] = f.value[:type]
          else
            data = read_attachment(f.value)
          end
        else
          param_name = "literal.#{f.name}"
          (params[param_name] ||= []) << f.value
        end
        params["boost.#{f.name}"] = f.attrs[:boost] if f.attrs[:boost]
      end

      connection.send_and_receive('update/extract',
        { :method => :post,
          :params => params,
          :data => data,
          :headers => {"Content-Type" => ""}
        })
    end

    private

    # True when +field+ is named like an attachment and has a non-blank value.
    def attachment_field?(field)
      field.name.to_s.include?("_attachment") && field.value.present?
    end

    # Read the raw bytes of an attachment source.
    #
    # Accepted sources, in order of preference:
    # * anything responding to #read (IO, Pathname, or a URI once open-uri is loaded)
    # * anything responding to #open with a block
    # * a local path, opened in binary mode and closed again
    #
    # Strings that look like "scheme://..." are parsed into a URI first so that
    # remote URLs keep working where open-uri is loaded. Kernel#open is
    # deliberately avoided: it would execute a command for values starting
    # with "|".
    #
    # Reading stays best-effort: on any StandardError a warning is printed and
    # an empty body is returned, so indexing continues.
    def read_attachment(source)
      if source.is_a?(String) && source =~ %r{\A[a-z][a-z0-9+.\-]*://}i && defined?(::URI)
        source = ::URI.parse(source)
      end

      if source.respond_to?(:read)
        source.read
      elsif source.respond_to?(:open)
        source.open { |io| io.read }
      else
        File.open(source, 'rb') { |io| io.read }
      end
    rescue StandardError => e
      warn "sunspot_cell: could not read attachment #{source.inspect}: #{e.class}: #{e.message}"
      ""
    end
  end
end
@@ -0,0 +1,17 @@
1
require 'sunspot/rich_document'

# Load every extension file and mix the module it defines into the Sunspot
# constant of the same relative name. Each entry is
# [file to require, constant path below both Sunspot and SunspotCell].
# The order matches the original load sequence: each file is required
# immediately before its module is included.
[
  ['sunspot_cell/dsl/fields',         %w(DSL Fields)],
  ['sunspot_cell/dsl/standard_query', %w(DSL StandardQuery)],
  ['sunspot_cell/composite_setup',    %w(CompositeSetup)],
  ['sunspot_cell/type',               %w(Type)],
  ['sunspot_cell/attribute_field',    %w(AttributeField)],
  ['sunspot_cell/setup',              %w(Setup)],
  ['sunspot_cell/field_factory',      %w(FieldFactory)],
  ['sunspot_cell/indexer',            %w(Indexer)]
].each do |feature, constant_path|
  require feature
  host      = constant_path.inject(Sunspot)     { |scope, const_name| scope.const_get(const_name) }
  extension = constant_path.inject(SunspotCell) { |scope, const_name| scope.const_get(const_name) }
  host.send(:include, extension)
end
@@ -0,0 +1,12 @@
1
module SunspotCell
  # Mixin that exposes the including class's @default_boost instance variable
  # through a public reader.
  module AttributeField

    # Include hook: declares +default_boost+ as a reader on +base+.
    def self.included(base)
      base.send(:attr_reader, :default_boost)
    end

  end
end
@@ -0,0 +1,43 @@
1
# Set is used below; require it here so this file does not depend on another
# library having loaded it first.
require 'set'

module SunspotCell
  # Mixin that lets a composite setup (an object responding to +setups+)
  # report the attachment fields of all the setups it encloses.
  module CompositeSetup

    # Include hook: mixes InstanceMethods into +base+.
    def self.included(base)
      base.class_eval do
        include InstanceMethods
      end
    end

    module InstanceMethods

      # Collection of all attachment fields configured for any of the enclosed
      # setups. The result is memoized on first call.
      #
      # === Returns
      #
      # Array:: Attachment fields configured for the enclosed setups
      #
      def all_attachment_fields
        @attachment_fields ||= attachment_fields_hash.values.map { |set| set.to_a }.flatten
      end

      private

      # Return a hash of field names to attachment field objects, containing
      # all attachment fields reported by any of the enclosed setups. Fields
      # sharing a name are grouped in a Set, so the same field object reported
      # by several setups appears once.
      #
      # ==== Returns
      #
      # Hash:: Hash of field names to Sets of attachment field objects.
      #
      def attachment_fields_hash
        @attachment_fields_hash ||=
          setups.inject({}) do |hash, setup|
            setup.all_attachment_fields.each do |attachment_field|
              (hash[attachment_field.name] ||= Set.new) << attachment_field
            end
            hash
          end
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
module SunspotCell
  module DSL
    # Mixin that adds the +attachment+ declaration to a fields DSL object
    # holding its setup in @setup.
    module Fields

      # Include hook: mixes InstanceMethods into +base+.
      def self.included(base)
        base.send(:include, InstanceMethods)
      end

      module InstanceMethods

        # Declare one or more attachment fields. Each name is registered on
        # @setup through add_attachment_field_factory. A trailing Hash, if
        # given, is removed from the names and passed along as the options
        # for every field; without one each field gets a fresh empty Hash.
        #
        # ==== Returns
        #
        # Array:: the field names (without the options Hash)
        #
        def attachment(*names)
          given_options = names.last.is_a?(Hash) ? names.pop : nil
          names.each do |field_name|
            @setup.add_attachment_field_factory(field_name, given_options || {})
          end
        end

      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module SunspotCell
  module DSL
    # Mixin providing a +fulltext+ query method that also searches attachment
    # fields. It expects the host object to hold @query and @setup.
    #
    # NOTE(review): the method is mixed in via +include+, so if the host class
    # defines its own #fulltext that definition takes precedence over this
    # one. Confirm this override is actually reached.
    module StandardQuery

      # Include hook: mixes InstanceMethods into +base+.
      def self.included(base)
        base.class_eval do
          include InstanceMethods
        end
      end

      module InstanceMethods

        # Add a fulltext component to the query.
        #
        # Nothing happens when +keywords+ is nil/false or consists only of
        # whitespace. The check is anchored to the whole string, so keywords
        # that merely contain an empty line are still searched.
        #
        # ==== Parameters
        #
        # keywords:: the search phrase (converted with #to_s for the blank check)
        # options::  Hash; the keys below are removed from it as they are read
        #
        # ==== Options
        #
        # :fields::            field name(s) to search; disables the default field set
        # :minimum_match::     converted with to_i
        # :tie::               converted with to_f
        # :query_phrase_slop:: converted with to_i
        # :highlight::         true for all fields, or field name(s) to highlight
        #
        # When neither :fields nor the block selected any fields, every field
        # from @setup.all_text_fields and @setup.all_attachment_fields is
        # searched with its default boost. Fields excluded through the block
        # or already present on the query are skipped.
        #
        def fulltext(keywords, options = {}, &block)
          return unless keywords && keywords.to_s !~ /\A\s*\z/

          fulltext_query = @query.add_fulltext(keywords)
          fulltext_dsl = nil

          field_names = options.delete(:fields)
          if field_names
            # Sunspot::Util is spelled out: a bare Util does not resolve from
            # inside the SunspotCell namespace.
            Sunspot::Util.Array(field_names).each do |field_name|
              @setup.text_fields(field_name).each do |field|
                fulltext_query.add_fulltext_field(field, field.default_boost)
              end
            end
          end
          if minimum_match = options.delete(:minimum_match)
            fulltext_query.minimum_match = minimum_match.to_i
          end
          if tie = options.delete(:tie)
            fulltext_query.tie = tie.to_f
          end
          if query_phrase_slop = options.delete(:query_phrase_slop)
            fulltext_query.query_phrase_slop = query_phrase_slop.to_i
          end
          if highlight_field_names = options.delete(:highlight)
            if highlight_field_names == true
              fulltext_query.add_highlight
            else
              highlight_fields = []
              Sunspot::Util.Array(highlight_field_names).each do |field_name|
                highlight_fields.concat(@setup.text_fields(field_name))
              end
              fulltext_query.add_highlight(highlight_fields)
            end
          end
          if block && fulltext_query
            fulltext_dsl = Sunspot::DSL::Fulltext.new(fulltext_query, @setup)
            Sunspot::Util.instance_eval_or_call(
              fulltext_dsl,
              &block
            )
          end

          # Default field set. This runs inside the keywords guard on purpose:
          # with blank keywords there is no fulltext query to add fields to.
          if !field_names && (!fulltext_dsl || !fulltext_dsl.fields_added?)
            excluded_names = fulltext_dsl ? fulltext_dsl.exclude_fields : []
            [@setup.all_text_fields, @setup.all_attachment_fields].each do |default_fields|
              default_fields.each do |field|
                next if excluded_names.include?(field.name)
                next if fulltext_query.has_fulltext_field?(field)
                fulltext_query.add_fulltext_field(field, field.default_boost)
              end
            end
          end
        end

      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module SunspotCell
  module FieldFactory

    # Field factory stub for attachments. It only selects a data extractor;
    # populate_document is intentionally a no-op in this version.
    class Attachment
      # ==== Parameters
      #
      # name::  attribute name to extract from when no block is given
      # block:: optional extraction block; takes precedence over +name+
      #
      # The extractor classes are referenced through Sunspot::DataExtractor:
      # a bare DataExtractor does not resolve from inside the SunspotCell
      # namespace and would raise NameError on instantiation.
      def initialize(name = nil, &block)
        @data_extractor =
          if block
            Sunspot::DataExtractor::BlockExtractor.new(&block)
          else
            Sunspot::DataExtractor::AttributeExtractor.new(name)
          end
      end

      # Does nothing and returns nil; the document is left untouched.
      def populate_document(document, model)
      end
    end

  end
end
@@ -0,0 +1,45 @@
1
module SunspotCell
  # Extension that (re)defines add_documents and document_for directly on the
  # including class, so documents with attachments are submitted one by one
  # through their own #add while all others go to the connection in one batch.
  module Indexer

    def self.included(base)
      base.class_eval do

        # Send +documents+ (one document or a collection) to Solr.
        #
        # Documents without attachments are added through @connection.add in
        # a single call; each document with an attachment adds itself via
        # document.add(@connection). If anything raises while sending, @batch
        # is cleared and the same exception is re-raised.
        def add_documents(documents)
          with_attachment, plain = Sunspot::Util.Array(documents).partition do |document|
            document.contains_attachment?
          end

          begin
            @connection.add(plain) unless plain.empty?
            unless with_attachment.empty?
              Sunspot::Util.Array(with_attachment).each do |document|
                document.add(@connection)
              end
            end
          rescue Exception
            # Only the batch state is reset here; the error itself is passed on.
            @batch = nil
            raise
          end
        end


        # Build an empty Sunspot::RichDocument for +model+, carrying its index
        # id and the names of the classes returned by
        # Sunspot::Util.superclasses_for.
        def document_for(model)
          index_id = Sunspot::Adapters::InstanceAdapter.adapt(model).index_id
          type_names = Sunspot::Util.superclasses_for(model.class).map { |clazz| clazz.name }
          Sunspot::RichDocument.new(:id => index_id, :type => type_names)
        end

      end
    end
  end
end
@@ -0,0 +1,76 @@
1
module SunspotCell
  # Extension that patches the including setup class with attachment field
  # support: it wraps initialize and all_field_factories (keeping the
  # originals under sunspot_* aliases) and adds the attachment-specific
  # registration and lookup methods.
  module Setup

    def self.included(base)
      base.class_eval do
        # initialize is always private, so method_defined? alone can never
        # see an existing alias. Without the private check a second include
        # would alias the already-wrapped initialize and recurse forever.
        unless method_defined?(:sunspot_initialize) || private_method_defined?(:sunspot_initialize)
          alias :sunspot_initialize :initialize
        end
        def initialize(clazz)
          @attachment_field_factories = {}
          @attachment_field_factories_cache = {}
          sunspot_initialize(clazz)
        end

        unless method_defined?(:sunspot_all_field_factories) || private_method_defined?(:sunspot_all_field_factories)
          alias :sunspot_all_field_factories :all_field_factories
        end
        # The original field factories followed by the attachment ones.
        def all_field_factories
          sunspot_all_field_factories.concat(attachment_field_factories)
        end

        # Add a field factory for fulltext search on an attachment.
        #
        # ==== Parameters
        #
        # name::    name of the attachment field
        # options:: Hash passed on to the field factory; :stored additionally
        #           registers the factory as a stored field
        #
        def add_attachment_field_factory(name, options = {}, &block)
          stored = options[:stored]
          field_factory = Sunspot::FieldFactory::Static.new(name, Sunspot::Type::AttachmentType.instance, options, &block)
          @attachment_field_factories[name] = field_factory
          @attachment_field_factories_cache[field_factory.name] = field_factory
          if stored
            # The previous code appended to the cache entry that was just
            # assigned, i.e. called << on the factory itself.
            # NOTE(review): @stored_field_factories_cache is assumed to be the
            # per-name list the host setup keeps for stored fields - confirm
            # against the Sunspot version in use. It is created here if the
            # host did not set it up.
            @stored_field_factories_cache ||= Hash.new { |hash, key| hash[key] = [] }
            @stored_field_factories_cache[field_factory.name] << field_factory
          end
        end

        # Build the field registered under +field_name+, looking first among
        # the text fields and then among the attachment fields.
        #
        # ==== Returns
        #
        # Array:: one-element Array holding the built field
        #
        # ==== Raises
        #
        # Sunspot::UnrecognizedFieldError:: if no such field is configured
        #
        def text_fields(field_name)
          key = field_name.to_sym
          field_factory = @text_field_factories_cache[key] || @attachment_field_factories_cache[key]
          unless field_factory
            # Fully qualified: the bare constant does not resolve from inside
            # the SunspotCell namespace.
            raise(
              Sunspot::UnrecognizedFieldError,
              "No text field configured for #{@class_name} with name '#{field_name}'"
            )
          end
          [field_factory.build]
        end

        #
        # Return all text fields, followed by all attachment fields
        #
        def all_text_fields
          text_fields = text_field_factories.map { |text_field_factory| text_field_factory.build }
          text_fields.concat(all_attachment_fields)
        end

        # Build a field from every attachment field factory.
        def all_attachment_fields
          attachment_field_factories.map { |field_factory| field_factory.build }
        end

        # Get the attachment field_factories, collected through
        # collection_from_inheritable_hash(:attachment_field_factories).
        #
        # ==== Returns
        #
        # Array:: Collection of all attachment field_factories for this setup
        #
        def attachment_field_factories
          collection_from_inheritable_hash(:attachment_field_factories)
        end
      end
    end

  end
end
@@ -0,0 +1,19 @@
1
module SunspotCell
  module Type

    # Type object for attachment fields.
    class AttachmentType < Sunspot::Type::AbstractType
      # Indexed name of a field: the given name followed by "_attachment".
      def indexed_name(name)
        name.to_s + "_attachment"
      end

      # Pass truthy values through unchanged; nil and false both become nil.
      def to_indexed(value)
        value || nil
      end

      # Values read back need no conversion.
      def cast(text)
        text
      end
    end

  end
end
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sunspot_cell
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - liZhang
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: sunspot
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.3.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.3.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: activerecord
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '2.2'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '2.2'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: 2.0.0
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.0.0
62
+ description: This gem adds Cell support (for indexing rich documents) to Sunspot
63
+ email: zhangyicaocao@gmail.com
64
+ executables: []
65
+ extensions: []
66
+ extra_rdoc_files:
67
+ - LICENSE
68
+ - README.rdoc
69
+ files:
70
+ - LICENSE
71
+ - README.rdoc
72
+ - Rakefile
73
+ - VERSION
74
+ - init.rb
75
+ - lib/sunspot/rich_document.rb
76
+ - lib/sunspot_cell.rb
77
+ - lib/sunspot_cell/attribute_field.rb
78
+ - lib/sunspot_cell/composite_setup.rb
79
+ - lib/sunspot_cell/dsl/fields.rb
80
+ - lib/sunspot_cell/dsl/standard_query.rb
81
+ - lib/sunspot_cell/field_factory.rb
82
+ - lib/sunspot_cell/indexer.rb
83
+ - lib/sunspot_cell/setup.rb
84
+ - lib/sunspot_cell/type.rb
85
+ homepage: https://github.com/uudui/sunspot_cell
86
+ licenses:
87
+ - MIT
88
+ post_install_message:
89
+ rdoc_options:
90
+ - --charset=UTF-8
91
+ - --main
92
+ - README.rdoc
93
+ - LICENSE
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ requirements: []
109
+ rubyforge_project:
110
+ rubygems_version: 1.8.24
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: This gem adds Cell support (for indexing rich documents) to Sunspot
114
+ test_files: []