picky 2.4.0 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@
15
15
  #
16
16
  # == Index::Memory.new(name)
17
17
  #
18
- # Next, define where your data comes from. You use the <tt>Index::Memory.new</tt> method for that:
18
+ # Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Index::Memory.new</tt> method for that:
19
19
  # my_index = Index::Memory.new :some_index_name
20
20
  # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
21
21
  # class MyGreatSearch < Application
@@ -82,7 +82,7 @@
82
82
  # $ rake index
83
83
  # $ rake start
84
84
  # Run your first query:
85
- # $ curl 'localhost:8080/books/full?query=hello server'
85
+ # $ curl 'localhost:8080/books?query=hello server'
86
86
  #
87
87
  # Nice, right? Your first query!
88
88
  #
@@ -1,3 +1,79 @@
1
+ # = Picky Indexes
2
+ #
3
+ # A Picky Index defines
4
+ # * where its data comes from (a data source).
5
+ # * how this data is indexed.
6
+ # * a number of categories that may or may not map directly to data categories.
7
+ #
8
+ # == Howto
9
+ #
10
+ # This is a step-by-step description on how to create an index.
11
+ #
12
+ # Start by choosing an <tt>Index::Memory</tt> or an <tt>Index::Redis</tt>.
13
+ # In the example, we will be using an in-memory index, <tt>Index::Memory</tt>.
14
+ #
15
+ # books = Index::Memory.new(:books)
16
+ #
17
+ # That in itself won't do much good, which is why we add a data source:
18
+ #
19
+ # books = Index::Memory.new(:books) do
20
+ # source Sources::CSV.new(:title, :author, file: 'data/books.csv')
21
+ # end
22
+ #
23
+ # In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
24
+ # However, anything that responds to <tt>#each</tt>, and returns an object that
25
+ # answers to <tt>#id</tt>, works.
26
+ #
27
+ # For example, a 3.0 ActiveRecord class:
28
+ #
29
+ # books = Index::Memory.new(:books) do
30
+ # source Book.order('isbn ASC')
31
+ # end
32
+ #
33
+ # Now we know where the data comes from, but not how to categorize it.
34
+ #
35
+ # Let's add a few categories:
36
+ #
37
+ # books = Index::Memory.new(:books) do
38
+ # source Book.order('isbn ASC')
39
+ # category :title
40
+ # category :author
41
+ # category :isbn
42
+ # end
43
+ #
44
+ # Categories offer quite a few options, see <tt>Index::Base#category</tt> for details.
45
+ #
46
+ # After adding more options, it might look like this:
47
+ #
48
+ # books = Index::Memory.new(:books) do
49
+ # source Book.order('isbn ASC')
50
+ # category :title,
51
+ # partial: Partial::Substring.new(from: 1),
52
+ # similarity: Similarity::DoubleMetaphone.new(3),
53
+ # qualifiers: [:t, :title, :titulo]
54
+ # category :author,
55
+ # similarity: Similarity::Metaphone.new(2)
56
+ # category :isbn,
57
+ # partial: Partial::None.new,
58
+ # from: :legacy_isbn_name
59
+ # end
60
+ #
61
+ # For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
62
+ #
63
+ # If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
64
+ #
65
+ # books = Index::Memory.new(:books) do
66
+ # key_format :to_s
67
+ # source Book.order('isbn ASC')
68
+ # category :title
69
+ # category :author
70
+ # category :isbn
71
+ # end
72
+ #
73
+ # Finally, use the index for a <tt>Search</tt>:
74
+ #
75
+ # route %r{^/media$} => Search.new(books, dvds, mp3s)
76
+ #
1
77
  module Index
2
78
 
3
79
  # This class defines the indexing and index API that is exposed to the user
@@ -5,7 +81,7 @@ module Index
5
81
  #
6
82
  # It provides a single front for both indexing and index options. We suggest always using the index API.
7
83
  #
8
- # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
84
+ # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
9
85
  #
10
86
  class Base
11
87
 
@@ -52,16 +128,16 @@ module Index
52
128
 
53
129
  check_source internal_indexing.source
54
130
  end
55
- def internal_indexing
131
+ def internal_indexing # :nodoc:
56
132
  @indexing
57
133
  end
58
- def internal_indexed
134
+ def internal_indexed # :nodoc:
59
135
  @indexed
60
136
  end
61
137
  #
62
138
  # Since this is an API, we fail hard quickly.
63
139
  #
64
- def check_name name
140
+ def check_name name # :nodoc:
65
141
  raise ArgumentError.new(<<-NAME
66
142
 
67
143
 
@@ -74,7 +150,7 @@ NAME
74
150
 
75
151
  ) unless name.respond_to?(:to_sym)
76
152
  end
77
- def check_options options
153
+ def check_options options # :nodoc:
78
154
  raise ArgumentError.new(<<-OPTIONS
79
155
 
80
156
 
@@ -96,7 +172,7 @@ All the best
96
172
  OPTIONS
97
173
  ) unless options.respond_to?(:[])
98
174
  end
99
- def check_source source
175
+ def check_source source # :nodoc:
100
176
  raise ArgumentError.new(<<-SOURCE
101
177
 
102
178
 
@@ -110,7 +186,7 @@ SOURCE
110
186
  ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
111
187
  end
112
188
 
113
- def to_stats
189
+ def to_stats # :nodoc:
114
190
  stats = <<-INDEX
115
191
  #{name} (#{self.class}):
116
192
  #{"source: #{internal_indexing.source}".indented_to_s}
@@ -17,10 +17,10 @@ module Index
17
17
  # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
18
  #
19
19
  def initialize name, options = {}
20
- super name, options
21
-
22
20
  options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
23
21
  options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
22
+
23
+ super name, options
24
24
  end
25
25
 
26
26
  end
@@ -17,10 +17,10 @@ module Index
17
17
  # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
18
  #
19
19
  def initialize name, options = {}
20
- super name, options
21
-
22
20
  options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
23
21
  options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
22
+
23
+ super name, options
24
24
  end
25
25
 
26
26
  end
@@ -6,9 +6,7 @@ module Indexers
6
6
  #
7
7
  class Base
8
8
 
9
- # Selects the original id (indexed id) and a column to process. The column data is called "token".
10
- #
11
- # Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
9
+ # Starts the indexing process.
12
10
  #
13
11
  def index
14
12
  indexing_message
@@ -48,14 +48,14 @@ module Indexers
48
48
  flush combined
49
49
  combined.each { |_, _, file, _| file.close }
50
50
  end
51
- def flush combined
51
+ def flush combined # :nodoc:
52
52
  combined.each do |_, cache, file, _|
53
53
  file.write(cache.join) && cache.clear
54
54
  end
55
55
  end
56
56
  #
57
57
  #
58
- def indexing_message
58
+ def indexing_message # :nodoc:
59
59
  timed_exclaim %Q{"#{@index.name}": Starting parallel indexing.}
60
60
  end
61
61
 
@@ -4,7 +4,7 @@ module Indexers
4
4
 
5
5
  # Uses a category to index its data.
6
6
  #
7
- # Note: It is called serial since it indexes each
7
+ # Note: It is called serial since it indexes each category separately.
8
8
  #
9
9
  class Serial < Base
10
10
 
@@ -44,7 +44,7 @@ module Indexers
44
44
  end
45
45
  #
46
46
  #
47
- def indexing_message
47
+ def indexing_message # :nodoc:
48
48
  timed_exclaim %Q{"#{@category.identifier}": Starting serial indexing.}
49
49
  end
50
50
 
@@ -6,7 +6,7 @@ module Indexers
6
6
 
7
7
  # Deprecated. Only here as an example.
8
8
  #
9
- class Solr
9
+ class Solr # :nodoc:
10
10
 
11
11
  attr_reader :type, :fields, :solr
12
12
 
@@ -38,7 +38,8 @@ module Internals
38
38
  weights = options[:weights] || Generators::Weights::Default
39
39
  similarity = options[:similarity] || Generators::Similarity::Default
40
40
 
41
- bundle_class = options[:indexing_bundle_class] || Bundle::Memory
41
+ bundle_class = index.bundle_class || Bundle::Memory
42
+
42
43
  @exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
43
44
  @partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
44
45
  end
data/lib/picky/loader.rb CHANGED
@@ -299,6 +299,7 @@ module Loader # :nodoc:all
299
299
  load_relative 'sources/csv'
300
300
  load_relative 'sources/delicious'
301
301
  load_relative 'sources/couch'
302
+ load_relative 'sources/mongo'
302
303
 
303
304
  load_relative 'sources/wrappers/base'
304
305
  load_relative 'sources/wrappers/location'
@@ -0,0 +1,75 @@
1
+ module Sources
2
+
3
+ # Raised when a Mongo source is instantiated without both a :url and a :db option.
4
+ #
5
+ # Important!
6
+ # You have to start your mongodb with --rest in order to use
7
+ # the rest / http interface
8
+ #
9
+ class NoMongoDBGiven < StandardError; end
10
+
11
+ # Important note: We're not sure if this works already.
12
+ #
13
+ # A Mongo database source.
14
+ #
15
+ # Options:
16
+ # * url, db
17
+ # Example:
18
+ # Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
19
+ # Be sure to escape the URL properly, e.g. '#' becomes '%23' in the database name, if needed
20
+ #
21
+ # and all the options of a <tt>RestClient::Resource</tt>.
22
+ # See http://github.com/archiloque/rest-client.
23
+ #
24
+ class Mongo < Base
25
+ @@id_key = '_id'
26
+ #
27
+ #
28
+ def initialize *category_names, options
29
+ check_gem
30
+
31
+ unless options[:url] && options[:db]
32
+ raise_no_db_given(category_names)
33
+ end
34
+
35
+ @db = RestClient::Resource.new options.delete(:url), options
36
+ @database = options.delete(:db)
37
+ @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
38
+ end
39
+
40
+ # Tries to require the rest_client gem.
41
+ #
42
+ def check_gem # :nodoc:
43
+ require 'rest_client'
44
+ rescue LoadError
45
+ warn_gem_missing 'rest-client', 'the MongoDB source'
46
+ exit 1
47
+ end
48
+
49
+ # Fetches the data, @limit=0 will return all records
50
+ #
51
+ # Limit is set to 0 by default - all collection entries will be sent.
52
+ # If you want to limit the results, set it to any other number, e.g. limit=15
53
+ # to return only 15 entries
54
+ #
55
+ def harvest category
56
+ collection = (category.from || category.index_name).to_s
57
+ resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
58
+ JSON.parse(resp)['rows'].each do |row|
59
+ text = row[collection].to_s
60
+ next unless text
61
+ index_key = row.delete(@@id_key) # TODO Still works, I removed .values
62
+ yield index_key, text
63
+ end
64
+ end
65
+
66
+ def raise_no_db_given category_names # :nodoc:
67
+ raise NoMongoDBGiven.new(category_names.join(', '))
68
+ end
69
+
70
+ def to_s
71
+ self.class.name
72
+ end
73
+
74
+ end
75
+ end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Index::Files do
4
4
 
5
5
  before(:each) do
6
- index = stub :index, :name => :some_index
6
+ index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  category = Internals::Indexing::Category.new :some_category, index
8
8
 
9
9
  @files = described_class.new :some_name, category
@@ -4,7 +4,7 @@ describe Internals::Index::Redis do
4
4
 
5
5
  context 'indexing' do
6
6
  let(:category) do
7
- index = stub :index, :name => :some_index
7
+ index = stub :index, :name => :some_index, :bundle_class => nil
8
8
  category = Internals::Indexing::Category.new :some_category, index
9
9
  end
10
10
  let(:redis) { described_class.new :some_name, category }
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Bundle::Memory do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @category = Internals::Indexing::Category.new :some_category, @index
8
8
 
9
9
  @partial_strategy = Internals::Generators::Partial::Substring.new :from => 1
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Bundle::Memory do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @category = Internals::Indexing::Category.new :some_category, @index
8
8
 
9
9
  @partial = stub :partial
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Bundle::Redis do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @category = Internals::Indexing::Category.new :some_category, @index
8
8
 
9
9
  @partial = stub :partial
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Category do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @source = stub :some_given_source, :key_format => nil
8
8
  end
9
9
  let(:category) { described_class.new(:some_category, @index, :source => @source).tap { |c| c.stub! :timed_exclaim } }
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ describe Sources::Mongo do
4
+
5
+ describe 'key_format' do
6
+ context 'default' do
7
+ let(:source) { Sources::Mongo.new(:a, :b, :url => 'someurl', :db => 'somedb') }
8
+ it 'is correct' do
9
+ source.key_format.should == :to_sym
10
+ end
11
+ end
12
+
13
+ context 'non-default' do
14
+ let(:source) { Sources::Mongo.new(:a, :b, :url => 'bla', :db => 'somedb', :key_format => 'some_key_method') }
15
+ it 'is correct' do
16
+ source.key_format.should == :some_key_method
17
+ end
18
+ end
19
+ end
20
+
21
+ describe 'to_s' do
22
+ let(:source) { Sources::Mongo.new(:a, :b, :url => 'someurl', :db => 'somedb') }
23
+ it 'is correct' do
24
+ source.to_s.should == 'Sources::Mongo'
25
+ end
26
+ end
27
+
28
+ context "without database" do
29
+ it "should fail correctly" do
30
+ lambda { @source = Sources::Mongo.new(:a, :b, :url => 'someurl') }.should raise_error(Sources::NoMongoDBGiven)
31
+ end
32
+ end
33
+
34
+ context "with database" do
35
+ before(:each) do
36
+ @source = Sources::Mongo.new :a, :b, :url => 'someurl', :db => 'somedb'
37
+ RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"_id":"7f","a":"a data","b":"b data","c":"c data"}]}}
38
+ end
39
+
40
+ describe "harvest" do
41
+ it "yields the right data" do
42
+ category = stub :b, :from => :b, :index_name => :some_index_name
43
+ @source.harvest category do |id, token|
44
+ id.should eql('7f')
45
+ token.should eql('b data')
46
+ end.should have(1).item
47
+ end
48
+ end
49
+ end
50
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.4.0
5
+ version: 2.4.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-20 00:00:00 +10:00
13
+ date: 2011-05-25 00:00:00 +10:00
14
14
  default_executable: picky
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
32
32
  requirements:
33
33
  - - "="
34
34
  - !ruby/object:Gem::Version
35
- version: 2.4.0
35
+ version: 2.4.1
36
36
  type: :development
37
37
  version_requirements: *id002
38
38
  description: Fast Ruby semantic text search engine with comfortable single field interface.
@@ -159,6 +159,7 @@ files:
159
159
  - lib/picky/sources/csv.rb
160
160
  - lib/picky/sources/db.rb
161
161
  - lib/picky/sources/delicious.rb
162
+ - lib/picky/sources/mongo.rb
162
163
  - lib/picky/sources/wrappers/base.rb
163
164
  - lib/picky/sources/wrappers/location.rb
164
165
  - lib/picky/statistics.rb
@@ -269,6 +270,7 @@ files:
269
270
  - spec/lib/sources/csv_spec.rb
270
271
  - spec/lib/sources/db_spec.rb
271
272
  - spec/lib/sources/delicious_spec.rb
273
+ - spec/lib/sources/mongo_spec.rb
272
274
  - spec/lib/sources/wrappers/base_spec.rb
273
275
  - spec/lib/sources/wrappers/location_spec.rb
274
276
  - spec/lib/statistics_spec.rb
@@ -298,7 +300,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
298
300
  requirements: []
299
301
 
300
302
  rubyforge_project: http://rubyforge.org/projects/picky
301
- rubygems_version: 1.5.0
303
+ rubygems_version: 1.6.2
302
304
  signing_key:
303
305
  specification_version: 3
304
306
  summary: "Picky: Semantic Search Engine. Clever Interface. Good Tools."
@@ -394,6 +396,7 @@ test_files:
394
396
  - spec/lib/sources/csv_spec.rb
395
397
  - spec/lib/sources/db_spec.rb
396
398
  - spec/lib/sources/delicious_spec.rb
399
+ - spec/lib/sources/mongo_spec.rb
397
400
  - spec/lib/sources/wrappers/base_spec.rb
398
401
  - spec/lib/sources/wrappers/location_spec.rb
399
402
  - spec/lib/statistics_spec.rb