picky 2.4.0 → 2.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,7 +15,7 @@
15
15
  #
16
16
  # == Index::Memory.new(name)
17
17
  #
18
- # Next, define where your data comes from. You use the <tt>Index::Memory.new</tt> method for that:
18
+ # Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Index::Memory.new</tt> method for that:
19
19
  # my_index = Index::Memory.new :some_index_name
20
20
  # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
21
21
  # class MyGreatSearch < Application
@@ -82,7 +82,7 @@
82
82
  # $ rake index
83
83
  # $ rake start
84
84
  # Run your first query:
85
- # $ curl 'localhost:8080/books/full?query=hello server'
85
+ # $ curl 'localhost:8080/books?query=hello server'
86
86
  #
87
87
  # Nice, right? Your first query!
88
88
  #
@@ -1,3 +1,79 @@
1
+ # = Picky Indexes
2
+ #
3
+ # A Picky Index defines
4
+ # * where its data comes from (a data source).
5
+ # * how this data is indexed.
6
+ # * a number of categories that may or may not map directly to data categories.
7
+ #
8
+ # == Howto
9
+ #
10
+ # This is a step-by-step description on how to create an index.
11
+ #
12
+ # Start by choosing an <tt>Index::Memory</tt> or an <tt>Index::Redis</tt>.
13
+ # In the example, we will be using an in-memory index, <tt>Index::Memory</tt>.
14
+ #
15
+ # books = Index::Memory.new(:books)
16
+ #
17
+ # That in itself won't do much good, that's why we add a data source:
18
+ #
19
+ # books = Index::Memory.new(:books) do
20
+ # source Sources::CSV.new(:title, :author, file: 'data/books.csv')
21
+ # end
22
+ #
23
+ # In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
24
+ # However, anything that responds to <tt>#each</tt>, and returns an object that
25
+ # answers to <tt>#id</tt>, works.
26
+ #
27
+ # For example, a 3.0 ActiveRecord class:
28
+ #
29
+ # books = Index::Memory.new(:books) do
30
+ # source Book.order('isbn ASC')
31
+ # end
32
+ #
33
+ # Now we know where the data comes from, but not how to categorize it.
34
+ #
35
+ # Let's add a few categories:
36
+ #
37
+ # books = Index::Memory.new(:books) do
38
+ # source Book.order('isbn ASC')
39
+ # category :title
40
+ # category :author
41
+ # category :isbn
42
+ # end
43
+ #
44
+ # Categories offer quite a few options, see <tt>Index::Base#category</tt> for details.
45
+ #
46
+ # After adding more options, it might look like this:
47
+ #
48
+ # books = Index::Memory.new(:books) do
49
+ # source Book.order('isbn ASC')
50
+ # category :title,
51
+ # partial: Partial::Substring.new(from: 1),
52
+ # similarity: Similarity::DoubleMetaphone.new(3),
53
+ # qualifiers: [:t, :title, :titulo]
54
+ # category :author,
55
+ # similarity: Similarity::Metaphone.new(2)
56
+ # category :isbn,
57
+ # partial: Partial::None.new,
58
+ # from: :legacy_isbn_name
59
+ # end
60
+ #
61
+ # For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
62
+ #
63
+ # If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
64
+ #
65
+ # books = Index::Memory.new(:books) do
66
+ # key_format :to_s
67
+ # source Book.order('isbn ASC')
68
+ # category :title
69
+ # category :author
70
+ # category :isbn
71
+ # end
72
+ #
73
+ # Finally, use the index for a <tt>Search</tt>:
74
+ #
75
+ # route %r{^/media$} => Search.new(books, dvds, mp3s)
76
+ #
1
77
  module Index
2
78
 
3
79
  # This class defines the indexing and index API that is exposed to the user
@@ -5,7 +81,7 @@ module Index
5
81
  #
6
82
  # It provides a single front for both indexing and index options. We suggest to always use the index API.
7
83
  #
8
- # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
84
+ # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
9
85
  #
10
86
  class Base
11
87
 
@@ -52,16 +128,16 @@ module Index
52
128
 
53
129
  check_source internal_indexing.source
54
130
  end
55
- def internal_indexing
131
+ def internal_indexing # :nodoc:
56
132
  @indexing
57
133
  end
58
- def internal_indexed
134
+ def internal_indexed # :nodoc:
59
135
  @indexed
60
136
  end
61
137
  #
62
138
  # Since this is an API, we fail hard quickly.
63
139
  #
64
- def check_name name
140
+ def check_name name # :nodoc:
65
141
  raise ArgumentError.new(<<-NAME
66
142
 
67
143
 
@@ -74,7 +150,7 @@ NAME
74
150
 
75
151
  ) unless name.respond_to?(:to_sym)
76
152
  end
77
- def check_options options
153
+ def check_options options # :nodoc:
78
154
  raise ArgumentError.new(<<-OPTIONS
79
155
 
80
156
 
@@ -96,7 +172,7 @@ All the best
96
172
  OPTIONS
97
173
  ) unless options.respond_to?(:[])
98
174
  end
99
- def check_source source
175
+ def check_source source # :nodoc:
100
176
  raise ArgumentError.new(<<-SOURCE
101
177
 
102
178
 
@@ -110,7 +186,7 @@ SOURCE
110
186
  ) unless source.respond_to?(:each) || source.respond_to?(:harvest)
111
187
  end
112
188
 
113
- def to_stats
189
+ def to_stats # :nodoc:
114
190
  stats = <<-INDEX
115
191
  #{name} (#{self.class}):
116
192
  #{"source: #{internal_indexing.source}".indented_to_s}
@@ -17,10 +17,10 @@ module Index
17
17
  # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
18
  #
19
19
  def initialize name, options = {}
20
- super name, options
21
-
22
20
  options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
23
21
  options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
22
+
23
+ super name, options
24
24
  end
25
25
 
26
26
  end
@@ -17,10 +17,10 @@ module Index
17
17
  # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
18
18
  #
19
19
  def initialize name, options = {}
20
- super name, options
21
-
22
20
  options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
23
21
  options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
22
+
23
+ super name, options
24
24
  end
25
25
 
26
26
  end
@@ -6,9 +6,7 @@ module Indexers
6
6
  #
7
7
  class Base
8
8
 
9
- # Selects the original id (indexed id) and a column to process. The column data is called "token".
10
- #
11
- # Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
9
+ # Starts the indexing process.
12
10
  #
13
11
  def index
14
12
  indexing_message
@@ -48,14 +48,14 @@ module Indexers
48
48
  flush combined
49
49
  combined.each { |_, _, file, _| file.close }
50
50
  end
51
- def flush combined
51
+ def flush combined # :nodoc:
52
52
  combined.each do |_, cache, file, _|
53
53
  file.write(cache.join) && cache.clear
54
54
  end
55
55
  end
56
56
  #
57
57
  #
58
- def indexing_message
58
+ def indexing_message # :nodoc:
59
59
  timed_exclaim %Q{"#{@index.name}": Starting parallel indexing.}
60
60
  end
61
61
 
@@ -4,7 +4,7 @@ module Indexers
4
4
 
5
5
  # Uses a category to index its data.
6
6
  #
7
- # Note: It is called serial since it indexes each
7
+ # Note: It is called serial since it indexes each category separately.
8
8
  #
9
9
  class Serial < Base
10
10
 
@@ -44,7 +44,7 @@ module Indexers
44
44
  end
45
45
  #
46
46
  #
47
- def indexing_message
47
+ def indexing_message # :nodoc:
48
48
  timed_exclaim %Q{"#{@category.identifier}": Starting serial indexing.}
49
49
  end
50
50
 
@@ -6,7 +6,7 @@ module Indexers
6
6
 
7
7
  # Deprecated. Only here as an example.
8
8
  #
9
- class Solr
9
+ class Solr # :nodoc:
10
10
 
11
11
  attr_reader :type, :fields, :solr
12
12
 
@@ -38,7 +38,8 @@ module Internals
38
38
  weights = options[:weights] || Generators::Weights::Default
39
39
  similarity = options[:similarity] || Generators::Similarity::Default
40
40
 
41
- bundle_class = options[:indexing_bundle_class] || Bundle::Memory
41
+ bundle_class = index.bundle_class || Bundle::Memory
42
+
42
43
  @exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
43
44
  @partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
44
45
  end
data/lib/picky/loader.rb CHANGED
@@ -299,6 +299,7 @@ module Loader # :nodoc:all
299
299
  load_relative 'sources/csv'
300
300
  load_relative 'sources/delicious'
301
301
  load_relative 'sources/couch'
302
+ load_relative 'sources/mongo'
302
303
 
303
304
  load_relative 'sources/wrappers/base'
304
305
  load_relative 'sources/wrappers/location'
@@ -0,0 +1,75 @@
1
+ module Sources
2
+
3
+ # Raised when a Mongo source is instantiated without both a :url and a :db option.
4
+ #
5
+ # Important!
6
+ # You have to start your mongodb with --rest in order to use
7
+ # the rest / http interface
8
+ #
9
+ class NoMongoDBGiven < StandardError; end
10
+
11
+ # Important note: We're not sure if this works already.
12
+ #
13
+ # A Mongo database source.
14
+ #
15
+ # Options:
16
+ # * url, db
17
+ # Example:
18
+ # Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
19
+ # Be sure to escape the URL properly, e.g. # => %23 in the database name if needed
20
+ #
21
+ # and all the options of a <tt>RestClient::Resource</tt>.
22
+ # See http://github.com/archiloque/rest-client.
23
+ #
24
+ class Mongo < Base
25
+ @@id_key = '_id'
26
+ #
27
+ #
28
+ def initialize *category_names, options
29
+ check_gem
30
+
31
+ unless options[:url] && options[:db]
32
+ raise_no_db_given(category_names)
33
+ end
34
+
35
+ @db = RestClient::Resource.new options.delete(:url), options
36
+ @database = options.delete(:db)
37
+ @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
38
+ end
39
+
40
+ # Tries to require the rest_client gem.
41
+ #
42
+ def check_gem # :nodoc:
43
+ require 'rest_client'
44
+ rescue LoadError
45
+ warn_gem_missing 'rest-client', 'the MongoDB source'
46
+ exit 1
47
+ end
48
+
49
+ # Fetches the data, @limit=0 will return all records
50
+ #
51
+ # Limit is set to 0 by default - all collection entries will be sent
52
+ # If you want to limit the results, set it to any other number, e.g. limit=15
53
+ # to return only 15 entries
54
+ #
55
+ def harvest category
56
+ collection = (category.from || category.index_name).to_s
57
+ resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
58
+ JSON.parse(resp)['rows'].each do |row|
59
+ text = row[collection].to_s
60
+ next unless text
61
+ index_key = row.delete(@@id_key) # TODO Still works, I removed .values
62
+ yield index_key, text
63
+ end
64
+ end
65
+
66
+ def raise_no_db_given category_names # :nodoc:
67
+ raise NoMongoDBGiven.new(category_names.join(', '))
68
+ end
69
+
70
+ def to_s
71
+ self.class.name
72
+ end
73
+
74
+ end
75
+ end
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Index::Files do
4
4
 
5
5
  before(:each) do
6
- index = stub :index, :name => :some_index
6
+ index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  category = Internals::Indexing::Category.new :some_category, index
8
8
 
9
9
  @files = described_class.new :some_name, category
@@ -4,7 +4,7 @@ describe Internals::Index::Redis do
4
4
 
5
5
  context 'indexing' do
6
6
  let(:category) do
7
- index = stub :index, :name => :some_index
7
+ index = stub :index, :name => :some_index, :bundle_class => nil
8
8
  category = Internals::Indexing::Category.new :some_category, index
9
9
  end
10
10
  let(:redis) { described_class.new :some_name, category }
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Bundle::Memory do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @category = Internals::Indexing::Category.new :some_category, @index
8
8
 
9
9
  @partial_strategy = Internals::Generators::Partial::Substring.new :from => 1
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Bundle::Memory do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @category = Internals::Indexing::Category.new :some_category, @index
8
8
 
9
9
  @partial = stub :partial
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Bundle::Redis do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @category = Internals::Indexing::Category.new :some_category, @index
8
8
 
9
9
  @partial = stub :partial
@@ -3,7 +3,7 @@ require 'spec_helper'
3
3
  describe Internals::Indexing::Category do
4
4
 
5
5
  before(:each) do
6
- @index = stub :index, :name => :some_index
6
+ @index = stub :index, :name => :some_index, :bundle_class => nil
7
7
  @source = stub :some_given_source, :key_format => nil
8
8
  end
9
9
  let(:category) { described_class.new(:some_category, @index, :source => @source).tap { |c| c.stub! :timed_exclaim } }
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ describe Sources::Mongo do
4
+
5
+ describe 'key_format' do
6
+ context 'default' do
7
+ let(:source) { Sources::Mongo.new(:a, :b, :url => 'someurl', :db => 'somedb') }
8
+ it 'is correct' do
9
+ source.key_format.should == :to_sym
10
+ end
11
+ end
12
+
13
+ context 'non-default' do
14
+ let(:source) { Sources::Mongo.new(:a, :b, :url => 'bla', :db => 'somedb', :key_format => 'some_key_method') }
15
+ it 'is correct' do
16
+ source.key_format.should == :some_key_method
17
+ end
18
+ end
19
+ end
20
+
21
+ describe 'to_s' do
22
+ let(:source) { Sources::Mongo.new(:a, :b, :url => 'someurl', :db => 'somedb') }
23
+ it 'is correct' do
24
+ source.to_s.should == 'Sources::Mongo'
25
+ end
26
+ end
27
+
28
+ context "without database" do
29
+ it "should fail correctly" do
30
+ lambda { @source = Sources::Mongo.new(:a, :b, :url => 'someurl') }.should raise_error(Sources::NoMongoDBGiven)
31
+ end
32
+ end
33
+
34
+ context "with database" do
35
+ before(:each) do
36
+ @source = Sources::Mongo.new :a, :b, :url => 'someurl', :db => 'somedb'
37
+ RestClient::Request.should_receive(:execute).any_number_of_times.and_return %{{"rows":[{"_id":"7f","a":"a data","b":"b data","c":"c data"}]}}
38
+ end
39
+
40
+ describe "harvest" do
41
+ it "yields the right data" do
42
+ category = stub :b, :from => :b, :index_name => :some_index_name
43
+ @source.harvest category do |id, token|
44
+ id.should eql('7f')
45
+ token.should eql('b data')
46
+ end.should have(1).item
47
+ end
48
+ end
49
+ end
50
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.4.0
5
+ version: 2.4.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-20 00:00:00 +10:00
13
+ date: 2011-05-25 00:00:00 +10:00
14
14
  default_executable: picky
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
32
32
  requirements:
33
33
  - - "="
34
34
  - !ruby/object:Gem::Version
35
- version: 2.4.0
35
+ version: 2.4.1
36
36
  type: :development
37
37
  version_requirements: *id002
38
38
  description: Fast Ruby semantic text search engine with comfortable single field interface.
@@ -159,6 +159,7 @@ files:
159
159
  - lib/picky/sources/csv.rb
160
160
  - lib/picky/sources/db.rb
161
161
  - lib/picky/sources/delicious.rb
162
+ - lib/picky/sources/mongo.rb
162
163
  - lib/picky/sources/wrappers/base.rb
163
164
  - lib/picky/sources/wrappers/location.rb
164
165
  - lib/picky/statistics.rb
@@ -269,6 +270,7 @@ files:
269
270
  - spec/lib/sources/csv_spec.rb
270
271
  - spec/lib/sources/db_spec.rb
271
272
  - spec/lib/sources/delicious_spec.rb
273
+ - spec/lib/sources/mongo_spec.rb
272
274
  - spec/lib/sources/wrappers/base_spec.rb
273
275
  - spec/lib/sources/wrappers/location_spec.rb
274
276
  - spec/lib/statistics_spec.rb
@@ -298,7 +300,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
298
300
  requirements: []
299
301
 
300
302
  rubyforge_project: http://rubyforge.org/projects/picky
301
- rubygems_version: 1.5.0
303
+ rubygems_version: 1.6.2
302
304
  signing_key:
303
305
  specification_version: 3
304
306
  summary: "Picky: Semantic Search Engine. Clever Interface. Good Tools."
@@ -394,6 +396,7 @@ test_files:
394
396
  - spec/lib/sources/csv_spec.rb
395
397
  - spec/lib/sources/db_spec.rb
396
398
  - spec/lib/sources/delicious_spec.rb
399
+ - spec/lib/sources/mongo_spec.rb
397
400
  - spec/lib/sources/wrappers/base_spec.rb
398
401
  - spec/lib/sources/wrappers/location_spec.rb
399
402
  - spec/lib/statistics_spec.rb