picky 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/bundling.rb CHANGED
@@ -4,5 +4,5 @@ rescue LoadError => e
4
4
  require 'rubygems'
5
5
  require 'bundler'
6
6
  end
7
- Bundler.setup SEARCH_ENVIRONMENT
7
+ Bundler.setup PICKY_ENVIRONMENT
8
8
  Bundler.require
data/lib/constants.rb CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  # Use rack's environment for the search engine.
5
5
  #
6
- ENV['SEARCH_ENV'] ||= ENV['RACK_ENV']
6
+ ENV['PICKY_ENV'] ||= ENV['RACK_ENV']
7
7
 
8
- SEARCH_ENVIRONMENT = ENV['SEARCH_ENV'] || 'development' unless defined? SEARCH_ENVIRONMENT
9
- SEARCH_ROOT = Dir.pwd unless defined? SEARCH_ROOT
8
+ PICKY_ENVIRONMENT = ENV['PICKY_ENV'] || 'development' unless defined? PICKY_ENVIRONMENT
9
+ PICKY_ROOT = Dir.pwd unless defined? PICKY_ROOT
data/lib/deployment.rb CHANGED
@@ -13,12 +13,12 @@ module Picky
13
13
  # Executes a rake task on the server.
14
14
  #
15
15
  # Options:
16
- # * env: The SEARCH_ENV. Will not set if set explicitly to false. Default: production.
16
+ # * env: The PICKY_ENV. Will not set if set explicitly to false. Default: production.
17
17
  # * All other options get passed on to the Capistrano run task.
18
18
  #
19
19
  def execute_rake_task name, options = {}, &block
20
20
  env = options.delete :env
21
- env = env == false ? '' : "SEARCH_ENV=#{env || 'production'}"
21
+ env = env == false ? '' : "PICKY_ENV=#{env || 'production'}"
22
22
  run "cd #{current_path}; rake #{name} #{env}", options, &block
23
23
  end
24
24
 
@@ -33,12 +33,12 @@ module Configuration
33
33
  # TODO Move to type, and use in bundle from there.
34
34
  #
35
35
  def search_index_root
36
- File.join SEARCH_ROOT, 'index'
36
+ File.join PICKY_ROOT, 'index'
37
37
  end
38
38
  # TODO Move to config. Duplicate Code in field.rb.
39
39
  #
40
40
  def cache_directory
41
- File.join search_index_root, SEARCH_ENVIRONMENT, type.name.to_s
41
+ File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
42
42
  end
43
43
  def search_index_file_name
44
44
  File.join cache_directory, "#{type.name}_#{name}_index.txt"
@@ -45,5 +45,10 @@ module Configuration
45
45
  @indexer = Indexers::Solr.new self
46
46
  @indexer.index
47
47
  end
48
+ # TODO Spec!
49
+ #
50
+ def connect_backend
51
+ @source.connect_backend
52
+ end
48
53
  end
49
54
  end
@@ -56,7 +56,7 @@ module Index
56
56
  # Point to category.
57
57
  #
58
58
  def search_index_root
59
- File.join SEARCH_ROOT, 'index'
59
+ File.join PICKY_ROOT, 'index'
60
60
  # category.search_index_root
61
61
  end
62
62
 
@@ -145,7 +145,7 @@ module Index
145
145
  # TODO Move to config. Duplicate Code in field.rb.
146
146
  #
147
147
  def cache_directory
148
- File.join search_index_root, SEARCH_ENVIRONMENT, type.name.to_s
148
+ File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
149
149
  end
150
150
 
151
151
  # Generates a cache path.
@@ -44,8 +44,10 @@ module Indexers
44
44
  comma = ?,
45
45
  newline = ?\n
46
46
 
47
+ indexing_message
48
+
47
49
  File.open(search_index_file_name, 'w:binary') do |file|
48
- chunked do |indexed_id, text|
50
+ source.harvest(@type, @field) do |indexed_id, text|
49
51
  tokenizer.tokenize(text).each do |token_text|
50
52
  file.write indexed_id
51
53
  file.write comma
@@ -55,22 +57,9 @@ module Indexers
55
57
  end
56
58
  end
57
59
  end
58
- # Split original data into chunks.
59
- #
60
- def chunked
61
- (0..source.count(@type)).step(chunksize) do |offset|
62
- indexing_message offset
63
- data = source.harvest @type, @field, offset, chunksize
64
- data.each do |indexed_id, text|
65
- next unless text
66
- text.force_encoding 'utf-8' # TODO Still needed?
67
- yield indexed_id, text
68
- end
69
- end
70
- end
71
60
 
72
- def indexing_message offset
73
- puts "#{Time.now}: Indexing #{@type.name}:#{@field.name}:#{@field.indexed_name} beginning at #{offset}."
61
+ def indexing_message
62
+ puts "#{Time.now}: Indexing #{@type.name}:#{@field.name}:#{@field.indexed_name}."
74
63
  end
75
64
 
76
65
  end
@@ -2,12 +2,8 @@ module Indexers
2
2
  # Base indexer for fields.
3
3
  #
4
4
  class Field < Base
5
-
6
- # Override in subclasses.
7
- #
8
- def chunksize
9
- 25_000
10
- end
5
+
6
+ # TODO Still needed?
11
7
 
12
8
  end
13
9
  end
data/lib/picky/indexes.rb CHANGED
@@ -20,7 +20,7 @@ module Indexes
20
20
  Cores.forked self.fields, :randomly => true do |field|
21
21
  # Reestablish DB connection.
22
22
  #
23
- DB.connect # TODO Rewrite!
23
+ connect_backends
24
24
  field.index
25
25
  field.cache
26
26
  end
@@ -29,6 +29,14 @@ module Indexes
29
29
  configuration.index_solr
30
30
  end
31
31
 
32
+ # TODO Push into configuration.
33
+ #
34
+ def self.connect_backends
35
+ configuration.types.each do |type|
36
+ type.connect_backend
37
+ end
38
+ end
39
+
32
40
  # Returns an array of fields.
33
41
  #
34
42
  # TODO Rewrite.
data/lib/picky/loader.rb CHANGED
@@ -6,7 +6,7 @@ module Loader
6
6
  # First itself, then the app.
7
7
  #
8
8
  def self.reload
9
- Dir.chdir(SEARCH_ROOT)
9
+ Dir.chdir(PICKY_ROOT)
10
10
  exclaim 'Reloading loader.'
11
11
  load_self
12
12
  exclaim 'Reloading framework.'
@@ -30,13 +30,13 @@ module Loader
30
30
  end
31
31
 
32
32
  def self.load_user filename
33
- load File.join(SEARCH_ROOT, "#{filename}.rb")
33
+ load File.join(PICKY_ROOT, "#{filename}.rb")
34
34
  end
35
35
  def self.load_user_lib filename
36
36
  load_user File.join('lib', filename)
37
37
  end
38
38
  def self.load_all_user_in dirname
39
- Dir[File.join(SEARCH_ROOT, dirname, '**', '*.rb')].each do |filename|
39
+ Dir[File.join(PICKY_ROOT, dirname, '**', '*.rb')].each do |filename|
40
40
  load filename
41
41
  end
42
42
  end
@@ -203,10 +203,7 @@ module Loader
203
203
  #
204
204
  load_relative 'sources/base'
205
205
  load_relative 'sources/db'
206
-
207
- # DB
208
- #
209
- load_relative 'db/configuration'
206
+ load_relative 'sources/csv'
210
207
 
211
208
  # Indexes.
212
209
  #
@@ -178,13 +178,13 @@ module Query
178
178
  def to_solr
179
179
  blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
180
180
  end
181
-
181
+
182
182
  #
183
183
  #
184
184
  def to_result
185
185
  [@original, @text]
186
186
  end
187
-
187
+
188
188
  # Displays the qualifier text and the text, joined.
189
189
  #
190
190
  # e.g. name:meier
@@ -192,9 +192,9 @@ module Query
192
192
  def to_s
193
193
  [@qualifier, @text].compact.join ':'
194
194
  end
195
-
195
+
196
196
  private
197
-
197
+
198
198
  # Splits text into a qualifier and text.
199
199
  #
200
200
  # Returns [qualifier, text].
@@ -52,7 +52,7 @@ module Solr
52
52
  #
53
53
  #
54
54
  def read_template
55
- template_path = File.join SEARCH_ROOT, 'solr', 'conf', 'schema.xml.erb'
55
+ template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
56
56
  schema = ''
57
57
  File.open(template_path, 'r') do |f|
58
58
  schema = f.read
@@ -63,7 +63,7 @@ module Solr
63
63
  #
64
64
  #
65
65
  def write result
66
- schema_path = File.join SEARCH_ROOT, 'solr', 'conf', 'schema.xml'
66
+ schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
67
67
  File.open(schema_path, 'w') do |f|
68
68
  f << result
69
69
  end
@@ -2,15 +2,15 @@ module Sources
2
2
 
3
3
  class Base
4
4
 
5
- def take_snapshot type
5
+ def connect_backend
6
6
 
7
7
  end
8
8
 
9
- def count type
9
+ def take_snapshot type
10
10
 
11
11
  end
12
12
 
13
- def harvest offset
13
+ def harvest type, field
14
14
 
15
15
  end
16
16
 
@@ -1,28 +1,41 @@
1
+ require 'CSV'
2
+
1
3
  module Sources
2
4
 
5
+ class NoCSVFileGiven < StandardError; end
6
+
3
7
  class CSV < Base
4
8
 
5
- attr_reader :file_name
9
+ attr_reader :file_name, :field_names
10
+
11
+ def initialize *field_names, options
12
+ @field_names = field_names
13
+ @file_name = Hash === options && options[:file] || raise_no_file_given(field_names)
14
+ end
6
15
 
7
- def initialize file_name, *field_names
8
- @file_name = file_name
9
- @field_names
16
+ #
17
+ #
18
+ def raise_no_file_given field_names
19
+ raise NoCSVFileGiven.new field_names.join(', ')
10
20
  end
11
21
 
12
- # Counts all the entries that are used for the index.
22
+ # Harvests the data to index.
13
23
  #
14
- def count type
15
- `wc -l #{file_name}`
24
+ def harvest _, field
25
+ index = field_names.index field.name
26
+ get_data do |ary|
27
+ indexed_id = ary.shift.to_i
28
+ text = ary[index]
29
+ next unless text
30
+ text.force_encoding 'utf-8' # TODO Still needed?
31
+ yield indexed_id, text
32
+ end
16
33
  end
17
34
 
18
- # Harvests the data to index, chunked.
19
35
  #
20
- # Subclasses should override harvest_statement to define how their data is found.
21
- # Example:
22
- # "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
23
36
  #
24
- def harvest offset
25
- File.open file_name, 'r'
37
+ def get_data &block
38
+ ::CSV.foreach file_name, &block
26
39
  end
27
40
 
28
41
  end
@@ -2,17 +2,57 @@ module Sources
2
2
 
3
3
  class DB < Base
4
4
 
5
- attr_reader :select_statement, :database
5
+ attr_reader :select_statement, :database, :connection_options
6
6
 
7
- def initialize select_statement, database_adapter
7
+ def initialize select_statement, with_options = { :file => 'app/db.yml' }
8
8
  @select_statement = select_statement
9
- @database = database_adapter
9
+ @database = create_database_adapter
10
+ configure with_options
11
+ end
12
+
13
+ # Get a configured Database backend.
14
+ #
15
+ # Options:
16
+ # Either
17
+ # * file => 'some/filename.yml' # With an active record configuration.
18
+ # Or
19
+ # * The configuration as a hash.
20
+ #
21
+ def create_database_adapter
22
+ adapter_class = Class.new ActiveRecord::Base
23
+ adapter_class.abstract_class = true
24
+ adapter_class
25
+ end
26
+
27
+ # Configure the backend.
28
+ #
29
+ # Options:
30
+ # Either
31
+ # * file => 'some/filename.yml' # With an active record configuration.
32
+ # Or
33
+ # * The configuration as a hash.
34
+ #
35
+ def configure options
36
+ @connection_options = if filename = options[:file]
37
+ File.open(File.join(PICKY_ROOT, filename)) { |f| YAML::load(f) }
38
+ else
39
+ options
40
+ end
41
+ self
42
+ end
43
+
44
+ # Connect the backend.
45
+ #
46
+ def connect_backend
47
+ return if PICKY_ENVIRONMENT.to_s == 'test' # TODO Unclean.
48
+ raise "Database backend not configured" unless connection_options
49
+ database.establish_connection connection_options
10
50
  end
11
51
 
12
52
  # Take the snapshot.
13
53
  #
14
54
  def take_snapshot type
15
- database.connect
55
+ connect_backend
16
56
 
17
57
  origin = snapshot_table_name type
18
58
 
@@ -29,6 +69,8 @@ module Sources
29
69
  # Counts all the entries that are used for the index.
30
70
  #
31
71
  def count type
72
+ connect_backend
73
+
32
74
  database.connection.select_value("SELECT COUNT(id) FROM #{snapshot_table_name(type)}").to_i
33
75
  end
34
76
 
@@ -44,10 +86,28 @@ module Sources
44
86
  # Example:
45
87
  # "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
46
88
  #
47
- def harvest type, field, offset, chunksize
48
- database.connect
89
+ def harvest type, field
90
+ connect_backend
49
91
 
50
- database.connection.execute harvest_statement_with_offset(type, field, offset, chunksize)
92
+ (0..count(type)).step(chunksize) do |offset|
93
+ get_data(type, field, offset).each do |indexed_id, text|
94
+ next unless text
95
+ text.force_encoding 'utf-8' # TODO Still needed?
96
+ yield indexed_id, text
97
+ end
98
+ end
99
+ end
100
+
101
+ # Override in subclasses.
102
+ #
103
+ def chunksize
104
+ 25_000
105
+ end
106
+
107
+ # Gets database from the backend.
108
+ #
109
+ def get_data type, field, offset
110
+ database.connection.execute harvest_statement_with_offset(type, field, offset)
51
111
  end
52
112
 
53
113
  # Base harvest statement for dbs.
@@ -60,7 +120,7 @@ module Sources
60
120
  #
61
121
  # TODO Use the adapter for this.
62
122
  #
63
- def harvest_statement_with_offset type, field, offset, chunksize
123
+ def harvest_statement_with_offset type, field, offset
64
124
  statement = harvest_statement type, field
65
125
 
66
126
  if statement.include? 'WHERE'
data/lib/picky.rb CHANGED
@@ -15,4 +15,4 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'picky', 'loader'))
15
15
  # Load the framework
16
16
  #
17
17
  Loader.load_framework
18
- puts "Loaded picky with environment '#{SEARCH_ENVIRONMENT}' in #{SEARCH_ROOT} on Ruby #{RUBY_VERSION}."
18
+ puts "Loaded picky with environment '#{PICKY_ENVIRONMENT}' in #{PICKY_ROOT} on Ruby #{RUBY_VERSION}."
@@ -3,11 +3,11 @@
3
3
  namespace :server do
4
4
 
5
5
  def chdir_to_root
6
- Dir.chdir SEARCH_ROOT
6
+ Dir.chdir PICKY_ROOT
7
7
  end
8
8
 
9
9
  def current_pid
10
- pid = `cat #{File.join(SEARCH_ROOT, 'tmp/pids/unicorn.pid')}`
10
+ pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
11
11
  pid.blank? ? nil : pid.chomp
12
12
  end
13
13
 
@@ -15,8 +15,8 @@ namespace :server do
15
15
  task :start => :framework do
16
16
  chdir_to_root
17
17
  # Rake::Task[:"solr:start"].invoke # TODO Move to better place.
18
- daemonize = SEARCH_ENVIRONMENT == 'production' ? '-D' : ''
19
- command = "export SEARCH_ENV=#{SEARCH_ENVIRONMENT}; unicorn -c unicorn.ru #{daemonize}".strip
18
+ daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
19
+ command = "export PICKY_ENV=#{PICKY_ENVIRONMENT}; unicorn -c unicorn.ru #{daemonize}".strip
20
20
  puts "Running \`#{command}\`."
21
21
  exec command
22
22
  end
data/lib/tasks/solr.rake CHANGED
@@ -18,7 +18,7 @@ namespace :solr do
18
18
 
19
19
 
20
20
  def action name
21
- `sunspot-solr #{name} --solr-home=solr --data-directory=index/#{SEARCH_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
21
+ `sunspot-solr #{name} --solr-home=solr --data-directory=index/#{PICKY_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
22
22
  end
23
23
  task :start => :application do
24
24
  Rake::Task['solr:schema:generate'].invoke
@@ -2,12 +2,12 @@ namespace :statistics do
2
2
 
3
3
  desc "start the server"
4
4
  task :start => :application do
5
- Statistics.start unless SEARCH_ENVIRONMENT == 'test'
5
+ Statistics.start unless PICKY_ENVIRONMENT == 'test'
6
6
  end
7
7
 
8
8
  desc "stop the server"
9
9
  task :stop => :application do
10
- Statistics.stop unless SEARCH_ENVIRONMENT == 'test'
10
+ Statistics.stop unless PICKY_ENVIRONMENT == 'test'
11
11
  end
12
12
 
13
13
  end
data/lib/tasks/try.rake CHANGED
@@ -15,7 +15,9 @@ namespace :try do
15
15
  task :query, [:text] => :application do |_, options|
16
16
  text = options.text
17
17
 
18
- puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.new.tokenize(text).to_a}"
18
+ # TODO tokenize destroys the original text...
19
+ #
20
+ puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.new.tokenize(text.dup).to_a.map(&:to_s)}"
19
21
  end
20
22
 
21
23
  desc "Try the given text with both the index and the query (type:field optional)."
@@ -11,7 +11,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
11
11
  # Note: Much more is possible, but let's start out easy.
12
12
  #
13
13
  # Ask me if you have questions!
14
- #
14
+ #
15
15
 
16
16
  indexes do
17
17
  illegal_characters(/[^äöüa-zA-Z0-9\s\/\-\"\&\.]/)
@@ -19,10 +19,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
19
19
  split_text_on(/[\s\/\-\"\&\.]/)
20
20
 
21
21
  type :books,
22
- Sources::DB.new(
23
- 'SELECT id, title, author, isbn13 as isbn FROM books',
24
- DB.configure(:file => 'app/db.yml')
25
- ),
22
+ Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
26
23
  field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
27
24
  field(:author, :qualifiers => [:s, :author, :auteur]),
28
25
  field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
@@ -1,7 +1,7 @@
1
1
  # Standard logging.
2
2
  #
3
3
  require 'logger'
4
- PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(SEARCH_ROOT, 'log/search.log')))
4
+ PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(PICKY_ROOT, 'log/search.log')))
5
5
 
6
6
  # Example with using the syslog logger.
7
7
  # Falling back to the standard log if it isn't available.
@@ -16,5 +16,5 @@ PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(SEARCH_RO
16
16
  # rescue StandardError
17
17
  # puts "Could not connect to the syslog, using the normal log."
18
18
  # require 'logger'
19
- # PickyLog = Loggers::Search.new ::Logger.new(File.join(SEARCH_ROOT, 'log/search.log'))
19
+ # PickyLog = Loggers::Search.new ::Logger.new(File.join(PICKY_ROOT, 'log/search.log'))
20
20
  # end
@@ -14,12 +14,12 @@ end
14
14
  libs = " -r irb/completion"
15
15
  libs << %( -r "picky" )
16
16
 
17
- ENV['SEARCH_ENV'] = case ARGV.first
17
+ ENV['PICKY_ENV'] = case ARGV.first
18
18
  when "p"; "production"
19
19
  when "d"; "development"
20
20
  when "t"; "test"
21
21
  else
22
- ARGV.first || ENV['SEARCH_ENV'] || 'development'
22
+ ARGV.first || ENV['PICKY_ENV'] || 'development'
23
23
  end
24
24
 
25
25
  puts "Use \x1b[1;30mLoader.load_application\x1b[m to load app."
@@ -3,18 +3,7 @@
3
3
  require 'spec_helper'
4
4
 
5
5
  describe Indexers::Field do
6
-
7
- before(:each) do
8
- @type = stub :type, :name => :some_type, :snapshot_table_name => :some_prepared_table_name
9
- @field = stub :field, :indexed_name => :some_indexed_field_name, :name => :some_field_name, :search_index_file_name => :some_index_table
10
- @strategy = Indexers::Field.new @type, @field
11
- @strategy.stub! :indexing_message
12
- end
13
-
14
- describe "chunksize" do
15
- it "should be a specific size" do
16
- @strategy.chunksize.should == 25_000
17
- end
18
- end
19
-
6
+
7
+
8
+
20
9
  end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
+ describe Sources::CSV do
4
+
5
+ context "without file" do
6
+ it "should fail correctly" do
7
+ lambda { @source = Sources::CSV.new(:a, :b, :c) }.should raise_error(Sources::NoCSVFileGiven)
8
+ end
9
+ end
10
+ context "with file" do
11
+ before(:each) do
12
+ @source = Sources::CSV.new :a, :b, :c, :file => :some_file
13
+ ::CSV.should_receive(:foreach).any_number_of_times.and_yield ['7', 'a data', 'b data', 'c data']
14
+ end
15
+ describe "harvest" do
16
+ it "should yield the right data" do
17
+ field = stub :b, :name => :b
18
+ @source.harvest :anything, field do |id, token|
19
+ [id, token].should == [7, 'b data']
20
+ end
21
+ end
22
+ end
23
+ describe "get_data" do
24
+ it "should yield each line" do
25
+ @source.get_data do |data|
26
+ data.should == ['7', 'a data', 'b data', 'c data']
27
+ end
28
+ end
29
+ end
30
+ end
31
+
32
+ end
@@ -4,12 +4,22 @@ describe Sources::DB do
4
4
 
5
5
  before(:each) do
6
6
  @type = stub :type, :name => 'some_type_name'
7
- @connection = stub :connection
8
7
 
8
+ @connection = stub :connection
9
9
  @adapter = stub :adapter, :connection => @connection
10
+
10
11
  @select_statement = stub :statement
11
12
 
12
- @source = Sources::DB.new @select_statement, @adapter
13
+ @source = Sources::DB.new @select_statement, :option => :some_options
14
+
15
+ @source.stub! :database => @adapter
16
+ @source.stub! :connect_backend
17
+ end
18
+
19
+ describe "chunksize" do
20
+ it "should be a specific size" do
21
+ @source.chunksize.should == 25_000
22
+ end
13
23
  end
14
24
 
15
25
  describe "count" do
@@ -26,47 +36,49 @@ describe Sources::DB do
26
36
  @source.count @type
27
37
  end
28
38
  end
29
-
30
- describe "harvest" do
31
- before(:each) do
32
- @adapter.stub! :connect
33
- @source.stub! :harvest_statement_with_offset
34
- end
35
- context 'expectations' do
36
- before(:each) do
37
- @connection.stub! :execute
38
- end
39
- after(:each) do
40
- @source.harvest :some_type, :some_field, :some_offset, :some_chunksize
41
- end
42
- context "with WHERE" do
43
- before(:each) do
44
- @source.stub! :select_statement => 'bla WHERE blu'
45
- end
46
- it "should connect" do
47
- @adapter.should_receive(:connect).once.with
48
- end
49
- it "should call the harvest statement with an offset" do
50
- @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset, :some_chunksize
51
- end
52
- end
53
- context "without WHERE" do
54
- it "should connect" do
55
- @adapter.should_receive(:connect).once.with
56
- end
57
- it "should call the harvest statement with an offset" do
58
- @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset, :some_chunksize
59
- end
60
- end
61
- end
62
- context 'returns' do
63
- it "should return whatever the execute statement returns" do
64
- @connection.stub! :execute => :some_result
65
-
66
- @source.harvest(:some_type, :some_field, :some_offset, :some_chunksize).should == :some_result
67
- end
68
- end
69
- end
39
+
40
+ # TODO Redo.
41
+ #
42
+ # describe "harvest" do
43
+ # before(:each) do
44
+ # @source.stub! :harvest_statement_with_offset
45
+ # end
46
+ # context 'expectations' do
47
+ # before(:each) do
48
+ # @connection.stub! :execute => []
49
+ # @connection.stub! :select_value
50
+ # end
51
+ # after(:each) do
52
+ # @source.harvest :type_name, :some_field
53
+ # end
54
+ # context "with WHERE" do
55
+ # before(:each) do
56
+ # @source.stub! :select_statement => 'bla WHERE blu'
57
+ # end
58
+ # it "should connect" do
59
+ # @source.should_receive(:connect_backend).once.with
60
+ # end
61
+ # it "should call the harvest statement with an offset" do
62
+ # @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
63
+ # end
64
+ # end
65
+ # context "without WHERE" do
66
+ # it "should connect" do
67
+ # @adapter.should_receive(:connect).once.with
68
+ # end
69
+ # it "should call the harvest statement with an offset" do
70
+ # @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
71
+ # end
72
+ # end
73
+ # end
74
+ # context 'returns' do
75
+ # it "should return whatever the execute statement returns" do
76
+ # @connection.stub! :execute => :some_result
77
+ #
78
+ # @source.harvest(:some_type, :some_field).should == :some_result
79
+ # end
80
+ # end
81
+ # end
70
82
 
71
83
  describe "harvest_statement_with_offset" do
72
84
  before(:each) do
@@ -76,15 +88,15 @@ describe Sources::DB do
76
88
  end
77
89
  it "should get a harvest statement and the chunksize to put the statement together" do
78
90
  @source.should_receive(:harvest_statement).once.and_return 'some_example_statement'
79
- @source.harvest_statement_with_offset(@type, @field, :some_offset, :some_chunksize)
91
+ @source.harvest_statement_with_offset(@type, @field, :some_offset)
80
92
  end
81
93
  it "should add an AND if it already contains a WHERE statement" do
82
94
  @source.should_receive(:harvest_statement).and_return 'WHERE'
83
- @source.harvest_statement_with_offset(@type, @field, :some_offset, :some_chunksize).should == "WHERE AND st.id > some_offset LIMIT some_chunksize"
95
+ @source.harvest_statement_with_offset(@type, @field, :some_offset).should == "WHERE AND st.id > some_offset LIMIT 25000"
84
96
  end
85
97
  it "should add a WHERE if it doesn't already contain one" do
86
98
  @source.should_receive(:harvest_statement).and_return 'some_statement'
87
- @source.harvest_statement_with_offset(@type, @field, :some_offset, :some_chunksize).should == "some_statement WHERE st.id > some_offset LIMIT some_chunksize"
99
+ @source.harvest_statement_with_offset(@type, @field, :some_offset).should == "some_statement WHERE st.id > some_offset LIMIT 25000"
88
100
  end
89
101
  end
90
102
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 5
9
- version: 0.0.5
8
+ - 6
9
+ version: 0.0.6
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,148 +14,13 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-03 00:00:00 +02:00
17
+ date: 2010-10-04 00:00:00 +02:00
18
18
  default_executable: picky
19
19
  dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: bundler
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- - 9
31
- - 26
32
- version: 0.9.26
33
- type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: activesupport
37
- prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - "="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 2
45
- - 3
46
- - 8
47
- version: 2.3.8
48
- type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: activerecord
52
- prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
54
- none: false
55
- requirements:
56
- - - "="
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 2
60
- - 3
61
- - 8
62
- version: 2.3.8
63
- type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: rack
67
- prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
69
- none: false
70
- requirements:
71
- - - "="
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 1
75
- - 2
76
- - 1
77
- version: 1.2.1
78
- type: :runtime
79
- version_requirements: *id004
80
- - !ruby/object:Gem::Dependency
81
- name: rack-mount
82
- prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
84
- none: false
85
- requirements:
86
- - - "="
87
- - !ruby/object:Gem::Version
88
- segments:
89
- - 0
90
- - 6
91
- - 9
92
- version: 0.6.9
93
- type: :runtime
94
- version_requirements: *id005
95
- - !ruby/object:Gem::Dependency
96
- name: rsolr
97
- prerelease: false
98
- requirement: &id006 !ruby/object:Gem::Requirement
99
- none: false
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- segments:
104
- - 0
105
- - 12
106
- - 1
107
- version: 0.12.1
108
- type: :runtime
109
- version_requirements: *id006
110
- - !ruby/object:Gem::Dependency
111
- name: sunspot
112
- prerelease: false
113
- requirement: &id007 !ruby/object:Gem::Requirement
114
- none: false
115
- requirements:
116
- - - "="
117
- - !ruby/object:Gem::Version
118
- segments:
119
- - 1
120
- - 1
121
- - 0
122
- version: 1.1.0
123
- type: :runtime
124
- version_requirements: *id007
125
- - !ruby/object:Gem::Dependency
126
- name: text
127
- prerelease: false
128
- requirement: &id008 !ruby/object:Gem::Requirement
129
- none: false
130
- requirements:
131
- - - "="
132
- - !ruby/object:Gem::Version
133
- segments:
134
- - 0
135
- - 2
136
- - 0
137
- version: 0.2.0
138
- type: :runtime
139
- version_requirements: *id008
140
- - !ruby/object:Gem::Dependency
141
- name: rack_fast_escape
142
- prerelease: false
143
- requirement: &id009 !ruby/object:Gem::Requirement
144
- none: false
145
- requirements:
146
- - - "="
147
- - !ruby/object:Gem::Version
148
- segments:
149
- - 2009
150
- - 6
151
- - 24
152
- version: 2009.06.24
153
- type: :runtime
154
- version_requirements: *id009
155
20
  - !ruby/object:Gem::Dependency
156
21
  name: rspec
157
22
  prerelease: false
158
- requirement: &id010 !ruby/object:Gem::Requirement
23
+ requirement: &id001 !ruby/object:Gem::Requirement
159
24
  none: false
160
25
  requirements:
161
26
  - - ">="
@@ -164,7 +29,7 @@ dependencies:
164
29
  - 0
165
30
  version: "0"
166
31
  type: :development
167
- version_requirements: *id010
32
+ version_requirements: *id001
168
33
  description: Fast Combinatorial Ruby Search Engine
169
34
  email: florian.hanke+picky@gmail.com
170
35
  executables:
@@ -199,7 +64,6 @@ files:
199
64
  - lib/picky/configuration/queries.rb
200
65
  - lib/picky/configuration/type.rb
201
66
  - lib/picky/cores.rb
202
- - lib/picky/db/configuration.rb
203
67
  - lib/picky/ext/ruby19/extconf.rb
204
68
  - lib/picky/extensions/array.rb
205
69
  - lib/picky/extensions/hash.rb
@@ -323,6 +187,7 @@ files:
323
187
  - spec/lib/results/base_spec.rb
324
188
  - spec/lib/routing_spec.rb
325
189
  - spec/lib/solr/schema_generator_spec.rb
190
+ - spec/lib/sources/csv_spec.rb
326
191
  - spec/lib/sources/db_spec.rb
327
192
  - spec/lib/tokenizers/base_spec.rb
328
193
  - spec/lib/tokenizers/index_spec.rb
@@ -407,6 +272,7 @@ test_files:
407
272
  - spec/lib/results/base_spec.rb
408
273
  - spec/lib/routing_spec.rb
409
274
  - spec/lib/solr/schema_generator_spec.rb
275
+ - spec/lib/sources/csv_spec.rb
410
276
  - spec/lib/sources/db_spec.rb
411
277
  - spec/lib/tokenizers/base_spec.rb
412
278
  - spec/lib/tokenizers/index_spec.rb
@@ -1,23 +0,0 @@
1
- class DB < ActiveRecord::Base
2
-
3
- self.abstract_class = true
4
-
5
- #
6
- #
7
- def self.configure options = {}
8
- @connection_options = if filename = options[:file]
9
- File.open(File.join(SEARCH_ROOT, filename)) { |f| YAML::load(f) }
10
- else
11
- options
12
- end
13
- self
14
- end
15
-
16
- #
17
- #
18
- def self.connect
19
- return if SEARCH_ENVIRONMENT.to_s == 'test'
20
- establish_connection @connection_options
21
- end
22
-
23
- end