picky 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bundling.rb +1 -1
- data/lib/constants.rb +3 -3
- data/lib/deployment.rb +2 -2
- data/lib/picky/configuration/field.rb +2 -2
- data/lib/picky/configuration/type.rb +5 -0
- data/lib/picky/index/bundle.rb +2 -2
- data/lib/picky/indexers/base.rb +5 -16
- data/lib/picky/indexers/field.rb +2 -6
- data/lib/picky/indexes.rb +9 -1
- data/lib/picky/loader.rb +4 -7
- data/lib/picky/query/token.rb +4 -4
- data/lib/picky/solr/schema_generator.rb +2 -2
- data/lib/picky/sources/base.rb +3 -3
- data/lib/picky/sources/csv.rb +26 -13
- data/lib/picky/sources/db.rb +68 -8
- data/lib/picky.rb +1 -1
- data/lib/tasks/server.rake +4 -4
- data/lib/tasks/solr.rake +1 -1
- data/lib/tasks/statistics.rake +2 -2
- data/lib/tasks/try.rake +3 -1
- data/prototype_project/app/application.rb +2 -5
- data/prototype_project/app/logging.rb +2 -2
- data/prototype_project/script/console +2 -2
- data/spec/lib/indexers/field_spec.rb +3 -14
- data/spec/lib/sources/csv_spec.rb +32 -0
- data/spec/lib/sources/db_spec.rb +58 -46
- metadata +7 -141
- data/lib/picky/db/configuration.rb +0 -23
data/lib/bundling.rb
CHANGED
data/lib/constants.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
# Use rack's environment for the search engine.
|
5
5
|
#
|
6
|
-
ENV['
|
6
|
+
ENV['PICKY_ENV'] ||= ENV['RACK_ENV']
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
PICKY_ENVIRONMENT = ENV['PICKY_ENV'] || 'development' unless defined? PICKY_ENVIRONMENT
|
9
|
+
PICKY_ROOT = Dir.pwd unless defined? PICKY_ROOT
|
data/lib/deployment.rb
CHANGED
@@ -13,12 +13,12 @@ module Picky
|
|
13
13
|
# Executes a rake task on the server.
|
14
14
|
#
|
15
15
|
# Options:
|
16
|
-
# * env: The
|
16
|
+
# * env: The PICKY_ENV. Will not set if set explicitly to false. Default: production.
|
17
17
|
# * All other options get passed on to the Capistrano run task.
|
18
18
|
#
|
19
19
|
def execute_rake_task name, options = {}, &block
|
20
20
|
env = options.delete :env
|
21
|
-
env = env == false ? '' : "
|
21
|
+
env = env == false ? '' : "PICKY_ENV=#{env || 'production'}"
|
22
22
|
run "cd #{current_path}; rake #{name} #{env}", options, &block
|
23
23
|
end
|
24
24
|
|
@@ -33,12 +33,12 @@ module Configuration
|
|
33
33
|
# TODO Move to type, and use in bundle from there.
|
34
34
|
#
|
35
35
|
def search_index_root
|
36
|
-
File.join
|
36
|
+
File.join PICKY_ROOT, 'index'
|
37
37
|
end
|
38
38
|
# TODO Move to config. Duplicate Code in field.rb.
|
39
39
|
#
|
40
40
|
def cache_directory
|
41
|
-
File.join search_index_root,
|
41
|
+
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
42
42
|
end
|
43
43
|
def search_index_file_name
|
44
44
|
File.join cache_directory, "#{type.name}_#{name}_index.txt"
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -56,7 +56,7 @@ module Index
|
|
56
56
|
# Point to category.
|
57
57
|
#
|
58
58
|
def search_index_root
|
59
|
-
File.join
|
59
|
+
File.join PICKY_ROOT, 'index'
|
60
60
|
# category.search_index_root
|
61
61
|
end
|
62
62
|
|
@@ -145,7 +145,7 @@ module Index
|
|
145
145
|
# TODO Move to config. Duplicate Code in field.rb.
|
146
146
|
#
|
147
147
|
def cache_directory
|
148
|
-
File.join search_index_root,
|
148
|
+
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
149
149
|
end
|
150
150
|
|
151
151
|
# Generates a cache path.
|
data/lib/picky/indexers/base.rb
CHANGED
@@ -44,8 +44,10 @@ module Indexers
|
|
44
44
|
comma = ?,
|
45
45
|
newline = ?\n
|
46
46
|
|
47
|
+
indexing_message
|
48
|
+
|
47
49
|
File.open(search_index_file_name, 'w:binary') do |file|
|
48
|
-
|
50
|
+
source.harvest(@type, @field) do |indexed_id, text|
|
49
51
|
tokenizer.tokenize(text).each do |token_text|
|
50
52
|
file.write indexed_id
|
51
53
|
file.write comma
|
@@ -55,22 +57,9 @@ module Indexers
|
|
55
57
|
end
|
56
58
|
end
|
57
59
|
end
|
58
|
-
# Split original data into chunks.
|
59
|
-
#
|
60
|
-
def chunked
|
61
|
-
(0..source.count(@type)).step(chunksize) do |offset|
|
62
|
-
indexing_message offset
|
63
|
-
data = source.harvest @type, @field, offset, chunksize
|
64
|
-
data.each do |indexed_id, text|
|
65
|
-
next unless text
|
66
|
-
text.force_encoding 'utf-8' # TODO Still needed?
|
67
|
-
yield indexed_id, text
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
60
|
|
72
|
-
def indexing_message
|
73
|
-
puts "#{Time.now}: Indexing #{@type.name}:#{@field.name}:#{@field.indexed_name}
|
61
|
+
def indexing_message
|
62
|
+
puts "#{Time.now}: Indexing #{@type.name}:#{@field.name}:#{@field.indexed_name}."
|
74
63
|
end
|
75
64
|
|
76
65
|
end
|
data/lib/picky/indexers/field.rb
CHANGED
data/lib/picky/indexes.rb
CHANGED
@@ -20,7 +20,7 @@ module Indexes
|
|
20
20
|
Cores.forked self.fields, :randomly => true do |field|
|
21
21
|
# Reestablish DB connection.
|
22
22
|
#
|
23
|
-
|
23
|
+
connect_backends
|
24
24
|
field.index
|
25
25
|
field.cache
|
26
26
|
end
|
@@ -29,6 +29,14 @@ module Indexes
|
|
29
29
|
configuration.index_solr
|
30
30
|
end
|
31
31
|
|
32
|
+
# TODO Push into configuration.
|
33
|
+
#
|
34
|
+
def self.connect_backends
|
35
|
+
configuration.types.each do |type|
|
36
|
+
type.connect_backend
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
32
40
|
# Returns an array of fields.
|
33
41
|
#
|
34
42
|
# TODO Rewrite.
|
data/lib/picky/loader.rb
CHANGED
@@ -6,7 +6,7 @@ module Loader
|
|
6
6
|
# First itself, then the app.
|
7
7
|
#
|
8
8
|
def self.reload
|
9
|
-
Dir.chdir(
|
9
|
+
Dir.chdir(PICKY_ROOT)
|
10
10
|
exclaim 'Reloading loader.'
|
11
11
|
load_self
|
12
12
|
exclaim 'Reloading framework.'
|
@@ -30,13 +30,13 @@ module Loader
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def self.load_user filename
|
33
|
-
load File.join(
|
33
|
+
load File.join(PICKY_ROOT, "#{filename}.rb")
|
34
34
|
end
|
35
35
|
def self.load_user_lib filename
|
36
36
|
load_user File.join('lib', filename)
|
37
37
|
end
|
38
38
|
def self.load_all_user_in dirname
|
39
|
-
Dir[File.join(
|
39
|
+
Dir[File.join(PICKY_ROOT, dirname, '**', '*.rb')].each do |filename|
|
40
40
|
load filename
|
41
41
|
end
|
42
42
|
end
|
@@ -203,10 +203,7 @@ module Loader
|
|
203
203
|
#
|
204
204
|
load_relative 'sources/base'
|
205
205
|
load_relative 'sources/db'
|
206
|
-
|
207
|
-
# DB
|
208
|
-
#
|
209
|
-
load_relative 'db/configuration'
|
206
|
+
load_relative 'sources/csv'
|
210
207
|
|
211
208
|
# Indexes.
|
212
209
|
#
|
data/lib/picky/query/token.rb
CHANGED
@@ -178,13 +178,13 @@ module Query
|
|
178
178
|
def to_solr
|
179
179
|
blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
|
180
180
|
end
|
181
|
-
|
181
|
+
|
182
182
|
#
|
183
183
|
#
|
184
184
|
def to_result
|
185
185
|
[@original, @text]
|
186
186
|
end
|
187
|
-
|
187
|
+
|
188
188
|
# Displays the qualifier text and the text, joined.
|
189
189
|
#
|
190
190
|
# e.g. name:meier
|
@@ -192,9 +192,9 @@ module Query
|
|
192
192
|
def to_s
|
193
193
|
[@qualifier, @text].compact.join ':'
|
194
194
|
end
|
195
|
-
|
195
|
+
|
196
196
|
private
|
197
|
-
|
197
|
+
|
198
198
|
# Splits text into a qualifier and text.
|
199
199
|
#
|
200
200
|
# Returns [qualifier, text].
|
@@ -52,7 +52,7 @@ module Solr
|
|
52
52
|
#
|
53
53
|
#
|
54
54
|
def read_template
|
55
|
-
template_path = File.join
|
55
|
+
template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
|
56
56
|
schema = ''
|
57
57
|
File.open(template_path, 'r') do |f|
|
58
58
|
schema = f.read
|
@@ -63,7 +63,7 @@ module Solr
|
|
63
63
|
#
|
64
64
|
#
|
65
65
|
def write result
|
66
|
-
schema_path = File.join
|
66
|
+
schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
|
67
67
|
File.open(schema_path, 'w') do |f|
|
68
68
|
f << result
|
69
69
|
end
|
data/lib/picky/sources/base.rb
CHANGED
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,28 +1,41 @@
|
|
1
|
+
require 'CSV'
|
2
|
+
|
1
3
|
module Sources
|
2
4
|
|
5
|
+
class NoCSVFileGiven < StandardError; end
|
6
|
+
|
3
7
|
class CSV < Base
|
4
8
|
|
5
|
-
attr_reader :file_name
|
9
|
+
attr_reader :file_name, :field_names
|
10
|
+
|
11
|
+
def initialize *field_names, options
|
12
|
+
@field_names = field_names
|
13
|
+
@file_name = Hash === options && options[:file] || raise_no_file_given(field_names)
|
14
|
+
end
|
6
15
|
|
7
|
-
|
8
|
-
|
9
|
-
|
16
|
+
#
|
17
|
+
#
|
18
|
+
def raise_no_file_given field_names
|
19
|
+
raise NoCSVFileGiven.new field_names.join(', ')
|
10
20
|
end
|
11
21
|
|
12
|
-
#
|
22
|
+
# Harvests the data to index.
|
13
23
|
#
|
14
|
-
def
|
15
|
-
|
24
|
+
def harvest _, field
|
25
|
+
index = field_names.index field.name
|
26
|
+
get_data do |ary|
|
27
|
+
indexed_id = ary.shift.to_i
|
28
|
+
text = ary[index]
|
29
|
+
next unless text
|
30
|
+
text.force_encoding 'utf-8' # TODO Still needed?
|
31
|
+
yield indexed_id, text
|
32
|
+
end
|
16
33
|
end
|
17
34
|
|
18
|
-
# Harvests the data to index, chunked.
|
19
35
|
#
|
20
|
-
# Subclasses should override harvest_statement to define how their data is found.
|
21
|
-
# Example:
|
22
|
-
# "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
|
23
36
|
#
|
24
|
-
def
|
25
|
-
|
37
|
+
def get_data &block
|
38
|
+
::CSV.foreach file_name, &block
|
26
39
|
end
|
27
40
|
|
28
41
|
end
|
data/lib/picky/sources/db.rb
CHANGED
@@ -2,17 +2,57 @@ module Sources
|
|
2
2
|
|
3
3
|
class DB < Base
|
4
4
|
|
5
|
-
attr_reader :select_statement, :database
|
5
|
+
attr_reader :select_statement, :database, :connection_options
|
6
6
|
|
7
|
-
def initialize select_statement,
|
7
|
+
def initialize select_statement, with_options = { :file => 'app/db.yml' }
|
8
8
|
@select_statement = select_statement
|
9
|
-
@database =
|
9
|
+
@database = create_database_adapter
|
10
|
+
configure with_options
|
11
|
+
end
|
12
|
+
|
13
|
+
# Get a configured Database backend.
|
14
|
+
#
|
15
|
+
# Options:
|
16
|
+
# Either
|
17
|
+
# * file => 'some/filename.yml' # With an active record configuration.
|
18
|
+
# Or
|
19
|
+
# * The configuration as a hash.
|
20
|
+
#
|
21
|
+
def create_database_adapter
|
22
|
+
adapter_class = Class.new ActiveRecord::Base
|
23
|
+
adapter_class.abstract_class = true
|
24
|
+
adapter_class
|
25
|
+
end
|
26
|
+
|
27
|
+
# Configure the backend.
|
28
|
+
#
|
29
|
+
# Options:
|
30
|
+
# Either
|
31
|
+
# * file => 'some/filename.yml' # With an active record configuration.
|
32
|
+
# Or
|
33
|
+
# * The configuration as a hash.
|
34
|
+
#
|
35
|
+
def configure options
|
36
|
+
@connection_options = if filename = options[:file]
|
37
|
+
File.open(File.join(PICKY_ROOT, filename)) { |f| YAML::load(f) }
|
38
|
+
else
|
39
|
+
options
|
40
|
+
end
|
41
|
+
self
|
42
|
+
end
|
43
|
+
|
44
|
+
# Connect the backend.
|
45
|
+
#
|
46
|
+
def connect_backend
|
47
|
+
return if PICKY_ENVIRONMENT.to_s == 'test' # TODO Unclean.
|
48
|
+
raise "Database backend not configured" unless connection_options
|
49
|
+
database.establish_connection connection_options
|
10
50
|
end
|
11
51
|
|
12
52
|
# Take the snapshot.
|
13
53
|
#
|
14
54
|
def take_snapshot type
|
15
|
-
|
55
|
+
connect_backend
|
16
56
|
|
17
57
|
origin = snapshot_table_name type
|
18
58
|
|
@@ -29,6 +69,8 @@ module Sources
|
|
29
69
|
# Counts all the entries that are used for the index.
|
30
70
|
#
|
31
71
|
def count type
|
72
|
+
connect_backend
|
73
|
+
|
32
74
|
database.connection.select_value("SELECT COUNT(id) FROM #{snapshot_table_name(type)}").to_i
|
33
75
|
end
|
34
76
|
|
@@ -44,10 +86,28 @@ module Sources
|
|
44
86
|
# Example:
|
45
87
|
# "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
|
46
88
|
#
|
47
|
-
def harvest type, field
|
48
|
-
|
89
|
+
def harvest type, field
|
90
|
+
connect_backend
|
49
91
|
|
50
|
-
|
92
|
+
(0..count(type)).step(chunksize) do |offset|
|
93
|
+
get_data(type, field, offset).each do |indexed_id, text|
|
94
|
+
next unless text
|
95
|
+
text.force_encoding 'utf-8' # TODO Still needed?
|
96
|
+
yield indexed_id, text
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Override in subclasses.
|
102
|
+
#
|
103
|
+
def chunksize
|
104
|
+
25_000
|
105
|
+
end
|
106
|
+
|
107
|
+
# Gets database from the backend.
|
108
|
+
#
|
109
|
+
def get_data type, field, offset
|
110
|
+
database.connection.execute harvest_statement_with_offset(type, field, offset)
|
51
111
|
end
|
52
112
|
|
53
113
|
# Base harvest statement for dbs.
|
@@ -60,7 +120,7 @@ module Sources
|
|
60
120
|
#
|
61
121
|
# TODO Use the adapter for this.
|
62
122
|
#
|
63
|
-
def harvest_statement_with_offset type, field, offset
|
123
|
+
def harvest_statement_with_offset type, field, offset
|
64
124
|
statement = harvest_statement type, field
|
65
125
|
|
66
126
|
if statement.include? 'WHERE'
|
data/lib/picky.rb
CHANGED
@@ -15,4 +15,4 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'picky', 'loader'))
|
|
15
15
|
# Load the framework
|
16
16
|
#
|
17
17
|
Loader.load_framework
|
18
|
-
puts "Loaded picky with environment '#{
|
18
|
+
puts "Loaded picky with environment '#{PICKY_ENVIRONMENT}' in #{PICKY_ROOT} on Ruby #{RUBY_VERSION}."
|
data/lib/tasks/server.rake
CHANGED
@@ -3,11 +3,11 @@
|
|
3
3
|
namespace :server do
|
4
4
|
|
5
5
|
def chdir_to_root
|
6
|
-
Dir.chdir
|
6
|
+
Dir.chdir PICKY_ROOT
|
7
7
|
end
|
8
8
|
|
9
9
|
def current_pid
|
10
|
-
pid = `cat #{File.join(
|
10
|
+
pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
|
11
11
|
pid.blank? ? nil : pid.chomp
|
12
12
|
end
|
13
13
|
|
@@ -15,8 +15,8 @@ namespace :server do
|
|
15
15
|
task :start => :framework do
|
16
16
|
chdir_to_root
|
17
17
|
# Rake::Task[:"solr:start"].invoke # TODO Move to better place.
|
18
|
-
daemonize =
|
19
|
-
command = "export
|
18
|
+
daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
|
19
|
+
command = "export PICKY_ENV=#{PICKY_ENVIRONMENT}; unicorn -c unicorn.ru #{daemonize}".strip
|
20
20
|
puts "Running \`#{command}\`."
|
21
21
|
exec command
|
22
22
|
end
|
data/lib/tasks/solr.rake
CHANGED
@@ -18,7 +18,7 @@ namespace :solr do
|
|
18
18
|
|
19
19
|
|
20
20
|
def action name
|
21
|
-
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{
|
21
|
+
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{PICKY_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
|
22
22
|
end
|
23
23
|
task :start => :application do
|
24
24
|
Rake::Task['solr:schema:generate'].invoke
|
data/lib/tasks/statistics.rake
CHANGED
@@ -2,12 +2,12 @@ namespace :statistics do
|
|
2
2
|
|
3
3
|
desc "start the server"
|
4
4
|
task :start => :application do
|
5
|
-
Statistics.start unless
|
5
|
+
Statistics.start unless PICKY_ENVIRONMENT == 'test'
|
6
6
|
end
|
7
7
|
|
8
8
|
desc "stop the server"
|
9
9
|
task :stop => :application do
|
10
|
-
Statistics.stop unless
|
10
|
+
Statistics.stop unless PICKY_ENVIRONMENT == 'test'
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/lib/tasks/try.rake
CHANGED
@@ -15,7 +15,9 @@ namespace :try do
|
|
15
15
|
task :query, [:text] => :application do |_, options|
|
16
16
|
text = options.text
|
17
17
|
|
18
|
-
|
18
|
+
# TODO tokenize destroys the original text...
|
19
|
+
#
|
20
|
+
puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.new.tokenize(text.dup).to_a.map(&:to_s)}"
|
19
21
|
end
|
20
22
|
|
21
23
|
desc "Try the given text with both the index and the query (type:field optional)."
|
@@ -11,7 +11,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
11
11
|
# Note: Much more is possible, but let's start out easy.
|
12
12
|
#
|
13
13
|
# Ask me if you have questions!
|
14
|
-
#
|
14
|
+
#
|
15
15
|
|
16
16
|
indexes do
|
17
17
|
illegal_characters(/[^äöüa-zA-Z0-9\s\/\-\"\&\.]/)
|
@@ -19,10 +19,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
19
19
|
split_text_on(/[\s\/\-\"\&\.]/)
|
20
20
|
|
21
21
|
type :books,
|
22
|
-
Sources::DB.new(
|
23
|
-
'SELECT id, title, author, isbn13 as isbn FROM books',
|
24
|
-
DB.configure(:file => 'app/db.yml')
|
25
|
-
),
|
22
|
+
Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
|
26
23
|
field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
|
27
24
|
field(:author, :qualifiers => [:s, :author, :auteur]),
|
28
25
|
field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# Standard logging.
|
2
2
|
#
|
3
3
|
require 'logger'
|
4
|
-
PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(
|
4
|
+
PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(PICKY_ROOT, 'log/search.log')))
|
5
5
|
|
6
6
|
# Example with using the syslog logger.
|
7
7
|
# Falling back to the standard log if it isn't available.
|
@@ -16,5 +16,5 @@ PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(SEARCH_RO
|
|
16
16
|
# rescue StandardError
|
17
17
|
# puts "Could not connect to the syslog, using the normal log."
|
18
18
|
# require 'logger'
|
19
|
-
# PickyLog = Loggers::Search.new ::Logger.new(File.join(
|
19
|
+
# PickyLog = Loggers::Search.new ::Logger.new(File.join(PICKY_ROOT, 'log/search.log'))
|
20
20
|
# end
|
@@ -14,12 +14,12 @@ end
|
|
14
14
|
libs = " -r irb/completion"
|
15
15
|
libs << %( -r "picky" )
|
16
16
|
|
17
|
-
ENV['
|
17
|
+
ENV['PICKY_ENV'] = case ARGV.first
|
18
18
|
when "p"; "production"
|
19
19
|
when "d"; "development"
|
20
20
|
when "t"; "test"
|
21
21
|
else
|
22
|
-
ARGV.first || ENV['
|
22
|
+
ARGV.first || ENV['PICKY_ENV'] || 'development'
|
23
23
|
end
|
24
24
|
|
25
25
|
puts "Use \x1b[1;30mLoader.load_application\x1b[m to load app."
|
@@ -3,18 +3,7 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Indexers::Field do
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
@field = stub :field, :indexed_name => :some_indexed_field_name, :name => :some_field_name, :search_index_file_name => :some_index_table
|
10
|
-
@strategy = Indexers::Field.new @type, @field
|
11
|
-
@strategy.stub! :indexing_message
|
12
|
-
end
|
13
|
-
|
14
|
-
describe "chunksize" do
|
15
|
-
it "should be a specific size" do
|
16
|
-
@strategy.chunksize.should == 25_000
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
6
|
+
|
7
|
+
|
8
|
+
|
20
9
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Sources::CSV do
|
4
|
+
|
5
|
+
context "without file" do
|
6
|
+
it "should fail correctly" do
|
7
|
+
lambda { @source = Sources::CSV.new(:a, :b, :c) }.should raise_error(Sources::NoCSVFileGiven)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
context "with file" do
|
11
|
+
before(:each) do
|
12
|
+
@source = Sources::CSV.new :a, :b, :c, :file => :some_file
|
13
|
+
::CSV.should_receive(:foreach).any_number_of_times.and_yield ['7', 'a data', 'b data', 'c data']
|
14
|
+
end
|
15
|
+
describe "harvest" do
|
16
|
+
it "should yield the right data" do
|
17
|
+
field = stub :b, :name => :b
|
18
|
+
@source.harvest :anything, field do |id, token|
|
19
|
+
[id, token].should == [7, 'b data']
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
describe "get_data" do
|
24
|
+
it "should yield each line" do
|
25
|
+
@source.get_data do |data|
|
26
|
+
data.should == ['7', 'a data', 'b data', 'c data']
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
data/spec/lib/sources/db_spec.rb
CHANGED
@@ -4,12 +4,22 @@ describe Sources::DB do
|
|
4
4
|
|
5
5
|
before(:each) do
|
6
6
|
@type = stub :type, :name => 'some_type_name'
|
7
|
-
@connection = stub :connection
|
8
7
|
|
8
|
+
@connection = stub :connection
|
9
9
|
@adapter = stub :adapter, :connection => @connection
|
10
|
+
|
10
11
|
@select_statement = stub :statement
|
11
12
|
|
12
|
-
@source = Sources::DB.new @select_statement,
|
13
|
+
@source = Sources::DB.new @select_statement, :option => :some_options
|
14
|
+
|
15
|
+
@source.stub! :database => @adapter
|
16
|
+
@source.stub! :connect_backend
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "chunksize" do
|
20
|
+
it "should be a specific size" do
|
21
|
+
@source.chunksize.should == 25_000
|
22
|
+
end
|
13
23
|
end
|
14
24
|
|
15
25
|
describe "count" do
|
@@ -26,47 +36,49 @@ describe Sources::DB do
|
|
26
36
|
@source.count @type
|
27
37
|
end
|
28
38
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
39
|
+
|
40
|
+
# TODO Redo.
|
41
|
+
#
|
42
|
+
# describe "harvest" do
|
43
|
+
# before(:each) do
|
44
|
+
# @source.stub! :harvest_statement_with_offset
|
45
|
+
# end
|
46
|
+
# context 'expectations' do
|
47
|
+
# before(:each) do
|
48
|
+
# @connection.stub! :execute => []
|
49
|
+
# @connection.stub! :select_value
|
50
|
+
# end
|
51
|
+
# after(:each) do
|
52
|
+
# @source.harvest :type_name, :some_field
|
53
|
+
# end
|
54
|
+
# context "with WHERE" do
|
55
|
+
# before(:each) do
|
56
|
+
# @source.stub! :select_statement => 'bla WHERE blu'
|
57
|
+
# end
|
58
|
+
# it "should connect" do
|
59
|
+
# @source.should_receive(:connect_backend).once.with
|
60
|
+
# end
|
61
|
+
# it "should call the harvest statement with an offset" do
|
62
|
+
# @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
|
63
|
+
# end
|
64
|
+
# end
|
65
|
+
# context "without WHERE" do
|
66
|
+
# it "should connect" do
|
67
|
+
# @adapter.should_receive(:connect).once.with
|
68
|
+
# end
|
69
|
+
# it "should call the harvest statement with an offset" do
|
70
|
+
# @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
|
71
|
+
# end
|
72
|
+
# end
|
73
|
+
# end
|
74
|
+
# context 'returns' do
|
75
|
+
# it "should return whatever the execute statement returns" do
|
76
|
+
# @connection.stub! :execute => :some_result
|
77
|
+
#
|
78
|
+
# @source.harvest(:some_type, :some_field).should == :some_result
|
79
|
+
# end
|
80
|
+
# end
|
81
|
+
# end
|
70
82
|
|
71
83
|
describe "harvest_statement_with_offset" do
|
72
84
|
before(:each) do
|
@@ -76,15 +88,15 @@ describe Sources::DB do
|
|
76
88
|
end
|
77
89
|
it "should get a harvest statement and the chunksize to put the statement together" do
|
78
90
|
@source.should_receive(:harvest_statement).once.and_return 'some_example_statement'
|
79
|
-
@source.harvest_statement_with_offset(@type, @field, :some_offset
|
91
|
+
@source.harvest_statement_with_offset(@type, @field, :some_offset)
|
80
92
|
end
|
81
93
|
it "should add an AND if it already contains a WHERE statement" do
|
82
94
|
@source.should_receive(:harvest_statement).and_return 'WHERE'
|
83
|
-
@source.harvest_statement_with_offset(@type, @field, :some_offset
|
95
|
+
@source.harvest_statement_with_offset(@type, @field, :some_offset).should == "WHERE AND st.id > some_offset LIMIT 25000"
|
84
96
|
end
|
85
97
|
it "should add a WHERE if it doesn't already contain one" do
|
86
98
|
@source.should_receive(:harvest_statement).and_return 'some_statement'
|
87
|
-
@source.harvest_statement_with_offset(@type, @field, :some_offset
|
99
|
+
@source.harvest_statement_with_offset(@type, @field, :some_offset).should == "some_statement WHERE st.id > some_offset LIMIT 25000"
|
88
100
|
end
|
89
101
|
end
|
90
102
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 6
|
9
|
+
version: 0.0.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,148 +14,13 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-10-
|
17
|
+
date: 2010-10-04 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: bundler
|
22
|
-
prerelease: false
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
-
none: false
|
25
|
-
requirements:
|
26
|
-
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
- 9
|
31
|
-
- 26
|
32
|
-
version: 0.9.26
|
33
|
-
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: activesupport
|
37
|
-
prerelease: false
|
38
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
-
none: false
|
40
|
-
requirements:
|
41
|
-
- - "="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 2
|
45
|
-
- 3
|
46
|
-
- 8
|
47
|
-
version: 2.3.8
|
48
|
-
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: activerecord
|
52
|
-
prerelease: false
|
53
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
-
none: false
|
55
|
-
requirements:
|
56
|
-
- - "="
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 2
|
60
|
-
- 3
|
61
|
-
- 8
|
62
|
-
version: 2.3.8
|
63
|
-
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: rack
|
67
|
-
prerelease: false
|
68
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
-
none: false
|
70
|
-
requirements:
|
71
|
-
- - "="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 1
|
75
|
-
- 2
|
76
|
-
- 1
|
77
|
-
version: 1.2.1
|
78
|
-
type: :runtime
|
79
|
-
version_requirements: *id004
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: rack-mount
|
82
|
-
prerelease: false
|
83
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
84
|
-
none: false
|
85
|
-
requirements:
|
86
|
-
- - "="
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
segments:
|
89
|
-
- 0
|
90
|
-
- 6
|
91
|
-
- 9
|
92
|
-
version: 0.6.9
|
93
|
-
type: :runtime
|
94
|
-
version_requirements: *id005
|
95
|
-
- !ruby/object:Gem::Dependency
|
96
|
-
name: rsolr
|
97
|
-
prerelease: false
|
98
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
-
none: false
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
segments:
|
104
|
-
- 0
|
105
|
-
- 12
|
106
|
-
- 1
|
107
|
-
version: 0.12.1
|
108
|
-
type: :runtime
|
109
|
-
version_requirements: *id006
|
110
|
-
- !ruby/object:Gem::Dependency
|
111
|
-
name: sunspot
|
112
|
-
prerelease: false
|
113
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
-
none: false
|
115
|
-
requirements:
|
116
|
-
- - "="
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
segments:
|
119
|
-
- 1
|
120
|
-
- 1
|
121
|
-
- 0
|
122
|
-
version: 1.1.0
|
123
|
-
type: :runtime
|
124
|
-
version_requirements: *id007
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: text
|
127
|
-
prerelease: false
|
128
|
-
requirement: &id008 !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
|
-
requirements:
|
131
|
-
- - "="
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
segments:
|
134
|
-
- 0
|
135
|
-
- 2
|
136
|
-
- 0
|
137
|
-
version: 0.2.0
|
138
|
-
type: :runtime
|
139
|
-
version_requirements: *id008
|
140
|
-
- !ruby/object:Gem::Dependency
|
141
|
-
name: rack_fast_escape
|
142
|
-
prerelease: false
|
143
|
-
requirement: &id009 !ruby/object:Gem::Requirement
|
144
|
-
none: false
|
145
|
-
requirements:
|
146
|
-
- - "="
|
147
|
-
- !ruby/object:Gem::Version
|
148
|
-
segments:
|
149
|
-
- 2009
|
150
|
-
- 6
|
151
|
-
- 24
|
152
|
-
version: 2009.06.24
|
153
|
-
type: :runtime
|
154
|
-
version_requirements: *id009
|
155
20
|
- !ruby/object:Gem::Dependency
|
156
21
|
name: rspec
|
157
22
|
prerelease: false
|
158
|
-
requirement: &
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
159
24
|
none: false
|
160
25
|
requirements:
|
161
26
|
- - ">="
|
@@ -164,7 +29,7 @@ dependencies:
|
|
164
29
|
- 0
|
165
30
|
version: "0"
|
166
31
|
type: :development
|
167
|
-
version_requirements: *
|
32
|
+
version_requirements: *id001
|
168
33
|
description: Fast Combinatorial Ruby Search Engine
|
169
34
|
email: florian.hanke+picky@gmail.com
|
170
35
|
executables:
|
@@ -199,7 +64,6 @@ files:
|
|
199
64
|
- lib/picky/configuration/queries.rb
|
200
65
|
- lib/picky/configuration/type.rb
|
201
66
|
- lib/picky/cores.rb
|
202
|
-
- lib/picky/db/configuration.rb
|
203
67
|
- lib/picky/ext/ruby19/extconf.rb
|
204
68
|
- lib/picky/extensions/array.rb
|
205
69
|
- lib/picky/extensions/hash.rb
|
@@ -323,6 +187,7 @@ files:
|
|
323
187
|
- spec/lib/results/base_spec.rb
|
324
188
|
- spec/lib/routing_spec.rb
|
325
189
|
- spec/lib/solr/schema_generator_spec.rb
|
190
|
+
- spec/lib/sources/csv_spec.rb
|
326
191
|
- spec/lib/sources/db_spec.rb
|
327
192
|
- spec/lib/tokenizers/base_spec.rb
|
328
193
|
- spec/lib/tokenizers/index_spec.rb
|
@@ -407,6 +272,7 @@ test_files:
|
|
407
272
|
- spec/lib/results/base_spec.rb
|
408
273
|
- spec/lib/routing_spec.rb
|
409
274
|
- spec/lib/solr/schema_generator_spec.rb
|
275
|
+
- spec/lib/sources/csv_spec.rb
|
410
276
|
- spec/lib/sources/db_spec.rb
|
411
277
|
- spec/lib/tokenizers/base_spec.rb
|
412
278
|
- spec/lib/tokenizers/index_spec.rb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
class DB < ActiveRecord::Base
|
2
|
-
|
3
|
-
self.abstract_class = true
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
def self.configure options = {}
|
8
|
-
@connection_options = if filename = options[:file]
|
9
|
-
File.open(File.join(SEARCH_ROOT, filename)) { |f| YAML::load(f) }
|
10
|
-
else
|
11
|
-
options
|
12
|
-
end
|
13
|
-
self
|
14
|
-
end
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
def self.connect
|
19
|
-
return if SEARCH_ENVIRONMENT.to_s == 'test'
|
20
|
-
establish_connection @connection_options
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|