picky 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bundling.rb +1 -1
- data/lib/constants.rb +3 -3
- data/lib/deployment.rb +2 -2
- data/lib/picky/configuration/field.rb +2 -2
- data/lib/picky/configuration/type.rb +5 -0
- data/lib/picky/index/bundle.rb +2 -2
- data/lib/picky/indexers/base.rb +5 -16
- data/lib/picky/indexers/field.rb +2 -6
- data/lib/picky/indexes.rb +9 -1
- data/lib/picky/loader.rb +4 -7
- data/lib/picky/query/token.rb +4 -4
- data/lib/picky/solr/schema_generator.rb +2 -2
- data/lib/picky/sources/base.rb +3 -3
- data/lib/picky/sources/csv.rb +26 -13
- data/lib/picky/sources/db.rb +68 -8
- data/lib/picky.rb +1 -1
- data/lib/tasks/server.rake +4 -4
- data/lib/tasks/solr.rake +1 -1
- data/lib/tasks/statistics.rake +2 -2
- data/lib/tasks/try.rake +3 -1
- data/prototype_project/app/application.rb +2 -5
- data/prototype_project/app/logging.rb +2 -2
- data/prototype_project/script/console +2 -2
- data/spec/lib/indexers/field_spec.rb +3 -14
- data/spec/lib/sources/csv_spec.rb +32 -0
- data/spec/lib/sources/db_spec.rb +58 -46
- metadata +7 -141
- data/lib/picky/db/configuration.rb +0 -23
data/lib/bundling.rb
CHANGED
data/lib/constants.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
# Use rack's environment for the search engine.
|
5
5
|
#
|
6
|
-
ENV['
|
6
|
+
ENV['PICKY_ENV'] ||= ENV['RACK_ENV']
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
PICKY_ENVIRONMENT = ENV['PICKY_ENV'] || 'development' unless defined? PICKY_ENVIRONMENT
|
9
|
+
PICKY_ROOT = Dir.pwd unless defined? PICKY_ROOT
|
data/lib/deployment.rb
CHANGED
@@ -13,12 +13,12 @@ module Picky
|
|
13
13
|
# Executes a rake task on the server.
|
14
14
|
#
|
15
15
|
# Options:
|
16
|
-
# * env: The
|
16
|
+
# * env: The PICKY_ENV. Will not set if set explicitly to false. Default: production.
|
17
17
|
# * All other options get passed on to the Capistrano run task.
|
18
18
|
#
|
19
19
|
def execute_rake_task name, options = {}, &block
|
20
20
|
env = options.delete :env
|
21
|
-
env = env == false ? '' : "
|
21
|
+
env = env == false ? '' : "PICKY_ENV=#{env || 'production'}"
|
22
22
|
run "cd #{current_path}; rake #{name} #{env}", options, &block
|
23
23
|
end
|
24
24
|
|
@@ -33,12 +33,12 @@ module Configuration
|
|
33
33
|
# TODO Move to type, and use in bundle from there.
|
34
34
|
#
|
35
35
|
def search_index_root
|
36
|
-
File.join
|
36
|
+
File.join PICKY_ROOT, 'index'
|
37
37
|
end
|
38
38
|
# TODO Move to config. Duplicate Code in field.rb.
|
39
39
|
#
|
40
40
|
def cache_directory
|
41
|
-
File.join search_index_root,
|
41
|
+
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
42
42
|
end
|
43
43
|
def search_index_file_name
|
44
44
|
File.join cache_directory, "#{type.name}_#{name}_index.txt"
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -56,7 +56,7 @@ module Index
|
|
56
56
|
# Point to category.
|
57
57
|
#
|
58
58
|
def search_index_root
|
59
|
-
File.join
|
59
|
+
File.join PICKY_ROOT, 'index'
|
60
60
|
# category.search_index_root
|
61
61
|
end
|
62
62
|
|
@@ -145,7 +145,7 @@ module Index
|
|
145
145
|
# TODO Move to config. Duplicate Code in field.rb.
|
146
146
|
#
|
147
147
|
def cache_directory
|
148
|
-
File.join search_index_root,
|
148
|
+
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
149
149
|
end
|
150
150
|
|
151
151
|
# Generates a cache path.
|
data/lib/picky/indexers/base.rb
CHANGED
@@ -44,8 +44,10 @@ module Indexers
|
|
44
44
|
comma = ?,
|
45
45
|
newline = ?\n
|
46
46
|
|
47
|
+
indexing_message
|
48
|
+
|
47
49
|
File.open(search_index_file_name, 'w:binary') do |file|
|
48
|
-
|
50
|
+
source.harvest(@type, @field) do |indexed_id, text|
|
49
51
|
tokenizer.tokenize(text).each do |token_text|
|
50
52
|
file.write indexed_id
|
51
53
|
file.write comma
|
@@ -55,22 +57,9 @@ module Indexers
|
|
55
57
|
end
|
56
58
|
end
|
57
59
|
end
|
58
|
-
# Split original data into chunks.
|
59
|
-
#
|
60
|
-
def chunked
|
61
|
-
(0..source.count(@type)).step(chunksize) do |offset|
|
62
|
-
indexing_message offset
|
63
|
-
data = source.harvest @type, @field, offset, chunksize
|
64
|
-
data.each do |indexed_id, text|
|
65
|
-
next unless text
|
66
|
-
text.force_encoding 'utf-8' # TODO Still needed?
|
67
|
-
yield indexed_id, text
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
60
|
|
72
|
-
def indexing_message
|
73
|
-
puts "#{Time.now}: Indexing #{@type.name}:#{@field.name}:#{@field.indexed_name}
|
61
|
+
def indexing_message
|
62
|
+
puts "#{Time.now}: Indexing #{@type.name}:#{@field.name}:#{@field.indexed_name}."
|
74
63
|
end
|
75
64
|
|
76
65
|
end
|
data/lib/picky/indexers/field.rb
CHANGED
data/lib/picky/indexes.rb
CHANGED
@@ -20,7 +20,7 @@ module Indexes
|
|
20
20
|
Cores.forked self.fields, :randomly => true do |field|
|
21
21
|
# Reestablish DB connection.
|
22
22
|
#
|
23
|
-
|
23
|
+
connect_backends
|
24
24
|
field.index
|
25
25
|
field.cache
|
26
26
|
end
|
@@ -29,6 +29,14 @@ module Indexes
|
|
29
29
|
configuration.index_solr
|
30
30
|
end
|
31
31
|
|
32
|
+
# TODO Push into configuration.
|
33
|
+
#
|
34
|
+
def self.connect_backends
|
35
|
+
configuration.types.each do |type|
|
36
|
+
type.connect_backend
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
32
40
|
# Returns an array of fields.
|
33
41
|
#
|
34
42
|
# TODO Rewrite.
|
data/lib/picky/loader.rb
CHANGED
@@ -6,7 +6,7 @@ module Loader
|
|
6
6
|
# First itself, then the app.
|
7
7
|
#
|
8
8
|
def self.reload
|
9
|
-
Dir.chdir(
|
9
|
+
Dir.chdir(PICKY_ROOT)
|
10
10
|
exclaim 'Reloading loader.'
|
11
11
|
load_self
|
12
12
|
exclaim 'Reloading framework.'
|
@@ -30,13 +30,13 @@ module Loader
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def self.load_user filename
|
33
|
-
load File.join(
|
33
|
+
load File.join(PICKY_ROOT, "#{filename}.rb")
|
34
34
|
end
|
35
35
|
def self.load_user_lib filename
|
36
36
|
load_user File.join('lib', filename)
|
37
37
|
end
|
38
38
|
def self.load_all_user_in dirname
|
39
|
-
Dir[File.join(
|
39
|
+
Dir[File.join(PICKY_ROOT, dirname, '**', '*.rb')].each do |filename|
|
40
40
|
load filename
|
41
41
|
end
|
42
42
|
end
|
@@ -203,10 +203,7 @@ module Loader
|
|
203
203
|
#
|
204
204
|
load_relative 'sources/base'
|
205
205
|
load_relative 'sources/db'
|
206
|
-
|
207
|
-
# DB
|
208
|
-
#
|
209
|
-
load_relative 'db/configuration'
|
206
|
+
load_relative 'sources/csv'
|
210
207
|
|
211
208
|
# Indexes.
|
212
209
|
#
|
data/lib/picky/query/token.rb
CHANGED
@@ -178,13 +178,13 @@ module Query
|
|
178
178
|
def to_solr
|
179
179
|
blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
|
180
180
|
end
|
181
|
-
|
181
|
+
|
182
182
|
#
|
183
183
|
#
|
184
184
|
def to_result
|
185
185
|
[@original, @text]
|
186
186
|
end
|
187
|
-
|
187
|
+
|
188
188
|
# Displays the qualifier text and the text, joined.
|
189
189
|
#
|
190
190
|
# e.g. name:meier
|
@@ -192,9 +192,9 @@ module Query
|
|
192
192
|
def to_s
|
193
193
|
[@qualifier, @text].compact.join ':'
|
194
194
|
end
|
195
|
-
|
195
|
+
|
196
196
|
private
|
197
|
-
|
197
|
+
|
198
198
|
# Splits text into a qualifier and text.
|
199
199
|
#
|
200
200
|
# Returns [qualifier, text].
|
@@ -52,7 +52,7 @@ module Solr
|
|
52
52
|
#
|
53
53
|
#
|
54
54
|
def read_template
|
55
|
-
template_path = File.join
|
55
|
+
template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
|
56
56
|
schema = ''
|
57
57
|
File.open(template_path, 'r') do |f|
|
58
58
|
schema = f.read
|
@@ -63,7 +63,7 @@ module Solr
|
|
63
63
|
#
|
64
64
|
#
|
65
65
|
def write result
|
66
|
-
schema_path = File.join
|
66
|
+
schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
|
67
67
|
File.open(schema_path, 'w') do |f|
|
68
68
|
f << result
|
69
69
|
end
|
data/lib/picky/sources/base.rb
CHANGED
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,28 +1,41 @@
|
|
1
|
+
require 'CSV'
|
2
|
+
|
1
3
|
module Sources
|
2
4
|
|
5
|
+
class NoCSVFileGiven < StandardError; end
|
6
|
+
|
3
7
|
class CSV < Base
|
4
8
|
|
5
|
-
attr_reader :file_name
|
9
|
+
attr_reader :file_name, :field_names
|
10
|
+
|
11
|
+
def initialize *field_names, options
|
12
|
+
@field_names = field_names
|
13
|
+
@file_name = Hash === options && options[:file] || raise_no_file_given(field_names)
|
14
|
+
end
|
6
15
|
|
7
|
-
|
8
|
-
|
9
|
-
|
16
|
+
#
|
17
|
+
#
|
18
|
+
def raise_no_file_given field_names
|
19
|
+
raise NoCSVFileGiven.new field_names.join(', ')
|
10
20
|
end
|
11
21
|
|
12
|
-
#
|
22
|
+
# Harvests the data to index.
|
13
23
|
#
|
14
|
-
def
|
15
|
-
|
24
|
+
def harvest _, field
|
25
|
+
index = field_names.index field.name
|
26
|
+
get_data do |ary|
|
27
|
+
indexed_id = ary.shift.to_i
|
28
|
+
text = ary[index]
|
29
|
+
next unless text
|
30
|
+
text.force_encoding 'utf-8' # TODO Still needed?
|
31
|
+
yield indexed_id, text
|
32
|
+
end
|
16
33
|
end
|
17
34
|
|
18
|
-
# Harvests the data to index, chunked.
|
19
35
|
#
|
20
|
-
# Subclasses should override harvest_statement to define how their data is found.
|
21
|
-
# Example:
|
22
|
-
# "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
|
23
36
|
#
|
24
|
-
def
|
25
|
-
|
37
|
+
def get_data &block
|
38
|
+
::CSV.foreach file_name, &block
|
26
39
|
end
|
27
40
|
|
28
41
|
end
|
data/lib/picky/sources/db.rb
CHANGED
@@ -2,17 +2,57 @@ module Sources
|
|
2
2
|
|
3
3
|
class DB < Base
|
4
4
|
|
5
|
-
attr_reader :select_statement, :database
|
5
|
+
attr_reader :select_statement, :database, :connection_options
|
6
6
|
|
7
|
-
def initialize select_statement,
|
7
|
+
def initialize select_statement, with_options = { :file => 'app/db.yml' }
|
8
8
|
@select_statement = select_statement
|
9
|
-
@database =
|
9
|
+
@database = create_database_adapter
|
10
|
+
configure with_options
|
11
|
+
end
|
12
|
+
|
13
|
+
# Get a configured Database backend.
|
14
|
+
#
|
15
|
+
# Options:
|
16
|
+
# Either
|
17
|
+
# * file => 'some/filename.yml' # With an active record configuration.
|
18
|
+
# Or
|
19
|
+
# * The configuration as a hash.
|
20
|
+
#
|
21
|
+
def create_database_adapter
|
22
|
+
adapter_class = Class.new ActiveRecord::Base
|
23
|
+
adapter_class.abstract_class = true
|
24
|
+
adapter_class
|
25
|
+
end
|
26
|
+
|
27
|
+
# Configure the backend.
|
28
|
+
#
|
29
|
+
# Options:
|
30
|
+
# Either
|
31
|
+
# * file => 'some/filename.yml' # With an active record configuration.
|
32
|
+
# Or
|
33
|
+
# * The configuration as a hash.
|
34
|
+
#
|
35
|
+
def configure options
|
36
|
+
@connection_options = if filename = options[:file]
|
37
|
+
File.open(File.join(PICKY_ROOT, filename)) { |f| YAML::load(f) }
|
38
|
+
else
|
39
|
+
options
|
40
|
+
end
|
41
|
+
self
|
42
|
+
end
|
43
|
+
|
44
|
+
# Connect the backend.
|
45
|
+
#
|
46
|
+
def connect_backend
|
47
|
+
return if PICKY_ENVIRONMENT.to_s == 'test' # TODO Unclean.
|
48
|
+
raise "Database backend not configured" unless connection_options
|
49
|
+
database.establish_connection connection_options
|
10
50
|
end
|
11
51
|
|
12
52
|
# Take the snapshot.
|
13
53
|
#
|
14
54
|
def take_snapshot type
|
15
|
-
|
55
|
+
connect_backend
|
16
56
|
|
17
57
|
origin = snapshot_table_name type
|
18
58
|
|
@@ -29,6 +69,8 @@ module Sources
|
|
29
69
|
# Counts all the entries that are used for the index.
|
30
70
|
#
|
31
71
|
def count type
|
72
|
+
connect_backend
|
73
|
+
|
32
74
|
database.connection.select_value("SELECT COUNT(id) FROM #{snapshot_table_name(type)}").to_i
|
33
75
|
end
|
34
76
|
|
@@ -44,10 +86,28 @@ module Sources
|
|
44
86
|
# Example:
|
45
87
|
# "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
|
46
88
|
#
|
47
|
-
def harvest type, field
|
48
|
-
|
89
|
+
def harvest type, field
|
90
|
+
connect_backend
|
49
91
|
|
50
|
-
|
92
|
+
(0..count(type)).step(chunksize) do |offset|
|
93
|
+
get_data(type, field, offset).each do |indexed_id, text|
|
94
|
+
next unless text
|
95
|
+
text.force_encoding 'utf-8' # TODO Still needed?
|
96
|
+
yield indexed_id, text
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Override in subclasses.
|
102
|
+
#
|
103
|
+
def chunksize
|
104
|
+
25_000
|
105
|
+
end
|
106
|
+
|
107
|
+
# Gets database from the backend.
|
108
|
+
#
|
109
|
+
def get_data type, field, offset
|
110
|
+
database.connection.execute harvest_statement_with_offset(type, field, offset)
|
51
111
|
end
|
52
112
|
|
53
113
|
# Base harvest statement for dbs.
|
@@ -60,7 +120,7 @@ module Sources
|
|
60
120
|
#
|
61
121
|
# TODO Use the adapter for this.
|
62
122
|
#
|
63
|
-
def harvest_statement_with_offset type, field, offset
|
123
|
+
def harvest_statement_with_offset type, field, offset
|
64
124
|
statement = harvest_statement type, field
|
65
125
|
|
66
126
|
if statement.include? 'WHERE'
|
data/lib/picky.rb
CHANGED
@@ -15,4 +15,4 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'picky', 'loader'))
|
|
15
15
|
# Load the framework
|
16
16
|
#
|
17
17
|
Loader.load_framework
|
18
|
-
puts "Loaded picky with environment '#{
|
18
|
+
puts "Loaded picky with environment '#{PICKY_ENVIRONMENT}' in #{PICKY_ROOT} on Ruby #{RUBY_VERSION}."
|
data/lib/tasks/server.rake
CHANGED
@@ -3,11 +3,11 @@
|
|
3
3
|
namespace :server do
|
4
4
|
|
5
5
|
def chdir_to_root
|
6
|
-
Dir.chdir
|
6
|
+
Dir.chdir PICKY_ROOT
|
7
7
|
end
|
8
8
|
|
9
9
|
def current_pid
|
10
|
-
pid = `cat #{File.join(
|
10
|
+
pid = `cat #{File.join(PICKY_ROOT, 'tmp/pids/unicorn.pid')}`
|
11
11
|
pid.blank? ? nil : pid.chomp
|
12
12
|
end
|
13
13
|
|
@@ -15,8 +15,8 @@ namespace :server do
|
|
15
15
|
task :start => :framework do
|
16
16
|
chdir_to_root
|
17
17
|
# Rake::Task[:"solr:start"].invoke # TODO Move to better place.
|
18
|
-
daemonize =
|
19
|
-
command = "export
|
18
|
+
daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
|
19
|
+
command = "export PICKY_ENV=#{PICKY_ENVIRONMENT}; unicorn -c unicorn.ru #{daemonize}".strip
|
20
20
|
puts "Running \`#{command}\`."
|
21
21
|
exec command
|
22
22
|
end
|
data/lib/tasks/solr.rake
CHANGED
@@ -18,7 +18,7 @@ namespace :solr do
|
|
18
18
|
|
19
19
|
|
20
20
|
def action name
|
21
|
-
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{
|
21
|
+
`sunspot-solr #{name} --solr-home=solr --data-directory=index/#{PICKY_ENVIRONMENT}/solr --pid-dir=solr/pids --log-file=log/solr.log`
|
22
22
|
end
|
23
23
|
task :start => :application do
|
24
24
|
Rake::Task['solr:schema:generate'].invoke
|
data/lib/tasks/statistics.rake
CHANGED
@@ -2,12 +2,12 @@ namespace :statistics do
|
|
2
2
|
|
3
3
|
desc "start the server"
|
4
4
|
task :start => :application do
|
5
|
-
Statistics.start unless
|
5
|
+
Statistics.start unless PICKY_ENVIRONMENT == 'test'
|
6
6
|
end
|
7
7
|
|
8
8
|
desc "stop the server"
|
9
9
|
task :stop => :application do
|
10
|
-
Statistics.stop unless
|
10
|
+
Statistics.stop unless PICKY_ENVIRONMENT == 'test'
|
11
11
|
end
|
12
12
|
|
13
13
|
end
|
data/lib/tasks/try.rake
CHANGED
@@ -15,7 +15,9 @@ namespace :try do
|
|
15
15
|
task :query, [:text] => :application do |_, options|
|
16
16
|
text = options.text
|
17
17
|
|
18
|
-
|
18
|
+
# TODO tokenize destroys the original text...
|
19
|
+
#
|
20
|
+
puts "\"#{text}\" is query tokenized as #{Tokenizers::Query.new.tokenize(text.dup).to_a.map(&:to_s)}"
|
19
21
|
end
|
20
22
|
|
21
23
|
desc "Try the given text with both the index and the query (type:field optional)."
|
@@ -11,7 +11,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
11
11
|
# Note: Much more is possible, but let's start out easy.
|
12
12
|
#
|
13
13
|
# Ask me if you have questions!
|
14
|
-
#
|
14
|
+
#
|
15
15
|
|
16
16
|
indexes do
|
17
17
|
illegal_characters(/[^äöüa-zA-Z0-9\s\/\-\"\&\.]/)
|
@@ -19,10 +19,7 @@ class PickySearch < Application # The App Constant needs to be identical in appl
|
|
19
19
|
split_text_on(/[\s\/\-\"\&\.]/)
|
20
20
|
|
21
21
|
type :books,
|
22
|
-
Sources::DB.new(
|
23
|
-
'SELECT id, title, author, isbn13 as isbn FROM books',
|
24
|
-
DB.configure(:file => 'app/db.yml')
|
25
|
-
),
|
22
|
+
Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
|
26
23
|
field(:title, :qualifiers => [:t, :title, :titre], :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
|
27
24
|
field(:author, :qualifiers => [:s, :author, :auteur]),
|
28
25
|
field(:isbn, :qualifiers => [:i, :isbn], :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# Standard logging.
|
2
2
|
#
|
3
3
|
require 'logger'
|
4
|
-
PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(
|
4
|
+
PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(PICKY_ROOT, 'log/search.log')))
|
5
5
|
|
6
6
|
# Example with using the syslog logger.
|
7
7
|
# Falling back to the standard log if it isn't available.
|
@@ -16,5 +16,5 @@ PickyLog = Loggers::Search.new ::Logger.new(File.expand_path(File.join(SEARCH_RO
|
|
16
16
|
# rescue StandardError
|
17
17
|
# puts "Could not connect to the syslog, using the normal log."
|
18
18
|
# require 'logger'
|
19
|
-
# PickyLog = Loggers::Search.new ::Logger.new(File.join(
|
19
|
+
# PickyLog = Loggers::Search.new ::Logger.new(File.join(PICKY_ROOT, 'log/search.log'))
|
20
20
|
# end
|
@@ -14,12 +14,12 @@ end
|
|
14
14
|
libs = " -r irb/completion"
|
15
15
|
libs << %( -r "picky" )
|
16
16
|
|
17
|
-
ENV['
|
17
|
+
ENV['PICKY_ENV'] = case ARGV.first
|
18
18
|
when "p"; "production"
|
19
19
|
when "d"; "development"
|
20
20
|
when "t"; "test"
|
21
21
|
else
|
22
|
-
ARGV.first || ENV['
|
22
|
+
ARGV.first || ENV['PICKY_ENV'] || 'development'
|
23
23
|
end
|
24
24
|
|
25
25
|
puts "Use \x1b[1;30mLoader.load_application\x1b[m to load app."
|
@@ -3,18 +3,7 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe Indexers::Field do
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
@field = stub :field, :indexed_name => :some_indexed_field_name, :name => :some_field_name, :search_index_file_name => :some_index_table
|
10
|
-
@strategy = Indexers::Field.new @type, @field
|
11
|
-
@strategy.stub! :indexing_message
|
12
|
-
end
|
13
|
-
|
14
|
-
describe "chunksize" do
|
15
|
-
it "should be a specific size" do
|
16
|
-
@strategy.chunksize.should == 25_000
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
6
|
+
|
7
|
+
|
8
|
+
|
20
9
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Sources::CSV do
|
4
|
+
|
5
|
+
context "without file" do
|
6
|
+
it "should fail correctly" do
|
7
|
+
lambda { @source = Sources::CSV.new(:a, :b, :c) }.should raise_error(Sources::NoCSVFileGiven)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
context "with file" do
|
11
|
+
before(:each) do
|
12
|
+
@source = Sources::CSV.new :a, :b, :c, :file => :some_file
|
13
|
+
::CSV.should_receive(:foreach).any_number_of_times.and_yield ['7', 'a data', 'b data', 'c data']
|
14
|
+
end
|
15
|
+
describe "harvest" do
|
16
|
+
it "should yield the right data" do
|
17
|
+
field = stub :b, :name => :b
|
18
|
+
@source.harvest :anything, field do |id, token|
|
19
|
+
[id, token].should == [7, 'b data']
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
describe "get_data" do
|
24
|
+
it "should yield each line" do
|
25
|
+
@source.get_data do |data|
|
26
|
+
data.should == ['7', 'a data', 'b data', 'c data']
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
data/spec/lib/sources/db_spec.rb
CHANGED
@@ -4,12 +4,22 @@ describe Sources::DB do
|
|
4
4
|
|
5
5
|
before(:each) do
|
6
6
|
@type = stub :type, :name => 'some_type_name'
|
7
|
-
@connection = stub :connection
|
8
7
|
|
8
|
+
@connection = stub :connection
|
9
9
|
@adapter = stub :adapter, :connection => @connection
|
10
|
+
|
10
11
|
@select_statement = stub :statement
|
11
12
|
|
12
|
-
@source = Sources::DB.new @select_statement,
|
13
|
+
@source = Sources::DB.new @select_statement, :option => :some_options
|
14
|
+
|
15
|
+
@source.stub! :database => @adapter
|
16
|
+
@source.stub! :connect_backend
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "chunksize" do
|
20
|
+
it "should be a specific size" do
|
21
|
+
@source.chunksize.should == 25_000
|
22
|
+
end
|
13
23
|
end
|
14
24
|
|
15
25
|
describe "count" do
|
@@ -26,47 +36,49 @@ describe Sources::DB do
|
|
26
36
|
@source.count @type
|
27
37
|
end
|
28
38
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
39
|
+
|
40
|
+
# TODO Redo.
|
41
|
+
#
|
42
|
+
# describe "harvest" do
|
43
|
+
# before(:each) do
|
44
|
+
# @source.stub! :harvest_statement_with_offset
|
45
|
+
# end
|
46
|
+
# context 'expectations' do
|
47
|
+
# before(:each) do
|
48
|
+
# @connection.stub! :execute => []
|
49
|
+
# @connection.stub! :select_value
|
50
|
+
# end
|
51
|
+
# after(:each) do
|
52
|
+
# @source.harvest :type_name, :some_field
|
53
|
+
# end
|
54
|
+
# context "with WHERE" do
|
55
|
+
# before(:each) do
|
56
|
+
# @source.stub! :select_statement => 'bla WHERE blu'
|
57
|
+
# end
|
58
|
+
# it "should connect" do
|
59
|
+
# @source.should_receive(:connect_backend).once.with
|
60
|
+
# end
|
61
|
+
# it "should call the harvest statement with an offset" do
|
62
|
+
# @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
|
63
|
+
# end
|
64
|
+
# end
|
65
|
+
# context "without WHERE" do
|
66
|
+
# it "should connect" do
|
67
|
+
# @adapter.should_receive(:connect).once.with
|
68
|
+
# end
|
69
|
+
# it "should call the harvest statement with an offset" do
|
70
|
+
# @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
|
71
|
+
# end
|
72
|
+
# end
|
73
|
+
# end
|
74
|
+
# context 'returns' do
|
75
|
+
# it "should return whatever the execute statement returns" do
|
76
|
+
# @connection.stub! :execute => :some_result
|
77
|
+
#
|
78
|
+
# @source.harvest(:some_type, :some_field).should == :some_result
|
79
|
+
# end
|
80
|
+
# end
|
81
|
+
# end
|
70
82
|
|
71
83
|
describe "harvest_statement_with_offset" do
|
72
84
|
before(:each) do
|
@@ -76,15 +88,15 @@ describe Sources::DB do
|
|
76
88
|
end
|
77
89
|
it "should get a harvest statement and the chunksize to put the statement together" do
|
78
90
|
@source.should_receive(:harvest_statement).once.and_return 'some_example_statement'
|
79
|
-
@source.harvest_statement_with_offset(@type, @field, :some_offset
|
91
|
+
@source.harvest_statement_with_offset(@type, @field, :some_offset)
|
80
92
|
end
|
81
93
|
it "should add an AND if it already contains a WHERE statement" do
|
82
94
|
@source.should_receive(:harvest_statement).and_return 'WHERE'
|
83
|
-
@source.harvest_statement_with_offset(@type, @field, :some_offset
|
95
|
+
@source.harvest_statement_with_offset(@type, @field, :some_offset).should == "WHERE AND st.id > some_offset LIMIT 25000"
|
84
96
|
end
|
85
97
|
it "should add a WHERE if it doesn't already contain one" do
|
86
98
|
@source.should_receive(:harvest_statement).and_return 'some_statement'
|
87
|
-
@source.harvest_statement_with_offset(@type, @field, :some_offset
|
99
|
+
@source.harvest_statement_with_offset(@type, @field, :some_offset).should == "some_statement WHERE st.id > some_offset LIMIT 25000"
|
88
100
|
end
|
89
101
|
end
|
90
102
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 6
|
9
|
+
version: 0.0.6
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Florian Hanke
|
@@ -14,148 +14,13 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-10-
|
17
|
+
date: 2010-10-04 00:00:00 +02:00
|
18
18
|
default_executable: picky
|
19
19
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: bundler
|
22
|
-
prerelease: false
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
-
none: false
|
25
|
-
requirements:
|
26
|
-
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
- 9
|
31
|
-
- 26
|
32
|
-
version: 0.9.26
|
33
|
-
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: activesupport
|
37
|
-
prerelease: false
|
38
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
-
none: false
|
40
|
-
requirements:
|
41
|
-
- - "="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 2
|
45
|
-
- 3
|
46
|
-
- 8
|
47
|
-
version: 2.3.8
|
48
|
-
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: activerecord
|
52
|
-
prerelease: false
|
53
|
-
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
-
none: false
|
55
|
-
requirements:
|
56
|
-
- - "="
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 2
|
60
|
-
- 3
|
61
|
-
- 8
|
62
|
-
version: 2.3.8
|
63
|
-
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: rack
|
67
|
-
prerelease: false
|
68
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
-
none: false
|
70
|
-
requirements:
|
71
|
-
- - "="
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 1
|
75
|
-
- 2
|
76
|
-
- 1
|
77
|
-
version: 1.2.1
|
78
|
-
type: :runtime
|
79
|
-
version_requirements: *id004
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: rack-mount
|
82
|
-
prerelease: false
|
83
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
84
|
-
none: false
|
85
|
-
requirements:
|
86
|
-
- - "="
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
segments:
|
89
|
-
- 0
|
90
|
-
- 6
|
91
|
-
- 9
|
92
|
-
version: 0.6.9
|
93
|
-
type: :runtime
|
94
|
-
version_requirements: *id005
|
95
|
-
- !ruby/object:Gem::Dependency
|
96
|
-
name: rsolr
|
97
|
-
prerelease: false
|
98
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
-
none: false
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
segments:
|
104
|
-
- 0
|
105
|
-
- 12
|
106
|
-
- 1
|
107
|
-
version: 0.12.1
|
108
|
-
type: :runtime
|
109
|
-
version_requirements: *id006
|
110
|
-
- !ruby/object:Gem::Dependency
|
111
|
-
name: sunspot
|
112
|
-
prerelease: false
|
113
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
-
none: false
|
115
|
-
requirements:
|
116
|
-
- - "="
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
segments:
|
119
|
-
- 1
|
120
|
-
- 1
|
121
|
-
- 0
|
122
|
-
version: 1.1.0
|
123
|
-
type: :runtime
|
124
|
-
version_requirements: *id007
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: text
|
127
|
-
prerelease: false
|
128
|
-
requirement: &id008 !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
|
-
requirements:
|
131
|
-
- - "="
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
segments:
|
134
|
-
- 0
|
135
|
-
- 2
|
136
|
-
- 0
|
137
|
-
version: 0.2.0
|
138
|
-
type: :runtime
|
139
|
-
version_requirements: *id008
|
140
|
-
- !ruby/object:Gem::Dependency
|
141
|
-
name: rack_fast_escape
|
142
|
-
prerelease: false
|
143
|
-
requirement: &id009 !ruby/object:Gem::Requirement
|
144
|
-
none: false
|
145
|
-
requirements:
|
146
|
-
- - "="
|
147
|
-
- !ruby/object:Gem::Version
|
148
|
-
segments:
|
149
|
-
- 2009
|
150
|
-
- 6
|
151
|
-
- 24
|
152
|
-
version: 2009.06.24
|
153
|
-
type: :runtime
|
154
|
-
version_requirements: *id009
|
155
20
|
- !ruby/object:Gem::Dependency
|
156
21
|
name: rspec
|
157
22
|
prerelease: false
|
158
|
-
requirement: &
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
159
24
|
none: false
|
160
25
|
requirements:
|
161
26
|
- - ">="
|
@@ -164,7 +29,7 @@ dependencies:
|
|
164
29
|
- 0
|
165
30
|
version: "0"
|
166
31
|
type: :development
|
167
|
-
version_requirements: *
|
32
|
+
version_requirements: *id001
|
168
33
|
description: Fast Combinatorial Ruby Search Engine
|
169
34
|
email: florian.hanke+picky@gmail.com
|
170
35
|
executables:
|
@@ -199,7 +64,6 @@ files:
|
|
199
64
|
- lib/picky/configuration/queries.rb
|
200
65
|
- lib/picky/configuration/type.rb
|
201
66
|
- lib/picky/cores.rb
|
202
|
-
- lib/picky/db/configuration.rb
|
203
67
|
- lib/picky/ext/ruby19/extconf.rb
|
204
68
|
- lib/picky/extensions/array.rb
|
205
69
|
- lib/picky/extensions/hash.rb
|
@@ -323,6 +187,7 @@ files:
|
|
323
187
|
- spec/lib/results/base_spec.rb
|
324
188
|
- spec/lib/routing_spec.rb
|
325
189
|
- spec/lib/solr/schema_generator_spec.rb
|
190
|
+
- spec/lib/sources/csv_spec.rb
|
326
191
|
- spec/lib/sources/db_spec.rb
|
327
192
|
- spec/lib/tokenizers/base_spec.rb
|
328
193
|
- spec/lib/tokenizers/index_spec.rb
|
@@ -407,6 +272,7 @@ test_files:
|
|
407
272
|
- spec/lib/results/base_spec.rb
|
408
273
|
- spec/lib/routing_spec.rb
|
409
274
|
- spec/lib/solr/schema_generator_spec.rb
|
275
|
+
- spec/lib/sources/csv_spec.rb
|
410
276
|
- spec/lib/sources/db_spec.rb
|
411
277
|
- spec/lib/tokenizers/base_spec.rb
|
412
278
|
- spec/lib/tokenizers/index_spec.rb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
class DB < ActiveRecord::Base
|
2
|
-
|
3
|
-
self.abstract_class = true
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
def self.configure options = {}
|
8
|
-
@connection_options = if filename = options[:file]
|
9
|
-
File.open(File.join(SEARCH_ROOT, filename)) { |f| YAML::load(f) }
|
10
|
-
else
|
11
|
-
options
|
12
|
-
end
|
13
|
-
self
|
14
|
-
end
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
def self.connect
|
19
|
-
return if SEARCH_ENVIRONMENT.to_s == 'test'
|
20
|
-
establish_connection @connection_options
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|