picky 2.7.0 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/sources/base.rb
CHANGED
@@ -1,87 +1,90 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
# Currently, Picky offers the following Sources:
|
4
|
-
# * CSV (comma – or other – separated file)
|
5
|
-
# * Couch (CouchDB, key-value store)
|
6
|
-
# * DB (Databases, foremost MySQL)
|
7
|
-
# * Delicious (http://del.icio.us, online bookmarking service)
|
8
|
-
# See also:
|
9
|
-
# http://github.com/floere/picky/wiki/Sources-Configuration
|
10
|
-
#
|
11
|
-
# Don't worry if your source isn't here. Adding your own is easy:
|
12
|
-
# http://github.com/floere/picky/wiki/Contributing-sources
|
13
|
-
#
|
14
|
-
module Sources
|
1
|
+
module Picky
|
15
2
|
|
16
|
-
#
|
3
|
+
# = Data Sources
|
17
4
|
#
|
18
|
-
#
|
19
|
-
# *
|
20
|
-
# *
|
21
|
-
# *
|
5
|
+
# Currently, Picky offers the following Sources:
|
6
|
+
# * CSV (comma – or other – separated file)
|
7
|
+
# * Couch (CouchDB, key-value store)
|
8
|
+
# * DB (Databases, foremost MySQL)
|
9
|
+
# * Delicious (http://del.icio.us, online bookmarking service)
|
10
|
+
# See also:
|
11
|
+
# http://github.com/floere/picky/wiki/Sources-Configuration
|
22
12
|
#
|
23
|
-
#
|
24
|
-
#
|
13
|
+
# Don't worry if your source isn't here. Adding your own is easy:
|
14
|
+
# http://github.com/floere/picky/wiki/Contributing-sources
|
25
15
|
#
|
26
|
-
|
16
|
+
module Sources
|
27
17
|
|
28
|
-
|
29
|
-
|
30
|
-
# Connect to the backend.
|
18
|
+
# Sources are where your data comes from.
|
31
19
|
#
|
32
|
-
#
|
20
|
+
# A source has 1 mandatory and 2 optional methods:
|
21
|
+
# * connect_backend (_optional_): called once for each type/category pair.
|
22
|
+
# * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
|
23
|
+
# * take_snapshot (_optional_): called once for each index or category (if indexing a single category).
|
33
24
|
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
# * We open a connection to a key value store.
|
37
|
-
# * We open an file with data.
|
25
|
+
# This base class "implements" all these methods, but they don't do anything.
|
26
|
+
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
38
27
|
#
|
39
|
-
|
28
|
+
class Base
|
40
29
|
|
41
|
-
|
30
|
+
attr_reader :key_format
|
42
31
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
# def harvest category # :yields: id, text_for_id
|
54
|
-
#
|
55
|
-
# end
|
32
|
+
# Connect to the backend.
|
33
|
+
#
|
34
|
+
# Called once per index/category combination before harvesting.
|
35
|
+
#
|
36
|
+
# Examples:
|
37
|
+
# * The DB backend connects the DB adapter.
|
38
|
+
# * We open a connection to a key value store.
|
39
|
+
# * We open a file with data.
|
40
|
+
#
|
41
|
+
def connect_backend
|
56
42
|
|
57
|
-
|
58
|
-
#
|
59
|
-
# Called once for each index before harvesting.
|
60
|
-
# If it has been called on a source already by an index,
|
61
|
-
# it won't be called again for a category inside that index.
|
62
|
-
#
|
63
|
-
# Example:
|
64
|
-
# * In a DB source, a table based on the source's select statement is created.
|
65
|
-
#
|
66
|
-
def take_snapshot index
|
43
|
+
end
|
67
44
|
|
68
|
-
|
45
|
+
# Called by the indexer when gathering data.
|
46
|
+
#
|
47
|
+
# Yields the data (id, text for id) for the given category.
|
48
|
+
#
|
49
|
+
# When implementing or overriding your own,
|
50
|
+
# be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
|
51
|
+
# for the given type symbol and category symbol.
|
52
|
+
#
|
53
|
+
# Note: Since harvest needs to be implemented, it has no default implementation.
|
54
|
+
#
|
55
|
+
# def harvest category # :yields: id, text_for_id
|
56
|
+
#
|
57
|
+
# end
|
69
58
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
59
|
+
# Used to take a snapshot of your data if it is fast changing.
|
60
|
+
#
|
61
|
+
# Called once for each index before harvesting.
|
62
|
+
# If it has been called on a source already by an index,
|
63
|
+
# it won't be called again for a category inside that index.
|
64
|
+
#
|
65
|
+
# Example:
|
66
|
+
# * In a DB source, a table based on the source's select statement is created.
|
67
|
+
#
|
68
|
+
def take_snapshot index
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
# Used to check if a snapshot has been done already.
|
73
|
+
#
|
74
|
+
# Example:
|
75
|
+
# * In a DB source, a table based on the source's select statement is created.
|
76
|
+
#
|
77
|
+
def with_snapshot index
|
78
|
+
connect_backend
|
79
|
+
@snapshot_taken ||= 0
|
80
|
+
if @snapshot_taken.zero?
|
81
|
+
take_snapshot index
|
82
|
+
end
|
83
|
+
@snapshot_taken += 1
|
84
|
+
yield
|
85
|
+
@snapshot_taken -= 1
|
81
86
|
end
|
82
|
-
|
83
|
-
yield
|
84
|
-
@snapshot_taken -= 1
|
87
|
+
|
85
88
|
end
|
86
89
|
|
87
90
|
end
|
data/lib/picky/sources/couch.rb
CHANGED
@@ -1,72 +1,76 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
#
|
5
|
-
# Example:
|
6
|
-
# Sources::Couch.new(:column1, :column2) # without file option
|
7
|
-
#
|
8
|
-
class NoCouchDBGiven < StandardError; end
|
9
|
-
|
10
|
-
# A Couch database source.
|
11
|
-
#
|
12
|
-
# Options:
|
13
|
-
# * url
|
14
|
-
# and all the options of a <tt>RestClient::Resource</tt>.
|
15
|
-
# See http://github.com/archiloque/rest-client.
|
16
|
-
#
|
17
|
-
# Examples:
|
18
|
-
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
|
19
|
-
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
20
|
-
#
|
21
|
-
class Couch < Base
|
3
|
+
module Sources
|
22
4
|
|
5
|
+
# A Couch database source.
|
6
|
+
#
|
7
|
+
# Options:
|
8
|
+
# * url
|
9
|
+
# and all the options of a <tt>RestClient::Resource</tt>.
|
10
|
+
# See http://github.com/archiloque/rest-client.
|
23
11
|
#
|
12
|
+
# Examples:
|
13
|
+
# Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
|
14
|
+
# Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
24
15
|
#
|
25
|
-
|
26
|
-
check_gem
|
16
|
+
class Couch < Base
|
27
17
|
|
28
|
-
|
18
|
+
# Raised when a Couch source is instantiated without a url.
|
19
|
+
#
|
20
|
+
# Example:
|
21
|
+
# Picky::Sources::Couch.new(:column1, :column2) # without url option
|
22
|
+
#
|
23
|
+
class NoDBGiven < StandardError; end
|
29
24
|
|
30
|
-
|
25
|
+
#
|
26
|
+
#
|
27
|
+
def initialize *category_names, options
|
28
|
+
check_gem
|
31
29
|
|
32
|
-
|
33
|
-
@key_format = key_format && key_format.to_sym || :to_sym
|
34
|
-
end
|
30
|
+
Hash === options && options[:url] || raise_no_db_given(category_names)
|
35
31
|
|
36
|
-
|
37
|
-
self.class.name
|
38
|
-
end
|
32
|
+
@db = RestClient::Resource.new options.delete(:url), options
|
39
33
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
require 'rest_client'
|
44
|
-
rescue LoadError
|
45
|
-
warn_gem_missing 'rest-client', 'the CouchDB source'
|
46
|
-
exit 1
|
47
|
-
end
|
34
|
+
key_format = options.delete :key_format
|
35
|
+
@key_format = key_format && key_format.to_sym || :to_sym
|
36
|
+
end
|
48
37
|
|
49
|
-
|
50
|
-
|
51
|
-
# See important note, above.
|
52
|
-
#
|
53
|
-
@@id_key = '_id'
|
54
|
-
def harvest category
|
55
|
-
category_name = category.from.to_s
|
56
|
-
get_data do |doc|
|
57
|
-
yield doc[@@id_key], doc[category_name] || next
|
38
|
+
def to_s
|
39
|
+
self.class.name
|
58
40
|
end
|
59
|
-
end
|
60
41
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
42
|
+
# Tries to require the rest_client gem.
|
43
|
+
#
|
44
|
+
def check_gem # :nodoc:
|
45
|
+
require 'rest_client'
|
46
|
+
rescue LoadError
|
47
|
+
warn_gem_missing 'rest-client', 'the CouchDB source'
|
48
|
+
exit 1
|
49
|
+
end
|
50
|
+
|
51
|
+
# Harvests the data to index.
|
52
|
+
#
|
53
|
+
# See important note, above.
|
54
|
+
#
|
55
|
+
@@id_key = '_id'
|
56
|
+
def harvest category
|
57
|
+
category_name = category.from.to_s
|
58
|
+
get_data do |doc|
|
59
|
+
yield doc[@@id_key], doc[category_name] || next
|
60
|
+
end
|
61
|
+
end
|
67
62
|
|
68
|
-
|
69
|
-
|
63
|
+
def get_data &block # :nodoc:
|
64
|
+
resp = @db['_all_docs?include_docs=true'].get
|
65
|
+
JSON.parse(resp)['rows'].
|
66
|
+
map{|row| row['doc']}.
|
67
|
+
each &block
|
68
|
+
end
|
69
|
+
|
70
|
+
def raise_no_db_given category_names # :nodoc:
|
71
|
+
raise NoDBGiven.new(category_names.join(', '))
|
72
|
+
end
|
70
73
|
end
|
71
74
|
end
|
72
|
-
|
75
|
+
|
76
|
+
end
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,78 +1,82 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
# Raised when a CSV source is instantiated without a file.
|
4
|
-
#
|
5
|
-
# Example:
|
6
|
-
# Sources::CSV.new(:column1, :column2) # without file option
|
7
|
-
#
|
8
|
-
class NoCSVFileGiven < StandardError; end
|
9
|
-
|
10
|
-
# Describes a CSV source, a file with comma separated values in it.
|
11
|
-
#
|
12
|
-
# The first column is implicitly assumed to be the id column.
|
13
|
-
#
|
14
|
-
# It takes the same options as the Ruby 1.9 CSV class.
|
15
|
-
#
|
16
|
-
# Examples:
|
17
|
-
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
|
18
|
-
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
|
19
|
-
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
20
|
-
#
|
21
|
-
class CSV < Base
|
22
|
-
|
23
|
-
# The CSV file's path, relative to PICKY_ROOT.
|
24
|
-
#
|
25
|
-
attr_reader :file_name
|
1
|
+
module Picky
|
26
2
|
|
27
|
-
|
28
|
-
#
|
29
|
-
attr_reader :csv_options, :key_format
|
3
|
+
module Sources
|
30
4
|
|
31
|
-
#
|
5
|
+
# Describes a CSV source, a file with comma separated values in it.
|
6
|
+
#
|
7
|
+
# The first column is implicitly assumed to be the id column.
|
32
8
|
#
|
33
|
-
|
9
|
+
# It takes the same options as the Ruby 1.9 CSV class.
|
10
|
+
#
|
11
|
+
# Examples:
|
12
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
|
13
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
|
14
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
15
|
+
#
|
16
|
+
class CSV < Base
|
34
17
|
|
35
|
-
|
36
|
-
|
37
|
-
|
18
|
+
# Raised when a CSV source is instantiated without a file.
|
19
|
+
#
|
20
|
+
# Example:
|
21
|
+
# Sources::CSV.new(:column1, :column2) # without file option
|
22
|
+
#
|
23
|
+
class NoFileGiven < StandardError; end
|
38
24
|
|
39
|
-
|
40
|
-
|
25
|
+
# The CSV file's path, relative to PICKY_ROOT.
|
26
|
+
#
|
27
|
+
attr_reader :file_name
|
41
28
|
|
42
|
-
|
43
|
-
|
44
|
-
|
29
|
+
# The options that were passed into #new.
|
30
|
+
#
|
31
|
+
attr_reader :csv_options, :key_format
|
45
32
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
parameters << csv_options unless csv_options.empty?
|
50
|
-
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
51
|
-
end
|
33
|
+
# The data category names.
|
34
|
+
#
|
35
|
+
attr_reader :category_names
|
52
36
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
raise NoCSVFileGiven.new(category_names.join(', '))
|
57
|
-
end
|
37
|
+
def initialize *category_names, options
|
38
|
+
require 'csv'
|
39
|
+
@category_names = category_names
|
58
40
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
41
|
+
@csv_options = Hash === options && options || {}
|
42
|
+
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
43
|
+
|
44
|
+
key_format = options.delete :key_format
|
45
|
+
@key_format = key_format && key_format.to_sym || :to_i
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_s
|
49
|
+
parameters = category_names
|
50
|
+
parameters << { file: file_name }
|
51
|
+
parameters << csv_options unless csv_options.empty?
|
52
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
53
|
+
end
|
54
|
+
|
55
|
+
# Raises a NoFileGiven exception.
|
56
|
+
#
|
57
|
+
def raise_no_file_given category_names # :nodoc:
|
58
|
+
raise NoFileGiven.new(category_names.join(', '))
|
59
|
+
end
|
60
|
+
|
61
|
+
# Harvests the data to index.
|
62
|
+
#
|
63
|
+
def harvest category
|
64
|
+
index = category_names.index category.from
|
65
|
+
get_data do |ary|
|
66
|
+
indexed_id = ary.shift
|
67
|
+
text = ary[index]
|
68
|
+
next unless text
|
69
|
+
text.force_encoding 'utf-8' # TODO Still needed?
|
70
|
+
yield indexed_id, text
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
#
|
75
|
+
#
|
76
|
+
def get_data &block # :nodoc:
|
77
|
+
::CSV.foreach file_name, csv_options, &block
|
69
78
|
end
|
70
|
-
end
|
71
79
|
|
72
|
-
#
|
73
|
-
#
|
74
|
-
def get_data &block # :nodoc:
|
75
|
-
::CSV.foreach file_name, csv_options, &block
|
76
80
|
end
|
77
81
|
|
78
82
|
end
|