picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/sources/base.rb
CHANGED
|
@@ -1,87 +1,90 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# Currently, Picky offers the following Sources:
|
|
4
|
-
# * CSV (comma – or other – separated file)
|
|
5
|
-
# * Couch (CouchDB, key-value store)
|
|
6
|
-
# * DB (Databases, foremost MySQL)
|
|
7
|
-
# * Delicious (http://del.icio.us, online bookmarking service)
|
|
8
|
-
# See also:
|
|
9
|
-
# http://github.com/floere/picky/wiki/Sources-Configuration
|
|
10
|
-
#
|
|
11
|
-
# Don't worry if your source isn't here. Adding your own is easy:
|
|
12
|
-
# http://github.com/floere/picky/wiki/Contributing-sources
|
|
13
|
-
#
|
|
14
|
-
module Sources
|
|
1
|
+
module Picky
|
|
15
2
|
|
|
16
|
-
#
|
|
3
|
+
# = Data Sources
|
|
17
4
|
#
|
|
18
|
-
#
|
|
19
|
-
# *
|
|
20
|
-
# *
|
|
21
|
-
# *
|
|
5
|
+
# Currently, Picky offers the following Sources:
|
|
6
|
+
# * CSV (comma – or other – separated file)
|
|
7
|
+
# * Couch (CouchDB, key-value store)
|
|
8
|
+
# * DB (Databases, foremost MySQL)
|
|
9
|
+
# * Delicious (http://del.icio.us, online bookmarking service)
|
|
10
|
+
# See also:
|
|
11
|
+
# http://github.com/floere/picky/wiki/Sources-Configuration
|
|
22
12
|
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
13
|
+
# Don't worry if your source isn't here. Adding your own is easy:
|
|
14
|
+
# http://github.com/floere/picky/wiki/Contributing-sources
|
|
25
15
|
#
|
|
26
|
-
|
|
16
|
+
module Sources
|
|
27
17
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# Connect to the backend.
|
|
18
|
+
# Sources are where your data comes from.
|
|
31
19
|
#
|
|
32
|
-
#
|
|
20
|
+
# A source has 1 mandatory and 2 optional methods:
|
|
21
|
+
# * connect_backend (_optional_): called once for each type/category pair.
|
|
22
|
+
# * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
|
|
23
|
+
# * take_snapshot (_optional_): called once for each index or category (if indexing a single category).
|
|
33
24
|
#
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
# * We open a connection to a key value store.
|
|
37
|
-
# * We open an file with data.
|
|
25
|
+
# This base class "implements" all these methods, but they don't do anything.
|
|
26
|
+
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
|
38
27
|
#
|
|
39
|
-
|
|
28
|
+
class Base
|
|
40
29
|
|
|
41
|
-
|
|
30
|
+
attr_reader :key_format
|
|
42
31
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# def harvest category # :yields: id, text_for_id
|
|
54
|
-
#
|
|
55
|
-
# end
|
|
32
|
+
# Connect to the backend.
|
|
33
|
+
#
|
|
34
|
+
# Called once per index/category combination before harvesting.
|
|
35
|
+
#
|
|
36
|
+
# Examples:
|
|
37
|
+
# * The DB backend connects the DB adapter.
|
|
38
|
+
# * We open a connection to a key value store.
|
|
39
|
+
# * We open a file with data.
|
|
40
|
+
#
|
|
41
|
+
def connect_backend
|
|
56
42
|
|
|
57
|
-
|
|
58
|
-
#
|
|
59
|
-
# Called once for each index before harvesting.
|
|
60
|
-
# If it has been called on a source already by an index,
|
|
61
|
-
# it won't be called again for a category inside that index.
|
|
62
|
-
#
|
|
63
|
-
# Example:
|
|
64
|
-
# * In a DB source, a table based on the source's select statement is created.
|
|
65
|
-
#
|
|
66
|
-
def take_snapshot index
|
|
43
|
+
end
|
|
67
44
|
|
|
68
|
-
|
|
45
|
+
# Called by the indexer when gathering data.
|
|
46
|
+
#
|
|
47
|
+
# Yields the data (id, text for id) for the given category.
|
|
48
|
+
#
|
|
49
|
+
# When implementing or overriding your own,
|
|
50
|
+
# be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
|
|
51
|
+
# for the given type symbol and category symbol.
|
|
52
|
+
#
|
|
53
|
+
# Note: Since harvest needs to be implemented, it has no default implementation.
|
|
54
|
+
#
|
|
55
|
+
# def harvest category # :yields: id, text_for_id
|
|
56
|
+
#
|
|
57
|
+
# end
|
|
69
58
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
59
|
+
# Used to take a snapshot of your data if it is fast changing.
|
|
60
|
+
#
|
|
61
|
+
# Called once for each index before harvesting.
|
|
62
|
+
# If it has been called on a source already by an index,
|
|
63
|
+
# it won't be called again for a category inside that index.
|
|
64
|
+
#
|
|
65
|
+
# Example:
|
|
66
|
+
# * In a DB source, a table based on the source's select statement is created.
|
|
67
|
+
#
|
|
68
|
+
def take_snapshot index
|
|
69
|
+
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Connects the backend, takes a snapshot unless one has already been taken, then yields to the given block.
|
|
73
|
+
#
|
|
74
|
+
# Example:
|
|
75
|
+
# * In a DB source, a table based on the source's select statement is created.
|
|
76
|
+
#
|
|
77
|
+
def with_snapshot index
|
|
78
|
+
connect_backend
|
|
79
|
+
@snapshot_taken ||= 0
|
|
80
|
+
if @snapshot_taken.zero?
|
|
81
|
+
take_snapshot index
|
|
82
|
+
end
|
|
83
|
+
@snapshot_taken += 1
|
|
84
|
+
yield
|
|
85
|
+
@snapshot_taken -= 1
|
|
81
86
|
end
|
|
82
|
-
|
|
83
|
-
yield
|
|
84
|
-
@snapshot_taken -= 1
|
|
87
|
+
|
|
85
88
|
end
|
|
86
89
|
|
|
87
90
|
end
|
data/lib/picky/sources/couch.rb
CHANGED
|
@@ -1,72 +1,76 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Example:
|
|
6
|
-
# Sources::Couch.new(:column1, :column2) # without file option
|
|
7
|
-
#
|
|
8
|
-
class NoCouchDBGiven < StandardError; end
|
|
9
|
-
|
|
10
|
-
# A Couch database source.
|
|
11
|
-
#
|
|
12
|
-
# Options:
|
|
13
|
-
# * url
|
|
14
|
-
# and all the options of a <tt>RestClient::Resource</tt>.
|
|
15
|
-
# See http://github.com/archiloque/rest-client.
|
|
16
|
-
#
|
|
17
|
-
# Examples:
|
|
18
|
-
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
|
|
19
|
-
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
|
20
|
-
#
|
|
21
|
-
class Couch < Base
|
|
3
|
+
module Sources
|
|
22
4
|
|
|
5
|
+
# A Couch database source.
|
|
6
|
+
#
|
|
7
|
+
# Options:
|
|
8
|
+
# * url
|
|
9
|
+
# and all the options of a <tt>RestClient::Resource</tt>.
|
|
10
|
+
# See http://github.com/archiloque/rest-client.
|
|
23
11
|
#
|
|
12
|
+
# Examples:
|
|
13
|
+
# Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
|
|
14
|
+
# Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
|
24
15
|
#
|
|
25
|
-
|
|
26
|
-
check_gem
|
|
16
|
+
class Couch < Base
|
|
27
17
|
|
|
28
|
-
|
|
18
|
+
# Raised when a Couch source is instantiated without a url option.
|
|
19
|
+
#
|
|
20
|
+
# Example:
|
|
21
|
+
# Picky::Sources::Couch.new(:column1, :column2) # without url option
|
|
22
|
+
#
|
|
23
|
+
class NoDBGiven < StandardError; end
|
|
29
24
|
|
|
30
|
-
|
|
25
|
+
#
|
|
26
|
+
#
|
|
27
|
+
def initialize *category_names, options
|
|
28
|
+
check_gem
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
@key_format = key_format && key_format.to_sym || :to_sym
|
|
34
|
-
end
|
|
30
|
+
Hash === options && options[:url] || raise_no_db_given(category_names)
|
|
35
31
|
|
|
36
|
-
|
|
37
|
-
self.class.name
|
|
38
|
-
end
|
|
32
|
+
@db = RestClient::Resource.new options.delete(:url), options
|
|
39
33
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
require 'rest_client'
|
|
44
|
-
rescue LoadError
|
|
45
|
-
warn_gem_missing 'rest-client', 'the CouchDB source'
|
|
46
|
-
exit 1
|
|
47
|
-
end
|
|
34
|
+
key_format = options.delete :key_format
|
|
35
|
+
@key_format = key_format && key_format.to_sym || :to_sym
|
|
36
|
+
end
|
|
48
37
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# See important note, above.
|
|
52
|
-
#
|
|
53
|
-
@@id_key = '_id'
|
|
54
|
-
def harvest category
|
|
55
|
-
category_name = category.from.to_s
|
|
56
|
-
get_data do |doc|
|
|
57
|
-
yield doc[@@id_key], doc[category_name] || next
|
|
38
|
+
def to_s
|
|
39
|
+
self.class.name
|
|
58
40
|
end
|
|
59
|
-
end
|
|
60
41
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
42
|
+
# Tries to require the rest_client gem.
|
|
43
|
+
#
|
|
44
|
+
def check_gem # :nodoc:
|
|
45
|
+
require 'rest_client'
|
|
46
|
+
rescue LoadError
|
|
47
|
+
warn_gem_missing 'rest-client', 'the CouchDB source'
|
|
48
|
+
exit 1
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Harvests the data to index.
|
|
52
|
+
#
|
|
53
|
+
# See important note, above.
|
|
54
|
+
#
|
|
55
|
+
@@id_key = '_id'
|
|
56
|
+
def harvest category
|
|
57
|
+
category_name = category.from.to_s
|
|
58
|
+
get_data do |doc|
|
|
59
|
+
yield doc[@@id_key], doc[category_name] || next
|
|
60
|
+
end
|
|
61
|
+
end
|
|
67
62
|
|
|
68
|
-
|
|
69
|
-
|
|
63
|
+
def get_data &block # :nodoc:
|
|
64
|
+
resp = @db['_all_docs?include_docs=true'].get
|
|
65
|
+
JSON.parse(resp)['rows'].
|
|
66
|
+
map{|row| row['doc']}.
|
|
67
|
+
each &block
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def raise_no_db_given category_names # :nodoc:
|
|
71
|
+
raise NoDBGiven.new(category_names.join(', '))
|
|
72
|
+
end
|
|
70
73
|
end
|
|
71
74
|
end
|
|
72
|
-
|
|
75
|
+
|
|
76
|
+
end
|
data/lib/picky/sources/csv.rb
CHANGED
|
@@ -1,78 +1,82 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
# Raised when a CSV source is instantiated without a file.
|
|
4
|
-
#
|
|
5
|
-
# Example:
|
|
6
|
-
# Sources::CSV.new(:column1, :column2) # without file option
|
|
7
|
-
#
|
|
8
|
-
class NoCSVFileGiven < StandardError; end
|
|
9
|
-
|
|
10
|
-
# Describes a CSV source, a file with comma separated values in it.
|
|
11
|
-
#
|
|
12
|
-
# The first column is implicitly assumed to be the id column.
|
|
13
|
-
#
|
|
14
|
-
# It takes the same options as the Ruby 1.9 CSV class.
|
|
15
|
-
#
|
|
16
|
-
# Examples:
|
|
17
|
-
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
|
|
18
|
-
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
|
|
19
|
-
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
|
20
|
-
#
|
|
21
|
-
class CSV < Base
|
|
22
|
-
|
|
23
|
-
# The CSV file's path, relative to PICKY_ROOT.
|
|
24
|
-
#
|
|
25
|
-
attr_reader :file_name
|
|
1
|
+
module Picky
|
|
26
2
|
|
|
27
|
-
|
|
28
|
-
#
|
|
29
|
-
attr_reader :csv_options, :key_format
|
|
3
|
+
module Sources
|
|
30
4
|
|
|
31
|
-
#
|
|
5
|
+
# Describes a CSV source, a file with comma separated values in it.
|
|
6
|
+
#
|
|
7
|
+
# The first column is implicitly assumed to be the id column.
|
|
32
8
|
#
|
|
33
|
-
|
|
9
|
+
# It takes the same options as the Ruby 1.9 CSV class.
|
|
10
|
+
#
|
|
11
|
+
# Examples:
|
|
12
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
|
|
13
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
|
|
14
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
|
15
|
+
#
|
|
16
|
+
class CSV < Base
|
|
34
17
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
18
|
+
# Raised when a CSV source is instantiated without a file.
|
|
19
|
+
#
|
|
20
|
+
# Example:
|
|
21
|
+
# Sources::CSV.new(:column1, :column2) # without file option
|
|
22
|
+
#
|
|
23
|
+
class NoFileGiven < StandardError; end
|
|
38
24
|
|
|
39
|
-
|
|
40
|
-
|
|
25
|
+
# The CSV file's path, relative to PICKY_ROOT.
|
|
26
|
+
#
|
|
27
|
+
attr_reader :file_name
|
|
41
28
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
29
|
+
# The options that were passed into #new.
|
|
30
|
+
#
|
|
31
|
+
attr_reader :csv_options, :key_format
|
|
45
32
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
parameters << csv_options unless csv_options.empty?
|
|
50
|
-
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
|
51
|
-
end
|
|
33
|
+
# The data category names.
|
|
34
|
+
#
|
|
35
|
+
attr_reader :category_names
|
|
52
36
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
raise NoCSVFileGiven.new(category_names.join(', '))
|
|
57
|
-
end
|
|
37
|
+
def initialize *category_names, options
|
|
38
|
+
require 'csv'
|
|
39
|
+
@category_names = category_names
|
|
58
40
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
41
|
+
@csv_options = Hash === options && options || {}
|
|
42
|
+
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
|
43
|
+
|
|
44
|
+
key_format = options.delete :key_format
|
|
45
|
+
@key_format = key_format && key_format.to_sym || :to_i
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def to_s
|
|
49
|
+
parameters = category_names
|
|
50
|
+
parameters << { file: file_name }
|
|
51
|
+
parameters << csv_options unless csv_options.empty?
|
|
52
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Raises a NoFileGiven exception.
|
|
56
|
+
#
|
|
57
|
+
def raise_no_file_given category_names # :nodoc:
|
|
58
|
+
raise NoFileGiven.new(category_names.join(', '))
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Harvests the data to index.
|
|
62
|
+
#
|
|
63
|
+
def harvest category
|
|
64
|
+
index = category_names.index category.from
|
|
65
|
+
get_data do |ary|
|
|
66
|
+
indexed_id = ary.shift
|
|
67
|
+
text = ary[index]
|
|
68
|
+
next unless text
|
|
69
|
+
text.force_encoding 'utf-8' # TODO Still needed?
|
|
70
|
+
yield indexed_id, text
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
#
|
|
75
|
+
#
|
|
76
|
+
def get_data &block # :nodoc:
|
|
77
|
+
::CSV.foreach file_name, csv_options, &block
|
|
69
78
|
end
|
|
70
|
-
end
|
|
71
79
|
|
|
72
|
-
#
|
|
73
|
-
#
|
|
74
|
-
def get_data &block # :nodoc:
|
|
75
|
-
::CSV.foreach file_name, csv_options, &block
|
|
76
80
|
end
|
|
77
81
|
|
|
78
82
|
end
|