stockboy 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +0 -2
- data/.travis.yml +1 -0
- data/CHANGELOG.md +7 -0
- data/README.md +31 -4
- data/lib/stockboy/attribute_map.rb +18 -14
- data/lib/stockboy/candidate_record.rb +12 -13
- data/lib/stockboy/configurator.rb +76 -38
- data/lib/stockboy/filters.rb +9 -0
- data/lib/stockboy/job.rb +28 -5
- data/lib/stockboy/provider.rb +9 -25
- data/lib/stockboy/provider_repeater.rb +64 -0
- data/lib/stockboy/providers/file.rb +7 -6
- data/lib/stockboy/providers/ftp.rb +7 -6
- data/lib/stockboy/providers/http.rb +3 -2
- data/lib/stockboy/providers/imap.rb +7 -5
- data/lib/stockboy/providers/soap.rb +79 -3
- data/lib/stockboy/registry.rb +11 -0
- data/lib/stockboy/translator.rb +1 -0
- data/lib/stockboy/version.rb +1 -1
- data/spec/stockboy/attribute_map_spec.rb +13 -2
- data/spec/stockboy/configurator_spec.rb +21 -0
- data/spec/stockboy/job_spec.rb +44 -19
- data/spec/stockboy/provider_repeater_spec.rb +80 -0
- data/spec/stockboy/provider_spec.rb +22 -2
- data/spec/stockboy/providers/file_spec.rb +7 -7
- data/spec/stockboy/providers/ftp_spec.rb +3 -3
- data/spec/stockboy/providers/http_spec.rb +4 -2
- data/spec/stockboy/providers/imap_spec.rb +2 -2
- data/spec/stockboy/providers/soap_spec.rb +7 -2
- data/stockboy.gemspec +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64fb63c74d214447769a32a463ccab05bc92516e
|
4
|
+
data.tar.gz: c79abf6d768765673e66503d5ed5cab1d89dade7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55c4b8106f01b7f822915b4ce2e655c70afa4770a8e42794dea09345326d88a2a16edd00633fad2822a7054b7f58fee81064c00583ef93ccf0eaf849a63814de
|
7
|
+
data.tar.gz: f28bc29494c073b3b5570dc9294e089c25b15e549f8f4db5349dd0df0df310429a06d39e57dc5acf858ca90035f3bb85bb07c02a27adb4b08f6f92202f203799
|
data/.rspec
CHANGED
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.7.0 / 2014-03-21
|
4
|
+
|
5
|
+
* [FEATURE] Add individual attribute mappings
|
6
|
+
* [FEATURE] Repeat provider requests to fetch paginated data
|
7
|
+
* [ENHANCEMENT] More configurable SOAP options (@markedmondson)
|
8
|
+
* Removed ActiveModel errors; configuration errors are simple arrays now
|
9
|
+
|
3
10
|
## 0.6.0 / 2014-02-06
|
4
11
|
|
5
12
|
* [FEATURE] Support HTTP basic authentication (@markedmondson)
|
data/README.md
CHANGED
@@ -99,10 +99,11 @@ Writing a job template requires you to declare three parts:
|
|
99
99
|
file_pick :first
|
100
100
|
end
|
101
101
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
102
|
+
repeat do |inputs, provider|
|
103
|
+
0.upto 12 do |m|
|
104
|
+
provider.file_dir = "reports/#{Date.today << 12-m}"
|
105
|
+
inputs << provider
|
106
|
+
end
|
106
107
|
end
|
107
108
|
|
108
109
|
attributes do
|
@@ -130,6 +131,29 @@ data, which is returned as a raw string blob. It can handle some complexity to
|
|
130
131
|
determine which file to pick from an email attachment or FTP directory, for
|
131
132
|
example.
|
132
133
|
|
134
|
+
#### Fetching paginated data
|
135
|
+
|
136
|
+
If the provider requires multiple queries to fetch all the data (e.g. HTTP
|
137
|
+
`?page=1` query params or a list of files), you can use an optional repeat
|
138
|
+
block to specify how to iterate over the data source, and when to stop.
|
139
|
+
Although it's possible to process each page as an individual job, the repeat
|
140
|
+
option is useful when you need to have all the data in hand before making
|
141
|
+
downstream processing decisions such as purging records that are not in the
|
142
|
+
data set.
|
143
|
+
|
144
|
+
repeat do |inputs, http_provider|
|
145
|
+
loop do
|
146
|
+
inputs << http_provider
|
147
|
+
break if http_provider.data.split("\n").size < 100
|
148
|
+
http_provider.query["page"] += 1
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
For each iteration, update the provider's settings and push the provider onto the list
|
153
|
+
of inputs. (This uses an Enumerator to yield each iteration's data before
|
154
|
+
processing the next one, so it should be memory-efficient for long series of
|
155
|
+
data sets.)
|
156
|
+
|
133
157
|
See: [File][file], [FTP][ftp], [HTTP][http], [IMAP][imap], [SOAP][soap]
|
134
158
|
|
135
159
|
[file]: lib/stockboy/providers/file.rb
|
@@ -209,6 +233,9 @@ so it's a good idea to have default values at the end of the chain.
|
|
209
233
|
* [`:or_zero`][dzer]
|
210
234
|
Returns `0` for blank values
|
211
235
|
|
236
|
+
Attributes can be defined in a block as described, or added
|
237
|
+
individually as `attribute :name`.
|
238
|
+
|
212
239
|
[bool]: lib/stockboy/translators/boolean.rb
|
213
240
|
[date]: lib/stockboy/translators/date.rb
|
214
241
|
[deci]: lib/stockboy/translators/decimal.rb
|
@@ -13,22 +13,11 @@ module Stockboy
|
|
13
13
|
#
|
14
14
|
class DSL
|
15
15
|
def initialize(instance)
|
16
|
-
@
|
17
|
-
@map = @instance.instance_variable_get(:@map)
|
16
|
+
@attribute_map = instance
|
18
17
|
end
|
19
18
|
|
20
19
|
def method_missing(attr, *args)
|
21
|
-
|
22
|
-
to = attr.to_sym
|
23
|
-
from = opts.fetch(:from, attr)
|
24
|
-
from = from.to_s.freeze if from.is_a? Symbol
|
25
|
-
translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
|
26
|
-
@map[attr] = Attribute.new(to, from, translators)
|
27
|
-
define_attribute_method(attr)
|
28
|
-
end
|
29
|
-
|
30
|
-
def define_attribute_method(attr)
|
31
|
-
(class << @instance; self end).send(:define_method, attr) { @map[attr] }
|
20
|
+
@attribute_map.insert(attr, *args)
|
32
21
|
end
|
33
22
|
end
|
34
23
|
|
@@ -40,7 +29,6 @@ module Stockboy
|
|
40
29
|
if block_given?
|
41
30
|
DSL.new(self).instance_eval(&block)
|
42
31
|
end
|
43
|
-
freeze
|
44
32
|
end
|
45
33
|
|
46
34
|
# Retrieve an attribute by symbolic name
|
@@ -52,6 +40,22 @@ module Stockboy
|
|
52
40
|
@map[key]
|
53
41
|
end
|
54
42
|
|
43
|
+
# Add or replace a mapped attribute
|
44
|
+
#
|
45
|
+
# @param [Symbol] key Name of the output attribute
|
46
|
+
# @param [Hash] opts
|
47
|
+
# @option opts [String] from Name of input field from reader
|
48
|
+
# @option opts [Array,Proc,Translator] as One or more translators
|
49
|
+
#
|
50
|
+
def insert(key, opts={})
|
51
|
+
to = key.to_sym
|
52
|
+
from = opts.fetch(:from, key)
|
53
|
+
from = from.to_s.freeze if from.is_a? Symbol
|
54
|
+
translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
|
55
|
+
define_singleton_method(key) { @map[key] }
|
56
|
+
@map[key] = Attribute.new(to, from, translators)
|
57
|
+
end
|
58
|
+
|
55
59
|
# Fetch the attribute corresponding to the source field name
|
56
60
|
#
|
57
61
|
# @param [String] key
|
@@ -97,21 +97,20 @@ module Stockboy
|
|
97
97
|
private
|
98
98
|
|
99
99
|
def translate(col)
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
100
|
+
@tr_table.fetch(col.to) do |key|
|
101
|
+
return @tr_table[key] = sanitize(@table[col.from]) if col.translators.empty?
|
102
|
+
fields = raw_hash
|
103
|
+
tr_input = col.translators.reduce(input) do |value, tr|
|
104
|
+
begin
|
105
|
+
fields[col.to] = tr[value]
|
106
|
+
SourceRecord.new(fields, @table)
|
107
|
+
rescue
|
108
|
+
fields[col.to] = nil
|
109
|
+
break SourceRecord.new(fields, @table)
|
110
|
+
end
|
109
111
|
end
|
110
|
-
|
111
|
-
fields[col.to] = new_value
|
112
|
-
SourceRecord.new(fields, @table)
|
112
|
+
@tr_table[col.to] = tr_input.public_send(col.to)
|
113
113
|
end
|
114
|
-
@tr_table[col.to] = translated.public_send(col.to)
|
115
114
|
end
|
116
115
|
|
117
116
|
# Clean output values that are a subclass of a standard type
|
@@ -40,11 +40,11 @@ module Stockboy
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Configure the provider for fetching data
|
44
44
|
#
|
45
45
|
# The optional block is evaluated in the provider's own DSL context.
|
46
46
|
#
|
47
|
-
# @param [Symbol, Class, Provider]
|
47
|
+
# @param [Symbol, Class, Provider] key
|
48
48
|
# The registered symbol name for the provider, or actual provider
|
49
49
|
# @param [Hash] opts
|
50
50
|
# Provider-specific options passed to the provider initializer
|
@@ -56,24 +56,51 @@ module Stockboy
|
|
56
56
|
#
|
57
57
|
# @return [Provider]
|
58
58
|
#
|
59
|
-
def provider(
|
60
|
-
|
59
|
+
def provider(key, opts={}, &block)
|
60
|
+
@config[:provider] = Providers.build(key, opts, block)
|
61
|
+
end
|
62
|
+
alias_method :connection, :provider
|
61
63
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
64
|
+
# Configure repeating the provider for fetching multiple parts
|
65
|
+
#
|
66
|
+
# If the provider needs to give us all the data as a series of requests,
|
67
|
+
# for example multiple HTTP pages or FTP files, the repeat block can be
|
68
|
+
# used to define the iteration for fetching each item.
|
69
|
+
#
|
70
|
+
# The `<<` interface used here is defined by Ruby's Enumerator.new block
|
71
|
+
# syntax. For each page that needs to be fetched, the provider options need
|
72
|
+
# to be altered and pushed on to the output. Control will be yielded to the
|
73
|
+
# reader at each iteration.
|
74
|
+
#
|
75
|
+
# @example
|
76
|
+
# repeat do |output, provider|
|
77
|
+
# loop do
|
78
|
+
# output << provider
|
79
|
+
# break if provider.data.split("\n").size < 100
|
80
|
+
# provider.query["page"] += 1
|
81
|
+
# end
|
82
|
+
# end
|
83
|
+
#
|
84
|
+
# @example
|
85
|
+
# repeat do |output, provider|
|
86
|
+
# 1.upto 10 do |i|
|
87
|
+
# provider.file_name = "example-#{i}.log"
|
88
|
+
# output << provider
|
89
|
+
# end
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
def repeat(&block)
|
93
|
+
unless block_given? && block.arity == 2
|
94
|
+
raise ArgumentError, "repeat block must accept |output, provider| arguments"
|
69
95
|
end
|
96
|
+
|
97
|
+
@config[:repeat] = block
|
70
98
|
end
|
71
|
-
alias_method :connection, :provider
|
72
99
|
|
73
|
-
#
|
100
|
+
# Configure the reader for parsing data
|
74
101
|
#
|
75
|
-
# @param [Symbol, Class, Reader]
|
76
|
-
# The registered symbol name for the reader, or actual reader
|
102
|
+
# @param [Symbol, Class, Reader] key
|
103
|
+
# The registered symbol name for the reader, or actual reader instance
|
77
104
|
# @param [Hash] opts
|
78
105
|
# Reader-specific options passed to the reader initializer
|
79
106
|
#
|
@@ -84,21 +111,14 @@ module Stockboy
|
|
84
111
|
#
|
85
112
|
# @return [Reader]
|
86
113
|
#
|
87
|
-
def reader(
|
88
|
-
|
89
|
-
|
90
|
-
@config[:reader] = case reader_class
|
91
|
-
when Symbol
|
92
|
-
Readers.find(reader_class).new(opts, &block)
|
93
|
-
when Class
|
94
|
-
reader_class.new(opts, &block)
|
95
|
-
else
|
96
|
-
reader_class
|
97
|
-
end
|
114
|
+
def reader(key, opts={}, &block)
|
115
|
+
@config[:reader] = Readers.build(key, opts, block)
|
98
116
|
end
|
99
117
|
alias_method :format, :reader
|
100
118
|
|
101
|
-
#
|
119
|
+
# Configure the attribute map for data records
|
120
|
+
#
|
121
|
+
# This will replace any existing attributes with a new set.
|
102
122
|
#
|
103
123
|
# @example
|
104
124
|
# attributes do
|
@@ -113,7 +133,20 @@ module Stockboy
|
|
113
133
|
@config[:attributes] = AttributeMap.new(&block)
|
114
134
|
end
|
115
135
|
|
116
|
-
#
|
136
|
+
# Add individual attribute mapping rules
|
137
|
+
#
|
138
|
+
# @param [Symbol] key Name of the output attribute
|
139
|
+
# @param [Hash] opts
|
140
|
+
# @option opts [String] from Name of input field from reader
|
141
|
+
# @option opts [Array,Proc,Translator] as One or more translators
|
142
|
+
#
|
143
|
+
#
|
144
|
+
def attribute(key, opts={})
|
145
|
+
@config[:attributes] ||= AttributeMap.new
|
146
|
+
@config[:attributes].insert(key, opts)
|
147
|
+
end
|
148
|
+
|
149
|
+
# Add a filter to the processing filter chain
|
117
150
|
#
|
118
151
|
# * Must be called with either a callable argument (proc) or a block.
|
119
152
|
# * Must be called in the order that filters should be applied.
|
@@ -129,17 +162,10 @@ module Stockboy
|
|
129
162
|
# filter :update, proc{ true } # capture all remaining items
|
130
163
|
#
|
131
164
|
def filter(key, callable=nil, *args, &block)
|
132
|
-
|
133
|
-
if callable.is_a?(Symbol)
|
134
|
-
callable = Filters.find(callable)
|
135
|
-
callable = callable.new(*args) if callable.is_a? Class
|
136
|
-
end
|
137
|
-
raise ArgumentError unless callable.respond_to?(:call) ^ block_given?
|
138
|
-
|
139
|
-
@config[:filters][key] = block || callable
|
165
|
+
@config[:filters][key] = block || Filters.build(callable, args)
|
140
166
|
end
|
141
167
|
|
142
|
-
#
|
168
|
+
# Register a trigger to notify the job of external events
|
143
169
|
#
|
144
170
|
# Useful for adding generic control over the job's resources from your app.
|
145
171
|
# For example, if you need to record stats or clean up data after your
|
@@ -162,6 +188,7 @@ module Stockboy
|
|
162
188
|
#
|
163
189
|
def on(key, &block)
|
164
190
|
raise(ArgumentError, "no block given") unless block_given?
|
191
|
+
|
165
192
|
@config[:triggers][key] << block
|
166
193
|
end
|
167
194
|
|
@@ -170,7 +197,18 @@ module Stockboy
|
|
170
197
|
# @return [Job]
|
171
198
|
#
|
172
199
|
def to_job
|
173
|
-
Job.new(
|
200
|
+
Job.new(config_for_job)
|
201
|
+
end
|
202
|
+
|
203
|
+
private
|
204
|
+
|
205
|
+
def config_for_job
|
206
|
+
config.dup.tap { |config| wrap_provider(config) }
|
207
|
+
end
|
208
|
+
|
209
|
+
def wrap_provider(config)
|
210
|
+
return unless (repeat = config.delete(:repeat))
|
211
|
+
config[:provider] = ProviderRepeater.new(config[:provider], &repeat)
|
174
212
|
end
|
175
213
|
|
176
214
|
end
|
data/lib/stockboy/filters.rb
CHANGED
@@ -6,6 +6,15 @@ module Stockboy
|
|
6
6
|
#
|
7
7
|
module Filters
|
8
8
|
extend Stockboy::Registry
|
9
|
+
|
10
|
+
def self.build(callable, args)
|
11
|
+
if callable.is_a?(Symbol)
|
12
|
+
callable = find(callable)
|
13
|
+
callable = callable.new(*args) if callable.is_a? Class
|
14
|
+
end
|
15
|
+
callable
|
16
|
+
end
|
17
|
+
|
9
18
|
end
|
10
19
|
|
11
20
|
end
|
data/lib/stockboy/job.rb
CHANGED
@@ -109,7 +109,6 @@ module Stockboy
|
|
109
109
|
# @return [Boolean] Success or failure
|
110
110
|
#
|
111
111
|
def process
|
112
|
-
reset
|
113
112
|
with_query_caching do
|
114
113
|
load_records
|
115
114
|
yield @records if block_given?
|
@@ -208,17 +207,41 @@ module Stockboy
|
|
208
207
|
end
|
209
208
|
|
210
209
|
def load_records
|
211
|
-
|
210
|
+
reset
|
211
|
+
load_all_records
|
212
|
+
partition_all_records
|
213
|
+
@processed = true
|
214
|
+
end
|
212
215
|
|
213
|
-
|
214
|
-
|
216
|
+
def load_all_records
|
217
|
+
each_reader_row do |row|
|
218
|
+
@all_records << CandidateRecord.new(row, @attributes)
|
215
219
|
end
|
220
|
+
end
|
216
221
|
|
222
|
+
def partition_all_records
|
217
223
|
@all_records.each do |record|
|
218
224
|
record_partition(record) << record
|
219
225
|
end
|
226
|
+
end
|
220
227
|
|
221
|
-
|
228
|
+
def each_reader_row
|
229
|
+
return to_enum(__method__) unless block_given?
|
230
|
+
with_provider_data do |data|
|
231
|
+
reader.parse(data).each do |row|
|
232
|
+
yield row
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
def with_provider_data
|
238
|
+
return to_enum(__method__) unless block_given?
|
239
|
+
yielded = nil
|
240
|
+
provider.data do |data|
|
241
|
+
yielded = true
|
242
|
+
yield data
|
243
|
+
end
|
244
|
+
yield provider.data unless yielded
|
222
245
|
end
|
223
246
|
|
224
247
|
def record_partition(record)
|
data/lib/stockboy/provider.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
require 'logger'
|
2
|
-
require 'active_model/errors'
|
3
|
-
require 'active_model/naming'
|
4
2
|
require 'stockboy/dsl'
|
5
3
|
require 'stockboy/exceptions'
|
6
4
|
|
@@ -34,7 +32,6 @@ module Stockboy
|
|
34
32
|
#
|
35
33
|
class Provider
|
36
34
|
extend Stockboy::DSL
|
37
|
-
extend ActiveModel::Naming # Required by ActiveModel::Errors
|
38
35
|
|
39
36
|
# Default logger if none is provided to the instance
|
40
37
|
#
|
@@ -48,7 +45,7 @@ module Stockboy
|
|
48
45
|
#
|
49
46
|
attr_accessor :logger
|
50
47
|
|
51
|
-
# @return [
|
48
|
+
# @return [Array]
|
52
49
|
#
|
53
50
|
attr_reader :errors
|
54
51
|
|
@@ -61,7 +58,9 @@ module Stockboy
|
|
61
58
|
# @return [String]
|
62
59
|
#
|
63
60
|
def inspect
|
64
|
-
|
61
|
+
"#<#{self.class}:#{self.object_id} "\
|
62
|
+
"data_size=#{@data_size.inspect} "\
|
63
|
+
"errors=[#{errors.join(", ")}]>"
|
65
64
|
end
|
66
65
|
|
67
66
|
# Must be called by subclasses via +super+ to set up dependencies
|
@@ -79,8 +78,8 @@ module Stockboy
|
|
79
78
|
# @!attribute [r] data
|
80
79
|
#
|
81
80
|
def data
|
82
|
-
|
83
|
-
|
81
|
+
fetch_data if @data.nil? && validate_config?
|
82
|
+
yield @data if block_given?
|
84
83
|
@data
|
85
84
|
end
|
86
85
|
|
@@ -92,7 +91,7 @@ module Stockboy
|
|
92
91
|
@data = nil
|
93
92
|
@data_time = nil
|
94
93
|
@data_size = nil
|
95
|
-
@errors =
|
94
|
+
@errors = []
|
96
95
|
true
|
97
96
|
end
|
98
97
|
alias_method :reset, :clear
|
@@ -125,7 +124,7 @@ module Stockboy
|
|
125
124
|
raise NoMethodError, "#{self.class}#fetch_data needs implementation"
|
126
125
|
end
|
127
126
|
|
128
|
-
# Use errors
|
127
|
+
# Use errors << "'option' is required"
|
129
128
|
# for validating required provider parameters before attempting
|
130
129
|
# to make connections and retrieve data.
|
131
130
|
#
|
@@ -138,27 +137,12 @@ module Stockboy
|
|
138
137
|
def validate_config?
|
139
138
|
unless validation = valid?
|
140
139
|
logger.error do
|
141
|
-
"Invalid #{self.class} provider configuration: #{errors.
|
140
|
+
"Invalid #{self.class} provider configuration: #{errors.join(', ')}"
|
142
141
|
end
|
143
142
|
end
|
144
143
|
validation
|
145
144
|
end
|
146
145
|
|
147
|
-
# Required by ActiveModel::Errors
|
148
|
-
def read_attribute_for_validation(attr)
|
149
|
-
send(attr)
|
150
|
-
end
|
151
|
-
|
152
|
-
# Required by ActiveModel::Errors
|
153
|
-
def self.human_attribute_name(attr, options = {})
|
154
|
-
attr
|
155
|
-
end
|
156
|
-
|
157
|
-
# Required by ActiveModel::Errors
|
158
|
-
def self.lookup_ancestors
|
159
|
-
[self]
|
160
|
-
end
|
161
|
-
|
162
146
|
# When picking files from a list you can supply +:first+ or +:last+ to the
|
163
147
|
# provider's +pick+ option, or else a block that can reduce to a single
|
164
148
|
# value, like:
|