stockboy 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +0 -2
- data/.travis.yml +1 -0
- data/CHANGELOG.md +7 -0
- data/README.md +31 -4
- data/lib/stockboy/attribute_map.rb +18 -14
- data/lib/stockboy/candidate_record.rb +12 -13
- data/lib/stockboy/configurator.rb +76 -38
- data/lib/stockboy/filters.rb +9 -0
- data/lib/stockboy/job.rb +28 -5
- data/lib/stockboy/provider.rb +9 -25
- data/lib/stockboy/provider_repeater.rb +64 -0
- data/lib/stockboy/providers/file.rb +7 -6
- data/lib/stockboy/providers/ftp.rb +7 -6
- data/lib/stockboy/providers/http.rb +3 -2
- data/lib/stockboy/providers/imap.rb +7 -5
- data/lib/stockboy/providers/soap.rb +79 -3
- data/lib/stockboy/registry.rb +11 -0
- data/lib/stockboy/translator.rb +1 -0
- data/lib/stockboy/version.rb +1 -1
- data/spec/stockboy/attribute_map_spec.rb +13 -2
- data/spec/stockboy/configurator_spec.rb +21 -0
- data/spec/stockboy/job_spec.rb +44 -19
- data/spec/stockboy/provider_repeater_spec.rb +80 -0
- data/spec/stockboy/provider_spec.rb +22 -2
- data/spec/stockboy/providers/file_spec.rb +7 -7
- data/spec/stockboy/providers/ftp_spec.rb +3 -3
- data/spec/stockboy/providers/http_spec.rb +4 -2
- data/spec/stockboy/providers/imap_spec.rb +2 -2
- data/spec/stockboy/providers/soap_spec.rb +7 -2
- data/stockboy.gemspec +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64fb63c74d214447769a32a463ccab05bc92516e
|
4
|
+
data.tar.gz: c79abf6d768765673e66503d5ed5cab1d89dade7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55c4b8106f01b7f822915b4ce2e655c70afa4770a8e42794dea09345326d88a2a16edd00633fad2822a7054b7f58fee81064c00583ef93ccf0eaf849a63814de
|
7
|
+
data.tar.gz: f28bc29494c073b3b5570dc9294e089c25b15e549f8f4db5349dd0df0df310429a06d39e57dc5acf858ca90035f3bb85bb07c02a27adb4b08f6f92202f203799
|
data/.rspec
CHANGED
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.7.0 / 2014-03-21
|
4
|
+
|
5
|
+
* [FEATURE] Add individual attribute mappings
|
6
|
+
* [FEATURE] Repeat provider requests to fetch paginated data
|
7
|
+
* [ENHANCEMENT] More configurable SOAP options (@markedmondson)
|
8
|
+
* Removed ActiveModel errors, configuration errors are simple arrays now
|
9
|
+
|
3
10
|
## 0.6.0 / 2014-02-06
|
4
11
|
|
5
12
|
* [FEATURE] Support HTTP basic authentication (@markedmondson)
|
data/README.md
CHANGED
@@ -99,10 +99,11 @@ Writing a job template requires you to declare three parts:
|
|
99
99
|
file_pick :first
|
100
100
|
end
|
101
101
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
102
|
+
repeat do |inputs, provider|
|
103
|
+
0.upto 12 do |m|
|
104
|
+
provider.file_dir = "reports/#{Date.today << 12-m}"
|
105
|
+
inputs << provider
|
106
|
+
end
|
106
107
|
end
|
107
108
|
|
108
109
|
attributes do
|
@@ -130,6 +131,29 @@ data, which is returned as a raw string blob. It can handle some complexity to
|
|
130
131
|
determine which file to pick from an email attachment or FTP directory, for
|
131
132
|
example.
|
132
133
|
|
134
|
+
#### Fetching paginated data
|
135
|
+
|
136
|
+
If the provider requires multiple queries to fetch all the data (e.g. http
|
137
|
+
`?page=1` query params or a list of files), you can use an optional repeat
|
138
|
+
block to specify how to iterate over the data source, and when to stop.
|
139
|
+
Although it's possible to process each page as an individual job, the repeat
|
140
|
+
option is useful when you need to have all the data in hand before making
|
141
|
+
downstream processing decisions such as purging records that are not in the
|
142
|
+
data set.
|
143
|
+
|
144
|
+
repeat do |inputs, http_provider|
|
145
|
+
loop do
|
146
|
+
inputs << http_provider
|
147
|
+
break if http_provider.data.split("\n").size < 100
|
148
|
+
http_provider.query["page"] += 1
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
For each iteration, increment the provider settings and push it onto the list
|
153
|
+
of inputs. (This uses an Enumerator to yield each iteration's data before
|
154
|
+
processing the next one, so it should be memory-efficient for long series of
|
155
|
+
data sets.)
|
156
|
+
|
133
157
|
See: [File][file], [FTP][ftp], [HTTP][http], [IMAP][imap], [SOAP][soap]
|
134
158
|
|
135
159
|
[file]: lib/stockboy/providers/file.rb
|
@@ -209,6 +233,9 @@ so it's a good idea to have default values at the end of the chain.
|
|
209
233
|
* [`:or_zero`][dzer]
|
210
234
|
Returns `0` for blank values
|
211
235
|
|
236
|
+
Attributes can be defined in a block as described, or added
|
237
|
+
individually as `attribute :name`.
|
238
|
+
|
212
239
|
[bool]: lib/stockboy/translators/boolean.rb
|
213
240
|
[date]: lib/stockboy/translators/date.rb
|
214
241
|
[deci]: lib/stockboy/translators/decimal.rb
|
data/lib/stockboy/attribute_map.rb
CHANGED
@@ -13,22 +13,11 @@ module Stockboy
|
|
13
13
|
#
|
14
14
|
class DSL
|
15
15
|
def initialize(instance)
|
16
|
-
@
|
17
|
-
@map = @instance.instance_variable_get(:@map)
|
16
|
+
@attribute_map = instance
|
18
17
|
end
|
19
18
|
|
20
19
|
def method_missing(attr, *args)
|
21
|
-
|
22
|
-
to = attr.to_sym
|
23
|
-
from = opts.fetch(:from, attr)
|
24
|
-
from = from.to_s.freeze if from.is_a? Symbol
|
25
|
-
translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
|
26
|
-
@map[attr] = Attribute.new(to, from, translators)
|
27
|
-
define_attribute_method(attr)
|
28
|
-
end
|
29
|
-
|
30
|
-
def define_attribute_method(attr)
|
31
|
-
(class << @instance; self end).send(:define_method, attr) { @map[attr] }
|
20
|
+
@attribute_map.insert(attr, *args)
|
32
21
|
end
|
33
22
|
end
|
34
23
|
|
@@ -40,7 +29,6 @@ module Stockboy
|
|
40
29
|
if block_given?
|
41
30
|
DSL.new(self).instance_eval(&block)
|
42
31
|
end
|
43
|
-
freeze
|
44
32
|
end
|
45
33
|
|
46
34
|
# Retrieve an attribute by symbolic name
|
@@ -52,6 +40,22 @@ module Stockboy
|
|
52
40
|
@map[key]
|
53
41
|
end
|
54
42
|
|
43
|
+
# Add or replace a mapped attribute
|
44
|
+
#
|
45
|
+
# @param [Symbol] key Name of the output attribute
|
46
|
+
# @param [Hash] opts
|
47
|
+
# @option opts [String] from Name of input field from reader
|
48
|
+
# @option opts [Array,Proc,Translator] as One or more translators
|
49
|
+
#
|
50
|
+
def insert(key, opts={})
|
51
|
+
to = key.to_sym
|
52
|
+
from = opts.fetch(:from, key)
|
53
|
+
from = from.to_s.freeze if from.is_a? Symbol
|
54
|
+
translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
|
55
|
+
define_singleton_method(key) { @map[key] }
|
56
|
+
@map[key] = Attribute.new(to, from, translators)
|
57
|
+
end
|
58
|
+
|
55
59
|
# Fetch the attribute corresponding to the source field name
|
56
60
|
#
|
57
61
|
# @param [String] key
|
data/lib/stockboy/candidate_record.rb
CHANGED
@@ -97,21 +97,20 @@ module Stockboy
|
|
97
97
|
private
|
98
98
|
|
99
99
|
def translate(col)
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
100
|
+
@tr_table.fetch(col.to) do |key|
|
101
|
+
return @tr_table[key] = sanitize(@table[col.from]) if col.translators.empty?
|
102
|
+
fields = raw_hash
|
103
|
+
tr_input = col.translators.reduce(input) do |value, tr|
|
104
|
+
begin
|
105
|
+
fields[col.to] = tr[value]
|
106
|
+
SourceRecord.new(fields, @table)
|
107
|
+
rescue
|
108
|
+
fields[col.to] = nil
|
109
|
+
break SourceRecord.new(fields, @table)
|
110
|
+
end
|
109
111
|
end
|
110
|
-
|
111
|
-
fields[col.to] = new_value
|
112
|
-
SourceRecord.new(fields, @table)
|
112
|
+
@tr_table[col.to] = tr_input.public_send(col.to)
|
113
113
|
end
|
114
|
-
@tr_table[col.to] = translated.public_send(col.to)
|
115
114
|
end
|
116
115
|
|
117
116
|
# Clean output values that are a subclass of a standard type
|
data/lib/stockboy/configurator.rb
CHANGED
@@ -40,11 +40,11 @@ module Stockboy
|
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Configure the provider for fetching data
|
44
44
|
#
|
45
45
|
# The optional block is evaluated in the provider's own DSL context.
|
46
46
|
#
|
47
|
-
# @param [Symbol, Class, Provider]
|
47
|
+
# @param [Symbol, Class, Provider] key
|
48
48
|
# The registered symbol name for the provider, or actual provider
|
49
49
|
# @param [Hash] opts
|
50
50
|
# Provider-specific options passed to the provider initializer
|
@@ -56,24 +56,51 @@ module Stockboy
|
|
56
56
|
#
|
57
57
|
# @return [Provider]
|
58
58
|
#
|
59
|
-
def provider(
|
60
|
-
|
59
|
+
def provider(key, opts={}, &block)
|
60
|
+
@config[:provider] = Providers.build(key, opts, block)
|
61
|
+
end
|
62
|
+
alias_method :connection, :provider
|
61
63
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
64
|
+
# Configure repeating the provider for fetching multiple parts
|
65
|
+
#
|
66
|
+
# If the provider needs to give us all the data as a series of requests,
|
67
|
+
# for example multiple HTTP pages or FTP files, the repeat block can be
|
68
|
+
# used to define the iteration for fetching each item.
|
69
|
+
#
|
70
|
+
# The `<<` interface used here is defined by Ruby's Enumerator.new block
|
71
|
+
# syntax. For each page that needs to be fetched, the provider options need
|
72
|
+
# to be altered and pushed on to the output. Control will be yielded to the
|
73
|
+
# reader at each iteration.
|
74
|
+
#
|
75
|
+
# @example
|
76
|
+
# repeat do |output, provider|
|
77
|
+
# loop do
|
78
|
+
# output << provider
|
79
|
+
# break if provider.data.split("\n").size < 100
|
80
|
+
# provider.query_params["page"] += 1
|
81
|
+
# end
|
82
|
+
# end
|
83
|
+
#
|
84
|
+
# @example
|
85
|
+
# repeat do |output, provider|
|
86
|
+
# 1.upto 10 do |i|
|
87
|
+
# provider.file_name = "example-#{i}.log"
|
88
|
+
# output << provider
|
89
|
+
# end
|
90
|
+
# end
|
91
|
+
#
|
92
|
+
def repeat(&block)
|
93
|
+
unless block_given? && block.arity == 2
|
94
|
+
raise ArgumentError, "repeat block must accept |output, provider| arguments"
|
69
95
|
end
|
96
|
+
|
97
|
+
@config[:repeat] = block
|
70
98
|
end
|
71
|
-
alias_method :connection, :provider
|
72
99
|
|
73
|
-
#
|
100
|
+
# Configure the reader for parsing data
|
74
101
|
#
|
75
|
-
# @param [Symbol, Class, Reader]
|
76
|
-
# The registered symbol name for the reader, or actual reader
|
102
|
+
# @param [Symbol, Class, Reader] key
|
103
|
+
# The registered symbol name for the reader, or actual reader instance
|
77
104
|
# @param [Hash] opts
|
78
105
|
# Provider-specific options passed to the provider initializer
|
79
106
|
#
|
@@ -84,21 +111,14 @@ module Stockboy
|
|
84
111
|
#
|
85
112
|
# @return [Reader]
|
86
113
|
#
|
87
|
-
def reader(
|
88
|
-
|
89
|
-
|
90
|
-
@config[:reader] = case reader_class
|
91
|
-
when Symbol
|
92
|
-
Readers.find(reader_class).new(opts, &block)
|
93
|
-
when Class
|
94
|
-
reader_class.new(opts, &block)
|
95
|
-
else
|
96
|
-
reader_class
|
97
|
-
end
|
114
|
+
def reader(key, opts={}, &block)
|
115
|
+
@config[:reader] = Readers.build(key, opts, block)
|
98
116
|
end
|
99
117
|
alias_method :format, :reader
|
100
118
|
|
101
|
-
#
|
119
|
+
# Configure the attribute map for data records
|
120
|
+
#
|
121
|
+
# This will replace any existing attributes with a new set.
|
102
122
|
#
|
103
123
|
# @example
|
104
124
|
# attributes do
|
@@ -113,7 +133,20 @@ module Stockboy
|
|
113
133
|
@config[:attributes] = AttributeMap.new(&block)
|
114
134
|
end
|
115
135
|
|
116
|
-
#
|
136
|
+
# Add individual attribute mapping rules
|
137
|
+
#
|
138
|
+
# @param [Symbol] key Name of the output attribute
|
139
|
+
# @param [Hash] opts
|
140
|
+
# @option opts [String] from Name of input field from reader
|
141
|
+
# @option opts [Array,Proc,Translator] as One or more translators
|
142
|
+
#
|
143
|
+
#
|
144
|
+
def attribute(key, opts={})
|
145
|
+
@config[:attributes] ||= AttributeMap.new
|
146
|
+
@config[:attributes].insert(key, opts)
|
147
|
+
end
|
148
|
+
|
149
|
+
# Add a filter to the processing filter chain
|
117
150
|
#
|
118
151
|
# * Must be called with either a callable argument (proc) or a block.
|
119
152
|
# * Must be called in the order that filters should be applied.
|
@@ -129,17 +162,10 @@ module Stockboy
|
|
129
162
|
# filter :update, proc{ true } # capture all remaining items
|
130
163
|
#
|
131
164
|
def filter(key, callable=nil, *args, &block)
|
132
|
-
|
133
|
-
if callable.is_a?(Symbol)
|
134
|
-
callable = Filters.find(callable)
|
135
|
-
callable = callable.new(*args) if callable.is_a? Class
|
136
|
-
end
|
137
|
-
raise ArgumentError unless callable.respond_to?(:call) ^ block_given?
|
138
|
-
|
139
|
-
@config[:filters][key] = block || callable
|
165
|
+
@config[:filters][key] = block || Filters.build(callable, args)
|
140
166
|
end
|
141
167
|
|
142
|
-
#
|
168
|
+
# Register a trigger to notify the job of external events
|
143
169
|
#
|
144
170
|
# Useful for adding generic control over the job's resources from your app.
|
145
171
|
# For example, if you need to record stats or clean up data after your
|
@@ -162,6 +188,7 @@ module Stockboy
|
|
162
188
|
#
|
163
189
|
def on(key, &block)
|
164
190
|
raise(ArgumentError, "no block given") unless block_given?
|
191
|
+
|
165
192
|
@config[:triggers][key] << block
|
166
193
|
end
|
167
194
|
|
@@ -170,7 +197,18 @@ module Stockboy
|
|
170
197
|
# @return [Job]
|
171
198
|
#
|
172
199
|
def to_job
|
173
|
-
Job.new(
|
200
|
+
Job.new(config_for_job)
|
201
|
+
end
|
202
|
+
|
203
|
+
private
|
204
|
+
|
205
|
+
def config_for_job
|
206
|
+
config.dup.tap { |config| wrap_provider(config) }
|
207
|
+
end
|
208
|
+
|
209
|
+
def wrap_provider(config)
|
210
|
+
return unless (repeat = config.delete(:repeat))
|
211
|
+
config[:provider] = ProviderRepeater.new(config[:provider], &repeat)
|
174
212
|
end
|
175
213
|
|
176
214
|
end
|
data/lib/stockboy/filters.rb
CHANGED
@@ -6,6 +6,15 @@ module Stockboy
|
|
6
6
|
#
|
7
7
|
module Filters
|
8
8
|
extend Stockboy::Registry
|
9
|
+
|
10
|
+
def self.build(callable, args)
|
11
|
+
if callable.is_a?(Symbol)
|
12
|
+
callable = find(callable)
|
13
|
+
callable = callable.new(*args) if callable.is_a? Class
|
14
|
+
end
|
15
|
+
callable
|
16
|
+
end
|
17
|
+
|
9
18
|
end
|
10
19
|
|
11
20
|
end
|
data/lib/stockboy/job.rb
CHANGED
@@ -109,7 +109,6 @@ module Stockboy
|
|
109
109
|
# @return [Boolean] Success or failure
|
110
110
|
#
|
111
111
|
def process
|
112
|
-
reset
|
113
112
|
with_query_caching do
|
114
113
|
load_records
|
115
114
|
yield @records if block_given?
|
@@ -208,17 +207,41 @@ module Stockboy
|
|
208
207
|
end
|
209
208
|
|
210
209
|
def load_records
|
211
|
-
|
210
|
+
reset
|
211
|
+
load_all_records
|
212
|
+
partition_all_records
|
213
|
+
@processed = true
|
214
|
+
end
|
212
215
|
|
213
|
-
|
214
|
-
|
216
|
+
def load_all_records
|
217
|
+
each_reader_row do |row|
|
218
|
+
@all_records << CandidateRecord.new(row, @attributes)
|
215
219
|
end
|
220
|
+
end
|
216
221
|
|
222
|
+
def partition_all_records
|
217
223
|
@all_records.each do |record|
|
218
224
|
record_partition(record) << record
|
219
225
|
end
|
226
|
+
end
|
220
227
|
|
221
|
-
|
228
|
+
def each_reader_row
|
229
|
+
return to_enum(__method__) unless block_given?
|
230
|
+
with_provider_data do |data|
|
231
|
+
reader.parse(data).each do |row|
|
232
|
+
yield row
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
237
|
+
def with_provider_data
|
238
|
+
return to_enum(__method__) unless block_given?
|
239
|
+
yielded = nil
|
240
|
+
provider.data do |data|
|
241
|
+
yielded = true
|
242
|
+
yield data
|
243
|
+
end
|
244
|
+
yield provider.data unless yielded
|
222
245
|
end
|
223
246
|
|
224
247
|
def record_partition(record)
|
data/lib/stockboy/provider.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
require 'logger'
|
2
|
-
require 'active_model/errors'
|
3
|
-
require 'active_model/naming'
|
4
2
|
require 'stockboy/dsl'
|
5
3
|
require 'stockboy/exceptions'
|
6
4
|
|
@@ -34,7 +32,6 @@ module Stockboy
|
|
34
32
|
#
|
35
33
|
class Provider
|
36
34
|
extend Stockboy::DSL
|
37
|
-
extend ActiveModel::Naming # Required by ActiveModel::Errors
|
38
35
|
|
39
36
|
# Default logger if none is provided to the instance
|
40
37
|
#
|
@@ -48,7 +45,7 @@ module Stockboy
|
|
48
45
|
#
|
49
46
|
attr_accessor :logger
|
50
47
|
|
51
|
-
# @return [
|
48
|
+
# @return [Array]
|
52
49
|
#
|
53
50
|
attr_reader :errors
|
54
51
|
|
@@ -61,7 +58,9 @@ module Stockboy
|
|
61
58
|
# @return [String]
|
62
59
|
#
|
63
60
|
def inspect
|
64
|
-
|
61
|
+
"#<#{self.class}:#{self.object_id} "\
|
62
|
+
"data_size=#{@data_size.inspect} "\
|
63
|
+
"errors=[#{errors.join(", ")}]>"
|
65
64
|
end
|
66
65
|
|
67
66
|
# Must be called by subclasses via +super+ to set up dependencies
|
@@ -79,8 +78,8 @@ module Stockboy
|
|
79
78
|
# @!attribute [r] data
|
80
79
|
#
|
81
80
|
def data
|
82
|
-
|
83
|
-
|
81
|
+
fetch_data if @data.nil? && validate_config?
|
82
|
+
yield @data if block_given?
|
84
83
|
@data
|
85
84
|
end
|
86
85
|
|
@@ -92,7 +91,7 @@ module Stockboy
|
|
92
91
|
@data = nil
|
93
92
|
@data_time = nil
|
94
93
|
@data_size = nil
|
95
|
-
@errors =
|
94
|
+
@errors = []
|
96
95
|
true
|
97
96
|
end
|
98
97
|
alias_method :reset, :clear
|
@@ -125,7 +124,7 @@ module Stockboy
|
|
125
124
|
raise NoMethodError, "#{self.class}#fetch_data needs implementation"
|
126
125
|
end
|
127
126
|
|
128
|
-
# Use errors
|
127
|
+
# Use errors << "'option' is required"
|
129
128
|
# for validating required provider parameters before attempting
|
130
129
|
# to make connections and retrieve data.
|
131
130
|
#
|
@@ -138,27 +137,12 @@ module Stockboy
|
|
138
137
|
def validate_config?
|
139
138
|
unless validation = valid?
|
140
139
|
logger.error do
|
141
|
-
"Invalid #{self.class} provider configuration: #{errors.
|
140
|
+
"Invalid #{self.class} provider configuration: #{errors.join(', ')}"
|
142
141
|
end
|
143
142
|
end
|
144
143
|
validation
|
145
144
|
end
|
146
145
|
|
147
|
-
# Required by ActiveModel::Errors
|
148
|
-
def read_attribute_for_validation(attr)
|
149
|
-
send(attr)
|
150
|
-
end
|
151
|
-
|
152
|
-
# Required by ActiveModel::Errors
|
153
|
-
def self.human_attribute_name(attr, options = {})
|
154
|
-
attr
|
155
|
-
end
|
156
|
-
|
157
|
-
# Required by ActiveModel::Errors
|
158
|
-
def self.lookup_ancestors
|
159
|
-
[self]
|
160
|
-
end
|
161
|
-
|
162
146
|
# When picking files from a list you can supply +:first+ or +:last+ to the
|
163
147
|
# provider's +pick+ option, or else a block that can reduce to a single
|
164
148
|
# value, like:
|