stockboy 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 025d746c97ca5ded5e263969f5faf3fa215dbaa6
4
- data.tar.gz: febec727068f490b15846a93912d625f91ad048b
3
+ metadata.gz: 64fb63c74d214447769a32a463ccab05bc92516e
4
+ data.tar.gz: c79abf6d768765673e66503d5ed5cab1d89dade7
5
5
  SHA512:
6
- metadata.gz: 8fa25cb4d7326baaa6de53c0b4160663a5f0523e5dc7a6132904e6f11bac32d72b69d7328c7a563129b17b37440756d704be45d5f7cfe3b57673f32bdb25e1cd
7
- data.tar.gz: 9dd0a0bd18b9b5b198f8aff1a90e649a3ffc8acb4b62b7aa4fd305908de6e8a3826f439ea4894a82f195692e14578513c84ced4942a5818b57eade3afd1f1687
6
+ metadata.gz: 55c4b8106f01b7f822915b4ce2e655c70afa4770a8e42794dea09345326d88a2a16edd00633fad2822a7054b7f58fee81064c00583ef93ccf0eaf849a63814de
7
+ data.tar.gz: f28bc29494c073b3b5570dc9294e089c25b15e549f8f4db5349dd0df0df310429a06d39e57dc5acf858ca90035f3bb85bb07c02a27adb4b08f6f92202f203799
data/.rspec CHANGED
@@ -1,5 +1,3 @@
1
- --backtrace
2
1
  --colour
3
2
  --format progress
4
3
  --order random
5
- --profile
data/.travis.yml CHANGED
@@ -5,6 +5,7 @@ env:
5
5
  rvm:
6
6
  - "1.9.3"
7
7
  - "2.0.0"
8
+ - "2.1.1"
8
9
  - "jruby-19mode"
9
10
  - "rbx"
10
11
  script: bundle exec rspec spec
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.7.0 / 2014-03-21
4
+
5
+ * [FEATURE] Add individual attribute mappings
6
+ * [FEATURE] Repeat provider requests to fetch paginated data
7
+ * [ENHANCEMENT] More configurable SOAP options (@markedmondson)
8
+ * Removed ActiveModel errors; configuration errors are now simple arrays
9
+
3
10
  ## 0.6.0 / 2014-02-06
4
11
 
5
12
  * [FEATURE] Support HTTP basic authentication (@markedmondson)
data/README.md CHANGED
@@ -99,10 +99,11 @@ Writing a job template requires you to declare three parts:
99
99
  file_pick :first
100
100
  end
101
101
 
102
- reader :csv do
103
- headers true
104
- col_sep "|"
105
- encoding "Windows-1252"
102
+ repeat do |inputs, provider|
103
+ 0.upto 12 do |m|
104
+ provider.file_dir = "reports/#{Date.today << 12-m}"
105
+ inputs << provider
106
+ end
106
107
  end
107
108
 
108
109
  attributes do
@@ -130,6 +131,29 @@ data, which is returned as a raw string blob. It can handle some complexity to
130
131
  determine which file to pick from an email attachment or FTP directory, for
131
132
  example.
132
133
 
134
+ #### Fetching paginated data
135
+
136
+ If the provider requires multiple queries to fetch all the data (e.g. HTTP
137
+ `?page=1` query params or a list of files), you can use an optional repeat
138
+ block to specify how to iterate over the data source, and when to stop.
139
+ Although it's possible to process each page as an individual job, the repeat
140
+ option is useful when you need to have all the data in hand before making
141
+ downstream processing decisions such as purging records that are not in the
142
+ data set.
143
+
144
+ repeat do |inputs, http_provider|
145
+ loop do
146
+ inputs << http_provider
147
+ break if http_provider.data.split("\n").size < 100
148
+ http_provider.query["page"] += 1
149
+ end
150
+ end
151
+
152
+ For each iteration, update the provider settings and push the provider onto the list
153
+ of inputs. (This uses an Enumerator to yield each iteration's data before
154
+ processing the next one, so it should be memory-efficient for long series of
155
+ data sets.)
156
+
133
157
  See: [File][file], [FTP][ftp], [HTTP][http], [IMAP][imap], [SOAP][soap]
134
158
 
135
159
  [file]: lib/stockboy/providers/file.rb
@@ -209,6 +233,9 @@ so it's a good idea to have default values at the end of the chain.
209
233
  * [`:or_zero`][dzer]
210
234
  Returns `0` for blank values
211
235
 
236
+ Attributes can be defined in a block as described, or added
237
+ individually as `attribute :name`.
238
+
212
239
  [bool]: lib/stockboy/translators/boolean.rb
213
240
  [date]: lib/stockboy/translators/date.rb
214
241
  [deci]: lib/stockboy/translators/decimal.rb
@@ -13,22 +13,11 @@ module Stockboy
13
13
  #
14
14
  class DSL
15
15
  def initialize(instance)
16
- @instance = instance
17
- @map = @instance.instance_variable_get(:@map)
16
+ @attribute_map = instance
18
17
  end
19
18
 
20
19
  def method_missing(attr, *args)
21
- opts = args.first || {}
22
- to = attr.to_sym
23
- from = opts.fetch(:from, attr)
24
- from = from.to_s.freeze if from.is_a? Symbol
25
- translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
26
- @map[attr] = Attribute.new(to, from, translators)
27
- define_attribute_method(attr)
28
- end
29
-
30
- def define_attribute_method(attr)
31
- (class << @instance; self end).send(:define_method, attr) { @map[attr] }
20
+ @attribute_map.insert(attr, *args)
32
21
  end
33
22
  end
34
23
 
@@ -40,7 +29,6 @@ module Stockboy
40
29
  if block_given?
41
30
  DSL.new(self).instance_eval(&block)
42
31
  end
43
- freeze
44
32
  end
45
33
 
46
34
  # Retrieve an attribute by symbolic name
@@ -52,6 +40,22 @@ module Stockboy
52
40
  @map[key]
53
41
  end
54
42
 
43
+ # Add or replace a mapped attribute
44
+ #
45
+ # @param [Symbol] key Name of the output attribute
46
+ # @param [Hash] opts
47
+ # @option opts [String] from Name of input field from reader
48
+ # @option opts [Array,Proc,Translator] as One or more translators
49
+ #
50
+ def insert(key, opts={})
51
+ to = key.to_sym
52
+ from = opts.fetch(:from, key)
53
+ from = from.to_s.freeze if from.is_a? Symbol
54
+ translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
55
+ define_singleton_method(key) { @map[key] }
56
+ @map[key] = Attribute.new(to, from, translators)
57
+ end
58
+
55
59
  # Fetch the attribute corresponding to the source field name
56
60
  #
57
61
  # @param [String] key
@@ -97,21 +97,20 @@ module Stockboy
97
97
  private
98
98
 
99
99
  def translate(col)
100
- return sanitize(@table[col.from]) if col.translators.empty?
101
- return @tr_table[col.to] if @tr_table.has_key? col.to
102
- fields = self.raw_hash.dup
103
- translated = col.translators.inject(input) do |m,t|
104
- begin
105
- new_value = t.call(m)
106
- rescue
107
- fields[col.to] = nil
108
- break SourceRecord.new(fields, @table)
100
+ @tr_table.fetch(col.to) do |key|
101
+ return @tr_table[key] = sanitize(@table[col.from]) if col.translators.empty?
102
+ fields = raw_hash
103
+ tr_input = col.translators.reduce(input) do |value, tr|
104
+ begin
105
+ fields[col.to] = tr[value]
106
+ SourceRecord.new(fields, @table)
107
+ rescue
108
+ fields[col.to] = nil
109
+ break SourceRecord.new(fields, @table)
110
+ end
109
111
  end
110
-
111
- fields[col.to] = new_value
112
- SourceRecord.new(fields, @table)
112
+ @tr_table[col.to] = tr_input.public_send(col.to)
113
113
  end
114
- @tr_table[col.to] = translated.public_send(col.to)
115
114
  end
116
115
 
117
116
  # Clean output values that are a subclass of a standard type
@@ -40,11 +40,11 @@ module Stockboy
40
40
  end
41
41
  end
42
42
 
43
- # DSL method for configuring the provider
43
+ # Configure the provider for fetching data
44
44
  #
45
45
  # The optional block is evaluated in the provider's own DSL context.
46
46
  #
47
- # @param [Symbol, Class, Provider] provider_class
47
+ # @param [Symbol, Class, Provider] key
48
48
  # The registered symbol name for the provider, or actual provider
49
49
  # @param [Hash] opts
50
50
  # Provider-specific options passed to the provider initializer
@@ -56,24 +56,51 @@ module Stockboy
56
56
  #
57
57
  # @return [Provider]
58
58
  #
59
- def provider(provider_class, opts={}, &block)
60
- raise ArgumentError unless provider_class
59
+ def provider(key, opts={}, &block)
60
+ @config[:provider] = Providers.build(key, opts, block)
61
+ end
62
+ alias_method :connection, :provider
61
63
 
62
- @config[:provider] = case provider_class
63
- when Symbol
64
- Providers.find(provider_class).new(opts, &block)
65
- when Class
66
- provider_class.new(opts, &block)
67
- else
68
- provider_class
64
+ # Configure repeating the provider for fetching multiple parts
65
+ #
66
+ # If the provider needs to give us all the data as a series of requests,
67
+ # for example multiple HTTP pages or FTP files, the repeat block can be
68
+ # used to define the iteration for fetching each item.
69
+ #
70
+ # The `<<` interface used here is defined by Ruby's Enumerator.new block
71
+ # syntax. For each page that needs to be fetched, the provider options need
72
+ # to be altered and pushed on to the output. Control will be yielded to the
73
+ # reader at each iteration.
74
+ #
75
+ # @example
76
+ # repeat do |output, provider|
77
+ # loop do
78
+ # output << provider
79
+ # break if provider.data.split("\n").size < 100
80
+ # provider.query_params["page"] += 1
81
+ # end
82
+ # end
83
+ #
84
+ # @example
85
+ # repeat do |output, provider|
86
+ # 1.upto 10 do |i|
87
+ # provider.file_name = "example-#{i}.log"
88
+ # output << provider
89
+ # end
90
+ # end
91
+ #
92
+ def repeat(&block)
93
+ unless block_given? && block.arity == 2
94
+ raise ArgumentError, "repeat block must accept |output, provider| arguments"
69
95
  end
96
+
97
+ @config[:repeat] = block
70
98
  end
71
- alias_method :connection, :provider
72
99
 
73
- # DSL method for configuring the reader
100
+ # Configure the reader for parsing data
74
101
  #
75
- # @param [Symbol, Class, Reader] reader_class
76
- # The registered symbol name for the reader, or actual reader
102
+ # @param [Symbol, Class, Reader] key
103
+ # The registered symbol name for the reader, or actual reader instance
77
104
  # @param [Hash] opts
78
105
  # Reader-specific options passed to the reader initializer
79
106
  #
@@ -84,21 +111,14 @@ module Stockboy
84
111
  #
85
112
  # @return [Reader]
86
113
  #
87
- def reader(reader_class, opts={}, &block)
88
- raise ArgumentError unless reader_class
89
-
90
- @config[:reader] = case reader_class
91
- when Symbol
92
- Readers.find(reader_class).new(opts, &block)
93
- when Class
94
- reader_class.new(opts, &block)
95
- else
96
- reader_class
97
- end
114
+ def reader(key, opts={}, &block)
115
+ @config[:reader] = Readers.build(key, opts, block)
98
116
  end
99
117
  alias_method :format, :reader
100
118
 
101
- # DSL method for configuring the attribute map in a block
119
+ # Configure the attribute map for data records
120
+ #
121
+ # This will replace any existing attributes with a new set.
102
122
  #
103
123
  # @example
104
124
  # attributes do
@@ -113,7 +133,20 @@ module Stockboy
113
133
  @config[:attributes] = AttributeMap.new(&block)
114
134
  end
115
135
 
116
- # DSL method to add a filter to the filter chain
136
+ # Add individual attribute mapping rules
137
+ #
138
+ # @param [Symbol] key Name of the output attribute
139
+ # @param [Hash] opts
140
+ # @option opts [String] from Name of input field from reader
141
+ # @option opts [Array,Proc,Translator] as One or more translators
142
+ #
143
+ #
144
+ def attribute(key, opts={})
145
+ @config[:attributes] ||= AttributeMap.new
146
+ @config[:attributes].insert(key, opts)
147
+ end
148
+
149
+ # Add a filter to the processing filter chain
117
150
  #
118
151
  # * Must be called with either a callable argument (proc) or a block.
119
152
  # * Must be called in the order that filters should be applied.
@@ -129,17 +162,10 @@ module Stockboy
129
162
  # filter :update, proc{ true } # capture all remaining items
130
163
  #
131
164
  def filter(key, callable=nil, *args, &block)
132
- raise ArgumentError unless key
133
- if callable.is_a?(Symbol)
134
- callable = Filters.find(callable)
135
- callable = callable.new(*args) if callable.is_a? Class
136
- end
137
- raise ArgumentError unless callable.respond_to?(:call) ^ block_given?
138
-
139
- @config[:filters][key] = block || callable
165
+ @config[:filters][key] = block || Filters.build(callable, args)
140
166
  end
141
167
 
142
- # DSL method to register a trigger to notify the job of an event
168
+ # Register a trigger to notify the job of external events
143
169
  #
144
170
  # Useful for adding generic control over the job's resources from your app.
145
171
  # For example, if you need to record stats or clean up data after your
@@ -162,6 +188,7 @@ module Stockboy
162
188
  #
163
189
  def on(key, &block)
164
190
  raise(ArgumentError, "no block given") unless block_given?
191
+
165
192
  @config[:triggers][key] << block
166
193
  end
167
194
 
@@ -170,7 +197,18 @@ module Stockboy
170
197
  # @return [Job]
171
198
  #
172
199
  def to_job
173
- Job.new(@config)
200
+ Job.new(config_for_job)
201
+ end
202
+
203
+ private
204
+
205
+ def config_for_job
206
+ config.dup.tap { |config| wrap_provider(config) }
207
+ end
208
+
209
+ def wrap_provider(config)
210
+ return unless (repeat = config.delete(:repeat))
211
+ config[:provider] = ProviderRepeater.new(config[:provider], &repeat)
174
212
  end
175
213
 
176
214
  end
@@ -6,6 +6,15 @@ module Stockboy
6
6
  #
7
7
  module Filters
8
8
  extend Stockboy::Registry
9
+
10
+ def self.build(callable, args)
11
+ if callable.is_a?(Symbol)
12
+ callable = find(callable)
13
+ callable = callable.new(*args) if callable.is_a? Class
14
+ end
15
+ callable
16
+ end
17
+
9
18
  end
10
19
 
11
20
  end
data/lib/stockboy/job.rb CHANGED
@@ -109,7 +109,6 @@ module Stockboy
109
109
  # @return [Boolean] Success or failure
110
110
  #
111
111
  def process
112
- reset
113
112
  with_query_caching do
114
113
  load_records
115
114
  yield @records if block_given?
@@ -208,17 +207,41 @@ module Stockboy
208
207
  end
209
208
 
210
209
  def load_records
211
- return unless provider.data
210
+ reset
211
+ load_all_records
212
+ partition_all_records
213
+ @processed = true
214
+ end
212
215
 
213
- @all_records = reader.parse(provider.data).map do |row|
214
- CandidateRecord.new(row, @attributes)
216
+ def load_all_records
217
+ each_reader_row do |row|
218
+ @all_records << CandidateRecord.new(row, @attributes)
215
219
  end
220
+ end
216
221
 
222
+ def partition_all_records
217
223
  @all_records.each do |record|
218
224
  record_partition(record) << record
219
225
  end
226
+ end
220
227
 
221
- @processed = true
228
+ def each_reader_row
229
+ return to_enum(__method__) unless block_given?
230
+ with_provider_data do |data|
231
+ reader.parse(data).each do |row|
232
+ yield row
233
+ end
234
+ end
235
+ end
236
+
237
+ def with_provider_data
238
+ return to_enum(__method__) unless block_given?
239
+ yielded = nil
240
+ provider.data do |data|
241
+ yielded = true
242
+ yield data
243
+ end
244
+ yield provider.data unless yielded
222
245
  end
223
246
 
224
247
  def record_partition(record)
@@ -1,6 +1,4 @@
1
1
  require 'logger'
2
- require 'active_model/errors'
3
- require 'active_model/naming'
4
2
  require 'stockboy/dsl'
5
3
  require 'stockboy/exceptions'
6
4
 
@@ -34,7 +32,6 @@ module Stockboy
34
32
  #
35
33
  class Provider
36
34
  extend Stockboy::DSL
37
- extend ActiveModel::Naming # Required by ActiveModel::Errors
38
35
 
39
36
  # Default logger if none is provided to the instance
40
37
  #
@@ -48,7 +45,7 @@ module Stockboy
48
45
  #
49
46
  attr_accessor :logger
50
47
 
51
- # @return [ActiveModel::Errors]
48
+ # @return [Array]
52
49
  #
53
50
  attr_reader :errors
54
51
 
@@ -61,7 +58,9 @@ module Stockboy
61
58
  # @return [String]
62
59
  #
63
60
  def inspect
64
- "#<#{self.class}:#{self.object_id} data_size=#{@data_size or 'nil'} errors=#{@errors.full_messages}>"
61
+ "#<#{self.class}:#{self.object_id} "\
62
+ "data_size=#{@data_size.inspect} "\
63
+ "errors=[#{errors.join(", ")}]>"
65
64
  end
66
65
 
67
66
  # Must be called by subclasses via +super+ to set up dependencies
@@ -79,8 +78,8 @@ module Stockboy
79
78
  # @!attribute [r] data
80
79
  #
81
80
  def data
82
- return @data if @data
83
- fetch_data if validate_config?
81
+ fetch_data if @data.nil? && validate_config?
82
+ yield @data if block_given?
84
83
  @data
85
84
  end
86
85
 
@@ -92,7 +91,7 @@ module Stockboy
92
91
  @data = nil
93
92
  @data_time = nil
94
93
  @data_size = nil
95
- @errors = ActiveModel::Errors.new(self)
94
+ @errors = []
96
95
  true
97
96
  end
98
97
  alias_method :reset, :clear
@@ -125,7 +124,7 @@ module Stockboy
125
124
  raise NoMethodError, "#{self.class}#fetch_data needs implementation"
126
125
  end
127
126
 
128
- # Use errors.add(:attribute, "Message") provided by ActiveModel
127
+ # Use errors << "'option' is required"
129
128
  # for validating required provider parameters before attempting
130
129
  # to make connections and retrieve data.
131
130
  #
@@ -138,27 +137,12 @@ module Stockboy
138
137
  def validate_config?
139
138
  unless validation = valid?
140
139
  logger.error do
141
- "Invalid #{self.class} provider configuration: #{errors.full_messages}"
140
+ "Invalid #{self.class} provider configuration: #{errors.join(', ')}"
142
141
  end
143
142
  end
144
143
  validation
145
144
  end
146
145
 
147
- # Required by ActiveModel::Errors
148
- def read_attribute_for_validation(attr)
149
- send(attr)
150
- end
151
-
152
- # Required by ActiveModel::Errors
153
- def self.human_attribute_name(attr, options = {})
154
- attr
155
- end
156
-
157
- # Required by ActiveModel::Errors
158
- def self.lookup_ancestors
159
- [self]
160
- end
161
-
162
146
  # When picking files from a list you can supply +:first+ or +:last+ to the
163
147
  # provider's +pick+ option, or else a block that can reduce to a single
164
148
  # value, like: