stockboy 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 025d746c97ca5ded5e263969f5faf3fa215dbaa6
4
- data.tar.gz: febec727068f490b15846a93912d625f91ad048b
3
+ metadata.gz: 64fb63c74d214447769a32a463ccab05bc92516e
4
+ data.tar.gz: c79abf6d768765673e66503d5ed5cab1d89dade7
5
5
  SHA512:
6
- metadata.gz: 8fa25cb4d7326baaa6de53c0b4160663a5f0523e5dc7a6132904e6f11bac32d72b69d7328c7a563129b17b37440756d704be45d5f7cfe3b57673f32bdb25e1cd
7
- data.tar.gz: 9dd0a0bd18b9b5b198f8aff1a90e649a3ffc8acb4b62b7aa4fd305908de6e8a3826f439ea4894a82f195692e14578513c84ced4942a5818b57eade3afd1f1687
6
+ metadata.gz: 55c4b8106f01b7f822915b4ce2e655c70afa4770a8e42794dea09345326d88a2a16edd00633fad2822a7054b7f58fee81064c00583ef93ccf0eaf849a63814de
7
+ data.tar.gz: f28bc29494c073b3b5570dc9294e089c25b15e549f8f4db5349dd0df0df310429a06d39e57dc5acf858ca90035f3bb85bb07c02a27adb4b08f6f92202f203799
data/.rspec CHANGED
@@ -1,5 +1,3 @@
1
- --backtrace
2
1
  --colour
3
2
  --format progress
4
3
  --order random
5
- --profile
data/.travis.yml CHANGED
@@ -5,6 +5,7 @@ env:
5
5
  rvm:
6
6
  - "1.9.3"
7
7
  - "2.0.0"
8
+ - "2.1.1"
8
9
  - "jruby-19mode"
9
10
  - "rbx"
10
11
  script: bundle exec rspec spec
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.7.0 / 2014-03-21
4
+
5
+ * [FEATURE] Add individual attribute mappings
6
+ * [FEATURE] Repeat provider requests to fetch paginated data
7
+ * [ENHANCEMENT] More configurable SOAP options (@markedmondson)
8
+ * Removed ActiveModel errors, configuration errors are simple arrays now
9
+
3
10
  ## 0.6.0 / 2014-02-06
4
11
 
5
12
  * [FEATURE] Support HTTP basic authentication (@markedmondson)
data/README.md CHANGED
@@ -99,10 +99,11 @@ Writing a job template requires you to declare three parts:
99
99
  file_pick :first
100
100
  end
101
101
 
102
- reader :csv do
103
- headers true
104
- col_sep "|"
105
- encoding "Windows-1252"
102
+ repeat do |inputs, provider|
103
+ 0.upto 12 do |m|
104
+ provider.file_dir = "reports/#{Date.today << 12-m}"
105
+ inputs << provider
106
+ end
106
107
  end
107
108
 
108
109
  attributes do
@@ -130,6 +131,29 @@ data, which is returned as a raw string blob. It can handle some complexity to
130
131
  determine which file to pick from an email attachment or FTP directory, for
131
132
  example.
132
133
 
134
+ #### Fetching paginated data
135
+
136
+ If the provider requires multiple queries to fetch all the data (e.g. http
137
+ `?page=1` query params or a list of files), you can use an optional repeat
138
+ block to specify how to iterate over the data source, and when to stop.
139
+ Although it's possible to process each page as an individual job, the repeat
140
+ option is useful when you need to have all the data in hand before making
141
+ downstream processing decisions such as purging records that are not in the
142
+ data set.
143
+
144
+ repeat do |inputs, http_provider|
145
+ loop do
146
+ inputs << http_provider
147
+ break if http_provider.data.split("\n").size < 100
148
+ http_provider.query["page"] += 1
149
+ end
150
+ end
151
+
152
+ For each iteration, increment the provider settings and push it onto the list
153
+ of inputs. (This uses an Enumerator to yield each iteration's data before
154
+ processing the next one, so it should be memory-efficient for long series of
155
+ data sets.)
156
+
133
157
  See: [File][file], [FTP][ftp], [HTTP][http], [IMAP][imap], [SOAP][soap]
134
158
 
135
159
  [file]: lib/stockboy/providers/file.rb
@@ -209,6 +233,9 @@ so it's a good idea to have default values at the end of the chain.
209
233
  * [`:or_zero`][dzer]
210
234
  Returns `0` for blank values
211
235
 
236
+ Attributes can be defined in a block as described, or added
237
+ individually as `attribute :name`.
238
+
212
239
  [bool]: lib/stockboy/translators/boolean.rb
213
240
  [date]: lib/stockboy/translators/date.rb
214
241
  [deci]: lib/stockboy/translators/decimal.rb
@@ -13,22 +13,11 @@ module Stockboy
13
13
  #
14
14
  class DSL
15
15
  def initialize(instance)
16
- @instance = instance
17
- @map = @instance.instance_variable_get(:@map)
16
+ @attribute_map = instance
18
17
  end
19
18
 
20
19
  def method_missing(attr, *args)
21
- opts = args.first || {}
22
- to = attr.to_sym
23
- from = opts.fetch(:from, attr)
24
- from = from.to_s.freeze if from.is_a? Symbol
25
- translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
26
- @map[attr] = Attribute.new(to, from, translators)
27
- define_attribute_method(attr)
28
- end
29
-
30
- def define_attribute_method(attr)
31
- (class << @instance; self end).send(:define_method, attr) { @map[attr] }
20
+ @attribute_map.insert(attr, *args)
32
21
  end
33
22
  end
34
23
 
@@ -40,7 +29,6 @@ module Stockboy
40
29
  if block_given?
41
30
  DSL.new(self).instance_eval(&block)
42
31
  end
43
- freeze
44
32
  end
45
33
 
46
34
  # Retrieve an attribute by symbolic name
@@ -52,6 +40,22 @@ module Stockboy
52
40
  @map[key]
53
41
  end
54
42
 
43
+ # Add or replace a mapped attribute
44
+ #
45
+ # @param [Symbol] key Name of the output attribute
46
+ # @param [Hash] opts
47
+ # @option opts [String] from Name of input field from reader
48
+ # @option opts [Array,Proc,Translator] as One or more translators
49
+ #
50
+ def insert(key, opts={})
51
+ to = key.to_sym
52
+ from = opts.fetch(:from, key)
53
+ from = from.to_s.freeze if from.is_a? Symbol
54
+ translators = Array(opts[:as]).map { |t| Translations.translator_for(to, t) }
55
+ define_singleton_method(key) { @map[key] }
56
+ @map[key] = Attribute.new(to, from, translators)
57
+ end
58
+
55
59
  # Fetch the attribute corresponding to the source field name
56
60
  #
57
61
  # @param [String] key
@@ -97,21 +97,20 @@ module Stockboy
97
97
  private
98
98
 
99
99
  def translate(col)
100
- return sanitize(@table[col.from]) if col.translators.empty?
101
- return @tr_table[col.to] if @tr_table.has_key? col.to
102
- fields = self.raw_hash.dup
103
- translated = col.translators.inject(input) do |m,t|
104
- begin
105
- new_value = t.call(m)
106
- rescue
107
- fields[col.to] = nil
108
- break SourceRecord.new(fields, @table)
100
+ @tr_table.fetch(col.to) do |key|
101
+ return @tr_table[key] = sanitize(@table[col.from]) if col.translators.empty?
102
+ fields = raw_hash
103
+ tr_input = col.translators.reduce(input) do |value, tr|
104
+ begin
105
+ fields[col.to] = tr[value]
106
+ SourceRecord.new(fields, @table)
107
+ rescue
108
+ fields[col.to] = nil
109
+ break SourceRecord.new(fields, @table)
110
+ end
109
111
  end
110
-
111
- fields[col.to] = new_value
112
- SourceRecord.new(fields, @table)
112
+ @tr_table[col.to] = tr_input.public_send(col.to)
113
113
  end
114
- @tr_table[col.to] = translated.public_send(col.to)
115
114
  end
116
115
 
117
116
  # Clean output values that are a subclass of a standard type
@@ -40,11 +40,11 @@ module Stockboy
40
40
  end
41
41
  end
42
42
 
43
- # DSL method for configuring the provider
43
+ # Configure the provider for fetching data
44
44
  #
45
45
  # The optional block is evaluated in the provider's own DSL context.
46
46
  #
47
- # @param [Symbol, Class, Provider] provider_class
47
+ # @param [Symbol, Class, Provider] key
48
48
  # The registered symbol name for the provider, or actual provider
49
49
  # @param [Hash] opts
50
50
  # Provider-specific options passed to the provider initializer
@@ -56,24 +56,51 @@ module Stockboy
56
56
  #
57
57
  # @return [Provider]
58
58
  #
59
- def provider(provider_class, opts={}, &block)
60
- raise ArgumentError unless provider_class
59
+ def provider(key, opts={}, &block)
60
+ @config[:provider] = Providers.build(key, opts, block)
61
+ end
62
+ alias_method :connection, :provider
61
63
 
62
- @config[:provider] = case provider_class
63
- when Symbol
64
- Providers.find(provider_class).new(opts, &block)
65
- when Class
66
- provider_class.new(opts, &block)
67
- else
68
- provider_class
64
+ # Configure repeating the provider for fetching multiple parts
65
+ #
66
+ # If the provider needs to give us all the data as a series of requests,
67
+ # for example multiple HTTP pages or FTP files, the repeat block can be
68
+ # used to define the iteration for fetching each item.
69
+ #
70
+ # The `<<` interface used here is defined by Ruby's Enumerator.new block
71
+ # syntax. For each page that needs to be fetched, the provider options need
72
+ # to be altered and pushed on to the output. Control will be yielded to the
73
+ # reader at each iteration.
74
+ #
75
+ # @example
76
+ # repeat do |output, provider|
77
+ # loop do
78
+ # output << provider
79
+ # break if provider.data.split("\n").size < 100
80
+ # provider.query_params["page"] += 1
81
+ # end
82
+ # end
83
+ #
84
+ # @example
85
+ # repeat do |output, provider|
86
+ # 1.upto 10 do |i|
87
+ # provider.file_name = "example-#{i}.log"
88
+ # output << provider
89
+ # end
90
+ # end
91
+ #
92
+ def repeat(&block)
93
+ unless block_given? && block.arity == 2
94
+ raise ArgumentError, "repeat block must accept |output, provider| arguments"
69
95
  end
96
+
97
+ @config[:repeat] = block
70
98
  end
71
- alias_method :connection, :provider
72
99
 
73
- # DSL method for configuring the reader
100
+ # Configure the reader for parsing data
74
101
  #
75
- # @param [Symbol, Class, Reader] reader_class
76
- # The registered symbol name for the reader, or actual reader
102
+ # @param [Symbol, Class, Reader] key
103
+ # The registered symbol name for the reader, or actual reader instance
77
104
  # @param [Hash] opts
78
105
  # Provider-specific options passed to the provider initializer
79
106
  #
@@ -84,21 +111,14 @@ module Stockboy
84
111
  #
85
112
  # @return [Reader]
86
113
  #
87
- def reader(reader_class, opts={}, &block)
88
- raise ArgumentError unless reader_class
89
-
90
- @config[:reader] = case reader_class
91
- when Symbol
92
- Readers.find(reader_class).new(opts, &block)
93
- when Class
94
- reader_class.new(opts, &block)
95
- else
96
- reader_class
97
- end
114
+ def reader(key, opts={}, &block)
115
+ @config[:reader] = Readers.build(key, opts, block)
98
116
  end
99
117
  alias_method :format, :reader
100
118
 
101
- # DSL method for configuring the attribute map in a block
119
+ # Configure the attribute map for data records
120
+ #
121
+ # This will replace any existing attributes with a new set.
102
122
  #
103
123
  # @example
104
124
  # attributes do
@@ -113,7 +133,20 @@ module Stockboy
113
133
  @config[:attributes] = AttributeMap.new(&block)
114
134
  end
115
135
 
116
- # DSL method to add a filter to the filter chain
136
+ # Add individual attribute mapping rules
137
+ #
138
+ # @param [Symbol] key Name of the output attribute
139
+ # @param [Hash] opts
140
+ # @option opts [String] from Name of input field from reader
141
+ # @option opts [Array,Proc,Translator] as One or more translators
142
+ #
143
+ #
144
+ def attribute(key, opts={})
145
+ @config[:attributes] ||= AttributeMap.new
146
+ @config[:attributes].insert(key, opts)
147
+ end
148
+
149
+ # Add a filter to the processing filter chain
117
150
  #
118
151
  # * Must be called with either a callable argument (proc) or a block.
119
152
  # * Must be called in the order that filters should be applied.
@@ -129,17 +162,10 @@ module Stockboy
129
162
  # filter :update, proc{ true } # capture all remaining items
130
163
  #
131
164
  def filter(key, callable=nil, *args, &block)
132
- raise ArgumentError unless key
133
- if callable.is_a?(Symbol)
134
- callable = Filters.find(callable)
135
- callable = callable.new(*args) if callable.is_a? Class
136
- end
137
- raise ArgumentError unless callable.respond_to?(:call) ^ block_given?
138
-
139
- @config[:filters][key] = block || callable
165
+ @config[:filters][key] = block || Filters.build(callable, args)
140
166
  end
141
167
 
142
- # DSL method to register a trigger to notify the job of an event
168
+ # Register a trigger to notify the job of external events
143
169
  #
144
170
  # Useful for adding generic control over the job's resources from your app.
145
171
  # For example, if you need to record stats or clean up data after your
@@ -162,6 +188,7 @@ module Stockboy
162
188
  #
163
189
  def on(key, &block)
164
190
  raise(ArgumentError, "no block given") unless block_given?
191
+
165
192
  @config[:triggers][key] << block
166
193
  end
167
194
 
@@ -170,7 +197,18 @@ module Stockboy
170
197
  # @return [Job]
171
198
  #
172
199
  def to_job
173
- Job.new(@config)
200
+ Job.new(config_for_job)
201
+ end
202
+
203
+ private
204
+
205
+ def config_for_job
206
+ config.dup.tap { |config| wrap_provider(config) }
207
+ end
208
+
209
+ def wrap_provider(config)
210
+ return unless (repeat = config.delete(:repeat))
211
+ config[:provider] = ProviderRepeater.new(config[:provider], &repeat)
174
212
  end
175
213
 
176
214
  end
@@ -6,6 +6,15 @@ module Stockboy
6
6
  #
7
7
  module Filters
8
8
  extend Stockboy::Registry
9
+
10
+ def self.build(callable, args)
11
+ if callable.is_a?(Symbol)
12
+ callable = find(callable)
13
+ callable = callable.new(*args) if callable.is_a? Class
14
+ end
15
+ callable
16
+ end
17
+
9
18
  end
10
19
 
11
20
  end
data/lib/stockboy/job.rb CHANGED
@@ -109,7 +109,6 @@ module Stockboy
109
109
  # @return [Boolean] Success or failure
110
110
  #
111
111
  def process
112
- reset
113
112
  with_query_caching do
114
113
  load_records
115
114
  yield @records if block_given?
@@ -208,17 +207,41 @@ module Stockboy
208
207
  end
209
208
 
210
209
  def load_records
211
- return unless provider.data
210
+ reset
211
+ load_all_records
212
+ partition_all_records
213
+ @processed = true
214
+ end
212
215
 
213
- @all_records = reader.parse(provider.data).map do |row|
214
- CandidateRecord.new(row, @attributes)
216
+ def load_all_records
217
+ each_reader_row do |row|
218
+ @all_records << CandidateRecord.new(row, @attributes)
215
219
  end
220
+ end
216
221
 
222
+ def partition_all_records
217
223
  @all_records.each do |record|
218
224
  record_partition(record) << record
219
225
  end
226
+ end
220
227
 
221
- @processed = true
228
+ def each_reader_row
229
+ return to_enum(__method__) unless block_given?
230
+ with_provider_data do |data|
231
+ reader.parse(data).each do |row|
232
+ yield row
233
+ end
234
+ end
235
+ end
236
+
237
+ def with_provider_data
238
+ return to_enum(__method__) unless block_given?
239
+ yielded = nil
240
+ provider.data do |data|
241
+ yielded = true
242
+ yield data
243
+ end
244
+ yield provider.data unless yielded
222
245
  end
223
246
 
224
247
  def record_partition(record)
@@ -1,6 +1,4 @@
1
1
  require 'logger'
2
- require 'active_model/errors'
3
- require 'active_model/naming'
4
2
  require 'stockboy/dsl'
5
3
  require 'stockboy/exceptions'
6
4
 
@@ -34,7 +32,6 @@ module Stockboy
34
32
  #
35
33
  class Provider
36
34
  extend Stockboy::DSL
37
- extend ActiveModel::Naming # Required by ActiveModel::Errors
38
35
 
39
36
  # Default logger if none is provided to the instance
40
37
  #
@@ -48,7 +45,7 @@ module Stockboy
48
45
  #
49
46
  attr_accessor :logger
50
47
 
51
- # @return [ActiveModel::Errors]
48
+ # @return [Array]
52
49
  #
53
50
  attr_reader :errors
54
51
 
@@ -61,7 +58,9 @@ module Stockboy
61
58
  # @return [String]
62
59
  #
63
60
  def inspect
64
- "#<#{self.class}:#{self.object_id} data_size=#{@data_size or 'nil'} errors=#{@errors.full_messages}>"
61
+ "#<#{self.class}:#{self.object_id} "\
62
+ "data_size=#{@data_size.inspect} "\
63
+ "errors=[#{errors.join(", ")}]>"
65
64
  end
66
65
 
67
66
  # Must be called by subclasses via +super+ to set up dependencies
@@ -79,8 +78,8 @@ module Stockboy
79
78
  # @!attribute [r] data
80
79
  #
81
80
  def data
82
- return @data if @data
83
- fetch_data if validate_config?
81
+ fetch_data if @data.nil? && validate_config?
82
+ yield @data if block_given?
84
83
  @data
85
84
  end
86
85
 
@@ -92,7 +91,7 @@ module Stockboy
92
91
  @data = nil
93
92
  @data_time = nil
94
93
  @data_size = nil
95
- @errors = ActiveModel::Errors.new(self)
94
+ @errors = []
96
95
  true
97
96
  end
98
97
  alias_method :reset, :clear
@@ -125,7 +124,7 @@ module Stockboy
125
124
  raise NoMethodError, "#{self.class}#fetch_data needs implementation"
126
125
  end
127
126
 
128
- # Use errors.add(:attribute, "Message") provided by ActiveModel
127
+ # Use errors << "'option' is required"
129
128
  # for validating required provider parameters before attempting
130
129
  # to make connections and retrieve data.
131
130
  #
@@ -138,27 +137,12 @@ module Stockboy
138
137
  def validate_config?
139
138
  unless validation = valid?
140
139
  logger.error do
141
- "Invalid #{self.class} provider configuration: #{errors.full_messages}"
140
+ "Invalid #{self.class} provider configuration: #{errors.join(', ')}"
142
141
  end
143
142
  end
144
143
  validation
145
144
  end
146
145
 
147
- # Required by ActiveModel::Errors
148
- def read_attribute_for_validation(attr)
149
- send(attr)
150
- end
151
-
152
- # Required by ActiveModel::Errors
153
- def self.human_attribute_name(attr, options = {})
154
- attr
155
- end
156
-
157
- # Required by ActiveModel::Errors
158
- def self.lookup_ancestors
159
- [self]
160
- end
161
-
162
146
  # When picking files from a list you can supply +:first+ or +:last+ to the
163
147
  # provider's +pick+ option, or else a block that can reduce to a single
164
148
  # value, like: