stockboy 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +5 -0
  4. data/.yardopts +7 -0
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile +12 -0
  7. data/Guardfile +10 -0
  8. data/LICENSE +21 -0
  9. data/README.md +293 -0
  10. data/Rakefile +30 -0
  11. data/lib/stockboy.rb +80 -0
  12. data/lib/stockboy/attribute.rb +11 -0
  13. data/lib/stockboy/attribute_map.rb +74 -0
  14. data/lib/stockboy/candidate_record.rb +130 -0
  15. data/lib/stockboy/configuration.rb +62 -0
  16. data/lib/stockboy/configurator.rb +176 -0
  17. data/lib/stockboy/dsl.rb +68 -0
  18. data/lib/stockboy/exceptions.rb +3 -0
  19. data/lib/stockboy/filter.rb +58 -0
  20. data/lib/stockboy/filter_chain.rb +41 -0
  21. data/lib/stockboy/filters.rb +11 -0
  22. data/lib/stockboy/filters/missing_email.rb +37 -0
  23. data/lib/stockboy/job.rb +241 -0
  24. data/lib/stockboy/mapped_record.rb +59 -0
  25. data/lib/stockboy/provider.rb +238 -0
  26. data/lib/stockboy/providers.rb +11 -0
  27. data/lib/stockboy/providers/file.rb +135 -0
  28. data/lib/stockboy/providers/ftp.rb +205 -0
  29. data/lib/stockboy/providers/http.rb +123 -0
  30. data/lib/stockboy/providers/imap.rb +290 -0
  31. data/lib/stockboy/providers/soap.rb +120 -0
  32. data/lib/stockboy/railtie.rb +28 -0
  33. data/lib/stockboy/reader.rb +59 -0
  34. data/lib/stockboy/readers.rb +11 -0
  35. data/lib/stockboy/readers/csv.rb +115 -0
  36. data/lib/stockboy/readers/fixed_width.rb +121 -0
  37. data/lib/stockboy/readers/spreadsheet.rb +144 -0
  38. data/lib/stockboy/readers/xml.rb +155 -0
  39. data/lib/stockboy/registry.rb +42 -0
  40. data/lib/stockboy/source_record.rb +43 -0
  41. data/lib/stockboy/string_pool.rb +35 -0
  42. data/lib/stockboy/template_file.rb +44 -0
  43. data/lib/stockboy/translations.rb +70 -0
  44. data/lib/stockboy/translations/boolean.rb +58 -0
  45. data/lib/stockboy/translations/date.rb +41 -0
  46. data/lib/stockboy/translations/decimal.rb +33 -0
  47. data/lib/stockboy/translations/default_empty_string.rb +38 -0
  48. data/lib/stockboy/translations/default_false.rb +41 -0
  49. data/lib/stockboy/translations/default_nil.rb +38 -0
  50. data/lib/stockboy/translations/default_true.rb +41 -0
  51. data/lib/stockboy/translations/default_zero.rb +41 -0
  52. data/lib/stockboy/translations/integer.rb +33 -0
  53. data/lib/stockboy/translations/string.rb +33 -0
  54. data/lib/stockboy/translations/time.rb +41 -0
  55. data/lib/stockboy/translations/uk_date.rb +51 -0
  56. data/lib/stockboy/translations/us_date.rb +51 -0
  57. data/lib/stockboy/translator.rb +66 -0
  58. data/lib/stockboy/version.rb +3 -0
  59. data/spec/fixtures/.gitkeep +0 -0
  60. data/spec/fixtures/files/a_garbage.csv +1 -0
  61. data/spec/fixtures/files/test_data-20120101.csv +1 -0
  62. data/spec/fixtures/files/test_data-20120202.csv +1 -0
  63. data/spec/fixtures/files/z_garbage.csv +1 -0
  64. data/spec/fixtures/jobs/test_job.rb +1 -0
  65. data/spec/fixtures/soap/get_list/fault.xml +8 -0
  66. data/spec/fixtures/soap/get_list/success.xml +18 -0
  67. data/spec/fixtures/spreadsheets/test_data.xls +0 -0
  68. data/spec/fixtures/spreadsheets/test_row_options.xls +0 -0
  69. data/spec/fixtures/xml/body.xml +14 -0
  70. data/spec/spec_helper.rb +28 -0
  71. data/spec/stockboy/attribute_map_spec.rb +59 -0
  72. data/spec/stockboy/attribute_spec.rb +11 -0
  73. data/spec/stockboy/candidate_record_spec.rb +150 -0
  74. data/spec/stockboy/configuration_spec.rb +28 -0
  75. data/spec/stockboy/configurator_spec.rb +127 -0
  76. data/spec/stockboy/filter_chain_spec.rb +40 -0
  77. data/spec/stockboy/filter_spec.rb +41 -0
  78. data/spec/stockboy/filters/missing_email_spec.rb +26 -0
  79. data/spec/stockboy/filters_spec.rb +38 -0
  80. data/spec/stockboy/job_spec.rb +238 -0
  81. data/spec/stockboy/mapped_record_spec.rb +30 -0
  82. data/spec/stockboy/provider_spec.rb +34 -0
  83. data/spec/stockboy/providers/file_spec.rb +116 -0
  84. data/spec/stockboy/providers/ftp_spec.rb +143 -0
  85. data/spec/stockboy/providers/http_spec.rb +94 -0
  86. data/spec/stockboy/providers/imap_spec.rb +76 -0
  87. data/spec/stockboy/providers/soap_spec.rb +107 -0
  88. data/spec/stockboy/providers_spec.rb +38 -0
  89. data/spec/stockboy/readers/csv_spec.rb +68 -0
  90. data/spec/stockboy/readers/fixed_width_spec.rb +52 -0
  91. data/spec/stockboy/readers/spreadsheet_spec.rb +121 -0
  92. data/spec/stockboy/readers/xml_spec.rb +94 -0
  93. data/spec/stockboy/readers_spec.rb +30 -0
  94. data/spec/stockboy/source_record_spec.rb +19 -0
  95. data/spec/stockboy/template_file_spec.rb +30 -0
  96. data/spec/stockboy/translations/boolean_spec.rb +48 -0
  97. data/spec/stockboy/translations/date_spec.rb +38 -0
  98. data/spec/stockboy/translations/decimal_spec.rb +23 -0
  99. data/spec/stockboy/translations/default_empty_string_spec.rb +32 -0
  100. data/spec/stockboy/translations/default_false_spec.rb +25 -0
  101. data/spec/stockboy/translations/default_nil_spec.rb +32 -0
  102. data/spec/stockboy/translations/default_true_spec.rb +25 -0
  103. data/spec/stockboy/translations/default_zero_spec.rb +32 -0
  104. data/spec/stockboy/translations/integer_spec.rb +22 -0
  105. data/spec/stockboy/translations/string_spec.rb +22 -0
  106. data/spec/stockboy/translations/time_spec.rb +27 -0
  107. data/spec/stockboy/translations/uk_date_spec.rb +37 -0
  108. data/spec/stockboy/translations/us_date_spec.rb +37 -0
  109. data/spec/stockboy/translations_spec.rb +55 -0
  110. data/spec/stockboy/translator_spec.rb +27 -0
  111. data/stockboy.gemspec +32 -0
  112. metadata +305 -0
@@ -0,0 +1,68 @@
1
module Stockboy

  # @api private
  #
  # Base class for the nested +DSL+ classes generated by {DSL#dsl_attr}. It
  # only stores the object that the generated DSL methods read from and
  # write to.
  #
  class ConfiguratorBlock

    # Initialize a DSL context around an instance
    #
    # @param instance [Object]
    #   Object that receives the attribute reads and writes made through
    #   the DSL methods
    #
    def initialize(instance)
      @instance = instance
    end

  end

  # Mixin for defining DSL methods
  #
  module DSL

    # Define ambiguous attr reader/writers for DSL readability
    #
    # Unless disabled through the options, the attribute accessors are
    # declared on the extending class. In every case a nested +DSL+ class
    # (a subclass of {ConfiguratorBlock}) is opened on the extending class and
    # given methods that forward to the wrapped instance.
    #
    # @example
    #   dsl.some_option = "new value" # => some_option = "new value"
    #   dsl.some_option "new value"   # => some_option = "new value"
    #   dsl.some_option               # => some_option
    #
    # @visibility private
    # @scope class
    #
    # @param attr [Symbol] Name of the attribute
    # @param options [Hash]
    # @option options [Boolean] :attr_accessor (true)
    #   Pass false to skip declaring both the reader and the writer
    # @option options [Boolean] :attr_reader (true)
    #   Pass false to skip declaring only the reader
    # @option options [Boolean] :attr_writer (true)
    #   Pass false to skip declaring only the writer
    # @option options [Symbol] :alias
    #   Alternate name for the attribute, aliased on both the extending class
    #   and its DSL class. The reader and writer must already exist on the
    #   extending class at this point, since +alias_method+ raises NameError
    #   for an undefined method.
    # @return [Symbol] The attribute name that was passed in
    #
    def dsl_attr(attr, options={})
      if options.fetch(:attr_accessor, true)
        attr_reader attr if options.fetch(:attr_reader, true)
        attr_writer attr if options.fetch(:attr_writer, true)
      end

      # The heredoc body starts on the line after this call, so the line
      # number handed to class_eval is __LINE__ + 1; this keeps backtraces and
      # source_location of the generated methods pointing at the right line.
      class_eval <<-___, __FILE__, __LINE__ + 1
        class DSL < Stockboy::ConfiguratorBlock
          def #{attr}(*arg)
            if arg.empty?
              @instance.#{attr}
            else
              @instance.#{attr} = arg.first
            end
          end
          def #{attr}=(arg)
            @instance.#{attr} = arg
          end
        end
      ___

      attr_alias = options[:alias]
      if attr_alias
        alias_method attr_alias, attr
        alias_method :"#{attr_alias}=", :"#{attr}="

        class_eval <<-___, __FILE__, __LINE__ + 1
          class DSL < Stockboy::ConfiguratorBlock
            alias_method :#{attr_alias}, :#{attr}
            alias_method :#{attr_alias}=, :#{attr}=
          end
        ___
      end

      attr
    end

  end

end
@@ -0,0 +1,3 @@
1
module Stockboy

  # Stockboy-specific error type derived from StandardError.
  #
  # NOTE(review): the name suggests it signals steps being run in the wrong
  # order; the raising code is not visible here, so confirm against callers.
  #
  class OutOfSequence < StandardError
  end

end
@@ -0,0 +1,58 @@
1
+ require 'stockboy/exceptions'
2
+
3
module Stockboy

  # Any callable object that returns true or false can act as a filter. This
  # abstract base class captures the common pattern so that subclasses only
  # have to supply the decision logic.
  #
  # == Interface
  #
  # Subclasses define a private +filter+ method taking the raw and the
  # translated record context. Its result is coerced to a strict boolean by
  # {#call}.
  #
  # @example
  #   class Bouncer < Stockboy::Filter
  #     def initialize(age)
  #       @age = age
  #     end
  #     def filter(input_context, output_context)
  #       input_context["RawAge"].empty? or output_context.age < @age
  #     end
  #   end
  #
  #   # Register an instance ...
  #   Stockboy::Filters.register(:bouncer, Bouncer.new(19))
  #   filter :under_age, :bouncer       # in job template
  #
  #   # ... or register the class and pass its arguments in the template
  #   Stockboy::Filters.register(:bouncer, Bouncer)
  #   filter :under_age, :bouncer, 19   # in job template
  #
  # @abstract
  #
  class Filter

    # Return true to capture a filtered record, false to pass it on
    #
    # @param [SourceRecord] raw_context
    #   Unmapped source fields with Hash-like access by field name (e.g.
    #   <tt>input["RawField"]</tt>) or raw values on mapped attributes as
    #   methods (e.g. <tt>input.email</tt>)
    # @param [MappedRecord] translated_context
    #   Mapped and translated fields with access to attributes
    #   as methods (<tt>output.email</tt>)
    # @return [Boolean]
    #
    def call(raw_context, translated_context)
      filter(raw_context, translated_context) ? true : false
    end

    private

    # Decision hook to be overridden by subclasses
    #
    # @abstract
    # @raise [NoMethodError] always, in this base implementation
    #
    def filter(raw_context, translated_context)
      raise NoMethodError, "#{self.class}#filter must be implemented"
    end

  end

end
@@ -0,0 +1,41 @@
1
module Stockboy

  # An insertion-ordered Hash of named filters that can reset its members and
  # produce the matching set of empty result buckets
  #
  class FilterChain < Hash

    # Build a new FilterChain holding the given filters
    #
    # @param [Hash{Symbol=>Filter}] hash Initial filters, none when omitted
    # @return [FilterChain]
    #
    def self.new(hash=nil)
      chain = super()
      chain.replace(hash || {})
    end

    # Put filters ahead of the ones already in the chain
    #
    # Keys present in both keep the value already held by the chain.
    #
    # @param [Hash{Symbol=>Filter}] hash Filters to add
    # @return [FilterChain] self
    #
    def prepend(hash)
      replace(hash.merge(self))
    end

    # Invoke +reset+ on every filter that responds to it
    #
    # @return [Hash{Symbol=>Array}] Filter keys point to empty arrays
    #
    def reset
      each_value do |filter|
        filter.reset if filter.respond_to?(:reset)
      end
      keys_to_arrays
    end

    # Build a plain Hash with one fresh empty array per filter key
    #
    # @return [Hash{Symbol=>Array}] Filter keys point to empty arrays
    #
    def keys_to_arrays
      keys.each_with_object({}) { |key, buckets| buckets[key] = [] }
    end

  end
end
@@ -0,0 +1,11 @@
1
+ require 'stockboy/registry'
2
+
3
module Stockboy

  # Namespace that doubles as the registry of available named filters; the
  # registry behaviour comes entirely from Stockboy::Registry.
  #
  module Filters
    extend Registry
  end

end
@@ -0,0 +1,37 @@
1
+ require 'stockboy/filter'
2
+
3
module Stockboy::Filters

  # Very loose matching to pre-screen missing emails.
  #
  # Only checks if there is a potential email-like string in the output value,
  # and does not do any format checking for validity.
  #
  # @example
  #   filter = Stockboy::Filters::MissingEmail.new(:email)
  #   model.email = ""
  #   filter.call(_, model) # => true
  #   model.email = "@"
  #   filter.call(_, model) # => true
  #   model.email = "me@example.com"
  #   filter.call(_, model) # => false
  #
  class MissingEmail < Stockboy::Filter

    # Initialize a new filter for a missing email attribute
    #
    # @param [Symbol] attr
    #   Name of the email attribute to examine on the mapped output record
    #
    def initialize(attr)
      @attr = attr
    end

    private

    # Capture the record when its email attribute holds nothing email-like
    #
    # The value is coerced with +to_s+ before matching, so nil, empty and
    # whitespace-only values count as missing without relying on
    # ActiveSupport's +blank?+, and non-String values never hit a missing
    # <tt>=~</tt> method.
    #
    # @param raw [Object] Raw record context (unused)
    # @param output [Object] Mapped record responding to the configured attribute
    # @return [Boolean] true when no "word@word" sequence is found
    #
    def filter(raw, output)
      output.send(@attr).to_s !~ /\w@\w/
    end

  end
end
@@ -0,0 +1,241 @@
1
+ require 'stockboy/configuration'
2
+ require 'stockboy/exceptions'
3
+ require 'stockboy/configurator'
4
+ require 'stockboy/template_file'
5
+ require 'stockboy/filter_chain'
6
+ require 'stockboy/candidate_record'
7
+
8
module Stockboy

  # This class wraps up the main interface for the process of fetching,
  # parsing and sorting data. When used with a predefined template file, you
  # can pass the name of the template to define it. This is the common way
  # to use Stockboy:
  #
  #     job = Stockboy::Job.define('my_template')
  #     if job.process
  #       job.records[:update].each do |r|
  #         # ...
  #       end
  #       job.records[:cancel].each do |r|
  #         # ...
  #       end
  #     end
  #
  class Job

    # Defines the data source for receiving data
    #
    # @return [Provider]
    #
    attr_accessor :provider

    # Defines the format for parsing received data
    #
    # @return [Reader]
    #
    attr_accessor :reader

    # Configures the mapping & translation of raw data fields
    #
    # @return [AttributeMap]
    #
    attr_reader :attributes

    # List of filters for sorting processed records
    #
    # @return [FilterChain]
    #
    # Filters are applied in order, first match will capture the record.
    # Records that don't match any filter are collected in
    # {#unfiltered_records}.
    #
    attr_reader :filters

    # Callbacks grouped by trigger name
    #
    # Reading a missing key creates and returns an empty list for it.
    #
    # @return [Hash{Symbol=>Array<#call>}]
    #
    attr_reader :triggers

    # Lists of records grouped by filter key
    #
    # @return [Hash{Symbol=>Array}]
    #
    attr_reader :records

    # List of records not matched by any filter
    #
    # @return [Array<CandidateRecord>]
    #
    attr_reader :unfiltered_records

    # List of all records, filtered or not
    #
    # @return [Array<CandidateRecord>]
    #
    attr_reader :all_records

    # Initialize a new job
    #
    # @param [Hash] params
    # @option params [Provider] :provider
    # @option params [Reader] :reader
    # @option params [AttributeMap] :attributes
    # @option params [Hash,FilterChain] :filters
    # @option params [Hash{Symbol=>Array<#call>}] :triggers
    # @yield instance for further configuration or processing
    #
    def initialize(params={}, &block)
      @provider   = params[:provider]
      @reader     = params[:reader]
      @attributes = params[:attributes]
      @filters    = FilterChain.new params[:filters]
      @triggers   = Hash.new { |h,k| h[k] = [] }
      # Hash#update rather than Hash#replace: replace would also overwrite
      # the default block above with the incoming hash's default.
      @triggers.update params[:triggers] if params[:triggers]
      yield self if block_given?
      reset
    end

    # Instantiate a job configured by DSL template file
    #
    # @param template_name [String] File basename from template load path
    # @yield instance for further configuration or processing
    # @return [Job, nil] nil when no template content could be read
    # @see Configuration#template_load_paths
    #
    def self.define(template_name)
      template = TemplateFile.read(template_name)
      return nil unless template

      job = Configurator.new(template, TemplateFile.find(template_name)).to_job
      yield job if block_given?
      job
    end

    # Fetch data and process it into groups of filtered records
    #
    # @yield [Hash{Symbol=>Array}] the grouped records, once loading is done
    # @return [Boolean] Success or failure, i.e. whether the provider
    #   reported no errors
    #
    def process
      reset
      with_query_caching do
        load_records
        yield @records if block_given?
      end
      provider.errors.empty?
    end

    # Count of all processed records
    #
    # @!attribute [r] total_records
    # @return [Integer]
    #
    def total_records
      @all_records.size
    end

    # Counts of processed records grouped by filter key
    #
    # @return [Hash{Symbol=>Integer}]
    #
    def record_counts
      @records.each_with_object({}) { |(key, list), counts| counts[key] = list.size }
    end

    # Replace existing triggers
    #
    # The contents are swapped in place so that the auto-creating default of
    # the triggers hash is kept.
    #
    # @param new_triggers [Hash{Symbol=>Array<#call>}]
    #
    def triggers=(new_triggers)
      # Assigning the hash to itself must not wipe it
      return @triggers if new_triggers.equal?(@triggers)

      @triggers.clear
      @triggers.update new_triggers
    end

    # Run every callback registered under a trigger name
    #
    # Each callback is called with the job followed by the given arguments.
    #
    # @param key [Symbol] Trigger name
    # @param args Extra arguments passed on to each callback
    # @return [Array, nil] The callbacks that were run, or nil when the
    #   trigger name is not registered
    #
    def trigger(key, *args)
      return nil unless triggers.key?(key)

      triggers[key].each do |callback|
        callback.call(self, *args)
      end
    end

    # Lets registered triggers be invoked as if they were methods on the job
    #
    def method_missing(name, *args)
      if triggers.key?(name)
        trigger(name, *args)
      else
        super
      end
    end

    # Keeps respond_to? consistent with the trigger names handled by
    # method_missing
    #
    def respond_to_missing?(name, include_private = false)
      triggers.key?(name) || super
    end

    # Replace existing filters
    #
    # @param new_filters [Hash{Symbol=>Filter}]
    # @return [Stockboy::FilterChain]
    #
    def filters=(new_filters)
      @filters.replace new_filters
      reset
      @filters
    end

    # Replace existing attribute map
    #
    # @param new_attributes [Stockboy::AttributeMap]
    # @return [Stockboy::AttributeMap]
    #
    def attributes=(new_attributes)
      @attributes = new_attributes
      reset
      @attributes
    end

    # Has the job been processed successfully?
    #
    # @return [Boolean]
    #
    def processed?
      !!@processed
    end

    # Overview of the job configuration; tries to be less noisy by hiding
    # sub-element details.
    #
    # @return [String]
    #
    def inspect
      # A job without an attribute map yet must still be inspectable
      mapped = attributes ? attributes.map(&:to) : []

      prov = "provider=#{(Stockboy::Providers.all.key(provider.class) || provider.class.to_s).inspect}"
      read = "reader=#{(Stockboy::Readers.all.key(reader.class) || reader.class.to_s).inspect}"
      attr = "attributes=#{mapped}"
      filt = "filters=#{filters.keys}"
      cnts = "record_counts=#{record_counts}"
      "#<#{self.class}:#{self.object_id} #{[prov, read, attr, filt, cnts].join(', ')}>"
    end

    private

    # Clear all results and the processed flag; the filters are reset too
    #
    def reset
      @records = filters.reset
      @all_records = []
      @unfiltered_records = []
      @processed = false
      true
    end

    # Parse the provider data into candidate records and sort each one into
    # its filter group. Does nothing when the provider has no data.
    #
    def load_records
      # Read the data once so the provider is not asked twice
      data = provider.data
      return unless data

      @all_records = reader.parse(data).map do |row|
        CandidateRecord.new(row, @attributes)
      end

      @all_records.each do |record|
        record_partition(record) << record
      end

      @processed = true
    end

    # Pick the list a record belongs in: the group of the first matching
    # filter, or the unfiltered list when nothing matches
    #
    def record_partition(record)
      key = record.partition(filters)
      key ? @records[key] : @unfiltered_records
    end

    # Run the block inside ActiveRecord's query cache when ActiveRecord is
    # loaded, otherwise just run it
    #
    def with_query_caching(&block)
      if defined? ActiveRecord
        ActiveRecord::Base.cache(&block)
      else
        yield
      end
    end

  end
end