stockboy 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +5 -0
  4. data/.yardopts +7 -0
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile +12 -0
  7. data/Guardfile +10 -0
  8. data/LICENSE +21 -0
  9. data/README.md +293 -0
  10. data/Rakefile +30 -0
  11. data/lib/stockboy.rb +80 -0
  12. data/lib/stockboy/attribute.rb +11 -0
  13. data/lib/stockboy/attribute_map.rb +74 -0
  14. data/lib/stockboy/candidate_record.rb +130 -0
  15. data/lib/stockboy/configuration.rb +62 -0
  16. data/lib/stockboy/configurator.rb +176 -0
  17. data/lib/stockboy/dsl.rb +68 -0
  18. data/lib/stockboy/exceptions.rb +3 -0
  19. data/lib/stockboy/filter.rb +58 -0
  20. data/lib/stockboy/filter_chain.rb +41 -0
  21. data/lib/stockboy/filters.rb +11 -0
  22. data/lib/stockboy/filters/missing_email.rb +37 -0
  23. data/lib/stockboy/job.rb +241 -0
  24. data/lib/stockboy/mapped_record.rb +59 -0
  25. data/lib/stockboy/provider.rb +238 -0
  26. data/lib/stockboy/providers.rb +11 -0
  27. data/lib/stockboy/providers/file.rb +135 -0
  28. data/lib/stockboy/providers/ftp.rb +205 -0
  29. data/lib/stockboy/providers/http.rb +123 -0
  30. data/lib/stockboy/providers/imap.rb +290 -0
  31. data/lib/stockboy/providers/soap.rb +120 -0
  32. data/lib/stockboy/railtie.rb +28 -0
  33. data/lib/stockboy/reader.rb +59 -0
  34. data/lib/stockboy/readers.rb +11 -0
  35. data/lib/stockboy/readers/csv.rb +115 -0
  36. data/lib/stockboy/readers/fixed_width.rb +121 -0
  37. data/lib/stockboy/readers/spreadsheet.rb +144 -0
  38. data/lib/stockboy/readers/xml.rb +155 -0
  39. data/lib/stockboy/registry.rb +42 -0
  40. data/lib/stockboy/source_record.rb +43 -0
  41. data/lib/stockboy/string_pool.rb +35 -0
  42. data/lib/stockboy/template_file.rb +44 -0
  43. data/lib/stockboy/translations.rb +70 -0
  44. data/lib/stockboy/translations/boolean.rb +58 -0
  45. data/lib/stockboy/translations/date.rb +41 -0
  46. data/lib/stockboy/translations/decimal.rb +33 -0
  47. data/lib/stockboy/translations/default_empty_string.rb +38 -0
  48. data/lib/stockboy/translations/default_false.rb +41 -0
  49. data/lib/stockboy/translations/default_nil.rb +38 -0
  50. data/lib/stockboy/translations/default_true.rb +41 -0
  51. data/lib/stockboy/translations/default_zero.rb +41 -0
  52. data/lib/stockboy/translations/integer.rb +33 -0
  53. data/lib/stockboy/translations/string.rb +33 -0
  54. data/lib/stockboy/translations/time.rb +41 -0
  55. data/lib/stockboy/translations/uk_date.rb +51 -0
  56. data/lib/stockboy/translations/us_date.rb +51 -0
  57. data/lib/stockboy/translator.rb +66 -0
  58. data/lib/stockboy/version.rb +3 -0
  59. data/spec/fixtures/.gitkeep +0 -0
  60. data/spec/fixtures/files/a_garbage.csv +1 -0
  61. data/spec/fixtures/files/test_data-20120101.csv +1 -0
  62. data/spec/fixtures/files/test_data-20120202.csv +1 -0
  63. data/spec/fixtures/files/z_garbage.csv +1 -0
  64. data/spec/fixtures/jobs/test_job.rb +1 -0
  65. data/spec/fixtures/soap/get_list/fault.xml +8 -0
  66. data/spec/fixtures/soap/get_list/success.xml +18 -0
  67. data/spec/fixtures/spreadsheets/test_data.xls +0 -0
  68. data/spec/fixtures/spreadsheets/test_row_options.xls +0 -0
  69. data/spec/fixtures/xml/body.xml +14 -0
  70. data/spec/spec_helper.rb +28 -0
  71. data/spec/stockboy/attribute_map_spec.rb +59 -0
  72. data/spec/stockboy/attribute_spec.rb +11 -0
  73. data/spec/stockboy/candidate_record_spec.rb +150 -0
  74. data/spec/stockboy/configuration_spec.rb +28 -0
  75. data/spec/stockboy/configurator_spec.rb +127 -0
  76. data/spec/stockboy/filter_chain_spec.rb +40 -0
  77. data/spec/stockboy/filter_spec.rb +41 -0
  78. data/spec/stockboy/filters/missing_email_spec.rb +26 -0
  79. data/spec/stockboy/filters_spec.rb +38 -0
  80. data/spec/stockboy/job_spec.rb +238 -0
  81. data/spec/stockboy/mapped_record_spec.rb +30 -0
  82. data/spec/stockboy/provider_spec.rb +34 -0
  83. data/spec/stockboy/providers/file_spec.rb +116 -0
  84. data/spec/stockboy/providers/ftp_spec.rb +143 -0
  85. data/spec/stockboy/providers/http_spec.rb +94 -0
  86. data/spec/stockboy/providers/imap_spec.rb +76 -0
  87. data/spec/stockboy/providers/soap_spec.rb +107 -0
  88. data/spec/stockboy/providers_spec.rb +38 -0
  89. data/spec/stockboy/readers/csv_spec.rb +68 -0
  90. data/spec/stockboy/readers/fixed_width_spec.rb +52 -0
  91. data/spec/stockboy/readers/spreadsheet_spec.rb +121 -0
  92. data/spec/stockboy/readers/xml_spec.rb +94 -0
  93. data/spec/stockboy/readers_spec.rb +30 -0
  94. data/spec/stockboy/source_record_spec.rb +19 -0
  95. data/spec/stockboy/template_file_spec.rb +30 -0
  96. data/spec/stockboy/translations/boolean_spec.rb +48 -0
  97. data/spec/stockboy/translations/date_spec.rb +38 -0
  98. data/spec/stockboy/translations/decimal_spec.rb +23 -0
  99. data/spec/stockboy/translations/default_empty_string_spec.rb +32 -0
  100. data/spec/stockboy/translations/default_false_spec.rb +25 -0
  101. data/spec/stockboy/translations/default_nil_spec.rb +32 -0
  102. data/spec/stockboy/translations/default_true_spec.rb +25 -0
  103. data/spec/stockboy/translations/default_zero_spec.rb +32 -0
  104. data/spec/stockboy/translations/integer_spec.rb +22 -0
  105. data/spec/stockboy/translations/string_spec.rb +22 -0
  106. data/spec/stockboy/translations/time_spec.rb +27 -0
  107. data/spec/stockboy/translations/uk_date_spec.rb +37 -0
  108. data/spec/stockboy/translations/us_date_spec.rb +37 -0
  109. data/spec/stockboy/translations_spec.rb +55 -0
  110. data/spec/stockboy/translator_spec.rb +27 -0
  111. data/stockboy.gemspec +32 -0
  112. metadata +305 -0
@@ -0,0 +1,28 @@
1
+ require 'stockboy'
2
+
3
+ # When loaded in a Rails project, Stockboy will assume the following defaults:
4
+ #
5
+ # == Configuration file
6
+ #
7
+ # If a file under +config/stockboy.rb+ exists, it will be loaded for setting up
8
+ # the app-specific configuration options, like paths or registering custom
9
+ # readers, filters, or providers.
10
+ #
11
+ # == Default template load paths
12
+ #
13
+ # +config/stockboy_jobs+ Will be set up as the default template load path.
14
+ # This can be changed in the config file.
15
+ #
16
class Railtie < Rails::Railtie

  # Rails initialization hook for Stockboy.
  #
  # Sets +config/stockboy_jobs+ (under the Rails root) as the template load
  # path, then loads +config/stockboy.rb+ if that file is present so the
  # application can override or extend the configuration.
  initializer "stockboy.configure_rails_initialization" do
    Stockboy.configure do |config|
      config.template_load_paths = [Rails.root.join('config/stockboy_jobs')]
    end

    # File.exist? is used because the File.exists? alias was deprecated and
    # later removed from Ruby (3.2).
    config_file = Rails.root.join("config", "stockboy.rb")
    require config_file if File.exist?(config_file)
  end

end
@@ -0,0 +1,59 @@
1
+ require 'stockboy/dsl'
2
+
3
+ module Stockboy
4
+
5
+ # Abstract class for defining data readers
6
+ #
7
+ # == Interface
8
+ #
9
+ # A reader must implement a +parse+ method for extracting an array of records
10
+ # from raw data. At this stage no data transformation is performed, only
11
+ # extracting field tokens for each record, based on the specific data
12
+ # serialization.
13
+ #
14
+ # String keys should be preferred, since these may be specified by the user;
15
+ # external inputs should not be symbolized (because symbols are never GC'd).
16
+ # Frozen strings for keys are a good idea, of course.
17
+ #
18
+ # @example
19
+ # reader.parse("name,email\nArthur Dent,arthur@example.com")
20
+ # # => [{"name" => "Arthur Dent", "email" => "arthur@example.com"}]
21
+ #
22
+ # @abstract
23
+ #
24
class Reader
  extend Stockboy::DSL

  # Set up the state shared by every reader.
  #
  # The +:encoding+ entry is removed from +opts+ (the passed hash is
  # modified in place) and kept in +@encoding+; it is nil when absent.
  #
  # @param [Hash] opts
  #
  def initialize(opts = {})
    @encoding = opts.delete(:encoding)
  end

  # Turn raw input into an array of records.
  #
  # This base implementation always raises; concrete readers are expected
  # to provide their own +parse+.
  #
  # @return [Array<Hash>]
  # @raise [NoMethodError] always, naming the class that lacks +parse+
  #
  def parse(data)
    message = "#{self.class}#parse needs implementation"
    raise NoMethodError, message
  end

end
44
+
45
+
46
+ # @!macro [new] reader.skip_row_options
47
+ # [skip_header_rows]
48
+ # If the file has a preamble before actual data to be ignored
49
+ # skip_header_rows 4
50
+ # [skip_footer_rows]
51
+ # If the file has a summary or footer to be ignored
52
+ # skip_footer_rows 4
53
+
54
+ # @!macro [new] reader.encoding_options
55
+ # [encoding]
56
+ # String encoding format of the source data. All readers output UTF-8.
57
+ # encoding 'Windows-1252'
58
+
59
+ end
@@ -0,0 +1,11 @@
1
+ require 'stockboy/registry'
2
+
3
+ module Stockboy
4
+
5
+ # Registry of available readers
6
+ #
7
module Readers
  # All behaviour of this namespace comes from Stockboy::Registry, which is
  # mixed in at the module level (its methods become module methods here).
  extend ::Stockboy::Registry
end
10
+
11
+ end
@@ -0,0 +1,115 @@
1
+ require 'stockboy/configuration'
2
+ require 'stockboy/reader'
3
+ require 'csv'
4
+
5
+ module Stockboy::Readers
6
+
7
+ # Parse data from CSV into hashes
8
+ #
9
+ # All standard ::CSV options are respected and passed through
10
+ #
11
+ # @see
12
+ # http://www.ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/CSV.html#DEFAULT_OPTIONS
13
+ #
14
class CSV < Stockboy::Reader

  # @!group Options

  # Override source file encoding
  #
  # @!attribute [rw] encoding
  #   @return [String]
  #
  dsl_attr :encoding

  # Skip number of rows at start of file before data starts
  #
  # @!attribute [rw] skip_header_rows
  #   @return [Integer]
  #
  dsl_attr :skip_header_rows

  # Skip number of rows at end of file after data ends
  #
  # @!attribute [rw] skip_footer_rows
  #   @return [Integer]
  #
  dsl_attr :skip_footer_rows

  # @!attribute [rw] col_sep
  #   @macro dsl_attr
  #   @return [String]
  #
  # @!attribute [rw] row_sep
  #   @macro dsl_attr
  #   @return [String]
  #
  # @!attribute [rw] quote_char
  #   @macro dsl_attr
  #   @return [String]
  #
  # @!attribute [rw] headers
  #   @macro dsl_attr
  #   @return [Array, String]
  #
  # Every key of ::CSV::DEFAULT_OPTIONS gets a DSL entry plus a reader and
  # writer that go straight to the @csv_options hash.
  ::CSV::DEFAULT_OPTIONS.keys.each do |opt|
    dsl_attr opt, attr_accessor: false
    define_method(opt) { @csv_options[opt] }
    define_method(:"#{opt}=") { |value| @csv_options[opt] = value }
  end

  # @!endgroup

  # Initialize a new CSV reader
  #
  # Only keys present in ::CSV::DEFAULT_OPTIONS are kept as CSV options;
  # +:headers+ defaults to true and both skip counts default to 0. An
  # optional block is evaluated against the reader's DSL.
  #
  # @see http://ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/CSV.html#method-c-new
  #
  # @param [Hash] opts
  #
  def initialize(opts={}, &block)
    super
    @csv_options = opts.select { |key, _| ::CSV::DEFAULT_OPTIONS.key?(key) }
    @csv_options[:headers] = @csv_options.fetch(:headers, true)
    @skip_header_rows = opts.fetch(:skip_header_rows, 0)
    @skip_footer_rows = opts.fetch(:skip_footer_rows, 0)
    DSL.new(self).instance_eval(&block) if block_given?
  end

  # Parse raw CSV text into an array of hashes, one per row
  #
  # A header converter that freezes each header is appended to any
  # configured +header_converters+.
  #
  # @param [String] data
  # @return [Array<Hash>]
  #
  def parse(data)
    freeze_header = proc { |header| header.freeze }
    # Build a fresh converter list on every call: appending to the stored
    # option would grow it on each parse and mutate the caller's array.
    # Array() also accepts a single Symbol/Proc or nil.
    converters = Array(options[:header_converters]) + [freeze_header]
    opts = options.merge(header_converters: converters)
    # Splat so the options are received as keywords on Ruby 3 as well as
    # as a trailing hash on older Rubies.
    ::CSV.parse(sanitize(data), **opts).map(&:to_hash)
  end

  # Hash of all CSV-specific options
  #
  # @!attribute [r] options
  #   @return [Hash]
  #
  def options
    @csv_options
  end

  private

  # Prepare raw input for the CSV parser: tag it with the configured
  # encoding (in place), normalize newlines, strip NUL bytes and the
  # trailing newline, then cut off the skipped header and footer rows.
  def sanitize(data)
    data.force_encoding(encoding) if encoding
    data = data.encode(universal_newline: true)
               .delete(0.chr)
               .chomp
    from = row_start_index(data, skip_header_rows)
    to = row_end_index(data, skip_footer_rows)
    data[from..to]
  end

  # Character index where data begins after skipping +skip_rows+ lines
  # from the start (0 when nothing is skipped).
  def row_start_index(data, skip_rows)
    Array.new(skip_rows).inject(0) { |i| data.index(/$/, i) + 1 }
  end

  # Character index where data ends after skipping +skip_rows+ lines from
  # the end (-1, i.e. the last character, when nothing is skipped).
  def row_end_index(data, skip_rows)
    Array.new(skip_rows).inject(-1) { |i| data.rindex(/$/, i) - 1 }
  end
end
115
+ end
@@ -0,0 +1,121 @@
1
+ require 'stockboy/reader'
2
+ require 'stockboy/configuration'
3
+
4
+ module Stockboy::Readers
5
+
6
+ # For reading fixed-width data split by column widths
7
+ #
8
class FixedWidth < Stockboy::Reader

  # @!group Options

  # Widths of data columns with optional names
  #
  # Array format will use numeric indexes for field keys. Hash will use the
  # keys for naming the fields.
  #
  # @return [Array<Integer>, Hash{Object=>Integer}]
  # @example
  #   reader.headers = [10, 5, 10, 42]
  #   reader.parse(data)
  #   #=> [{0=>"Arthur", 1=>"42", 2=>"Earth", 3=>""}]
  #
  #   reader.headers = {name: 10, age: 5, planet: 10, notes: 42}
  #   reader.parse(data)
  #   #=> [{name: "Arthur", age: "42", planet: "Earth", notes: ""}]
  #
  dsl_attr :headers

  # Number of file rows to skip from start of file
  #
  # Useful if the file starts with a preamble or header metadata
  #
  # @return [Integer]
  #
  dsl_attr :skip_header_rows

  # Number of file rows to skip at end of file
  #
  # Useful if the file ends with a summary or notice
  #
  # @return [Integer]
  #
  dsl_attr :skip_footer_rows

  # String format used for unpacking rows
  #
  # This is read from the {#headers} attribute by default but can be
  # overridden
  #
  # @return [String]
  #
  dsl_attr :row_format

  # Override original file encoding
  #
  # @return [String]
  #
  dsl_attr :encoding

  # @!endgroup

  # Initialize a new fixed-width reader
  #
  # @param [Hash] opts
  # @option opts [Array<Integer>, Hash<Integer>] headers
  # @option opts [Integer] skip_header_rows
  # @option opts [Integer] skip_footer_rows
  # @option opts [String] encoding
  #
  def initialize(opts={}, &block)
    super
    @headers = opts[:headers]
    @skip_header_rows = opts.fetch(:skip_header_rows, 0)
    @skip_footer_rows = opts.fetch(:skip_footer_rows, 0)
    DSL.new(self).instance_eval(&block) if block_given?
  end

  # Split fixed-width text into one hash per non-blank line
  #
  # Header rows are skipped before reading; footer rows are dropped from
  # the end of the collected (non-blank) records.
  #
  # @param [String, StringIO] data
  # @return [Array<Hash>]
  #
  def parse(data)
    # Column widths/keys are recomputed from the current headers each time.
    @column_widths, @column_keys = nil, nil
    if encoding
      # String has no force_encoding! method; force_encoding already
      # re-tags the receiver in place. A StringIO is re-tagged through its
      # own set_encoding.
      data.is_a?(StringIO) ? data.set_encoding(encoding) : data.force_encoding(encoding)
    end
    data = StringIO.new(data) unless data.is_a? StringIO
    skip_header_rows.times { data.readline }
    records = data.each_line
                  .reject { |row| row.strip.empty? }
                  .map { |row| parse_row(row) }
    records.pop(skip_footer_rows)
    records
  end

  # Unpack format built from the column widths, e.g. widths [10, 5] give
  # "A10A5". Memoized after first use.
  #
  # @return [String]
  #
  def row_format
    @row_format ||= (?A << column_widths.join(?A)).freeze
  end

  private

  # Widths taken from the headers: hash values, or the array itself.
  def column_widths
    return @column_widths if @column_widths
    @column_widths = case headers
                     when Hash then headers.values
                     when Array then headers
                     else
                       raise "Invalid headers set for #{self.class}"
                     end
  end

  # Field keys taken from the headers: (frozen) hash keys, or positional
  # indexes 0...n for an array.
  def column_keys
    return @column_keys if @column_keys
    @column_keys = case headers
                   when Hash then headers.keys.map(&:freeze)
                   when Array then (0 ... headers.length).to_a
                   else
                     raise "Invalid headers set for #{self.class}"
                   end
  end

  # Unpack one line with row_format and pair the values with column_keys.
  def parse_row(row)
    Hash[column_keys.zip(row.unpack(row_format))]
  end

end
121
+ end
@@ -0,0 +1,144 @@
1
+ require 'stockboy/reader'
2
+ require 'tempfile'
3
+ require 'roo'
4
+
5
+ module Stockboy::Readers
6
+
7
+ # Parse an Excel spreadsheet
8
+ #
9
+ # Backed by Roo gem. See roo for other configuration options.
10
+ #
11
class Spreadsheet < Stockboy::Reader

  # @!group Options

  # Spreadsheet format
  #
  # @!attribute [rw] format
  #   @return [Symbol] +:xls+ or +:xlsx+
  #
  dsl_attr :format

  # Spreadsheet sheet number, defaults to first
  #
  # @!attribute [rw] sheet
  #   @return [Integer]
  #
  dsl_attr :sheet

  # Line number to look for headers, starts counting at 1, like in Excel
  #
  # @!attribute [rw] header_row
  #   @return [Integer]
  #
  dsl_attr :header_row

  # Line number of first data row, starts counting at 1, like in Excel
  #
  # @!attribute [rw] first_row
  #   @return [Integer]
  #
  dsl_attr :first_row

  # Line number of last data row, use negative numbers to count back from end
  #
  # @!attribute [rw] last_row
  #   @return [Integer]
  #
  dsl_attr :last_row

  # Override to set headers manually
  #
  # @!attribute [rw] headers
  #   @return [Array]
  #
  dsl_attr :headers

  # @!endgroup

  # Initialize a new Spreadsheet reader
  #
  # +format+ defaults to +:xls+ and +sheet+ to +:first+; +roo_options+
  # defaults to an empty hash. An optional block is evaluated against the
  # reader's DSL.
  #
  # @param [Hash] opts
  #
  def initialize(opts={}, &block)
    super
    @format = opts[:format] || :xls
    @sheet = opts[:sheet] || :first
    @first_row = opts[:first_row]
    @last_row = opts[:last_row]
    @header_row = opts[:header_row]
    @headers = opts[:headers]
    @roo_options = opts[:roo_options] || {}
    DSL.new(self).instance_eval(&block) if block_given?
  end

  # Extract one hash per data row, keyed by the sheet headers
  #
  # @param [String] content raw spreadsheet file contents
  # @return [Array<Hash>]
  #
  def parse(content)
    with_spreadsheet_tempfile(content) do |table|
      headers = table_headers(table)
      enum_data_rows(table).map { |i| Hash[headers.zip(table.row(i))] }
    end
  end

  # Roo-specific options hash passed to underlying spreadsheet parser
  #
  # @!attribute [r] options
  #   @return [Hash]
  #
  def options
    @roo_options
  end

  private

  # Enumerator over the row numbers from the first to the last data row.
  # NOTE(review): when neither first_row nor headers is set, the range starts
  # at table.first_row, which may be the header row itself -- confirm.
  def enum_data_rows(table)
    first_table_row(table).upto last_table_row(table)
  end

  # Write +content+ to a temporary file, open it with Roo, select the
  # configured sheet and header line, and yield the table.
  def with_spreadsheet_tempfile(content)
    Tempfile.open(tmp_name, Stockboy.configuration.tmp_dir) do |file|
      file.binmode
      file.write content
      # Roo reopens the file by path, so buffered bytes must be written out
      # before it reads.
      file.flush
      table = Roo::Spreadsheet.open(file.path, @roo_options)
      table.default_sheet = sheet_number(table, @sheet)
      # The header_row option is stored in @header_row (see initialize).
      table.header_line = @header_row if @header_row
      yield table
    end
  end

  # Resolve the sheet option: a Symbol is sent to the sheet list (e.g.
  # :first), an Integer is a 1-based position, a String is used as given.
  def sheet_number(table, id)
    case id
    when Symbol then table.sheets.public_send id
    when Integer then table.sheets[id-1]
    when String then id
    end
  end

  # Configured first data row, or the table's own first row.
  def first_table_row(table)
    @first_row || table.first_row
  end

  # Configured last data row; a negative value counts back from the
  # table's last row, and nil/0 means the table's last row.
  def last_table_row(table)
    if @last_row.to_i < 0
      table.last_row + @last_row + 1
    elsif @last_row.to_i > 0
      @last_row
    else
      table.last_row
    end
  end

  # Manually set headers if any, otherwise the header row's cells as
  # strings (nil cells stay nil).
  def table_headers(table)
    return @headers if @headers
    table.row(table_header_row(table)).map { |h| h.to_s unless h.nil? }
  end

  # Header row number: the later of Roo's header line and its first row.
  def table_header_row(table)
    [table.header_line, table.first_row].max
  end

  # Tempfile basename parts: prefix plus a suffix carrying the format
  # extension.
  def tmp_name
    ['stockboy', ".#{@format}"]
  end
end
144
+ end