stockboy 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +5 -0
  4. data/.yardopts +7 -0
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile +12 -0
  7. data/Guardfile +10 -0
  8. data/LICENSE +21 -0
  9. data/README.md +293 -0
  10. data/Rakefile +30 -0
  11. data/lib/stockboy.rb +80 -0
  12. data/lib/stockboy/attribute.rb +11 -0
  13. data/lib/stockboy/attribute_map.rb +74 -0
  14. data/lib/stockboy/candidate_record.rb +130 -0
  15. data/lib/stockboy/configuration.rb +62 -0
  16. data/lib/stockboy/configurator.rb +176 -0
  17. data/lib/stockboy/dsl.rb +68 -0
  18. data/lib/stockboy/exceptions.rb +3 -0
  19. data/lib/stockboy/filter.rb +58 -0
  20. data/lib/stockboy/filter_chain.rb +41 -0
  21. data/lib/stockboy/filters.rb +11 -0
  22. data/lib/stockboy/filters/missing_email.rb +37 -0
  23. data/lib/stockboy/job.rb +241 -0
  24. data/lib/stockboy/mapped_record.rb +59 -0
  25. data/lib/stockboy/provider.rb +238 -0
  26. data/lib/stockboy/providers.rb +11 -0
  27. data/lib/stockboy/providers/file.rb +135 -0
  28. data/lib/stockboy/providers/ftp.rb +205 -0
  29. data/lib/stockboy/providers/http.rb +123 -0
  30. data/lib/stockboy/providers/imap.rb +290 -0
  31. data/lib/stockboy/providers/soap.rb +120 -0
  32. data/lib/stockboy/railtie.rb +28 -0
  33. data/lib/stockboy/reader.rb +59 -0
  34. data/lib/stockboy/readers.rb +11 -0
  35. data/lib/stockboy/readers/csv.rb +115 -0
  36. data/lib/stockboy/readers/fixed_width.rb +121 -0
  37. data/lib/stockboy/readers/spreadsheet.rb +144 -0
  38. data/lib/stockboy/readers/xml.rb +155 -0
  39. data/lib/stockboy/registry.rb +42 -0
  40. data/lib/stockboy/source_record.rb +43 -0
  41. data/lib/stockboy/string_pool.rb +35 -0
  42. data/lib/stockboy/template_file.rb +44 -0
  43. data/lib/stockboy/translations.rb +70 -0
  44. data/lib/stockboy/translations/boolean.rb +58 -0
  45. data/lib/stockboy/translations/date.rb +41 -0
  46. data/lib/stockboy/translations/decimal.rb +33 -0
  47. data/lib/stockboy/translations/default_empty_string.rb +38 -0
  48. data/lib/stockboy/translations/default_false.rb +41 -0
  49. data/lib/stockboy/translations/default_nil.rb +38 -0
  50. data/lib/stockboy/translations/default_true.rb +41 -0
  51. data/lib/stockboy/translations/default_zero.rb +41 -0
  52. data/lib/stockboy/translations/integer.rb +33 -0
  53. data/lib/stockboy/translations/string.rb +33 -0
  54. data/lib/stockboy/translations/time.rb +41 -0
  55. data/lib/stockboy/translations/uk_date.rb +51 -0
  56. data/lib/stockboy/translations/us_date.rb +51 -0
  57. data/lib/stockboy/translator.rb +66 -0
  58. data/lib/stockboy/version.rb +3 -0
  59. data/spec/fixtures/.gitkeep +0 -0
  60. data/spec/fixtures/files/a_garbage.csv +1 -0
  61. data/spec/fixtures/files/test_data-20120101.csv +1 -0
  62. data/spec/fixtures/files/test_data-20120202.csv +1 -0
  63. data/spec/fixtures/files/z_garbage.csv +1 -0
  64. data/spec/fixtures/jobs/test_job.rb +1 -0
  65. data/spec/fixtures/soap/get_list/fault.xml +8 -0
  66. data/spec/fixtures/soap/get_list/success.xml +18 -0
  67. data/spec/fixtures/spreadsheets/test_data.xls +0 -0
  68. data/spec/fixtures/spreadsheets/test_row_options.xls +0 -0
  69. data/spec/fixtures/xml/body.xml +14 -0
  70. data/spec/spec_helper.rb +28 -0
  71. data/spec/stockboy/attribute_map_spec.rb +59 -0
  72. data/spec/stockboy/attribute_spec.rb +11 -0
  73. data/spec/stockboy/candidate_record_spec.rb +150 -0
  74. data/spec/stockboy/configuration_spec.rb +28 -0
  75. data/spec/stockboy/configurator_spec.rb +127 -0
  76. data/spec/stockboy/filter_chain_spec.rb +40 -0
  77. data/spec/stockboy/filter_spec.rb +41 -0
  78. data/spec/stockboy/filters/missing_email_spec.rb +26 -0
  79. data/spec/stockboy/filters_spec.rb +38 -0
  80. data/spec/stockboy/job_spec.rb +238 -0
  81. data/spec/stockboy/mapped_record_spec.rb +30 -0
  82. data/spec/stockboy/provider_spec.rb +34 -0
  83. data/spec/stockboy/providers/file_spec.rb +116 -0
  84. data/spec/stockboy/providers/ftp_spec.rb +143 -0
  85. data/spec/stockboy/providers/http_spec.rb +94 -0
  86. data/spec/stockboy/providers/imap_spec.rb +76 -0
  87. data/spec/stockboy/providers/soap_spec.rb +107 -0
  88. data/spec/stockboy/providers_spec.rb +38 -0
  89. data/spec/stockboy/readers/csv_spec.rb +68 -0
  90. data/spec/stockboy/readers/fixed_width_spec.rb +52 -0
  91. data/spec/stockboy/readers/spreadsheet_spec.rb +121 -0
  92. data/spec/stockboy/readers/xml_spec.rb +94 -0
  93. data/spec/stockboy/readers_spec.rb +30 -0
  94. data/spec/stockboy/source_record_spec.rb +19 -0
  95. data/spec/stockboy/template_file_spec.rb +30 -0
  96. data/spec/stockboy/translations/boolean_spec.rb +48 -0
  97. data/spec/stockboy/translations/date_spec.rb +38 -0
  98. data/spec/stockboy/translations/decimal_spec.rb +23 -0
  99. data/spec/stockboy/translations/default_empty_string_spec.rb +32 -0
  100. data/spec/stockboy/translations/default_false_spec.rb +25 -0
  101. data/spec/stockboy/translations/default_nil_spec.rb +32 -0
  102. data/spec/stockboy/translations/default_true_spec.rb +25 -0
  103. data/spec/stockboy/translations/default_zero_spec.rb +32 -0
  104. data/spec/stockboy/translations/integer_spec.rb +22 -0
  105. data/spec/stockboy/translations/string_spec.rb +22 -0
  106. data/spec/stockboy/translations/time_spec.rb +27 -0
  107. data/spec/stockboy/translations/uk_date_spec.rb +37 -0
  108. data/spec/stockboy/translations/us_date_spec.rb +37 -0
  109. data/spec/stockboy/translations_spec.rb +55 -0
  110. data/spec/stockboy/translator_spec.rb +27 -0
  111. data/stockboy.gemspec +32 -0
  112. metadata +305 -0
@@ -0,0 +1,28 @@
1
+ require 'stockboy'
2
+
3
+ # When loaded in a Rails project, Stockboy will assume the following defaults:
4
+ #
5
+ # == Configuration file
6
+ #
7
+ # If a file under +config/stockboy.rb+ exists, it will be loaded for setting up
8
+ # the app-specific configuration options, like paths or registering custom
9
+ # readers, filters, or providers.
10
+ #
11
+ # == Default template load paths
12
+ #
13
+ # +config/stockboy_jobs+ will be set up as the default template load path.
14
+ # This can be changed in the config file.
15
+ #
16
class Railtie < Rails::Railtie

  # Registers one Rails initializer that applies the Stockboy defaults for a
  # Rails application.
  initializer "stockboy.configure_rails_initialization" do
    # Point the template load path at config/stockboy_jobs under the Rails root.
    Stockboy.configure do |config|
      config.template_load_paths = [Rails.root.join('config/stockboy_jobs')]
    end

    # Load the optional app-specific configuration file. File.exist? is used
    # because File.exists? was deprecated and then removed in Ruby 3.2.
    config_file = Rails.root.join("config", "stockboy.rb")
    require config_file if File.exist?(config_file)
  end

end
@@ -0,0 +1,59 @@
1
+ require 'stockboy/dsl'
2
+
3
module Stockboy

  # Abstract class for defining data readers
  #
  # == Interface
  #
  # A reader must implement a +parse+ method for extracting an array of records
  # from raw data. At this stage no data transformation is performed, only
  # extracting field tokens for each record, based on the specific data
  # serialization.
  #
  # String keys should be preferred, since these may be specified by the user;
  # external inputs should not be symbolized (symbols were not garbage
  # collected before Ruby 2.2). Frozen strings for keys are a good idea, of
  # course.
  #
  # @example
  #   reader.parse("name,email\nArthur Dent,arthur@example.com")
  #   # => [{"name" => "Arthur Dent", "email" => "arthur@example.com"}]
  #
  # @abstract
  #
  class Reader
    extend Stockboy::DSL

    # Initialize a new reader
    #
    # Only the +:encoding+ key is read here and stored in +@encoding+. The
    # options hash itself is not modified, so the caller's hash (which may be
    # frozen or reused) stays intact.
    #
    # @param [Hash] opts
    # @option opts [String] :encoding
    #
    def initialize(opts={})
      @encoding = opts[:encoding]
    end

    # Take raw input (String) and extract an array of records
    #
    # This base implementation always raises; concrete readers override it.
    #
    # @param [String] data
    # @return [Array<Hash>]
    # @raise [NoMethodError] when called on a class that has not overridden it
    #
    def parse(data)
      raise NoMethodError, "#{self.class}#parse needs implementation"
    end

  end


  # @!macro [new] reader.skip_row_options
  #   [skip_header_rows]
  #     If the file has a preamble before actual data to be ignored
  #       skip_header_rows 4
  #   [skip_footer_rows]
  #     If the file has a summary or footer to be ignored
  #       skip_footer_rows 4

  # @!macro [new] reader.encoding_options
  #   [encoding]
  #     String encoding format of the source data. All readers output UTF-8.
  #       encoding 'Windows-1252'

end
@@ -0,0 +1,11 @@
1
+ require 'stockboy/registry'
2
+
3
module Stockboy

  # Namespace for the available reader classes.
  #
  # Registry behaviour comes from extending Stockboy::Registry, which is
  # loaded from stockboy/registry.
  #
  module Readers
    extend Stockboy::Registry
  end

end
@@ -0,0 +1,115 @@
1
+ require 'stockboy/configuration'
2
+ require 'stockboy/reader'
3
+ require 'csv'
4
+
5
module Stockboy::Readers

  # Parse data from CSV into hashes
  #
  # All standard ::CSV options are respected and passed through
  #
  # @see
  #   http://www.ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/CSV.html#DEFAULT_OPTIONS
  #
  class CSV < Stockboy::Reader

    # @!group Options

    # Override source file encoding
    #
    # @!attribute [rw] encoding
    # @return [String]
    #
    dsl_attr :encoding

    # Skip number of rows at start of file before data starts
    #
    # @!attribute [rw] skip_header_rows
    # @return [Fixnum]
    #
    dsl_attr :skip_header_rows

    # Skip number of rows at end of file after data ends
    #
    # @!attribute [rw] skip_footer_rows
    # @return [Fixnum]
    #
    dsl_attr :skip_footer_rows

    # @!attribute [rw] col_sep
    #   @macro dsl_attr
    #   @return [String]
    #
    # @!attribute [rw] row_sep
    #   @macro dsl_attr
    #   @return [String]
    #
    # @!attribute [rw] quote_char
    #   @macro dsl_attr
    #   @return [String]
    #
    # @!attribute [rw] headers
    #   @macro dsl_attr
    #   @return [Array, String]
    #
    # Every key of ::CSV::DEFAULT_OPTIONS becomes a DSL attribute whose reader
    # and writer go through the @csv_options hash instead of an instance
    # variable of its own.
    ::CSV::DEFAULT_OPTIONS.keys.each do |opt|
      dsl_attr opt, attr_accessor: false
      define_method(opt) { @csv_options[opt] }
      define_method(:"#{opt}=") { |value| @csv_options[opt] = value }
    end

    # @!endgroup

    # Initialize a new CSV reader
    #
    # All stdlib ::CSV options are respected. Keys of +opts+ that are not in
    # ::CSV::DEFAULT_OPTIONS are left out of the CSV options hash. +:headers+
    # defaults to +true+ and both skip counts default to 0. An optional block
    # is evaluated against the reader's DSL.
    #
    # @see http://ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/CSV.html#method-c-new
    #
    # @param [Hash] opts
    #
    def initialize(opts={}, &block)
      super
      @csv_options = opts.select { |key, _| ::CSV::DEFAULT_OPTIONS.key?(key) }
      @csv_options[:headers] = @csv_options.fetch(:headers, true)
      @skip_header_rows = opts.fetch(:skip_header_rows, 0)
      @skip_footer_rows = opts.fetch(:skip_footer_rows, 0)
      DSL.new(self).instance_eval(&block) if block_given?
    end

    # Parse raw CSV text and convert each parsed row with +to_hash+
    #
    # A converter that freezes each header is appended after any configured
    # +header_converters+. The converter list is built fresh on every call, so
    # the stored options are not modified and a single Symbol or Proc
    # converter is accepted as well as an Array.
    #
    # @param [String] data
    # @return [Array<Hash>]
    #
    def parse(data)
      freeze_header = proc { |header| header.freeze }
      converters = Array(options[:header_converters]) + [freeze_header]
      csv_opts = options.merge(header_converters: converters)
      # Options are splatted as keywords: Ruby 3 no longer accepts them as a
      # positional hash.
      ::CSV.parse(sanitize(data), **csv_opts).map(&:to_hash)
    end

    # Hash of all CSV-specific options
    #
    # @!attribute [r] options
    # @return [Hash]
    #
    def options
      @csv_options
    end

    private

    # Prepare raw input for ::CSV: apply the configured encoding, normalize
    # newlines, remove NUL characters, drop the trailing newline, and cut off
    # the skipped header and footer rows.
    #
    # The encoding is applied to a copy so the caller's string is not changed
    # and frozen input is accepted.
    def sanitize(data)
      data = data.dup.force_encoding(encoding) if encoding
      data = data.encode(universal_newline: true)
                 .delete(0.chr)
                 .chomp
      from = row_start_index(data, skip_header_rows)
      to = row_end_index(data, skip_footer_rows)
      data[from..to]
    end

    # Index of the first character to keep: starting at 0, step past one
    # end-of-line match for each skipped row.
    # NOTE(review): assumes the data has more rows than are skipped.
    def row_start_index(data, skip_rows)
      skip_rows.times.inject(0) { |index, _| data.index(/$/, index) + 1 }
    end

    # Index of the last character to keep: starting at -1, step back before
    # one end-of-line match for each skipped row.
    # NOTE(review): assumes the data has more rows than are skipped.
    def row_end_index(data, skip_rows)
      skip_rows.times.inject(-1) { |index, _| data.rindex(/$/, index) - 1 }
    end
  end
end
@@ -0,0 +1,121 @@
1
+ require 'stockboy/reader'
2
+ require 'stockboy/configuration'
3
+
4
module Stockboy::Readers

  # For reading fixed-width data split by column widths
  #
  class FixedWidth < Stockboy::Reader

    # @!group Options

    # Widths of data columns with optional names
    #
    # Array format will use numeric indexes for field keys. Hash will use the
    # keys for naming the fields.
    #
    # @return [Array<Fixnum>, Hash{Object=>Fixnum}]
    # @example
    #   reader.headers = [10, 5, 10, 42]
    #   reader.parse(data)
    #   #=> [{0=>"Arthur", 1=>"42", 2=>"Earth", 3=>""}]
    #
    #   reader.headers = {name: 10, age: 5, planet: 10, notes: 42}
    #   reader.parse(data)
    #   #=> [{name: "Arthur", age: "42", planet: "Earth", notes: ""}]
    #
    dsl_attr :headers

    # Number of file rows to skip from start of file
    #
    # Useful if the file starts with a preamble or header metadata
    #
    # @return [Fixnum]
    #
    dsl_attr :skip_header_rows

    # Number of file rows to skip at end of file
    #
    # Useful if the file ends with a summary or notice
    #
    # @return [Fixnum]
    #
    dsl_attr :skip_footer_rows

    # String format used for unpacking rows
    #
    # This is read from the {#headers} attribute by default but can be
    # overridden
    #
    # @return [String]
    #
    dsl_attr :row_format

    # Override original file encoding
    #
    # @return [String]
    #
    dsl_attr :encoding

    # @!endgroup

    # Initialize a new fixed-width reader
    #
    # @param [Hash] opts
    # @option opts [Array<Fixnum>, Hash<Fixnum>] headers
    # @option opts [Fixnum] skip_header_rows
    # @option opts [Fixnum] skip_footer_rows
    # @option opts [String] row_format
    # @option opts [String] encoding
    #
    def initialize(opts={}, &block)
      super
      @headers = opts[:headers]
      @skip_header_rows = opts.fetch(:skip_header_rows, 0)
      @skip_footer_rows = opts.fetch(:skip_footer_rows, 0)
      @row_format = opts[:row_format]
      DSL.new(self).instance_eval(&block) if block_given?
    end

    # Split fixed-width data into one hash per non-blank row
    #
    # The first +skip_header_rows+ lines are read and discarded, lines that
    # are blank after stripping are ignored, and the last +skip_footer_rows+
    # parsed records are dropped.
    #
    # @param [String, StringIO] data
    # @return [Array<Hash>]
    #
    def parse(data)
      # Column widths and keys are recomputed from the headers on every parse.
      @column_widths, @column_keys = nil, nil
      io = rows_io(data)
      skip_header_rows.times { io.readline }
      records = io.each_line
                  .reject { |row| row.strip.empty? }
                  .map { |row| parse_row(row) }
      skip_footer_rows.times { records.pop }
      records
    end

    # Unpack format for one row: an "A<width>" directive per column unless a
    # format has been set explicitly. The computed value is memoized.
    #
    # @return [String]
    #
    def row_format
      @row_format ||= "A#{column_widths.join('A')}".freeze
    end

    private

    # Wrap the input in a StringIO (unless it already is one) and apply the
    # configured encoding. String input is copied before the encoding is
    # forced so the caller's string is left unchanged.
    def rows_io(data)
      if data.is_a?(StringIO)
        data.set_encoding(encoding) if encoding
        data
      else
        StringIO.new(encoding ? data.dup.force_encoding(encoding) : data)
      end
    end

    # Column widths taken from the headers: hash values or the array itself.
    # Raises when headers is neither a Hash nor an Array.
    def column_widths
      return @column_widths if @column_widths
      @column_widths = case headers
                       when Hash then headers.values
                       when Array then headers
                       else
                         raise "Invalid headers set for #{self.class}"
                       end
    end

    # Field keys for each record: frozen hash keys, or 0-based indexes when
    # headers is an Array. Raises when headers is neither.
    def column_keys
      return @column_keys if @column_keys
      @column_keys = case headers
                     when Hash then headers.keys.map(&:freeze)
                     when Array then (0 ... headers.length).to_a
                     else
                       raise "Invalid headers set for #{self.class}"
                     end
    end

    # Unpack one row with row_format and pair the values with the column keys.
    def parse_row(row)
      Hash[column_keys.zip(row.unpack(row_format))]
    end

  end
end
@@ -0,0 +1,144 @@
1
+ require 'stockboy/reader'
2
+ require 'tempfile'
3
+ require 'roo'
4
+
5
module Stockboy::Readers

  # Parse an Excel spreadsheet
  #
  # Backed by Roo gem. See roo for other configuration options.
  #
  class Spreadsheet < Stockboy::Reader

    # @!group Options

    # Spreadsheet format
    #
    # @!attribute [rw] format
    # @return [Symbol] +:xls+ or +:xlsx+
    #
    dsl_attr :format

    # Spreadsheet sheet number, defaults to first
    #
    # @!attribute [rw] sheet
    # @return [Fixnum]
    #
    dsl_attr :sheet

    # Line number to look for headers, starts counting at 1, like in Excel
    #
    # @!attribute [rw] header_row
    # @return [Fixnum]
    #
    dsl_attr :header_row

    # Line number of first data row, starts counting at 1, like in Excel
    #
    # @!attribute [rw] first_row
    # @return [Fixnum]
    #
    dsl_attr :first_row

    # Line number of last data row, use negative numbers to count back from end
    #
    # @!attribute [rw] last_row
    # @return [Fixnum]
    #
    dsl_attr :last_row

    # Override to set headers manually
    #
    # @!attribute [rw] headers
    # @return [Array]
    #
    dsl_attr :headers

    # @!endgroup

    # Initialize a new Spreadsheet reader
    #
    # +:format+ defaults to +:xls+, +:sheet+ to +:first+ and +:roo_options+
    # to an empty hash; the remaining options default to nil. An optional
    # block is evaluated against the reader's DSL.
    #
    # @param [Hash] opts
    #
    def initialize(opts={}, &block)
      super
      @format = opts[:format] || :xls
      @sheet = opts[:sheet] || :first
      @first_row = opts[:first_row]
      @last_row = opts[:last_row]
      @header_row = opts[:header_row]
      @headers = opts[:headers]
      @roo_options = opts[:roo_options] || {}
      DSL.new(self).instance_eval(&block) if block_given?
    end

    # Build one hash per data row, keyed by the headers
    #
    # @param [String] content raw spreadsheet file content
    # @return [Array<Hash>]
    #
    def parse(content)
      with_spreadsheet_tempfile(content) do |table|
        headers = table_headers(table)
        enum_data_rows(table).map { |i| Hash[headers.zip(table.row(i))] }
      end
    end

    # Roo-specific options hash passed to underlying spreadsheet parser
    #
    # @!attribute [r] options
    # @return [Hash]
    #
    def options
      @roo_options
    end

    private

    # Enumerator over the row numbers from the first to the last data row.
    def enum_data_rows(table)
      first_table_row(table).upto last_table_row(table)
    end

    # Write the content to a tempfile, open it with Roo, select the sheet and
    # header line, and yield the table.
    def with_spreadsheet_tempfile(content)
      Tempfile.open(tmp_name, Stockboy.configuration.tmp_dir) do |file|
        file.binmode
        file.write content
        # Roo opens the file again by path, so buffered data must be written
        # out first.
        file.flush
        table = Roo::Spreadsheet.open(file.path, @roo_options)
        table.default_sheet = sheet_number(table, @sheet)
        # The header_row option is stored in @header_row.
        table.header_line = @header_row if @header_row
        yield table
      end
    end

    # Resolve the sheet option: a Symbol is sent to the list of sheets (e.g.
    # :first), an Integer is a 1-based position, and a String is used as
    # given. Any other type yields nil.
    def sheet_number(table, id)
      case id
      when Symbol then table.sheets.public_send id
      when Integer then table.sheets[id-1]
      when String then id
      end
    end

    # Configured first data row, or the table's own first row.
    def first_table_row(table)
      @first_row || table.first_row
    end

    # Last data row: a negative last_row counts back from the table's last
    # row, a positive one is used as given, otherwise the table's last row.
    def last_table_row(table)
      if @last_row.to_i < 0
        table.last_row + @last_row + 1
      elsif @last_row.to_i > 0
        @last_row
      else
        table.last_row
      end
    end

    # Manually set headers, or the header row's cells as strings (nil cells
    # stay nil).
    def table_headers(table)
      return @headers if @headers
      table.row(table_header_row(table)).map { |h| h.to_s unless h.nil? }
    end

    # Whichever is greater: the table's header line or its first row.
    def table_header_row(table)
      [table.header_line, table.first_row].max
    end

    # Tempfile basename parts: prefix and an extension from the format.
    def tmp_name
      ['stockboy', ".#{@format}"]
    end
  end
end