activewarehouse-etl 0.9.1 → 0.9.5.rc1

Files changed (135)
  1. data/.gitignore +7 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +182 -150
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +9 -0
  6. data/README +18 -2
  7. data/Rakefile +35 -91
  8. data/active_support_logger.patch +78 -0
  9. data/activewarehouse-etl.gemspec +30 -0
  10. data/lib/etl.rb +10 -2
  11. data/lib/etl/batch/directives.rb +11 -1
  12. data/lib/etl/control/control.rb +2 -2
  13. data/lib/etl/control/destination.rb +27 -7
  14. data/lib/etl/control/destination/database_destination.rb +8 -6
  15. data/lib/etl/control/destination/excel_destination.rb +91 -0
  16. data/lib/etl/control/destination/file_destination.rb +6 -4
  17. data/lib/etl/control/destination/insert_update_database_destination.rb +133 -0
  18. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  19. data/lib/etl/control/source.rb +3 -2
  20. data/lib/etl/control/source/database_source.rb +14 -10
  21. data/lib/etl/control/source/file_source.rb +2 -2
  22. data/lib/etl/engine.rb +17 -15
  23. data/lib/etl/execution.rb +0 -1
  24. data/lib/etl/execution/batch.rb +3 -1
  25. data/lib/etl/execution/migration.rb +5 -0
  26. data/lib/etl/parser/delimited_parser.rb +20 -1
  27. data/lib/etl/parser/excel_parser.rb +112 -0
  28. data/lib/etl/processor/bulk_import_processor.rb +4 -2
  29. data/lib/etl/processor/database_join_processor.rb +68 -0
  30. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  31. data/lib/etl/processor/filter_row_processor.rb +51 -0
  32. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  33. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  34. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  35. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  36. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  37. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  38. data/lib/etl/processor/zip_file_processor.rb +27 -0
  39. data/lib/etl/transform/calculation_transform.rb +71 -0
  40. data/lib/etl/transform/foreign_key_lookup_transform.rb +25 -7
  41. data/lib/etl/transform/ordinalize_transform.rb +3 -1
  42. data/lib/etl/transform/split_fields_transform.rb +27 -0
  43. data/lib/etl/version.rb +1 -7
  44. data/test-matrix.yml +10 -0
  45. data/test/.gitignore +1 -0
  46. data/test/.ignore +2 -0
  47. data/test/all.ebf +6 -0
  48. data/test/apache_combined_log.ctl +11 -0
  49. data/test/batch_test.rb +41 -0
  50. data/test/batch_with_error.ebf +6 -0
  51. data/test/batched1.ctl +0 -0
  52. data/test/batched2.ctl +0 -0
  53. data/test/block_processor.ctl +6 -0
  54. data/test/block_processor_error.ctl +1 -0
  55. data/test/block_processor_pre_post_process.ctl +4 -0
  56. data/test/block_processor_remove_rows.ctl +5 -0
  57. data/test/block_processor_test.rb +38 -0
  58. data/test/config/Gemfile.rails-2.3.x +3 -0
  59. data/test/config/Gemfile.rails-2.3.x.lock +38 -0
  60. data/test/config/Gemfile.rails-3.0.x +3 -0
  61. data/test/config/Gemfile.rails-3.0.x.lock +49 -0
  62. data/test/config/common.rb +21 -0
  63. data/test/connection/mysql/connection.rb +9 -0
  64. data/test/connection/mysql/schema.sql +36 -0
  65. data/test/connection/postgresql/connection.rb +13 -0
  66. data/test/connection/postgresql/schema.sql +39 -0
  67. data/test/control_test.rb +43 -0
  68. data/test/data/apache_combined_log.txt +3 -0
  69. data/test/data/bulk_import.txt +3 -0
  70. data/test/data/bulk_import_with_empties.txt +3 -0
  71. data/test/data/decode.txt +3 -0
  72. data/test/data/delimited.txt +3 -0
  73. data/test/data/encode_source_latin1.txt +2 -0
  74. data/test/data/excel.xls +0 -0
  75. data/test/data/excel2.xls +0 -0
  76. data/test/data/fixed_width.txt +3 -0
  77. data/test/data/multiple_delimited_1.txt +3 -0
  78. data/test/data/multiple_delimited_2.txt +3 -0
  79. data/test/data/people.txt +3 -0
  80. data/test/data/sax.xml +14 -0
  81. data/test/data/xml.xml +16 -0
  82. data/test/date_dimension_builder_test.rb +96 -0
  83. data/test/delimited.ctl +30 -0
  84. data/test/delimited_absolute.ctl +33 -0
  85. data/test/delimited_destination_db.ctl +25 -0
  86. data/test/delimited_excel.ctl +31 -0
  87. data/test/delimited_insert_update.ctl +34 -0
  88. data/test/delimited_update.ctl +34 -0
  89. data/test/delimited_with_bulk_load.ctl +34 -0
  90. data/test/destination_test.rb +275 -0
  91. data/test/directive_test.rb +23 -0
  92. data/test/encode_processor_test.rb +32 -0
  93. data/test/engine_test.rb +32 -0
  94. data/test/errors.ctl +24 -0
  95. data/test/etl_test.rb +42 -0
  96. data/test/excel.ctl +24 -0
  97. data/test/excel2.ctl +25 -0
  98. data/test/fixed_width.ctl +35 -0
  99. data/test/generator_test.rb +14 -0
  100. data/test/inline_parser.ctl +17 -0
  101. data/test/mocks/mock_destination.rb +26 -0
  102. data/test/mocks/mock_source.rb +25 -0
  103. data/test/model_source.ctl +14 -0
  104. data/test/multiple_delimited.ctl +22 -0
  105. data/test/multiple_source_delimited.ctl +39 -0
  106. data/test/parser_test.rb +224 -0
  107. data/test/performance/delimited.ctl +30 -0
  108. data/test/processor_test.rb +44 -0
  109. data/test/row_processor_test.rb +17 -0
  110. data/test/sax.ctl +26 -0
  111. data/test/scd/1.txt +1 -0
  112. data/test/scd/2.txt +1 -0
  113. data/test/scd/3.txt +1 -0
  114. data/test/scd_test.rb +257 -0
  115. data/test/scd_test_type_1.ctl +43 -0
  116. data/test/scd_test_type_2.ctl +34 -0
  117. data/test/screen_test.rb +9 -0
  118. data/test/screen_test_error.ctl +3 -0
  119. data/test/screen_test_fatal.ctl +3 -0
  120. data/test/source_test.rb +139 -0
  121. data/test/test_helper.rb +34 -0
  122. data/test/transform_test.rb +101 -0
  123. data/test/vendor/adapter_extensions-0.5.0/CHANGELOG +26 -0
  124. data/test/vendor/adapter_extensions-0.5.0/LICENSE +16 -0
  125. data/test/vendor/adapter_extensions-0.5.0/README +7 -0
  126. data/test/vendor/adapter_extensions-0.5.0/Rakefile +158 -0
  127. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb +12 -0
  128. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb +44 -0
  129. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb +63 -0
  130. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb +52 -0
  131. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb +44 -0
  132. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb +10 -0
  133. data/test/xml.ctl +31 -0
  134. metadata +229 -70
  135. data/lib/etl/execution/record.rb +0 -18
@@ -0,0 +1,109 @@
+ module ETL #:nodoc:
+   module Control #:nodoc:
+     # Destination which writes directly to a database. This is useful when you are dealing with
+     # a small amount of data. For larger amounts of data you should probably use the bulk
+     # loader if it is supported with your target database as it will use a much faster load
+     # method.
+     class UpdateDatabaseDestination < Destination
+       # The target connection
+       attr_reader :target
+
+       # The table
+       attr_reader :table
+
+       # Specify the order from the source
+       attr_reader :order
+
+       # Specify the conditions from the source
+       attr_reader :conditions
+
+       # Initialize the database destination
+       #
+       # * <tt>control</tt>: The ETL::Control::Control instance
+       # * <tt>configuration</tt>: The configuration Hash
+       # * <tt>mapping</tt>: The mapping
+       #
+       # Configuration options:
+       # * <tt>:database</tt>: The database name (REQUIRED)
+       # * <tt>:target</tt>: The target connection (REQUIRED)
+       # * <tt>:table</tt>: The table to write to (REQUIRED)
+       # * <tt>:unique</tt>: Set to true to only insert unique records (defaults to false)
+       # * <tt>:append_rows</tt>: Array of rows to append
+       #
+       # Mapping options:
+       # * <tt>:order</tt>: The order of fields to write (REQUIRED)
+       # * <tt>:conditions</tt>: The conditions on the fields to update (REQUIRED)
+       def initialize(control, configuration, mapping={})
+         super
+         @target = configuration[:target]
+         @table = configuration[:table]
+         @unique = configuration[:unique] ? configuration[:unique] + [scd_effective_date_field] : configuration[:unique]
+         @unique.uniq! unless @unique.nil?
+         @order = mapping[:order] ? mapping[:order] + scd_required_fields : order_from_source
+         @order.uniq! unless @order.nil?
+         @conditions = mapping[:conditions] ? mapping[:conditions] + scd_required_fields : nil
+         @conditions.uniq! unless @conditions.nil?
+         raise ControlError, "Conditions required in mapping" unless @conditions
+         raise ControlError, "Order required in mapping" unless @order
+         raise ControlError, "Table required" unless @table
+         raise ControlError, "Target required" unless @target
+       end
+
+       # Flush the currently buffered data
+       def flush
+         conn.transaction do
+           buffer.flatten.each do |row|
+             # check to see if this row's compound key constraint already exists
+             # note that the compound key constraint may not utilize virtual fields
+             next unless row_allowed?(row)
+
+             # add any virtual fields
+             add_virtuals!(row)
+
+             conditionsfilter = []
+             conditions.each do |cond|
+               c = " #{cond[:field]} #{cond[:comp]} #{cond[:value]} "
+               condition = c
+               begin
+                 condition = eval('"' + c + '"')
+               rescue
+               end
+               conditionsfilter << condition
+             end
+
+             updatevalues = []
+             order.each do |name|
+               updatevalues << "#{conn.quote_column_name(name)} = #{conn.quote(row[name])}"
+             end
+             q = "UPDATE #{conn.quote_table_name(table_name)} SET #{updatevalues.join(',')} WHERE #{conditionsfilter.join(' AND ')}"
+             ETL::Engine.logger.debug("Executing update: #{q}")
+             conn.update(q, "Update row #{current_row}")
+             @current_row += 1
+           end
+           buffer.clear
+         end
+       end
+
+       # Close the connection
+       def close
+         buffer << append_rows if append_rows
+         flush
+       end
+
+       private
+       def conn
+         @conn ||= begin
+           conn = ETL::Engine.connection(target)
+           conn
+         rescue
+           raise RuntimeError, "Problem to connect to db"
+         end
+       end
+
+       def table_name
+         ETL::Engine.table(table, ETL::Engine.connection(target))
+       end
+
+     end
+   end
+ end
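
The mapping options above drive the generated UPDATE statement: :order lists the columns written into the SET clause, and each :conditions entry (a hash with :field, :comp and :value keys) becomes one WHERE predicate, with the condition string re-evaluated per row so it can reference row values. A minimal control-file sketch follows; the :update_database type symbol and the connection, table and field names are assumptions for illustration only.

  destination :out, {
    :type     => :update_database,   # assumed symbol for UpdateDatabaseDestination
    :target   => :data_warehouse,    # hypothetical connection defined in database.yml
    :database => 'etl_example',      # hypothetical database name
    :table    => 'people'
  },
  {
    # columns to place in the SET clause
    :order => [:first_name, :last_name],
    # single quotes defer #{row[:id]} so it is interpolated per row when the buffer is flushed
    :conditions => [
      { :field => 'id', :comp => '=', :value => '#{row[:id]}' }
    ]
  }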
@@ -40,7 +40,8 @@ module ETL #:nodoc:
          @configuration = configuration
          @definition = definition

-         @store_locally = configuration[:store_locally] || true
+         @store_locally = true
+         @store_locally = configuration[:store_locally] unless configuration[:store_locally].nil?
        end

        # Get an array of errors that occur during reading from the source
@@ -106,4 +107,4 @@ module ETL #:nodoc:
    end
  end

- Dir[File.dirname(__FILE__) + "/source/*.rb"].each { |file| require(file) }
+ Dir[File.dirname(__FILE__) + "/source/*.rb"].each { |file| require(file) }
@@ -41,17 +41,18 @@ module ETL #:nodoc:
          super
          @target = configuration[:target]
          @table = configuration[:table]
+         @query = configuration[:query]
        end

        # Get a String identifier for the source
        def to_s
-         "#{host}/#{database}/#{table}"
+         "#{host}/#{database}/#{@table}"
        end

        # Get the local directory to use, which is a combination of the
        # local_base, the db hostname the db database name and the db table.
        def local_directory
-         File.join(local_base, host, database, configuration[:table])
+         File.join(local_base, to_s)
        end

        # Get the join part of the query, defaults to nil
@@ -83,7 +84,7 @@ module ETL #:nodoc:
        # Get the number of rows in the source
        def count(use_cache=true)
          return @count if @count && use_cache
-         if store_locally || read_locally
+         if @store_locally || read_locally
            @count = count_locally
          else
            @count = connection.select_value(query.gsub(/SELECT .* FROM/, 'SELECT count(1) FROM'))
@@ -107,13 +108,16 @@ module ETL #:nodoc:
            ETL::Engine.logger.debug "Reading from local cache"
            read_rows(last_local_file, &block)
          else # Read from the original source
-           if store_locally
+           if @store_locally
              file = local_file
              write_local(file)
              read_rows(file, &block)
            else
-             query_rows.each do |row|
-               row = ETL::Row.new(row.symbolize_keys)
+             query_rows.each do |r|
+               row = ETL::Row.new()
+               r.symbolize_keys.each_pair { |key, value|
+                 row[key] = value
+               }
                row.source = self
                yield row
              end
@@ -128,7 +132,7 @@ module ETL #:nodoc:
          raise "Local cache trigger file not found" unless File.exists?(local_file_trigger(file))

          t = Benchmark.realtime do
-           FasterCSV.open(file, :headers => true).each do |row|
+           CSV.open(file, :headers => true).each do |row|
              result_row = ETL::Row.new
              result_row.source = self
              row.each do |header, field|
@@ -150,7 +154,7 @@ module ETL #:nodoc:
        def write_local(file)
          lines = 0
          t = Benchmark.realtime do
-           FasterCSV.open(file, 'w') do |f|
+           CSV.open(file, 'w') do |f|
              f << columns
              query_rows.each do |row|
                f << columns.collect { |column| row[column.to_s] }
@@ -165,7 +169,7 @@ module ETL #:nodoc:
        # Get the query to use
        def query
          return @query if @query
-         q = "SELECT #{select} FROM #{configuration[:table]}"
+         q = "SELECT #{select} FROM #{@table}"
          q << " #{join}" if join

          conditions = []
@@ -217,4 +221,4 @@
        end
      end
    end
- end
+ end
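
Two of the changes above are user-facing: a :query option now overrides the SELECT statement the source would otherwise build from :table, :join and :select, and (together with the source.rb change earlier) :store_locally => false is actually honored instead of being coerced back to true, so rows stream straight from the database rather than through a local CSV cache. A hedged control-file sketch, with hypothetical connection, database and column names:

  source :in, {
    :type          => :database,
    :target        => :operational_database,   # hypothetical connection name
    :database      => 'operational',           # hypothetical database name
    :table         => 'people',
    :query         => 'SELECT id, first_name, last_name FROM people WHERE active = 1',   # used verbatim when present
    :store_locally => false                     # skip the local CSV cache
  },
  [
    :id,
    :first_name,
    :last_name
  ]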
@@ -41,7 +41,7 @@
      # Returns each row from the source
      def each
        count = 0
-       copy_sources if store_locally
+       copy_sources if @store_locally
        @parser.each do |row|
          if ETL::Engine.offset && count < ETL::Engine.offset
            count += 1
@@ -87,4 +87,4 @@
        end
      end
    end
- end
+ end
@@ -32,7 +32,7 @@
        options[:config] = 'config/database.yml' unless File.exist?(options[:config])
        database_configuration = YAML::load(ERB.new(IO.read(options[:config])).result + "\n")
        ActiveRecord::Base.configurations.merge!(database_configuration)
-       ETL::Base.configurations = database_configuration
+       ETL::Base.configurations = HashWithIndifferentAccess.new(database_configuration)
        #puts "configurations in init: #{ActiveRecord::Base.configurations.inspect}"

        require 'etl/execution'
@@ -173,17 +173,19 @@
      # Modify the table name if necessary
      def table(table_name, connection)
        if use_temp_tables?
-         returning "tmp_#{table_name}" do |temp_table_name|
-           if temp_tables[temp_table_name].nil?
-             # Create the temp table and add it to the mapping
-             begin connection.drop_table(temp_table_name); rescue; end
-             connection.copy_table(table_name, temp_table_name)
-             temp_tables[temp_table_name] = {
-               :table => table_name,
-               :connection => connection
-             }
-           end
+         temp_table_name = "tmp_#{table_name}"
+
+         if temp_tables[temp_table_name].nil?
+           # Create the temp table and add it to the mapping
+           begin connection.drop_table(temp_table_name); rescue; end
+           connection.copy_table(table_name, temp_table_name)
+           temp_tables[temp_table_name] = {
+             :table => table_name,
+             :connection => connection
+           }
          end
+
+         temp_table_name
        else
          table_name
        end
@@ -308,7 +310,7 @@

        sources.each do |source|
          Engine.current_source = source
-         Engine.logger.debug "Processing source #{source}"
+         Engine.logger.debug "Processing source #{source.inspect}"
          say "Source: #{source}"
          say "Limiting enabled: #{Engine.limit}" if Engine.limit != nil
          say "Offset enabled: #{Engine.offset}" if Engine.offset != nil
@@ -470,8 +472,8 @@
        say "Avg transforms: #{Engine.rows_read/benchmarks[:transforms]} rows/sec" if benchmarks[:transforms] > 0
        say "Avg writes: #{Engine.rows_read/benchmarks[:writes]} rows/sec" if benchmarks[:writes] > 0

-       say "Avg time writing execution records: #{ETL::Execution::Record.average_time_spent}"
-
+       # say "Avg time writing execution records: #{ETL::Execution::Record.average_time_spent}"
+       #
        # ETL::Transform::Transform.benchmarks.each do |klass, t|
        # say "Avg #{klass}: #{Engine.rows_read/t} rows/sec"
        # end
@@ -553,4 +555,4 @@
        end
      end
    end
- end
+ end
@@ -16,5 +16,4 @@ end
  require 'etl/execution/base'
  require 'etl/execution/batch'
  require 'etl/execution/job'
- require 'etl/execution/record'
  require 'etl/execution/migration'
@@ -2,7 +2,9 @@ module ETL #:nodoc:
    module Execution #:nodoc:
      # Persistent class representing an ETL batch
      class Batch < Base
+       belongs_to :batch
+       has_many :batches
        has_many :jobs
      end
    end
- end
+ end
@@ -74,6 +74,11 @@ module ETL #:nodoc:
      def migration_4
        connection.drop_table :records
      end
+
+     def migration_5
+       connection.add_column :batches, :batch_id, :integer
+       connection.add_index :batches, :batch_id
+     end

      # Update the schema info table, setting the version value
      def update_schema_info(version)
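
Together with the new belongs_to :batch / has_many :batches associations on ETL::Execution::Batch, migration_5 gives each batch an optional parent, so one batch run can be grouped under another. A rough sketch of the resulting ActiveRecord usage, assuming standard association semantics and omitting whatever other attributes the batches table requires:

  parent = ETL::Execution::Batch.create!                          # other attributes omitted
  child  = ETL::Execution::Batch.create!(:batch_id => parent.id)  # batch_id added by migration_5
  child.batch     # => parent   (belongs_to :batch)
  parent.batches  # => [child]  (has_many :batches)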
@@ -10,13 +10,32 @@ module ETL #:nodoc:
        configure
      end

+     def get_fields_names(file)
+       File.open(file) do |input|
+         fields = CSV.parse(input.readline).first
+         new_fields = []
+         fields.each_with_index do |field,index|
+           # compute the index of occurrence of this specific occurrence of the field (usually, will be 1)
+           occurrence_index = fields[0..index].find_all { |e| e == field }.size
+           number_of_occurrences = fields.find_all { |e| e == field }.size
+           new_field = field + (number_of_occurrences > 1 ? "_#{occurrence_index}" : "")
+           new_fields << Field.new(new_field.to_sym)
+         end
+         return new_fields
+       end
+     end
+
      # Returns each row.
      def each
        Dir.glob(file).each do |file|
          ETL::Engine.logger.debug "parsing #{file}"
+         if fields.length == 0
+           ETL::Engine.logger.debug "no columns specified so reading names from first line of #{file}"
+           @fields = get_fields_names(file)
+         end
          line = 0
          lines_skipped = 0
-         FasterCSV.foreach(file, options) do |raw_row|
+         CSV.foreach(file, options) do |raw_row|
            if lines_skipped < source.skip_lines
              ETL::Engine.logger.debug "skipping line"
              lines_skipped += 1
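
With this change a delimited source may omit its field list: when no columns are defined, the parser reads the names from the file's first line, suffixing duplicates with _1, _2, and so on. A hedged sketch, assuming a hypothetical file whose header row supplies the column names and that :skip_lines is still used to keep that header out of the data rows:

  source :in, {
    :file       => 'data/people_with_header.csv',  # hypothetical file; first line holds column names
    :parser     => :delimited,
    :skip_lines => 1                                # skip the header row itself
  },
  []   # no fields defined, so names are taken from the first line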
@@ -0,0 +1,112 @@
+ require 'spreadsheet'
+
+ module ETL
+   module Parser
+     class ExcelParser < ETL::Parser::Parser
+
+       attr_accessor :ignore_blank_line
+
+       # Initialize the parser
+       # * <tt>source</tt>: The Source object
+       # * <tt>options</tt>: Parser options Hash
+       def initialize(source, options={})
+         super
+         configure
+       end
+
+       # Returns each row
+       def each
+         Dir.glob(file).each do |file|
+           ETL::Engine.logger.debug "parsing #{file}"
+           line = 0
+           lines_skipped = 0
+           book = Spreadsheet.open file
+           loopworksheets = []
+
+           if worksheets.empty?
+             loopworksheets = book.worksheets
+           else
+             worksheets.each do |index|
+               loopworksheets << book.worksheet(index)
+             end
+           end
+
+           loopworksheets.each do |sheet|
+             sheet.each do |raw_row|
+               if lines_skipped < source.skip_lines
+                 ETL::Engine.logger.debug "skipping line"
+                 lines_skipped += 1
+                 next
+               end
+               line += 1
+               row = {}
+               if self.ignore_blank_line and raw_row.empty?
+                 lines_skipped += 1
+                 next
+               end
+               validate_row(raw_row, line, file)
+               raw_row.each_with_index do |value, index|
+                 f = fields[index]
+                 row[f.name] = value
+               end
+               yield row
+             end
+           end
+         end
+       end
+
+       # Get an array of defined worksheets
+       def worksheets
+         @worksheets ||= []
+       end
+
+       # Get an array of defined fields
+       def fields
+         @fields ||= []
+       end
+
+       private
+       def validate_row(row, line, file)
+         ETL::Engine.logger.debug "validating line #{line} in file #{file}"
+         if row.length != fields.length
+           raise_with_info( MismatchError,
+             "The number of columns from the source (#{row.length}) does not match the number of columns in the definition (#{fields.length})",
+             line, file
+           )
+         end
+       end
+
+       private
+       def configure
+         source.definition[:worksheets].each do |worksheet|
+           if Integer(worksheet)
+             worksheets << worksheet.to_i
+           else
+             raise DefinitionError, "Each worksheet definition must be an integer"
+           end
+         end unless source.definition[:worksheets].nil?
+
+         self.ignore_blank_line = source.definition[:ignore_blank_line]
+
+         source.definition[:fields].each do |options|
+           case options
+           when Symbol
+             fields << Field.new(options)
+           when Hash
+             fields << Field.new(options[:name])
+           else
+             raise DefinitionError, "Each field definition must either be a symbol or a hash"
+           end
+         end
+       end
+
+       class Field #:nodoc:
+         attr_reader :name
+         def initialize(name)
+           @name = name
+         end
+       end
+
+     end
+   end
+ end
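
A control-file sketch of how this parser might be driven, based on the definition keys it reads (:worksheets, :ignore_blank_line and :fields); the :excel parser symbol, workbook path and field names are assumptions:

  source :in, {
    :file   => 'data/excel.xls',   # hypothetical workbook path
    :parser => :excel              # assumed symbol resolving to ETL::Parser::ExcelParser
  },
  {
    :worksheets        => [0],                        # worksheet indexes; omit to read every worksheet
    :ignore_blank_line => true,                       # skip empty spreadsheet rows
    :fields            => [:first_name, :last_name]   # symbols or {:name => ...} hashes, one per column
  }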