datashift 0.15.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/README.markdown +91 -55
  3. data/VERSION +1 -1
  4. data/datashift.gemspec +8 -23
  5. data/lib/applications/jexcel_file.rb +1 -2
  6. data/lib/datashift.rb +34 -15
  7. data/lib/datashift/column_packer.rb +98 -34
  8. data/lib/datashift/data_transforms.rb +83 -0
  9. data/lib/datashift/delimiters.rb +58 -10
  10. data/lib/datashift/excel_base.rb +123 -0
  11. data/lib/datashift/exceptions.rb +45 -7
  12. data/lib/datashift/load_object.rb +25 -0
  13. data/lib/datashift/mapping_service.rb +91 -0
  14. data/lib/datashift/method_detail.rb +40 -62
  15. data/lib/datashift/method_details_manager.rb +18 -2
  16. data/lib/datashift/method_dictionary.rb +27 -10
  17. data/lib/datashift/method_mapper.rb +49 -41
  18. data/lib/datashift/model_mapper.rb +42 -22
  19. data/lib/datashift/populator.rb +258 -143
  20. data/lib/datashift/thor_base.rb +38 -0
  21. data/lib/exporters/csv_exporter.rb +57 -145
  22. data/lib/exporters/excel_exporter.rb +73 -60
  23. data/lib/generators/csv_generator.rb +65 -5
  24. data/lib/generators/generator_base.rb +69 -3
  25. data/lib/generators/mapping_generator.rb +112 -0
  26. data/lib/helpers/core_ext/csv_file.rb +33 -0
  27. data/lib/loaders/csv_loader.rb +41 -39
  28. data/lib/loaders/excel_loader.rb +130 -116
  29. data/lib/loaders/loader_base.rb +190 -146
  30. data/lib/loaders/paperclip/attachment_loader.rb +4 -4
  31. data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
  32. data/lib/loaders/paperclip/image_loading.rb +9 -7
  33. data/lib/loaders/reporter.rb +17 -8
  34. data/lib/thor/export.thor +12 -13
  35. data/lib/thor/generate.thor +1 -9
  36. data/lib/thor/import.thor +13 -24
  37. data/lib/thor/mapping.thor +65 -0
  38. data/spec/Gemfile +13 -11
  39. data/spec/Gemfile.lock +98 -93
  40. data/spec/csv_exporter_spec.rb +104 -99
  41. data/spec/csv_generator_spec.rb +159 -0
  42. data/spec/csv_loader_spec.rb +197 -16
  43. data/spec/datashift_spec.rb +9 -0
  44. data/spec/excel_exporter_spec.rb +149 -58
  45. data/spec/excel_generator_spec.rb +35 -44
  46. data/spec/excel_loader_spec.rb +196 -178
  47. data/spec/excel_spec.rb +8 -5
  48. data/spec/loader_base_spec.rb +47 -7
  49. data/spec/mapping_spec.rb +117 -0
  50. data/spec/method_dictionary_spec.rb +24 -11
  51. data/spec/method_mapper_spec.rb +5 -7
  52. data/spec/model_mapper_spec.rb +41 -0
  53. data/spec/paperclip_loader_spec.rb +3 -6
  54. data/spec/populator_spec.rb +48 -14
  55. data/spec/spec_helper.rb +85 -73
  56. data/spec/thor_spec.rb +40 -5
  57. metadata +93 -86
  58. data/lib/applications/excel_base.rb +0 -63
@@ -12,22 +12,82 @@ module DataShift
12
12
 
13
13
  class CsvGenerator < GeneratorBase
14
14
 
15
+ include DataShift::Logging
16
+
15
17
  def initialize(filename)
16
18
  super(filename)
17
19
  end
18
20
 
19
21
  # Create CSV file representing supplied Model
20
-
21
- def generate(model, options = {})
22
22
 
23
- @filename = options[:filename] if options[:filename]
23
+ def generate(klass, options = {})
24
+ @filename = options[:filename] if options[:filename]
25
+
26
+ prep_remove_list(options)
27
+
28
+ MethodDictionary.find_operators( klass )
29
+ @headers = MethodDictionary.assignments[klass]
30
+
31
+ @headers.delete_if{|h| @remove_list.include?( h.to_sym ) }
32
+
33
+ logger.info("CSVGenerator saving generated template #{@filename}")
34
+
35
+ CSV.open(@filename, "w") do |csv|
36
+ csv << @headers
37
+ end
38
+ end
39
+
40
+ def generate_with_associations(klass, options = {})
41
+ @filename = options[:filename] if options[:filename]
42
+
43
+ MethodDictionary.find_operators( klass )
44
+ MethodDictionary.build_method_details( klass )
45
+
46
+ work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
47
+
48
+ prep_remove_list(options)
49
+
50
+ @headers = []
51
+
52
+ details_mgr = MethodDictionary.method_details_mgrs[klass]
53
+
54
+ work_list.each do |assoc_type|
55
+ method_details_for_assoc_type = details_mgr.get_list_of_method_details(assoc_type)
56
+
57
+ next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
58
+
59
+ method_details_for_assoc_type.each do |md|
60
+ comparable_association = md.operator.to_s.downcase.to_sym
61
+
62
+ i = remove_list.index { |r| r == comparable_association }
63
+
64
+ (i) ? remove_list.delete_at(i) : headers << "#{md.operator}"
65
+ end
66
+ end
67
+
68
+ logger.info("CSVGenerator saving generated with associations template #{@filename}")
69
+
70
+ CSV.open(@filename, "w") do |csv|
71
+ csv << @headers
72
+ end
24
73
  end
25
74
 
26
-
27
- # Create an Csv file representing supplied Model
75
+
76
 + # Create a CSV file representing supplied Model
28
77
 
29
78
  def export(items, options = {})
30
79
  end
31
80
 
81
+
82
+ private
83
+
84
+ # Take options and create a list of symbols to remove from headers
85
+ #
86
+ def prep_remove_list( options )
87
+ @remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
88
+
89
+ @remove_list += GeneratorBase::rails_columns if(options[:remove_rails])
90
+ end
91
+
32
92
  end
33
93
  end
@@ -10,17 +10,83 @@ module DataShift
10
10
  class GeneratorBase
11
11
 
12
12
  attr_accessor :filename, :headers, :remove_list
13
-
13
+
14
14
  def initialize(filename)
15
15
  @filename = filename
16
16
  @headers = []
17
17
  @remove_list =[]
18
18
  end
19
-
20
-
19
+
21
20
  def self.rails_columns
22
21
  @rails_standard_columns ||= [:id, :created_at, :created_on, :updated_at, :updated_on]
23
22
  end
23
+
24
+
25
+ # Parse options and build collection of headers for a method_details_mgr wrapping a class
26
+ # based on association requirements,
27
+ #
28
+ # Default is to include *everything*
29
+ #
30
+ # * <tt>:exclude</tt> - Association TYPE(s) to exclude completely.
31
+ #
32
+ # Possible association_type values are given by MethodDetail::supported_types_enum
33
+ # ... [:assignment, :belongs_to, :has_one, :has_many]
34
+ #
35
+ # * <tt>:remove</tt> - Array of header names to remove
36
+ #
37
+ # Rails DB columns like id, created_at, updated_at are removed by default
38
+ #
39
+ # * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
40
+ #
41
+ def prepare_model_headers(method_details_mgr, options = {})
42
+
43
+ work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
44
+
45
+ @headers = []
46
+
47
+ work_list.each do |assoc_type|
48
+ method_details_for_assoc_type = method_details_mgr.get_list_of_method_details(assoc_type)
49
+
50
+ next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
51
+
52
+ method_details_for_assoc_type.each do |md|
53
+ #comparable_association = md.operator.to_s.downcase.to_sym
54
+ #i = remove_list.index { |r| r == comparable_association }
55
+ #(i) ? remove_list.delete_at(i) : @headers << "#{md.operator}"
56
+ @headers << md.operator
57
+ end
58
+ end
59
+
60
+ remove_headers(options)
61
+
62
+ end
63
+
64
+ # Parse options and remove headers
65
+ # Specify columns to remove with :
66
+ # options[:remove]
67
+ # Rails columns like id, created_at are removed by default,
68
+ # to keep them in specify
69
+ # options[:include_rails]
70
+ #
71
+ def remove_headers(options)
72
+ remove_list = prep_remove_list( options )
73
+
74
+ #TODO - more efficient way ?
75
+ headers.delete_if { |h| remove_list.include?( h.to_sym ) } unless(remove_list.empty?)
76
+ end
77
+
78
+
79
+ # Take options and create a list of symbols to remove from headers
80
+ # Rails columns like id, created_at etc are added to the remove list by default
81
+ # Specify :include_rails to keep them in
82
+ def prep_remove_list( options )
83
+ remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
84
+
85
+ remove_list += GeneratorBase::rails_columns unless(options[:include_rails])
86
+
87
+ remove_list
88
+ end
89
+
24
90
  end
25
91
 
26
92
  end
@@ -0,0 +1,112 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: Create mappings between systems
7
+ #
8
+ require 'generator_base'
9
+
10
+ module DataShift
11
+
12
+ class MappingGenerator < GeneratorBase
13
+
14
+ include DataShift::Logging
15
+ include ExcelBase
16
+
17
+ def initialize(filename)
18
+ super(filename)
19
+ end
20
+
21
 + # Create a YAML template for mapping headers
22
+ #
23
+ # Options:
24
+ #
25
+ # * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
26
+ #
27
+ # * <tt>:remove</tt> - Array of header names to remove
28
+ #
29
+ # Rails columns like id, created_at etc are added to the remove list by default
30
+ #
31
+ # * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
32
+ #
33
+ # * <tt>:associations</tt> - Additionally include all Associations
34
+ #
35
+ # * <tt>:exclude</tt> - Association TYPE(s) to exclude.
36
+ #
37
+ # Possible association_type values are given by MethodDetail::supported_types_enum
38
+ # ... [:assignment, :belongs_to, :has_one, :has_many]
39
+ #
40
+ # * <tt>:file</tt> - Write mappings direct to file name provided
41
+ #
42
+ def generate(model = nil, options = {})
43
+
44
+ mappings = "mappings:\n"
45
+
46
+ if(model)
47
+
48
+ klass = DataShift::ModelMapper.class_from_string_or_raise( model )
49
+
50
+ MethodDictionary.find_operators( klass )
51
+
52
+ MethodDictionary.build_method_details( klass )
53
+
54
+ prepare_model_headers(MethodDictionary.method_details_mgrs[klass], options)
55
+
56
+ if(options[:model_as_dest])
57
+ headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
58
+ else
59
+ headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
60
+ end
61
+ else
62
+ mappings += <<EOS
63
+ ##source_column_heading_0: #dest_column_heading_0
64
+ ##source_column_heading_1: #dest_column_heading_1
65
+ ##source_column_heading_2: #dest_column_heading_2
66
+
67
+ EOS
68
+ end
69
+
70
+ File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
71
+
72
+ mappings
73
+
74
+ end
75
+
76
 + # Create a YAML template from an Excel spreadsheet for mapping headers
77
+ #
78
+ # * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
79
+ #
80
+ # * <tt>:file</tt> - Write mappings direct to file name provided
81
+ #
82
+ def generate_from_excel(excel_file_name, options = {})
83
+
84
+ excel = Excel.new
85
+
86
+ puts "\n\n\nGenerating mapping from Excel file: #{excel_file_name}"
87
+
88
+ excel.open(excel_file_name)
89
+
90
+ sheet_number = options[:sheet_number] || 0
91
+
92
+ sheet = excel.worksheet( sheet_number )
93
+
94
+ parse_headers(sheet, options[:header_row])
95
+
96
+ mappings = "mappings:\n"
97
+
98
+ if(options[:model_as_dest])
99
+ excel_headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
100
+ else
101
+ excel_headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
102
+ end
103
+
104
+ File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
105
+
106
+ mappings
107
+
108
+ end
109
+
110
+ end
111
+
112
+ end # DataShift
@@ -0,0 +1,33 @@
1
+ # Copyright:: Autotelik Media Ltd
2
+ # Author :: Tom Statter
3
+ # Date :: July 2010
4
+ # License::
5
+ #
6
+ # Details:: Simple internal representation of Csv File
7
+
8
+ require 'csv'
9
+
10
+ class CSV
11
+
12
+ include DataShift::ColumnPacker
13
+
14
+ # Helpers for dealing with Active Record models and collections
15
+ # Specify array of operators/associations to include - possible values are :
16
+ # [:assignment, :belongs_to, :has_one, :has_many]
17
+
18
+ def ar_to_headers( records, associations = nil, options = {} )
19
+ add_row( to_headers(records, associations, options) )
20
+ end
21
+
22
+ # Convert an AR instance to a set of CSV columns
23
+ # Additional non instance data can be included by supplying list of methods to call
24
+ # on the record
25
+ def ar_to_csv(record, options = {})
26
+ csv_data = record.serializable_hash.values.collect { |c| escape_for_csv(c) }
27
+
28
+ [*options[:methods]].each { |x| csv_data << escape_for_csv(record.send(x)) if(record.respond_to?(x)) } if(options[:methods])
29
+
30
+ add_row(csv_data)
31
+ end
32
+
33
+ end
@@ -11,9 +11,9 @@ require 'datashift/exceptions'
11
11
  require 'datashift/method_mapper'
12
12
 
13
13
  module DataShift
14
-
14
+
15
15
  module CsvLoading
16
-
16
+
17
17
  include DataShift::Logging
18
18
 
19
19
  # Load data through active Record models into DB from a CSV file
@@ -31,9 +31,9 @@ module DataShift
31
31
  # [:force_inclusion] : Array of inbound column names to force into mapping
32
32
  # [:include_all] : Include all headers in processing - takes precedence of :force_inclusion
33
33
  # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
34
-
34
+
35
35
  def perform_csv_load(file_name, options = {})
36
-
36
+
37
37
  require "csv"
38
38
 
39
39
  # TODO - can we abstract out what a 'parsed file' is - so a common object can represent excel,csv etc
@@ -48,19 +48,21 @@ module DataShift
48
48
  puts "\n\n\nLoading from CSV file: #{file_name}"
49
49
  puts "Processing #{@parsed_file.size} rows"
50
50
  begin
51
-
51
+
52
52
  load_object_class.transaction do
53
53
  @reporter.reset
54
54
 
55
55
  @parsed_file.each_with_index do |row, i|
56
-
57
- @current_row = row
58
-
56
+
57
+ @current_row = row
58
+
59
59
  @reporter.processed_object_count += 1
60
-
60
+
61
+ logger.info("Begin processing Row #{@reporter.processed_object_count} from CSV file")
62
+
61
63
  begin
62
64
  # First assign any default values for columns not included in parsed_file
63
- process_missing_columns_with_defaults
65
+ process_defaults
64
66
 
65
67
  # TODO - Smart sorting of column processing order ....
66
68
  # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
@@ -73,64 +75,64 @@ module DataShift
73
75
  # pulling data out of associated column
74
76
  @method_mapper.method_details.each_with_index do |method_detail, col|
75
77
 
78
+ unless method_detail
79
+ logger.warn("No method_detail found for col #{col + 1} #{method_detail}")
80
+ next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
81
+ end
82
+
76
83
  value = row[col]
77
84
 
78
- prepare_data(method_detail, value)
79
-
80
- process()
85
+ process(method_detail, value)
81
86
  end
82
87
 
83
88
  rescue => e
84
- failure( row, true )
85
- logger.error "Failed to process row [#{i}] (#{@current_row})"
86
-
89
+ failure(row, true)
90
+ logger.error e.inspect
91
+ logger.error e.backtrace.first.inspect
92
+ logger.error "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
93
+
87
94
  if(verbose)
88
- puts "Failed to process row [#{i}] (#{@current_row})"
89
- puts e.inspect
95
+ puts "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
96
+ puts e.inspect
90
97
  end
91
-
98
+
92
99
  # don't forget to reset the load object
93
100
  new_load_object
94
101
  next
95
102
  end
96
-
97
- # TODO - make optional - all or nothing or carry on and dump out the exception list at end
98
- unless(save)
99
- failure
100
- logger.error "Failed to save row [#{@current_row}] (#{load_object.inspect})"
101
- logger.error load_object.errors.inspect if(load_object)
102
- else
103
- logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
104
- @reporter.add_loaded_object(@load_object)
105
- end
103
+
104
+ # TODO - make optional - all or nothing or carry on and dump out the exception list at end
105
+
106
+ logger.debug "Attempting Save on : #{load_object.inspect}"
107
+
108
+ save_and_report
106
109
 
107
110
  # don't forget to reset the object or we'll update rather than create
108
111
  new_load_object
109
-
110
112
  end
111
-
113
+
112
114
  raise ActiveRecord::Rollback if(options[:dummy]) # Don't actually create/upload to DB if we are doing dummy run
113
115
  end
114
116
  rescue => e
115
- puts "CAUGHT ", e.backtrace, e.inspect
117
+ logger.error "perform_csv_load failed - #{e.message}:\n#{e.backtrace}"
116
118
  if e.is_a?(ActiveRecord::Rollback) && options[:dummy]
117
- puts "CSV loading stage complete - Dummy run so Rolling Back."
119
+ logger.info "CSV loading stage complete - Dummy run so Rolling Back."
118
120
  else
119
121
  raise e
120
122
  end
121
123
  ensure
122
124
  report
123
- end
124
-
125
+ end # transaction
126
+
125
127
  end
126
128
  end
127
-
129
+
128
130
  class CsvLoader < LoaderBase
129
131
 
130
132
  include DataShift::CsvLoading
131
133
 
132
- def initialize(klass, find_operators = true, object = nil, options = {})
133
- super( klass, find_operators, object, options )
134
+ def initialize(klass, object = nil, options = {})
135
+ super( klass, object, options )
134
136
  raise "Cannot load - failed to create a #{klass}" unless @load_object
135
137
  end
136
138
 
@@ -141,5 +143,5 @@ module DataShift
141
143
  end
142
144
 
143
145
  end
144
-
146
+
145
147
  end