datashift 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/README.markdown +91 -55
  3. data/VERSION +1 -1
  4. data/datashift.gemspec +8 -23
  5. data/lib/applications/jexcel_file.rb +1 -2
  6. data/lib/datashift.rb +34 -15
  7. data/lib/datashift/column_packer.rb +98 -34
  8. data/lib/datashift/data_transforms.rb +83 -0
  9. data/lib/datashift/delimiters.rb +58 -10
  10. data/lib/datashift/excel_base.rb +123 -0
  11. data/lib/datashift/exceptions.rb +45 -7
  12. data/lib/datashift/load_object.rb +25 -0
  13. data/lib/datashift/mapping_service.rb +91 -0
  14. data/lib/datashift/method_detail.rb +40 -62
  15. data/lib/datashift/method_details_manager.rb +18 -2
  16. data/lib/datashift/method_dictionary.rb +27 -10
  17. data/lib/datashift/method_mapper.rb +49 -41
  18. data/lib/datashift/model_mapper.rb +42 -22
  19. data/lib/datashift/populator.rb +258 -143
  20. data/lib/datashift/thor_base.rb +38 -0
  21. data/lib/exporters/csv_exporter.rb +57 -145
  22. data/lib/exporters/excel_exporter.rb +73 -60
  23. data/lib/generators/csv_generator.rb +65 -5
  24. data/lib/generators/generator_base.rb +69 -3
  25. data/lib/generators/mapping_generator.rb +112 -0
  26. data/lib/helpers/core_ext/csv_file.rb +33 -0
  27. data/lib/loaders/csv_loader.rb +41 -39
  28. data/lib/loaders/excel_loader.rb +130 -116
  29. data/lib/loaders/loader_base.rb +190 -146
  30. data/lib/loaders/paperclip/attachment_loader.rb +4 -4
  31. data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
  32. data/lib/loaders/paperclip/image_loading.rb +9 -7
  33. data/lib/loaders/reporter.rb +17 -8
  34. data/lib/thor/export.thor +12 -13
  35. data/lib/thor/generate.thor +1 -9
  36. data/lib/thor/import.thor +13 -24
  37. data/lib/thor/mapping.thor +65 -0
  38. data/spec/Gemfile +13 -11
  39. data/spec/Gemfile.lock +98 -93
  40. data/spec/csv_exporter_spec.rb +104 -99
  41. data/spec/csv_generator_spec.rb +159 -0
  42. data/spec/csv_loader_spec.rb +197 -16
  43. data/spec/datashift_spec.rb +9 -0
  44. data/spec/excel_exporter_spec.rb +149 -58
  45. data/spec/excel_generator_spec.rb +35 -44
  46. data/spec/excel_loader_spec.rb +196 -178
  47. data/spec/excel_spec.rb +8 -5
  48. data/spec/loader_base_spec.rb +47 -7
  49. data/spec/mapping_spec.rb +117 -0
  50. data/spec/method_dictionary_spec.rb +24 -11
  51. data/spec/method_mapper_spec.rb +5 -7
  52. data/spec/model_mapper_spec.rb +41 -0
  53. data/spec/paperclip_loader_spec.rb +3 -6
  54. data/spec/populator_spec.rb +48 -14
  55. data/spec/spec_helper.rb +85 -73
  56. data/spec/thor_spec.rb +40 -5
  57. metadata +93 -86
  58. data/lib/applications/excel_base.rb +0 -63
@@ -12,22 +12,82 @@ module DataShift
12
12
 
13
13
  class CsvGenerator < GeneratorBase
14
14
 
15
+ include DataShift::Logging
16
+
15
17
  def initialize(filename)
16
18
  super(filename)
17
19
  end
18
20
 
19
21
  # Create CSV file representing supplied Model
20
-
21
- def generate(model, options = {})
22
22
 
23
- @filename = options[:filename] if options[:filename]
23
+ def generate(klass, options = {})
24
+ @filename = options[:filename] if options[:filename]
25
+
26
+ prep_remove_list(options)
27
+
28
+ MethodDictionary.find_operators( klass )
29
+ @headers = MethodDictionary.assignments[klass]
30
+
31
+ @headers.delete_if{|h| @remove_list.include?( h.to_sym ) }
32
+
33
+ logger.info("CSVGenerator saving generated template #{@filename}")
34
+
35
+ CSV.open(@filename, "w") do |csv|
36
+ csv << @headers
37
+ end
38
+ end
39
+
40
+ def generate_with_associations(klass, options = {})
41
+ @filename = options[:filename] if options[:filename]
42
+
43
+ MethodDictionary.find_operators( klass )
44
+ MethodDictionary.build_method_details( klass )
45
+
46
+ work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
47
+
48
+ prep_remove_list(options)
49
+
50
+ @headers = []
51
+
52
+ details_mgr = MethodDictionary.method_details_mgrs[klass]
53
+
54
+ work_list.each do |assoc_type|
55
+ method_details_for_assoc_type = details_mgr.get_list_of_method_details(assoc_type)
56
+
57
+ next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
58
+
59
+ method_details_for_assoc_type.each do |md|
60
+ comparable_association = md.operator.to_s.downcase.to_sym
61
+
62
+ i = remove_list.index { |r| r == comparable_association }
63
+
64
+ (i) ? remove_list.delete_at(i) : headers << "#{md.operator}"
65
+ end
66
+ end
67
+
68
+ logger.info("CSVGenerator saving generated with associations template #{@filename}")
69
+
70
+ CSV.open(@filename, "w") do |csv|
71
+ csv << @headers
72
+ end
24
73
  end
25
74
 
26
-
27
- # Create an Csv file representing supplied Model
75
+
76
+ # Create an CSV file representing supplied Model
28
77
 
29
78
  def export(items, options = {})
30
79
  end
31
80
 
81
+
82
+ private
83
+
84
+ # Take options and create a list of symbols to remove from headers
85
+ #
86
+ def prep_remove_list( options )
87
+ @remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
88
+
89
+ @remove_list += GeneratorBase::rails_columns if(options[:remove_rails])
90
+ end
91
+
32
92
  end
33
93
  end
@@ -10,17 +10,83 @@ module DataShift
10
10
  class GeneratorBase
11
11
 
12
12
  attr_accessor :filename, :headers, :remove_list
13
-
13
+
14
14
  def initialize(filename)
15
15
  @filename = filename
16
16
  @headers = []
17
17
  @remove_list =[]
18
18
  end
19
-
20
-
19
+
21
20
  def self.rails_columns
22
21
  @rails_standard_columns ||= [:id, :created_at, :created_on, :updated_at, :updated_on]
23
22
  end
23
+
24
+
25
+ # Parse options and build collection of headers for a method_details_mgr wrapping a class
26
+ # based on association requirements,
27
+ #
28
+ # Default is to include *everything*
29
+ #
30
+ # * <tt>:exclude</tt> - Association TYPE(s) to exclude completely.
31
+ #
32
+ # Possible association_type values are given by MethodDetail::supported_types_enum
33
+ # ... [:assignment, :belongs_to, :has_one, :has_many]
34
+ #
35
+ # * <tt>:remove</tt> - Array of header names to remove
36
+ #
37
+ # Rails DB columns like id, created_at, updated_at are removed by default
38
+ #
39
+ # * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
40
+ #
41
+ def prepare_model_headers(method_details_mgr, options = {})
42
+
43
+ work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
44
+
45
+ @headers = []
46
+
47
+ work_list.each do |assoc_type|
48
+ method_details_for_assoc_type = method_details_mgr.get_list_of_method_details(assoc_type)
49
+
50
+ next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
51
+
52
+ method_details_for_assoc_type.each do |md|
53
+ #comparable_association = md.operator.to_s.downcase.to_sym
54
+ #i = remove_list.index { |r| r == comparable_association }
55
+ #(i) ? remove_list.delete_at(i) : @headers << "#{md.operator}"
56
+ @headers << md.operator
57
+ end
58
+ end
59
+
60
+ remove_headers(options)
61
+
62
+ end
63
+
64
+ # Parse options and remove headers
65
+ # Specify columns to remove with :
66
+ # options[:remove]
67
+ # Rails columns like id, created_at are removed by default,
68
+ # to keep them in specify
69
+ # options[:include_rails]
70
+ #
71
+ def remove_headers(options)
72
+ remove_list = prep_remove_list( options )
73
+
74
+ #TODO - more efficient way ?
75
+ headers.delete_if { |h| remove_list.include?( h.to_sym ) } unless(remove_list.empty?)
76
+ end
77
+
78
+
79
+ # Take options and create a list of symbols to remove from headers
80
+ # Rails columns like id, created_at etc are added to the remove list by default
81
+ # Specify :include_rails to keep them in
82
+ def prep_remove_list( options )
83
+ remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
84
+
85
+ remove_list += GeneratorBase::rails_columns unless(options[:include_rails])
86
+
87
+ remove_list
88
+ end
89
+
24
90
  end
25
91
 
26
92
  end
@@ -0,0 +1,112 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: Create mappings between systems
7
+ #
8
+ require 'generator_base'
9
+
10
+ module DataShift
11
+
12
+ class MappingGenerator < GeneratorBase
13
+
14
+ include DataShift::Logging
15
+ include ExcelBase
16
+
17
+ def initialize(filename)
18
+ super(filename)
19
+ end
20
+
21
+ # Create an YAML template for mapping headers
22
+ #
23
+ # Options:
24
+ #
25
+ # * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
26
+ #
27
+ # * <tt>:remove</tt> - Array of header names to remove
28
+ #
29
+ # Rails columns like id, created_at etc are added to the remove list by default
30
+ #
31
+ # * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
32
+ #
33
+ # * <tt>:associations</tt> - Additionally include all Associations
34
+ #
35
+ # * <tt>:exclude</tt> - Association TYPE(s) to exclude.
36
+ #
37
+ # Possible association_type values are given by MethodDetail::supported_types_enum
38
+ # ... [:assignment, :belongs_to, :has_one, :has_many]
39
+ #
40
+ # * <tt>:file</tt> - Write mappings direct to file name provided
41
+ #
42
+ def generate(model = nil, options = {})
43
+
44
+ mappings = "mappings:\n"
45
+
46
+ if(model)
47
+
48
+ klass = DataShift::ModelMapper.class_from_string_or_raise( model )
49
+
50
+ MethodDictionary.find_operators( klass )
51
+
52
+ MethodDictionary.build_method_details( klass )
53
+
54
+ prepare_model_headers(MethodDictionary.method_details_mgrs[klass], options)
55
+
56
+ if(options[:model_as_dest])
57
+ headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
58
+ else
59
+ headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
60
+ end
61
+ else
62
+ mappings += <<EOS
63
+ ##source_column_heading_0: #dest_column_heading_0
64
+ ##source_column_heading_1: #dest_column_heading_1
65
+ ##source_column_heading_2: #dest_column_heading_2
66
+
67
+ EOS
68
+ end
69
+
70
+ File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
71
+
72
+ mappings
73
+
74
+ end
75
+
76
+ # Create an YAML template from a Excel spreadsheet for mapping headers
77
+ #
78
+ # * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
79
+ #
80
+ # * <tt>:file</tt> - Write mappings direct to file name provided
81
+ #
82
+ def generate_from_excel(excel_file_name, options = {})
83
+
84
+ excel = Excel.new
85
+
86
+ puts "\n\n\nGenerating mapping from Excel file: #{excel_file_name}"
87
+
88
+ excel.open(excel_file_name)
89
+
90
+ sheet_number = options[:sheet_number] || 0
91
+
92
+ sheet = excel.worksheet( sheet_number )
93
+
94
+ parse_headers(sheet, options[:header_row])
95
+
96
+ mappings = "mappings:\n"
97
+
98
+ if(options[:model_as_dest])
99
+ excel_headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
100
+ else
101
+ excel_headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
102
+ end
103
+
104
+ File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
105
+
106
+ mappings
107
+
108
+ end
109
+
110
+ end
111
+
112
+ end # DataShift
@@ -0,0 +1,33 @@
1
+ # Copyright:: Autotelik Media Ltd
2
+ # Author :: Tom Statter
3
+ # Date :: July 2010
4
+ # License::
5
+ #
6
+ # Details:: Simple internal representation of Csv File
7
+
8
+ require 'csv'
9
+
10
+ class CSV
11
+
12
+ include DataShift::ColumnPacker
13
+
14
+ # Helpers for dealing with Active Record models and collections
15
+ # Specify array of operators/associations to include - possible values are :
16
+ # [:assignment, :belongs_to, :has_one, :has_many]
17
+
18
+ def ar_to_headers( records, associations = nil, options = {} )
19
+ add_row( to_headers(records, associations, options) )
20
+ end
21
+
22
+ # Convert an AR instance to a set of CSV columns
23
+ # Additional non instance data can be included by supplying list of methods to call
24
+ # on the record
25
+ def ar_to_csv(record, options = {})
26
+ csv_data = record.serializable_hash.values.collect { |c| escape_for_csv(c) }
27
+
28
+ [*options[:methods]].each { |x| csv_data << escape_for_csv(record.send(x)) if(record.respond_to?(x)) } if(options[:methods])
29
+
30
+ add_row(csv_data)
31
+ end
32
+
33
+ end
@@ -11,9 +11,9 @@ require 'datashift/exceptions'
11
11
  require 'datashift/method_mapper'
12
12
 
13
13
  module DataShift
14
-
14
+
15
15
  module CsvLoading
16
-
16
+
17
17
  include DataShift::Logging
18
18
 
19
19
  # Load data through active Record models into DB from a CSV file
@@ -31,9 +31,9 @@ module DataShift
31
31
  # [:force_inclusion] : Array of inbound column names to force into mapping
32
32
  # [:include_all] : Include all headers in processing - takes precedence of :force_inclusion
33
33
  # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
34
-
34
+
35
35
  def perform_csv_load(file_name, options = {})
36
-
36
+
37
37
  require "csv"
38
38
 
39
39
  # TODO - can we abstract out what a 'parsed file' is - so a common object can represent excel,csv etc
@@ -48,19 +48,21 @@ module DataShift
48
48
  puts "\n\n\nLoading from CSV file: #{file_name}"
49
49
  puts "Processing #{@parsed_file.size} rows"
50
50
  begin
51
-
51
+
52
52
  load_object_class.transaction do
53
53
  @reporter.reset
54
54
 
55
55
  @parsed_file.each_with_index do |row, i|
56
-
57
- @current_row = row
58
-
56
+
57
+ @current_row = row
58
+
59
59
  @reporter.processed_object_count += 1
60
-
60
+
61
+ logger.info("Begin processing Row #{@reporter.processed_object_count} from CSV file")
62
+
61
63
  begin
62
64
  # First assign any default values for columns not included in parsed_file
63
- process_missing_columns_with_defaults
65
+ process_defaults
64
66
 
65
67
  # TODO - Smart sorting of column processing order ....
66
68
  # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
@@ -73,64 +75,64 @@ module DataShift
73
75
  # pulling data out of associated column
74
76
  @method_mapper.method_details.each_with_index do |method_detail, col|
75
77
 
78
+ unless method_detail
79
+ logger.warn("No method_detail found for col #{col + 1} #{method_detail}")
80
+ next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
81
+ end
82
+
76
83
  value = row[col]
77
84
 
78
- prepare_data(method_detail, value)
79
-
80
- process()
85
+ process(method_detail, value)
81
86
  end
82
87
 
83
88
  rescue => e
84
- failure( row, true )
85
- logger.error "Failed to process row [#{i}] (#{@current_row})"
86
-
89
+ failure(row, true)
90
+ logger.error e.inspect
91
+ logger.error e.backtrace.first.inspect
92
+ logger.error "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
93
+
87
94
  if(verbose)
88
- puts "Failed to process row [#{i}] (#{@current_row})"
89
- puts e.inspect
95
+ puts "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
96
+ puts e.inspect
90
97
  end
91
-
98
+
92
99
  # don't forget to reset the load object
93
100
  new_load_object
94
101
  next
95
102
  end
96
-
97
- # TODO - make optional - all or nothing or carry on and dump out the exception list at end
98
- unless(save)
99
- failure
100
- logger.error "Failed to save row [#{@current_row}] (#{load_object.inspect})"
101
- logger.error load_object.errors.inspect if(load_object)
102
- else
103
- logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
104
- @reporter.add_loaded_object(@load_object)
105
- end
103
+
104
+ # TODO - make optional - all or nothing or carry on and dump out the exception list at end
105
+
106
+ logger.debug "Attempting Save on : #{load_object.inspect}"
107
+
108
+ save_and_report
106
109
 
107
110
  # don't forget to reset the object or we'll update rather than create
108
111
  new_load_object
109
-
110
112
  end
111
-
113
+
112
114
  raise ActiveRecord::Rollback if(options[:dummy]) # Don't actually create/upload to DB if we are doing dummy run
113
115
  end
114
116
  rescue => e
115
- puts "CAUGHT ", e.backtrace, e.inspect
117
+ logger.error "perform_csv_load failed - #{e.message}:\n#{e.backtrace}"
116
118
  if e.is_a?(ActiveRecord::Rollback) && options[:dummy]
117
- puts "CSV loading stage complete - Dummy run so Rolling Back."
119
+ logger.info "CSV loading stage complete - Dummy run so Rolling Back."
118
120
  else
119
121
  raise e
120
122
  end
121
123
  ensure
122
124
  report
123
- end
124
-
125
+ end # transaction
126
+
125
127
  end
126
128
  end
127
-
129
+
128
130
  class CsvLoader < LoaderBase
129
131
 
130
132
  include DataShift::CsvLoading
131
133
 
132
- def initialize(klass, find_operators = true, object = nil, options = {})
133
- super( klass, find_operators, object, options )
134
+ def initialize(klass, object = nil, options = {})
135
+ super( klass, object, options )
134
136
  raise "Cannot load - failed to create a #{klass}" unless @load_object
135
137
  end
136
138
 
@@ -141,5 +143,5 @@ module DataShift
141
143
  end
142
144
 
143
145
  end
144
-
146
+
145
147
  end