datashift 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.markdown +91 -55
- data/VERSION +1 -1
- data/datashift.gemspec +8 -23
- data/lib/applications/jexcel_file.rb +1 -2
- data/lib/datashift.rb +34 -15
- data/lib/datashift/column_packer.rb +98 -34
- data/lib/datashift/data_transforms.rb +83 -0
- data/lib/datashift/delimiters.rb +58 -10
- data/lib/datashift/excel_base.rb +123 -0
- data/lib/datashift/exceptions.rb +45 -7
- data/lib/datashift/load_object.rb +25 -0
- data/lib/datashift/mapping_service.rb +91 -0
- data/lib/datashift/method_detail.rb +40 -62
- data/lib/datashift/method_details_manager.rb +18 -2
- data/lib/datashift/method_dictionary.rb +27 -10
- data/lib/datashift/method_mapper.rb +49 -41
- data/lib/datashift/model_mapper.rb +42 -22
- data/lib/datashift/populator.rb +258 -143
- data/lib/datashift/thor_base.rb +38 -0
- data/lib/exporters/csv_exporter.rb +57 -145
- data/lib/exporters/excel_exporter.rb +73 -60
- data/lib/generators/csv_generator.rb +65 -5
- data/lib/generators/generator_base.rb +69 -3
- data/lib/generators/mapping_generator.rb +112 -0
- data/lib/helpers/core_ext/csv_file.rb +33 -0
- data/lib/loaders/csv_loader.rb +41 -39
- data/lib/loaders/excel_loader.rb +130 -116
- data/lib/loaders/loader_base.rb +190 -146
- data/lib/loaders/paperclip/attachment_loader.rb +4 -4
- data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
- data/lib/loaders/paperclip/image_loading.rb +9 -7
- data/lib/loaders/reporter.rb +17 -8
- data/lib/thor/export.thor +12 -13
- data/lib/thor/generate.thor +1 -9
- data/lib/thor/import.thor +13 -24
- data/lib/thor/mapping.thor +65 -0
- data/spec/Gemfile +13 -11
- data/spec/Gemfile.lock +98 -93
- data/spec/csv_exporter_spec.rb +104 -99
- data/spec/csv_generator_spec.rb +159 -0
- data/spec/csv_loader_spec.rb +197 -16
- data/spec/datashift_spec.rb +9 -0
- data/spec/excel_exporter_spec.rb +149 -58
- data/spec/excel_generator_spec.rb +35 -44
- data/spec/excel_loader_spec.rb +196 -178
- data/spec/excel_spec.rb +8 -5
- data/spec/loader_base_spec.rb +47 -7
- data/spec/mapping_spec.rb +117 -0
- data/spec/method_dictionary_spec.rb +24 -11
- data/spec/method_mapper_spec.rb +5 -7
- data/spec/model_mapper_spec.rb +41 -0
- data/spec/paperclip_loader_spec.rb +3 -6
- data/spec/populator_spec.rb +48 -14
- data/spec/spec_helper.rb +85 -73
- data/spec/thor_spec.rb +40 -5
- metadata +93 -86
- data/lib/applications/excel_base.rb +0 -63
@@ -12,22 +12,82 @@ module DataShift
|
|
12
12
|
|
13
13
|
class CsvGenerator < GeneratorBase
|
14
14
|
|
15
|
+
include DataShift::Logging
|
16
|
+
|
15
17
|
def initialize(filename)
|
16
18
|
super(filename)
|
17
19
|
end
|
18
20
|
|
19
21
|
# Create CSV file representing supplied Model
|
20
|
-
|
21
|
-
def generate(model, options = {})
|
22
22
|
|
23
|
-
|
23
|
+
def generate(klass, options = {})
|
24
|
+
@filename = options[:filename] if options[:filename]
|
25
|
+
|
26
|
+
prep_remove_list(options)
|
27
|
+
|
28
|
+
MethodDictionary.find_operators( klass )
|
29
|
+
@headers = MethodDictionary.assignments[klass]
|
30
|
+
|
31
|
+
@headers.delete_if{|h| @remove_list.include?( h.to_sym ) }
|
32
|
+
|
33
|
+
logger.info("CSVGenerator saving generated template #{@filename}")
|
34
|
+
|
35
|
+
CSV.open(@filename, "w") do |csv|
|
36
|
+
csv << @headers
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def generate_with_associations(klass, options = {})
|
41
|
+
@filename = options[:filename] if options[:filename]
|
42
|
+
|
43
|
+
MethodDictionary.find_operators( klass )
|
44
|
+
MethodDictionary.build_method_details( klass )
|
45
|
+
|
46
|
+
work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
|
47
|
+
|
48
|
+
prep_remove_list(options)
|
49
|
+
|
50
|
+
@headers = []
|
51
|
+
|
52
|
+
details_mgr = MethodDictionary.method_details_mgrs[klass]
|
53
|
+
|
54
|
+
work_list.each do |assoc_type|
|
55
|
+
method_details_for_assoc_type = details_mgr.get_list_of_method_details(assoc_type)
|
56
|
+
|
57
|
+
next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
|
58
|
+
|
59
|
+
method_details_for_assoc_type.each do |md|
|
60
|
+
comparable_association = md.operator.to_s.downcase.to_sym
|
61
|
+
|
62
|
+
i = remove_list.index { |r| r == comparable_association }
|
63
|
+
|
64
|
+
(i) ? remove_list.delete_at(i) : headers << "#{md.operator}"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
logger.info("CSVGenerator saving generated with associations template #{@filename}")
|
69
|
+
|
70
|
+
CSV.open(@filename, "w") do |csv|
|
71
|
+
csv << @headers
|
72
|
+
end
|
24
73
|
end
|
25
74
|
|
26
|
-
|
27
|
-
# Create an
|
75
|
+
|
76
|
+
# Create a CSV file representing supplied Model
|
28
77
|
|
29
78
|
def export(items, options = {})
|
30
79
|
end
|
31
80
|
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# Take options and create a list of symbols to remove from headers
|
85
|
+
#
|
86
|
+
def prep_remove_list( options )
|
87
|
+
@remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
|
88
|
+
|
89
|
+
@remove_list += GeneratorBase::rails_columns if(options[:remove_rails])
|
90
|
+
end
|
91
|
+
|
32
92
|
end
|
33
93
|
end
|
@@ -10,17 +10,83 @@ module DataShift
|
|
10
10
|
class GeneratorBase
|
11
11
|
|
12
12
|
attr_accessor :filename, :headers, :remove_list
|
13
|
-
|
13
|
+
|
14
14
|
def initialize(filename)
|
15
15
|
@filename = filename
|
16
16
|
@headers = []
|
17
17
|
@remove_list =[]
|
18
18
|
end
|
19
|
-
|
20
|
-
|
19
|
+
|
21
20
|
def self.rails_columns
|
22
21
|
@rails_standard_columns ||= [:id, :created_at, :created_on, :updated_at, :updated_on]
|
23
22
|
end
|
23
|
+
|
24
|
+
|
25
|
+
# Parse options and build collection of headers for a method_details_mgr wrapping a class
|
26
|
+
# based on association requirements,
|
27
|
+
#
|
28
|
+
# Default is to include *everything*
|
29
|
+
#
|
30
|
+
# * <tt>:exclude</tt> - Association TYPE(s) to exclude completely.
|
31
|
+
#
|
32
|
+
# Possible association_type values are given by MethodDetail::supported_types_enum
|
33
|
+
# ... [:assignment, :belongs_to, :has_one, :has_many]
|
34
|
+
#
|
35
|
+
# * <tt>:remove</tt> - Array of header names to remove
|
36
|
+
#
|
37
|
+
# Rails DB columns like id, created_at, updated_at are removed by default
|
38
|
+
#
|
39
|
+
# * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
|
40
|
+
#
|
41
|
+
def prepare_model_headers(method_details_mgr, options = {})
|
42
|
+
|
43
|
+
work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
|
44
|
+
|
45
|
+
@headers = []
|
46
|
+
|
47
|
+
work_list.each do |assoc_type|
|
48
|
+
method_details_for_assoc_type = method_details_mgr.get_list_of_method_details(assoc_type)
|
49
|
+
|
50
|
+
next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
|
51
|
+
|
52
|
+
method_details_for_assoc_type.each do |md|
|
53
|
+
#comparable_association = md.operator.to_s.downcase.to_sym
|
54
|
+
#i = remove_list.index { |r| r == comparable_association }
|
55
|
+
#(i) ? remove_list.delete_at(i) : @headers << "#{md.operator}"
|
56
|
+
@headers << md.operator
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
remove_headers(options)
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
# Parse options and remove headers
|
65
|
+
# Specify columns to remove with :
|
66
|
+
# options[:remove]
|
67
|
+
# Rails columns like id, created_at are removed by default,
|
68
|
+
# to keep them in specify
|
69
|
+
# options[:include_rails]
|
70
|
+
#
|
71
|
+
def remove_headers(options)
|
72
|
+
remove_list = prep_remove_list( options )
|
73
|
+
|
74
|
+
#TODO - more efficient way ?
|
75
|
+
headers.delete_if { |h| remove_list.include?( h.to_sym ) } unless(remove_list.empty?)
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
# Take options and create a list of symbols to remove from headers
|
80
|
+
# Rails columns like id, created_at etc are added to the remove list by default
|
81
|
+
# Specify :include_rails to keep them in
|
82
|
+
def prep_remove_list( options )
|
83
|
+
remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
|
84
|
+
|
85
|
+
remove_list += GeneratorBase::rails_columns unless(options[:include_rails])
|
86
|
+
|
87
|
+
remove_list
|
88
|
+
end
|
89
|
+
|
24
90
|
end
|
25
91
|
|
26
92
|
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Aug 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: Create mappings between systems
|
7
|
+
#
|
8
|
+
require 'generator_base'
|
9
|
+
|
10
|
+
module DataShift
|
11
|
+
|
12
|
+
class MappingGenerator < GeneratorBase
|
13
|
+
|
14
|
+
include DataShift::Logging
|
15
|
+
include ExcelBase
|
16
|
+
|
17
|
+
def initialize(filename)
|
18
|
+
super(filename)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Create a YAML template for mapping headers
|
22
|
+
#
|
23
|
+
# Options:
|
24
|
+
#
|
25
|
+
# * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
|
26
|
+
#
|
27
|
+
# * <tt>:remove</tt> - Array of header names to remove
|
28
|
+
#
|
29
|
+
# Rails columns like id, created_at etc are added to the remove list by default
|
30
|
+
#
|
31
|
+
# * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
|
32
|
+
#
|
33
|
+
# * <tt>:associations</tt> - Additionally include all Associations
|
34
|
+
#
|
35
|
+
# * <tt>:exclude</tt> - Association TYPE(s) to exclude.
|
36
|
+
#
|
37
|
+
# Possible association_type values are given by MethodDetail::supported_types_enum
|
38
|
+
# ... [:assignment, :belongs_to, :has_one, :has_many]
|
39
|
+
#
|
40
|
+
# * <tt>:file</tt> - Write mappings direct to file name provided
|
41
|
+
#
|
42
|
+
def generate(model = nil, options = {})
|
43
|
+
|
44
|
+
mappings = "mappings:\n"
|
45
|
+
|
46
|
+
if(model)
|
47
|
+
|
48
|
+
klass = DataShift::ModelMapper.class_from_string_or_raise( model )
|
49
|
+
|
50
|
+
MethodDictionary.find_operators( klass )
|
51
|
+
|
52
|
+
MethodDictionary.build_method_details( klass )
|
53
|
+
|
54
|
+
prepare_model_headers(MethodDictionary.method_details_mgrs[klass], options)
|
55
|
+
|
56
|
+
if(options[:model_as_dest])
|
57
|
+
headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
|
58
|
+
else
|
59
|
+
headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
|
60
|
+
end
|
61
|
+
else
|
62
|
+
mappings += <<EOS
|
63
|
+
##source_column_heading_0: #dest_column_heading_0
|
64
|
+
##source_column_heading_1: #dest_column_heading_1
|
65
|
+
##source_column_heading_2: #dest_column_heading_2
|
66
|
+
|
67
|
+
EOS
|
68
|
+
end
|
69
|
+
|
70
|
+
File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
|
71
|
+
|
72
|
+
mappings
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
# Create a YAML template from an Excel spreadsheet for mapping headers
|
77
|
+
#
|
78
|
+
# * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
|
79
|
+
#
|
80
|
+
# * <tt>:file</tt> - Write mappings direct to file name provided
|
81
|
+
#
|
82
|
+
def generate_from_excel(excel_file_name, options = {})
|
83
|
+
|
84
|
+
excel = Excel.new
|
85
|
+
|
86
|
+
puts "\n\n\nGenerating mapping from Excel file: #{excel_file_name}"
|
87
|
+
|
88
|
+
excel.open(excel_file_name)
|
89
|
+
|
90
|
+
sheet_number = options[:sheet_number] || 0
|
91
|
+
|
92
|
+
sheet = excel.worksheet( sheet_number )
|
93
|
+
|
94
|
+
parse_headers(sheet, options[:header_row])
|
95
|
+
|
96
|
+
mappings = "mappings:\n"
|
97
|
+
|
98
|
+
if(options[:model_as_dest])
|
99
|
+
excel_headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
|
100
|
+
else
|
101
|
+
excel_headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
|
102
|
+
end
|
103
|
+
|
104
|
+
File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
|
105
|
+
|
106
|
+
mappings
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
end # DataShift
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright:: Autotelik Media Ltd
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: July 2010
|
4
|
+
# License::
|
5
|
+
#
|
6
|
+
# Details:: Simple internal representation of Csv File
|
7
|
+
|
8
|
+
require 'csv'
|
9
|
+
|
10
|
+
class CSV
|
11
|
+
|
12
|
+
include DataShift::ColumnPacker
|
13
|
+
|
14
|
+
# Helpers for dealing with Active Record models and collections
|
15
|
+
# Specify array of operators/associations to include - possible values are :
|
16
|
+
# [:assignment, :belongs_to, :has_one, :has_many]
|
17
|
+
|
18
|
+
def ar_to_headers( records, associations = nil, options = {} )
|
19
|
+
add_row( to_headers(records, associations, options) )
|
20
|
+
end
|
21
|
+
|
22
|
+
# Convert an AR instance to a set of CSV columns
|
23
|
+
# Additional non instance data can be included by supplying list of methods to call
|
24
|
+
# on the record
|
25
|
+
def ar_to_csv(record, options = {})
|
26
|
+
csv_data = record.serializable_hash.values.collect { |c| escape_for_csv(c) }
|
27
|
+
|
28
|
+
[*options[:methods]].each { |x| csv_data << escape_for_csv(record.send(x)) if(record.respond_to?(x)) } if(options[:methods])
|
29
|
+
|
30
|
+
add_row(csv_data)
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
data/lib/loaders/csv_loader.rb
CHANGED
@@ -11,9 +11,9 @@ require 'datashift/exceptions'
|
|
11
11
|
require 'datashift/method_mapper'
|
12
12
|
|
13
13
|
module DataShift
|
14
|
-
|
14
|
+
|
15
15
|
module CsvLoading
|
16
|
-
|
16
|
+
|
17
17
|
include DataShift::Logging
|
18
18
|
|
19
19
|
# Load data through active Record models into DB from a CSV file
|
@@ -31,9 +31,9 @@ module DataShift
|
|
31
31
|
# [:force_inclusion] : Array of inbound column names to force into mapping
|
32
32
|
# [:include_all] : Include all headers in processing - takes precedence over :force_inclusion
|
33
33
|
# [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
|
34
|
-
|
34
|
+
|
35
35
|
def perform_csv_load(file_name, options = {})
|
36
|
-
|
36
|
+
|
37
37
|
require "csv"
|
38
38
|
|
39
39
|
# TODO - can we abstract out what a 'parsed file' is - so a common object can represent excel,csv etc
|
@@ -48,19 +48,21 @@ module DataShift
|
|
48
48
|
puts "\n\n\nLoading from CSV file: #{file_name}"
|
49
49
|
puts "Processing #{@parsed_file.size} rows"
|
50
50
|
begin
|
51
|
-
|
51
|
+
|
52
52
|
load_object_class.transaction do
|
53
53
|
@reporter.reset
|
54
54
|
|
55
55
|
@parsed_file.each_with_index do |row, i|
|
56
|
-
|
57
|
-
@current_row = row
|
58
|
-
|
56
|
+
|
57
|
+
@current_row = row
|
58
|
+
|
59
59
|
@reporter.processed_object_count += 1
|
60
|
-
|
60
|
+
|
61
|
+
logger.info("Begin processing Row #{@reporter.processed_object_count} from CSV file")
|
62
|
+
|
61
63
|
begin
|
62
64
|
# First assign any default values for columns not included in parsed_file
|
63
|
-
|
65
|
+
process_defaults
|
64
66
|
|
65
67
|
# TODO - Smart sorting of column processing order ....
|
66
68
|
# Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
|
@@ -73,64 +75,64 @@ module DataShift
|
|
73
75
|
# pulling data out of associated column
|
74
76
|
@method_mapper.method_details.each_with_index do |method_detail, col|
|
75
77
|
|
78
|
+
unless method_detail
|
79
|
+
logger.warn("No method_detail found for col #{col + 1} #{method_detail}")
|
80
|
+
next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
|
81
|
+
end
|
82
|
+
|
76
83
|
value = row[col]
|
77
84
|
|
78
|
-
|
79
|
-
|
80
|
-
process()
|
85
|
+
process(method_detail, value)
|
81
86
|
end
|
82
87
|
|
83
88
|
rescue => e
|
84
|
-
failure(
|
85
|
-
logger.error
|
86
|
-
|
89
|
+
failure(row, true)
|
90
|
+
logger.error e.inspect
|
91
|
+
logger.error e.backtrace.first.inspect
|
92
|
+
logger.error "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
|
93
|
+
|
87
94
|
if(verbose)
|
88
|
-
puts "Failed to process row [#{
|
89
|
-
puts e.inspect
|
95
|
+
puts "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
|
96
|
+
puts e.inspect
|
90
97
|
end
|
91
|
-
|
98
|
+
|
92
99
|
# don't forget to reset the load object
|
93
100
|
new_load_object
|
94
101
|
next
|
95
102
|
end
|
96
|
-
|
97
|
-
# TODO - make optional - all or nothing or carry on and dump out the exception list at end
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
else
|
103
|
-
logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
|
104
|
-
@reporter.add_loaded_object(@load_object)
|
105
|
-
end
|
103
|
+
|
104
|
+
# TODO - make optional - all or nothing or carry on and dump out the exception list at end
|
105
|
+
|
106
|
+
logger.debug "Attempting Save on : #{load_object.inspect}"
|
107
|
+
|
108
|
+
save_and_report
|
106
109
|
|
107
110
|
# don't forget to reset the object or we'll update rather than create
|
108
111
|
new_load_object
|
109
|
-
|
110
112
|
end
|
111
|
-
|
113
|
+
|
112
114
|
raise ActiveRecord::Rollback if(options[:dummy]) # Don't actually create/upload to DB if we are doing dummy run
|
113
115
|
end
|
114
116
|
rescue => e
|
115
|
-
|
117
|
+
logger.error "perform_csv_load failed - #{e.message}:\n#{e.backtrace}"
|
116
118
|
if e.is_a?(ActiveRecord::Rollback) && options[:dummy]
|
117
|
-
|
119
|
+
logger.info "CSV loading stage complete - Dummy run so Rolling Back."
|
118
120
|
else
|
119
121
|
raise e
|
120
122
|
end
|
121
123
|
ensure
|
122
124
|
report
|
123
|
-
end
|
124
|
-
|
125
|
+
end # transaction
|
126
|
+
|
125
127
|
end
|
126
128
|
end
|
127
|
-
|
129
|
+
|
128
130
|
class CsvLoader < LoaderBase
|
129
131
|
|
130
132
|
include DataShift::CsvLoading
|
131
133
|
|
132
|
-
def initialize(klass,
|
133
|
-
super( klass,
|
134
|
+
def initialize(klass, object = nil, options = {})
|
135
|
+
super( klass, object, options )
|
134
136
|
raise "Cannot load - failed to create a #{klass}" unless @load_object
|
135
137
|
end
|
136
138
|
|
@@ -141,5 +143,5 @@ module DataShift
|
|
141
143
|
end
|
142
144
|
|
143
145
|
end
|
144
|
-
|
146
|
+
|
145
147
|
end
|