datashift 0.15.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.markdown +91 -55
- data/VERSION +1 -1
- data/datashift.gemspec +8 -23
- data/lib/applications/jexcel_file.rb +1 -2
- data/lib/datashift.rb +34 -15
- data/lib/datashift/column_packer.rb +98 -34
- data/lib/datashift/data_transforms.rb +83 -0
- data/lib/datashift/delimiters.rb +58 -10
- data/lib/datashift/excel_base.rb +123 -0
- data/lib/datashift/exceptions.rb +45 -7
- data/lib/datashift/load_object.rb +25 -0
- data/lib/datashift/mapping_service.rb +91 -0
- data/lib/datashift/method_detail.rb +40 -62
- data/lib/datashift/method_details_manager.rb +18 -2
- data/lib/datashift/method_dictionary.rb +27 -10
- data/lib/datashift/method_mapper.rb +49 -41
- data/lib/datashift/model_mapper.rb +42 -22
- data/lib/datashift/populator.rb +258 -143
- data/lib/datashift/thor_base.rb +38 -0
- data/lib/exporters/csv_exporter.rb +57 -145
- data/lib/exporters/excel_exporter.rb +73 -60
- data/lib/generators/csv_generator.rb +65 -5
- data/lib/generators/generator_base.rb +69 -3
- data/lib/generators/mapping_generator.rb +112 -0
- data/lib/helpers/core_ext/csv_file.rb +33 -0
- data/lib/loaders/csv_loader.rb +41 -39
- data/lib/loaders/excel_loader.rb +130 -116
- data/lib/loaders/loader_base.rb +190 -146
- data/lib/loaders/paperclip/attachment_loader.rb +4 -4
- data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
- data/lib/loaders/paperclip/image_loading.rb +9 -7
- data/lib/loaders/reporter.rb +17 -8
- data/lib/thor/export.thor +12 -13
- data/lib/thor/generate.thor +1 -9
- data/lib/thor/import.thor +13 -24
- data/lib/thor/mapping.thor +65 -0
- data/spec/Gemfile +13 -11
- data/spec/Gemfile.lock +98 -93
- data/spec/csv_exporter_spec.rb +104 -99
- data/spec/csv_generator_spec.rb +159 -0
- data/spec/csv_loader_spec.rb +197 -16
- data/spec/datashift_spec.rb +9 -0
- data/spec/excel_exporter_spec.rb +149 -58
- data/spec/excel_generator_spec.rb +35 -44
- data/spec/excel_loader_spec.rb +196 -178
- data/spec/excel_spec.rb +8 -5
- data/spec/loader_base_spec.rb +47 -7
- data/spec/mapping_spec.rb +117 -0
- data/spec/method_dictionary_spec.rb +24 -11
- data/spec/method_mapper_spec.rb +5 -7
- data/spec/model_mapper_spec.rb +41 -0
- data/spec/paperclip_loader_spec.rb +3 -6
- data/spec/populator_spec.rb +48 -14
- data/spec/spec_helper.rb +85 -73
- data/spec/thor_spec.rb +40 -5
- metadata +93 -86
- data/lib/applications/excel_base.rb +0 -63
@@ -12,22 +12,82 @@ module DataShift
|
|
12
12
|
|
13
13
|
class CsvGenerator < GeneratorBase
|
14
14
|
|
15
|
+
include DataShift::Logging
|
16
|
+
|
15
17
|
def initialize(filename)
|
16
18
|
super(filename)
|
17
19
|
end
|
18
20
|
|
19
21
|
# Create CSV file representing supplied Model
|
20
|
-
|
21
|
-
def generate(model, options = {})
|
22
22
|
|
23
|
-
|
23
|
+
def generate(klass, options = {})
|
24
|
+
@filename = options[:filename] if options[:filename]
|
25
|
+
|
26
|
+
prep_remove_list(options)
|
27
|
+
|
28
|
+
MethodDictionary.find_operators( klass )
|
29
|
+
@headers = MethodDictionary.assignments[klass]
|
30
|
+
|
31
|
+
@headers.delete_if{|h| @remove_list.include?( h.to_sym ) }
|
32
|
+
|
33
|
+
logger.info("CSVGenerator saving generated template #{@filename}")
|
34
|
+
|
35
|
+
CSV.open(@filename, "w") do |csv|
|
36
|
+
csv << @headers
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def generate_with_associations(klass, options = {})
|
41
|
+
@filename = options[:filename] if options[:filename]
|
42
|
+
|
43
|
+
MethodDictionary.find_operators( klass )
|
44
|
+
MethodDictionary.build_method_details( klass )
|
45
|
+
|
46
|
+
work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
|
47
|
+
|
48
|
+
prep_remove_list(options)
|
49
|
+
|
50
|
+
@headers = []
|
51
|
+
|
52
|
+
details_mgr = MethodDictionary.method_details_mgrs[klass]
|
53
|
+
|
54
|
+
work_list.each do |assoc_type|
|
55
|
+
method_details_for_assoc_type = details_mgr.get_list_of_method_details(assoc_type)
|
56
|
+
|
57
|
+
next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
|
58
|
+
|
59
|
+
method_details_for_assoc_type.each do |md|
|
60
|
+
comparable_association = md.operator.to_s.downcase.to_sym
|
61
|
+
|
62
|
+
i = remove_list.index { |r| r == comparable_association }
|
63
|
+
|
64
|
+
(i) ? remove_list.delete_at(i) : headers << "#{md.operator}"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
logger.info("CSVGenerator saving generated with associations template #{@filename}")
|
69
|
+
|
70
|
+
CSV.open(@filename, "w") do |csv|
|
71
|
+
csv << @headers
|
72
|
+
end
|
24
73
|
end
|
25
74
|
|
26
|
-
|
27
|
-
# Create an
|
75
|
+
|
76
|
+
# Create a CSV file representing the supplied Model
|
28
77
|
|
29
78
|
def export(items, options = {})
|
30
79
|
end
|
31
80
|
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# Take options and create a list of symbols to remove from headers
|
85
|
+
#
|
86
|
+
def prep_remove_list( options )
|
87
|
+
@remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
|
88
|
+
|
89
|
+
@remove_list += GeneratorBase::rails_columns if(options[:remove_rails])
|
90
|
+
end
|
91
|
+
|
32
92
|
end
|
33
93
|
end
|
@@ -10,17 +10,83 @@ module DataShift
|
|
10
10
|
class GeneratorBase
|
11
11
|
|
12
12
|
attr_accessor :filename, :headers, :remove_list
|
13
|
-
|
13
|
+
|
14
14
|
def initialize(filename)
|
15
15
|
@filename = filename
|
16
16
|
@headers = []
|
17
17
|
@remove_list =[]
|
18
18
|
end
|
19
|
-
|
20
|
-
|
19
|
+
|
21
20
|
def self.rails_columns
|
22
21
|
@rails_standard_columns ||= [:id, :created_at, :created_on, :updated_at, :updated_on]
|
23
22
|
end
|
23
|
+
|
24
|
+
|
25
|
+
# Parse options and build collection of headers for a method_details_mgr wrapping a class
|
26
|
+
# based on association requirements,
|
27
|
+
#
|
28
|
+
# Default is to include *everything*
|
29
|
+
#
|
30
|
+
# * <tt>:exclude</tt> - Association TYPE(s) to exclude completely.
|
31
|
+
#
|
32
|
+
# Possible association_type values are given by MethodDetail::supported_types_enum
|
33
|
+
# ... [:assignment, :belongs_to, :has_one, :has_many]
|
34
|
+
#
|
35
|
+
# * <tt>:remove</tt> - Array of header names to remove
|
36
|
+
#
|
37
|
+
# Rails DB columns like id, created_at, updated_at are removed by default
|
38
|
+
#
|
39
|
+
# * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
|
40
|
+
#
|
41
|
+
def prepare_model_headers(method_details_mgr, options = {})
|
42
|
+
|
43
|
+
work_list = MethodDetail::supported_types_enum.to_a - [ *options[:exclude] ]
|
44
|
+
|
45
|
+
@headers = []
|
46
|
+
|
47
|
+
work_list.each do |assoc_type|
|
48
|
+
method_details_for_assoc_type = method_details_mgr.get_list_of_method_details(assoc_type)
|
49
|
+
|
50
|
+
next if(method_details_for_assoc_type.nil? || method_details_for_assoc_type.empty?)
|
51
|
+
|
52
|
+
method_details_for_assoc_type.each do |md|
|
53
|
+
#comparable_association = md.operator.to_s.downcase.to_sym
|
54
|
+
#i = remove_list.index { |r| r == comparable_association }
|
55
|
+
#(i) ? remove_list.delete_at(i) : @headers << "#{md.operator}"
|
56
|
+
@headers << md.operator
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
remove_headers(options)
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
# Parse options and remove headers
|
65
|
+
# Specify columns to remove with :
|
66
|
+
# options[:remove]
|
67
|
+
# Rails columns like id, created_at are removed by default,
|
68
|
+
# to keep them in specify
|
69
|
+
# options[:include_rails]
|
70
|
+
#
|
71
|
+
def remove_headers(options)
|
72
|
+
remove_list = prep_remove_list( options )
|
73
|
+
|
74
|
+
#TODO - more efficient way ?
|
75
|
+
headers.delete_if { |h| remove_list.include?( h.to_sym ) } unless(remove_list.empty?)
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
# Take options and create a list of symbols to remove from headers
|
80
|
+
# Rails columns like id, created_at etc are added to the remove list by default
|
81
|
+
# Specify :include_rails to keep them in
|
82
|
+
def prep_remove_list( options )
|
83
|
+
remove_list = [ *options[:remove] ].compact.collect{|x| x.to_s.downcase.to_sym }
|
84
|
+
|
85
|
+
remove_list += GeneratorBase::rails_columns unless(options[:include_rails])
|
86
|
+
|
87
|
+
remove_list
|
88
|
+
end
|
89
|
+
|
24
90
|
end
|
25
91
|
|
26
92
|
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Aug 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: Create mappings between systems
|
7
|
+
#
|
8
|
+
require 'generator_base'
|
9
|
+
|
10
|
+
module DataShift
|
11
|
+
|
12
|
+
class MappingGenerator < GeneratorBase
|
13
|
+
|
14
|
+
include DataShift::Logging
|
15
|
+
include ExcelBase
|
16
|
+
|
17
|
+
def initialize(filename)
|
18
|
+
super(filename)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Create a YAML template for mapping headers
|
22
|
+
#
|
23
|
+
# Options:
|
24
|
+
#
|
25
|
+
# * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
|
26
|
+
#
|
27
|
+
# * <tt>:remove</tt> - Array of header names to remove
|
28
|
+
#
|
29
|
+
# Rails columns like id, created_at etc are added to the remove list by default
|
30
|
+
#
|
31
|
+
# * <tt>:include_rails</tt> - Specify to keep Rails columns in mappings
|
32
|
+
#
|
33
|
+
# * <tt>:associations</tt> - Additionally include all Associations
|
34
|
+
#
|
35
|
+
# * <tt>:exclude</tt> - Association TYPE(s) to exclude.
|
36
|
+
#
|
37
|
+
# Possible association_type values are given by MethodDetail::supported_types_enum
|
38
|
+
# ... [:assignment, :belongs_to, :has_one, :has_many]
|
39
|
+
#
|
40
|
+
# * <tt>:file</tt> - Write mappings direct to file name provided
|
41
|
+
#
|
42
|
+
def generate(model = nil, options = {})
|
43
|
+
|
44
|
+
mappings = "mappings:\n"
|
45
|
+
|
46
|
+
if(model)
|
47
|
+
|
48
|
+
klass = DataShift::ModelMapper.class_from_string_or_raise( model )
|
49
|
+
|
50
|
+
MethodDictionary.find_operators( klass )
|
51
|
+
|
52
|
+
MethodDictionary.build_method_details( klass )
|
53
|
+
|
54
|
+
prepare_model_headers(MethodDictionary.method_details_mgrs[klass], options)
|
55
|
+
|
56
|
+
if(options[:model_as_dest])
|
57
|
+
headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
|
58
|
+
else
|
59
|
+
headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
|
60
|
+
end
|
61
|
+
else
|
62
|
+
mappings += <<EOS
|
63
|
+
##source_column_heading_0: #dest_column_heading_0
|
64
|
+
##source_column_heading_1: #dest_column_heading_1
|
65
|
+
##source_column_heading_2: #dest_column_heading_2
|
66
|
+
|
67
|
+
EOS
|
68
|
+
end
|
69
|
+
|
70
|
+
File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
|
71
|
+
|
72
|
+
mappings
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
# Create a YAML template from an Excel spreadsheet for mapping headers
|
77
|
+
#
|
78
|
+
# * <tt>:model_as_dest</tt> - Override default treatment of using model as the SOURCE
|
79
|
+
#
|
80
|
+
# * <tt>:file</tt> - Write mappings direct to file name provided
|
81
|
+
#
|
82
|
+
def generate_from_excel(excel_file_name, options = {})
|
83
|
+
|
84
|
+
excel = Excel.new
|
85
|
+
|
86
|
+
puts "\n\n\nGenerating mapping from Excel file: #{excel_file_name}"
|
87
|
+
|
88
|
+
excel.open(excel_file_name)
|
89
|
+
|
90
|
+
sheet_number = options[:sheet_number] || 0
|
91
|
+
|
92
|
+
sheet = excel.worksheet( sheet_number )
|
93
|
+
|
94
|
+
parse_headers(sheet, options[:header_row])
|
95
|
+
|
96
|
+
mappings = "mappings:\n"
|
97
|
+
|
98
|
+
if(options[:model_as_dest])
|
99
|
+
excel_headers.each_with_index do |s, i| mappings += " #srcs_column_heading_#{i}: #{s}\n" end
|
100
|
+
else
|
101
|
+
excel_headers.each_with_index do |s, i| mappings += " #{s}: #dest_column_heading_#{i}\n" end
|
102
|
+
end
|
103
|
+
|
104
|
+
File.open(options[:file], 'w') do |f| f << mappings end if(options[:file])
|
105
|
+
|
106
|
+
mappings
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
end # DataShift
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright:: Autotelik Media Ltd
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: July 2010
|
4
|
+
# License::
|
5
|
+
#
|
6
|
+
# Details:: Simple internal representation of Csv File
|
7
|
+
|
8
|
+
require 'csv'
|
9
|
+
|
10
|
+
class CSV
|
11
|
+
|
12
|
+
include DataShift::ColumnPacker
|
13
|
+
|
14
|
+
# Helpers for dealing with Active Record models and collections
|
15
|
+
# Specify array of operators/associations to include - possible values are :
|
16
|
+
# [:assignment, :belongs_to, :has_one, :has_many]
|
17
|
+
|
18
|
+
def ar_to_headers( records, associations = nil, options = {} )
|
19
|
+
add_row( to_headers(records, associations, options) )
|
20
|
+
end
|
21
|
+
|
22
|
+
# Convert an AR instance to a set of CSV columns
|
23
|
+
# Additional non-instance data can be included by supplying a list of methods to call
|
24
|
+
# on the record
|
25
|
+
def ar_to_csv(record, options = {})
|
26
|
+
csv_data = record.serializable_hash.values.collect { |c| escape_for_csv(c) }
|
27
|
+
|
28
|
+
[*options[:methods]].each { |x| csv_data << escape_for_csv(record.send(x)) if(record.respond_to?(x)) } if(options[:methods])
|
29
|
+
|
30
|
+
add_row(csv_data)
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
data/lib/loaders/csv_loader.rb
CHANGED
@@ -11,9 +11,9 @@ require 'datashift/exceptions'
|
|
11
11
|
require 'datashift/method_mapper'
|
12
12
|
|
13
13
|
module DataShift
|
14
|
-
|
14
|
+
|
15
15
|
module CsvLoading
|
16
|
-
|
16
|
+
|
17
17
|
include DataShift::Logging
|
18
18
|
|
19
19
|
# Load data through Active Record models into DB from a CSV file
|
@@ -31,9 +31,9 @@ module DataShift
|
|
31
31
|
# [:force_inclusion] : Array of inbound column names to force into mapping
|
32
32
|
# [:include_all] : Include all headers in processing - takes precedence over :force_inclusion
|
33
33
|
# [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
|
34
|
-
|
34
|
+
|
35
35
|
def perform_csv_load(file_name, options = {})
|
36
|
-
|
36
|
+
|
37
37
|
require "csv"
|
38
38
|
|
39
39
|
# TODO - can we abstract out what a 'parsed file' is - so a common object can represent excel,csv etc
|
@@ -48,19 +48,21 @@ module DataShift
|
|
48
48
|
puts "\n\n\nLoading from CSV file: #{file_name}"
|
49
49
|
puts "Processing #{@parsed_file.size} rows"
|
50
50
|
begin
|
51
|
-
|
51
|
+
|
52
52
|
load_object_class.transaction do
|
53
53
|
@reporter.reset
|
54
54
|
|
55
55
|
@parsed_file.each_with_index do |row, i|
|
56
|
-
|
57
|
-
@current_row = row
|
58
|
-
|
56
|
+
|
57
|
+
@current_row = row
|
58
|
+
|
59
59
|
@reporter.processed_object_count += 1
|
60
|
-
|
60
|
+
|
61
|
+
logger.info("Begin processing Row #{@reporter.processed_object_count} from CSV file")
|
62
|
+
|
61
63
|
begin
|
62
64
|
# First assign any default values for columns not included in parsed_file
|
63
|
-
|
65
|
+
process_defaults
|
64
66
|
|
65
67
|
# TODO - Smart sorting of column processing order ....
|
66
68
|
# Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
|
@@ -73,64 +75,64 @@ module DataShift
|
|
73
75
|
# pulling data out of associated column
|
74
76
|
@method_mapper.method_details.each_with_index do |method_detail, col|
|
75
77
|
|
78
|
+
unless method_detail
|
79
|
+
logger.warn("No method_detail found for col #{col + 1} #{method_detail}")
|
80
|
+
next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
|
81
|
+
end
|
82
|
+
|
76
83
|
value = row[col]
|
77
84
|
|
78
|
-
|
79
|
-
|
80
|
-
process()
|
85
|
+
process(method_detail, value)
|
81
86
|
end
|
82
87
|
|
83
88
|
rescue => e
|
84
|
-
failure(
|
85
|
-
logger.error
|
86
|
-
|
89
|
+
failure(row, true)
|
90
|
+
logger.error e.inspect
|
91
|
+
logger.error e.backtrace.first.inspect
|
92
|
+
logger.error "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
|
93
|
+
|
87
94
|
if(verbose)
|
88
|
-
puts "Failed to process row [#{
|
89
|
-
puts e.inspect
|
95
|
+
puts "Failed to process row [#{@reporter.processed_object_count}] (#{@current_row})"
|
96
|
+
puts e.inspect
|
90
97
|
end
|
91
|
-
|
98
|
+
|
92
99
|
# don't forget to reset the load object
|
93
100
|
new_load_object
|
94
101
|
next
|
95
102
|
end
|
96
|
-
|
97
|
-
# TODO - make optional - all or nothing or carry on and dump out the exception list at end
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
else
|
103
|
-
logger.info "Row #{@current_row} succesfully SAVED : ID #{load_object.id}"
|
104
|
-
@reporter.add_loaded_object(@load_object)
|
105
|
-
end
|
103
|
+
|
104
|
+
# TODO - make optional - all or nothing or carry on and dump out the exception list at end
|
105
|
+
|
106
|
+
logger.debug "Attempting Save on : #{load_object.inspect}"
|
107
|
+
|
108
|
+
save_and_report
|
106
109
|
|
107
110
|
# don't forget to reset the object or we'll update rather than create
|
108
111
|
new_load_object
|
109
|
-
|
110
112
|
end
|
111
|
-
|
113
|
+
|
112
114
|
raise ActiveRecord::Rollback if(options[:dummy]) # Don't actually create/upload to DB if we are doing dummy run
|
113
115
|
end
|
114
116
|
rescue => e
|
115
|
-
|
117
|
+
logger.error "perform_csv_load failed - #{e.message}:\n#{e.backtrace}"
|
116
118
|
if e.is_a?(ActiveRecord::Rollback) && options[:dummy]
|
117
|
-
|
119
|
+
logger.info "CSV loading stage complete - Dummy run so Rolling Back."
|
118
120
|
else
|
119
121
|
raise e
|
120
122
|
end
|
121
123
|
ensure
|
122
124
|
report
|
123
|
-
end
|
124
|
-
|
125
|
+
end # transaction
|
126
|
+
|
125
127
|
end
|
126
128
|
end
|
127
|
-
|
129
|
+
|
128
130
|
class CsvLoader < LoaderBase
|
129
131
|
|
130
132
|
include DataShift::CsvLoading
|
131
133
|
|
132
|
-
def initialize(klass,
|
133
|
-
super( klass,
|
134
|
+
def initialize(klass, object = nil, options = {})
|
135
|
+
super( klass, object, options )
|
134
136
|
raise "Cannot load - failed to create a #{klass}" unless @load_object
|
135
137
|
end
|
136
138
|
|
@@ -141,5 +143,5 @@ module DataShift
|
|
141
143
|
end
|
142
144
|
|
143
145
|
end
|
144
|
-
|
146
|
+
|
145
147
|
end
|