datashift 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.markdown +91 -55
- data/VERSION +1 -1
- data/datashift.gemspec +8 -23
- data/lib/applications/jexcel_file.rb +1 -2
- data/lib/datashift.rb +34 -15
- data/lib/datashift/column_packer.rb +98 -34
- data/lib/datashift/data_transforms.rb +83 -0
- data/lib/datashift/delimiters.rb +58 -10
- data/lib/datashift/excel_base.rb +123 -0
- data/lib/datashift/exceptions.rb +45 -7
- data/lib/datashift/load_object.rb +25 -0
- data/lib/datashift/mapping_service.rb +91 -0
- data/lib/datashift/method_detail.rb +40 -62
- data/lib/datashift/method_details_manager.rb +18 -2
- data/lib/datashift/method_dictionary.rb +27 -10
- data/lib/datashift/method_mapper.rb +49 -41
- data/lib/datashift/model_mapper.rb +42 -22
- data/lib/datashift/populator.rb +258 -143
- data/lib/datashift/thor_base.rb +38 -0
- data/lib/exporters/csv_exporter.rb +57 -145
- data/lib/exporters/excel_exporter.rb +73 -60
- data/lib/generators/csv_generator.rb +65 -5
- data/lib/generators/generator_base.rb +69 -3
- data/lib/generators/mapping_generator.rb +112 -0
- data/lib/helpers/core_ext/csv_file.rb +33 -0
- data/lib/loaders/csv_loader.rb +41 -39
- data/lib/loaders/excel_loader.rb +130 -116
- data/lib/loaders/loader_base.rb +190 -146
- data/lib/loaders/paperclip/attachment_loader.rb +4 -4
- data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
- data/lib/loaders/paperclip/image_loading.rb +9 -7
- data/lib/loaders/reporter.rb +17 -8
- data/lib/thor/export.thor +12 -13
- data/lib/thor/generate.thor +1 -9
- data/lib/thor/import.thor +13 -24
- data/lib/thor/mapping.thor +65 -0
- data/spec/Gemfile +13 -11
- data/spec/Gemfile.lock +98 -93
- data/spec/csv_exporter_spec.rb +104 -99
- data/spec/csv_generator_spec.rb +159 -0
- data/spec/csv_loader_spec.rb +197 -16
- data/spec/datashift_spec.rb +9 -0
- data/spec/excel_exporter_spec.rb +149 -58
- data/spec/excel_generator_spec.rb +35 -44
- data/spec/excel_loader_spec.rb +196 -178
- data/spec/excel_spec.rb +8 -5
- data/spec/loader_base_spec.rb +47 -7
- data/spec/mapping_spec.rb +117 -0
- data/spec/method_dictionary_spec.rb +24 -11
- data/spec/method_mapper_spec.rb +5 -7
- data/spec/model_mapper_spec.rb +41 -0
- data/spec/paperclip_loader_spec.rb +3 -6
- data/spec/populator_spec.rb +48 -14
- data/spec/spec_helper.rb +85 -73
- data/spec/thor_spec.rb +40 -5
- metadata +93 -86
- data/lib/applications/excel_base.rb +0 -63
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: March 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: Stores defaults, substitutions, overrides etc
|
7
|
+
# that can be applied to incoming data while being Populated
|
8
|
+
#
|
9
|
+
# WORK IN PROGRESS
|
10
|
+
|
11
|
+
module DataShift
|
12
|
+
|
13
|
+
module Transformations
|
14
|
+
|
15
|
+
# Default values and overrides can be provided in a YAML config file (processed through ERB).
|
16
|
+
#
|
17
|
+
# Format :
|
18
|
+
#
|
19
|
+
# Load Class: (e.g Spree:Product)
|
20
|
+
# datashift_defaults:
|
21
|
+
# value_as_string: "Default Project Value"
|
22
|
+
# category: reference:category_002
|
23
|
+
#
|
24
|
+
# datashift_overrides:
|
25
|
+
# value_as_double: 99.23546
|
26
|
+
#
|
27
|
+
#
|
28
|
+
# datashift_substitutions:
|
29
|
+
#
|
30
|
+
#
|
31
|
+
|
32
|
+
# Common behaviour for a set of transforms keyed by operator, where an operator
# is the key callers pass to #set, #apply and #has_transform?.
#
# NOTE(review): the file header marks this as work in progress. The original
# referenced two names that were never defined (+override_values+ and
# +perform_transformcurrent_value+), so every public method except #transforms
# raised as soon as it was used. They are now backed by the single #transforms
# store; confirm this matches the intended design.
class Base

  include DataShift::Logging

  # Read the transforms section for +load_object_class+ from +yaml_file+.
  # The file is run through ERB before being parsed as YAML.
  #
  # NOTE(review): the section is located but nothing is done with it yet,
  # exactly as in the original. Returns nil.
  #
  # NOTE(review): YAML::load can instantiate arbitrary objects; only point this
  # at trusted files, or switch to YAML.safe_load once the schema is settled.
  def configure_from(load_object_class, yaml_file)

    data = YAML::load( ERB.new( IO.read(yaml_file) ).result )

    # An empty or scalar document has no per-class sections to look up.
    return unless data.is_a?(Hash)

    if(data[load_object_class.name])
      # TODO - populate transforms from the class specific section
    end
  end

  # Set a value to be used to populate Model.operator
  # Generally over-rides will be used regardless of what value caller supplied.
  def set( operator, value )
    override_values[operator] = value
  end

  # Hash of operator => transform value, created on first use.
  def transforms
    @transforms ||= {}
  end

  # Returns the transformed value for +operator+, or nil when no transform has
  # been registered for it (the original also returned nil in that case).
  def apply( operator, current_value )
    return unless has_transform?(operator)

    perform_transform(operator, current_value)
  end

  # True when a value has been registered for +operator+ via #set.
  def has_transform?( operator )
    override_values.has_key?(operator)
  end

  private

  # Name used by #set and #has_transform? for the store; kept as an alias of
  # #transforms so that values written by #set are visible to #apply.
  def override_values
    transforms
  end

  # Default transform : the stored value replaces whatever was supplied.
  # Subclasses may redefine this to combine the stored value with +_current_value+.
  def perform_transform( operator, _current_value )
    transforms[operator]
  end

end
|
67
|
+
|
68
|
+
# Set of transforms tagged as substitutions.
class Substitution < Base

  # Symbol identifying the kind of transform held - always :substitution
  def type; :substitution; end

end
|
74
|
+
|
75
|
+
# Set of transforms tagged as overrides.
class Override < Base

  # Symbol identifying the kind of transform held - always :override
  def type; :override; end

end
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
data/lib/datashift/delimiters.rb
CHANGED
@@ -10,9 +10,26 @@
|
|
10
10
|
#
|
11
11
|
module DataShift
|
12
12
|
|
13
|
-
|
14
13
|
module Delimiters
|
15
14
|
|
15
|
+
# I made these class methods, feeling delims are 'global'
|
16
|
+
# I am no longer sure whether that is a good pattern or not
|
17
|
+
|
18
|
+
|
19
|
+
# As well as just the column name, support embedding find operators for that column
|
20
|
+
# in the heading .. i.e Column header => 'BlogPosts:user_id'
|
21
|
+
# ... association has many BlogPosts selected via find_by_user_id
|
22
|
+
#
|
23
|
+
# in the heading .. i.e Column header => 'BlogPosts:user_name:John Smith'
|
24
|
+
# ... association has many BlogPosts selected via find_by_user_name("John Smith")
|
25
|
+
#
|
26
|
+
# Delimiter used inside a column heading, e.g 'BlogPosts:user_id'.
# Falls back to ':' until a different value has been set.
def self.column_delim
  @column_delim ||= ':'
end
|
30
|
+
|
31
|
+
# Replace the column heading delimiter; returns the value assigned.
def self.set_column_delim(x)
  @column_delim = x
end
|
32
|
+
|
16
33
|
|
17
34
|
# Support multiple associations being added to a base object to be specified in a single column.
|
18
35
|
#
|
@@ -40,19 +57,31 @@ module DataShift
|
|
40
57
|
end
|
41
58
|
|
42
59
|
# Replace the name/value delimiter; returns the value assigned.
def self.set_name_value_delim(x)
  @name_value_delim = x
end
|
43
|
-
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
|
49
|
-
|
60
|
+
|
61
|
+
|
62
|
+
# The simple separator for a list of values whether it be
|
63
|
+
# "Colour:red,green,blue".split(Delimiters::multi_value_delim) => [red,green,blue]
|
64
|
+
# {name => value, n2 => v2}.split(Delimiters::multi_value_delim) => [ [name => value], [n2 => v2] ]
|
65
|
+
|
50
66
|
# Delimiter between the entries of a list of values.
# Falls back to ',' until a different value has been set.
def self.multi_value_delim
  @multi_value_delim ||= ','
end
|
54
69
|
|
55
70
|
# Replace the list-of-values delimiter; returns the value assigned.
def self.set_multi_value_delim(x)
  @multi_value_delim = x
end
|
71
|
+
|
72
|
+
# Objects can be created with multiple facets in single columns.
|
73
|
+
# In this example a single Product can be configured with a consolidated mime and print types
|
74
|
+
#
|
75
|
+
# mime_type:jpeg,PDF ; print_type:colour equivalent to
|
76
|
+
#
|
77
|
+
# => mime_type:jpeg;print_type:colour | mime_type:PDF; print_type:colour
|
78
|
+
|
79
|
+
# Delimiter between the facets packed into a single column,
# e.g 'mime_type:jpeg,PDF ; print_type:colour'.
# Falls back to ';' until a different value has been set.
def self.multi_facet_delim
  @multi_facet_delim ||= ';'
end

# Replace the facet delimiter; returns the value assigned.
# Named to match the other set_*_delim helpers.
def self.set_multi_facet_delim(x)
  @multi_facet_delim = x
end

# Original (mis-spelt) name of set_multi_facet_delim, kept so existing callers still work.
def self.setmulti_facet_delim(x)
  set_multi_facet_delim(x)
end
|
84
|
+
|
56
85
|
|
57
86
|
# Multiple objects can be embedded in single columns.
|
58
87
|
# In this example a single Category column contains 3 separate entries, New, SecondHand, Retro
|
@@ -92,11 +121,30 @@ module DataShift
|
|
92
121
|
|
93
122
|
# Assignment form of set_csv_delim.
def self.csv_delim=(x)
  set_csv_delim(x)
end

# Replace the CSV delimiter; returns the value assigned.
def self.set_csv_delim(x)
  @csv_delim = x
end
|
95
|
-
|
124
|
+
|
96
125
|
# End of line marker - a single newline character.
def self.eol; "\n"; end
|
99
128
|
|
129
|
+
# surround text in suitable quotes e.g "hello world, how are you" => ' "hello world, how are you" '
|
130
|
+
# Quote character for surrounding text.
# Falls back to a single quote until a different value has been assigned.
def text_delim
  @text_delim ||= "'"
end
|
133
|
+
|
134
|
+
# Replace the quote character returned by text_delim.
def text_delim=(x); @text_delim = x; end
|
137
|
+
|
138
|
+
# separator for identifying normal key value pairs
|
139
|
+
|
140
|
+
# Separator between a key and its value.
# Falls back to '=>' until a different value has been assigned.
#
# TODO check Ruby version and use appropriate hash style ?
def self.key_value_sep
  @key_value_sep ||= '=>'
end
|
143
|
+
|
144
|
+
# Replace the key/value separator.
def self.key_value_sep=(x); @key_value_sep = x; end
|
147
|
+
|
100
148
|
end
|
101
149
|
|
102
150
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# To change this template, choose Tools | Templates
|
2
|
+
# and open the template in the editor.
|
3
|
+
|
4
|
+
# Helpers mixed into the Excel wrappers : sheet name sanitising, header parsing
# and writing Active Record data into cells.
#
# The including class must supply +set_headers+ and +[]=+ (row, column => value);
# both are called here but neither is defined in this module.
module ExcelBase

  # Strip the characters [ ] : * / \ ? from a proposed sheet name
  def sanitize_sheet_name( name )
    name.gsub(/[\[\]:\*\/\\\?]/, '')
  end

  # TODO -revisit/refactor - maybe this should just be on the base class Excel
  # so you can call direct like excel.parse_headers(options[:header_row])
  # rather than
  #   sheet = excel.worksheet( sheet_number )
  #   parse_headers(sheet, options[:header_row])

  attr_accessor :header_row_index, :excel_headers

  # Collect the stripped header text from row +header_row+ of +sheet+.
  # A nil +header_row+ is treated as row 0.
  #
  # Stores the row index in header_row_index, and stores and returns the
  # headers as excel_headers.
  #
  # Raises DataShift::MissingHeadersError when the sheet has no such row.
  def parse_headers( sheet, header_row = 0 )

    @header_row_index = header_row || 0

    row = sheet.row(header_row_index)

    # Fully qualified : this module is declared outside the DataShift namespace,
    # so the bare constant is not visible lexically.
    unless row
      raise DataShift::MissingHeadersError,
            "No headers found - Check Sheet #{sheet} is complete and Row #{header_row_index} contains headers"
    end

    @excel_headers = []

    # TODO - make more robust - currently end on first empty column
    # There is no actual max columns in Excel .. you will run out of memory though at some point
    (0..1024).each do |column|
      cell = row[column]
      break unless cell

      header = cell.to_s.strip
      break if header.empty?

      @excel_headers << header
    end

    @excel_headers
  end

  # Helpers for dealing with Active Record models and collections
  # Specify array of operators/associations to include - possible values are :
  #     [:assignment, :belongs_to, :has_one, :has_many]
  #
  # options[:only] - a single name or list of names; when given, only method
  # details with those names are included. When absent nothing is filtered.
  #
  # The resulting list is handed to set_headers. Returns nil without calling it
  # when +records+ is empty or does not hold Active Record objects.
  def ar_to_headers( records, associations = nil, options = {} )
    return if( records.empty? || !records.first.is_a?(ActiveRecord::Base) )

    # The original splatted the whole ternary, so a missing :only became []
    # (truthy) and filtered out every association header.
    only = options[:only] ? [*options[:only]].map(&:to_sym) : nil

    headers = []

    if associations
      details_mgr = DataShift::MethodDictionary.method_details_mgrs[records.first.class]

      if details_mgr
        [*associations].each do |a|
          details_mgr.get_list(a).each do |md|
            next if( only && !only.include?( md.name.to_sym ) )

            headers << md.operator.to_s
          end
        end
      end
    else
      headers = records.first.class.columns.collect( &:name )
    end

    set_headers( headers )
  end

  # Pass a set of AR records
  #
  # Writes one row per record, preceded by a header row unless
  # options[:no_headers] is set.
  def ar_to_xls(records, options = {})
    return if( records.empty? || !records.first.is_a?(ActiveRecord::Base) )

    row_index = 0

    unless options[:no_headers]
      ar_to_headers( records )
      row_index = 1
    end

    records.each do |record|
      ar_to_xls_row(row_index, 0, record)

      row_index += 1
    end
  end

  # Save data from an AR record to the current row, based on the record's columns [c1,c2,c3]
  # Returns the index of the column after the last one written to
  def ar_to_xls_row(row, start_column, record)
    return unless( record.is_a?(ActiveRecord::Base))

    column = start_column

    record.class.columns.each do |connection_column|
      ar_to_xls_cell(row, column, record, connection_column)
      column += 1
    end

    column
  end

  # Write a single attribute of +record+ into the cell at row/column.
  # Failures are reported on stdout and otherwise ignored, so one bad value
  # does not abort the whole export.
  def ar_to_xls_cell(row, column, record, connection_column)
    datum = record.send(connection_column.name)

    self[row, column] = datum
  rescue => e
    puts "Failed to export #{datum} from #{connection_column.inspect} to column #{column}"
    puts e, e.backtrace
  end

end
|
data/lib/datashift/exceptions.rb
CHANGED
@@ -1,22 +1,60 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2014
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: June 2014
|
4
|
+
# License:: Free, Open Source.
|
5
|
+
#
|
6
|
+
|
1
7
|
module DataShift
|
8
|
+
|
9
|
+
# Base class for DataShift errors; the message is written to the log
# whenever an instance is created.
class DataShiftException < StandardError

  include DataShift::Logging

  # msg:: description of the failure. Optional, so that a bare
  #       `raise SomeDataShiftError` works instead of failing with ArgumentError;
  #       the message then defaults to the class name.
  def initialize( msg = nil )
    super
    logger.error( msg || message )
  end

  # Define a new subclass of DataShiftException and register it as DataShift::<name>.
  # Returns the new class.
  #
  # The subclass inherits #initialize, so it also accepts an optional message.
  def self.generate( name )
    DataShift.const_set(name, Class.new(DataShiftException))
  end
end
|
28
|
+
|
29
|
+
# DataShiftException subclass with no behaviour of its own; construction
# is inherited unchanged from DataShiftException.
class NilDataSuppliedError < DataShiftException
end
|
34
|
+
|
3
35
|
# Plain StandardError subclasses - they do not go through DataShiftException.
class BadRuby < StandardError
end

class UnsupportedFileType < StandardError
end

class BadFile < StandardError
end

class MappingDefinitionError < StandardError
end
|
9
|
-
|
41
|
+
|
10
42
|
|
11
43
|
# Plain StandardError subclasses - they do not go through DataShiftException.
class MissingHeadersError < StandardError
end

class MissingMandatoryError < StandardError
end
|
13
45
|
|
14
|
-
class RecordNotFound < StandardError; end
|
15
|
-
|
16
46
|
# Plain StandardError subclasses - they do not go through DataShiftException.
class PathError < StandardError
end

class BadUri < StandardError
end
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
# Exception classes generated under the DataShift namespace, in the order listed.
%w[
  ConnectionError
  CouldNotAssignAssociation
  CreateAttachmentFailed
  DataProcessingError
  FileNotFound
  NoSuchClassError
  MissingConfigOptionError
  RecordNotFound
  SaveError
].each do |error_name|
  DataShift::DataShiftException.generate( error_name )
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: March 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: Manage the current loader object
|
7
|
+
#
|
8
|
+
require 'to_b'
|
9
|
+
require 'logging'
|
10
|
+
|
11
|
+
module DataShift
|
12
|
+
|
13
|
+
# Holder for the object currently being loaded.
class LoadObject

  include DataShift::Logging

  # The held object - may be read and replaced by callers.
  attr_reader :load_object
  attr_writer :load_object

  # current_object:: the object to hold initially, nil when not supplied
  def initialize( current_object = nil)
    @load_object = current_object
  end

end
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Aug 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: A cache type class that stores details of a source=>destination mapping
|
7
|
+
#
|
8
|
+
require 'erubis'
|
9
|
+
|
10
|
+
module DataShift
|
11
|
+
|
12
|
+
# Reads a mapping file - YAML pre-processed as an Erubis template - for a class
# and exposes its top level entries as attributes.
class MappingService

  include DataShift::Logging

  # N.B :mapping_entry is an OpenStruct data structure
  # that provides definition of config entries as attributes with their accompanying values.
  # So if you had top level config entries in the YAML called path & full_name, you can call
  #     mapping.path
  #     mapping.full_name etc
  #
  # For the underlying parsed YAML use yaml_data; raw_data holds the unprocessed file contents.

  attr_reader :mapped_class_name, :map_file_name

  attr_reader :raw_data, :yaml_data, :mapping_entry

  # klass:: the class being mapped; only its name is retained
  def initialize( klass )
    @mapped_class_name = klass.name
  end

  # Load the mapping held in +file+.
  #
  # Raises FileNotFound when +file+ is nil or does not exist.
  # Template and YAML errors are logged and re-raised.
  #
  # NOTE(review): +key+ is passed to set_key_config!, which is not defined in this
  # class; the call therefore falls through to method_missing. Confirm whether
  # selecting a sub-section by key is still to be implemented.
  def read( file, key = nil )

    @map_file_name = file

    # File.exist? rather than File.exists?, which is deprecated and removed in Ruby 3.2
    unless(map_file_name && File.exist?(map_file_name))
      logger.error "Cannot open mapping file - #{map_file_name} - file does not exist."
      raise FileNotFound.new("Cannot open mapping file - #{map_file_name}")
    end

    begin
      # Load application configuration
      set_mapping( map_file_name )

      set_key_config!( key ) if key
    rescue => e
      puts e.inspect
      logger.error "Failed to parse config file #{map_file_name} - bad YAML ?"
      raise e
    end
  end

  # OpenStruct not a hash .. supports form ... config.path, config.full_name etc
  #
  # Unknown methods are forwarded, without their arguments, to the mapping entry.
  # Before a mapping has been loaded there is nothing to forward to, so the
  # standard NoMethodError for this object is raised.
  def method_missing(method, *args, &block)
    return super unless @mapping_entry

    @mapping_entry.send(method)
  end

  # Keep respond_to? in step with method_missing.
  def respond_to_missing?(method, include_private = false)
    (@mapping_entry && @mapping_entry.respond_to?(method)) || super
  end

  private

  # Read +file+, expand it as a template, parse the result as YAML and wrap the
  # parsed data in an OpenStruct. Each failing stage is reported and re-raised.
  def set_mapping( file )

    @raw_data = File.read(file)

    erb = begin
      Erubis::Eruby.new(raw_data).result
    rescue => e
      puts "Failed to parse erb template #{file} error: #{e.inspect}"

      logger.error "Config template error: #{e.inspect}"

      raise e
    end

    begin
      # NOTE(review): YAML.load can instantiate arbitrary objects - only use with trusted mapping files
      @yaml_data = YAML.load(erb)

      logger.info "Loaded YAML config from [#{file}]"

    rescue => e
      puts "YAML parse error: #{e.inspect}"
      logger.error "YAML parse error: #{e.inspect}"
      raise e
    end

    @mapping_entry = OpenStruct.new(yaml_data)
  end

end
|
90
|
+
|
91
|
+
end
|