datashift 0.15.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.markdown +91 -55
- data/VERSION +1 -1
- data/datashift.gemspec +8 -23
- data/lib/applications/jexcel_file.rb +1 -2
- data/lib/datashift.rb +34 -15
- data/lib/datashift/column_packer.rb +98 -34
- data/lib/datashift/data_transforms.rb +83 -0
- data/lib/datashift/delimiters.rb +58 -10
- data/lib/datashift/excel_base.rb +123 -0
- data/lib/datashift/exceptions.rb +45 -7
- data/lib/datashift/load_object.rb +25 -0
- data/lib/datashift/mapping_service.rb +91 -0
- data/lib/datashift/method_detail.rb +40 -62
- data/lib/datashift/method_details_manager.rb +18 -2
- data/lib/datashift/method_dictionary.rb +27 -10
- data/lib/datashift/method_mapper.rb +49 -41
- data/lib/datashift/model_mapper.rb +42 -22
- data/lib/datashift/populator.rb +258 -143
- data/lib/datashift/thor_base.rb +38 -0
- data/lib/exporters/csv_exporter.rb +57 -145
- data/lib/exporters/excel_exporter.rb +73 -60
- data/lib/generators/csv_generator.rb +65 -5
- data/lib/generators/generator_base.rb +69 -3
- data/lib/generators/mapping_generator.rb +112 -0
- data/lib/helpers/core_ext/csv_file.rb +33 -0
- data/lib/loaders/csv_loader.rb +41 -39
- data/lib/loaders/excel_loader.rb +130 -116
- data/lib/loaders/loader_base.rb +190 -146
- data/lib/loaders/paperclip/attachment_loader.rb +4 -4
- data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
- data/lib/loaders/paperclip/image_loading.rb +9 -7
- data/lib/loaders/reporter.rb +17 -8
- data/lib/thor/export.thor +12 -13
- data/lib/thor/generate.thor +1 -9
- data/lib/thor/import.thor +13 -24
- data/lib/thor/mapping.thor +65 -0
- data/spec/Gemfile +13 -11
- data/spec/Gemfile.lock +98 -93
- data/spec/csv_exporter_spec.rb +104 -99
- data/spec/csv_generator_spec.rb +159 -0
- data/spec/csv_loader_spec.rb +197 -16
- data/spec/datashift_spec.rb +9 -0
- data/spec/excel_exporter_spec.rb +149 -58
- data/spec/excel_generator_spec.rb +35 -44
- data/spec/excel_loader_spec.rb +196 -178
- data/spec/excel_spec.rb +8 -5
- data/spec/loader_base_spec.rb +47 -7
- data/spec/mapping_spec.rb +117 -0
- data/spec/method_dictionary_spec.rb +24 -11
- data/spec/method_mapper_spec.rb +5 -7
- data/spec/model_mapper_spec.rb +41 -0
- data/spec/paperclip_loader_spec.rb +3 -6
- data/spec/populator_spec.rb +48 -14
- data/spec/spec_helper.rb +85 -73
- data/spec/thor_spec.rb +40 -5
- metadata +93 -86
- data/lib/applications/excel_base.rb +0 -63
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: March 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: Stores defaults, substitutions, overrides etc
|
7
|
+
# that can be applied to incoming data while being Populated
|
8
|
+
#
|
9
|
+
# WORK IN PROGRESS
|
10
|
+
|
11
|
+
module DataShift
|
12
|
+
|
13
|
+
module Transformations
|
14
|
+
|
15
|
+
# Default values and overrides can be provided in a YAML config file (ERB tags are evaluated before parsing).
|
16
|
+
#
|
17
|
+
# Format :
|
18
|
+
#
|
19
|
+
# Load Class: (e.g. Spree::Product)
|
20
|
+
# datashift_defaults:
|
21
|
+
# value_as_string: "Default Project Value"
|
22
|
+
# category: reference:category_002
|
23
|
+
#
|
24
|
+
# datashift_overrides:
|
25
|
+
# value_as_double: 99.23546
|
26
|
+
#
|
27
|
+
#
|
28
|
+
# datashift_substitutions:
|
29
|
+
#
|
30
|
+
#
|
31
|
+
|
32
|
+
class Base
|
33
|
+
|
34
|
+
include DataShift::Logging
|
35
|
+
|
36
|
+
# Map a Column to all relevant transforms
|
37
|
+
|
38
|
+
def configure_from(load_object_class, yaml_file)
|
39
|
+
|
40
|
+
data = YAML::load( ERB.new( IO.read(yaml_file) ).result )
|
41
|
+
|
42
|
+
if(data[load_object_class.name])
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Set a value to be used to populate Model.operator
|
47
|
+
# Generally overrides will be used regardless of the value the caller supplied.
|
48
|
+
def set( operator, value )
|
49
|
+
override_values[operator] = value
|
50
|
+
end
|
51
|
+
|
52
|
+
def transforms
|
53
|
+
@transforms ||= {}
|
54
|
+
end
|
55
|
+
|
56
|
+
def apply( operator, current_value )
|
57
|
+
if(transforms[operator])
|
58
|
+
perform_transformcurrent_value()
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def has_transform?( operator )
|
63
|
+
return override_values.has_key?(operator)
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
class Substitution < Base
|
69
|
+
|
70
|
+
def type
|
71
|
+
:substitution
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
class Override < Base
|
76
|
+
|
77
|
+
def type
|
78
|
+
:override
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
data/lib/datashift/delimiters.rb
CHANGED
@@ -10,9 +10,26 @@
|
|
10
10
|
#
|
11
11
|
module DataShift
|
12
12
|
|
13
|
-
|
14
13
|
module Delimiters
|
15
14
|
|
15
|
+
# I made these class methods, feeling delims are 'global'
|
16
|
+
# I am no longer sure whether that is a good pattern or not
|
17
|
+
|
18
|
+
|
19
|
+
# As well as just the column name, support embedding find operators for that column
|
20
|
+
# in the heading .. i.e Column header => 'BlogPosts:user_id'
|
21
|
+
# ... association has many BlogPosts selected via find_by_user_id
|
22
|
+
#
|
23
|
+
# in the heading .. i.e Column header => 'BlogPosts:user_name:John Smith'
|
24
|
+
# ... association has many BlogPosts selected via find_by_user_name("John Smith")
|
25
|
+
#
|
26
|
+
def self.column_delim
|
27
|
+
@column_delim ||= ':'
|
28
|
+
@column_delim
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.set_column_delim(x) @column_delim = x; end
|
32
|
+
|
16
33
|
|
17
34
|
# Support multiple associations being added to a base object to be specified in a single column.
|
18
35
|
#
|
@@ -40,19 +57,31 @@ module DataShift
|
|
40
57
|
end
|
41
58
|
|
42
59
|
def self.set_name_value_delim(x) @name_value_delim = x; end
|
43
|
-
|
44
|
-
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
|
49
|
-
|
60
|
+
|
61
|
+
|
62
|
+
# The simple separator for a list of values, whether plain values or name/value pairs, e.g.
|
63
|
+
# "Colour:red,green,blue".split(Delimiters::multi_value_delim) => [red,green,blue]
|
64
|
+
# {name => value, n2 => v2}.split(Delimiters::multi_value_delim) => [ [name => value], [n2 => v2] ]
|
65
|
+
|
50
66
|
def self.multi_value_delim
|
51
67
|
@multi_value_delim ||= ','
|
52
|
-
@multi_value_delim
|
53
68
|
end
|
54
69
|
|
55
70
|
def self.set_multi_value_delim(x) @multi_value_delim = x; end
|
71
|
+
|
72
|
+
# Objects can be created with multiple facets in single columns.
|
73
|
+
# In this example a single Product can be configured with a consolidated mime and print types
|
74
|
+
#
|
75
|
+
# mime_type:jpeg,PDF ; print_type:colour equivalent to
|
76
|
+
#
|
77
|
+
# => mime_type:jpeg;print_type:colour | mime_type:PDF; print_type:colour
|
78
|
+
|
79
|
+
def self.multi_facet_delim
|
80
|
+
@multi_facet_delim ||= ';'
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.setmulti_facet_delim(x) @multi_facet_delim = x; end
|
84
|
+
|
56
85
|
|
57
86
|
# Multiple objects can be embedded in single columns.
|
58
87
|
# In this example a single Category column contains 3 separate entries, New, SecondHand, Retro
|
@@ -92,11 +121,30 @@ module DataShift
|
|
92
121
|
|
93
122
|
def self.csv_delim=(x) set_csv_delim(x); end
|
94
123
|
def self.set_csv_delim(x) @csv_delim = x; end
|
95
|
-
|
124
|
+
|
96
125
|
def self.eol
|
97
126
|
"\n"
|
98
127
|
end
|
99
128
|
|
129
|
+
# surround text in suitable quotes e.g "hello world, how are you" => ' "hello world, how are you" '
|
130
|
+
def text_delim
|
131
|
+
@text_delim ||= "\'"
|
132
|
+
end
|
133
|
+
|
134
|
+
def text_delim=(x)
|
135
|
+
@text_delim = x
|
136
|
+
end
|
137
|
+
|
138
|
+
# separator for identifying normal key value pairs
|
139
|
+
|
140
|
+
def self.key_value_sep
|
141
|
+
@key_value_sep ||= "=>" #TODO check Ruby version and use appropriate hash style ?
|
142
|
+
end
|
143
|
+
|
144
|
+
def self.key_value_sep=(x)
|
145
|
+
@key_value_sep = x
|
146
|
+
end
|
147
|
+
|
100
148
|
end
|
101
149
|
|
102
150
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
# To change this template, choose Tools | Templates
|
2
|
+
# and open the template in the editor.
|
3
|
+
|
4
|
+
module ExcelBase
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
def sanitize_sheet_name( name )
|
9
|
+
name.gsub(/[\[\]:\*\/\\\?]/, '')
|
10
|
+
end
|
11
|
+
|
12
|
+
|
13
|
+
# TODO -revisit/refactor - maybe this should just be on the base class Excel
|
14
|
+
# so you can call direct like excel.parse_headers(options[:header_row])
|
15
|
+
# rather than
|
16
|
+
# sheet = excel.worksheet( sheet_number )
|
17
|
+
# parse_headers(sheet, options[:header_row])
|
18
|
+
|
19
|
+
attr_accessor :header_row_index, :excel_headers
|
20
|
+
|
21
|
+
def parse_headers( sheet, header_row = 0 )
|
22
|
+
|
23
|
+
@header_row_index = header_row || 0
|
24
|
+
|
25
|
+
header_row = sheet.row(header_row_index)
|
26
|
+
|
27
|
+
raise MissingHeadersError, "No headers found - Check Sheet #{sheet} is complete and Row #{header_row_index} contains headers" unless(header_row)
|
28
|
+
|
29
|
+
@excel_headers = []
|
30
|
+
|
31
|
+
# TODO - make more robust - currently ends on the first empty column
|
32
|
+
# Excel itself caps columns (256 in .xls, 16,384 in .xlsx); this scan checks at most the first 1025 header cells
|
33
|
+
(0..1024).each do |column|
|
34
|
+
cell = header_row[column]
|
35
|
+
break unless cell
|
36
|
+
header = "#{cell.to_s}".strip
|
37
|
+
break if header.empty?
|
38
|
+
@excel_headers << header
|
39
|
+
end
|
40
|
+
|
41
|
+
@excel_headers
|
42
|
+
end
|
43
|
+
|
44
|
+
# Helpers for dealing with Active Record models and collections
|
45
|
+
# Specify array of operators/associations to include - possible values are :
|
46
|
+
# [:assignment, :belongs_to, :has_one, :has_many]
|
47
|
+
|
48
|
+
def ar_to_headers( records, associations = nil, options = {} )
|
49
|
+
return if( !records.first.is_a?(ActiveRecord::Base) || records.empty?)
|
50
|
+
|
51
|
+
only = *options[:only] ? [*options[:only]] : nil
|
52
|
+
|
53
|
+
headers =[]
|
54
|
+
|
55
|
+
if associations
|
56
|
+
details_mgr = DataShift::MethodDictionary.method_details_mgrs[records.first.class]
|
57
|
+
|
58
|
+
[*associations].each do |a|
|
59
|
+
|
60
|
+
details_mgr.get_list(a).each do |md|
|
61
|
+
|
62
|
+
next if(only && !only.include?( md.name.to_sym ) )
|
63
|
+
|
64
|
+
puts md.name.to_sym.inspect
|
65
|
+
|
66
|
+
headers << "#{md.operator}"
|
67
|
+
|
68
|
+
end
|
69
|
+
end if(details_mgr)
|
70
|
+
|
71
|
+
else
|
72
|
+
|
73
|
+
headers = records.first.class.columns.collect( &:name )
|
74
|
+
end
|
75
|
+
|
76
|
+
set_headers( headers )
|
77
|
+
end
|
78
|
+
|
79
|
+
|
80
|
+
# Pass a set of AR records
|
81
|
+
def ar_to_xls(records, options = {})
|
82
|
+
return if( ! records.first.is_a?(ActiveRecord::Base) || records.empty?)
|
83
|
+
|
84
|
+
row_index =
|
85
|
+
if(options[:no_headers])
|
86
|
+
0
|
87
|
+
else
|
88
|
+
ar_to_headers( records )
|
89
|
+
1
|
90
|
+
end
|
91
|
+
|
92
|
+
records.each do |record|
|
93
|
+
ar_to_xls_row(row_index, 0, record)
|
94
|
+
|
95
|
+
row_index += 1
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
# Save data from an AR record to the current row, based on the record's columns [c1,c2,c3]
|
101
|
+
# Returns the number of the final column written to
|
102
|
+
def ar_to_xls_row(row, start_column, record)
|
103
|
+
return unless( record.is_a?(ActiveRecord::Base))
|
104
|
+
|
105
|
+
column = start_column
|
106
|
+
record.class.columns.each do |connection_column|
|
107
|
+
ar_to_xls_cell(row, column, record, connection_column)
|
108
|
+
column += 1
|
109
|
+
end
|
110
|
+
column
|
111
|
+
end
|
112
|
+
|
113
|
+
def ar_to_xls_cell(row, column, record, connection_column)
|
114
|
+
begin
|
115
|
+
datum = record.send(connection_column.name)
|
116
|
+
|
117
|
+
self[row, column] = datum
|
118
|
+
rescue => e
|
119
|
+
puts "Failed to export #{datum} from #{connection_column.inspect} to column #{column}"
|
120
|
+
puts e, e.backtrace
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
data/lib/datashift/exceptions.rb
CHANGED
@@ -1,22 +1,60 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2014
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: June 2014
|
4
|
+
# License:: Free, Open Source.
|
5
|
+
#
|
6
|
+
|
1
7
|
module DataShift
|
8
|
+
|
9
|
+
class DataShiftException < StandardError
|
10
|
+
|
11
|
+
include DataShift::Logging
|
12
|
+
|
13
|
+
def initialize( msg )
|
14
|
+
super
|
15
|
+
logger.error( msg)
|
16
|
+
end
|
2
17
|
|
18
|
+
def self.generate name
|
19
|
+
new_class = Class.new(DataShiftException) do
|
20
|
+
def initialize( msg )
|
21
|
+
super( msg )
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
DataShift.const_set(name, new_class)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class NilDataSuppliedError < DataShiftException
|
30
|
+
def initialize( msg )
|
31
|
+
super( msg )
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
3
35
|
class BadRuby < StandardError; end
|
4
36
|
|
5
37
|
class UnsupportedFileType < StandardError; end
|
6
38
|
class BadFile < StandardError; end
|
7
39
|
|
8
40
|
class MappingDefinitionError < StandardError; end
|
9
|
-
|
41
|
+
|
10
42
|
|
11
43
|
class MissingHeadersError < StandardError; end
|
12
44
|
class MissingMandatoryError < StandardError; end
|
13
45
|
|
14
|
-
class RecordNotFound < StandardError; end
|
15
|
-
|
16
46
|
class PathError < StandardError; end
|
17
47
|
|
18
48
|
class BadUri < StandardError; end
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
DataShift::DataShiftException.generate( "ConnectionError")
|
53
|
+
DataShift::DataShiftException.generate( "CouldNotAssignAssociation")
|
54
|
+
DataShift::DataShiftException.generate( "CreateAttachmentFailed")
|
55
|
+
DataShift::DataShiftException.generate( "DataProcessingError")
|
56
|
+
DataShift::DataShiftException.generate( "FileNotFound")
|
57
|
+
DataShift::DataShiftException.generate( "NoSuchClassError")
|
58
|
+
DataShift::DataShiftException.generate( "MissingConfigOptionError")
|
59
|
+
DataShift::DataShiftException.generate( "RecordNotFound")
|
60
|
+
DataShift::DataShiftException.generate( "SaveError")
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: March 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: Manage the current loader object
|
7
|
+
#
|
8
|
+
require 'to_b'
|
9
|
+
require 'logging'
|
10
|
+
|
11
|
+
module DataShift
|
12
|
+
|
13
|
+
class LoadObject
|
14
|
+
|
15
|
+
include DataShift::Logging
|
16
|
+
|
17
|
+
attr_accessor :load_object
|
18
|
+
|
19
|
+
def initialize( current_object = nil)
|
20
|
+
@load_object = current_object
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2015
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Aug 2015
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: A cache type class that stores details of a source=>destination mapping
|
7
|
+
#
|
8
|
+
require 'erubis'
|
9
|
+
|
10
|
+
module DataShift
|
11
|
+
|
12
|
+
class MappingService
|
13
|
+
|
14
|
+
include DataShift::Logging
|
15
|
+
|
16
|
+
# N.B :mapping_entry is an OpenStruct data structure
|
17
|
+
# that provides definition of config entries as attributes with their accompanying values.
|
18
|
+
# So if you had top level config entries in the YAML called path & full_name, you can call
|
19
|
+
# config.path
|
20
|
+
# config.full_name etc
|
21
|
+
#
|
22
|
+
# For a more Hash like representation use config.yaml or config[:attribute]
|
23
|
+
|
24
|
+
attr_reader :mapped_class_name, :map_file_name
|
25
|
+
|
26
|
+
attr_reader :raw_data, :yaml_data, :mapping_entry
|
27
|
+
|
28
|
+
def initialize( klass )
|
29
|
+
@mapped_class_name = klass.name
|
30
|
+
end
|
31
|
+
|
32
|
+
def read( file, key = nil )
|
33
|
+
|
34
|
+
@map_file_name = file
|
35
|
+
|
36
|
+
unless(map_file_name && File.exists?(map_file_name))
|
37
|
+
logger.error "Cannot open mapping file - #{map_file_name} - file does not exist."
|
38
|
+
raise FileNotFound.new("Cannot open mapping file - #{map_file_name}")
|
39
|
+
end
|
40
|
+
|
41
|
+
begin
|
42
|
+
# Load application configuration
|
43
|
+
set_mapping( map_file_name )
|
44
|
+
|
45
|
+
set_key_config!( key ) if key
|
46
|
+
rescue => e
|
47
|
+
puts e.inspect
|
48
|
+
logger.error "Failed to parse config file #{map_file_name} - bad YAML ?"
|
49
|
+
raise e
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# OpenStruct not a hash .. supports form ... config.path, config.full_name etc
|
54
|
+
def method_missing(method, *args, &block)
|
55
|
+
#logger :debug, "method_missing called with : #{method}"
|
56
|
+
@mapping_entry.send(method)
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def set_mapping( file )
|
62
|
+
|
63
|
+
@raw_data = File.read(file)
|
64
|
+
|
65
|
+
erb = begin
|
66
|
+
Erubis::Eruby.new(raw_data).result
|
67
|
+
rescue => e
|
68
|
+
puts "Failed to parse erb template #{file} error: #{e.inspect}"
|
69
|
+
|
70
|
+
logger.error "Config template error: #{e.inspect}"
|
71
|
+
|
72
|
+
raise e
|
73
|
+
end
|
74
|
+
|
75
|
+
begin
|
76
|
+
@yaml_data = YAML.load(erb)
|
77
|
+
|
78
|
+
logger.info "Loaded YAML config from [#{file}]"
|
79
|
+
|
80
|
+
rescue => e
|
81
|
+
puts "YAML parse error: #{e.inspect}"
|
82
|
+
logger.error "YAML parse error: #{e.inspect}"
|
83
|
+
raise e
|
84
|
+
end
|
85
|
+
|
86
|
+
@mapping_entry = OpenStruct.new(yaml_data)
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|