datashift 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/README.markdown +91 -55
  3. data/VERSION +1 -1
  4. data/datashift.gemspec +8 -23
  5. data/lib/applications/jexcel_file.rb +1 -2
  6. data/lib/datashift.rb +34 -15
  7. data/lib/datashift/column_packer.rb +98 -34
  8. data/lib/datashift/data_transforms.rb +83 -0
  9. data/lib/datashift/delimiters.rb +58 -10
  10. data/lib/datashift/excel_base.rb +123 -0
  11. data/lib/datashift/exceptions.rb +45 -7
  12. data/lib/datashift/load_object.rb +25 -0
  13. data/lib/datashift/mapping_service.rb +91 -0
  14. data/lib/datashift/method_detail.rb +40 -62
  15. data/lib/datashift/method_details_manager.rb +18 -2
  16. data/lib/datashift/method_dictionary.rb +27 -10
  17. data/lib/datashift/method_mapper.rb +49 -41
  18. data/lib/datashift/model_mapper.rb +42 -22
  19. data/lib/datashift/populator.rb +258 -143
  20. data/lib/datashift/thor_base.rb +38 -0
  21. data/lib/exporters/csv_exporter.rb +57 -145
  22. data/lib/exporters/excel_exporter.rb +73 -60
  23. data/lib/generators/csv_generator.rb +65 -5
  24. data/lib/generators/generator_base.rb +69 -3
  25. data/lib/generators/mapping_generator.rb +112 -0
  26. data/lib/helpers/core_ext/csv_file.rb +33 -0
  27. data/lib/loaders/csv_loader.rb +41 -39
  28. data/lib/loaders/excel_loader.rb +130 -116
  29. data/lib/loaders/loader_base.rb +190 -146
  30. data/lib/loaders/paperclip/attachment_loader.rb +4 -4
  31. data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
  32. data/lib/loaders/paperclip/image_loading.rb +9 -7
  33. data/lib/loaders/reporter.rb +17 -8
  34. data/lib/thor/export.thor +12 -13
  35. data/lib/thor/generate.thor +1 -9
  36. data/lib/thor/import.thor +13 -24
  37. data/lib/thor/mapping.thor +65 -0
  38. data/spec/Gemfile +13 -11
  39. data/spec/Gemfile.lock +98 -93
  40. data/spec/csv_exporter_spec.rb +104 -99
  41. data/spec/csv_generator_spec.rb +159 -0
  42. data/spec/csv_loader_spec.rb +197 -16
  43. data/spec/datashift_spec.rb +9 -0
  44. data/spec/excel_exporter_spec.rb +149 -58
  45. data/spec/excel_generator_spec.rb +35 -44
  46. data/spec/excel_loader_spec.rb +196 -178
  47. data/spec/excel_spec.rb +8 -5
  48. data/spec/loader_base_spec.rb +47 -7
  49. data/spec/mapping_spec.rb +117 -0
  50. data/spec/method_dictionary_spec.rb +24 -11
  51. data/spec/method_mapper_spec.rb +5 -7
  52. data/spec/model_mapper_spec.rb +41 -0
  53. data/spec/paperclip_loader_spec.rb +3 -6
  54. data/spec/populator_spec.rb +48 -14
  55. data/spec/spec_helper.rb +85 -73
  56. data/spec/thor_spec.rb +40 -5
  57. metadata +93 -86
  58. data/lib/applications/excel_base.rb +0 -63
@@ -0,0 +1,83 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: March 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: Stores defaults, substitutions, overrides etc.
7
+ # that can be applied to incoming data while being Populated
8
+ #
9
+ # WORK IN PROGRESS
10
+
11
module DataShift

  module Transformations

    # Default values and overrides can be provided in a Ruby/YAML config file.
    #
    # Format :
    #
    # Load Class: (e.g Spree:Product)
    # datashift_defaults:
    # value_as_string: "Default Project Value"
    # category: reference:category_002
    #
    # datashift_overrides:
    # value_as_double: 99.23546
    #
    # datashift_substitutions:
    #
    # Base keeps one Hash of operator => value. set, has_transform? and apply
    # all work against that single store (see #transforms).
    class Base

      include DataShift::Logging

      # Read +yaml_file+ (evaluated through ERB first) and look up the section
      # keyed by the name of +load_object_class+.
      #
      # WORK IN PROGRESS : the section is located but nothing is registered
      # from it yet, so this currently always returns nil.
      def configure_from(load_object_class, yaml_file)
        # NOTE(review): YAML.load on an ERB-evaluated file executes whatever the
        # file contains - only use with trusted configuration files.
        data = YAML.load(ERB.new(IO.read(yaml_file)).result)

        # An empty file parses to false/nil; treat that as "nothing to configure".
        return unless data.is_a?(Hash) && data[load_object_class.name]

        nil
      end

      # Register a value to be used to populate Model.operator.
      # Generally overrides will be used regardless of what value the caller supplied.
      # Returns the stored value.
      def set(operator, value)
        transforms[operator] = value
      end

      # Lazily created Hash of operator => registered value.
      def transforms
        @transforms ||= {}
      end

      # Apply the transform registered for +operator+ to +current_value+.
      # Returns nil when no (truthy) transform is registered, as before.
      def apply(operator, current_value)
        perform_transform(operator, current_value) if transforms[operator]
      end

      # True when a value has been registered for +operator+ via #set.
      def has_transform?(operator)
        transforms.key?(operator)
      end

      private

      # NOTE(review): the original called an undefined method named
      # perform_transformcurrent_value(). This is presumed to mean "run the
      # transform against current_value" - confirm the intended semantics.
      # The default simply yields the registered value; subclasses may refine it.
      def perform_transform(operator, _current_value)
        transforms[operator]
      end

    end

    class Substitution < Base

      # Identifies the kind of transformation this class represents.
      def type
        :substitution
      end
    end

    class Override < Base

      # Identifies the kind of transformation this class represents.
      def type
        :override
      end
    end

  end
end
@@ -10,9 +10,26 @@
10
10
  #
11
11
  module DataShift
12
12
 
13
-
14
13
  module Delimiters
15
14
 
15
+ # I made these class methods, feeling delims are 'global'
16
+ # I am no longer sure whether that is a good pattern or not
17
+
18
+
19
+ # As well as just the column name, support embedding find operators for that column
20
+ # in the heading .. i.e Column header => 'BlogPosts:user_id'
21
+ # ... association has many BlogPosts selected via find_by_user_id
22
+ #
23
+ # in the heading .. i.e Column header => 'BlogPosts:user_name:John Smith'
24
+ # ... association has many BlogPosts selected via find_by_user_name("John Smith")
25
+ #
26
# Delimiter that splits a column heading into its name and any embedded
# find operators. Defaults to ':' the first time it is read.
def self.column_delim
  # ||= already evaluates to the (possibly just assigned) value
  @column_delim ||= ':'
end
30
+
31
# Replace the column heading delimiter with +x+. Returns +x+.
def self.set_column_delim(x)
  @column_delim = x
end
32
+
16
33
 
17
34
  # Support multiple associations being added to a base object to be specified in a single column.
18
35
  #
@@ -40,19 +57,31 @@ module DataShift
40
57
  end
41
58
 
42
59
  def self.set_name_value_delim(x) @name_value_delim = x; end
43
- # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
44
- #
45
- # |Category|
46
- # name:new{ :date => '20110102', :owner = > 'blah'}
47
- #
48
-
49
-
60
+
61
+
62
+ # The simple separator for a list of values whether it be
63
+ # "Colour:red,green,blue".split(Delimiters::multi_value_delim) => [red,green,blue]
64
+ # {name => value, n2 => v2}.split(Delimiters::multi_value_delim) => [ [name => value], [n2 => v2] ]
65
+
50
66
  def self.multi_value_delim
51
67
  @multi_value_delim ||= ','
52
- @multi_value_delim
53
68
  end
54
69
 
55
70
  def self.set_multi_value_delim(x) @multi_value_delim = x; end
71
+
72
+ # Objects can be created with multiple facets in single columns.
73
+ # In this example a single Product can be configured with a consolidated mime and print types
74
+ #
75
+ # mime_type:jpeg,PDF ; print_type:colour equivalent to
76
+ #
77
+ # => mime_type:jpeg;print_type:colour | mime_type:PDF; print_type:colour
78
+
79
# Separator between the facets packed into a single column.
# Defaults to ';' the first time it is read.
def self.multi_facet_delim
  @multi_facet_delim ||= ';'
end
82
+
83
# Replace the facet separator with +x+. Returns +x+.
# Named to match the other set_*_delim writers in this module.
def self.set_multi_facet_delim(x)
  @multi_facet_delim = x
end

# Original spelling (missing underscore), kept so existing callers still work.
def self.setmulti_facet_delim(x)
  set_multi_facet_delim(x)
end
84
+
56
85
 
57
86
  # Multiple objects can be embedded in single columns.
58
87
  # In this example a single Category column contains 3 separate entries, New, SecondHand, Retro
@@ -92,11 +121,30 @@ module DataShift
92
121
 
93
122
  def self.csv_delim=(x) set_csv_delim(x); end
94
123
  def self.set_csv_delim(x) @csv_delim = x; end
95
-
124
+
96
125
  def self.eol
97
126
  "\n"
98
127
  end
99
128
 
129
+ # surround text in suitable quotes e.g "hello world, how are you" => ' "hello world, how are you" '
130
# Quote character used to surround text. Defaults to a single quote.
# NOTE(review): this is an instance method whereas the other delimiters in
# this module are defined with `def self.` - confirm that is intended.
def text_delim
  @text_delim ||= "\'"
end
133
+
134
# Replace the quote character used to surround text with +x+.
# NOTE(review): instance method, unlike the `def self.` writers used for the
# other delimiters in this module - confirm that is intended.
def text_delim=(x)
  @text_delim = x
end
137
+
138
+ # Separator for identifying normal key value pairs
139
+
140
# Separator between a key and its value. Defaults to "=>".
# TODO: check Ruby version and use the appropriate hash style ?
def self.key_value_sep
  @key_value_sep ||= "=>"
end
143
+
144
# Replace the key/value separator with +x+.
def self.key_value_sep=(x)
  @key_value_sep = x
end
147
+
100
148
  end
101
149
 
102
150
  end
@@ -0,0 +1,123 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
# Helpers shared by Excel wrappers: sheet-name clean up, header parsing and
# dumping Active Record objects into rows.
#
# The including class is expected to supply set_headers(headers) and
# self[row, column] = value ; neither is defined in this module.
module ExcelBase

  # TODO -revisit/refactor - maybe this should just be on the base class Excel
  # so you can call direct like excel.parse_headers(options[:header_row])
  # rather than
  #   sheet = excel.worksheet( sheet_number )
  #   parse_headers(sheet, options[:header_row])

  attr_accessor :header_row_index, :excel_headers

  # Return +name+ with the characters [ ] : * / \ ? removed.
  def sanitize_sheet_name(name)
    name.gsub(/[\[\]:\*\/\\\?]/, '')
  end

  # Read the header row of +sheet+ into #excel_headers and return that Array.
  #
  # header_row:: index of the row holding the headings; nil is treated as 0.
  #
  # Raises MissingHeadersError when sheet.row returns nothing for that index.
  def parse_headers(sheet, header_row = 0)
    @header_row_index = header_row || 0

    cells = sheet.row(header_row_index)

    raise MissingHeadersError, "No headers found - Check Sheet #{sheet} is complete and Row #{header_row_index} contains headers" unless cells

    @excel_headers = []

    # TODO - make more robust - currently end on first empty column
    # There is no actual max columns in Excel .. you will run out of memory though at some point
    (0..1024).each do |column|
      cell = cells[column]
      break unless cell

      header = cell.to_s.strip
      break if header.empty?

      @excel_headers << header
    end

    @excel_headers
  end

  # Build a header list for a collection of Active Record objects and pass it
  # to set_headers.
  #
  # associations:: operator/association type(s) to include - possible values are
  #                [:assignment, :belongs_to, :has_one, :has_many].
  #                When nil, the model's column names are used instead.
  # options[:only]:: a single name or list of names; when supplied, only method
  #                  details with those names contribute a header.
  #
  # Returns nil without calling set_headers when +records+ is empty or its
  # first element is not an ActiveRecord::Base.
  def ar_to_headers(records, associations = nil, options = {})
    return if records.empty? || !records.first.is_a?(ActiveRecord::Base)

    # nil means "no filter". The original splatted this expression, which turned
    # a missing :only into [] (truthy) and so rejected every header.
    only = options[:only] ? [*options[:only]] : nil

    headers = []

    if associations
      details_mgr = DataShift::MethodDictionary.method_details_mgrs[records.first.class]

      if details_mgr
        [*associations].each do |association|
          details_mgr.get_list(association).each do |md|
            next if only && !only.include?(md.name.to_sym)

            headers << md.operator.to_s
          end
        end
      end
    else
      headers = records.first.class.columns.collect(&:name)
    end

    set_headers(headers)
  end

  # Write a set of AR records, one per row. Unless options[:no_headers] is set,
  # headers are written first and the data starts on row 1.
  def ar_to_xls(records, options = {})
    return if records.empty? || !records.first.is_a?(ActiveRecord::Base)

    row_index =
      if options[:no_headers]
        0
      else
        ar_to_headers(records)
        1
      end

    records.each do |record|
      ar_to_xls_row(row_index, 0, record)
      row_index += 1
    end
  end

  # Save data from an AR record to the given row, based on the record's columns [c1,c2,c3]
  # Returns the index of the column after the last one written
  # (start_column + number of columns), or nil if +record+ is not an AR object.
  def ar_to_xls_row(row, start_column, record)
    return unless record.is_a?(ActiveRecord::Base)

    column = start_column
    record.class.columns.each do |connection_column|
      ar_to_xls_cell(row, column, record, connection_column)
      column += 1
    end
    column
  end

  # Write the value of one record column into the cell at [row, column].
  # Best effort : a failure is reported on stdout and the export carries on.
  def ar_to_xls_cell(row, column, record, connection_column)
    datum = record.send(connection_column.name)

    self[row, column] = datum
  rescue => e
    puts "Failed to export #{datum} from #{connection_column.inspect} to column #{column}"
    puts e, e.backtrace
  end
end
@@ -1,22 +1,60 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2014
2
+ # Author :: Tom Statter
3
+ # Date :: June 2014
4
+ # License:: Free, Open Source.
5
+ #
6
+
1
7
  module DataShift
8
+
9
+ class DataShiftException < StandardError
10
+
11
+ include DataShift::Logging
12
+
13
+ def initialize( msg )
14
+ super
15
+ logger.error( msg)
16
+ end
2
17
 
18
+ def self.generate name
19
+ new_class = Class.new(DataShiftException) do
20
+ def initialize( msg )
21
+ super( msg )
22
+ end
23
+ end
24
+
25
+ DataShift.const_set(name, new_class)
26
+ end
27
+ end
28
+
29
+ class NilDataSuppliedError < DataShiftException
30
+ def initialize( msg )
31
+ super( msg )
32
+ end
33
+ end
34
+
3
35
  class BadRuby < StandardError; end
4
36
 
5
37
  class UnsupportedFileType < StandardError; end
6
38
  class BadFile < StandardError; end
7
39
 
8
40
  class MappingDefinitionError < StandardError; end
9
- class DataProcessingError < StandardError; end
41
+
10
42
 
11
43
  class MissingHeadersError < StandardError; end
12
44
  class MissingMandatoryError < StandardError; end
13
45
 
14
- class RecordNotFound < StandardError; end
15
-
16
46
  class PathError < StandardError; end
17
47
 
18
48
  class BadUri < StandardError; end
19
-
20
- class CreateAttachmentFailed < StandardError; end
21
-
22
- end
49
+
50
+ end
51
+
52
# Create the logged exception classes, e.g. DataShift::ConnectionError
%w[
  ConnectionError
  CouldNotAssignAssociation
  CreateAttachmentFailed
  DataProcessingError
  FileNotFound
  NoSuchClassError
  MissingConfigOptionError
  RecordNotFound
  SaveError
].each do |error_name|
  DataShift::DataShiftException.generate(error_name)
end
@@ -0,0 +1,25 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: March 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: Manage the current loader object
7
+ #
8
+ require 'to_b'
9
+ require 'logging'
10
+
11
module DataShift

  # Holds a reference to the object currently being loaded.
  class LoadObject

    include DataShift::Logging

    # The object being loaded; nil until one has been supplied.
    attr_reader :load_object
    attr_writer :load_object

    # current_object:: the initial object to manage, nil by default
    def initialize(current_object = nil)
      @load_object = current_object
    end

  end

end
@@ -0,0 +1,91 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: A cache type class that stores details of a source=>destination mapping
7
+ #
8
+ require 'erubis'
9
+
10
module DataShift

  # Reads a YAML mapping file (run through Erubis first) and exposes its top
  # level entries as attributes.
  class MappingService

    include DataShift::Logging

    # N.B mapping_entry is an OpenStruct built from the parsed YAML, so if the
    # YAML has top level entries called path & full_name, you can call
    #   service.path
    #   service.full_name etc
    #
    # For the Hash representation use yaml_data.

    attr_reader :mapped_class_name, :map_file_name

    attr_reader :raw_data, :yaml_data, :mapping_entry

    # klass:: the class the mapping applies to; only its name is retained.
    def initialize( klass )
      @mapped_class_name = klass.name
    end

    # Load the mapping held in +file+.
    #
    # Raises FileNotFound when +file+ is nil or does not exist. Any error
    # raised while parsing is logged and then re-raised unchanged.
    def read( file, key = nil )

      @map_file_name = file

      # File.exist? : the File.exists? alias was removed in Ruby 3.2
      unless map_file_name && File.exist?(map_file_name)
        logger.error "Cannot open mapping file - #{map_file_name} - file does not exist."
        raise FileNotFound.new("Cannot open mapping file - #{map_file_name}")
      end

      begin
        # Load application configuration
        set_mapping( map_file_name )

        # NOTE(review): set_key_config! is not defined in this class, so this
        # call is answered by method_missing - confirm what it should do.
        set_key_config!( key ) if key
      rescue => e
        puts e.inspect
        logger.error "Failed to parse config file #{map_file_name} - bad YAML ?"
        raise
      end
    end

    # Forward unknown messages to the OpenStruct of mapping entries
    # .. supports form ... service.path, service.full_name etc
    # Arguments and block are not forwarded.
    def method_missing(method, *args, &block)
      # Nothing to delegate to until a mapping file has been read
      return super if @mapping_entry.nil?

      @mapping_entry.send(method)
    end

    # Keeps respond_to? in step with method_missing for loaded mapping entries.
    def respond_to_missing?(method, include_private = false)
      (!@mapping_entry.nil? && @mapping_entry.respond_to?(method, include_private)) || super
    end

    private

    # Read +file+, evaluate it as an Erubis template, parse the result as YAML
    # and wrap the parsed data in an OpenStruct.
    def set_mapping( file )

      @raw_data = File.read(file)

      erb = begin
        Erubis::Eruby.new(raw_data).result
      rescue => e
        puts "Failed to parse erb template #{file} error: #{e.inspect}"

        logger.error "Config template error: #{e.inspect}"

        raise
      end

      begin
        # NOTE(review): YAML.load on templated content - only use with trusted
        # mapping files.
        @yaml_data = YAML.load(erb)

        logger.info "Loaded YAML config from [#{file}]"

      rescue => e
        puts "YAML parse error: #{e.inspect}"
        logger.error "YAML parse error: #{e.inspect}"
        raise
      end

      @mapping_entry = OpenStruct.new(yaml_data)
    end

  end

end