datashift 0.15.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/README.markdown +91 -55
  3. data/VERSION +1 -1
  4. data/datashift.gemspec +8 -23
  5. data/lib/applications/jexcel_file.rb +1 -2
  6. data/lib/datashift.rb +34 -15
  7. data/lib/datashift/column_packer.rb +98 -34
  8. data/lib/datashift/data_transforms.rb +83 -0
  9. data/lib/datashift/delimiters.rb +58 -10
  10. data/lib/datashift/excel_base.rb +123 -0
  11. data/lib/datashift/exceptions.rb +45 -7
  12. data/lib/datashift/load_object.rb +25 -0
  13. data/lib/datashift/mapping_service.rb +91 -0
  14. data/lib/datashift/method_detail.rb +40 -62
  15. data/lib/datashift/method_details_manager.rb +18 -2
  16. data/lib/datashift/method_dictionary.rb +27 -10
  17. data/lib/datashift/method_mapper.rb +49 -41
  18. data/lib/datashift/model_mapper.rb +42 -22
  19. data/lib/datashift/populator.rb +258 -143
  20. data/lib/datashift/thor_base.rb +38 -0
  21. data/lib/exporters/csv_exporter.rb +57 -145
  22. data/lib/exporters/excel_exporter.rb +73 -60
  23. data/lib/generators/csv_generator.rb +65 -5
  24. data/lib/generators/generator_base.rb +69 -3
  25. data/lib/generators/mapping_generator.rb +112 -0
  26. data/lib/helpers/core_ext/csv_file.rb +33 -0
  27. data/lib/loaders/csv_loader.rb +41 -39
  28. data/lib/loaders/excel_loader.rb +130 -116
  29. data/lib/loaders/loader_base.rb +190 -146
  30. data/lib/loaders/paperclip/attachment_loader.rb +4 -4
  31. data/lib/loaders/paperclip/datashift_paperclip.rb +5 -3
  32. data/lib/loaders/paperclip/image_loading.rb +9 -7
  33. data/lib/loaders/reporter.rb +17 -8
  34. data/lib/thor/export.thor +12 -13
  35. data/lib/thor/generate.thor +1 -9
  36. data/lib/thor/import.thor +13 -24
  37. data/lib/thor/mapping.thor +65 -0
  38. data/spec/Gemfile +13 -11
  39. data/spec/Gemfile.lock +98 -93
  40. data/spec/csv_exporter_spec.rb +104 -99
  41. data/spec/csv_generator_spec.rb +159 -0
  42. data/spec/csv_loader_spec.rb +197 -16
  43. data/spec/datashift_spec.rb +9 -0
  44. data/spec/excel_exporter_spec.rb +149 -58
  45. data/spec/excel_generator_spec.rb +35 -44
  46. data/spec/excel_loader_spec.rb +196 -178
  47. data/spec/excel_spec.rb +8 -5
  48. data/spec/loader_base_spec.rb +47 -7
  49. data/spec/mapping_spec.rb +117 -0
  50. data/spec/method_dictionary_spec.rb +24 -11
  51. data/spec/method_mapper_spec.rb +5 -7
  52. data/spec/model_mapper_spec.rb +41 -0
  53. data/spec/paperclip_loader_spec.rb +3 -6
  54. data/spec/populator_spec.rb +48 -14
  55. data/spec/spec_helper.rb +85 -73
  56. data/spec/thor_spec.rb +40 -5
  57. metadata +93 -86
  58. data/lib/applications/excel_base.rb +0 -63
@@ -0,0 +1,83 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: March 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: Stores defaults, substitutions, overrides etc
7
+ # that can be applied to incoming data while being Populated
8
+ #
9
+ # WORK IN PROGRESS
10
+
11
+ module DataShift
12
+
13
+ module Transformations
14
+
15
+ # Default values and overrides can be provided in a Ruby/YAML (?) config file.
16
+ #
17
+ # Format :
18
+ #
19
+ # Load Class: (e.g Spree:Product)
20
+ # datashift_defaults:
21
+ # value_as_string: "Default Project Value"
22
+ # category: reference:category_002
23
+ #
24
+ # datashift_overrides:
25
+ # value_as_double: 99.23546
26
+ #
27
+ #
28
+ # datashift_substitutions:
29
+ #
30
+ #
31
+
32
+ class Base
33
+
34
+ include DataShift::Logging
35
+
36
+ # Map a Column to all relevant transforms
37
+
38
+ def configure_from(load_object_class, yaml_file)
39
+
40
+ data = YAML::load( ERB.new( IO.read(yaml_file) ).result )
41
+
42
+ if(data[load_object_class.name])
43
+ end
44
+ end
45
+
46
+ # Set a value to be used to populate Model.operator
47
+ # Generally overrides will be used regardless of what value the caller supplied.
48
+ def set( operator, value )
49
+ override_values[operator] = value
50
+ end
51
+
52
+ def transforms
53
+ @transforms ||= {}
54
+ end
55
+
56
+ def apply( operator, current_value )
57
+ if(transforms[operator])
58
+ perform_transformcurrent_value()
59
+ end
60
+ end
61
+
62
+ def has_transform?( operator )
63
+ return override_values.has_key?(operator)
64
+ end
65
+
66
+ end
67
+
68
+ class Substitution < Base
69
+
70
+ def type
71
+ :substitution
72
+ end
73
+ end
74
+
75
+ class Override < Base
76
+
77
+ def type
78
+ :override
79
+ end
80
+ end
81
+
82
+ end
83
+ end
@@ -10,9 +10,26 @@
10
10
  #
11
11
  module DataShift
12
12
 
13
-
14
13
  module Delimiters
15
14
 
15
+ # I made these class methods, feeling delims are 'global'
16
+ # I'm not sure now whether that's a good pattern or not
17
+
18
+
19
+ # As well as just the column name, support embedding find operators for that column
20
+ # in the heading .. i.e Column header => 'BlogPosts:user_id'
21
+ # ... association has many BlogPosts selected via find_by_user_id
22
+ #
23
+ # in the heading .. i.e Column header => 'BlogPosts:user_name:John Smith'
24
+ # ... association has many BlogPosts selected via find_by_user_name("John Smith")
25
+ #
26
+ def self.column_delim
27
+ @column_delim ||= ':'
28
+ @column_delim
29
+ end
30
+
31
+ def self.set_column_delim(x) @column_delim = x; end
32
+
16
33
 
17
34
  # Support multiple associations being added to a base object to be specified in a single column.
18
35
  #
@@ -40,19 +57,31 @@ module DataShift
40
57
  end
41
58
 
42
59
  def self.set_name_value_delim(x) @name_value_delim = x; end
43
- # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR)
44
- #
45
- # |Category|
46
- # name:new{ :date => '20110102', :owner = > 'blah'}
47
- #
48
-
49
-
60
+
61
+
62
+ # The simple separator for a list of values whether it be
63
+ # "Colour:red,green,blue".split(Delimiters::multi_value_delim) => [red,green,blue]
64
+ # {name => value, n2 => v2}.split(Delimiters::multi_value_delim) => [ [name => value], [n2 => v2] ]
65
+
50
66
  def self.multi_value_delim
51
67
  @multi_value_delim ||= ','
52
- @multi_value_delim
53
68
  end
54
69
 
55
70
  def self.set_multi_value_delim(x) @multi_value_delim = x; end
71
+
72
+ # Objects can be created with multiple facets in single columns.
73
+ # In this example a single Product can be configured with a consolidated mime and print types
74
+ #
75
+ # mime_type:jpeg,PDF ; print_type:colour equivalent to
76
+ #
77
+ # => mime_type:jpeg;print_type:colour | mime_type:PDF; print_type:colour
78
+
79
+ def self.multi_facet_delim
80
+ @multi_facet_delim ||= ';'
81
+ end
82
+
83
+ def self.setmulti_facet_delim(x) @multi_facet_delim = x; end
84
+
56
85
 
57
86
  # Multiple objects can be embedded in single columns.
58
87
  # In this example a single Category column contains 3 separate entries, New, SecondHand, Retro
@@ -92,11 +121,30 @@ module DataShift
92
121
 
93
122
  def self.csv_delim=(x) set_csv_delim(x); end
94
123
  def self.set_csv_delim(x) @csv_delim = x; end
95
-
124
+
96
125
  def self.eol
97
126
  "\n"
98
127
  end
99
128
 
129
+ # surround text in suitable quotes e.g "hello world, how are you" => ' "hello world, how are you" '
130
+ def text_delim
131
+ @text_delim ||= "\'"
132
+ end
133
+
134
+ def text_delim=(x)
135
+ @text_delim = x
136
+ end
137
+
138
+ # separator for identifying normal key-value pairs
139
+
140
+ def self.key_value_sep
141
+ @key_value_sep ||= "=>" #TODO check Ruby version and use appropriate has style ?
142
+ end
143
+
144
+ def self.key_value_sep=(x)
145
+ @key_value_sep = x
146
+ end
147
+
100
148
  end
101
149
 
102
150
  end
@@ -0,0 +1,123 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
+ module ExcelBase
5
+
6
+
7
+
8
+ def sanitize_sheet_name( name )
9
+ name.gsub(/[\[\]:\*\/\\\?]/, '')
10
+ end
11
+
12
+
13
+ # TODO -revisit/refactor - maybe this should just be on the base class Excel
14
+ # so you can call direct like excel.parse_headers(options[:header_row])
15
+ # rather than
16
+ # sheet = excel.worksheet( sheet_number )
17
+ # parse_headers(sheet, options[:header_row])
18
+
19
+ attr_accessor :header_row_index, :excel_headers
20
+
21
+ def parse_headers( sheet, header_row = 0 )
22
+
23
+ @header_row_index = header_row || 0
24
+
25
+ header_row = sheet.row(header_row_index)
26
+
27
+ raise MissingHeadersError, "No headers found - Check Sheet #{sheet} is complete and Row #{header_row_index} contains headers" unless(header_row)
28
+
29
+ @excel_headers = []
30
+
31
+ # TODO - make more robust - currently end on first empty column
32
+ # There is no actual max columns in Excel .. you will run out of memory though at some point
33
+ (0..1024).each do |column|
34
+ cell = header_row[column]
35
+ break unless cell
36
+ header = "#{cell.to_s}".strip
37
+ break if header.empty?
38
+ @excel_headers << header
39
+ end
40
+
41
+ @excel_headers
42
+ end
43
+
44
+ # Helpers for dealing with Active Record models and collections
45
+ # Specify array of operators/associations to include - possible values are :
46
+ # [:assignment, :belongs_to, :has_one, :has_many]
47
+
48
+ def ar_to_headers( records, associations = nil, options = {} )
49
+ return if( !records.first.is_a?(ActiveRecord::Base) || records.empty?)
50
+
51
+ only = *options[:only] ? [*options[:only]] : nil
52
+
53
+ headers =[]
54
+
55
+ if associations
56
+ details_mgr = DataShift::MethodDictionary.method_details_mgrs[records.first.class]
57
+
58
+ [*associations].each do |a|
59
+
60
+ details_mgr.get_list(a).each do |md|
61
+
62
+ next if(only && !only.include?( md.name.to_sym ) )
63
+
64
+ puts md.name.to_sym.inspect
65
+
66
+ headers << "#{md.operator}"
67
+
68
+ end
69
+ end if(details_mgr)
70
+
71
+ else
72
+
73
+ headers = records.first.class.columns.collect( &:name )
74
+ end
75
+
76
+ set_headers( headers )
77
+ end
78
+
79
+
80
+ # Pass a set of AR records
81
+ def ar_to_xls(records, options = {})
82
+ return if( ! records.first.is_a?(ActiveRecord::Base) || records.empty?)
83
+
84
+ row_index =
85
+ if(options[:no_headers])
86
+ 0
87
+ else
88
+ ar_to_headers( records )
89
+ 1
90
+ end
91
+
92
+ records.each do |record|
93
+ ar_to_xls_row(row_index, 0, record)
94
+
95
+ row_index += 1
96
+ end
97
+ end
98
+
99
+
100
+ # Save data from an AR record to the current row, based on the record's columns [c1,c2,c3]
101
+ # Returns the number of the final column written to
102
+ def ar_to_xls_row(row, start_column, record)
103
+ return unless( record.is_a?(ActiveRecord::Base))
104
+
105
+ column = start_column
106
+ record.class.columns.each do |connection_column|
107
+ ar_to_xls_cell(row, column, record, connection_column)
108
+ column += 1
109
+ end
110
+ column
111
+ end
112
+
113
+ def ar_to_xls_cell(row, column, record, connection_column)
114
+ begin
115
+ datum = record.send(connection_column.name)
116
+
117
+ self[row, column] = datum
118
+ rescue => e
119
+ puts "Failed to export #{datum} from #{connection_column.inspect} to column #{column}"
120
+ puts e, e.backtrace
121
+ end
122
+ end
123
+ end
@@ -1,22 +1,60 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2014
2
+ # Author :: Tom Statter
3
+ # Date :: June 2014
4
+ # License:: Free, Open Source.
5
+ #
6
+
1
7
  module DataShift
8
+
9
+ class DataShiftException < StandardError
10
+
11
+ include DataShift::Logging
12
+
13
+ def initialize( msg )
14
+ super
15
+ logger.error( msg)
16
+ end
2
17
 
18
+ def self.generate name
19
+ new_class = Class.new(DataShiftException) do
20
+ def initialize( msg )
21
+ super( msg )
22
+ end
23
+ end
24
+
25
+ DataShift.const_set(name, new_class)
26
+ end
27
+ end
28
+
29
+ class NilDataSuppliedError < DataShiftException
30
+ def initialize( msg )
31
+ super( msg )
32
+ end
33
+ end
34
+
3
35
  class BadRuby < StandardError; end
4
36
 
5
37
  class UnsupportedFileType < StandardError; end
6
38
  class BadFile < StandardError; end
7
39
 
8
40
  class MappingDefinitionError < StandardError; end
9
- class DataProcessingError < StandardError; end
41
+
10
42
 
11
43
  class MissingHeadersError < StandardError; end
12
44
  class MissingMandatoryError < StandardError; end
13
45
 
14
- class RecordNotFound < StandardError; end
15
-
16
46
  class PathError < StandardError; end
17
47
 
18
48
  class BadUri < StandardError; end
19
-
20
- class CreateAttachmentFailed < StandardError; end
21
-
22
- end
49
+
50
+ end
51
+
52
+ DataShift::DataShiftException.generate( "ConnectionError")
53
+ DataShift::DataShiftException.generate( "CouldNotAssignAssociation")
54
+ DataShift::DataShiftException.generate( "CreateAttachmentFailed")
55
+ DataShift::DataShiftException.generate( "DataProcessingError")
56
+ DataShift::DataShiftException.generate( "FileNotFound")
57
+ DataShift::DataShiftException.generate( "NoSuchClassError")
58
+ DataShift::DataShiftException.generate( "MissingConfigOptionError")
59
+ DataShift::DataShiftException.generate( "RecordNotFound")
60
+ DataShift::DataShiftException.generate( "SaveError")
@@ -0,0 +1,25 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: March 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: Manage the current loader object
7
+ #
8
+ require 'to_b'
9
+ require 'logging'
10
+
11
+ module DataShift
12
+
13
+ class LoadObject
14
+
15
+ include DataShift::Logging
16
+
17
+ attr_accessor :load_object
18
+
19
+ def initialize( current_object = nil)
20
+ @load_object = current_object
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,91 @@
1
+ # Copyright:: (c) Autotelik Media Ltd 2015
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2015
4
+ # License:: MIT
5
+ #
6
+ # Details:: A cache type class that stores details of a source=>destination mapping
7
+ #
8
+ require 'erubis'
9
+
10
+ module DataShift
11
+
12
+ class MappingService
13
+
14
+ include DataShift::Logging
15
+
16
+ # N.B :all_config, :key_config are OpenStruct data structure
17
+ # that provides definition of config entries as attributes with their accompanying values.
18
+ # So if you had top-level config entries in the YAML called path & full_name, you can call
19
+ # config.path
20
+ # config.full_name etc
21
+ #
22
+ # For a more Hash like representation use config.yaml or config[:attribute]
23
+
24
+ attr_reader :mapped_class_name, :map_file_name
25
+
26
+ attr_reader :raw_data, :yaml_data, :mapping_entry
27
+
28
+ def initialize( klass )
29
+ @mapped_class_name = klass.name
30
+ end
31
+
32
+ def read( file, key = nil )
33
+
34
+ @map_file_name = file
35
+
36
+ unless(map_file_name && File.exists?(map_file_name))
37
+ logger.error "Cannot open mapping file - #{map_file_name} - file does not exist."
38
+ raise FileNotFound.new("Cannot open mapping file - #{map_file_name}")
39
+ end
40
+
41
+ begin
42
+ # Load application configuration
43
+ set_mapping( map_file_name )
44
+
45
+ set_key_config!( key ) if key
46
+ rescue => e
47
+ puts e.inspect
48
+ logger.error "Failed to parse config file #{map_file_name} - bad YAML ?"
49
+ raise e
50
+ end
51
+ end
52
+
53
+ # OpenStruct not a hash .. supports form ... config.path, config.full_name etc
54
+ def method_missing(method, *args, &block)
55
+ #logger :debug, "method_missing called with : #{method}"
56
+ @mapping_entry.send(method)
57
+ end
58
+
59
+ private
60
+
61
+ def set_mapping( file )
62
+
63
+ @raw_data = File.read(file)
64
+
65
+ erb = begin
66
+ Erubis::Eruby.new(raw_data).result
67
+ rescue => e
68
+ puts "Failed to parse erb template #{file} error: #{e.inspect}"
69
+
70
+ logger.error "Config template error: #{e.inspect}"
71
+
72
+ raise e
73
+ end
74
+
75
+ begin
76
+ @yaml_data = YAML.load(erb)
77
+
78
+ logger.info "Loaded YAML config from [#{file}]"
79
+
80
+ rescue => e
81
+ puts "YAML parse error: #{e.inspect}"
82
+ logger.error "YAML parse error: #{e.inspect}"
83
+ raise e
84
+ end
85
+
86
+ @mapping_entry = OpenStruct.new(yaml_data)
87
+ end
88
+
89
+ end
90
+
91
+ end