datashift 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/.document +5 -5
  2. data/Gemfile +28 -25
  3. data/LICENSE.txt +26 -26
  4. data/README.markdown +302 -285
  5. data/README.rdoc +19 -19
  6. data/Rakefile +93 -95
  7. data/VERSION +5 -5
  8. data/datashift.gemspec +162 -178
  9. data/lib/applications/jruby/jexcel_file.rb +396 -396
  10. data/lib/applications/jruby/word.rb +79 -79
  11. data/lib/datashift.rb +152 -113
  12. data/lib/datashift/exceptions.rb +11 -11
  13. data/lib/datashift/file_definitions.rb +353 -353
  14. data/lib/datashift/mapping_file_definitions.rb +87 -87
  15. data/lib/datashift/method_detail.rb +236 -236
  16. data/lib/datashift/method_mapper.rb +256 -256
  17. data/lib/generators/csv_generator.rb +36 -36
  18. data/lib/generators/excel_generator.rb +121 -121
  19. data/lib/generators/generator_base.rb +13 -13
  20. data/lib/helpers/core_ext/to_b.rb +24 -24
  21. data/lib/helpers/spree_helper.rb +131 -131
  22. data/lib/java/poi-3.7/LICENSE +507 -507
  23. data/lib/java/poi-3.7/NOTICE +21 -21
  24. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  25. data/lib/loaders/csv_loader.rb +98 -98
  26. data/lib/loaders/excel_loader.rb +154 -149
  27. data/lib/loaders/loader_base.rb +403 -331
  28. data/lib/loaders/spreadsheet_loader.rb +136 -136
  29. data/lib/loaders/spree/image_loader.rb +45 -45
  30. data/lib/loaders/spree/product_loader.rb +224 -224
  31. data/spec/csv_loader_spec.rb +30 -30
  32. data/spec/datashift_spec.rb +26 -26
  33. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  34. data/spec/excel_generator_spec.rb +78 -78
  35. data/spec/excel_loader_spec.rb +204 -176
  36. data/spec/file_definitions.rb +141 -141
  37. data/spec/fixtures/.~lock.ProjectsSingleCategories.xls# +1 -0
  38. data/spec/fixtures/ProjectsDefaults.yml +29 -0
  39. data/spec/fixtures/config/database.yml +24 -24
  40. data/spec/fixtures/interact_models_db.sqlite +0 -0
  41. data/spec/fixtures/interact_spree_db.sqlite +0 -0
  42. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  43. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  44. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  45. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  46. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  47. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  48. data/spec/fixtures/test_model_defs.rb +57 -57
  49. data/spec/loader_spec.rb +120 -120
  50. data/spec/method_mapper_spec.rb +237 -237
  51. data/spec/spec_helper.rb +115 -115
  52. data/spec/spree_generator_spec.rb +64 -64
  53. data/spec/spree_loader_spec.rb +310 -310
  54. data/spec/spree_method_mapping_spec.rb +214 -214
  55. data/tasks/config/seed_fu_product_template.erb +15 -15
  56. data/tasks/config/tidy_config.txt +12 -12
  57. data/tasks/db_tasks.rake +65 -64
  58. data/tasks/excel_generator.rake +78 -78
  59. data/tasks/file_tasks.rake +36 -36
  60. data/tasks/import/csv.rake +49 -49
  61. data/tasks/import/excel.rake +71 -66
  62. data/tasks/spree/image_load.rake +108 -108
  63. data/tasks/spree/product_loader.rake +43 -43
  64. data/tasks/word_to_seedfu.rake +166 -166
  65. data/test/helper.rb +18 -18
  66. data/test/test_interact.rb +7 -7
  67. metadata +7 -38
  68. data/Gemfile.lock +0 -211
  69. data/bin/autospec +0 -16
  70. data/bin/convert_to_should_syntax +0 -16
  71. data/bin/erubis +0 -16
  72. data/bin/htmldiff +0 -16
  73. data/bin/jeweler +0 -16
  74. data/bin/ldiff +0 -16
  75. data/bin/nokogiri +0 -16
  76. data/bin/rackup +0 -16
  77. data/bin/rails +0 -16
  78. data/bin/rake +0 -16
  79. data/bin/rake2thor +0 -16
  80. data/bin/ri +0 -16
  81. data/bin/rspec +0 -16
  82. data/bin/spree +0 -16
  83. data/bin/thor +0 -16
  84. data/bin/tilt +0 -16
  85. data/bin/tt +0 -16
@@ -1,79 +1,79 @@
1
- # Author:: Tom Statter
2
- # License:: MIT ?
3
- #
4
- # NOTES ON INVESTIGATING OLE METHODS in irb
5
- #
6
- # visible = @word_app.ole_method_help( 'Visible' ) # Get a Method Object
7
-
8
- # log( visible.return_type_detail.to_s ) # => ["BOOL"]
9
- # log( visible.invoke_kind.to_s ) # => "PROPERTYGET"
10
- # log( visible.params.to_s ) # => []
11
-
12
- # @fc.ole_method_help( 'Report' ).params[1].ole_type_detail
13
- #
14
- # prefs = @word_app.Preferences.Strings.ole_method_help( 'Set' ).params
15
- # => [index, newVal]
16
- #
17
- # WORD_OLE_CONST.constants
18
- #
19
- # WORD_OLE_CONST.constants.sort.grep /CR/
20
- # => ["ClHideCRLF", "LesCR", "LesCRLF"]
21
- #
22
- # WORD_OLE_CONST.const_get( 'LesCR' ) or WORD_OLE_CONST::LesCR
23
- # => 1
24
-
25
- if(Guards::windows?)
26
-
27
- require 'win32ole'
28
-
29
- # Module for constants to be loaded int
30
-
31
- module WORD_OLE_CONST
32
- end
33
-
34
- class Word
35
-
36
- attr_reader :wd, :doc
37
-
38
- def initialize( visible )
39
- @wd = WIN32OLE.new('Word.Application')
40
-
41
- WIN32OLE.const_load(@wd, WORD_OLE_CONST) if WORD_OLE_CONST.constants.empty?
42
-
43
- @wd.Visible = visible
44
- end
45
-
46
- def open(file)
47
- @doc = @wd.Documents.Open(file)
48
- @doc
49
- end
50
-
51
- def save()
52
- @doc.Save()
53
- @doc
54
- end
55
-
56
- # Format : From WORD_OLE_CONST e.g WORD_OLE_CONST::WdFormatHTML
57
- #
58
- def save_as(name, format)
59
- @doc.SaveAs(name, format)
60
- return @doc
61
- end
62
-
63
- # WdFormatFilteredHTML
64
- # WdFormatHTML
65
- def save_as_html(name)
66
- @doc.SaveAs(name, WORD_OLE_CONST::WdFormatHTML)
67
- return @doc
68
- end
69
-
70
- def quit
71
- @wd.quit()
72
- end
73
- end
74
-
75
- else
76
-
77
- class Word
78
- end
79
- end
1
+ # Author:: Tom Statter
2
+ # License:: MIT ?
3
+ #
4
+ # NOTES ON INVESTIGATING OLE METHODS in irb
5
+ #
6
+ # visible = @word_app.ole_method_help( 'Visible' ) # Get a Method Object
7
+
8
+ # log( visible.return_type_detail.to_s ) # => ["BOOL"]
9
+ # log( visible.invoke_kind.to_s ) # => "PROPERTYGET"
10
+ # log( visible.params.to_s ) # => []
11
+
12
+ # @fc.ole_method_help( 'Report' ).params[1].ole_type_detail
13
+ #
14
+ # prefs = @word_app.Preferences.Strings.ole_method_help( 'Set' ).params
15
+ # => [index, newVal]
16
+ #
17
+ # WORD_OLE_CONST.constants
18
+ #
19
+ # WORD_OLE_CONST.constants.sort.grep /CR/
20
+ # => ["ClHideCRLF", "LesCR", "LesCRLF"]
21
+ #
22
+ # WORD_OLE_CONST.const_get( 'LesCR' ) or WORD_OLE_CONST::LesCR
23
+ # => 1
24
+
25
+ if(Guards::windows?)
26
+
27
+ require 'win32ole'
28
+
29
+ # Module for constants to be loaded int
30
+
31
+ module WORD_OLE_CONST
32
+ end
33
+
34
+ class Word
35
+
36
+ attr_reader :wd, :doc
37
+
38
+ def initialize( visible )
39
+ @wd = WIN32OLE.new('Word.Application')
40
+
41
+ WIN32OLE.const_load(@wd, WORD_OLE_CONST) if WORD_OLE_CONST.constants.empty?
42
+
43
+ @wd.Visible = visible
44
+ end
45
+
46
+ def open(file)
47
+ @doc = @wd.Documents.Open(file)
48
+ @doc
49
+ end
50
+
51
+ def save()
52
+ @doc.Save()
53
+ @doc
54
+ end
55
+
56
+ # Format : From WORD_OLE_CONST e.g WORD_OLE_CONST::WdFormatHTML
57
+ #
58
+ def save_as(name, format)
59
+ @doc.SaveAs(name, format)
60
+ return @doc
61
+ end
62
+
63
+ # WdFormatFilteredHTML
64
+ # WdFormatHTML
65
+ def save_as_html(name)
66
+ @doc.SaveAs(name, WORD_OLE_CONST::WdFormatHTML)
67
+ return @doc
68
+ end
69
+
70
+ def quit
71
+ @wd.quit()
72
+ end
73
+ end
74
+
75
+ else
76
+
77
+ class Word
78
+ end
79
+ end
data/lib/datashift.rb CHANGED
@@ -1,114 +1,153 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: TBD. Free, Open Source. MIT ?
5
- #
6
- # Details:: Active Record Loader
7
- #
8
- require 'rbconfig'
9
-
10
- module DataShift
11
-
12
- module Guards
13
-
14
- def self.jruby?
15
- return RUBY_PLATFORM == "java"
16
- end
17
- def self.mac?
18
- RbConfig::CONFIG['target_os'] =~ /darwin/i
19
- end
20
-
21
- def self.linux?
22
- RbConfig::CONFIG['target_os'] =~ /linux/i
23
- end
24
-
25
- def self.windows?
26
- RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
27
- end
28
-
29
- end
30
-
31
- if(Guards::jruby?)
32
- require 'java'
33
-
34
- class Object
35
- def add_to_classpath(path)
36
- $CLASSPATH << File.join( DataShift.root_path, 'lib', path.gsub("\\", "/") )
37
- end
38
- end
39
- end
40
-
41
- def self.gem_version
42
- unless(@gem_version)
43
- File.read( File.join( 'VERSION') ).match(/.*(\d+.\d+.\d+)/)
44
- @gem_version = $1
45
- end
46
- @gem_version
47
- end
48
-
49
- def self.gem_name
50
- "datashift"
51
- end
52
-
53
- def self.root_path
54
- File.expand_path("#{File.dirname(__FILE__)}/..")
55
- end
56
-
57
- def self.library_path
58
- File.expand_path("#{File.dirname(__FILE__)}/../lib")
59
- end
60
-
61
- def self.require_libraries
62
-
63
- loader_libs = %w{ lib }
64
-
65
- # Base search paths - these will be searched recursively
66
- loader_paths = []
67
-
68
- loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
69
-
70
- # Define require search paths, any dir in here will be added to LOAD_PATH
71
-
72
- loader_paths.each do |base|
73
- $:.unshift base if File.directory?(base)
74
- Dir[File.join(base, '**', '**')].each do |p|
75
- if File.directory? p
76
- $:.unshift p
77
- end
78
- end
79
- end
80
-
81
- require_libs = %w{ datashift loaders helpers }
82
-
83
- require_libs.each do |base|
84
- Dir[File.join(library_path, base, '*.rb')].each do |rb|
85
- unless File.directory? rb
86
- require rb
87
- end
88
- end
89
- end
90
-
91
- end
92
-
93
- def self.load_tasks
94
- # Long parameter lists so ensure rake -T produces nice wide output
95
- ENV['RAKE_COLUMNS'] = '180'
96
- base = File.join(root_path, 'tasks', '**')
97
- Dir["#{base}/*.rake"].sort.each { |ext| load ext }
98
- end
99
-
100
- require 'logger'
101
-
102
- def self.logdir
103
- @logdir ||= File.dirname(__FILE__) + '/logs'
104
- @logdir
105
- end
106
-
107
- def self.logger
108
- @logger ||= Logger.new( File.join( logdir(), 'datashift.log') )
109
- @logger
110
- end
111
-
112
- end
113
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: TBD. Free, Open Source. MIT ?
5
+ #
6
+ # Details:: Active Record Loader
7
+ #
8
+ require 'rbconfig'
9
+
10
+ module DataShift
11
+
12
+ module Guards
13
+
14
+ def self.jruby?
15
+ return RUBY_PLATFORM == "java"
16
+ end
17
+ def self.mac?
18
+ RbConfig::CONFIG['target_os'] =~ /darwin/i
19
+ end
20
+
21
+ def self.linux?
22
+ RbConfig::CONFIG['target_os'] =~ /linux/i
23
+ end
24
+
25
+ def self.windows?
26
+ RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
27
+ end
28
+
29
+ end
30
+
31
+ if(Guards::jruby?)
32
+ require 'java'
33
+
34
+ class Object
35
+ def add_to_classpath(path)
36
+ $CLASSPATH << File.join( DataShift.root_path, 'lib', path.gsub("\\", "/") )
37
+ end
38
+ end
39
+ end
40
+
41
+ def self.gem_version
42
+ unless(@gem_version)
43
+ if(File.exists?('VERSION'))
44
+ File.read( File.join('VERSION') ).match(/.*(\d+.\d+.\d+)/)
45
+ @gem_version = $1
46
+ else
47
+ @gem_version = '1.0.0'
48
+ end
49
+ end
50
+ @gem_version
51
+ end
52
+
53
+ def self.gem_name
54
+ "datashift"
55
+ end
56
+
57
+ def self.root_path
58
+ File.expand_path("#{File.dirname(__FILE__)}/..")
59
+ end
60
+
61
+ def self.library_path
62
+ File.expand_path("#{File.dirname(__FILE__)}/../lib")
63
+ end
64
+
65
+ def self.require_libraries
66
+
67
+ loader_libs = %w{ lib }
68
+
69
+ # Base search paths - these will be searched recursively
70
+ loader_paths = []
71
+
72
+ loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
73
+
74
+ # Define require search paths, any dir in here will be added to LOAD_PATH
75
+
76
+ loader_paths.each do |base|
77
+ $:.unshift base if File.directory?(base)
78
+ Dir[File.join(base, '**', '**')].each do |p|
79
+ if File.directory? p
80
+ $:.unshift p
81
+ end
82
+ end
83
+ end
84
+
85
+ require_libs = %w{ datashift loaders helpers }
86
+
87
+ require_libs.each do |base|
88
+ Dir[File.join(library_path, base, '*.rb')].each do |rb|
89
+ unless File.directory? rb
90
+ require rb
91
+ end
92
+ end
93
+ end
94
+
95
+ end
96
+
97
+ def self.load_tasks
98
+ # Long parameter lists so ensure rake -T produces nice wide output
99
+ ENV['RAKE_COLUMNS'] = '180'
100
+ base = File.join(root_path, 'tasks', '**')
101
+ Dir["#{base}/*.rake"].sort.each { |ext| load ext }
102
+ end
103
+
104
+
105
+ module Logging
106
+
107
+ class MultiIO
108
+
109
+ def initialize(*targets)
110
+ @targets = []
111
+ targets.each {|t| @targets << Logger.new(t) }
112
+ end
113
+
114
+ def add(target)
115
+ @targets << Logger.new(target)
116
+ end
117
+
118
+
119
+ def method_missing(method, *args, &block)
120
+ @targets.each {|t| t.send(method, *args, &block) }
121
+ end
122
+
123
+ def verbose
124
+ add(STDOUT)
125
+ end
126
+
127
+ end
128
+
129
+ require 'logger'
130
+
131
+ def logdir
132
+ @logdir ||= 'log'
133
+ @logdir
134
+ end
135
+
136
+ def logger
137
+ @logger ||= open
138
+ @logger
139
+ end
140
+
141
+ private
142
+
143
+ def open( log = 'datashift.log')
144
+ FileUtils::mkdir(logdir) unless File.directory?(logdir)
145
+ log_file = File.open( File.join(logdir(), 'datashift.log'), "a")
146
+ @logger = MultiIO.new(log_file)
147
+ @logger
148
+ end
149
+ end
150
+
151
+ end
152
+
114
153
  DataShift::require_libraries
@@ -1,12 +1,12 @@
1
- module DataShift
2
-
3
- class BadRuby < StandardError; end
4
-
5
- class UnsupportedFileType < StandardError; end
6
-
7
- class MappingDefinitionError < StandardError; end
8
-
9
- class MissingHeadersError < StandardError; end
10
- class MissingMandatoryError < StandardError; end
11
-
1
+ module DataShift
2
+
3
+ class BadRuby < StandardError; end
4
+
5
+ class UnsupportedFileType < StandardError; end
6
+
7
+ class MappingDefinitionError < StandardError; end
8
+
9
+ class MissingHeadersError < StandardError; end
10
+ class MissingMandatoryError < StandardError; end
11
+
12
12
  end
@@ -1,353 +1,353 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Jan 2011
4
- # License:: MIT
5
- #
6
- # Details:: This module acts as helpers for defining input/output file formats as classes.
7
- #
8
- # It provides a simple interface to define a file structure - field by field.
9
- #
10
- # By defining the structure, following methods and attributes are mixed in :
11
- #
12
- # An attribute, with accessor for each field/column.
13
- # Parse a line, assigning values to each attribute.
14
- # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
- # Method to split a file by field.
16
- # Method to perform replace operations on a file by field and value.
17
- #
18
- # Either delimited or a fixed width definition can be created via macro-like class methods :
19
- #
20
- # create_field_definition [field_list]
21
- #
22
- # create_fixed_definition {field => range }
23
- #
24
- # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
- #
26
- # create_field_attr_accessors
27
- #
28
- # USAGE :
29
- #
30
- # Create a class that contains definition of a file.
31
- #
32
- # class ExampleFixedWith < FileDefinitionBase
33
- # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
- #
35
- # create_field_attr_accessors
36
- # end
37
- #
38
- # class ExampleCSV < FileDefinitionBase
39
- # create_field_definition %w{abc def ghi jkl}
40
- #
41
- # create_field_attr_accessors
42
- # end
43
- #
44
- # Any instance can then be used to parse the defined file type, with each field or column value
45
- # being assigned automatically to the associated instance variable.
46
- #
47
- # line = '1,2,3,4'
48
- # x = ExampleCSV.new( line )
49
- #
50
- # assert x.responds_to? :jkl
51
- # assert_equal x.abc, '1'
52
- # assert_equal x.jkl.to_i, 4
53
- #
54
- module FileDefinitions
55
-
56
- include Enumerable
57
-
58
- attr_accessor :key
59
- attr_accessor :current_line
60
-
61
- # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
- attr_writer :field_delim
63
-
64
- def initialize( line = nil )
65
- @key = String.new
66
- parse(line) unless line.nil?
67
- end
68
-
69
- def self.included(base)
70
- base.extend(ClassMethods)
71
- subclasses << base
72
- end
73
-
74
- def self.subclasses
75
- @subclasses ||=[]
76
- end
77
-
78
-
79
- # Return the field delimiter used when splitting a line
80
- def field_delim
81
- @field_delim || ','
82
- end
83
-
84
- # Parse each line of a file based on the field definition, yields self for each successive line
85
- #
86
- def each( file )
87
- File::new(file).each_line do |line|
88
- parse( line )
89
- yield self
90
- end
91
- end
92
-
93
- def fields
94
- @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
- @fields
96
- end
97
-
98
- def to_s
99
- fields.join(',')
100
- end
101
-
102
- module ClassMethods
103
-
104
- # Helper to generate methods to store and return the complete list of fields
105
- # in this File definition (also creates member @field_definition) and parse a line.
106
- #
107
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
- #
109
- def create_field_definition( *fields )
110
- instance_eval <<-end_eval
111
- @field_definition ||= %w{ #{fields.join(' ')} }
112
- def field_definition
113
- @field_definition
114
- end
115
- end_eval
116
-
117
- class_eval <<-end_eval
118
- def parse( line )
119
- @current_line = line
120
- before_parse if respond_to? :before_parse
121
- @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
- after_parse if respond_to? :after_parse
123
- generate_key if respond_to? :generate_key
124
- end
125
- end_eval
126
- end
127
-
128
- def add_field(field, add_accessor = true)
129
- @field_definition ||= []
130
- @field_definition << field.to_s
131
- attr_accessor field if(add_accessor)
132
- end
133
-
134
-
135
- # Helper to generate methods that return the complete list of fixed width fields
136
- # and associated ranges in this File definition, and parse a line.
137
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
- #
139
- def create_fixed_definition( field_range_map )
140
- raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
-
142
- keys = field_range_map.keys.collect(&:to_s)
143
- string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
-
145
- instance_eval <<-end_eval
146
- def fixed_definition
147
- @fixed_definition ||= #{string_map.inspect}
148
- @fixed_definition
149
- end
150
- end_eval
151
-
152
- instance_eval <<-end_eval
153
- def field_definition
154
- @field_definition ||= %w{ #{keys.join(' ')} }
155
- @field_definition
156
- end
157
- end_eval
158
-
159
- class_eval <<-end_eval
160
- def parse( line )
161
- @current_line = line
162
- before_parse if respond_to? :before_parse
163
- self.class.fixed_definition.each do |key, range|
164
- instance_variable_set(\"@\#{key}\", @current_line[range])
165
- end
166
- after_parse if respond_to? :after_parse
167
- generate_key if respond_to? :generate_key
168
- end
169
- end_eval
170
-
171
- end
172
-
173
- # Create accessors for each field
174
- def create_field_attr_accessors
175
- self.field_definition.each {|f| attr_accessor f}
176
- end
177
-
178
-
179
- ###############################
180
- # PARSING + FILE MANIPULATION #
181
- ###############################
182
-
183
- # Parse a complete file and return array of self, one per line
184
- def parse_file( file, options = {} )
185
- limit = options[:limit]
186
- count = 0
187
- lines = []
188
- File::new(file).each_line do |line|
189
- break if limit && ((count += 1) > limit)
190
- lines << self.new( line )
191
- end
192
- lines
193
- end
194
-
195
-
196
-
197
- # Split a file, whose field definition is represented by self,
198
- # into seperate streams, based on the values of one if it's fields.
199
- #
200
- # Writes the results, one file per split stream, to directory specified by output_path
201
- #
202
- # Options:
203
- #
204
- # :keys => Also write split files of the key fields
205
- #
206
- # :filter => Optional Regular Expression to act as filter be applid to the field.
207
- # For example split by Ccy but filter to only include certain ccys pass
208
- # filter => '[GBP|USD]'
209
- #
210
- def split_on_write( file_name, field, output_path, options = {} )
211
-
212
- path = output_path || '.'
213
-
214
- filtered = split_on( file_name, field, options )
215
-
216
- unless filtered.empty?
217
- log :info, "Writing seperate streams to #{path}"
218
-
219
- filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
220
-
221
- filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
222
- end
223
- end
224
-
225
- # Split a file, whose field definition is represented by self,
226
- # into seperate streams, based on one if it's fields.
227
- #
228
- # Returns a map of Field value => File def object
229
- #
230
- # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
- #
232
- # Users can get at the raw line simply by calling the line() method on File Def object
233
- #
234
- # Options:
235
- #
236
- # :output_path => directory to write the individual streams files to
237
- #
238
- # :filter => Optional Regular Expression to act as filter be applid to the field.
239
- # For example split by Ccy but filter to only include certain ccys pass
240
- # filter => 'GBP|USD|EUR'
241
- #
242
- def split_on( file_name, field, options = {} )
243
-
244
- regex = options[:filter] ? Regexp.new(options[:filter]) : nil
245
-
246
- log :debug, "Using REGEX: #{regex.inspect}" if regex
247
-
248
- filtered = {}
249
-
250
- if( self.new.respond_to?(field) )
251
-
252
- log :info, "Splitting on #{field}"
253
-
254
- File.open( file_name ) do |t|
255
- t.each do |line|
256
- next unless(line && line.chomp!)
257
- x = self.new(line)
258
-
259
- value = x.send( field.to_sym ) # the actual field value from the specified field column
260
- next if value.nil?
261
-
262
- if( regex.nil? || value.match(regex) )
263
- filtered[value] ? filtered[value] << x : filtered[value] = [x]
264
- end
265
- end
266
- end
267
- else
268
- log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
269
- end
270
-
271
- if( options[:sort])
272
- filtered.values.each( &:sort )
273
- return filtered
274
- end
275
- return filtered
276
- end
277
-
278
- # Open and parse a file, replacing a value in the specfied field.
279
- # Does not update the file itself. Does not write a new output file.
280
- #
281
- # Returns :
282
- # 1) full collection of updated lines
283
- # 2) collection of file def objects (self), with updated value.
284
- #
285
- # Finds values matching old_value in given map
286
- #
287
- # Replaces matches with new_value in map.
288
- #
289
- # Accepts more than one field, if files is either and array of strings
290
- # or comma seperated list of fields.
291
- #
292
- def file_set_field_by_map( file_name, fields, value_map, regex = nil )
293
-
294
- lines, objects = [],[]
295
-
296
- if fields.is_a?(Array)
297
- attribs = fields
298
- else
299
- attribs = "#{fields}".split(',')
300
- end
301
-
302
- attribs.collect! do |attrib|
303
- raise ArgumentError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
304
- end
305
-
306
- log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
307
-
308
- File.open( file_name ) do |t|
309
- t.each do |line|
310
- if line.chomp.empty?
311
- lines << line
312
- objects << self.new
313
- next
314
- end
315
- x = self.new(line)
316
-
317
- attribs.each do |a|
318
- old_value = x.instance_variable_get( "@#{a}" )
319
- x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
320
- end
321
-
322
- objects << x
323
- lines << x.to_s
324
- end
325
- end
326
-
327
- return lines, objects
328
- end
329
- end # END class methods
330
-
331
- # Open and parse a file, replacing a value in the specfied field.
332
- # Does not update the file itself. Does not write a new output file.
333
- #
334
- # Returns :
335
- # 1) full collection of updated lines
336
- # 2) collection of file def objects (self), with updated value.
337
- #
338
- # Finds values matching old_value, and also accepts an optional regex for more powerful
339
- # matching strategies of values on the specfified field.
340
- #
341
- # Replaces matches with new_value.
342
- #
343
- # Accepts more than one field, if files is either and array of strings
344
- # or comma seperated list of fields.
345
- #
346
- def file_set_field( file_name, field, old_value, new_value, regex = nil )
347
-
348
- map = {old_value => new_value}
349
-
350
- return file_set_field_by_map(file_name, field, map, regex)
351
- end
352
-
353
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Jan 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: This module acts as helpers for defining input/output file formats as classes.
7
+ #
8
+ # It provides a simple interface to define a file structure - field by field.
9
+ #
10
+ # By defining the structure, following methods and attributes are mixed in :
11
+ #
12
+ # An attribute, with accessor for each field/column.
13
+ # Parse a line, assigning values to each attribute.
14
+ # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
+ # Method to split a file by field.
16
+ # Method to perform replace operations on a file by field and value.
17
+ #
18
+ # Either delimited or a fixed width definition can be created via macro-like class methods :
19
+ #
20
+ # create_field_definition [field_list]
21
+ #
22
+ # create_fixed_definition {field => range }
23
+ #
24
+ # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
+ #
26
+ # create_field_attr_accessors
27
+ #
28
+ # USAGE :
29
+ #
30
+ # Create a class that contains definition of a file.
31
+ #
32
+ # class ExampleFixedWith < FileDefinitionBase
33
+ # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
+ #
35
+ # create_field_attr_accessors
36
+ # end
37
+ #
38
+ # class ExampleCSV < FileDefinitionBase
39
+ # create_field_definition %w{abc def ghi jkl}
40
+ #
41
+ # create_field_attr_accessors
42
+ # end
43
+ #
44
+ # Any instance can then be used to parse the defined file type, with each field or column value
45
+ # being assigned automatically to the associated instance variable.
46
+ #
47
+ # line = '1,2,3,4'
48
+ # x = ExampleCSV.new( line )
49
+ #
50
+ # assert x.responds_to? :jkl
51
+ # assert_equal x.abc, '1'
52
+ # assert_equal x.jkl.to_i, 4
53
+ #
54
+ module FileDefinitions
55
+
56
+ include Enumerable
57
+
58
+ attr_accessor :key
59
+ attr_accessor :current_line
60
+
61
+ # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
+ attr_writer :field_delim
63
+
64
+ def initialize( line = nil )
65
+ @key = String.new
66
+ parse(line) unless line.nil?
67
+ end
68
+
69
+ def self.included(base)
70
+ base.extend(ClassMethods)
71
+ subclasses << base
72
+ end
73
+
74
+ def self.subclasses
75
+ @subclasses ||=[]
76
+ end
77
+
78
+
79
+ # Return the field delimiter used when splitting a line
80
+ def field_delim
81
+ @field_delim || ','
82
+ end
83
+
84
+ # Parse each line of a file based on the field definition, yields self for each successive line
85
+ #
86
+ def each( file )
87
+ File::new(file).each_line do |line|
88
+ parse( line )
89
+ yield self
90
+ end
91
+ end
92
+
93
+ def fields
94
+ @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
+ @fields
96
+ end
97
+
98
+ def to_s
99
+ fields.join(',')
100
+ end
101
+
102
+ module ClassMethods
103
+
104
+ # Helper to generate methods to store and return the complete list of fields
105
+ # in this File definition (also creates member @field_definition) and parse a line.
106
+ #
107
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
+ #
109
+ def create_field_definition( *fields )
110
+ instance_eval <<-end_eval
111
+ @field_definition ||= %w{ #{fields.join(' ')} }
112
+ def field_definition
113
+ @field_definition
114
+ end
115
+ end_eval
116
+
117
+ class_eval <<-end_eval
118
+ def parse( line )
119
+ @current_line = line
120
+ before_parse if respond_to? :before_parse
121
+ @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
+ after_parse if respond_to? :after_parse
123
+ generate_key if respond_to? :generate_key
124
+ end
125
+ end_eval
126
+ end
127
+
128
+ def add_field(field, add_accessor = true)
129
+ @field_definition ||= []
130
+ @field_definition << field.to_s
131
+ attr_accessor field if(add_accessor)
132
+ end
133
+
134
+
135
+ # Helper to generate methods that return the complete list of fixed width fields
136
+ # and associated ranges in this File definition, and parse a line.
137
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
+ #
139
+ def create_fixed_definition( field_range_map )
140
+ raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
+
142
+ keys = field_range_map.keys.collect(&:to_s)
143
+ string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
+
145
+ instance_eval <<-end_eval
146
+ def fixed_definition
147
+ @fixed_definition ||= #{string_map.inspect}
148
+ @fixed_definition
149
+ end
150
+ end_eval
151
+
152
+ instance_eval <<-end_eval
153
+ def field_definition
154
+ @field_definition ||= %w{ #{keys.join(' ')} }
155
+ @field_definition
156
+ end
157
+ end_eval
158
+
159
+ class_eval <<-end_eval
160
+ def parse( line )
161
+ @current_line = line
162
+ before_parse if respond_to? :before_parse
163
+ self.class.fixed_definition.each do |key, range|
164
+ instance_variable_set(\"@\#{key}\", @current_line[range])
165
+ end
166
+ after_parse if respond_to? :after_parse
167
+ generate_key if respond_to? :generate_key
168
+ end
169
+ end_eval
170
+
171
+ end
172
+
173
+ # Create accessors for each field
174
+ def create_field_attr_accessors
175
+ self.field_definition.each {|f| attr_accessor f}
176
+ end
177
+
178
+
179
+ ###############################
180
+ # PARSING + FILE MANIPULATION #
181
+ ###############################
182
+
183
+ # Parse a complete file and return array of self, one per line
184
+ def parse_file( file, options = {} )
185
+ limit = options[:limit]
186
+ count = 0
187
+ lines = []
188
+ File::new(file).each_line do |line|
189
+ break if limit && ((count += 1) > limit)
190
+ lines << self.new( line )
191
+ end
192
+ lines
193
+ end
194
+
195
+
196
+
197
+ # Split a file, whose field definition is represented by self,
198
+ # into seperate streams, based on the values of one if it's fields.
199
+ #
200
+ # Writes the results, one file per split stream, to directory specified by output_path
201
+ #
202
+ # Options:
203
+ #
204
+ # :keys => Also write split files of the key fields
205
+ #
206
+ # :filter => Optional Regular Expression to act as filter be applid to the field.
207
+ # For example split by Ccy but filter to only include certain ccys pass
208
+ # filter => '[GBP|USD]'
209
+ #
210
def split_on_write(file_name, field, output_path, options = {})
  # Splits +file_name+ with split_on and hands each resulting stream to
  # RecsBase::write as a single-entry { file name => content } hash, together
  # with the target directory (output_path, or '.' when it is nil).
  #
  # When options has a :keys entry, a "keys_<field>_<stream>.csv" entry holding
  # each object's key is written for every stream first; the
  # "<field>_<stream>.csv" entries holding each object's current_line follow.
  #
  # Returns nil when split_on produced nothing, otherwise the split map.
  path = output_path || '.'

  streams = split_on(file_name, field, options)
  return if streams.empty?

  log :info, "Writing seperate streams to #{path}"

  if options.key?(:keys)
    streams.each do |stream, records|
      key_dump = records.map(&:key).join("\n")
      RecsBase::write({ "keys_#{field}_#{stream}.csv" => key_dump }, path)
    end
  end

  streams.each do |stream, records|
    line_dump = records.map(&:current_line).join("\n")
    RecsBase::write({ "#{field}_#{stream}.csv" => line_dump }, path)
  end
end
224
+
225
+ # Split a file, whose field definition is represented by self,
226
+ # into separate streams, based on one of its fields.
227
+ #
228
+ # Returns a map of Field value => File def object
229
+ #
230
+ # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
+ #
232
+ # Users can get at the raw line simply by calling current_line on the File Def object
233
+ #
234
+ # Options:
235
+ #
236
+ # :output_path => directory to write the individual streams files to
237
+ #
238
+ # :filter => Optional Regular Expression to act as a filter, applied to the field.
239
+ # For example split by Ccy but filter to only include certain ccys pass
240
+ # filter => 'GBP|USD|EUR'
241
+ #
242
def split_on(file_name, field, options = {})
  # Reads +file_name+ line by line, builds one instance of the receiver per
  # line and groups the instances by the value each returns for +field+.
  #
  # Returns a Hash of field value => Array of instances. The Hash is empty
  # when instances of the receiver do not respond to +field+ (a warning is
  # logged in that case).
  #
  # Options:
  #   :filter => pattern handed to Regexp.new; only field values matching it
  #              are kept.
  #   :sort   => when truthy, each group is sorted in place (the instances
  #              must therefore be comparable via <=>).
  regex = options[:filter] ? Regexp.new(options[:filter]) : nil
  log :debug, "Using REGEX: #{regex.inspect}" if regex

  filtered = {}

  unless new.respond_to?(field)
    # self is the class here, so report its own name rather than "Class".
    log :warn, "Field [#{field}] not defined for file definition #{self.name}"
    return filtered
  end

  log :info, "Splitting on #{field}"

  File.open(file_name) do |io|
    io.each do |raw_line|
      # Non-destructive chomp: chomp! returns nil when there is nothing to
      # strip, which used to drop a final line lacking a trailing newline.
      record = new(raw_line.chomp)

      value = record.send(field.to_sym) # the actual field value from the specified field column
      next if value.nil?
      next unless regex.nil? || value.match(regex)

      (filtered[value] ||= []) << record
    end
  end

  # sort! so the ordering is actually retained in the returned groups.
  filtered.each_value(&:sort!) if options[:sort]
  filtered
end
277
+
278
+ # Open and parse a file, replacing a value in the specified field.
279
+ # Does not update the file itself. Does not write a new output file.
280
+ #
281
+ # Returns :
282
+ # 1) full collection of updated lines
283
+ # 2) collection of file def objects (self), with updated value.
284
+ #
285
+ # Finds values matching old_value in given map
286
+ #
287
+ # Replaces matches with new_value in map.
288
+ #
289
+ # Accepts more than one field, if fields is either an array of strings
290
+ # or a comma separated list of fields.
291
+ #
292
def file_set_field_by_map(file_name, fields, value_map, regex = nil)
  # Parses +file_name+ into instances of the receiver and, for each named
  # field, swaps the instance variable's current value for the replacement
  # found in +value_map+ (old value => new value). The file itself is not
  # modified.
  #
  # fields    - an Array of field names, or a comma separated String of them.
  # value_map - Hash of old value => new value. Only truthy replacements are
  #             applied.
  # regex     - optional pattern. NOTE(review): the original regex branch
  #             could not run (undefined local, Hash call on a field value),
  #             so its intent is inferred: a value with no exact entry in
  #             value_map that matches +regex+ takes the replacement of the
  #             first value_map key that also matches +regex+. Confirm this
  #             is the wanted behaviour.
  #
  # Returns two Arrays: the updated lines (record.to_s per line) and the
  # corresponding instances. An empty line is passed through unchanged and
  # paired with a blank instance.
  #
  # Raises ArgumentError when instances do not respond to a named field.

  # Work on a copy so the caller's Array is never modified; strip so that
  # "a, b" is accepted as well as "a,b".
  attribs = fields.is_a?(Array) ? fields.dup : fields.to_s.split(',').map(&:strip)

  probe = new
  attribs.each do |attrib|
    next if probe.respond_to?(attrib)
    # self is the class here, so report its own name rather than "Class".
    raise ArgumentError, "Field: #{attrib} is not a field on #{self.name}"
  end

  log :info, "#{self.name} - updating field(s) #{fields} in #{file_name}"

  lines = []
  objects = []

  File.open(file_name) do |io|
    io.each do |line|
      if line.chomp.empty?
        lines << line
        objects << new
        next
      end

      record = new(line)

      attribs.each do |attrib|
        ivar = "@#{attrib}"
        old_value = record.instance_variable_get(ivar)

        replacement = value_map[old_value]
        if !replacement && regex && old_value.to_s.match(regex)
          pattern_key = value_map.keys.detect { |k| k.to_s.match(regex) }
          replacement = value_map[pattern_key] unless pattern_key.nil?
        end

        record.instance_variable_set(ivar, replacement) if replacement
      end

      objects << record
      lines << record.to_s
    end
  end

  return lines, objects
end
329
+ end # END class methods
330
+
331
+ # Open and parse a file, replacing a value in the specified field.
332
+ # Does not update the file itself. Does not write a new output file.
333
+ #
334
+ # Returns :
335
+ # 1) full collection of updated lines
336
+ # 2) collection of file def objects (self), with updated value.
337
+ #
338
+ # Finds values matching old_value, and also accepts an optional regex for more powerful
339
+ # matching strategies of values on the specified field.
340
+ #
341
+ # Replaces matches with new_value.
342
+ #
343
+ # Accepts more than one field, if field is either an array of strings
344
+ # or a comma separated list of fields.
345
+ #
346
def file_set_field(file_name, field, old_value, new_value, regex = nil)
  # Single-replacement convenience wrapper: wraps old_value => new_value in a
  # one-entry map and delegates to file_set_field_by_map, returning whatever
  # that returns.
  #
  # This method sits outside the class-methods section, so it is dispatched
  # on an instance, while file_set_field_by_map is defined among the class
  # methods. Fall back to the class whenever the instance itself does not
  # provide it, instead of failing with NoMethodError.
  map = { old_value => new_value }

  if respond_to?(:file_set_field_by_map, true)
    file_set_field_by_map(file_name, field, map, regex)
  else
    self.class.file_set_field_by_map(file_name, field, map, regex)
  end
end
352
+
353
+ end