datashift 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/.document +5 -5
  2. data/Gemfile +28 -25
  3. data/LICENSE.txt +26 -26
  4. data/README.markdown +302 -285
  5. data/README.rdoc +19 -19
  6. data/Rakefile +93 -95
  7. data/VERSION +5 -5
  8. data/datashift.gemspec +162 -178
  9. data/lib/applications/jruby/jexcel_file.rb +396 -396
  10. data/lib/applications/jruby/word.rb +79 -79
  11. data/lib/datashift.rb +152 -113
  12. data/lib/datashift/exceptions.rb +11 -11
  13. data/lib/datashift/file_definitions.rb +353 -353
  14. data/lib/datashift/mapping_file_definitions.rb +87 -87
  15. data/lib/datashift/method_detail.rb +236 -236
  16. data/lib/datashift/method_mapper.rb +256 -256
  17. data/lib/generators/csv_generator.rb +36 -36
  18. data/lib/generators/excel_generator.rb +121 -121
  19. data/lib/generators/generator_base.rb +13 -13
  20. data/lib/helpers/core_ext/to_b.rb +24 -24
  21. data/lib/helpers/spree_helper.rb +131 -131
  22. data/lib/java/poi-3.7/LICENSE +507 -507
  23. data/lib/java/poi-3.7/NOTICE +21 -21
  24. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  25. data/lib/loaders/csv_loader.rb +98 -98
  26. data/lib/loaders/excel_loader.rb +154 -149
  27. data/lib/loaders/loader_base.rb +403 -331
  28. data/lib/loaders/spreadsheet_loader.rb +136 -136
  29. data/lib/loaders/spree/image_loader.rb +45 -45
  30. data/lib/loaders/spree/product_loader.rb +224 -224
  31. data/spec/csv_loader_spec.rb +30 -30
  32. data/spec/datashift_spec.rb +26 -26
  33. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  34. data/spec/excel_generator_spec.rb +78 -78
  35. data/spec/excel_loader_spec.rb +204 -176
  36. data/spec/file_definitions.rb +141 -141
  37. data/spec/fixtures/.~lock.ProjectsSingleCategories.xls# +1 -0
  38. data/spec/fixtures/ProjectsDefaults.yml +29 -0
  39. data/spec/fixtures/config/database.yml +24 -24
  40. data/spec/fixtures/interact_models_db.sqlite +0 -0
  41. data/spec/fixtures/interact_spree_db.sqlite +0 -0
  42. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  43. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  44. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  45. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  46. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  47. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  48. data/spec/fixtures/test_model_defs.rb +57 -57
  49. data/spec/loader_spec.rb +120 -120
  50. data/spec/method_mapper_spec.rb +237 -237
  51. data/spec/spec_helper.rb +115 -115
  52. data/spec/spree_generator_spec.rb +64 -64
  53. data/spec/spree_loader_spec.rb +310 -310
  54. data/spec/spree_method_mapping_spec.rb +214 -214
  55. data/tasks/config/seed_fu_product_template.erb +15 -15
  56. data/tasks/config/tidy_config.txt +12 -12
  57. data/tasks/db_tasks.rake +65 -64
  58. data/tasks/excel_generator.rake +78 -78
  59. data/tasks/file_tasks.rake +36 -36
  60. data/tasks/import/csv.rake +49 -49
  61. data/tasks/import/excel.rake +71 -66
  62. data/tasks/spree/image_load.rake +108 -108
  63. data/tasks/spree/product_loader.rake +43 -43
  64. data/tasks/word_to_seedfu.rake +166 -166
  65. data/test/helper.rb +18 -18
  66. data/test/test_interact.rb +7 -7
  67. metadata +7 -38
  68. data/Gemfile.lock +0 -211
  69. data/bin/autospec +0 -16
  70. data/bin/convert_to_should_syntax +0 -16
  71. data/bin/erubis +0 -16
  72. data/bin/htmldiff +0 -16
  73. data/bin/jeweler +0 -16
  74. data/bin/ldiff +0 -16
  75. data/bin/nokogiri +0 -16
  76. data/bin/rackup +0 -16
  77. data/bin/rails +0 -16
  78. data/bin/rake +0 -16
  79. data/bin/rake2thor +0 -16
  80. data/bin/ri +0 -16
  81. data/bin/rspec +0 -16
  82. data/bin/spree +0 -16
  83. data/bin/thor +0 -16
  84. data/bin/tilt +0 -16
  85. data/bin/tt +0 -16
@@ -1,79 +1,79 @@
1
- # Author:: Tom Statter
2
- # License:: MIT ?
3
- #
4
- # NOTES ON INVESTIGATING OLE METHODS in irb
5
- #
6
- # visible = @word_app.ole_method_help( 'Visible' ) # Get a Method Object
7
-
8
- # log( visible.return_type_detail.to_s ) # => ["BOOL"]
9
- # log( visible.invoke_kind.to_s ) # => "PROPERTYGET"
10
- # log( visible.params.to_s ) # => []
11
-
12
- # @fc.ole_method_help( 'Report' ).params[1].ole_type_detail
13
- #
14
- # prefs = @word_app.Preferences.Strings.ole_method_help( 'Set' ).params
15
- # => [index, newVal]
16
- #
17
- # WORD_OLE_CONST.constants
18
- #
19
- # WORD_OLE_CONST.constants.sort.grep /CR/
20
- # => ["ClHideCRLF", "LesCR", "LesCRLF"]
21
- #
22
- # WORD_OLE_CONST.const_get( 'LesCR' ) or WORD_OLE_CONST::LesCR
23
- # => 1
24
-
25
- if(Guards::windows?)
26
-
27
- require 'win32ole'
28
-
29
- # Module for constants to be loaded int
30
-
31
- module WORD_OLE_CONST
32
- end
33
-
34
- class Word
35
-
36
- attr_reader :wd, :doc
37
-
38
- def initialize( visible )
39
- @wd = WIN32OLE.new('Word.Application')
40
-
41
- WIN32OLE.const_load(@wd, WORD_OLE_CONST) if WORD_OLE_CONST.constants.empty?
42
-
43
- @wd.Visible = visible
44
- end
45
-
46
- def open(file)
47
- @doc = @wd.Documents.Open(file)
48
- @doc
49
- end
50
-
51
- def save()
52
- @doc.Save()
53
- @doc
54
- end
55
-
56
- # Format : From WORD_OLE_CONST e.g WORD_OLE_CONST::WdFormatHTML
57
- #
58
- def save_as(name, format)
59
- @doc.SaveAs(name, format)
60
- return @doc
61
- end
62
-
63
- # WdFormatFilteredHTML
64
- # WdFormatHTML
65
- def save_as_html(name)
66
- @doc.SaveAs(name, WORD_OLE_CONST::WdFormatHTML)
67
- return @doc
68
- end
69
-
70
- def quit
71
- @wd.quit()
72
- end
73
- end
74
-
75
- else
76
-
77
- class Word
78
- end
79
- end
1
+ # Author:: Tom Statter
2
+ # License:: MIT ?
3
+ #
4
+ # NOTES ON INVESTIGATING OLE METHODS in irb
5
+ #
6
+ # visible = @word_app.ole_method_help( 'Visible' ) # Get a Method Object
7
+
8
+ # log( visible.return_type_detail.to_s ) # => ["BOOL"]
9
+ # log( visible.invoke_kind.to_s ) # => "PROPERTYGET"
10
+ # log( visible.params.to_s ) # => []
11
+
12
+ # @fc.ole_method_help( 'Report' ).params[1].ole_type_detail
13
+ #
14
+ # prefs = @word_app.Preferences.Strings.ole_method_help( 'Set' ).params
15
+ # => [index, newVal]
16
+ #
17
+ # WORD_OLE_CONST.constants
18
+ #
19
+ # WORD_OLE_CONST.constants.sort.grep /CR/
20
+ # => ["ClHideCRLF", "LesCR", "LesCRLF"]
21
+ #
22
+ # WORD_OLE_CONST.const_get( 'LesCR' ) or WORD_OLE_CONST::LesCR
23
+ # => 1
24
+
25
+ if(Guards::windows?)
26
+
27
+ require 'win32ole'
28
+
29
+ # Module for constants to be loaded into
30
+
31
+ module WORD_OLE_CONST
32
+ end
33
+
34
+ class Word
35
+
36
+ attr_reader :wd, :doc
37
+
38
+ def initialize( visible )
39
+ @wd = WIN32OLE.new('Word.Application')
40
+
41
+ WIN32OLE.const_load(@wd, WORD_OLE_CONST) if WORD_OLE_CONST.constants.empty?
42
+
43
+ @wd.Visible = visible
44
+ end
45
+
46
+ def open(file)
47
+ @doc = @wd.Documents.Open(file)
48
+ @doc
49
+ end
50
+
51
+ def save()
52
+ @doc.Save()
53
+ @doc
54
+ end
55
+
56
+ # Format : From WORD_OLE_CONST e.g WORD_OLE_CONST::WdFormatHTML
57
+ #
58
+ def save_as(name, format)
59
+ @doc.SaveAs(name, format)
60
+ return @doc
61
+ end
62
+
63
+ # WdFormatFilteredHTML
64
+ # WdFormatHTML
65
+ def save_as_html(name)
66
+ @doc.SaveAs(name, WORD_OLE_CONST::WdFormatHTML)
67
+ return @doc
68
+ end
69
+
70
+ def quit
71
+ @wd.quit()
72
+ end
73
+ end
74
+
75
+ else
76
+
77
+ class Word
78
+ end
79
+ end
data/lib/datashift.rb CHANGED
@@ -1,114 +1,153 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- # License:: TBD. Free, Open Source. MIT ?
5
- #
6
- # Details:: Active Record Loader
7
- #
8
- require 'rbconfig'
9
-
10
- module DataShift
11
-
12
- module Guards
13
-
14
- def self.jruby?
15
- return RUBY_PLATFORM == "java"
16
- end
17
- def self.mac?
18
- RbConfig::CONFIG['target_os'] =~ /darwin/i
19
- end
20
-
21
- def self.linux?
22
- RbConfig::CONFIG['target_os'] =~ /linux/i
23
- end
24
-
25
- def self.windows?
26
- RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
27
- end
28
-
29
- end
30
-
31
- if(Guards::jruby?)
32
- require 'java'
33
-
34
- class Object
35
- def add_to_classpath(path)
36
- $CLASSPATH << File.join( DataShift.root_path, 'lib', path.gsub("\\", "/") )
37
- end
38
- end
39
- end
40
-
41
- def self.gem_version
42
- unless(@gem_version)
43
- File.read( File.join( 'VERSION') ).match(/.*(\d+.\d+.\d+)/)
44
- @gem_version = $1
45
- end
46
- @gem_version
47
- end
48
-
49
- def self.gem_name
50
- "datashift"
51
- end
52
-
53
- def self.root_path
54
- File.expand_path("#{File.dirname(__FILE__)}/..")
55
- end
56
-
57
- def self.library_path
58
- File.expand_path("#{File.dirname(__FILE__)}/../lib")
59
- end
60
-
61
- def self.require_libraries
62
-
63
- loader_libs = %w{ lib }
64
-
65
- # Base search paths - these will be searched recursively
66
- loader_paths = []
67
-
68
- loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
69
-
70
- # Define require search paths, any dir in here will be added to LOAD_PATH
71
-
72
- loader_paths.each do |base|
73
- $:.unshift base if File.directory?(base)
74
- Dir[File.join(base, '**', '**')].each do |p|
75
- if File.directory? p
76
- $:.unshift p
77
- end
78
- end
79
- end
80
-
81
- require_libs = %w{ datashift loaders helpers }
82
-
83
- require_libs.each do |base|
84
- Dir[File.join(library_path, base, '*.rb')].each do |rb|
85
- unless File.directory? rb
86
- require rb
87
- end
88
- end
89
- end
90
-
91
- end
92
-
93
- def self.load_tasks
94
- # Long parameter lists so ensure rake -T produces nice wide output
95
- ENV['RAKE_COLUMNS'] = '180'
96
- base = File.join(root_path, 'tasks', '**')
97
- Dir["#{base}/*.rake"].sort.each { |ext| load ext }
98
- end
99
-
100
- require 'logger'
101
-
102
- def self.logdir
103
- @logdir ||= File.dirname(__FILE__) + '/logs'
104
- @logdir
105
- end
106
-
107
- def self.logger
108
- @logger ||= Logger.new( File.join( logdir(), 'datashift.log') )
109
- @logger
110
- end
111
-
112
- end
113
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ # License:: TBD. Free, Open Source. MIT ?
5
+ #
6
+ # Details:: Active Record Loader
7
+ #
8
+ require 'rbconfig'
9
+
10
+ module DataShift
11
+
12
+ module Guards
13
+
14
+ def self.jruby?
15
+ return RUBY_PLATFORM == "java"
16
+ end
17
+ def self.mac?
18
+ RbConfig::CONFIG['target_os'] =~ /darwin/i
19
+ end
20
+
21
+ def self.linux?
22
+ RbConfig::CONFIG['target_os'] =~ /linux/i
23
+ end
24
+
25
+ def self.windows?
26
+ RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
27
+ end
28
+
29
+ end
30
+
31
+ if(Guards::jruby?)
32
+ require 'java'
33
+
34
+ class Object
35
+ def add_to_classpath(path)
36
+ $CLASSPATH << File.join( DataShift.root_path, 'lib', path.gsub("\\", "/") )
37
+ end
38
+ end
39
+ end
40
+
41
+ def self.gem_version
42
+ unless(@gem_version)
43
+ if(File.exists?('VERSION'))
44
+ File.read( File.join('VERSION') ).match(/.*(\d+.\d+.\d+)/)
45
+ @gem_version = $1
46
+ else
47
+ @gem_version = '1.0.0'
48
+ end
49
+ end
50
+ @gem_version
51
+ end
52
+
53
+ def self.gem_name
54
+ "datashift"
55
+ end
56
+
57
+ def self.root_path
58
+ File.expand_path("#{File.dirname(__FILE__)}/..")
59
+ end
60
+
61
+ def self.library_path
62
+ File.expand_path("#{File.dirname(__FILE__)}/../lib")
63
+ end
64
+
65
+ def self.require_libraries
66
+
67
+ loader_libs = %w{ lib }
68
+
69
+ # Base search paths - these will be searched recursively
70
+ loader_paths = []
71
+
72
+ loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
73
+
74
+ # Define require search paths, any dir in here will be added to LOAD_PATH
75
+
76
+ loader_paths.each do |base|
77
+ $:.unshift base if File.directory?(base)
78
+ Dir[File.join(base, '**', '**')].each do |p|
79
+ if File.directory? p
80
+ $:.unshift p
81
+ end
82
+ end
83
+ end
84
+
85
+ require_libs = %w{ datashift loaders helpers }
86
+
87
+ require_libs.each do |base|
88
+ Dir[File.join(library_path, base, '*.rb')].each do |rb|
89
+ unless File.directory? rb
90
+ require rb
91
+ end
92
+ end
93
+ end
94
+
95
+ end
96
+
97
+ def self.load_tasks
98
+ # Long parameter lists so ensure rake -T produces nice wide output
99
+ ENV['RAKE_COLUMNS'] = '180'
100
+ base = File.join(root_path, 'tasks', '**')
101
+ Dir["#{base}/*.rake"].sort.each { |ext| load ext }
102
+ end
103
+
104
+
105
+ module Logging
106
+
107
+ class MultiIO
108
+
109
+ def initialize(*targets)
110
+ @targets = []
111
+ targets.each {|t| @targets << Logger.new(t) }
112
+ end
113
+
114
+ def add(target)
115
+ @targets << Logger.new(target)
116
+ end
117
+
118
+
119
+ def method_missing(method, *args, &block)
120
+ @targets.each {|t| t.send(method, *args, &block) }
121
+ end
122
+
123
+ def verbose
124
+ add(STDOUT)
125
+ end
126
+
127
+ end
128
+
129
+ require 'logger'
130
+
131
+ def logdir
132
+ @logdir ||= 'log'
133
+ @logdir
134
+ end
135
+
136
+ def logger
137
+ @logger ||= open
138
+ @logger
139
+ end
140
+
141
+ private
142
+
143
+ def open( log = 'datashift.log')
144
+ FileUtils::mkdir(logdir) unless File.directory?(logdir)
145
+ log_file = File.open( File.join(logdir(), 'datashift.log'), "a")
146
+ @logger = MultiIO.new(log_file)
147
+ @logger
148
+ end
149
+ end
150
+
151
+ end
152
+
114
153
  DataShift::require_libraries
@@ -1,12 +1,12 @@
1
- module DataShift
2
-
3
- class BadRuby < StandardError; end
4
-
5
- class UnsupportedFileType < StandardError; end
6
-
7
- class MappingDefinitionError < StandardError; end
8
-
9
- class MissingHeadersError < StandardError; end
10
- class MissingMandatoryError < StandardError; end
11
-
1
+ module DataShift
2
+
3
+ class BadRuby < StandardError; end
4
+
5
+ class UnsupportedFileType < StandardError; end
6
+
7
+ class MappingDefinitionError < StandardError; end
8
+
9
+ class MissingHeadersError < StandardError; end
10
+ class MissingMandatoryError < StandardError; end
11
+
12
12
  end
@@ -1,353 +1,353 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Jan 2011
4
- # License:: MIT
5
- #
6
- # Details:: This module acts as helpers for defining input/output file formats as classes.
7
- #
8
- # It provides a simple interface to define a file structure - field by field.
9
- #
10
- # By defining the structure, following methods and attributes are mixed in :
11
- #
12
- # An attribute, with accessor for each field/column.
13
- # Parse a line, assigning values to each attribute.
14
- # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
- # Method to split a file by field.
16
- # Method to perform replace operations on a file by field and value.
17
- #
18
- # Either delimited or a fixed width definition can be created via macro-like class methods :
19
- #
20
- # create_field_definition [field_list]
21
- #
22
- # create_fixed_definition {field => range }
23
- #
24
- # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
- #
26
- # create_field_attr_accessors
27
- #
28
- # USAGE :
29
- #
30
- # Create a class that contains definition of a file.
31
- #
32
- # class ExampleFixedWith < FileDefinitionBase
33
- # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
- #
35
- # create_field_attr_accessors
36
- # end
37
- #
38
- # class ExampleCSV < FileDefinitionBase
39
- # create_field_definition %w{abc def ghi jkl}
40
- #
41
- # create_field_attr_accessors
42
- # end
43
- #
44
- # Any instance can then be used to parse the defined file type, with each field or column value
45
- # being assigned automatically to the associated instance variable.
46
- #
47
- # line = '1,2,3,4'
48
- # x = ExampleCSV.new( line )
49
- #
50
- # assert x.responds_to? :jkl
51
- # assert_equal x.abc, '1'
52
- # assert_equal x.jkl.to_i, 4
53
- #
54
- module FileDefinitions
55
-
56
- include Enumerable
57
-
58
- attr_accessor :key
59
- attr_accessor :current_line
60
-
61
- # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
- attr_writer :field_delim
63
-
64
- def initialize( line = nil )
65
- @key = String.new
66
- parse(line) unless line.nil?
67
- end
68
-
69
- def self.included(base)
70
- base.extend(ClassMethods)
71
- subclasses << base
72
- end
73
-
74
- def self.subclasses
75
- @subclasses ||=[]
76
- end
77
-
78
-
79
- # Return the field delimiter used when splitting a line
80
- def field_delim
81
- @field_delim || ','
82
- end
83
-
84
- # Parse each line of a file based on the field definition, yields self for each successive line
85
- #
86
- def each( file )
87
- File::new(file).each_line do |line|
88
- parse( line )
89
- yield self
90
- end
91
- end
92
-
93
- def fields
94
- @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
- @fields
96
- end
97
-
98
- def to_s
99
- fields.join(',')
100
- end
101
-
102
- module ClassMethods
103
-
104
- # Helper to generate methods to store and return the complete list of fields
105
- # in this File definition (also creates member @field_definition) and parse a line.
106
- #
107
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
- #
109
- def create_field_definition( *fields )
110
- instance_eval <<-end_eval
111
- @field_definition ||= %w{ #{fields.join(' ')} }
112
- def field_definition
113
- @field_definition
114
- end
115
- end_eval
116
-
117
- class_eval <<-end_eval
118
- def parse( line )
119
- @current_line = line
120
- before_parse if respond_to? :before_parse
121
- @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
- after_parse if respond_to? :after_parse
123
- generate_key if respond_to? :generate_key
124
- end
125
- end_eval
126
- end
127
-
128
- def add_field(field, add_accessor = true)
129
- @field_definition ||= []
130
- @field_definition << field.to_s
131
- attr_accessor field if(add_accessor)
132
- end
133
-
134
-
135
- # Helper to generate methods that return the complete list of fixed width fields
136
- # and associated ranges in this File definition, and parse a line.
137
- # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
138
- #
139
- def create_fixed_definition( field_range_map )
140
- raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
-
142
- keys = field_range_map.keys.collect(&:to_s)
143
- string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
-
145
- instance_eval <<-end_eval
146
- def fixed_definition
147
- @fixed_definition ||= #{string_map.inspect}
148
- @fixed_definition
149
- end
150
- end_eval
151
-
152
- instance_eval <<-end_eval
153
- def field_definition
154
- @field_definition ||= %w{ #{keys.join(' ')} }
155
- @field_definition
156
- end
157
- end_eval
158
-
159
- class_eval <<-end_eval
160
- def parse( line )
161
- @current_line = line
162
- before_parse if respond_to? :before_parse
163
- self.class.fixed_definition.each do |key, range|
164
- instance_variable_set(\"@\#{key}\", @current_line[range])
165
- end
166
- after_parse if respond_to? :after_parse
167
- generate_key if respond_to? :generate_key
168
- end
169
- end_eval
170
-
171
- end
172
-
173
- # Create accessors for each field
174
- def create_field_attr_accessors
175
- self.field_definition.each {|f| attr_accessor f}
176
- end
177
-
178
-
179
- ###############################
180
- # PARSING + FILE MANIPULATION #
181
- ###############################
182
-
183
- # Parse a complete file and return array of self, one per line
184
- def parse_file( file, options = {} )
185
- limit = options[:limit]
186
- count = 0
187
- lines = []
188
- File::new(file).each_line do |line|
189
- break if limit && ((count += 1) > limit)
190
- lines << self.new( line )
191
- end
192
- lines
193
- end
194
-
195
-
196
-
197
- # Split a file, whose field definition is represented by self,
198
- # into seperate streams, based on the values of one if it's fields.
199
- #
200
- # Writes the results, one file per split stream, to directory specified by output_path
201
- #
202
- # Options:
203
- #
204
- # :keys => Also write split files of the key fields
205
- #
206
- # :filter => Optional Regular Expression to act as filter be applid to the field.
207
- # For example split by Ccy but filter to only include certain ccys pass
208
- # filter => '[GBP|USD]'
209
- #
210
- def split_on_write( file_name, field, output_path, options = {} )
211
-
212
- path = output_path || '.'
213
-
214
- filtered = split_on( file_name, field, options )
215
-
216
- unless filtered.empty?
217
- log :info, "Writing seperate streams to #{path}"
218
-
219
- filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
220
-
221
- filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
222
- end
223
- end
224
-
225
- # Split a file, whose field definition is represented by self,
226
- # into seperate streams, based on one if it's fields.
227
- #
228
- # Returns a map of Field value => File def object
229
- #
230
- # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
- #
232
- # Users can get at the raw line simply by calling the line() method on File Def object
233
- #
234
- # Options:
235
- #
236
- # :output_path => directory to write the individual streams files to
237
- #
238
- # :filter => Optional Regular Expression to act as filter be applid to the field.
239
- # For example split by Ccy but filter to only include certain ccys pass
240
- # filter => 'GBP|USD|EUR'
241
- #
242
- def split_on( file_name, field, options = {} )
243
-
244
- regex = options[:filter] ? Regexp.new(options[:filter]) : nil
245
-
246
- log :debug, "Using REGEX: #{regex.inspect}" if regex
247
-
248
- filtered = {}
249
-
250
- if( self.new.respond_to?(field) )
251
-
252
- log :info, "Splitting on #{field}"
253
-
254
- File.open( file_name ) do |t|
255
- t.each do |line|
256
- next unless(line && line.chomp!)
257
- x = self.new(line)
258
-
259
- value = x.send( field.to_sym ) # the actual field value from the specified field column
260
- next if value.nil?
261
-
262
- if( regex.nil? || value.match(regex) )
263
- filtered[value] ? filtered[value] << x : filtered[value] = [x]
264
- end
265
- end
266
- end
267
- else
268
- log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
269
- end
270
-
271
- if( options[:sort])
272
- filtered.values.each( &:sort )
273
- return filtered
274
- end
275
- return filtered
276
- end
277
-
278
- # Open and parse a file, replacing a value in the specfied field.
279
- # Does not update the file itself. Does not write a new output file.
280
- #
281
- # Returns :
282
- # 1) full collection of updated lines
283
- # 2) collection of file def objects (self), with updated value.
284
- #
285
- # Finds values matching old_value in given map
286
- #
287
- # Replaces matches with new_value in map.
288
- #
289
- # Accepts more than one field, if files is either and array of strings
290
- # or comma seperated list of fields.
291
- #
292
- def file_set_field_by_map( file_name, fields, value_map, regex = nil )
293
-
294
- lines, objects = [],[]
295
-
296
- if fields.is_a?(Array)
297
- attribs = fields
298
- else
299
- attribs = "#{fields}".split(',')
300
- end
301
-
302
- attribs.collect! do |attrib|
303
- raise ArgumentError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
304
- end
305
-
306
- log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
307
-
308
- File.open( file_name ) do |t|
309
- t.each do |line|
310
- if line.chomp.empty?
311
- lines << line
312
- objects << self.new
313
- next
314
- end
315
- x = self.new(line)
316
-
317
- attribs.each do |a|
318
- old_value = x.instance_variable_get( "@#{a}" )
319
- x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
320
- end
321
-
322
- objects << x
323
- lines << x.to_s
324
- end
325
- end
326
-
327
- return lines, objects
328
- end
329
- end # END class methods
330
-
331
- # Open and parse a file, replacing a value in the specfied field.
332
- # Does not update the file itself. Does not write a new output file.
333
- #
334
- # Returns :
335
- # 1) full collection of updated lines
336
- # 2) collection of file def objects (self), with updated value.
337
- #
338
- # Finds values matching old_value, and also accepts an optional regex for more powerful
339
- # matching strategies of values on the specfified field.
340
- #
341
- # Replaces matches with new_value.
342
- #
343
- # Accepts more than one field, if files is either and array of strings
344
- # or comma seperated list of fields.
345
- #
346
- def file_set_field( file_name, field, old_value, new_value, regex = nil )
347
-
348
- map = {old_value => new_value}
349
-
350
- return file_set_field_by_map(file_name, field, map, regex)
351
- end
352
-
353
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Jan 2011
4
+ # License:: MIT
5
+ #
6
+ # Details:: This module acts as helpers for defining input/output file formats as classes.
7
+ #
8
+ # It provides a simple interface to define a file structure - field by field.
9
+ #
10
+ # By defining the structure, following methods and attributes are mixed in :
11
+ #
12
+ # An attribute, with accessor for each field/column.
13
+ # Parse a line, assigning values to each attribute.
14
+ # Parse an instance of that file line by line, accepts a block in which data can be processed.
15
+ # Method to split a file by field.
16
+ # Method to perform replace operations on a file by field and value.
17
+ #
18
+ # Either delimited or a fixed width definition can be created via macro-like class methods :
19
+ #
20
+ # create_field_definition [field_list]
21
+ #
22
+ # create_fixed_definition {field => range }
23
+ #
24
+ # Member attributes, with getters and setters, can be added for each field defined above via class method :
25
+ #
26
+ # create_field_attr_accessors
27
+ #
28
+ # USAGE :
29
+ #
30
+ # Create a class that contains definition of a file.
31
+ #
32
+ # class ExampleFixedWith < FileDefinitionBase
33
+ # create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
34
+ #
35
+ # create_field_attr_accessors
36
+ # end
37
+ #
38
+ # class ExampleCSV < FileDefinitionBase
39
+ # create_field_definition %w{abc def ghi jkl}
40
+ #
41
+ # create_field_attr_accessors
42
+ # end
43
+ #
44
+ # Any instance can then be used to parse the defined file type, with each field or column value
45
+ # being assigned automatically to the associated instance variable.
46
+ #
47
+ # line = '1,2,3,4'
48
+ # x = ExampleCSV.new( line )
49
+ #
50
+ # assert x.respond_to? :jkl
51
+ # assert_equal x.abc, '1'
52
+ # assert_equal x.jkl.to_i, 4
53
+ #
54
+ module FileDefinitions
55
+
56
+ include Enumerable
57
+
58
+ attr_accessor :key
59
+ attr_accessor :current_line
60
+
61
+ # Set the delimiter to use when splitting a line - can be either a String, or a Regexp
62
+ attr_writer :field_delim
63
+
64
+ def initialize( line = nil )
65
+ @key = String.new
66
+ parse(line) unless line.nil?
67
+ end
68
+
69
+ def self.included(base)
70
+ base.extend(ClassMethods)
71
+ subclasses << base
72
+ end
73
+
74
+ def self.subclasses
75
+ @subclasses ||=[]
76
+ end
77
+
78
+
79
+ # Return the field delimiter used when splitting a line
80
+ def field_delim
81
+ @field_delim || ','
82
+ end
83
+
84
+ # Parse each line of a file based on the field definition, yields self for each successive line
85
+ #
86
+ def each( file )
87
+ File::new(file).each_line do |line|
88
+ parse( line )
89
+ yield self
90
+ end
91
+ end
92
+
93
+ def fields
94
+ @fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
95
+ @fields
96
+ end
97
+
98
+ def to_s
99
+ fields.join(',')
100
+ end
101
+
102
+ module ClassMethods
103
+
104
+ # Helper to generate methods to store and return the complete list of fields
105
+ # in this File definition (also creates member @field_definition) and parse a line.
106
+ #
107
+ # e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
108
+ #
109
+ def create_field_definition( *fields )
110
+ instance_eval <<-end_eval
111
+ @field_definition ||= %w{ #{fields.join(' ')} }
112
+ def field_definition
113
+ @field_definition
114
+ end
115
+ end_eval
116
+
117
+ class_eval <<-end_eval
118
+ def parse( line )
119
+ @current_line = line
120
+ before_parse if respond_to? :before_parse
121
+ @current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
122
+ after_parse if respond_to? :after_parse
123
+ generate_key if respond_to? :generate_key
124
+ end
125
+ end_eval
126
+ end
127
+
128
+ def add_field(field, add_accessor = true)
129
+ @field_definition ||= []
130
+ @field_definition << field.to_s
131
+ attr_accessor field if(add_accessor)
132
+ end
133
+
134
+
135
+ # Helper to generate methods that return the complete list of fixed width fields
136
+ # and associated ranges in this File definition, and parse a line.
137
+ # e.g create_fixed_definition( :name => (0..7), :value => (8..15), :ccy => (16..18) )
138
+ #
139
+ def create_fixed_definition( field_range_map )
140
+ raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
141
+
142
+ keys = field_range_map.keys.collect(&:to_s)
143
+ string_map = Hash[*keys.zip(field_range_map.values).flatten]
144
+
145
+ instance_eval <<-end_eval
146
+ def fixed_definition
147
+ @fixed_definition ||= #{string_map.inspect}
148
+ @fixed_definition
149
+ end
150
+ end_eval
151
+
152
+ instance_eval <<-end_eval
153
+ def field_definition
154
+ @field_definition ||= %w{ #{keys.join(' ')} }
155
+ @field_definition
156
+ end
157
+ end_eval
158
+
159
+ class_eval <<-end_eval
160
+ def parse( line )
161
+ @current_line = line
162
+ before_parse if respond_to? :before_parse
163
+ self.class.fixed_definition.each do |key, range|
164
+ instance_variable_set(\"@\#{key}\", @current_line[range])
165
+ end
166
+ after_parse if respond_to? :after_parse
167
+ generate_key if respond_to? :generate_key
168
+ end
169
+ end_eval
170
+
171
+ end
172
+
173
+ # Create accessors for each field
174
+ def create_field_attr_accessors
175
+ self.field_definition.each {|f| attr_accessor f}
176
+ end
177
+
178
+
179
+ ###############################
180
+ # PARSING + FILE MANIPULATION #
181
+ ###############################
182
+
183
+ # Parse a complete file and return array of self, one per line
184
+ def parse_file( file, options = {} )
185
+ limit = options[:limit]
186
+ count = 0
187
+ lines = []
188
+ File::new(file).each_line do |line|
189
+ break if limit && ((count += 1) > limit)
190
+ lines << self.new( line )
191
+ end
192
+ lines
193
+ end
194
+
195
+
196
+
197
+ # Split a file, whose field definition is represented by self,
198
+ # into separate streams, based on the values of one of its fields.
199
+ #
200
+ # Writes the results, one file per split stream, to directory specified by output_path
201
+ #
202
+ # Options:
203
+ #
204
+ # :keys => Also write split files of the key fields
205
+ #
206
+ # :filter => Optional Regular Expression to act as a filter applied to the field.
207
+ # For example split by Ccy but filter to only include certain ccys pass
208
+ # filter => 'GBP|USD'
209
+ #
210
# Split file_name into streams via split_on and write one file per stream.
#
# file_name   - path of the file to split.
# field       - name of the field whose values define the streams.
# output_path - directory handed to RecsBase::write; defaults to '.' when nil.
# options     - passed through to split_on; additionally
#               :keys => when truthy, also write a "keys_<field>_<value>.csv"
#                        file per stream built from each record's key.
#
# Returns nil when nothing matched, otherwise the Hash returned by split_on.
def split_on_write(file_name, field, output_path, options = {})
  path = output_path || '.'

  filtered = split_on(file_name, field, options)
  return if filtered.empty?

  log :info, "Writing seperate streams to #{path}"

  # Test the option's value rather than its mere presence, so that
  # :keys => false does not produce key files.
  if options[:keys]
    filtered.each do |strm, objects|
      RecsBase::write({ "keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n") }, path)
    end
  end

  filtered.each do |strm, objects|
    RecsBase::write({ "#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n") }, path)
  end
end
224
+
225
+ # Split a file, whose field definition is represented by self,
226
+ # into separate streams, based on one of its fields.
227
+ #
228
+ # Returns a map of Field value => File def object
229
+ #
230
+ # We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
231
+ #
232
+ # Users can get at the raw line simply by calling the line() method on File Def object
233
+ #
234
+ # Options:
235
+ #
236
+ # :output_path => directory to write the individual streams files to
237
+ #
238
+ # :filter => Optional Regular Expression to act as a filter to be applied to the field.
239
+ # For example split by Ccy but filter to only include certain ccys pass
240
+ # filter => 'GBP|USD|EUR'
241
+ #
242
# Split file_name into streams keyed by the value each line has for field.
#
# file_name - path of the file to read.
# field     - name of an accessor on instances of self.
# options   - :filter => pattern (String or Regexp) a field value must match
#                        for its record to be kept.
#             :sort   => when truthy, sort the records of every stream in
#                        place (records must then implement <=>).
#
# Returns a Hash of field value => Array of instances of self. The Hash is
# empty when field is not defined on self; a warning is logged in that case.
def split_on(file_name, field, options = {})
  regex = options[:filter] ? Regexp.new(options[:filter]) : nil

  log :debug, "Using REGEX: #{regex.inspect}" if regex

  filtered = {}

  if self.new.respond_to?(field)
    log :info, "Splitting on #{field}"

    File.open(file_name) do |io|
      io.each_line do |raw|
        # Use chomp, not chomp!: the bang form returns nil when there is no
        # terminator to strip, which dropped a final line lacking a newline.
        line = raw.chomp
        record = self.new(line)

        value = record.send(field.to_sym) # the actual field value from the specified field column
        next if value.nil?
        next unless regex.nil? || value.match(regex)

        (filtered[value] ||= []) << record
      end
    end
  else
    # self is the file-definition class here, so report its own name.
    log :warn, "Field [#{field}] not defined for file definition #{name}"
  end

  # sort! so the ordering is actually kept; plain sort returned a discarded copy.
  filtered.each_value(&:sort!) if options[:sort]

  filtered
end
277
+
278
+ # Open and parse a file, replacing a value in the specified field.
279
+ # Does not update the file itself. Does not write a new output file.
280
+ #
281
+ # Returns :
282
+ # 1) full collection of updated lines
283
+ # 2) collection of file def objects (self), with updated value.
284
+ #
285
+ # Finds values matching old_value in given map
286
+ #
287
+ # Replaces matches with new_value in map.
288
+ #
289
+ # Accepts more than one field, if fields is either an array of strings
290
+ # or a comma separated list of fields.
291
+ #
292
# Parse file_name and replace values of the given field(s) according to
# value_map. Neither the input file nor any output file is written.
#
# file_name - path of the file to read.
# fields    - Array of field names, or a comma separated String of them.
# value_map - Hash of old field value => new field value.
# regex     - optional pattern. A field value that is not itself a key of
#             value_map but matches regex is also replaced: with the value
#             of the first map key matching regex, or else the map's first
#             value.
#             NOTE(review): the original regex branch could not run (it
#             referenced an undefined local), so this rule is an
#             interpretation of the intent - confirm against callers.
#
# Returns two Arrays: the updated lines (record.to_s per non-blank line,
# blank lines passed through untouched) and the matching instances of self.
#
# Raises ArgumentError when a field is not an accessor on self's instances.
def file_set_field_by_map(file_name, fields, value_map, regex = nil)
  lines = []
  objects = []

  attribs = fields.is_a?(Array) ? fields : fields.to_s.split(',')
  attribs = attribs.map { |attrib| attrib.to_s.strip }

  # Validate with each, not collect!: collect! replaced every field name
  # with the block's nil result, breaking the updates below.
  probe = self.new
  attribs.each do |attrib|
    raise ArgumentError, "Field: #{attrib} is not a field on #{name}" unless probe.respond_to?(attrib)
  end

  log :info, "#{name} - updating field(s) #{fields} in #{file_name}"

  File.open(file_name) do |io|
    io.each_line do |line|
      # Blank lines are preserved as-is, paired with an empty record.
      if line.chomp.empty?
        lines << line
        objects << self.new
        next
      end

      record = self.new(line)

      attribs.each do |attrib|
        ivar = "@#{attrib}"
        old_value = record.instance_variable_get(ivar)
        replacement = value_map[old_value]

        if !replacement && regex && !old_value.nil? && old_value.to_s.match(regex)
          matching_key = value_map.keys.detect { |key| key.to_s.match(regex) }
          replacement = matching_key ? value_map[matching_key] : value_map.values.first
        end

        record.instance_variable_set(ivar, replacement) if replacement
      end

      objects << record
      lines << record.to_s
    end
  end

  return lines, objects
end
329
+ end # END class methods
330
+
331
+ # Open and parse a file, replacing a value in the specified field.
332
+ # Does not update the file itself. Does not write a new output file.
333
+ #
334
+ # Returns :
335
+ # 1) full collection of updated lines
336
+ # 2) collection of file def objects (self), with updated value.
337
+ #
338
+ # Finds values matching old_value, and also accepts an optional regex for more powerful
339
+ # matching strategies of values on the specified field.
340
+ #
341
+ # Replaces matches with new_value.
342
+ #
343
+ # Accepts more than one field, if field is either an array of strings
344
+ # or a comma separated list of fields.
345
+ #
346
# Convenience wrapper around file_set_field_by_map for a single
# old_value => new_value replacement.
#
# file_name - path of the file to read.
# field     - field name(s), as accepted by file_set_field_by_map.
# old_value - field value to look for.
# new_value - value to put in its place.
# regex     - optional pattern, forwarded unchanged.
#
# Returns whatever file_set_field_by_map returns.
def file_set_field(file_name, field, old_value, new_value, regex = nil)
  map = { old_value => new_value }

  # This method sits after the "END class methods" marker, while
  # file_set_field_by_map is defined among the class methods, so a bare call
  # from an instance would raise NoMethodError. Dispatch to the receiver
  # itself when it provides the method, otherwise to its class.
  target = respond_to?(:file_set_field_by_map, true) ? self : self.class

  target.send(:file_set_field_by_map, file_name, field, map, regex)
end
352
+
353
+ end