ar_loader 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +9 -9
- data/README.markdown +268 -221
- data/Rakefile +76 -76
- data/lib/VERSION +1 -1
- data/lib/ar_loader.rb +87 -66
- data/lib/ar_loader/exceptions.rb +2 -0
- data/lib/{engine → ar_loader}/file_definitions.rb +353 -353
- data/lib/{engine → ar_loader}/mapping_file_definitions.rb +87 -87
- data/lib/ar_loader/method_detail.rb +257 -0
- data/lib/ar_loader/method_mapper.rb +213 -0
- data/lib/helpers/jruby/jexcel_file.rb +187 -0
- data/lib/{engine → helpers/jruby}/word.rb +79 -70
- data/lib/helpers/spree_helper.rb +85 -0
- data/lib/loaders/csv_loader.rb +87 -0
- data/lib/loaders/excel_loader.rb +132 -0
- data/lib/loaders/loader_base.rb +205 -73
- data/lib/loaders/spree/image_loader.rb +45 -41
- data/lib/loaders/spree/product_loader.rb +140 -91
- data/lib/to_b.rb +24 -24
- data/spec/csv_loader_spec.rb +27 -0
- data/spec/database.yml +19 -6
- data/spec/db/migrate/20110803201325_create_test_bed.rb +78 -0
- data/spec/excel_loader_spec.rb +113 -98
- data/spec/fixtures/BadAssociationName.xls +0 -0
- data/spec/fixtures/DemoNegativeTesting.xls +0 -0
- data/spec/fixtures/DemoTestModelAssoc.xls +0 -0
- data/spec/fixtures/ProjectsMultiCategories.xls +0 -0
- data/spec/fixtures/SimpleProjects.xls +0 -0
- data/spec/fixtures/SpreeProducts.xls +0 -0
- data/spec/fixtures/SpreeZoneExample.csv +5 -0
- data/spec/fixtures/SpreeZoneExample.xls +0 -0
- data/spec/loader_spec.rb +116 -0
- data/spec/logs/test.log +5000 -0
- data/spec/method_mapper_spec.rb +222 -0
- data/spec/models.rb +55 -0
- data/spec/spec_helper.rb +85 -18
- data/spec/spree_loader_spec.rb +223 -157
- data/tasks/config/seed_fu_product_template.erb +15 -15
- data/tasks/config/tidy_config.txt +12 -12
- data/tasks/db_tasks.rake +64 -64
- data/tasks/excel_loader.rake +63 -113
- data/tasks/file_tasks.rake +36 -37
- data/tasks/loader.rake +45 -0
- data/tasks/spree/image_load.rake +108 -107
- data/tasks/spree/product_loader.rake +49 -107
- data/tasks/word_to_seedfu.rake +166 -166
- metadata +66 -61
- data/lib/engine/jruby/jexcel_file.rb +0 -182
- data/lib/engine/jruby/method_mapper_excel.rb +0 -44
- data/lib/engine/method_detail.rb +0 -140
- data/lib/engine/method_mapper.rb +0 -157
- data/lib/engine/method_mapper_csv.rb +0 -28
- data/spec/db/migrate/20110803201325_create_testbed.rb +0 -25
data/Rakefile
CHANGED
@@ -1,76 +1,76 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
|
-
require 'rake/clean'
|
4
|
-
require 'rake/gempackagetask'
|
5
|
-
require 'rake/rdoctask'
|
6
|
-
require 'rake/testtask'
|
7
|
-
require "lib/ar_loader"
|
8
|
-
|
9
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
10
|
-
# Author :: Tom Statter
|
11
|
-
# Date :: Aug 2010
|
12
|
-
#
|
13
|
-
# License:: MIT - Free, OpenSource
|
14
|
-
#
|
15
|
-
# Details:: Gem::Specification for Active Record Loader gem.
|
16
|
-
#
|
17
|
-
# Specifically enabled for uploading Spree products but easily
|
18
|
-
# extended to any AR model.
|
19
|
-
#
|
20
|
-
# Currently supports direct access to Excel spreadsheets via JRuby
|
21
|
-
#
|
22
|
-
# TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
|
23
|
-
#
|
24
|
-
ArLoader::
|
25
|
-
|
26
|
-
spec = Gem::Specification.new do |s|
|
27
|
-
s.name = ArLoader.gem_name
|
28
|
-
s.version = ArLoader.gem_version
|
29
|
-
s.has_rdoc = true
|
30
|
-
s.extra_rdoc_files = ['README.markdown', 'LICENSE']
|
31
|
-
s.summary = 'File based loader for Active Record models'
|
32
|
-
s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
|
33
|
-
s.author = 'thomas statter'
|
34
|
-
s.email = 'rubygems@autotelik.co.uk'
|
35
|
-
s.date = DateTime.now.strftime("%Y-%m-%d")
|
36
|
-
s.homepage = %q{http://www.autotelik.co.uk}
|
37
|
-
|
38
|
-
# s.executables = ['your_executable_here']
|
39
|
-
s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
|
40
|
-
s.require_path = "lib"
|
41
|
-
s.bindir = "bin"
|
42
|
-
end
|
43
|
-
|
44
|
-
Rake::GemPackageTask.new(spec) do |p|
|
45
|
-
p.gem_spec = spec
|
46
|
-
p.need_tar = true
|
47
|
-
p.need_zip = true
|
48
|
-
end
|
49
|
-
|
50
|
-
Rake::RDocTask.new do |rdoc|
|
51
|
-
files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
|
52
|
-
rdoc.rdoc_files.add(files)
|
53
|
-
rdoc.main = "README.markdown" # page to start on
|
54
|
-
rdoc.title = "ARLoader Docs"
|
55
|
-
rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
|
56
|
-
rdoc.options << '--line-numbers'
|
57
|
-
end
|
58
|
-
|
59
|
-
Rake::TestTask.new do |t|
|
60
|
-
t.test_files = FileList['test/**/*.rb']
|
61
|
-
end
|
62
|
-
|
63
|
-
# Add in our own Tasks
|
64
|
-
|
65
|
-
# Long parameter lists so ensure rake -T produces nice wide output
|
66
|
-
ENV['RAKE_COLUMNS'] = '180'
|
67
|
-
|
68
|
-
desc 'Build gem and install in one step'
|
69
|
-
task :pik_install, :needs => [:gem] do |t, args|
|
70
|
-
|
71
|
-
puts "Installing version #{ArLoader.gem_version}"
|
72
|
-
|
73
|
-
gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
|
74
|
-
cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
|
75
|
-
system(cmd)
|
76
|
-
end
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
require 'rake/rdoctask'
|
6
|
+
require 'rake/testtask'
|
7
|
+
require "lib/ar_loader"
|
8
|
+
|
9
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
10
|
+
# Author :: Tom Statter
|
11
|
+
# Date :: Aug 2010
|
12
|
+
#
|
13
|
+
# License:: MIT - Free, OpenSource
|
14
|
+
#
|
15
|
+
# Details:: Gem::Specification for Active Record Loader gem.
|
16
|
+
#
|
17
|
+
# Specifically enabled for uploading Spree products but easily
|
18
|
+
# extended to any AR model.
|
19
|
+
#
|
20
|
+
# Currently supports direct access to Excel spreadsheets via JRuby
|
21
|
+
#
|
22
|
+
# TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
|
23
|
+
#
|
24
|
+
ArLoader::load_tasks
|
25
|
+
|
26
|
+
spec = Gem::Specification.new do |s|
|
27
|
+
s.name = ArLoader.gem_name
|
28
|
+
s.version = ArLoader.gem_version
|
29
|
+
s.has_rdoc = true
|
30
|
+
s.extra_rdoc_files = ['README.markdown', 'LICENSE']
|
31
|
+
s.summary = 'File based loader for Active Record models'
|
32
|
+
s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
|
33
|
+
s.author = 'thomas statter'
|
34
|
+
s.email = 'rubygems@autotelik.co.uk'
|
35
|
+
s.date = DateTime.now.strftime("%Y-%m-%d")
|
36
|
+
s.homepage = %q{http://www.autotelik.co.uk}
|
37
|
+
|
38
|
+
# s.executables = ['your_executable_here']
|
39
|
+
s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
|
40
|
+
s.require_path = "lib"
|
41
|
+
s.bindir = "bin"
|
42
|
+
end
|
43
|
+
|
44
|
+
Rake::GemPackageTask.new(spec) do |p|
|
45
|
+
p.gem_spec = spec
|
46
|
+
p.need_tar = true
|
47
|
+
p.need_zip = true
|
48
|
+
end
|
49
|
+
|
50
|
+
Rake::RDocTask.new do |rdoc|
|
51
|
+
files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
|
52
|
+
rdoc.rdoc_files.add(files)
|
53
|
+
rdoc.main = "README.markdown" # page to start on
|
54
|
+
rdoc.title = "ARLoader Docs"
|
55
|
+
rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
|
56
|
+
rdoc.options << '--line-numbers'
|
57
|
+
end
|
58
|
+
|
59
|
+
Rake::TestTask.new do |t|
|
60
|
+
t.test_files = FileList['test/**/*.rb']
|
61
|
+
end
|
62
|
+
|
63
|
+
# Add in our own Tasks
|
64
|
+
|
65
|
+
# Long parameter lists so ensure rake -T produces nice wide output
|
66
|
+
ENV['RAKE_COLUMNS'] = '180'
|
67
|
+
|
68
|
+
desc 'Build gem and install in one step'
|
69
|
+
task :pik_install, :needs => [:gem] do |t, args|
|
70
|
+
|
71
|
+
puts "Installing version #{ArLoader.gem_version}"
|
72
|
+
|
73
|
+
gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
|
74
|
+
cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
|
75
|
+
system(cmd)
|
76
|
+
end
|
data/lib/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.8
|
data/lib/ar_loader.rb
CHANGED
@@ -1,66 +1,87 @@
|
|
1
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
-
# Author :: Tom Statter
|
3
|
-
# Date :: Aug 2010
|
4
|
-
# License:: TBD. Free, Open Source. MIT ?
|
5
|
-
#
|
6
|
-
# Details:: Active Record Loader
|
7
|
-
#
|
8
|
-
require 'active_record'
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Aug 2010
|
4
|
+
# License:: TBD. Free, Open Source. MIT ?
|
5
|
+
#
|
6
|
+
# Details:: Active Record Loader
|
7
|
+
#
|
8
|
+
require 'active_record'
|
9
|
+
require 'rbconfig'
|
10
|
+
|
11
|
+
module Guards
|
12
|
+
|
13
|
+
def self.jruby?
|
14
|
+
return RUBY_PLATFORM == "java"
|
15
|
+
end
|
16
|
+
def self.mac?
|
17
|
+
RbConfig::CONFIG['target_os'] =~ /darwin/i
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.linux?
|
21
|
+
RbConfig::CONFIG['target_os'] =~ /linux/i
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.windows?
|
25
|
+
RbConfig::CONFIG['target_os'] =~ /mswin|mingw/i
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
module ArLoader
|
31
|
+
|
32
|
+
def self.gem_version
|
33
|
+
@gem_version ||= File.read( File.join( root_path, 'lib', 'VERSION') ).chomp
|
34
|
+
@gem_version
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.gem_name
|
38
|
+
"ar_loader"
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.root_path
|
42
|
+
File.expand_path("#{File.dirname(__FILE__)}/..")
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
def self.require_libraries
|
47
|
+
|
48
|
+
loader_libs = %w{ lib }
|
49
|
+
|
50
|
+
# Base search paths - these will be searched recursively
|
51
|
+
loader_paths = []
|
52
|
+
|
53
|
+
loader_libs.each {|l| loader_paths << File.join(root_path(), l) }
|
54
|
+
|
55
|
+
# Define require search paths, any dir in here will be added to LOAD_PATH
|
56
|
+
|
57
|
+
loader_paths.each do |base|
|
58
|
+
$:.unshift base if File.directory?(base)
|
59
|
+
Dir[File.join(base, '**', '**')].each do |p|
|
60
|
+
if File.directory? p
|
61
|
+
$:.unshift p
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
require_libs = %w{ ar_loader loaders helpers }
|
67
|
+
|
68
|
+
require_libs.each do |base|
|
69
|
+
Dir[File.join('lib', base, '*.rb')].each do |rb|
|
70
|
+
unless File.directory? rb
|
71
|
+
require rb
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
def self.load_tasks
|
79
|
+
# Long parameter lists so ensure rake -T produces nice wide output
|
80
|
+
ENV['RAKE_COLUMNS'] = '180'
|
81
|
+
base = File.join(root_path, 'tasks', '**')
|
82
|
+
Dir["#{base}/*.rake"].sort.each { |ext| load ext }
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
86
|
+
|
87
|
+
ArLoader::require_libraries
|
@@ -1,353 +1,353 @@
|
|
1
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
-
# Author :: Tom Statter
|
3
|
-
# Date :: Jan 2011
|
4
|
-
# License:: MIT
|
5
|
-
#
|
6
|
-
# Details:: This module acts as helpers for defining input/output file formats as classes.
|
7
|
-
#
|
8
|
-
# It provides a simple interface to define a file structure - field by field.
|
9
|
-
#
|
10
|
-
# By defining the structure, following methods and attributes are mixed in :
|
11
|
-
#
|
12
|
-
# An attribute, with accessor for each field/column.
|
13
|
-
# Parse a line, assigning values to each attribute.
|
14
|
-
# Parse an instance of that file line by line, accepts a block in which data can be processed.
|
15
|
-
# Method to split a file by field.
|
16
|
-
# Method to perform replace operations on a file by field and value.
|
17
|
-
#
|
18
|
-
# Either delimited or a fixed width definition can be created via macro-like class methods :
|
19
|
-
#
|
20
|
-
# create_field_definition [field_list]
|
21
|
-
#
|
22
|
-
# create_fixed_definition {field => range }
|
23
|
-
#
|
24
|
-
# Member attributes, with getters and setters, can be added for each field defined above via class method :
|
25
|
-
#
|
26
|
-
# create_field_attr_accessors
|
27
|
-
#
|
28
|
-
# USAGE :
|
29
|
-
#
|
30
|
-
# Create a class that contains definition of a file.
|
31
|
-
#
|
32
|
-
# class ExampleFixedWith < FileDefinitionBase
|
33
|
-
# create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
|
34
|
-
#
|
35
|
-
# create_field_attr_accessors
|
36
|
-
# end
|
37
|
-
#
|
38
|
-
# class ExampleCSV < FileDefinitionBase
|
39
|
-
# create_field_definition %w{abc def ghi jkl}
|
40
|
-
#
|
41
|
-
# create_field_attr_accessors
|
42
|
-
# end
|
43
|
-
#
|
44
|
-
# Any instance can then be used to parse the defined file type, with each field or column value
|
45
|
-
# being assigned automatically to the associated instance variable.
|
46
|
-
#
|
47
|
-
# line = '1,2,3,4'
|
48
|
-
# x = ExampleCSV.new( line )
|
49
|
-
#
|
50
|
-
# assert x.responds_to? :jkl
|
51
|
-
# assert_equal x.abc, '1'
|
52
|
-
# assert_equal x.jkl.to_i, 4
|
53
|
-
#
|
54
|
-
module FileDefinitions
|
55
|
-
|
56
|
-
include Enumerable
|
57
|
-
|
58
|
-
attr_accessor :key
|
59
|
-
attr_accessor :current_line
|
60
|
-
|
61
|
-
# Set the delimiter to use when splitting a line - can be either a String, or a Regexp
|
62
|
-
attr_writer :field_delim
|
63
|
-
|
64
|
-
def initialize( line = nil )
|
65
|
-
@key = String.new
|
66
|
-
parse(line) unless line.nil?
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.included(base)
|
70
|
-
base.extend(ClassMethods)
|
71
|
-
subclasses << base
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.subclasses
|
75
|
-
@subclasses ||=[]
|
76
|
-
end
|
77
|
-
|
78
|
-
|
79
|
-
# Return the field delimiter used when splitting a line
|
80
|
-
def field_delim
|
81
|
-
@field_delim || ','
|
82
|
-
end
|
83
|
-
|
84
|
-
# Parse each line of a file based on the field definition, yields self for each successive line
|
85
|
-
#
|
86
|
-
def each( file )
|
87
|
-
File::new(file).each_line do |line|
|
88
|
-
parse( line )
|
89
|
-
yield self
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def fields
|
94
|
-
@fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
|
95
|
-
@fields
|
96
|
-
end
|
97
|
-
|
98
|
-
def to_s
|
99
|
-
fields.join(',')
|
100
|
-
end
|
101
|
-
|
102
|
-
module ClassMethods
|
103
|
-
|
104
|
-
# Helper to generate methods to store and return the complete list of fields
|
105
|
-
# in this File definition (also creates member @field_definition) and parse a line.
|
106
|
-
#
|
107
|
-
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
108
|
-
#
|
109
|
-
def create_field_definition( *fields )
|
110
|
-
instance_eval <<-end_eval
|
111
|
-
@field_definition ||= %w{ #{fields.join(' ')} }
|
112
|
-
def field_definition
|
113
|
-
@field_definition
|
114
|
-
end
|
115
|
-
end_eval
|
116
|
-
|
117
|
-
class_eval <<-end_eval
|
118
|
-
def parse( line )
|
119
|
-
@current_line = line
|
120
|
-
before_parse if respond_to? :before_parse
|
121
|
-
@current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
|
122
|
-
after_parse if respond_to? :after_parse
|
123
|
-
generate_key if respond_to? :generate_key
|
124
|
-
end
|
125
|
-
end_eval
|
126
|
-
end
|
127
|
-
|
128
|
-
def add_field(field, add_accessor = true)
|
129
|
-
@field_definition ||= []
|
130
|
-
@field_definition << field.to_s
|
131
|
-
attr_accessor field if(add_accessor)
|
132
|
-
end
|
133
|
-
|
134
|
-
|
135
|
-
# Helper to generate methods that return the complete list of fixed width fields
|
136
|
-
# and associated ranges in this File definition, and parse a line.
|
137
|
-
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
138
|
-
#
|
139
|
-
def create_fixed_definition( field_range_map )
|
140
|
-
raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
|
141
|
-
|
142
|
-
keys = field_range_map.keys.collect(&:to_s)
|
143
|
-
string_map = Hash[*keys.zip(field_range_map.values).flatten]
|
144
|
-
|
145
|
-
instance_eval <<-end_eval
|
146
|
-
def fixed_definition
|
147
|
-
@fixed_definition ||= #{string_map.inspect}
|
148
|
-
@fixed_definition
|
149
|
-
end
|
150
|
-
end_eval
|
151
|
-
|
152
|
-
instance_eval <<-end_eval
|
153
|
-
def field_definition
|
154
|
-
@field_definition ||= %w{ #{keys.join(' ')} }
|
155
|
-
@field_definition
|
156
|
-
end
|
157
|
-
end_eval
|
158
|
-
|
159
|
-
class_eval <<-end_eval
|
160
|
-
def parse( line )
|
161
|
-
@current_line = line
|
162
|
-
before_parse if respond_to? :before_parse
|
163
|
-
self.class.fixed_definition.each do |key, range|
|
164
|
-
instance_variable_set(\"@\#{key}\", @current_line[range])
|
165
|
-
end
|
166
|
-
after_parse if respond_to? :after_parse
|
167
|
-
generate_key if respond_to? :generate_key
|
168
|
-
end
|
169
|
-
end_eval
|
170
|
-
|
171
|
-
end
|
172
|
-
|
173
|
-
# Create accessors for each field
|
174
|
-
def create_field_attr_accessors
|
175
|
-
self.field_definition.each {|f| attr_accessor f}
|
176
|
-
end
|
177
|
-
|
178
|
-
|
179
|
-
###############################
|
180
|
-
# PARSING + FILE MANIPULATION #
|
181
|
-
###############################
|
182
|
-
|
183
|
-
# Parse a complete file and return array of self, one per line
|
184
|
-
def parse_file( file, options = {} )
|
185
|
-
limit = options[:limit]
|
186
|
-
count = 0
|
187
|
-
lines = []
|
188
|
-
File::new(file).each_line do |line|
|
189
|
-
break if limit && ((count += 1) > limit)
|
190
|
-
lines << self.new( line )
|
191
|
-
end
|
192
|
-
lines
|
193
|
-
end
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
# Split a file, whose field definition is represented by self,
|
198
|
-
# into separate streams, based on the values of one of its fields.
|
199
|
-
#
|
200
|
-
# Writes the results, one file per split stream, to directory specified by output_path
|
201
|
-
#
|
202
|
-
# Options:
|
203
|
-
#
|
204
|
-
# :keys => Also write split files of the key fields
|
205
|
-
#
|
206
|
-
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
207
|
-
# For example split by Ccy but filter to only include certain ccys pass
|
208
|
-
# filter => 'GBP|USD'
|
209
|
-
#
|
210
|
-
def split_on_write( file_name, field, output_path, options = {} )
|
211
|
-
|
212
|
-
path = output_path || '.'
|
213
|
-
|
214
|
-
filtered = split_on( file_name, field, options )
|
215
|
-
|
216
|
-
unless filtered.empty?
|
217
|
-
log :info, "Writing seperate streams to #{path}"
|
218
|
-
|
219
|
-
filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
|
220
|
-
|
221
|
-
filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
# Split a file, whose field definition is represented by self,
|
226
|
-
# into separate streams, based on one of its fields.
|
227
|
-
#
|
228
|
-
# Returns a map of Field value => File def object
|
229
|
-
#
|
230
|
-
# We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
|
231
|
-
#
|
232
|
-
# Users can get at the raw line simply by calling the current_line() method on the File Def object
|
233
|
-
#
|
234
|
-
# Options:
|
235
|
-
#
|
236
|
-
# :output_path => directory to write the individual streams files to
|
237
|
-
#
|
238
|
-
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
239
|
-
# For example split by Ccy but filter to only include certain ccys pass
|
240
|
-
# filter => 'GBP|USD|EUR'
|
241
|
-
#
|
242
|
-
def split_on( file_name, field, options = {} )
|
243
|
-
|
244
|
-
regex = options[:filter] ? Regexp.new(options[:filter]) : nil
|
245
|
-
|
246
|
-
log :debug, "Using REGEX: #{regex.inspect}" if regex
|
247
|
-
|
248
|
-
filtered = {}
|
249
|
-
|
250
|
-
if( self.new.respond_to?(field) )
|
251
|
-
|
252
|
-
log :info, "Splitting on #{field}"
|
253
|
-
|
254
|
-
File.open( file_name ) do |t|
|
255
|
-
t.each do |line|
|
256
|
-
next unless(line && line.chomp!)
|
257
|
-
x = self.new(line)
|
258
|
-
|
259
|
-
value = x.send( field.to_sym ) # the actual field value from the specified field column
|
260
|
-
next if value.nil?
|
261
|
-
|
262
|
-
if( regex.nil? || value.match(regex) )
|
263
|
-
filtered[value] ? filtered[value] << x : filtered[value] = [x]
|
264
|
-
end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
else
|
268
|
-
log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
|
269
|
-
end
|
270
|
-
|
271
|
-
if( options[:sort])
|
272
|
-
filtered.values.each( &:sort )
|
273
|
-
return filtered
|
274
|
-
end
|
275
|
-
return filtered
|
276
|
-
end
|
277
|
-
|
278
|
-
# Open and parse a file, replacing a value in the specified field.
|
279
|
-
# Does not update the file itself. Does not write a new output file.
|
280
|
-
#
|
281
|
-
# Returns :
|
282
|
-
# 1) full collection of updated lines
|
283
|
-
# 2) collection of file def objects (self), with updated value.
|
284
|
-
#
|
285
|
-
# Finds values matching old_value in given map
|
286
|
-
#
|
287
|
-
# Replaces matches with new_value in map.
|
288
|
-
#
|
289
|
-
# Accepts more than one field, if fields is either an array of strings
|
290
|
-
# or a comma separated list of fields.
|
291
|
-
#
|
292
|
-
def file_set_field_by_map( file_name, fields, value_map, regex = nil )
|
293
|
-
|
294
|
-
lines, objects = [],[]
|
295
|
-
|
296
|
-
if fields.is_a?(Array)
|
297
|
-
attribs = fields
|
298
|
-
else
|
299
|
-
attribs = "#{fields}".split(',')
|
300
|
-
end
|
301
|
-
|
302
|
-
attribs.collect! do |attrib|
|
303
|
-
raise BadConfigError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
|
304
|
-
end
|
305
|
-
|
306
|
-
log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
|
307
|
-
|
308
|
-
File.open( file_name ) do |t|
|
309
|
-
t.each do |line|
|
310
|
-
if line.chomp.empty?
|
311
|
-
lines << line
|
312
|
-
objects << self.new
|
313
|
-
next
|
314
|
-
end
|
315
|
-
x = self.new(line)
|
316
|
-
|
317
|
-
attribs.each do |a|
|
318
|
-
old_value = x.instance_variable_get( "@#{a}" )
|
319
|
-
x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
|
320
|
-
end
|
321
|
-
|
322
|
-
objects << x
|
323
|
-
lines << x.to_s
|
324
|
-
end
|
325
|
-
end
|
326
|
-
|
327
|
-
return lines, objects
|
328
|
-
end
|
329
|
-
end # END class methods
|
330
|
-
|
331
|
-
# Open and parse a file, replacing a value in the specified field.
|
332
|
-
# Does not update the file itself. Does not write a new output file.
|
333
|
-
#
|
334
|
-
# Returns :
|
335
|
-
# 1) full collection of updated lines
|
336
|
-
# 2) collection of file def objects (self), with updated value.
|
337
|
-
#
|
338
|
-
# Finds values matching old_value, and also accepts an optional regex for more powerful
|
339
|
-
# matching strategies of values on the specified field.
|
340
|
-
#
|
341
|
-
# Replaces matches with new_value.
|
342
|
-
#
|
343
|
-
# Accepts more than one field, if field is either an array of strings
|
344
|
-
# or a comma separated list of fields.
|
345
|
-
#
|
346
|
-
def file_set_field( file_name, field, old_value, new_value, regex = nil )
|
347
|
-
|
348
|
-
map = {old_value => new_value}
|
349
|
-
|
350
|
-
return file_set_field_by_map(file_name, field, map, regex)
|
351
|
-
end
|
352
|
-
|
353
|
-
end
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Jan 2011
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: This module acts as helpers for defining input/output file formats as classes.
|
7
|
+
#
|
8
|
+
# It provides a simple interface to define a file structure - field by field.
|
9
|
+
#
|
10
|
+
# By defining the structure, following methods and attributes are mixed in :
|
11
|
+
#
|
12
|
+
# An attribute, with accessor for each field/column.
|
13
|
+
# Parse a line, assigning values to each attribute.
|
14
|
+
# Parse an instance of that file line by line, accepts a block in which data can be processed.
|
15
|
+
# Method to split a file by field.
|
16
|
+
# Method to perform replace operations on a file by field and value.
|
17
|
+
#
|
18
|
+
# Either delimited or a fixed width definition can be created via macro-like class methods :
|
19
|
+
#
|
20
|
+
# create_field_definition [field_list]
|
21
|
+
#
|
22
|
+
# create_fixed_definition {field => range }
|
23
|
+
#
|
24
|
+
# Member attributes, with getters and setters, can be added for each field defined above via class method :
|
25
|
+
#
|
26
|
+
# create_field_attr_accessors
|
27
|
+
#
|
28
|
+
# USAGE :
|
29
|
+
#
|
30
|
+
# Create a class that contains definition of a file.
|
31
|
+
#
|
32
|
+
# class ExampleFixedWith < FileDefinitionBase
|
33
|
+
# create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
|
34
|
+
#
|
35
|
+
# create_field_attr_accessors
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# class ExampleCSV < FileDefinitionBase
|
39
|
+
# create_field_definition %w{abc def ghi jkl}
|
40
|
+
#
|
41
|
+
# create_field_attr_accessors
|
42
|
+
# end
|
43
|
+
#
|
44
|
+
# Any instance can then be used to parse the defined file type, with each field or column value
|
45
|
+
# being assigned automatically to the associated instance variable.
|
46
|
+
#
|
47
|
+
# line = '1,2,3,4'
|
48
|
+
# x = ExampleCSV.new( line )
|
49
|
+
#
|
50
|
+
# assert x.responds_to? :jkl
|
51
|
+
# assert_equal x.abc, '1'
|
52
|
+
# assert_equal x.jkl.to_i, 4
|
53
|
+
#
|
54
|
+
module FileDefinitions
|
55
|
+
|
56
|
+
include Enumerable
|
57
|
+
|
58
|
+
attr_accessor :key
|
59
|
+
attr_accessor :current_line
|
60
|
+
|
61
|
+
# Set the delimiter to use when splitting a line - can be either a String, or a Regexp
|
62
|
+
attr_writer :field_delim
|
63
|
+
|
64
|
+
def initialize( line = nil )
|
65
|
+
@key = String.new
|
66
|
+
parse(line) unless line.nil?
|
67
|
+
end
|
68
|
+
|
69
|
+
def self.included(base)
|
70
|
+
base.extend(ClassMethods)
|
71
|
+
subclasses << base
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.subclasses
|
75
|
+
@subclasses ||=[]
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
# Return the field delimiter used when splitting a line
|
80
|
+
def field_delim
|
81
|
+
@field_delim || ','
|
82
|
+
end
|
83
|
+
|
84
|
+
# Parse each line of a file based on the field definition, yields self for each successive line
|
85
|
+
#
|
86
|
+
def each( file )
|
87
|
+
File::new(file).each_line do |line|
|
88
|
+
parse( line )
|
89
|
+
yield self
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def fields
|
94
|
+
@fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
|
95
|
+
@fields
|
96
|
+
end
|
97
|
+
|
98
|
+
def to_s
|
99
|
+
fields.join(',')
|
100
|
+
end
|
101
|
+
|
102
|
+
module ClassMethods
|
103
|
+
|
104
|
+
# Helper to generate methods to store and return the complete list of fields
|
105
|
+
# in this File definition (also creates member @field_definition) and parse a line.
|
106
|
+
#
|
107
|
+
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
108
|
+
#
|
109
|
+
def create_field_definition( *fields )
|
110
|
+
instance_eval <<-end_eval
|
111
|
+
@field_definition ||= %w{ #{fields.join(' ')} }
|
112
|
+
def field_definition
|
113
|
+
@field_definition
|
114
|
+
end
|
115
|
+
end_eval
|
116
|
+
|
117
|
+
class_eval <<-end_eval
|
118
|
+
def parse( line )
|
119
|
+
@current_line = line
|
120
|
+
before_parse if respond_to? :before_parse
|
121
|
+
@current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
|
122
|
+
after_parse if respond_to? :after_parse
|
123
|
+
generate_key if respond_to? :generate_key
|
124
|
+
end
|
125
|
+
end_eval
|
126
|
+
end
|
127
|
+
|
128
|
+
def add_field(field, add_accessor = true)
|
129
|
+
@field_definition ||= []
|
130
|
+
@field_definition << field.to_s
|
131
|
+
attr_accessor field if(add_accessor)
|
132
|
+
end
|
133
|
+
|
134
|
+
|
135
|
+
# Helper to generate methods that return the complete list of fixed width fields
|
136
|
+
# and associated ranges in this File definition, and parse a line.
|
137
|
+
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
138
|
+
#
|
139
|
+
def create_fixed_definition( field_range_map )
|
140
|
+
raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
|
141
|
+
|
142
|
+
keys = field_range_map.keys.collect(&:to_s)
|
143
|
+
string_map = Hash[*keys.zip(field_range_map.values).flatten]
|
144
|
+
|
145
|
+
instance_eval <<-end_eval
|
146
|
+
def fixed_definition
|
147
|
+
@fixed_definition ||= #{string_map.inspect}
|
148
|
+
@fixed_definition
|
149
|
+
end
|
150
|
+
end_eval
|
151
|
+
|
152
|
+
instance_eval <<-end_eval
|
153
|
+
def field_definition
|
154
|
+
@field_definition ||= %w{ #{keys.join(' ')} }
|
155
|
+
@field_definition
|
156
|
+
end
|
157
|
+
end_eval
|
158
|
+
|
159
|
+
class_eval <<-end_eval
|
160
|
+
def parse( line )
|
161
|
+
@current_line = line
|
162
|
+
before_parse if respond_to? :before_parse
|
163
|
+
self.class.fixed_definition.each do |key, range|
|
164
|
+
instance_variable_set(\"@\#{key}\", @current_line[range])
|
165
|
+
end
|
166
|
+
after_parse if respond_to? :after_parse
|
167
|
+
generate_key if respond_to? :generate_key
|
168
|
+
end
|
169
|
+
end_eval
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
# Create accessors for each field
|
174
|
+
def create_field_attr_accessors
|
175
|
+
self.field_definition.each {|f| attr_accessor f}
|
176
|
+
end
|
177
|
+
|
178
|
+
|
179
|
+
###############################
|
180
|
+
# PARSING + FILE MANIPULATION #
|
181
|
+
###############################
|
182
|
+
|
183
|
+
# Parse a complete file and return array of self, one per line
|
184
|
+
def parse_file( file, options = {} )
|
185
|
+
limit = options[:limit]
|
186
|
+
count = 0
|
187
|
+
lines = []
|
188
|
+
File::new(file).each_line do |line|
|
189
|
+
break if limit && ((count += 1) > limit)
|
190
|
+
lines << self.new( line )
|
191
|
+
end
|
192
|
+
lines
|
193
|
+
end
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
# Split a file, whose field definition is represented by self,
|
198
|
+
# into separate streams, based on the values of one of its fields.
|
199
|
+
#
|
200
|
+
# Writes the results, one file per split stream, to directory specified by output_path
|
201
|
+
#
|
202
|
+
# Options:
|
203
|
+
#
|
204
|
+
# :keys => Also write split files of the key fields
|
205
|
+
#
|
206
|
+
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
207
|
+
# For example split by Ccy but filter to only include certain ccys pass
|
208
|
+
# filter => 'GBP|USD'
|
209
|
+
#
|
210
|
+
# Splits file_name into streams via split_on and writes each stream out
# through RecsBase::write.
#
# file_name   - file to split (passed straight to split_on).
# field       - field whose values define the streams.
# output_path - directory handed to RecsBase::write; '.' when nil/false.
# options     - forwarded to split_on; when the :keys key is present a
#               "keys_<field>_<stream>.csv" entry (each object's key, newline
#               joined) is written for every stream before the data entries.
#
# Every stream always gets a "<field>_<stream>.csv" entry holding each
# object's current_line, newline joined.
#
# Returns nil when split_on produced no streams, otherwise the stream map.
def split_on_write( file_name, field, output_path, options = {} )
  path = output_path || '.'
  streams = split_on( file_name, field, options )

  return if streams.empty?

  log :info, "Writing seperate streams to #{path}"

  if options.key?(:keys)
    streams.each do |strm, objects|
      key_data = objects.collect(&:key).join("\n")
      RecsBase::write( {"keys_#{field}_#{strm}.csv" => key_data}, path )
    end
  end

  streams.each do |strm, objects|
    line_data = objects.collect(&:current_line).join("\n")
    RecsBase::write( {"#{field}_#{strm}.csv" => line_data}, path )
  end
end
|
224
|
+
|
225
|
+
# Split a file, whose field definition is represented by self,
|
226
|
+
# into separate streams, based on one of its fields.
|
227
|
+
#
|
228
|
+
# Returns a map of Field value => File def object
|
229
|
+
#
|
230
|
+
# We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
|
231
|
+
#
|
232
|
+
# Users can get at the raw line simply by calling the line() method on File Def object
|
233
|
+
#
|
234
|
+
# Options:
|
235
|
+
#
|
236
|
+
# :output_path => directory to write the individual streams files to
|
237
|
+
#
|
238
|
+
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
239
|
+
# For example split by Ccy but filter to only include certain ccys pass
|
240
|
+
# filter => 'GBP|USD|EUR'
|
241
|
+
#
|
242
|
+
# Split file_name into streams keyed by the value of one field.
#
# file_name - path of the file to read.
# field     - name of the field to group on; must be a method the file
#             definition instances respond to, otherwise a warning is logged
#             and an empty map is returned.
# options   - :filter => String/Regexp source; only values matching it are kept.
#             :sort   => when truthy, each stream's objects are sorted in
#                        place (the objects must therefore be comparable).
#
# Returns a Hash of field value => Array of file definition objects, built
# from the chomped lines. Lines whose field value is nil are skipped.
def split_on( file_name, field, options = {} )
  regex = options[:filter] ? Regexp.new(options[:filter]) : nil

  log :debug, "Using REGEX: #{regex.inspect}" if regex

  filtered = {}

  if self.new.respond_to?(field)
    log :info, "Splitting on #{field}"

    File.open( file_name ) do |t|
      t.each do |line|
        # Use chomp rather than chomp!: chomp! returns nil when there is no
        # line terminator to strip, which made the final line of a file
        # without a trailing newline get silently dropped.
        x = self.new(line.chomp)

        value = x.send( field.to_sym ) # the actual field value from the specified field column
        next if value.nil?
        next unless regex.nil? || value.match(regex)

        (filtered[value] ||= []) << x
      end
    end
  else
    # self is the file definition class here, so report its own name
    # (self.class.name would always print "Class").
    log :warn, "Field [#{field}] not defined for file definition #{self.name}"
  end

  # sort! so the ordering is actually kept; a plain sort returns a new
  # array that would be thrown away.
  filtered.each_value(&:sort!) if options[:sort]

  filtered
end
|
277
|
+
|
278
|
+
# Open and parse a file, replacing a value in the specified field.
|
279
|
+
# Does not update the file itself. Does not write a new output file.
|
280
|
+
#
|
281
|
+
# Returns :
|
282
|
+
# 1) full collection of updated lines
|
283
|
+
# 2) collection of file def objects (self), with updated value.
|
284
|
+
#
|
285
|
+
# Finds values matching old_value in given map
|
286
|
+
#
|
287
|
+
# Replaces matches with new_value in map.
|
288
|
+
#
|
289
|
+
# Accepts more than one field, if fields is either an array of strings
|
290
|
+
# or comma separated list of fields.
|
291
|
+
#
|
292
|
+
# Parse file_name and replace values of the given field(s) using value_map.
# The file on disk is not modified and nothing is written.
#
# file_name - path of the file to read.
# fields    - Array of field names, or a comma separated String of names.
# value_map - Hash of old value => new value. A field is updated when its
#             current value is a key with a truthy replacement.
# regex     - optional pattern. When the current value has no direct entry
#             in value_map but matches regex, it is replaced using the first
#             map key that also matches regex, falling back to the first
#             entry of the map.
#             NOTE(review): the original regex branch could never run (it
#             referenced an undefined local and called #keys on the field
#             value); this rule is an interpretation of the intent - confirm.
#
# Returns two arrays: the updated lines (x.to_s per record, blank lines kept
# verbatim) and the corresponding file definition objects.
#
# Raises BadConfigError when a field is not a method on the definition.
def file_set_field_by_map( file_name, fields, value_map, regex = nil )
  lines, objects = [], []

  attribs = fields.is_a?(Array) ? fields : fields.to_s.split(',')
  attribs = attribs.map { |attrib| attrib.to_s.strip }

  # Validate with each, not collect!: the validation block returns nil,
  # so collect! replaced every field name with nil.
  probe = self.new
  attribs.each do |attrib|
    raise BadConfigError.new("Field: #{attrib} is not a field on #{self.name}") unless probe.respond_to?(attrib)
  end

  log :info, "#{self.name} - updating field(s) #{fields} in #{file_name}"

  File.open( file_name ) do |t|
    t.each do |line|
      # Blank lines are passed through untouched, paired with an empty object.
      if line.chomp.empty?
        lines << line
        objects << self.new
        next
      end

      x = self.new(line)

      attribs.each do |a|
        ivar = "@#{a}"
        old_value = x.instance_variable_get( ivar )

        new_value = value_map[old_value]
        if !new_value && regex && old_value.to_s.match(regex)
          key = value_map.keys.detect { |k| k.to_s.match(regex) } || value_map.keys.first
          new_value = value_map[key]
        end

        x.instance_variable_set( ivar, new_value ) if new_value
      end

      objects << x
      lines << x.to_s
    end
  end

  return lines, objects
end
|
329
|
+
end # END class methods
|
330
|
+
|
331
|
+
# Open and parse a file, replacing a value in the specified field.
|
332
|
+
# Does not update the file itself. Does not write a new output file.
|
333
|
+
#
|
334
|
+
# Returns :
|
335
|
+
# 1) full collection of updated lines
|
336
|
+
# 2) collection of file def objects (self), with updated value.
|
337
|
+
#
|
338
|
+
# Finds values matching old_value, and also accepts an optional regex for more powerful
|
339
|
+
# matching strategies of values on the specified field.
|
340
|
+
#
|
341
|
+
# Replaces matches with new_value.
|
342
|
+
#
|
343
|
+
# Accepts more than one field, if field is either an array of strings
|
344
|
+
# or comma separated list of fields.
|
345
|
+
#
|
346
|
+
# Single-value convenience wrapper: builds a one-entry map of
# old_value => new_value and delegates to file_set_field_by_map, returning
# its result unchanged.
#
# NOTE(review): this method is defined after the "END class methods" marker
# while file_set_field_by_map is defined before it - confirm the delegate is
# reachable from this scope.
def file_set_field( file_name, field, old_value, new_value, regex = nil )
  file_set_field_by_map(file_name, field, { old_value => new_value }, regex)
end
|
352
|
+
|
353
|
+
end
|