ar_loader 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +9 -9
- data/README.markdown +268 -221
- data/Rakefile +76 -76
- data/lib/VERSION +1 -1
- data/lib/ar_loader.rb +87 -66
- data/lib/ar_loader/exceptions.rb +2 -0
- data/lib/{engine → ar_loader}/file_definitions.rb +353 -353
- data/lib/{engine → ar_loader}/mapping_file_definitions.rb +87 -87
- data/lib/ar_loader/method_detail.rb +257 -0
- data/lib/ar_loader/method_mapper.rb +213 -0
- data/lib/helpers/jruby/jexcel_file.rb +187 -0
- data/lib/{engine → helpers/jruby}/word.rb +79 -70
- data/lib/helpers/spree_helper.rb +85 -0
- data/lib/loaders/csv_loader.rb +87 -0
- data/lib/loaders/excel_loader.rb +132 -0
- data/lib/loaders/loader_base.rb +205 -73
- data/lib/loaders/spree/image_loader.rb +45 -41
- data/lib/loaders/spree/product_loader.rb +140 -91
- data/lib/to_b.rb +24 -24
- data/spec/csv_loader_spec.rb +27 -0
- data/spec/database.yml +19 -6
- data/spec/db/migrate/20110803201325_create_test_bed.rb +78 -0
- data/spec/excel_loader_spec.rb +113 -98
- data/spec/fixtures/BadAssociationName.xls +0 -0
- data/spec/fixtures/DemoNegativeTesting.xls +0 -0
- data/spec/fixtures/DemoTestModelAssoc.xls +0 -0
- data/spec/fixtures/ProjectsMultiCategories.xls +0 -0
- data/spec/fixtures/SimpleProjects.xls +0 -0
- data/spec/fixtures/SpreeProducts.xls +0 -0
- data/spec/fixtures/SpreeZoneExample.csv +5 -0
- data/spec/fixtures/SpreeZoneExample.xls +0 -0
- data/spec/loader_spec.rb +116 -0
- data/spec/logs/test.log +5000 -0
- data/spec/method_mapper_spec.rb +222 -0
- data/spec/models.rb +55 -0
- data/spec/spec_helper.rb +85 -18
- data/spec/spree_loader_spec.rb +223 -157
- data/tasks/config/seed_fu_product_template.erb +15 -15
- data/tasks/config/tidy_config.txt +12 -12
- data/tasks/db_tasks.rake +64 -64
- data/tasks/excel_loader.rake +63 -113
- data/tasks/file_tasks.rake +36 -37
- data/tasks/loader.rake +45 -0
- data/tasks/spree/image_load.rake +108 -107
- data/tasks/spree/product_loader.rake +49 -107
- data/tasks/word_to_seedfu.rake +166 -166
- metadata +66 -61
- data/lib/engine/jruby/jexcel_file.rb +0 -182
- data/lib/engine/jruby/method_mapper_excel.rb +0 -44
- data/lib/engine/method_detail.rb +0 -140
- data/lib/engine/method_mapper.rb +0 -157
- data/lib/engine/method_mapper_csv.rb +0 -28
- data/spec/db/migrate/20110803201325_create_testbed.rb +0 -25
data/Rakefile
CHANGED
@@ -1,76 +1,76 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
|
-
require 'rake/clean'
|
4
|
-
require 'rake/gempackagetask'
|
5
|
-
require 'rake/rdoctask'
|
6
|
-
require 'rake/testtask'
|
7
|
-
require "lib/ar_loader"
|
8
|
-
|
9
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
10
|
-
# Author :: Tom Statter
|
11
|
-
# Date :: Aug 2010
|
12
|
-
#
|
13
|
-
# License:: MIT - Free, OpenSource
|
14
|
-
#
|
15
|
-
# Details:: Gem::Specification for Active Record Loader gem.
|
16
|
-
#
|
17
|
-
# Specifically enabled for uploading Spree products but easily
|
18
|
-
# extended to any AR model.
|
19
|
-
#
|
20
|
-
# Currently support direct access to Excel Spreedsheets via JRuby
|
21
|
-
#
|
22
|
-
# TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
|
23
|
-
#
|
24
|
-
ArLoader::
|
25
|
-
|
26
|
-
spec = Gem::Specification.new do |s|
|
27
|
-
s.name = ArLoader.gem_name
|
28
|
-
s.version = ArLoader.gem_version
|
29
|
-
s.has_rdoc = true
|
30
|
-
s.extra_rdoc_files = ['README.markdown', 'LICENSE']
|
31
|
-
s.summary = 'File based loader for Active Record models'
|
32
|
-
s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
|
33
|
-
s.author = 'thomas statter'
|
34
|
-
s.email = 'rubygems@autotelik.co.uk'
|
35
|
-
s.date = DateTime.now.strftime("%Y-%m-%d")
|
36
|
-
s.homepage = %q{http://www.autotelik.co.uk}
|
37
|
-
|
38
|
-
# s.executables = ['your_executable_here']
|
39
|
-
s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
|
40
|
-
s.require_path = "lib"
|
41
|
-
s.bindir = "bin"
|
42
|
-
end
|
43
|
-
|
44
|
-
Rake::GemPackageTask.new(spec) do |p|
|
45
|
-
p.gem_spec = spec
|
46
|
-
p.need_tar = true
|
47
|
-
p.need_zip = true
|
48
|
-
end
|
49
|
-
|
50
|
-
Rake::RDocTask.new do |rdoc|
|
51
|
-
files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
|
52
|
-
rdoc.rdoc_files.add(files)
|
53
|
-
rdoc.main = "README.markdown" # page to start on
|
54
|
-
rdoc.title = "ARLoader Docs"
|
55
|
-
rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
|
56
|
-
rdoc.options << '--line-numbers'
|
57
|
-
end
|
58
|
-
|
59
|
-
Rake::TestTask.new do |t|
|
60
|
-
t.test_files = FileList['test/**/*.rb']
|
61
|
-
end
|
62
|
-
|
63
|
-
# Add in our own Tasks
|
64
|
-
|
65
|
-
# Long parameter lists so ensure rake -T produces nice wide output
|
66
|
-
ENV['RAKE_COLUMNS'] = '180'
|
67
|
-
|
68
|
-
desc 'Build gem and install in one step'
|
69
|
-
task :pik_install, :needs => [:gem] do |t, args|
|
70
|
-
|
71
|
-
puts "Installing version #{ArLoader.gem_version}"
|
72
|
-
|
73
|
-
gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
|
74
|
-
cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
|
75
|
-
system(cmd)
|
76
|
-
end
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
require 'rake/rdoctask'
|
6
|
+
require 'rake/testtask'
|
7
|
+
require "lib/ar_loader"
|
8
|
+
|
9
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
10
|
+
# Author :: Tom Statter
|
11
|
+
# Date :: Aug 2010
|
12
|
+
#
|
13
|
+
# License:: MIT - Free, OpenSource
|
14
|
+
#
|
15
|
+
# Details:: Gem::Specification for Active Record Loader gem.
|
16
|
+
#
|
17
|
+
# Specifically enabled for uploading Spree products but easily
|
18
|
+
# extended to any AR model.
|
19
|
+
#
|
20
|
+
# Currently supports direct access to Excel Spreadsheets via JRuby
|
21
|
+
#
|
22
|
+
# TODO - Switch for non JRuby Rubies, enable load via CSV file instead of Excel.
|
23
|
+
#
|
24
|
+
# Pull in every *.rake file shipped under the gem's tasks/ directory.
ArLoader::load_tasks

# Gem specification; name and version come from ArLoader so that the
# lib/VERSION file stays the single source of truth.
spec = Gem::Specification.new do |s|
  s.name = ArLoader.gem_name
  s.version = ArLoader.gem_version
  s.has_rdoc = true
  s.extra_rdoc_files = ['README.markdown', 'LICENSE']
  s.summary = 'File based loader for Active Record models'
  s.description = 'A file based loader for Active Record models. Seed database directly from Excel/CSV. Includes rake support for Spree'
  s.author = 'thomas statter'
  s.email = 'rubygems@autotelik.co.uk'
  # Time is always available; DateTime needs 'date' to have been required by someone else.
  s.date = Time.now.strftime("%Y-%m-%d")
  s.homepage = %q{http://www.autotelik.co.uk}

  # s.executables = ['your_executable_here']
  s.files = %w(LICENSE README.markdown Rakefile) + Dir.glob("{lib,spec,tasks}/**/*")
  s.require_path = "lib"
  s.bindir = "bin"
end

# Defines the :gem / :package tasks (plus tar and zip archives) under pkg/.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = true
end

Rake::RDocTask.new do |rdoc|
  files =['README.markdown', 'LICENSE', 'lib/**/*.rb']
  rdoc.rdoc_files.add(files)
  rdoc.main = "README.markdown" # page to start on
  rdoc.title = "ARLoader Docs"
  rdoc.rdoc_dir = 'doc/rdoc' # rdoc output folder
  rdoc.options << '--line-numbers'
end

# NOTE(review): the package ships its specs under spec/, not test/ - confirm this pattern is intended.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/**/*.rb']
end

# Add in our own Tasks

# Long parameter lists so ensure rake -T produces nice wide output
ENV['RAKE_COLUMNS'] = '180'

desc 'Build gem and install in one step'
# Prerequisites are declared with the `name => [deps]` form; the older
# `:needs => [...]` option is deprecated by Rake.
task :pik_install => [:gem] do |t, args|

  puts "Installing version #{ArLoader.gem_version}"

  gem = "#{ArLoader.gem_name}-#{ArLoader.gem_version}.gem"
  # pik is a Windows tool, hence the backslash path separator.
  cmd = "pik gem install --no-ri --no-rdoc pkg\\#{gem}"
  system(cmd)
end
|
data/lib/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.8
|
data/lib/ar_loader.rb
CHANGED
@@ -1,66 +1,87 @@
|
|
1
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
-
# Author :: Tom Statter
|
3
|
-
# Date :: Aug 2010
|
4
|
-
# License:: TBD. Free, Open Source. MIT ?
|
5
|
-
#
|
6
|
-
# Details:: Active Record Loader
|
7
|
-
#
|
8
|
-
require 'active_record'
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Aug 2010
|
4
|
+
# License:: TBD. Free, Open Source. MIT ?
|
5
|
+
#
|
6
|
+
# Details:: Active Record Loader
|
7
|
+
#
|
8
|
+
require 'active_record'
|
9
|
+
require 'rbconfig'
|
10
|
+
|
11
|
+
# Platform / interpreter predicates used to guard platform specific code.
# Every predicate returns a strict boolean (true/false).
module Guards

  # True when running on the Java platform (JRuby).
  def self.jruby?
    RUBY_PLATFORM == "java"
  end

  # True when RbConfig reports a Darwin (Mac OS X) target OS.
  def self.mac?
    target_os_matches?(/darwin/i)
  end

  # True when RbConfig reports a Linux target OS.
  def self.linux?
    target_os_matches?(/linux/i)
  end

  # True when RbConfig reports a native Windows (mswin / mingw) target OS.
  def self.windows?
    target_os_matches?(/mswin|mingw/i)
  end

  # Match RbConfig's 'target_os' entry against pattern, coercing the
  # match index / nil result of =~ into true / false.
  def self.target_os_matches?(pattern)
    !!(RbConfig::CONFIG['target_os'] =~ pattern)
  end
  private_class_method :target_os_matches?

end
|
29
|
+
|
30
|
+
# Entry point for the gem : exposes gem meta data, sets up the load path,
# requires the core libraries and loads the bundled rake tasks.
module ArLoader

  # Version string read (once) from lib/VERSION under the gem root.
  def self.gem_version
    @gem_version ||= File.read( File.join( root_path, 'lib', 'VERSION') ).chomp
  end

  def self.gem_name
    "ar_loader"
  end

  # Absolute path of the gem root, i.e. the parent of the directory holding this file.
  def self.root_path
    File.expand_path("#{File.dirname(__FILE__)}/..")
  end

  # Add lib and every directory beneath it to the load path, then require
  # each top level .rb file found in lib/ar_loader, lib/loaders and lib/helpers.
  #
  # All paths are anchored on root_path, so this works regardless of the
  # current working directory of the calling process.
  def self.require_libraries

    loader_libs = %w{ lib }

    # Base search paths - these will be searched recursively
    loader_paths = loader_libs.map { |l| File.join(root_path, l) }

    # Define require search paths, any dir in here will be added to LOAD_PATH
    loader_paths.each do |base|
      $:.unshift(base) if File.directory?(base) && !$:.include?(base)

      Dir[File.join(base, '**', '*')].sort.each do |p|
        $:.unshift(p) if File.directory?(p) && !$:.include?(p)
      end
    end

    require_libs = %w{ ar_loader loaders helpers }

    require_libs.each do |base|
      # Sorted so the load order is the same on every file system.
      Dir[File.join(root_path, 'lib', base, '*.rb')].sort.each do |rb|
        require rb unless File.directory?(rb)
      end
    end

  end

  # Load every *.rake file found anywhere beneath the gem's tasks directory.
  def self.load_tasks
    # Long parameter lists so ensure rake -T produces nice wide output
    ENV['RAKE_COLUMNS'] = '180'
    base = File.join(root_path, 'tasks', '**')
    Dir["#{base}/*.rake"].sort.each { |ext| load ext }
  end

end
|
86
|
+
|
87
|
+
ArLoader::require_libraries
|
@@ -1,353 +1,353 @@
|
|
1
|
-
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
-
# Author :: Tom Statter
|
3
|
-
# Date :: Jan 2011
|
4
|
-
# License:: MIT
|
5
|
-
#
|
6
|
-
# Details:: This module acts as helpers for defining input/output file formats as classes.
|
7
|
-
#
|
8
|
-
# It provides a simple interface to define a file structure - field by field.
|
9
|
-
#
|
10
|
-
# By defining the structure, following methods and attributes are mixed in :
|
11
|
-
#
|
12
|
-
# An attribute, with accessor for each field/column.
|
13
|
-
# Parse a line, assigning values to each attribute.
|
14
|
-
# Parse an instance of that file line by line, accepts a block in which data can be processed.
|
15
|
-
# Method to split a file by field.
|
16
|
-
# Method to perform replace operations on a file by field and value.
|
17
|
-
#
|
18
|
-
# Either delimited or a fixed width definition can be created via macro-like class methods :
|
19
|
-
#
|
20
|
-
# create_field_definition [field_list]
|
21
|
-
#
|
22
|
-
# create_fixed_definition {field => range }
|
23
|
-
#
|
24
|
-
# Member attributes, with getters and setters, can be added for each field defined above via class method :
|
25
|
-
#
|
26
|
-
# create_field_attr_accessors
|
27
|
-
#
|
28
|
-
# USAGE :
|
29
|
-
#
|
30
|
-
# Create a class that contains definition of a file.
|
31
|
-
#
|
32
|
-
# class ExampleFixedWith < FileDefinitionBase
|
33
|
-
# create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
|
34
|
-
#
|
35
|
-
# create_field_attr_accessors
|
36
|
-
# end
|
37
|
-
#
|
38
|
-
# class ExampleCSV < FileDefinitionBase
|
39
|
-
# create_field_definition %w{abc def ghi jkl}
|
40
|
-
#
|
41
|
-
# create_field_attr_accessors
|
42
|
-
# end
|
43
|
-
#
|
44
|
-
# Any instance can then be used to parse the defined file type, with each field or column value
|
45
|
-
# being assigned automatically to the associated instance variable.
|
46
|
-
#
|
47
|
-
# line = '1,2,3,4'
|
48
|
-
# x = ExampleCSV.new( line )
|
49
|
-
#
|
50
|
-
# assert x.responds_to? :jkl
|
51
|
-
# assert_equal x.abc, '1'
|
52
|
-
# assert_equal x.jkl.to_i, 4
|
53
|
-
#
|
54
|
-
module FileDefinitions
|
55
|
-
|
56
|
-
include Enumerable
|
57
|
-
|
58
|
-
attr_accessor :key
|
59
|
-
attr_accessor :current_line
|
60
|
-
|
61
|
-
# Set the delimiter to use when splitting a line - can be either a String, or a Regexp
|
62
|
-
attr_writer :field_delim
|
63
|
-
|
64
|
-
def initialize( line = nil )
|
65
|
-
@key = String.new
|
66
|
-
parse(line) unless line.nil?
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.included(base)
|
70
|
-
base.extend(ClassMethods)
|
71
|
-
subclasses << base
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.subclasses
|
75
|
-
@subclasses ||=[]
|
76
|
-
end
|
77
|
-
|
78
|
-
|
79
|
-
# Return the field delimiter used when splitting a line
|
80
|
-
def field_delim
|
81
|
-
@field_delim || ','
|
82
|
-
end
|
83
|
-
|
84
|
-
# Parse each line of a file based on the field definition, yields self for each successive line
|
85
|
-
#
|
86
|
-
def each( file )
|
87
|
-
File::new(file).each_line do |line|
|
88
|
-
parse( line )
|
89
|
-
yield self
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def fields
|
94
|
-
@fields = self.class.field_definition.collect {|f| instance_variable_get "@#{f}" }
|
95
|
-
@fields
|
96
|
-
end
|
97
|
-
|
98
|
-
def to_s
|
99
|
-
fields.join(',')
|
100
|
-
end
|
101
|
-
|
102
|
-
module ClassMethods
|
103
|
-
|
104
|
-
# Helper to generate methods to store and return the complete list of fields
|
105
|
-
# in this File definition (also creates member @field_definition) and parse a line.
|
106
|
-
#
|
107
|
-
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
108
|
-
#
|
109
|
-
def create_field_definition( *fields )
|
110
|
-
instance_eval <<-end_eval
|
111
|
-
@field_definition ||= %w{ #{fields.join(' ')} }
|
112
|
-
def field_definition
|
113
|
-
@field_definition
|
114
|
-
end
|
115
|
-
end_eval
|
116
|
-
|
117
|
-
class_eval <<-end_eval
|
118
|
-
def parse( line )
|
119
|
-
@current_line = line
|
120
|
-
before_parse if respond_to? :before_parse
|
121
|
-
@current_line.split(field_delim()).each_with_index {|x, i| instance_variable_set(\"@\#{self.class.field_definition[i]}\", x) }
|
122
|
-
after_parse if respond_to? :after_parse
|
123
|
-
generate_key if respond_to? :generate_key
|
124
|
-
end
|
125
|
-
end_eval
|
126
|
-
end
|
127
|
-
|
128
|
-
def add_field(field, add_accessor = true)
|
129
|
-
@field_definition ||= []
|
130
|
-
@field_definition << field.to_s
|
131
|
-
attr_accessor field if(add_accessor)
|
132
|
-
end
|
133
|
-
|
134
|
-
|
135
|
-
# Helper to generate methods that return the complete list of fixed width fields
|
136
|
-
# and associated ranges in this File definition, and parse a line.
|
137
|
-
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
|
138
|
-
#
|
139
|
-
def create_fixed_definition( field_range_map )
|
140
|
-
raise ArgumentError.new('Please supply hash to create_fixed_definition') unless field_range_map.is_a? Hash
|
141
|
-
|
142
|
-
keys = field_range_map.keys.collect(&:to_s)
|
143
|
-
string_map = Hash[*keys.zip(field_range_map.values).flatten]
|
144
|
-
|
145
|
-
instance_eval <<-end_eval
|
146
|
-
def fixed_definition
|
147
|
-
@fixed_definition ||= #{string_map.inspect}
|
148
|
-
@fixed_definition
|
149
|
-
end
|
150
|
-
end_eval
|
151
|
-
|
152
|
-
instance_eval <<-end_eval
|
153
|
-
def field_definition
|
154
|
-
@field_definition ||= %w{ #{keys.join(' ')} }
|
155
|
-
@field_definition
|
156
|
-
end
|
157
|
-
end_eval
|
158
|
-
|
159
|
-
class_eval <<-end_eval
|
160
|
-
def parse( line )
|
161
|
-
@current_line = line
|
162
|
-
before_parse if respond_to? :before_parse
|
163
|
-
self.class.fixed_definition.each do |key, range|
|
164
|
-
instance_variable_set(\"@\#{key}\", @current_line[range])
|
165
|
-
end
|
166
|
-
after_parse if respond_to? :after_parse
|
167
|
-
generate_key if respond_to? :generate_key
|
168
|
-
end
|
169
|
-
end_eval
|
170
|
-
|
171
|
-
end
|
172
|
-
|
173
|
-
# Create accessors for each field
|
174
|
-
def create_field_attr_accessors
|
175
|
-
self.field_definition.each {|f| attr_accessor f}
|
176
|
-
end
|
177
|
-
|
178
|
-
|
179
|
-
###############################
|
180
|
-
# PARSING + FILE MANIPULATION #
|
181
|
-
###############################
|
182
|
-
|
183
|
-
# Parse a complete file and return array of self, one per line
|
184
|
-
def parse_file( file, options = {} )
|
185
|
-
limit = options[:limit]
|
186
|
-
count = 0
|
187
|
-
lines = []
|
188
|
-
File::new(file).each_line do |line|
|
189
|
-
break if limit && ((count += 1) > limit)
|
190
|
-
lines << self.new( line )
|
191
|
-
end
|
192
|
-
lines
|
193
|
-
end
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
# Split a file, whose field definition is represented by self,
|
198
|
-
# into seperate streams, based on the values of one if it's fields.
|
199
|
-
#
|
200
|
-
# Writes the results, one file per split stream, to directory specified by output_path
|
201
|
-
#
|
202
|
-
# Options:
|
203
|
-
#
|
204
|
-
# :keys => Also write split files of the key fields
|
205
|
-
#
|
206
|
-
# :filter => Optional Regular Expression to act as filter be applid to the field.
|
207
|
-
# For example split by Ccy but filter to only include certain ccys pass
|
208
|
-
# filter => '[GBP|USD]'
|
209
|
-
#
|
210
|
-
def split_on_write( file_name, field, output_path, options = {} )
|
211
|
-
|
212
|
-
path = output_path || '.'
|
213
|
-
|
214
|
-
filtered = split_on( file_name, field, options )
|
215
|
-
|
216
|
-
unless filtered.empty?
|
217
|
-
log :info, "Writing seperate streams to #{path}"
|
218
|
-
|
219
|
-
filtered.each { |strm, objects| RecsBase::write( {"keys_#{field}_#{strm}.csv" => objects.collect(&:key).join("\n")}, path) } if(options.key?(:keys))
|
220
|
-
|
221
|
-
filtered.each { |strm, objects| RecsBase::write( {"#{field}_#{strm}.csv" => objects.collect(&:current_line).join("\n")}, path) }
|
222
|
-
end
|
223
|
-
end
|
224
|
-
|
225
|
-
# Split a file, whose field definition is represented by self,
|
226
|
-
# into seperate streams, based on one if it's fields.
|
227
|
-
#
|
228
|
-
# Returns a map of Field value => File def object
|
229
|
-
#
|
230
|
-
# We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
|
231
|
-
#
|
232
|
-
# Users can get at the raw line simply by calling the line() method on File Def object
|
233
|
-
#
|
234
|
-
# Options:
|
235
|
-
#
|
236
|
-
# :output_path => directory to write the individual streams files to
|
237
|
-
#
|
238
|
-
# :filter => Optional Regular Expression to act as filter be applid to the field.
|
239
|
-
# For example split by Ccy but filter to only include certain ccys pass
|
240
|
-
# filter => 'GBP|USD|EUR'
|
241
|
-
#
|
242
|
-
def split_on( file_name, field, options = {} )
|
243
|
-
|
244
|
-
regex = options[:filter] ? Regexp.new(options[:filter]) : nil
|
245
|
-
|
246
|
-
log :debug, "Using REGEX: #{regex.inspect}" if regex
|
247
|
-
|
248
|
-
filtered = {}
|
249
|
-
|
250
|
-
if( self.new.respond_to?(field) )
|
251
|
-
|
252
|
-
log :info, "Splitting on #{field}"
|
253
|
-
|
254
|
-
File.open( file_name ) do |t|
|
255
|
-
t.each do |line|
|
256
|
-
next unless(line && line.chomp!)
|
257
|
-
x = self.new(line)
|
258
|
-
|
259
|
-
value = x.send( field.to_sym ) # the actual field value from the specified field column
|
260
|
-
next if value.nil?
|
261
|
-
|
262
|
-
if( regex.nil? || value.match(regex) )
|
263
|
-
filtered[value] ? filtered[value] << x : filtered[value] = [x]
|
264
|
-
end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
else
|
268
|
-
log :warn, "Field [#{field}] nor defined for file definition #{self.class.name}"
|
269
|
-
end
|
270
|
-
|
271
|
-
if( options[:sort])
|
272
|
-
filtered.values.each( &:sort )
|
273
|
-
return filtered
|
274
|
-
end
|
275
|
-
return filtered
|
276
|
-
end
|
277
|
-
|
278
|
-
# Open and parse a file, replacing a value in the specfied field.
|
279
|
-
# Does not update the file itself. Does not write a new output file.
|
280
|
-
#
|
281
|
-
# Returns :
|
282
|
-
# 1) full collection of updated lines
|
283
|
-
# 2) collection of file def objects (self), with updated value.
|
284
|
-
#
|
285
|
-
# Finds values matching old_value in given map
|
286
|
-
#
|
287
|
-
# Replaces matches with new_value in map.
|
288
|
-
#
|
289
|
-
# Accepts more than one field, if files is either and array of strings
|
290
|
-
# or comma seperated list of fields.
|
291
|
-
#
|
292
|
-
def file_set_field_by_map( file_name, fields, value_map, regex = nil )
|
293
|
-
|
294
|
-
lines, objects = [],[]
|
295
|
-
|
296
|
-
if fields.is_a?(Array)
|
297
|
-
attribs = fields
|
298
|
-
else
|
299
|
-
attribs = "#{fields}".split(',')
|
300
|
-
end
|
301
|
-
|
302
|
-
attribs.collect! do |attrib|
|
303
|
-
raise BadConfigError.new("Field: #{attrib} is not a field on #{self.class.name}") unless self.new.respond_to?(attrib)
|
304
|
-
end
|
305
|
-
|
306
|
-
log :info, "#{self.class.name} - updating field(s) #{fields} in #{file_name}"
|
307
|
-
|
308
|
-
File.open( file_name ) do |t|
|
309
|
-
t.each do |line|
|
310
|
-
if line.chomp.empty?
|
311
|
-
lines << line
|
312
|
-
objects << self.new
|
313
|
-
next
|
314
|
-
end
|
315
|
-
x = self.new(line)
|
316
|
-
|
317
|
-
attribs.each do |a|
|
318
|
-
old_value = x.instance_variable_get( "@#{a}" )
|
319
|
-
x.instance_variable_set( "@#{a}", value_map[old_value] ) if value_map[old_value] || (regex && old_value.keys.detect {|k| k.match(regx) })
|
320
|
-
end
|
321
|
-
|
322
|
-
objects << x
|
323
|
-
lines << x.to_s
|
324
|
-
end
|
325
|
-
end
|
326
|
-
|
327
|
-
return lines, objects
|
328
|
-
end
|
329
|
-
end # END class methods
|
330
|
-
|
331
|
-
# Open and parse a file, replacing a value in the specfied field.
|
332
|
-
# Does not update the file itself. Does not write a new output file.
|
333
|
-
#
|
334
|
-
# Returns :
|
335
|
-
# 1) full collection of updated lines
|
336
|
-
# 2) collection of file def objects (self), with updated value.
|
337
|
-
#
|
338
|
-
# Finds values matching old_value, and also accepts an optional regex for more powerful
|
339
|
-
# matching strategies of values on the specfified field.
|
340
|
-
#
|
341
|
-
# Replaces matches with new_value.
|
342
|
-
#
|
343
|
-
# Accepts more than one field, if files is either and array of strings
|
344
|
-
# or comma seperated list of fields.
|
345
|
-
#
|
346
|
-
def file_set_field( file_name, field, old_value, new_value, regex = nil )
|
347
|
-
|
348
|
-
map = {old_value => new_value}
|
349
|
-
|
350
|
-
return file_set_field_by_map(file_name, field, map, regex)
|
351
|
-
end
|
352
|
-
|
353
|
-
end
|
1
|
+
# Copyright:: (c) Autotelik Media Ltd 2011
|
2
|
+
# Author :: Tom Statter
|
3
|
+
# Date :: Jan 2011
|
4
|
+
# License:: MIT
|
5
|
+
#
|
6
|
+
# Details:: This module acts as helpers for defining input/output file formats as classes.
|
7
|
+
#
|
8
|
+
# It provides a simple interface to define a file structure - field by field.
|
9
|
+
#
|
10
|
+
# By defining the structure, following methods and attributes are mixed in :
|
11
|
+
#
|
12
|
+
# An attribute, with accessor for each field/column.
|
13
|
+
# Parse a line, assigning values to each attribute.
|
14
|
+
# Parse an instance of that file line by line, accepts a block in which data can be processed.
|
15
|
+
# Method to split a file by field.
|
16
|
+
# Method to perform replace operations on a file by field and value.
|
17
|
+
#
|
18
|
+
# Either delimited or a fixed width definition can be created via macro-like class methods :
|
19
|
+
#
|
20
|
+
# create_field_definition [field_list]
|
21
|
+
#
|
22
|
+
# create_fixed_definition {field => range }
|
23
|
+
#
|
24
|
+
# Member attributes, with getters and setters, can be added for each field defined above via class method :
|
25
|
+
#
|
26
|
+
# create_field_attr_accessors
|
27
|
+
#
|
28
|
+
# USAGE :
|
29
|
+
#
|
30
|
+
# Create a class that contains definition of a file.
|
31
|
+
#
|
32
|
+
# class ExampleFixedWith < FileDefinitionBase
|
33
|
+
# create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18), :dr_or_cr => (19..19) )
|
34
|
+
#
|
35
|
+
# create_field_attr_accessors
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# class ExampleCSV < FileDefinitionBase
|
39
|
+
# create_field_definition %w{abc def ghi jkl}
|
40
|
+
#
|
41
|
+
# create_field_attr_accessors
|
42
|
+
# end
|
43
|
+
#
|
44
|
+
# Any instance can then be used to parse the defined file type, with each field or column value
|
45
|
+
# being assigned automatically to the associated instance variable.
|
46
|
+
#
|
47
|
+
# line = '1,2,3,4'
|
48
|
+
# x = ExampleCSV.new( line )
|
49
|
+
#
|
50
|
+
# assert x.respond_to? :jkl
|
51
|
+
# assert_equal x.abc, '1'
|
52
|
+
# assert_equal x.jkl.to_i, 4
|
53
|
+
#
|
54
|
+
module FileDefinitions
|
55
|
+
|
56
|
+
include Enumerable
|
57
|
+
|
58
|
+
attr_accessor :key
|
59
|
+
attr_accessor :current_line
|
60
|
+
|
61
|
+
# Set the delimiter to use when splitting a line - can be either a String, or a Regexp
|
62
|
+
attr_writer :field_delim
|
63
|
+
|
64
|
+
# Start with an empty key; when a line is supplied it is parsed straight away.
def initialize( line = nil )
  @key = String.new
  return if line.nil?

  parse(line)
end
|
68
|
+
|
69
|
+
# Hook run when the module is mixed into +base+ : gives base the
# ClassMethods macros and records it in the subclasses list.
def self.included(base)
  base.extend ClassMethods
  subclasses.push(base)
end
|
73
|
+
|
74
|
+
# Lazily created list of the classes recorded by the included hook.
def self.subclasses
  @subclasses = [] unless @subclasses
  @subclasses
end
|
77
|
+
|
78
|
+
|
79
|
+
# Delimiter used when splitting a line; a comma unless one has been assigned.
def field_delim
  return @field_delim if @field_delim

  ','
end
|
83
|
+
|
84
|
+
# Parse each line of a file based on the field definition, yields self for each successive line.
#
# The file is opened in block form so the handle is closed once iteration
# finishes, including when the caller's block raises.
#
# Returns the (now closed) File object.
def each( file )
  File.open(file) do |io|
    io.each_line do |line|
      parse( line )
      yield self
    end
  end
end
|
92
|
+
|
93
|
+
# Current value of every defined field, in field definition order.
# The result is also stored in @fields.
def fields
  names = self.class.field_definition
  @fields = names.map { |name| instance_variable_get("@#{name}") }
end
|
97
|
+
|
98
|
+
# Comma separated rendering of the current field values.
def to_s
  values = fields
  values.join(',')
end
|
101
|
+
|
102
|
+
module ClassMethods
|
103
|
+
|
104
|
+
# Helper to generate methods to store and return the complete list of fields
# in this File definition (also creates member @field_definition) and parse a line.
#
# Accepts either a list of names or a single array of names.
# An existing @field_definition is kept, not overwritten.
#
# e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }
#
# The generated parse(line) splits the line on field_delim and assigns each
# column to the instance variable named after the field in that position.
# Columns beyond the last defined field are ignored.
# The optional hooks before_parse, after_parse and generate_key are called
# when the instance responds to them.
#
def create_field_definition( *fields )
  # Whitespace split mirrors a %w{ } literal, without evaluating field names as code.
  @field_definition ||= fields.flatten.join(' ').split(' ')

  # def inside instance_eval defines a singleton (class level) reader.
  instance_eval do
    def field_definition
      @field_definition
    end
  end

  class_eval do
    def parse( line )
      @current_line = line
      before_parse if respond_to? :before_parse

      names = self.class.field_definition
      @current_line.split(field_delim()).each_with_index do |x, i|
        # No field defined for this column; "@" is not a valid variable name.
        break if names[i].nil?
        instance_variable_set("@#{names[i]}", x)
      end

      after_parse if respond_to? :after_parse
      generate_key if respond_to? :generate_key
    end
  end
end
|
127
|
+
|
128
|
+
# Append one field name to the field definition, optionally (the default)
# creating a reader/writer pair for it as well.
def add_field(field, add_accessor = true)
  (@field_definition ||= []).push(field.to_s)
  return unless add_accessor

  attr_accessor field
end
|
133
|
+
|
134
|
+
|
135
|
+
# Helper to generate methods that return the complete list of fixed width fields
# and associated ranges in this File definition, and parse a line.
#
# field_range_map : Hash of field name => position within the line.
#
# e.g create_fixed_definition(:name => (0..7), :value => (8..15), :ccy => (16..18))
#
# Generates the class level methods fixed_definition (name => position, with
# String keys) and field_definition (list of names), plus an instance level
# parse(line) that assigns @current_line[position] to each field's instance variable.
# The optional hooks before_parse, after_parse and generate_key are called
# when the instance responds to them.
#
# Raises ArgumentError unless a Hash is supplied.
#
def create_fixed_definition( field_range_map )
  raise ArgumentError, 'Please supply hash to create_fixed_definition' unless field_range_map.is_a? Hash

  keys = field_range_map.keys.map { |k| k.to_s }
  # Built from pairs, no flatten, so array values stay intact.
  string_map = Hash[keys.zip(field_range_map.values)]

  # Closures over the real objects replace eval of interpolated inspect output.
  singleton = class << self; self; end

  singleton.send(:define_method, :fixed_definition) do
    @fixed_definition ||= string_map
  end

  singleton.send(:define_method, :field_definition) do
    @field_definition ||= keys
  end

  class_eval do
    def parse( line )
      @current_line = line
      before_parse if respond_to? :before_parse
      self.class.fixed_definition.each do |key, range|
        instance_variable_set("@#{key}", @current_line[range])
      end
      after_parse if respond_to? :after_parse
      generate_key if respond_to? :generate_key
    end
  end

end
|
172
|
+
|
173
|
+
# Define a reader/writer pair for every name in the field definition.
def create_field_attr_accessors
  names = self.field_definition
  names.each do |name|
    attr_accessor name
  end
end
|
177
|
+
|
178
|
+
|
179
|
+
###############################
|
180
|
+
# PARSING + FILE MANIPULATION #
|
181
|
+
###############################
|
182
|
+
|
183
|
+
# Parse a complete file and return array of self, one per line
#
# Options:
#
#   :limit => Maximum number of lines to read; all lines when absent.
#
# The file is opened in block form so the handle is always closed,
# including when the limit stops the read early.
#
def parse_file( file, options = {} )
  limit = options[:limit]
  lines = []

  File.open(file) do |io|
    io.each_line do |line|
      break if limit && lines.size >= limit
      lines << self.new( line )
    end
  end

  lines
end
|
194
|
+
|
195
|
+
|
196
|
+
|
197
|
+
# Split a file, whose field definition is represented by self,
|
198
|
+
# into seperate streams, based on the values of one if it's fields.
|
199
|
+
#
|
200
|
+
# Writes the results, one file per split stream, to directory specified by output_path
|
201
|
+
#
|
202
|
+
# Options:
|
203
|
+
#
|
204
|
+
# :keys => Also write split files of the key fields
|
205
|
+
#
|
206
|
+
# :filter => Optional Regular Expression to act as filter be applid to the field.
|
207
|
+
# For example split by Ccy but filter to only include certain ccys pass
|
208
|
+
# filter => '[GBP|USD]'
|
209
|
+
#
|
210
|
+
# Split file_name on field via split_on, then hand each resulting stream to
# RecsBase::write as a { file name => content } pair, targeting output_path
# (or '.' when output_path is nil).
#
# When the :keys option is present, a "keys_<field>_<stream>.csv" entry built
# from each object's key is written as well. Nothing is written when the
# split produced no streams.
def split_on_write( file_name, field, output_path, options = {} )
  path = output_path || '.'

  filtered = split_on( file_name, field, options )

  return if filtered.empty?

  log :info, "Writing seperate streams to #{path}"

  if options.key?(:keys)
    filtered.each do |strm, objects|
      key_content = objects.collect(&:key).join("\n")
      RecsBase::write( { "keys_#{field}_#{strm}.csv" => key_content }, path )
    end
  end

  filtered.each do |strm, objects|
    line_content = objects.collect(&:current_line).join("\n")
    RecsBase::write( { "#{field}_#{strm}.csv" => line_content }, path )
  end
end
|
224
|
+
|
225
|
+
# Split a file, whose field definition is represented by self,
|
226
|
+
# into separate streams, based on one of its fields.
|
227
|
+
#
|
228
|
+
# Returns a map of Field value => File def object
|
229
|
+
#
|
230
|
+
# We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.
|
231
|
+
#
|
232
|
+
# Users can get at the raw line simply by calling the line() method on File Def object
|
233
|
+
#
|
234
|
+
# Options:
|
235
|
+
#
|
236
|
+
# :output_path => directory to write the individual streams files to
|
237
|
+
#
|
238
|
+
# :filter => Optional Regular Expression to act as a filter applied to the field.
|
239
|
+
# For example split by Ccy but filter to only include certain ccys pass
|
240
|
+
# filter => 'GBP|USD|EUR'
|
241
|
+
#
|
242
|
+
# Group the lines of file_name by the value each one holds in +field+.
#
# file_name - path of the file to read.
# field     - name (String or Symbol) of an accessor on instances of self.
# options   - :filter => String/Regexp; only values matching it are kept.
#             :sort   => when truthy, each stream is sorted in place
#                        (objects must therefore be comparable via <=>).
#
# Returns a Hash of field value => Array of self instances. The Hash is empty
# when +field+ is not an accessor on self (a warning is logged in that case).
def split_on( file_name, field, options = {} )

  regex = options[:filter] ? Regexp.new(options[:filter]) : nil

  log :debug, "Using REGEX: #{regex.inspect}" if regex

  filtered = {}

  if( self.new.respond_to?(field) )

    log :info, "Splitting on #{field}"

    File.open( file_name ) do |t|
      t.each do |line|
        # Non-destructive chomp: chomp! returns nil when there is nothing to
        # strip, which used to drop a final line lacking a trailing newline.
        x = self.new( line.chomp )

        value = x.send( field.to_sym )  # the actual field value from the specified field column
        next if value.nil?

        (filtered[value] ||= []) << x if( regex.nil? || value.match(regex) )
      end
    end
  else
    # self is the file definition class here, so report its own name
    # (self.class.name would always read "Class").
    log :warn, "Field [#{field}] not defined for file definition #{self.name}"
  end

  # sort! so the ordering is kept; plain sort returned copies that were discarded.
  filtered.each_value( &:sort! ) if options[:sort]

  filtered
end
|
277
|
+
|
278
|
+
# Open and parse a file, replacing a value in the specified field.
|
279
|
+
# Does not update the file itself. Does not write a new output file.
|
280
|
+
#
|
281
|
+
# Returns :
|
282
|
+
# 1) full collection of updated lines
|
283
|
+
# 2) collection of file def objects (self), with updated value.
|
284
|
+
#
|
285
|
+
# Finds values matching old_value in given map
|
286
|
+
#
|
287
|
+
# Replaces matches with new_value in map.
|
288
|
+
#
|
289
|
+
# Accepts more than one field, if fields is either an array of strings
|
290
|
+
# or a comma separated list of fields.
|
291
|
+
#
|
292
|
+
# Read file_name and, for every listed field, swap values found in value_map
# for their mapped replacement. The file on disk is not modified.
#
# file_name - path of the file to read.
# fields    - Array of field names, or a comma separated String of them.
# value_map - Hash of old value => new value.
# regex     - optional String/Regexp enabling pattern based replacement
#             (see the NOTE below).
#
# Returns [lines, objects]: the updated lines (x.to_s per parsed line, blank
# lines passed through untouched) and the matching file definition objects.
#
# Raises BadConfigError when a named field is not an accessor on self.
def file_set_field_by_map( file_name, fields, value_map, regex = nil )

  lines, objects = [], []

  attribs = fields.is_a?(Array) ? fields : "#{fields}".split(',').collect(&:strip)

  # Validate with each: collect! replaced every field name with the block's
  # nil result, so no field could ever be updated afterwards.
  probe = self.new
  attribs.each do |attrib|
    raise BadConfigError.new("Field: #{attrib} is not a field on #{self.name}") unless probe.respond_to?(attrib)
  end

  # NOTE(review): the original regex branch referenced an undefined local and
  # called .keys on the field value, so it could never run. Interpretation
  # used here: a field value that matches regex, and has no exact entry in
  # value_map, takes the replacement of the first map key that also matches
  # regex. Confirm this is the intended contract.
  regex_key = regex ? value_map.keys.detect { |k| k.to_s.match(regex) } : nil

  log :info, "#{self.name} - updating field(s) #{fields} in #{file_name}"

  File.open( file_name ) do |t|
    t.each do |line|
      if line.chomp.empty?
        # Keep blank lines positionally aligned in both result collections.
        lines << line
        objects << self.new
        next
      end

      x = self.new(line)

      attribs.each do |a|
        old_value = x.instance_variable_get( "@#{a}" )

        new_value = value_map[old_value]
        new_value ||= value_map[regex_key] if regex_key && old_value && old_value.to_s.match(regex)

        x.instance_variable_set( "@#{a}", new_value ) if new_value
      end

      objects << x
      lines << x.to_s
    end
  end

  return lines, objects
end
|
329
|
+
end # END class methods
|
330
|
+
|
331
|
+
# Open and parse a file, replacing a value in the specified field.
|
332
|
+
# Does not update the file itself. Does not write a new output file.
|
333
|
+
#
|
334
|
+
# Returns :
|
335
|
+
# 1) full collection of updated lines
|
336
|
+
# 2) collection of file def objects (self), with updated value.
|
337
|
+
#
|
338
|
+
# Finds values matching old_value, and also accepts an optional regex for more powerful
|
339
|
+
# matching strategies of values on the specified field.
|
340
|
+
#
|
341
|
+
# Replaces matches with new_value.
|
342
|
+
#
|
343
|
+
# Accepts more than one field, if field is either an array of strings
|
344
|
+
# or a comma separated list of fields.
|
345
|
+
#
|
346
|
+
# Single-pair convenience form of file_set_field_by_map: wraps
# old_value => new_value in a one-entry map and delegates, returning whatever
# the delegate returns.
#
# NOTE(review): this method sits after the "END class methods" marker while
# file_set_field_by_map sits before it - confirm the delegate is reachable
# from this scope.
def file_set_field( file_name, field, old_value, new_value, regex = nil )
  file_set_field_by_map( file_name, field, { old_value => new_value }, regex )
end
|
352
|
+
|
353
|
+
end
|