caruby-core 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/LEGAL +5 -0
- data/LICENSE +22 -0
- data/README.md +51 -0
- data/doc/website/css/site.css +1 -5
- data/doc/website/images/avatar.png +0 -0
- data/doc/website/images/favicon.ico +0 -0
- data/doc/website/images/logo.png +0 -0
- data/doc/website/index.html +82 -0
- data/doc/website/install.html +87 -0
- data/doc/website/quick_start.html +87 -0
- data/doc/website/tissue.html +85 -0
- data/doc/website/uom.html +10 -0
- data/lib/caruby.rb +3 -0
- data/lib/caruby/active_support/README.txt +2 -0
- data/lib/caruby/active_support/core_ext/string.rb +7 -0
- data/lib/caruby/active_support/core_ext/string/inflections.rb +167 -0
- data/lib/caruby/active_support/inflections.rb +55 -0
- data/lib/caruby/active_support/inflector.rb +398 -0
- data/lib/caruby/cli/application.rb +36 -0
- data/lib/caruby/cli/command.rb +169 -0
- data/lib/caruby/csv/csv_mapper.rb +157 -0
- data/lib/caruby/csv/csvio.rb +185 -0
- data/lib/caruby/database.rb +252 -0
- data/lib/caruby/database/fetched_matcher.rb +66 -0
- data/lib/caruby/database/persistable.rb +432 -0
- data/lib/caruby/database/persistence_service.rb +162 -0
- data/lib/caruby/database/reader.rb +599 -0
- data/lib/caruby/database/saved_merger.rb +131 -0
- data/lib/caruby/database/search_template_builder.rb +59 -0
- data/lib/caruby/database/sql_executor.rb +75 -0
- data/lib/caruby/database/store_template_builder.rb +200 -0
- data/lib/caruby/database/writer.rb +469 -0
- data/lib/caruby/domain/annotatable.rb +25 -0
- data/lib/caruby/domain/annotation.rb +23 -0
- data/lib/caruby/domain/attribute_metadata.rb +447 -0
- data/lib/caruby/domain/java_attribute_metadata.rb +160 -0
- data/lib/caruby/domain/merge.rb +91 -0
- data/lib/caruby/domain/properties.rb +95 -0
- data/lib/caruby/domain/reference_visitor.rb +289 -0
- data/lib/caruby/domain/resource_attributes.rb +528 -0
- data/lib/caruby/domain/resource_dependency.rb +205 -0
- data/lib/caruby/domain/resource_introspection.rb +159 -0
- data/lib/caruby/domain/resource_metadata.rb +117 -0
- data/lib/caruby/domain/resource_module.rb +285 -0
- data/lib/caruby/domain/uniquify.rb +38 -0
- data/lib/caruby/import/annotatable_class.rb +28 -0
- data/lib/caruby/import/annotation_class.rb +27 -0
- data/lib/caruby/import/annotation_module.rb +67 -0
- data/lib/caruby/import/java.rb +338 -0
- data/lib/caruby/migration/migratable.rb +167 -0
- data/lib/caruby/migration/migrator.rb +533 -0
- data/lib/caruby/migration/resource.rb +8 -0
- data/lib/caruby/migration/resource_module.rb +11 -0
- data/lib/caruby/migration/uniquify.rb +20 -0
- data/lib/caruby/resource.rb +969 -0
- data/lib/caruby/util/attribute_path.rb +46 -0
- data/lib/caruby/util/cache.rb +53 -0
- data/lib/caruby/util/class.rb +99 -0
- data/lib/caruby/util/collection.rb +1053 -0
- data/lib/caruby/util/controlled_value.rb +35 -0
- data/lib/caruby/util/coordinate.rb +75 -0
- data/lib/caruby/util/domain_extent.rb +49 -0
- data/lib/caruby/util/file_separator.rb +65 -0
- data/lib/caruby/util/inflector.rb +20 -0
- data/lib/caruby/util/log.rb +95 -0
- data/lib/caruby/util/math.rb +12 -0
- data/lib/caruby/util/merge.rb +59 -0
- data/lib/caruby/util/module.rb +34 -0
- data/lib/caruby/util/options.rb +92 -0
- data/lib/caruby/util/partial_order.rb +36 -0
- data/lib/caruby/util/person.rb +119 -0
- data/lib/caruby/util/pretty_print.rb +184 -0
- data/lib/caruby/util/properties.rb +112 -0
- data/lib/caruby/util/stopwatch.rb +66 -0
- data/lib/caruby/util/topological_sync_enumerator.rb +53 -0
- data/lib/caruby/util/transitive_closure.rb +45 -0
- data/lib/caruby/util/tree.rb +48 -0
- data/lib/caruby/util/trie.rb +37 -0
- data/lib/caruby/util/uniquifier.rb +30 -0
- data/lib/caruby/util/validation.rb +48 -0
- data/lib/caruby/util/version.rb +56 -0
- data/lib/caruby/util/visitor.rb +351 -0
- data/lib/caruby/util/weak_hash.rb +36 -0
- data/lib/caruby/version.rb +3 -0
- metadata +186 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'caruby/util/log'
|
3
|
+
|
4
|
+
module CaRuby
  module CLI
    # Extends the standard Logger::Application to log through the {CaRuby::Log} logger
    # and to provide an enhanced {#start} method.
    class Application < Logger::Application
      # @param [String] appname the application name
      def initialize(appname=nil)
        super(appname)
        # log to the CaRuby::Log logger and adopt its level
        @log = Log::instance.logger
        @log.progname = @appname
        @level = @log.level
      end

      # Overrides Logger::Application start with the following enhancements:
      # * pass arguments and a block to the application run method
      # * improve the output messages
      # * print an exception to stderr as well as the log
      #
      # An exception raised by +run+ is logged and reported on stderr rather than
      # propagated to the caller.
      #
      # @param args the arguments to pass to +run+
      # @yield the block to pass to +run+
      # @return the value returned by +run+, or 1 if +run+ raised an exception
      def start(*args, &block)
        # Adapted from Logger.
        status = 1
        begin
          log(INFO, "Starting #{@appname}...")
          status = run(*args, &block)
        rescue
          log(FATAL, "#{@appname} detected an exception: #{$!}\n#{$@.qp}")
          $stderr.puts "#{@appname} was unsuccessful: #{$!}.\nSee the log #{Log.instance.file} for more information."
        ensure
          log(INFO, "#{@appname} completed with status #{status}.")
        end
        # return the status explicitly; otherwise the failure path would return the nil result of puts
        status
      end
    end
  end
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'caruby/cli/application'
|
3
|
+
|
4
|
+
module CaRuby
|
5
|
+
module CLI
|
6
|
+
# Error class for command-line parsing failures; it adds no behavior to StandardError.
class CommandError < StandardError; end
|
8
|
+
|
9
|
+
# Command-line parser and executor.
|
10
|
+
class Command < Application
|
11
|
+
# Creates a command from option and argument specifications and an execution block.
#
# The specs parameter is an array which mixes two kinds of entries.
#
# An option specification is an array in the form:
#   [:option, short, long, class, description]
# where:
# * :option is the option symbol, e.g. +:output+
# * short is the short option form, e.g. "-o"
# * long is the long option form, e.g. "--output FILE"
# * class is the option value class, e.g. Integer
# * description is the option usage, e.g. "Output file"
# The short and class items are optional; the other items are required.
#
# An argument specification is an array in the form:
#   [:arg, text]
# where:
# * :arg is the argument symbol, e.g. +:input+
# * text is the usage message text, e.g. 'input', '[input]' or 'input ...'
# Both items are required.
#
# A specification is treated as an option if its second item starts with a dash.
#
# The following built-in options are appended to the given option specifications:
# * --help : print the help message and exit
# * --log FILE : log file
# * --debug : print debug messages to the log
# * --file FILE: configuration file containing other options
# This class processes these built-in options. A +--version+ option, if desired, is
# a subclass responsibility, as is the processing of any remaining options.
#
# @param [(<Symbol>, <String, Class>)] specs the arguments and options
#   described above
# @yield [hash] the command executor
# @yieldparam [{Symbol => Object}] hash the argument and option symbol => value hash
# @raise [ArgumentError] if the execution block is missing
def initialize(specs=[], &executor)
  if executor.nil? then
    raise ArgumentError.new("Command #{self.class} is missing the required execution block" )
  end
  @executor = executor
  # split the specs on the leading dash of the second item
  options, arguments = specs.partition { |spec| spec[1][0, 1] == '-' }
  @opt_specs = options + DEF_OPTS
  @arg_specs = arguments
  # the application name is the running script
  super($0)
end
|
56
|
+
|
57
|
+
# Runs this command: parses the command line options, processes the built-in
# options, adds the command line arguments and calls the execution block on the
# resulting symbol => value hash.
#
# @return the execution block result
def run
  settings = get_opts
  # the built-in options are handled before the arguments are collected
  handle_caruby_options(settings)
  @executor.call(settings.merge!(get_args))
end
|
69
|
+
|
70
|
+
# Parses the command line options with a new OptionParser and captures the
# parser help text in +@usage+.
#
# @return [{Symbol => Object}] the option => value hash
def get_opts
  parser = OptionParser.new
  # the usage line shows the text of each argument specification
  usage_args = @arg_specs.map { |_, text| text }.join(' ')
  parser.banner = "Usage: #{parser.program_name} [options] #{usage_args}"
  parser.separator ""
  parser.separator "Options:"
  parsed = parse(parser)
  # the help text is complete only after parse has registered the options
  @usage = parser.help
  parsed
end
|
86
|
+
|
87
|
+
# Collects the non-option command line arguments remaining in ARGV.
#
# Each argument specification but the last consumes one argument. The last
# specification consumes all remaining arguments as an array if its usage text
# contains '...', and a single argument otherwise. Surplus arguments, including
# any argument given to a command without argument specifications, halt the
# command with a "Too many arguments" message and exit status 1.
#
# @return [{Symbol => Object}] the argument => value hash
def get_args
  return Hash::EMPTY_HASH if ARGV.empty?
  # arguments were given, but this command does not accept any; without this
  # guard the last-specification lookup below fails on nil
  halt("Too many arguments", 1) if @arg_specs.empty?
  args = {}
  # the leading specifications are matched one-to-one with the leading arguments
  n = [ARGV.size, @arg_specs.size - 1].min
  n.times { |i| args[@arg_specs[i].first] = ARGV[i] }
  if n < ARGV.size then
    arg, form = @arg_specs.last
    if form.index('...') then
      # the variable-length last argument takes the rest of the command line
      args[arg] = ARGV[n..-1]
    elsif @arg_specs.size == ARGV.size then
      args[arg] = ARGV[n]
    else
      halt("Too many arguments", 1)
    end
  end
  args
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
# The built-in option specifications in the form [:option, long, description].
# The array is frozen since it is shared by every command instance.
DEF_OPTS = [
  [:help, "--help", "Displays this help message"],
  [:file, "--file FILE", "Configuration file containing other options"],
  [:log, "--log FILE", "Log file"],
  [:debug, "--debug", "Displays debug log messages"],
].freeze
|
116
|
+
|
117
|
+
# Registers each option specification with the given parser and parses the
# command line. The parsed options are removed from ARGV.
#
# @param [OptionParser] parser the option parser
# @return [{Symbol => Object}] the option => value hash
def parse(parser)
  values = {}
  @opt_specs.each do |spec|
    # the first item is the option symbol; the rest describe the switch
    key, switch = spec.first, spec[1..-1]
    parser.on_tail(*switch) { |arg| values[key] = arg }
  end
  # the handlers registered above fill in the hash as the options are parsed
  parser.parse!
  values
end
|
128
|
+
|
129
|
+
# Processes the built-in options:
# * +:help+ prints the usage and exits
# * +:log+ opens the given log file, passing along the +:debug+ flag
# * +:debug+ without +:log+ sets the logger level to debug
# * +:file+ is removed from the hash and the properties it contains are parsed
#   as additional command line options
#
# An option given on the command line takes precedence over the same option
# loaded from the configuration file.
#
# @param [{Symbol => Object}] opts the option => value hash
def handle_caruby_options(opts)
  # if help, then print usage and exit
  halt if opts[:help]

  # open the log file
  log = opts[:log]
  debug = opts[:debug]
  if log then
    CaRuby::Log.instance.open(log, :debug => debug)
  elsif debug then
    CaRuby::logger.level = Logger::DEBUG
  end

  # if there is a file option, then load additional options from the file
  file = opts.delete(:file)
  if file then
    props = CaRuby::Properties.new(file)
    # append each property to the command line as a long option
    props.each { |opt, arg| ARGV << "--#{opt}" << arg }
    OptionParser.new do |p|
      # the merge block is called with the key, the existing value and the new value;
      # retain the existing command line value if there is one
      opts.merge!(parse(p)) { |key, ov, nv| ov ? ov : nv }
    end
  end
end
|
155
|
+
|
156
|
+
# Prints the given error message and the program usage, then exits with status 1.
#
# @param [String, nil] message the optional error message
def fail(message=nil)
  halt(message, 1)
end

# Prints the given message and program usage, then exits with the given status.
# The message is printed on its own line so that it does not run into the usage.
#
# @param [String, nil] message the optional message to print before the usage
# @param [Integer] status the exit status
def halt(message=nil, status=0)
  puts(message) if message
  print(@usage)
  exit(status)
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'caruby/csv/csvio'
|
2
|
+
require 'caruby/util/properties'
|
3
|
+
|
4
|
+
module CaRuby
|
5
|
+
# Maps a CSV extract to a caBIG application.
|
6
|
+
class CsvMapper
|
7
|
+
attr_reader :csvio, :classes
|
8
|
+
|
9
|
+
# Creates a new CsvMapper from the following parameters:
# * the required mapping configuration file config
# * the required target class
# * the required CSV file name
# * additional CsvIO options as desired
#
# If the options +:mode+ starts with 'w' and there is no +:headers+ option, then the
# +:headers+ option is set in the given options hash to the configuration field names.
#
# If the converter block is given to this method, then that block is called to convert
# each source CSV field value which is not mapped to a String attribute.
#
# @param [String] config the field mapping configuration file
# @param [Class] target the target class
# @param [String] csv the CSV file name
# @param [Hash] options the CsvIO options
# @yield [value, info] the optional field value converter
def initialize(config, target, csv, options={}, &converter) # :yields: value, info
  @target = target
  # load the config
  fld_path_hash = load_config(config)
  # the default input fields are obtained by CsvIO from the first line of the input;
  # the default output fields are the field mapping config keys in order
  options[:headers] ||= config_headers(config) if options[:mode] =~ /^w/
  # the CSV wrapper; do this before making the header map since the CsvIO-generated headers
  # are used to build the header map
  @csvio = CsvIO.new(csv, options) do |value, info|
    # the string headers are determined later in this initializer
    if value and @string_headers.include?(info.header) then
      # a value mapped to a String attribute is not converted
      value
    elsif block_given? then
      # call the custom converter, if any
      yield(value, info)
    end
  end
  # the class => paths hash and the path => class => header hash; do this after making the CsvIO
  @cls_paths_hash, @hdr_map = map_headers(fld_path_hash)
  # the mapped classes, captured before the target class is added
  klasses = @cls_paths_hash.keys
  # include the target class
  @cls_paths_hash[@target] ||= Set.new
  # add superclass paths into subclass paths
  @cls_paths_hash.each do |klass, paths|
    @cls_paths_hash.each { |other, other_paths| paths.merge!(other_paths) if klass < other }
  end
  # NOTE(review): the classes are captured before the abstract classes and mapped
  # superclasses are pruned below, so they include the pruned classes - confirm this is intended
  @classes = @cls_paths_hash.keys
  # retain only the paths of concrete classes without a mapped subclass
  @cls_paths_hash.delete_if do |klass, paths|
    klass.abstract? or klasses.any? { |other| other < klass }
  end
  # collect the headers mapped to a String attribute; the CsvIO converter block above skips these
  @string_headers = Set.new
  @hdr_map.each do |path, cls_hdr_hash|
    last = path.last
    @string_headers.merge!(cls_hdr_hash.values) if AttributeMetadata === last and last.type == String
  end
end
|
59
|
+
|
60
|
+
# Looks up the paths mapped for a class.
#
# @param [Class, nil] klass the starting class (default is the target class)
# @return the class => paths hash entry for the class
def paths(klass=nil)
  @cls_paths_hash[klass || @target]
end
|
66
|
+
|
67
|
+
# Looks up the header mapped to a path which starts at a class.
#
# @param path the mapped path
# @param [Class, nil] klass the starting class (default is the target class)
# @return the header mapped by the path and class
def header(path, klass=nil)
  @hdr_map[path][klass || @target]
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
# Loads the field mapping configuration YAML file.
#
# NOTE(review): YAML load_file is used on the given file; confirm that field
# mapping files always come from a trusted source.
#
# @param [String] file the field mapping configuration file
# @return the loaded YAML content, expected to be a field => path list hash
# @raise [ConfigurationError] if the file cannot be read or parsed
def load_config(file)
  YAML::load_file(file)
rescue
  # interpolate the error rather than concatenate it, since a String cannot be
  # added to an exception object
  raise ConfigurationError.new("Could not read field mapping configuration file #{file}: #{$!}")
end
|
84
|
+
|
85
|
+
# Scans the configuration file for the field names in file order. The field name
# is the line content which precedes the last colon on the line. Lines without
# a colon are skipped.
#
# @param [String] config the field mapping configuration file
# @return [<String>] the field names
def config_headers(config)
  fields = []
  File.open(config) do |io|
    io.each_line do |line|
      field = line[/(^.+):/, 1]
      fields << field unless field.nil?
    end
  end
  fields
end
|
90
|
+
|
91
|
+
# Builds the header maps from the field mapping configuration. A field without
# an attribute list is ignored. The attribute list is a comma-delimited string of
# paths which are resolved by {#create_attribute_path}.
#
# @param config the field => attribute path list string configuration
# @return [(Hash, Hash)] the class => paths hash and the path => class => header hash
# @raise [ConfigurationError] if the CSV file does not have a configuration field
def map_headers(config)
  paths_by_class = LazyHash.new { Set.new }
  headers_by_path = LazyHash.new { Hash.new }
  config.each do |field, attr_list|
    unless attr_list.blank? then
      # the header accessor for the field
      hdr = @csvio.accessor(field)
      if hdr.nil? then
        raise ConfigurationError.new("Field defined in field mapping configuration not found: #{field}")
      end
      attr_list.split(/,\s*/).each do |path_s|
        start, attr_path = create_attribute_path(path_s)
        headers_by_path[attr_path][start] = hdr
        # associate the starting class with the path
        paths_by_class[start] << attr_path
      end
    end
  end
  [paths_by_class, headers_by_path]
end
|
112
|
+
|
113
|
+
# Returns the starting class and the array of AttributeMetadata or symbol objects for the
# period-delimited path string path_s in the pattern (_class_|_attribute_)(+.+_attribute_)*, e.g.:
#   ClinicalStudy.status
#   study.status
# A capitalized first name is resolved as a class in the target class domain module.
# The default starting class is this CsvMapper's target class.
#
# @param [String] path_s the period-delimited path string
# @return [(Class, Array)] the starting class and the path
# @raise [ConfigurationError] if the path string is malformed or an attribute is not found
def create_attribute_path(path_s)
  names = path_s.split('.')
  # if the path starts with a capitalized class name, then resolve the class.
  # otherwise, the target class is the start of the path.
  klass = names.first =~ /^[A-Z]/ ? @target.domain_module.const_get(names.shift) : @target
  # there must be at least one attribute
  if names.empty? then
    raise ConfigurationError.new("Attribute entry in CSV field mapping is not in <class>.<attribute> format: #{path_s}")
  end
  # build the AttributeMetadata path by traversing the names path
  # if the name corresponds to a parent attribute, then add the attribute metadata.
  # otherwise, if the name is a method, then add the method.
  path = []
  names.inject(klass) do |parent, name|
    # NOTE(review): parent appears to be a class here, in which case parent.class is Class and
    # this lookup is always rescued to nil - confirm whether parent.attribute_metadata is intended
    attr_md = parent.class.attribute_metadata(name) rescue nil
    if attr_md then
      # name is an attribute: add the attribute metadata and navigate to the attribute domain type
      path << attr_md
      attr_md.type
    elsif parent.method_defined?(name) then
      # name is not a pre-defined attribute but is a method: add the method symbol to the path and halt traversal
      path << name.to_sym
      break
    else
      # method not defined
      raise ConfigurationError.new("CSV field mapping attribute not found: #{parent.qp}.#{name}")
    end
  end
  # add remaining non-attribute symbols
  tail = names[path.size..-1].map { |name| name.to_sym }
  path.concat(tail)
  # return the starting class and path
  # Note that the starting class is not necessarily the first path AttributeMetadata declarer, since the
  # starting class could be a concrete subclass of an abstract declarer. this is important, since the class
  # must be instantiated.
  [klass, path]
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'fastercsv'
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
require 'faster_csv'
|
6
|
+
require 'caruby/util/options'
|
7
|
+
require 'caruby/util/collection'
|
8
|
+
|
9
|
+
# CsvIO reads or writes CSV records.
|
10
|
+
# This class wraps a FasterCSV with the following modifications:
|
11
|
+
# * relax the date parser to allow dd/mm/yyyy dates
|
12
|
+
# * don't convert integer text with a leading zero to an octal number
|
13
|
+
# * allow one custom converter with different semantics: if the converter block
|
14
|
+
# call returns nil, then continue conversion, otherwise return the converter
|
15
|
+
# result. This differs from FasterCSV converter semantics which calls converters
|
16
|
+
# as long as the result == the input field value. The CsvIO converter semantics
|
17
|
+
# supports converters that intend a String result to be the converted result.
|
18
|
+
#
|
19
|
+
# CsvIO is Enumerable, but does not implement the complete Ruby IO interface.
|
20
|
+
class CsvIO
|
21
|
+
include Enumerable
|
22
|
+
|
23
|
+
# Returns the CSV field access header symbols.
|
24
|
+
attr_reader :headers
|
25
|
+
|
26
|
+
# Opens the CSV file. If a block is given, then the block is called with the new
# CsvIO as the argument and the CsvIO is closed when the block exits, even if the
# block raises an error. Otherwise, the open CsvIO is returned and the caller is
# responsible for closing it.
#
# @param [String] file the CSV file
# @param options the {#initialize} options
# @yield [csvio] the optional block to call with the open CsvIO
# @return the block result, or the new CsvIO if there is no block
# @see #initialize the supported options
def self.open(file, options=nil) # :yields: csvio
  csvio = self.new(file, options)
  return csvio unless block_given?
  begin
    yield csvio
  ensure
    csvio.close
  end
end
|
36
|
+
|
37
|
+
# Opens the given CSV file with the given options and calls {#each} with the given block.
#
# @param [String] file the CSV file
# @param options the {#initialize} options
# @yield [record] the block to call on each CSV row
def self.foreach(file, options=nil, &block) # :yields: record
  # pass the caller's options through rather than resetting them to nil
  self.open(file, options) { |csvio| csvio.each(&block) }
end
|
41
|
+
|
42
|
+
# Creates a new CsvIO for the specified source file.
# If a converter block is given, then it is called by the CsvIO field converter
# before the default conversions.
#
# The options read by this method are:
# * +:mode+ - the file open mode (default "r")
# * +:headers+ - true to read the header record from the input, or the headers to use;
#   the default is true if the mode starts with 'r'
#
# @param [String] file the CSV file
# @param options the options described above
# @yield [value, info] the optional custom field converter
# @raise [ArgumentError] if the headers option is neither true nor Enumerable
def initialize(file, options=nil, &converter)
  # the CSV file open mode
  mode = Options.get(:mode, options, "r")
  # the CSV headers option; can be boolean or array
  hdr_opt = Options.get(:headers, options)
  # there is a header record by default for an input CSV file
  hdr_opt ||= true if mode =~ /^r/
  # make parent directories if necessary for an output CSV file
  FileUtils.mkdir_p(File.dirname(file)) if mode =~ /^w/
  # if headers aren't given, then convert the input CSV header record names to underscore symbols
  hdr_cvtr = :symbol unless Enumerable === hdr_opt
  # make a custom converter
  custom = Proc.new { |f, info| convert(f, info, &converter) }
  # open the CSV file
  @csv = FasterCSV.open(file, mode, :headers => hdr_opt, :header_converters => hdr_cvtr, :return_headers => true, :write_headers => true, :converters => custom)
  # the header => field name hash:
  # if the header option is set to true, then read the input header line.
  # otherwise, parse an empty string which mimics an input header line.
  hdr_row = case hdr_opt
  when true then
    @csv.shift
  when Enumerable then
    ''.parse_csv(:headers => hdr_opt, :header_converters => :symbol, :return_headers => true)
  else
    raise ArgumentError.new("CSV headers option value not supported: #{hdr_opt}")
  end
  # the header row headers
  @headers = hdr_row.headers
  # the header name => symbol map
  @hdr_sym_hash = hdr_row.to_hash.invert
end
|
75
|
+
|
76
|
+
# Closes the CSV file. The trash file is closed as well if one was set.
def close
  @csv.close
  return unless @trash
  @trash.close
end
|
81
|
+
|
82
|
+
# Looks up the accessor for an input header name in the header name => symbol map.
#
# @param header the input header name
# @return the header accessor, or nil if the header name is not mapped
def accessor(header)
  mapped = @hdr_sym_hash[header]
  mapped
end
|
86
|
+
|
87
|
+
# Opens the given file for writing as the trash CSV file. The trash file is an
# output file separate from the CSV file wrapped by this CsvIO. It is opened with
# a header row and is written to by {#reject}.
#
# @param [String] file the trash file to open
def trash=(file)
  trash_opts = {:headers => true, :header_converters => :symbol, :write_headers => true}
  @trash = FasterCSV.open(file, 'w', trash_opts)
end
|
92
|
+
|
93
|
+
# Appends the row to the trash file. This method does nothing if the trash file is not set.
#
# @param [{Symbol => Object}] row the rejected input row
def reject(row)
  return unless @trash
  @trash << row
end
|
99
|
+
|
100
|
+
# Calls the given block on each CSV row in turn.
# This CsvIO is closed when the iteration ends, whether or not it completes.
#
# @yield [row] the block to call on each row
def each
  # parse each line
  @csv.each { |row| yield row }
ensure
  close
end
|
110
|
+
|
111
|
+
# Reads one row from the CSV file. This method is also available as +shift+.
#
# @return the next CSV row
# @see #each
def read
  @csv.shift
end

alias shift read
|
118
|
+
|
119
|
+
# Appends the given row to the CSV file. This method is also available as the +<<+ operator.
#
# @param [{Symbol => Object}] row the row to write
def write(row)
  @csv << row
end

alias << write
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
# The lower-case 3-letter month abbreviation => two-digit month number string hash,
# e.g. the value for the first month is '01'.
MMM_MM_MAP = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'].to_compact_hash_with_index do |mmm, index|
  # the month number is one-based and is left-padded with a zero to two digits
  month = index.succ
  month < 10 ? "0#{month}" : month.to_s
end
|
134
|
+
|
135
|
+
# DateMatcher relaxes the FasterCSV DateMatcher to allow dd/mm/yyyy dates.
# The pattern must match the entire field value. The alternatives are, in order:
# * an optional leading word, a word, a 1-2 digit number and a 2-4 digit number,
#   e.g. 'Jan 5, 2010'
# * a 1-2 digit number, three word characters and a 2-4 digit number separated
#   by dashes, e.g. '5-Jan-10'
# * a 4 digit number followed by two 1-2 digit numbers separated by dashes or
#   slashes, e.g. '2010-01-05'
# * two 1-2 digit numbers followed by a 2-4 digit number separated by dashes or
#   slashes, e.g. '05/01/2010'
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | \d{1,2}-\w{3}-\d{2,4} | \d{4}[-\/]\d{1,2}[-\/]\d{1,2} | \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} )\z /x
|
137
|
+
|
138
|
+
# Converts the given field value. The conversions are tried in the following
# order until one of them results in a value:
# * the block given to this method, if any
# * an integer, if the entire field is digits without a leading zero
# * a date, if the field matches the {DateMatcher}
# * a float, if the entire field is digits with a decimal point
# The patterns are anchored to the whole field rather than a line within it, so
# that a multi-line field is not mistaken for a number.
#
# @param f the input field value to convert
# @param info the CSV field info
# @yield [f, info] the optional custom converter, which has precedence
# @return the converted value, or the input field if there was no conversion
def convert(f, info)
  return if f.nil?
  # the block has precedence
  value = yield(f, info) if block_given?
  # integer conversion
  value ||= Integer(f) if f =~ /\A[1-9]\d*\z/
  # date conversion
  value ||= convert_date(f) if f =~ CsvIO::DateMatcher
  # float conversion; text which Float rejects, e.g. '1.', is left as is
  value ||= (Float(f) rescue f) if f =~ /\A\d+\.\d*\z/ or f =~ /\A\d*\.\d+\z/
  # return converted value or the input field if there was no conversion
  value || f
end
|
154
|
+
|
155
|
+
# Parses the given field value as a date. A value in dd-mmm-yy format is first
# reformatted by {#reformat_dd_mmm_yy_date} and the result is then parsed.
#
# @param [String] f the input field value
# @return [Date, nil] the converted date, or nil if the value could not be
#   reformatted or parsed
def convert_date(f)
  if f =~ /^\d{1,2}-\w{3}-\d{2,4}$/ then
    # dd-mmm-yy format: reformat, then parse the reformatted date
    reformatted = reformat_dd_mmm_yy_date(f)
    return unless reformatted
    convert_date(reformatted)
#  elsif f =~ /^\w{3} \d{1,2}, \d{4}$/ then
#    ddmmyy = reformat_mmm_dd_yyyy_date(f) || return
#    convert_date(ddmmyy)
  else
    # parse as a Date if possible
    begin
      Date.parse(f, true)
    rescue
      nil
    end
  end
end
|
170
|
+
|
171
|
+
# Reformats a date with a 3-letter month abbreviation as a numeric date, e.g.
# '5-Jan-09' is reformatted as '01/5/09'. The month match is case-insensitive.
#
# @param [String] f the input field value in dd-mmm-yy format
# @return [String, nil] the reformatted date String in mm/dd/yy format, or nil if
#   the input is not in dd-mmm-yy format or the month abbreviation is not recognized
def reformat_dd_mmm_yy_date(f)
  # the month must be alphabetic, e.g. '01-123-99' is not matched
  match = /^(\d{1,2})-([[:alpha:]]{3})-(\d{2,4})$/.match(f) || return
  dd, mmm, yy = match.captures
  mm = MMM_MM_MAP[mmm.downcase] || return
  "#{mm}/#{dd}/#{yy}"
end
|
178
|
+
# # @param [String] the input field value in 'mmmd d, yyyy' format
|
179
|
+
# # @return [String] the reformatted date String in mm/dd/yyyy format
|
180
|
+
# def reformat_mmm_dd_yyyy_date(f)
|
181
|
+
# all, mmm, dd, yyyy = /^(\w{3}) (\d{1,2}), (\d{4})$/.match(f).to_a
|
182
|
+
# mm = MMM_MM_MAP[mmm.downcase] || return
|
183
|
+
# "#{mm}/#{dd}/#{yyyy}"
|
184
|
+
# end
|
185
|
+
end
|