caruby-core 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/LEGAL +5 -0
- data/LICENSE +22 -0
- data/README.md +51 -0
- data/doc/website/css/site.css +1 -5
- data/doc/website/images/avatar.png +0 -0
- data/doc/website/images/favicon.ico +0 -0
- data/doc/website/images/logo.png +0 -0
- data/doc/website/index.html +82 -0
- data/doc/website/install.html +87 -0
- data/doc/website/quick_start.html +87 -0
- data/doc/website/tissue.html +85 -0
- data/doc/website/uom.html +10 -0
- data/lib/caruby.rb +3 -0
- data/lib/caruby/active_support/README.txt +2 -0
- data/lib/caruby/active_support/core_ext/string.rb +7 -0
- data/lib/caruby/active_support/core_ext/string/inflections.rb +167 -0
- data/lib/caruby/active_support/inflections.rb +55 -0
- data/lib/caruby/active_support/inflector.rb +398 -0
- data/lib/caruby/cli/application.rb +36 -0
- data/lib/caruby/cli/command.rb +169 -0
- data/lib/caruby/csv/csv_mapper.rb +157 -0
- data/lib/caruby/csv/csvio.rb +185 -0
- data/lib/caruby/database.rb +252 -0
- data/lib/caruby/database/fetched_matcher.rb +66 -0
- data/lib/caruby/database/persistable.rb +432 -0
- data/lib/caruby/database/persistence_service.rb +162 -0
- data/lib/caruby/database/reader.rb +599 -0
- data/lib/caruby/database/saved_merger.rb +131 -0
- data/lib/caruby/database/search_template_builder.rb +59 -0
- data/lib/caruby/database/sql_executor.rb +75 -0
- data/lib/caruby/database/store_template_builder.rb +200 -0
- data/lib/caruby/database/writer.rb +469 -0
- data/lib/caruby/domain/annotatable.rb +25 -0
- data/lib/caruby/domain/annotation.rb +23 -0
- data/lib/caruby/domain/attribute_metadata.rb +447 -0
- data/lib/caruby/domain/java_attribute_metadata.rb +160 -0
- data/lib/caruby/domain/merge.rb +91 -0
- data/lib/caruby/domain/properties.rb +95 -0
- data/lib/caruby/domain/reference_visitor.rb +289 -0
- data/lib/caruby/domain/resource_attributes.rb +528 -0
- data/lib/caruby/domain/resource_dependency.rb +205 -0
- data/lib/caruby/domain/resource_introspection.rb +159 -0
- data/lib/caruby/domain/resource_metadata.rb +117 -0
- data/lib/caruby/domain/resource_module.rb +285 -0
- data/lib/caruby/domain/uniquify.rb +38 -0
- data/lib/caruby/import/annotatable_class.rb +28 -0
- data/lib/caruby/import/annotation_class.rb +27 -0
- data/lib/caruby/import/annotation_module.rb +67 -0
- data/lib/caruby/import/java.rb +338 -0
- data/lib/caruby/migration/migratable.rb +167 -0
- data/lib/caruby/migration/migrator.rb +533 -0
- data/lib/caruby/migration/resource.rb +8 -0
- data/lib/caruby/migration/resource_module.rb +11 -0
- data/lib/caruby/migration/uniquify.rb +20 -0
- data/lib/caruby/resource.rb +969 -0
- data/lib/caruby/util/attribute_path.rb +46 -0
- data/lib/caruby/util/cache.rb +53 -0
- data/lib/caruby/util/class.rb +99 -0
- data/lib/caruby/util/collection.rb +1053 -0
- data/lib/caruby/util/controlled_value.rb +35 -0
- data/lib/caruby/util/coordinate.rb +75 -0
- data/lib/caruby/util/domain_extent.rb +49 -0
- data/lib/caruby/util/file_separator.rb +65 -0
- data/lib/caruby/util/inflector.rb +20 -0
- data/lib/caruby/util/log.rb +95 -0
- data/lib/caruby/util/math.rb +12 -0
- data/lib/caruby/util/merge.rb +59 -0
- data/lib/caruby/util/module.rb +34 -0
- data/lib/caruby/util/options.rb +92 -0
- data/lib/caruby/util/partial_order.rb +36 -0
- data/lib/caruby/util/person.rb +119 -0
- data/lib/caruby/util/pretty_print.rb +184 -0
- data/lib/caruby/util/properties.rb +112 -0
- data/lib/caruby/util/stopwatch.rb +66 -0
- data/lib/caruby/util/topological_sync_enumerator.rb +53 -0
- data/lib/caruby/util/transitive_closure.rb +45 -0
- data/lib/caruby/util/tree.rb +48 -0
- data/lib/caruby/util/trie.rb +37 -0
- data/lib/caruby/util/uniquifier.rb +30 -0
- data/lib/caruby/util/validation.rb +48 -0
- data/lib/caruby/util/version.rb +56 -0
- data/lib/caruby/util/visitor.rb +351 -0
- data/lib/caruby/util/weak_hash.rb +36 -0
- data/lib/caruby/version.rb +3 -0
- metadata +186 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'caruby/util/log'
|
3
|
+
|
4
|
+
module CaRuby
|
5
|
+
module CLI
|
6
|
+
# Extends the standard Logger::Application to use the {CaRuby::Log} and add start
|
7
|
+
# functionality.
|
8
|
+
class Application < Logger::Application
|
9
|
+
# @param [String] appname the application name
|
10
|
+
def initialize(appname=nil)
  super(appname)
  # Replace the logger made by the superclass with the shared caRuby logger,
  # label it with this application's name and adopt its log level.
  shared = Log::instance.logger
  shared.progname = @appname
  @log = shared
  @level = shared.level
end
|
16
|
+
|
17
|
+
# Overrides Logger::Application start with the following enhancements:
|
18
|
+
# * pass arguments and a block to the application run method
|
19
|
+
# * improve the output messages
|
20
|
+
# * print an exception to stderr as well as the log
|
21
|
+
def start(*args, &block)
  # Adapted from Logger::Application#start.
  #
  # Runs the application with the given arguments and block, logging the
  # start and completion. An exception raised by the run method is logged
  # and reported on stderr rather than propagated.
  #
  # @return the run method result, or 1 if the run method raised an exception

  # assume failure until the run method returns its own status
  status = 1
  begin
    log(INFO, "Starting #{@appname}...")
    status = run(*args, &block)
  rescue
    log(FATAL, "#{@appname} detected an exception: #{$!}\n#{$@.qp}")
    $stderr.puts "#{@appname} was unsuccessful: #{$!}.\nSee the log #{Log.instance.file} for more information."
  ensure
    log(INFO, "#{@appname} completed with status #{status}.")
  end
  # Return the status explicitly: without this, the failure path yields the
  # nil result of the rescue clause instead of the logged status.
  status
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'caruby/cli/application'
|
3
|
+
|
4
|
+
module CaRuby
|
5
|
+
module CLI
|
6
|
+
# Command-line parsing errors.
|
7
|
+
# CommandError derives from StandardError, so an unqualified +rescue+ clause catches it.
class CommandError < StandardError; end
|
8
|
+
|
9
|
+
# Command-line parser and executor.
|
10
|
+
class Command < Application
|
11
|
+
# Command line application wrapper.
|
12
|
+
# The specs parameter is an array of command line option and argument
|
13
|
+
# specifications as follows:
|
14
|
+
#
|
15
|
+
# Each option specification is an array in the form:
|
16
|
+
# [:option, short, long, class, description]
|
17
|
+
# where:
|
18
|
+
# * :option is the option symbol, e.g. +:output+
|
19
|
+
# * short is the short option form, e.g. "-o"
|
20
|
+
# * long is the long option form, e.g. "--output FILE"
|
21
|
+
# * class is the option value class, e.g. Integer
|
22
|
+
# * description is the option usage, e.g. "Output file"
|
23
|
+
# The :option, long and description items are required; the short and class items can
|
24
|
+
# be omitted.
|
25
|
+
#
|
26
|
+
# Each command line argument specification is an array in the form:
|
27
|
+
# [:arg, text]
|
28
|
+
# where:
|
29
|
+
# * :arg is the argument symbol, e.g. +:input+
|
30
|
+
# * text is the usage message text, e.g. 'input', '[input]' or 'input ...'
|
31
|
+
# Both items are required.
|
32
|
+
#
|
33
|
+
# Built-in options include the following:
|
34
|
+
# * --help : print the help message and exit
|
35
|
+
# * --version : print the version and exit
|
36
|
+
# * --log FILE : log file
|
37
|
+
# * --debug : print debug messages to the log
|
38
|
+
# * --file FILE: configuration file containing other options
|
39
|
+
# This class processes these built-in options, with the exception of +--version+,
|
40
|
+
# which is a subclass responsibility. Subclasses are responsible for
|
41
|
+
# processing any remaining options.
|
42
|
+
#
|
43
|
+
# @param [(<Symbol>, <String, Class>)] specs the arguments and options
|
44
|
+
# described above
|
45
|
+
# @yield [hash] the command executor
|
46
|
+
# @yieldparam [{Symbol => Object}] the argument and option symbol => value hash
|
47
|
+
def initialize(specs=[], &executor)
  # the executor block is mandatory
  if executor.nil? then
    raise ArgumentError.new("Command #{self.class} is missing the required execution block" )
  end
  @executor = executor
  # a spec whose second item starts with a dash is an option; the rest are arguments
  options, arguments = specs.partition { |spec| spec[1][0, 1] == '-' }
  # the built-in options follow the caller's options
  @opt_specs = options.concat(DEF_OPTS)
  @arg_specs = arguments
  # the application name is the running script
  super($0)
end
|
56
|
+
|
57
|
+
# Runs this command by calling the +execute+ method on the parsed command line
|
58
|
+
# option or argument symbol => value hash.
|
59
|
+
def run
  # parse the options and let this base class consume its built-in ones
  settings = get_opts
  handle_caruby_options(settings)
  # fold in the positional arguments and hand the combined hash to the executor block
  @executor.call(settings.merge!(get_args))
end
|
69
|
+
|
70
|
+
# Collects the command line options.
|
71
|
+
#
|
72
|
+
# @return [{Symbol => Object}] the option => value hash
|
73
|
+
def get_opts
  parser = OptionParser.new
  # the usage banner lists the argument usage texts after the options placeholder
  usage_args = @arg_specs.map { |spec| spec[1] }.join(' ')
  parser.banner = "Usage: #{parser.program_name} [options] #{usage_args}"
  parser.separator ""
  parser.separator "Options:"
  # parse the command line, then capture the help text for later usage messages
  parsed = parse(parser)
  @usage = parser.help
  parsed
end
|
86
|
+
|
87
|
+
# Collects the non-option command line arguments.
|
88
|
+
#
|
89
|
+
# @return [{Symbol => Object}] the argument => value hash
|
90
|
+
def get_args
  # Maps the remaining ARGV entries to the argument specification symbols.
  # The last argument specification receives all remaining values as an
  # array if its usage text contains '...'. Surplus arguments halt the
  # command with status 1.
  return Hash::EMPTY_HASH if ARGV.empty?
  # Arguments were given but none are declared: report that as a usage
  # error rather than failing on the missing last specification.
  halt("Too many arguments", 1) if @arg_specs.empty?
  args = {}
  # every specification but the last takes exactly one value
  n = [ARGV.size, @arg_specs.size - 1].min
  n.times { |i| args[@arg_specs[i].first] = ARGV[i] }
  if n < ARGV.size then
    arg, form = @arg_specs.last
    if form.index('...') then
      # variadic last argument: collect the remainder
      args[arg] = ARGV[n..-1]
    elsif @arg_specs.size == ARGV.size then
      args[arg] = ARGV[n]
    else
      halt("Too many arguments", 1)
    end
  end
  args
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
# The built-in option specifications, each in the form [symbol, long form, description].
DEF_OPTS = [
  [:help,  "--help",      "Displays this help message"],
  [:file,  "--file FILE", "Configuration file containing other options"],
  [:log,   "--log FILE",  "Log file"],
  [:debug, "--debug",     "Displays debug log messages"]
]
|
116
|
+
|
117
|
+
# @param [OptionParser] parser the option parser
|
118
|
+
# @return [{Symbol => Object}] the option => value hash
|
119
|
+
def parse(parser)
  values = {}
  # register each option; its handler records the parsed value under the option symbol
  @opt_specs.each do |spec|
    key = spec.first
    switches = spec[1..-1]
    parser.on_tail(*switches) { |arg| values[key] = arg }
  end
  # consume the options from ARGV, which fills the hash via the handlers
  parser.parse!
  values
end
|
128
|
+
|
129
|
+
# Processes the built-in options.
|
130
|
+
#
|
131
|
+
# @param [{Symbol => Object}] the option => value hash
|
132
|
+
def handle_caruby_options(opts)
  # Processes the built-in +:help+, +:log+, +:debug+ and +:file+ options.
  # The +:file+ entry is removed from the given hash and the options read
  # from that file are merged into the hash.

  # if help, then print usage and exit
  halt if opts[:help]

  # open the log file, or raise the log level if only debug is set
  log = opts[:log]
  debug = opts[:debug]
  if log then
    CaRuby::Log.instance.open(log, :debug => debug)
  elsif debug then
    CaRuby::logger.level = Logger::DEBUG
  end

  # if there is a file option, then load additional options from the file
  file = opts.delete(:file)
  if file then
    props = CaRuby::Properties.new(file)
    # append the file options to the command line and parse them like any other option
    props.each { |opt, arg| ARGV << "--#{opt}" << arg }
    OptionParser.new do |p|
      # Hash#merge! yields (key, old value, new value). An option already
      # set on the command line takes precedence over the file setting.
      opts.merge!(parse(p)) { |key, ov, nv| ov ? ov : nv }
    end
  end
end
|
155
|
+
|
156
|
+
# Prints the given error message and the program usage, then exits with status 1.
|
157
|
+
def fail(message=nil)
  # a failure is a halt with the error exit status
  error_status = 1
  halt(message, error_status)
end
|
160
|
+
|
161
|
+
# Prints the given message and program usage, then exits with the given status.
|
162
|
+
def halt(message=nil, status=0)
  # Writes the optional message on its own line, followed by the usage
  # text captured from the option parser, then exits.
  #
  # puts terminates the message with a newline so that it does not run
  # into the first line of the usage text.
  puts(message) if message
  print(@usage)
  exit(status)
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'caruby/csv/csvio'
|
2
|
+
require 'caruby/util/properties'
|
3
|
+
|
4
|
+
module CaRuby
|
5
|
+
# Maps a CSV extract to a caBIG application.
|
6
|
+
class CsvMapper
|
7
|
+
attr_reader :csvio, :classes
|
8
|
+
|
9
|
+
# Creates a new CsvMapper from the following parameters:
|
10
|
+
# * the required mapping configuration file config
|
11
|
+
# * the required target class
|
12
|
+
# * the required CSV file name
|
13
|
+
# * additional CsvIO options as desired
|
14
|
+
#
|
15
|
+
# If the converter block is given to this method, then that block is called to convert
|
16
|
+
# source CSV field values as described in the FasterCSV documentation.
|
17
|
+
def initialize(config, target, csv, options={}, &converter) # :yields: value, info
  @target = target
  # load the field => path list configuration
  fld_path_hash = load_config(config)
  # the default input fields are obtained by CsvIO from the first line of the input;
  # the default output fields are the field mapping config keys in order
  options[:headers] ||= config_headers(config) if options[:mode] =~ /^w/
  # The string-valued headers are filled in at the end of this initializer.
  # The set is created up front so that the converter below never sees nil.
  @string_headers = Set.new
  # the CSV wrapper; do this before making the header map since the CsvIO-generated headers
  # are used to build the header map
  @csvio = CsvIO.new(csv, options) do |value, info|
    if value and @string_headers.include?(info.header) then
      # a String attribute value is used as is
      value
    elsif block_given? then
      # otherwise, delegate to the custom converter, if any
      yield(value, info)
    end
  end
  # the class => paths hash and the path => class => header hash; do this after making the CsvIO
  @cls_paths_hash, @hdr_map = map_headers(fld_path_hash)
  # the mapped classes before the target class is added
  klasses = @cls_paths_hash.keys
  # include the target class
  @cls_paths_hash[@target] ||= Set.new
  # add superclass paths into subclass paths
  @cls_paths_hash.each do |klass, paths|
    @cls_paths_hash.each { |other, other_paths| paths.merge!(other_paths) if klass < other }
  end
  # all mapped classes, captured before the abstract and superseded classes are removed
  @classes = @cls_paths_hash.keys
  # retain only the concrete classes which do not have a mapped subclass
  @cls_paths_hash.delete_if do |klass, paths|
    klass.abstract? or klasses.any? { |other| other < klass }
  end
  # collect the headers whose path ends in a String-typed attribute
  @hdr_map.each do |path, cls_hdr_hash|
    last = path.last
    @string_headers.merge!(cls_hdr_hash.values) if AttributeMetadata === last and last.type == String
  end
end
|
59
|
+
|
60
|
+
# Returns the given klass's mapped AttributeMetadata paths.
|
61
|
+
# The default klass is the target class.
|
62
|
+
def paths(klass=nil)
  # fall back to the target class when no class is given
  @cls_paths_hash[klass || @target]
end
|
66
|
+
|
67
|
+
# Returns the header mapped by the given AttributeMetadata path and starting klass.
|
68
|
+
# The default klass is the target class.
|
69
|
+
def header(path, klass=nil)
  # the class => header hash for the path
  by_class = @hdr_map[path]
  # fall back to the target class when no class is given
  by_class[klass || @target]
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
# Returns the field => path list hash from the field mapping configuration file.
|
77
|
+
def load_config(file)
  # Parses the YAML field mapping configuration file and returns its content.
  # A read or parse failure is reported as a ConfigurationError.
  YAML::load_file(file)
rescue => e
  # Interpolate the cause into the message: concatenating a String with the
  # exception object raises a TypeError which hides the real problem.
  raise ConfigurationError.new("Could not read field mapping configuration file #{file}: #{e}")
end
|
84
|
+
|
85
|
+
def config_headers(config)
  # Collects, in file order, the text preceding the last colon of each
  # configuration file line. Lines without a match are skipped.
  File.open(config) do |io|
    keys = []
    io.each_line do |line|
      key = line[/(^.+):/, 1]
      keys << key unless key.nil?
    end
    keys
  end
end
|
90
|
+
|
91
|
+
# @param [{Symbol => <AttributeMetadata>}] config the field => path list configuration
|
92
|
+
# @return [({Symbol => <AttributeMetadata>}, {Class => {<AttributeMetadata> => Symbol>}})]
|
93
|
+
# the class => paths hash and the path => class => header hash
|
94
|
+
def map_headers(config)
  # the class => paths hash built by this method
  paths_by_class = LazyHash.new { Set.new }
  # the path => class => header hash built by this method
  headers_by_path = LazyHash.new { Hash.new }
  config.each do |field, attr_list|
    # skip the fields which are not mapped to an attribute
    unless attr_list.blank? then
      # the header accessor method for the field
      hdr = @csvio.accessor(field)
      if hdr.nil? then
        raise ConfigurationError.new("Field defined in field mapping configuration not found: #{field}")
      end
      # a field can be mapped to several comma-separated attribute paths
      attr_list.split(/,\s*/).each do |path_s|
        start, path = create_attribute_path(path_s)
        headers_by_path[path][start] = hdr
        # associate the starting class with the path
        paths_by_class[start] << path
      end
    end
  end
  [paths_by_class, headers_by_path]
end
|
112
|
+
|
113
|
+
# Returns an array of AttributeMetadata or symbol objects for the period-delimited path string path_s in the
|
114
|
+
# pattern (_class_|_attribute_)(+.+_attribute_)*, e.g.:
|
115
|
+
# ClinicalStudy.status
|
116
|
+
# study.status
|
117
|
+
# The default starting class is this CsvMapper's target class.
|
118
|
+
# Raises ConfigurationError if the path string is malformed or an attribute is not found.
|
119
|
+
def create_attribute_path(path_s)
  # Resolves the period-delimited path string into the starting class and an
  # array of attribute metadata and method symbols.
  names = path_s.split('.')
  # if the path starts with a capitalized class name, then resolve the class.
  # otherwise, the target class is the start of the path.
  klass = names.first =~ /^[A-Z]/ ? @target.domain_module.const_get(names.shift) : @target
  # there must be at least one attribute
  if names.empty? then
    # report the offending path string
    raise ConfigurationError.new("Attribute entry in CSV field mapping is not in <class>.<attribute> format: #{path_s}")
  end
  # build the path by traversing the names:
  # if the name corresponds to a parent attribute, then add the attribute metadata.
  # otherwise, if the name is a method, then add the method symbol.
  path = []
  names.inject(klass) do |parent, name|
    # The parent is always a class here (the starting class or an attribute
    # type), so the metadata is looked up on the parent itself. A failed
    # lookup is treated as "not an attribute".
    attr_md = parent.attribute_metadata(name) rescue nil
    if attr_md then
      # name is an attribute: add the attribute metadata and navigate to the attribute type
      path << attr_md
      attr_md.type
    elsif parent.method_defined?(name) then
      # name is not an attribute but is a method: add the method symbol to the path and halt traversal
      path << name.to_sym
      break
    else
      # neither an attribute nor a method
      raise ConfigurationError.new("CSV field mapping attribute not found: #{parent.qp}.#{name}")
    end
  end
  # add the names remaining after a halted traversal as symbols
  tail = names[path.size..-1].map { |name| name.to_sym }
  path.concat(tail)
  # Return the starting class and path.
  # Note that the starting class is not necessarily the first path attribute declarer, since the
  # starting class could be a concrete subclass of an abstract declarer.
  [klass, path]
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem 'fastercsv'
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
require 'faster_csv'
|
6
|
+
require 'caruby/util/options'
|
7
|
+
require 'caruby/util/collection'
|
8
|
+
|
9
|
+
# CsvIO reads or writes CSV records.
|
10
|
+
# This class wraps a FasterCSV with the following modifications:
|
11
|
+
# * relax the date parser to allow dd/mm/yyyy dates
|
12
|
+
# * don't convert integer text with a leading zero to an octal number
|
13
|
+
# * allow one custom converter with different semantics: if the converter block
|
14
|
+
# call returns nil, then continue conversion, otherwise return the converter
|
15
|
+
# result. This differs from FasterCSV converter semantics which calls converters
|
16
|
+
# as long as the result == the input field value. The CsvIO converter semantics
|
17
|
+
# supports converters that intend a String result to be the converted result.
|
18
|
+
#
|
19
|
+
# CsvIO is Enumerable, but does not implement the complete Ruby IO interface.
|
20
|
+
class CsvIO
|
21
|
+
include Enumerable
|
22
|
+
|
23
|
+
# Returns the CSV field access header symbols.
|
24
|
+
attr_reader :headers
|
25
|
+
|
26
|
+
# Opens the CSV file and calls the given block with this CsvIO as the argument.
|
27
|
+
#
|
28
|
+
# @see #initialize the supported options
|
29
|
+
def self.open(file, options=nil) # :yields: csvio
  # Creates a CsvIO on the file. If a block is given, then the block is
  # called with the CsvIO, the CsvIO is closed even if the block raises an
  # exception, and the block result is returned. Without a block, the open
  # CsvIO is returned and the caller is responsible for closing it.
  csvio = self.new(file, options)
  return csvio unless block_given?
  begin
    yield csvio
  ensure
    csvio.close
  end
end
|
36
|
+
|
37
|
+
# #open the given CSV file and options, and call {#each} with the given block.
|
38
|
+
def self.foreach(file, options=nil, &block) # :yields: record
  # Opens the file with the given options and iterates over each record.
  # The options are passed through unchanged to the open call.
  self.open(file, options) { |csvio| csvio.each(&block) }
end
|
41
|
+
|
42
|
+
# Creates a new CsvIO for the specified source file.
|
43
|
+
# If a converter block is given, then it is added to the CSV converters list.
|
44
|
+
def initialize(file, options=nil, &converter)
  # the CSV file open mode
  mode = Options.get(:mode, options, "r")
  # the CSV headers option; can be boolean or array
  hdr_opt = Options.get(:headers, options)
  # there is a header record by default for an input CSV file
  hdr_opt ||= true if mode =~ /^r/
  # Reject an unsupported headers option before touching the file system, so
  # that no file is created, truncated or left open on failure.
  unless true == hdr_opt or Enumerable === hdr_opt then
    raise ArgumentError.new("CSV headers option value not supported: #{hdr_opt}")
  end
  # make parent directories if necessary for an output CSV file
  FileUtils.mkdir_p(File.dirname(file)) if mode =~ /^w/
  # if headers aren't given, then convert the input CSV header record names to underscore symbols
  hdr_cvtr = :symbol unless Enumerable === hdr_opt
  # the custom converter delegates to convert with the optional converter block
  custom = Proc.new { |f, info| convert(f, info, &converter) }
  # open the CSV file
  @csv = FasterCSV.open(file, mode, :headers => hdr_opt, :header_converters => hdr_cvtr, :return_headers => true, :write_headers => true, :converters => custom)
  # the header row:
  # if the header option is set to true, then read the input header line.
  # otherwise, parse an empty string which mimics an input header line.
  hdr_row = if true == hdr_opt then
    @csv.shift
  else
    ''.parse_csv(:headers => hdr_opt, :header_converters => :symbol, :return_headers => true)
  end
  # the header row headers
  @headers = hdr_row.headers
  # the header name => symbol map
  @hdr_sym_hash = hdr_row.to_hash.invert
end
|
75
|
+
|
76
|
+
# Closes the CSV file and trash file if necessary.
|
77
|
+
def close
  # Closes the CSV file and the trash file, if any. A second call is a
  # no-op. This matters because #each closes this CsvIO when the iteration
  # completes and CsvIO.open closes it again after its block returns.
  @csv.close unless @csv.closed?
  @trash.close if @trash and not @trash.closed?
end
|
81
|
+
|
82
|
+
# Returns the header accessor method for the given input header name.
|
83
|
+
def accessor(header)
  # look up the symbol in the header name => symbol map built by the initializer
  lookup = @hdr_sym_hash
  lookup[header]
end
|
86
|
+
|
87
|
+
# Sets the trash output file. This creates a separate CSV output file distinct from the input CSV file.
|
88
|
+
# This is useful for writing rejected rows from the input. The output file has a header row.
|
89
|
+
def trash=(file)
  # open a separate CSV writer with a header row for the rejected rows
  @trash = FasterCSV.open(
    file, 'w',
    :headers => true, :header_converters => :symbol, :write_headers => true
  )
end
|
92
|
+
|
93
|
+
# Writes the row to the trash file if the trash file is set.
|
94
|
+
#
|
95
|
+
#@param [{Symbol => Object}] row the rejected input row
|
96
|
+
def reject(row)
  # nothing to do unless a trash file was set
  return unless @trash
  @trash << row
end
|
99
|
+
|
100
|
+
# Iterates over each CSV row, yielding a row for each iteration.
|
101
|
+
# This method closes the CSV file after the iteration completes.
|
102
|
+
def each
  # hand each parsed row to the caller's block
  @csv.each { |row| yield row }
ensure
  # the file is closed whether or not the iteration completes normally
  close
end
|
110
|
+
|
111
|
+
# @return the next CSV row
|
112
|
+
# @see #each
|
113
|
+
def read
  # take the next row off the underlying CSV reader
  row = @csv.shift
  row
end
|
116
|
+
|
117
|
+
alias :shift :read
|
118
|
+
|
119
|
+
# Writes the given row to the CSV file.
|
120
|
+
#
|
121
|
+
#@param [{Symbol => Object}] row the input row
|
122
|
+
def write(row)
  # append the row to the underlying CSV writer
  sink = @csv
  sink << row
end
|
125
|
+
|
126
|
+
alias :<< :write
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
# 3-letter months => month sequence hash.
|
131
|
+
# Lower-case 3-letter month abbreviation => zero-padded 2-digit month number string.
MMM_MM_MAP = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'].to_compact_hash_with_index do |mmm, index|
  # the month number is the zero-based index plus one, padded to two digits
  '%02d' % index.succ
end
|
134
|
+
|
135
|
+
# DateMatcher relaxes the FasterCSV DateMatcher to allow dd/mm/yyyy dates.
|
136
|
+
# The pattern is anchored to the whole field and accepts four alternatives:
# * an optional leading word, then word, 1-2 digit, 2-4 digit groups separated by whitespace with optional commas
# * 1-2 digits, a 3-character word and 2-4 digits separated by dashes
# * 4 digits followed by two 1-2 digit groups, separated by dashes or slashes
# * two 1-2 digit groups followed by 2-4 digits, separated by dashes or slashes
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | \d{1,2}-\w{3}-\d{2,4} | \d{4}[-\/]\d{1,2}[-\/]\d{1,2} | \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} )\z /x
|
137
|
+
|
138
|
+
# @param f the input field value to convert
|
139
|
+
# @param info the CSV field info
|
140
|
+
# @return the converted value
|
141
|
+
def convert(f, info)
  # an empty field is not converted
  return nil if f.nil?
  # the custom converter block, if any, has precedence
  result = block_given? ? yield(f, info) : nil
  # integer conversion; text with a leading zero does not match
  if f =~ /^[1-9]\d*$/ then
    result ||= Integer(f)
  end
  # date conversion
  if f =~ CsvIO::DateMatcher then
    result ||= convert_date(f)
  end
  # float conversion; the field is retained if it is not a valid float
  if f =~ /^\d+\.\d*$/ or f =~ /^\d*\.\d+$/ then
    result ||= (Float(f) rescue f)
  end
  # the converted value, or the input field if there was no conversion
  result || f
end
|
154
|
+
|
155
|
+
# @param [String] the input field value
|
156
|
+
# @return [Date] the converted date
|
157
|
+
def convert_date(f)
  # Anything other than the dd-mmm-yy format is handed to the Date parser;
  # a value which cannot be parsed results in nil.
  unless f =~ /^\d{1,2}-\w{3}-\d{2,4}$/ then
    return (Date.parse(f, true) rescue nil)
  end
  # dd-mmm-yy is first rewritten with a numeric month and then converted;
  # there is no date if the rewrite fails
  reformatted = reformat_dd_mmm_yy_date(f)
  return unless reformatted
  convert_date(reformatted)
end
|
170
|
+
|
171
|
+
# @param [String] the input field value in dd-mmm-yy format
|
172
|
+
# @return [String] the reformatted date String in mm/dd/yy format
|
173
|
+
def reformat_dd_mmm_yy_date(f)
  # Rewrites a dd-mmm-yy value with the numeric month from MMM_MM_MAP.
  # Returns nil if the value is not in that format or the month
  # abbreviation is not recognized.
  match = /^(\d{1,2})-([[:alpha:]]{3})-(\d{2,4})$/.match(f)
  # The caller's format check accepts any three word characters in the
  # middle, e.g. '1-800-5555', so a failed match is possible here.
  return if match.nil?
  dd, mmm, yy = match.captures
  mm = MMM_MM_MAP[mmm.downcase] || return
  "#{mm}/#{dd}/#{yy}"
end
|
178
|
+
# # @param [String] the input field value in 'mmmd d, yyyy' format
|
179
|
+
# # @return [String] the reformatted date String in mm/dd/yyyy format
|
180
|
+
# def reformat_mmm_dd_yyyy_date(f)
|
181
|
+
# all, mmm, dd, yyyy = /^(\w{3}) (\d{1,2}), (\d{4})$/.match(f).to_a
|
182
|
+
# mm = MMM_MM_MAP[mmm.downcase] || return
|
183
|
+
# "#{mm}/#{dd}/#{yyyy}"
|
184
|
+
# end
|
185
|
+
end
|