caruby-core 1.5.5 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (126) hide show
  1. data/Gemfile +9 -0
  2. data/History.md +5 -1
  3. data/lib/caruby.rb +3 -5
  4. data/lib/caruby/caruby-src.tar.gz +0 -0
  5. data/lib/caruby/database.rb +53 -69
  6. data/lib/caruby/database/application_service.rb +25 -0
  7. data/lib/caruby/database/cache.rb +60 -0
  8. data/lib/caruby/database/fetched_matcher.rb +52 -38
  9. data/lib/caruby/database/lazy_loader.rb +4 -4
  10. data/lib/caruby/database/operation.rb +34 -0
  11. data/lib/caruby/database/persistable.rb +171 -86
  12. data/lib/caruby/database/persistence_service.rb +32 -34
  13. data/lib/caruby/database/persistifier.rb +100 -43
  14. data/lib/caruby/database/reader.rb +107 -85
  15. data/lib/caruby/database/reader_template_builder.rb +60 -0
  16. data/lib/caruby/database/saved_matcher.rb +3 -3
  17. data/lib/caruby/database/sql_executor.rb +88 -17
  18. data/lib/caruby/database/writer.rb +213 -177
  19. data/lib/caruby/database/writer_template_builder.rb +334 -0
  20. data/lib/caruby/{util → helpers}/controlled_value.rb +0 -0
  21. data/lib/caruby/{util → helpers}/coordinate.rb +4 -4
  22. data/lib/caruby/{util → helpers}/person.rb +3 -3
  23. data/lib/caruby/{util → helpers}/properties.rb +7 -9
  24. data/lib/caruby/{util → helpers}/roman.rb +2 -2
  25. data/lib/caruby/{util → helpers}/version.rb +1 -1
  26. data/lib/caruby/json/deserializer.rb +2 -2
  27. data/lib/caruby/json/serializer.rb +49 -7
  28. data/lib/caruby/metadata.rb +30 -0
  29. data/lib/caruby/metadata/java_property.rb +21 -0
  30. data/lib/caruby/metadata/propertied.rb +191 -0
  31. data/lib/caruby/metadata/property.rb +22 -0
  32. data/lib/caruby/metadata/property_characteristics.rb +201 -0
  33. data/lib/caruby/migration/migratable.rb +11 -182
  34. data/lib/caruby/rdbi/driver/jdbc.rb +446 -0
  35. data/lib/caruby/resource.rb +20 -823
  36. data/lib/caruby/version.rb +1 -1
  37. data/test/lib/caruby/database/cache_test.rb +54 -0
  38. data/test/lib/caruby/{util → helpers}/controlled_value_test.rb +3 -5
  39. data/test/lib/caruby/{util → helpers}/person_test.rb +4 -6
  40. data/test/lib/caruby/helpers/properties_test.rb +34 -0
  41. data/test/lib/caruby/{util → helpers}/roman_test.rb +2 -3
  42. data/test/lib/caruby/{util → helpers}/version_test.rb +2 -3
  43. data/test/lib/helper.rb +7 -0
  44. metadata +161 -214
  45. data/lib/caruby/cli/application.rb +0 -36
  46. data/lib/caruby/cli/command.rb +0 -202
  47. data/lib/caruby/csv/csv_mapper.rb +0 -159
  48. data/lib/caruby/csv/csvio.rb +0 -203
  49. data/lib/caruby/database/search_template_builder.rb +0 -56
  50. data/lib/caruby/database/store_template_builder.rb +0 -278
  51. data/lib/caruby/domain.rb +0 -193
  52. data/lib/caruby/domain/attribute.rb +0 -584
  53. data/lib/caruby/domain/attributes.rb +0 -628
  54. data/lib/caruby/domain/dependency.rb +0 -225
  55. data/lib/caruby/domain/id_alias.rb +0 -22
  56. data/lib/caruby/domain/importer.rb +0 -183
  57. data/lib/caruby/domain/introspection.rb +0 -176
  58. data/lib/caruby/domain/inverse.rb +0 -172
  59. data/lib/caruby/domain/inversible.rb +0 -90
  60. data/lib/caruby/domain/java_attribute.rb +0 -173
  61. data/lib/caruby/domain/merge.rb +0 -185
  62. data/lib/caruby/domain/metadata.rb +0 -142
  63. data/lib/caruby/domain/mixin.rb +0 -35
  64. data/lib/caruby/domain/properties.rb +0 -95
  65. data/lib/caruby/domain/reference_visitor.rb +0 -428
  66. data/lib/caruby/domain/uniquify.rb +0 -50
  67. data/lib/caruby/import/java.rb +0 -387
  68. data/lib/caruby/migration/migrator.rb +0 -918
  69. data/lib/caruby/migration/resource_module.rb +0 -9
  70. data/lib/caruby/migration/uniquify.rb +0 -17
  71. data/lib/caruby/util/attribute_path.rb +0 -44
  72. data/lib/caruby/util/cache.rb +0 -56
  73. data/lib/caruby/util/class.rb +0 -149
  74. data/lib/caruby/util/collection.rb +0 -1152
  75. data/lib/caruby/util/domain_extent.rb +0 -46
  76. data/lib/caruby/util/file_separator.rb +0 -65
  77. data/lib/caruby/util/inflector.rb +0 -27
  78. data/lib/caruby/util/log.rb +0 -95
  79. data/lib/caruby/util/math.rb +0 -12
  80. data/lib/caruby/util/merge.rb +0 -59
  81. data/lib/caruby/util/module.rb +0 -18
  82. data/lib/caruby/util/options.rb +0 -97
  83. data/lib/caruby/util/partial_order.rb +0 -35
  84. data/lib/caruby/util/pretty_print.rb +0 -204
  85. data/lib/caruby/util/stopwatch.rb +0 -74
  86. data/lib/caruby/util/topological_sync_enumerator.rb +0 -62
  87. data/lib/caruby/util/transitive_closure.rb +0 -55
  88. data/lib/caruby/util/tree.rb +0 -48
  89. data/lib/caruby/util/trie.rb +0 -37
  90. data/lib/caruby/util/uniquifier.rb +0 -30
  91. data/lib/caruby/util/validation.rb +0 -20
  92. data/lib/caruby/util/visitor.rb +0 -365
  93. data/lib/caruby/util/weak_hash.rb +0 -36
  94. data/test/lib/caruby/csv/csv_mapper_test.rb +0 -40
  95. data/test/lib/caruby/csv/csvio_test.rb +0 -69
  96. data/test/lib/caruby/database/persistable_test.rb +0 -92
  97. data/test/lib/caruby/domain/domain_test.rb +0 -112
  98. data/test/lib/caruby/domain/inversible_test.rb +0 -99
  99. data/test/lib/caruby/domain/reference_visitor_test.rb +0 -130
  100. data/test/lib/caruby/import/java_test.rb +0 -80
  101. data/test/lib/caruby/import/mixed_case_test.rb +0 -14
  102. data/test/lib/caruby/migration/test_case.rb +0 -102
  103. data/test/lib/caruby/test_case.rb +0 -230
  104. data/test/lib/caruby/util/cache_test.rb +0 -23
  105. data/test/lib/caruby/util/class_test.rb +0 -61
  106. data/test/lib/caruby/util/collection_test.rb +0 -398
  107. data/test/lib/caruby/util/command_test.rb +0 -55
  108. data/test/lib/caruby/util/domain_extent_test.rb +0 -60
  109. data/test/lib/caruby/util/file_separator_test.rb +0 -30
  110. data/test/lib/caruby/util/inflector_test.rb +0 -12
  111. data/test/lib/caruby/util/lazy_hash_test.rb +0 -34
  112. data/test/lib/caruby/util/merge_test.rb +0 -83
  113. data/test/lib/caruby/util/module_test.rb +0 -25
  114. data/test/lib/caruby/util/options_test.rb +0 -59
  115. data/test/lib/caruby/util/partial_order_test.rb +0 -42
  116. data/test/lib/caruby/util/pretty_print_test.rb +0 -85
  117. data/test/lib/caruby/util/properties_test.rb +0 -50
  118. data/test/lib/caruby/util/stopwatch_test.rb +0 -18
  119. data/test/lib/caruby/util/topological_sync_enumerator_test.rb +0 -69
  120. data/test/lib/caruby/util/transitive_closure_test.rb +0 -67
  121. data/test/lib/caruby/util/tree_test.rb +0 -23
  122. data/test/lib/caruby/util/trie_test.rb +0 -14
  123. data/test/lib/caruby/util/visitor_test.rb +0 -278
  124. data/test/lib/caruby/util/weak_hash_test.rb +0 -45
  125. data/test/lib/examples/clinical_trials/migration/migration_test.rb +0 -58
  126. data/test/lib/examples/clinical_trials/migration/test_case.rb +0 -38
@@ -1,36 +0,0 @@
1
- require 'logger'
2
- require 'caruby/util/log'
3
-
4
- module CaRuby
5
- module CLI
6
- # Extends the standard Logger::Application to use the {Log} and add start
7
- # functionality.
8
- class Application < Logger::Application
9
- # @param [String] appname the application name
10
- def initialize(appname=nil)
11
- super(appname)
12
- @log = Log::instance.logger
13
- @log.progname = @appname
14
- @level = @log.level
15
- end
16
-
17
- # Overrides Logger::Application start with the following enhancements:
18
- # * pass arguments and a block to the application run method
19
- # * improve the output messages
20
- # * print an exception to stderr as well as the log
21
- def start(*args, &block)
22
- # Adapted from Logger.
23
- status = 1
24
- begin
25
- log(INFO, "Starting #{@appname}...")
26
- status = run(*args, &block)
27
- rescue
28
- log(FATAL, "#{@appname} detected an exception: #{$!}\n#{$@.qp}")
29
- $stderr.puts "#{@appname} was unsuccessful: #{$!}.\nSee the log #{Log.instance.file} for more information."
30
- ensure
31
- log(INFO, "#{@appname} completed with status #{status}.")
32
- end
33
- end
34
- end
35
- end
36
- end
@@ -1,202 +0,0 @@
1
- require 'optparse'
2
- require 'caruby/cli/application'
3
-
4
- module CaRuby
5
- module CLI
6
- # Command-line parsing errors.
7
- class CommandError < StandardError; end
8
-
9
- # Command-line parser and executor.
10
- class Command < Application
11
- # Command line application wrapper.
12
- # The specs parameter is an array of command line option and argument
13
- # specifications as follows:
14
- #
15
- # Each option specification is an array in the form:
16
- # [option, short, long, class, description]
17
- # where:
18
- # * option is the option symbol, e.g. +:output+
19
- # * short is the short option form, e.g. "-o"
20
- # * long is the long option form, e.g. "--output FILE"
21
- # * class is the option value class, e.g. Integer
22
- # * description is the option usage, e.g. "Output file"
23
- # The option, long and description items are required; the short and class items can
24
- # be omitted.
25
- #
26
- # Each command line argument specification is an array in the form:
27
- # [arg, text]
28
- # where:
29
- # * arg is the argument symbol, e.g. +:input+
30
- # * text is the usage message text, e.g. 'input', '[input]' or 'input ...'
31
- # The arg and text items are required.
32
- #
33
- # Built-in options include the following:
34
- # * --help : print the help message and exit
35
- # * --verbose : print additional information to the console
36
- # * --log FILE : log file
37
- # * --debug : print debug messages to the log
38
- # * --file FILE: configuration file containing other options
39
- # * --quiet: suppress printing messages to stdout
40
- #
41
- # This class processes these built-in options, with the exception of +--version+,
42
- # which is a subclass responsibility. Subclasses are responsible for
43
- # processing any remaining options.
44
- #
45
- # @param [(<Symbol>, <String, Class>)] specs the arguments and options
46
- # described above
47
- # @yield (see #run)
48
- # @yieldparam (see #run)
49
- def initialize(specs=[], &executor)
50
- @executor = executor
51
- # Options start with a dash, arguments are whatever is left.
52
- @opt_specs, @arg_specs = specs.partition { |spec| spec[1][0, 1] == '-' }
53
- # Add the default option specifications.
54
- @opt_specs.concat(DEF_OPTS)
55
- # The application name is the command.
56
- super($0)
57
- end
58
-
59
- # Runs this command by calling the block given to this method, if provided,
60
- # otherwise the block given to {#initialize}. The block is called with the
61
- # option or argument symbol => value hash.
62
- # @yield [hash] the command execution block
63
- # @yieldparam [{Symbol => Object}] hash the argument and option symbol => value hash
64
- def run
65
- # the option => value hash
66
- opts = get_opts
67
- # this base class's options
68
- handle_options(opts)
69
- # add the argument => value hash
70
- opts.merge!(get_args)
71
- # call the block
72
- block_given? ? yield(opts) : call_executor(opts)
73
- end
74
-
75
- private
76
-
77
- # The default options that apply to all commands.
78
- DEF_OPTS = [
79
- [:help, "-h", "--help", "Display this help message"],
80
- [:file, "--file FILE", "Configuration file containing other options"],
81
- [:log, "--log FILE", "Log file"],
82
- [:debug, "--debug", "Display debug log messages"],
83
- [:quiet, "-q", "--quiet", "Suppress printing messages to stdout"],
84
- [:verbose, "-v", "--verbose", "Print additional messages to stdout"]
85
- ]
86
-
87
- # @param [{Symbol => Object}] opts the option => value hash
88
- def call_executor(opts)
89
- if @executor.nil? then raise CommandError.new("Command #{self} does not have an execution block") end
90
- @executor.call(opts)
91
- end
92
-
93
- # Collects the command line options.
94
- #
95
- # @return [{Symbol => Object}] the option => value hash
96
- def get_opts
97
- # the options hash
98
- opts = {}
99
- # the option parser
100
- OptionParser.new do |parser|
101
- # The help argument string is comprised of the argument specification labels.
102
- arg_s = @arg_specs.map { |spec| spec[1] }.join(' ')
103
- # Build the usage message.
104
- parser.banner = "Usage: #{parser.program_name} [options] #{arg_s}"
105
- parser.separator ""
106
- parser.separator "Options:"
107
- # parse the options
108
- opts = parse(parser)
109
- # grab the usage message
110
- @usage = parser.help
111
- end
112
- opts
113
- end
114
-
115
- # Collects the non-option command line arguments.
116
- #
117
- # @return [{Symbol => Object}] the argument => value hash
118
- def get_args
119
- return Hash::EMPTY_HASH if ARGV.empty?
120
- if @arg_specs.empty? then too_many_arguments end
121
- # Collect the arguments from the command line.
122
- args = {}
123
- # The number of command line arguments or all but the last argument specifications,
124
- # whichever is less. The last argument can have more than one value, indicated by
125
- # the argument specification form '...', so it is processed separately below.
126
- n = [ARGV.size, @arg_specs.size - 1].min
127
- # the single-valued arguments
128
- n.times { |i| args[@arg_specs[i].first] = ARGV[i] }
129
- # Process the last argument.
130
- if n < ARGV.size then
131
- arg, form = @arg_specs.last
132
- # A multi-valued last argument is the residual command argument array.
133
- # A single-valued last argument is the last value, if there is exactly one.
134
- # Otherwise, there are too many arguments.
135
- if form.index('...') then
136
- args[arg] = ARGV[n..-1]
137
- elsif @arg_specs.size == ARGV.size then
138
- args[arg] = ARGV[n]
139
- else
140
- too_many_arguments
141
- end
142
- end
143
- args
144
- end
145
-
146
- def too_many_arguments
147
- halt("Too many arguments - expected #{@arg_specs.size}, found: #{ARGV.join(' ')}.", 1)
148
- end
149
-
150
- # @param [OptionParser] parser the option parser
151
- # @return [{Symbol => Object}] the option => value hash
152
- def parse(parser)
153
- opts = {}
154
- @opt_specs.each do |opt, *spec|
155
- parser.on_tail(*spec) { |v| opts[opt] = v }
156
- end
157
- # build the option => value hash
158
- parser.parse!
159
- opts
160
- end
161
-
162
- # Processes the built-in options.
163
- #
164
- # @param [{Symbol => Object}] opts the option => value hash
165
- def handle_options(opts)
166
- # if help, then print usage and exit
167
- if opts[:help] then halt end
168
-
169
- # open the log file
170
- log = opts[:log]
171
- debug = opts[:debug]
172
- if log then
173
- CaRuby::Log.instance.open(log, :debug => debug)
174
- elsif debug then
175
- logger.level = Logger::DEBUG
176
- end
177
-
178
- # if there is a file option, then load additional options from the file
179
- file = opts.delete(:file)
180
- if file then
181
- props = CaRuby::Properties.new(file)
182
- props.each { |opt, arg| ARGV << "--#{opt}" << arg }
183
- OptionParser.new do |p|
184
- opts.merge!(parse(p)) { |ov, nv| ov ? ov : nv }
185
- end
186
- end
187
- end
188
-
189
- # Prints the given error message and the program usage, then exits with status 1.
190
- def fail(message=nil)
191
- halt(message, 1)
192
- end
193
-
194
- # Prints the given message and program usage, then exits with the given status.
195
- def halt(message=nil, status=0)
196
- puts(message) if message
197
- puts(@usage)
198
- exit(status)
199
- end
200
- end
201
- end
202
- end
@@ -1,159 +0,0 @@
1
- require 'caruby/csv/csvio'
2
- require 'caruby/util/properties'
3
-
4
- module CaRuby
5
- # Maps a CSV extract to a caBIG application.
6
- #
7
- # _Note_: CsvMapper is an experimental class used only by the CaTissue::Extractor.
8
- class CsvMapper
9
- attr_reader :csvio, :classes
10
-
11
- # Creates a new CsvMapper from the following parameters:
12
- # * the required mapping configuration file config
13
- # * the required target class
14
- # * the required CSV file name
15
- # * additional CsvIO options as desired
16
- #
17
- # If the converter block is given to this method, then that block is called to convert
18
- # source CSV field values as described in the FasterCSV documentation.
19
- def initialize(config, target, csv, options={}, &converter) # :yields: value, info
20
- @target = target
21
- # load the config
22
- fld_path_hash = load_config(config)
23
- # the default input fields are obtained by CsvIO from the first line of the input;
24
- # the default output fields are the field mapping config keys in order
25
- options[:headers] ||= config_headers(config) if options[:mode] =~ /^w/
26
- # the CSV wrapper; do this before making the header map since the CsvIO-generated headers
27
- # are used to build the header map
28
- @csvio = CsvIO.new(csv, options) do |value, info|
29
- # the string headers are determined later in this initializer
30
- if value and @string_headers.include?(info.header) then
31
- value
32
- elsif block_given? then
33
- # call custom converter first, if any
34
- yield(value, info)
35
- end
36
- end
37
- # the class => paths hash; populated in map_headers
38
- @cls_paths_hash = LazyHash.new { Set.new }
39
- # the path => header hash; do this after making the CsvIO
40
- @cls_paths_hash, @hdr_map = map_headers(fld_path_hash)
41
- # the top-level classes
42
- klasses = @cls_paths_hash.keys
43
- # include the target class
44
- @cls_paths_hash[@target] ||= Set.new
45
- # add superclass paths into subclass paths
46
- @cls_paths_hash.each do |klass, paths|
47
- @cls_paths_hash.each { |other, other_paths| paths.merge!(other_paths) if klass < other }
48
- end
49
- # include only concrete classes
50
- @classes = @cls_paths_hash.keys
51
- @cls_paths_hash.delete_if do |klass, paths|
52
- klass.abstract? or klasses.any? { |other| other < klass }
53
- end
54
- # collect the non-string input fields for the custom CSVLoader converter
55
- @string_headers = Set.new
56
- @hdr_map.each do |path, cls_hdr_hash|
57
- last = path.last
58
- @string_headers.merge!(cls_hdr_hash.values) if Attribute === last and last.type == String
59
- end
60
- end
61
-
62
- # Returns the given klass's mapped Attribute paths.
63
- # The default klass is the target class.
64
- def paths(klass=nil)
65
- klass ||= @target
66
- @cls_paths_hash[klass]
67
- end
68
-
69
- # Returns the header mapped by the given Attribute path and starting klass.
70
- # The default klass is the target class.
71
- def header(path, klass=nil)
72
- klass ||= @target
73
- @hdr_map[path][klass]
74
- end
75
-
76
- private
77
-
78
- # Returns the field => path list hash from the field mapping configuration file.
79
- def load_config(file)
80
- begin
81
- config = YAML::load_file(file)
82
- rescue
83
- raise ConfigurationError.new("Could not read field mapping configuration file #{file}: " + $!)
84
- end
85
- end
86
-
87
- def config_headers(config)
88
- File.open(config) do |file|
89
- file.map { |line| line[/(^.+):/, 1] }.compact
90
- end
91
- end
92
-
93
- # @param [{Symbol => <Attribute>}] config the field => path list configuration
94
- # @return [({Class => Set}, {<Attribute> => {Class => Symbol}})]
95
- # the class => paths hash and the path => class => header hash
96
- def map_headers(config)
97
- # the class => paths hash; populated in map_headers
98
- cls_paths_hash = LazyHash.new { Set.new }
99
- hdr_map = LazyHash.new { Hash.new }
100
- config.each do |field, attr_list|
101
- next if attr_list.blank?
102
- # the header accessor method for the field
103
- header = @csvio.accessor(field)
104
- raise ConfigurationError.new("Field defined in field mapping configuration not found: #{field}") if header.nil?
105
- attr_list.split(/,\s*/).each do |path_s|
106
- klass, path = create_attribute_path(path_s)
107
- hdr_map[path][klass] = header
108
- # associate the class with the path
109
- cls_paths_hash[klass] << path
110
- end
111
- end
112
- [cls_paths_hash, hdr_map]
113
- end
114
-
115
- # Returns an array of Attribute or symbol objects for the period-delimited path string path_s in the
116
- # pattern (_class_|_attribute_)(+.+_attribute_)*, e.g.:
117
- # ClinicalStudy.status
118
- # study.status
119
- # The default starting class is this CsvMapper's target class.
120
- # Raises ConfigurationError if the path string is malformed or an attribute is not found.
121
- def create_attribute_path(path_s)
122
- names = path_s.split('.')
123
- # if the path starts with a capitalized class name, then resolve the class.
124
- # otherwise, the target class is the start of the path.
125
- klass = names.first =~ /^[A-Z]/ ? @target.domain_module.const_get(names.shift) : @target
126
- # there must be at least one attribute
127
- if names.empty? then
128
- raise ConfigurationError.new("Attribute entry in CSV field mapping is not in <class>.<attribute> format: #{value}")
129
- end
130
- # build the Attribute path by traversing the names path
131
- # if the name corresponds to a parent attribute, then add the attribute metadata.
132
- # otherwise, if the name is a method, then add the method.
133
- path = []
134
- names.inject(klass) do |parent, name|
135
- attr_md = parent.class.attribute_metadata(name) rescue nil
136
- if attr_md then
137
- # name is an attribute: add the attribute metadata and navigate to the attribute domain type
138
- path << attr_md
139
- attr_md.type
140
- elsif parent.method_defined?(name) then
141
- # name is not a pre-defined attribute but is a method: add the method symbol to the path and halt traversal
142
- path << name.to_sym
143
- break
144
- else
145
- # method not defined
146
- raise ConfigurationError.new("CSV field mapping attribute not found: #{parent.qp}.#{name}")
147
- end
148
- end
149
- # add remaining non-attribute symbols
150
- tail = names[path.size..-1].map { |name| name.to_sym }
151
- path.concat(tail)
152
- # return the starting class and path
153
- # Note that the starting class is not necessarily the first path Attribute declarer, since the
154
- # starting class could be a concrete subclass of an abstract declarer. this is important, since the class
155
- # must be instantiated.
156
- [klass, path]
157
- end
158
- end
159
- end
@@ -1,203 +0,0 @@
1
- require 'rubygems'
2
- gem 'fastercsv'
3
-
4
- require 'fileutils'
5
- require 'faster_csv'
6
- require 'caruby/util/options'
7
- require 'caruby/util/collection'
8
-
9
- module CaRuby
10
- # CsvIO reads or writes CSV records.
11
- # This class wraps a FasterCSV with the following modifications:
12
- # * relax the date parser to allow dd/mm/yyyy dates
13
- # * don't convert integer text with a leading zero to an octal number
14
- # * allow one custom converter with different semantics: if the converter block
15
- # call returns nil, then continue conversion, otherwise return the converter
16
- # result. This differs from FasterCSV converter semantics which calls converters
17
- # as long as the result equals the input field value. The CsvIO converter semantics
18
- # supports converters that intend a String result to be the converted result.
19
- #
20
- # CsvIO is Enumerable, but does not implement the complete Ruby IO interface.
21
- class CsvIO
22
- include Enumerable
23
-
24
- # Returns the CSV field access header symbols.
25
- attr_reader :headers
26
-
27
- # Opens the CSV file and calls the given block with this CsvIO as the argument.
28
- #
29
- # @param (see #initialize)
30
- # @option (see #initialize)
31
- # @yield [csvio] the optional block to execute
32
- # @yieldparam [CsvIO] csvio the open CSVIO instance
33
- def self.open(file, opts=nil)
34
- csvio = new(file, opts)
35
- if block_given? then
36
- yield csvio
37
- csvio.close
38
- end
39
- end
40
-
41
- # Opens the given CSV file and calls {#each} with the given block.
42
- #
43
- # @param (see #initialize)
44
- # @option (see #initialize)
45
- # @yield [row] the block to execute on the row
46
- # @yieldparam [{Symbol => Object}] row the field symbol => value hash
47
- def self.foreach(file, opts=nil, &block)
48
- open(file, opts) { |csvio| csvio.each(&block) }
49
- end
50
-
51
- # Creates a new CsvIO for the specified source file.
52
- # If a converter block is given, then it is added to the CSV converters list.
53
- #
54
- # @param [String] file the input CSV file to open
55
- # @param [Hash] opts the open options
56
- # @option opts [String] :mode the input mode (default +r+)
57
- # @option opts [String] :headers the input field headers
58
- # @yield [value, info] converts the input value
59
- # @yieldparam [String] value the input value
60
- # @yieldparam info the current field's FasterCSV FieldInfo metadata
61
- def initialize(file, opts=nil, &converter)
62
- # the CSV file open mode
63
- mode = Options.get(:mode, opts, 'r')
64
- # the CSV headers option; can be boolean or array
65
- hdr_opt = Options.get(:headers, opts)
66
- # there is a header record by default for an input CSV file
67
- hdr_opt ||= true if mode =~ /^r/
68
- # make parent directories if necessary for an output CSV file
69
- File.makedirs(File.dirname(file)) if mode =~ /^w/
70
- # if headers aren't given, then convert the input CSV header record names to underscore symbols
71
- hdr_cvtr = :symbol unless Enumerable === hdr_opt
72
- # make a custom converter
73
- custom = Proc.new { |value, info| convert(value, info, &converter) }
74
- # open the CSV file
75
- @csv = FasterCSV.open(file, mode, :headers => hdr_opt, :header_converters => hdr_cvtr, :return_headers => true, :write_headers => true, :converters => custom)
76
- # the header => field name hash:
77
- # if the header option is set to true, then read the input header line.
78
- # otherwise, parse an empty string which mimics an input header line.
79
- hdr_row = case hdr_opt
80
- when true then
81
- @csv.shift
82
- when Enumerable then
83
- ''.parse_csv(:headers => hdr_opt, :header_converters => :symbol, :return_headers => true)
84
- else
85
- raise ArgumentError.new("CSV headers option value not supported: #{hdr_opt}")
86
- end
87
- # the header row headers
88
- @headers = hdr_row.headers
89
- # the header name => symbol map
90
- @hdr_sym_hash = hdr_row.to_hash.invert
91
- end
92
-
93
- # Closes the CSV file and trash file if necessary.
94
- def close
95
- @csv.close
96
- @trash.close if @trash
97
- end
98
-
99
- # Returns the header accessor method for the given input header name.
100
- def accessor(header)
101
- @hdr_sym_hash[header]
102
- end
103
-
104
- # Sets the trash output file. This creates a separate CSV output file distinct from the input CSV file.
105
- # This is useful for writing rejected rows from the input. The output file has a header row.
106
- def trash=(file)
107
- @trash = FasterCSV.open(file, 'w', :headers => true, :header_converters => :symbol, :write_headers => true)
108
- end
109
-
110
- # Writes the row to the trash file if the trash file is set.
111
- #
112
- # @param [{Symbol => Object}] row the rejected input row
113
- def reject(row)
114
- @trash << row if @trash
115
- end
116
-
117
- # Iterates over each CSV row, yielding a row for each iteration.
118
- # This method closes the CSV file after the iteration completes.
119
- def each
120
- begin
121
- # parse each line
122
- @csv.each { |row| yield row }
123
- ensure
124
- close
125
- end
126
- end
127
-
128
- # @return the next CSV row
129
- # @see #each
130
- def read
131
- @csv.shift
132
- end
133
-
134
- alias :shift :read
135
-
136
- # Writes the given row to the CSV file.
137
- #
138
- # @param [{Symbol => Object}] row the input row
139
- def write(row)
140
- @csv << row
141
- end
142
-
143
- alias :<< :write
144
-
145
- private
146
-
147
- # 3-letter months => month sequence hash.
148
- MMM_MM_MAP = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'].to_compact_hash_with_index do |mmm, index|
149
- index < 9 ? ('0' + index.succ.to_s) : index.succ.to_s
150
- end
151
-
152
- # DateMatcher relaxes the FasterCSV DateMatcher to allow dd/mm/yyyy dates.
153
- DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | \d{1,2}-\w{3}-\d{2,4} | \d{4}[-\/]\d{1,2}[-\/]\d{1,2} | \d{1,2}[-\/]\d{1,2}[-\/]\d{2,4} )\z /x
154
-
155
- # @param f the input field value to convert
156
- # @param info the CSV field info
157
- # @return the converted value
158
- def convert(f, info)
159
- return if f.nil?
160
- # the block has precedence
161
- value = yield(f, info) if block_given?
162
- # integer conversion
163
- value ||= Integer(f) if f =~ /^[1-9]\d*$/
164
- # date conversion
165
- value ||= convert_date(f) if f =~ CsvIO::DateMatcher
166
- # float conversion
167
- value ||= (Float(f) rescue f) if f =~ /^\d+\.\d*$/ or f =~ /^\d*\.\d+$/
168
- # return converted value or the input field if there was no conversion
169
- value || f
170
- end
171
-
172
- # @param [String] f the input field value
173
- # @return [Date] the converted date
174
- def convert_date(f)
175
- # If input value is in dd-mmm-yy format, then reformat.
176
- # Otherwise, parse as a Date if possible.
177
- if f =~ /^\d{1,2}-\w{3}-\d{2,4}$/ then
178
- ddmmyy = reformat_dd_mmm_yy_date(f) || return
179
- convert_date(ddmmyy)
180
- # elsif f =~ /^\w{3} \d{1,2}, \d{4}$/ then
181
- # ddmmyy = reformat_mmm_dd_yyyy_date(f) || return
182
- # convert_date(ddmmyy)
183
- else
184
- Date.parse(f, true) rescue nil
185
- end
186
- end
187
-
188
- # @param [String] f the input field value in dd-mmm-yy format
189
- # @return [String] the reformatted date String in mm/dd/yy format
190
- def reformat_dd_mmm_yy_date(f)
191
- all, dd, mmm, yy = /^(\d{1,2})-([[:alpha:]]{3})-(\d{2,4})$/.match(f).to_a
192
- mm = MMM_MM_MAP[mmm.downcase] || return
193
- "#{mm}/#{dd}/#{yy}"
194
- end
195
- # # @param [String] f the input field value in 'mmm dd, yyyy' format
196
- # # @return [String] the reformatted date String in mm/dd/yyyy format
197
- # def reformat_mmm_dd_yyyy_date(f)
198
- # all, mmm, dd, yyyy = /^(\w{3}) (\d{1,2}), (\d{4})$/.match(f).to_a
199
- # mm = MMM_MM_MAP[mmm.downcase] || return
200
- # "#{mm}/#{dd}/#{yyyy}"
201
- # end
202
- end
203
- end