rsmart_toolbox 0.5 → 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b2e7e3f98bbbd519f2f57c8c5a9da33cca0a3de3
4
- data.tar.gz: c90a54912dc4d9c536337697a13c028bbdfb8011
3
+ metadata.gz: 27fabd26f64bc774e3d4bbfefd30d7cc519bd561
4
+ data.tar.gz: 888167314f2c75e27f67461cdf8592f9f394440f
5
5
  SHA512:
6
- metadata.gz: 34892dcd8adf786319d6d4261fc291bda5cdeb4903c78ed8fcc74b0de74cc734c192886ce809b2c9a91e40f7916b21941214be55a32c7832787dbccc92a56b05
7
- data.tar.gz: 6ec1f8e88e4a45aee20efb607c05169dee3994262093367915abc8a25ca138f850aa043672fa353d2c33a3b90db41ac208043d22e4cfdbfb36fc235831bb2065
6
+ metadata.gz: e75b6da705654223a6ff1b8f36eb9d049c46511448d7eb8da9d2512f27b82b63f4c006d15dc85c03c592b7e713e7ec41687da6156581eba30754d4719294cb79
7
+ data.tar.gz: 3fc723d8d3595c88425cf35fdcd2a518f5c67bf2f71153ce9702a097660f84c75522a40ea7f8d20e2a35bf87041567bf57d33527e538df34e5bcf42505979597
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -21,7 +21,8 @@ module Rsmart::ETL
21
21
  class TextParseError < StandardError
22
22
  end
23
23
 
24
- # Responds to String or Exception.
24
+ # @param [String, Exception] e the error to handle
25
+ # @return [Exception] an Exception with a message formatted with $INPUT_LINE_NUMBER.
25
26
  def self.error(e)
26
27
  if e.kind_of? String
27
28
  # default to TextParseError
@@ -33,7 +34,8 @@ module Rsmart::ETL
33
34
  raise ArgumentError, "Unsupported error type: #{e.class}"
34
35
  end
35
36
 
36
- # Responds to String or Exception.
37
+ # @param [String, Exception] e the warning to handle
38
+ # @return [Exception] an Exception with a message formatted with $INPUT_LINE_NUMBER.
37
39
  def self.warning(e)
38
40
  if e.kind_of? String
39
41
  # default to TextParseError
@@ -45,7 +47,14 @@ module Rsmart::ETL
45
47
  raise ArgumentError, "Unsupported error type: #{e.class}"
46
48
  end
47
49
 
48
- # Test to see if subject is a member of valid_values Array
50
+ # @param [String, #match] subject used for validity checking.
51
+ # @param [Array<Object>, Regexp] valid_values all of the possible valid values.
52
+ # @option opt [Boolean] :case_sensitive performs case sensitive matching
53
+ # @return [Boolean] true if the subject matches valid_values.
54
+ # FYI for case insensitive matching, each element of valid_values must respond to #casecmp.
55
+ # @raise [ArgumentError] if valid_values is nil or empty.
56
+ # @raise [ArgumentError] case insensitive matching only works for objects
57
+ # that respond to #casecmp; primarily String objects.
49
58
  def self.valid_value(subject, valid_values, opt={})
50
59
  raise ArgumentError, "valid_values must not be nil!" if valid_values.nil?
51
60
  if valid_values.kind_of? Regexp
@@ -54,7 +63,7 @@ module Rsmart::ETL
54
63
  if valid_values.kind_of? Array
55
64
  raise ArgumentError, "valid_values must have at least one element!" unless valid_values.length > 0
56
65
  if opt[:case_sensitive] == false # case insensitive comparison requested
57
- raise ArgumentError, "case_sensitive only supported for Strings!" unless subject.kind_of?(String)
66
+ raise ArgumentError, "Object must respond to #casecmp" unless subject.respond_to? 'casecmp'
58
67
  valid_values.each do |valid_value|
59
68
  return true if valid_value.casecmp(subject) == 0
60
69
  end
@@ -64,6 +73,10 @@ module Rsmart::ETL
64
73
  return false
65
74
  end
66
75
 
76
+ # @param [String] str String to be matched against well known boolean patterns.
77
+ # @option opt [Boolean] :default the default return value if str is empty.
78
+ # @return [Boolean] the result of matching the str input against well known boolean patterns.
79
+ # @raise [TextParseError] if none of the known boolean patterns could be matched.
67
80
  def self.parse_boolean(str, opt={})
68
81
  return true if str == true
69
82
  return false if str == false
@@ -79,22 +92,34 @@ module Rsmart::ETL
79
92
  raise Rsmart::ETL::error TextParseError.new "invalid value for Boolean: '#{str}'"
80
93
  end
81
94
 
82
- # Simply here to help ensure we consistently apply the same encoding options.
83
- def self.encode(str, opt={} )
95
+ # @param [String] str the String to be encoded and invalid characters replaced with valid characters.
96
+ # @option opt [String] :encoding the character encoding to use.
97
+ # @return [String] the result of encoding the String and replacing invalid characters with valid characters.
98
+ def self.encode(str, opt={ encoding: "UTF-8" } )
84
99
  opt[:encoding] = "UTF-8" if opt[:encoding].nil?
85
100
  str.encode( opt[:encoding], :invalid => :replace,
86
101
  :undef => :replace, :replace => "" )
87
102
  end
88
103
 
89
- # Matches MRI CSV specification:
104
+ # Matches the MRI CSV specification:
90
105
  # The header String is downcased, spaces are replaced with underscores,
91
106
  # non-word characters are dropped, and finally to_sym() is called.
107
+ # @param [String] str the String to be symbolized.
108
+ # @return [Symbol] String is downcased, spaces are replaced with underscores,
109
+ # non-word characters are dropped
110
+ # @raise [ArgumentError] if str is nil or empty.
92
111
  def self.to_symbol(str)
93
112
  raise ArgumentError, "Illegal symbol name: '#{str}'" if str.nil? || str.empty?
94
113
  encode( str.downcase.gsub(/\s+/, "_").gsub(/\W+/, "") ).to_sym
95
114
  end
96
115
 
97
- # DRY up some common string manipulation
116
+ # Mutates insert_str and values_str with column_name and value respectively.
117
+ # Proper SQL value quoting will be performed based on object type.
118
+ # @param [String] insert_str the left side of the insert statement (i.e. columns)
119
+ # @param [String] column_name the column name to append to insert_str.
120
+ # @param [String] values_str the right side of the insert statement (i.e. values)
121
+ # @param [Object] value the value to append to values_str. Must respond to #to_s.
122
+ # @return [void]
98
123
  def self.mutate_sql_stmt!(insert_str, column_name, values_str, value)
99
124
  insert_str.concat "#{column_name.upcase},"
100
125
  # TODO what are all of the valid types that should not be quoted?
@@ -103,8 +128,13 @@ module Rsmart::ETL
103
128
  else
104
129
  values_str.concat "'#{value}',"
105
130
  end
131
+ return nil
106
132
  end
107
133
 
134
+ # Prepares a String for a SQL statement where single quotes need to be escaped.
135
+ # @param [String] str the String to be escaped.
136
+ # @return [String, nil] the resulting String with single quotes escaped with a backslash.
137
+ # If a nil is passed, nil is returned.
108
138
  def self.escape_single_quotes(str)
109
139
  if str.nil?
110
140
  return nil
@@ -112,14 +142,31 @@ module Rsmart::ETL
112
142
  return str.to_s.gsub("'", "\\\\'")
113
143
  end
114
144
 
115
- def self.parse_string(str, opt={})
116
- opt[:strict] = true if opt[:strict].nil?
145
+ # @param [String] str the String to be parsed.
146
+ # @option opt [String, #to_s] :default the default return value if str is empty. Must respond to #to_s
147
+ # @option opt [Boolean] :escape_single_quotes escape single quote characters.
148
+ # @option opt [Integer] :length raise a TextParseError if str.length > :length.
149
+ # @option opt [String] :name the name of the field being parsed. Used only for error handling.
150
+ # @option opt [Boolean] :required raise a TextParseError if str is empty.
151
+ # @option opt [Boolean] :strict strict length checking will produce errors instead of warnings.
152
+ # @option opt [Array<Object>, Regexp] :valid_values all of the possible valid values.
153
+ # @return [String] the parsed results. nil or empty inputs will return the empty String by default (i.e. '').
154
+ # @raise [TextParseError] if the field is :required and found to be empty.
155
+ # @raise [TextParseError] if str.length > :length && :strict
156
+ # @raise [TextParseError] if str does not match :valid_values
157
+ # @example nil or empty inputs will return the empty String by default
158
+ # '' == parse_string(nil) && '' == parse_string('')
159
+ # @see valid_value
160
+ # @see escape_single_quotes
161
+ def self.parse_string(str, opt={ strict: true, required: false, escape_single_quotes: true })
162
+ opt[:strict] = true if opt[:strict].nil?
163
+ opt[:escape_single_quotes] = true if opt[:escape_single_quotes].nil?
117
164
  retval = encode str.to_s.strip
118
165
  if opt[:required] && retval.empty?
119
166
  raise Rsmart::ETL::error TextParseError.new "Required data element '#{opt[:name]}' not found: '#{str}'"
120
167
  end
121
168
  if opt[:default] && retval.empty?
122
- retval = opt[:default]
169
+ retval = opt[:default].to_s
123
170
  end
124
171
  if opt[:length] && retval.length > opt[:length].to_i
125
172
  detail = "#{opt[:name]}.length > #{opt[:length]}: '#{str}'-->'#{str[0..(opt[:length] - 1)]}'"
@@ -131,15 +178,36 @@ module Rsmart::ETL
131
178
  if opt[:valid_values] && ! valid_value(retval, opt[:valid_values], opt)
132
179
  raise Rsmart::ETL::error TextParseError.new "Illegal #{opt[:name]}: value '#{str}' not found in: #{opt[:valid_values]}"
133
180
  end
134
- return escape_single_quotes retval
181
+ if opt[:escape_single_quotes]
182
+ retval = escape_single_quotes retval
183
+ end
184
+ return retval
135
185
  end
136
186
 
187
+ # Helper method which finds the value by column :name and mutates the SQL statement accordingly.
188
+ # @param [CSV::Row] row the CSV Row being parsed
189
+ # @param [String] insert_str the left side of the insert statement (i.e. columns)
190
+ # @param [String] values_str the right side of the insert statement (i.e. values)
191
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
192
+ # @option opt [String] :name the name of the field being parsed. Required.
193
+ # @return [void]
194
+ # @raise [ArgumentError] :name is required.
195
+ # @see parse_string
196
+ # @see mutate_sql_stmt!
137
197
  def self.parse_string!(row, insert_str, values_str, opt={})
138
198
  raise ArgumentError, "opt[:name] is required!" unless opt[:name]
139
199
  str = parse_string( row[ to_symbol( opt[:name] ) ], opt )
140
200
  mutate_sql_stmt! insert_str, opt[:name], values_str, str
141
201
  end
142
202
 
203
+ # Parse an Integer from a String.
204
+ # @note Note the behavioral difference versus #to_i.
205
+ # @param [String] str the String to be parsed.
206
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
207
+ # @return [Integer, nil] the parsed Integer. nil or empty inputs will return nil by default.
208
+ # @example Unlike #to_i, nil or empty inputs will return nil by default
209
+ # nil == parse_integer(nil) && nil == parse_integer('') && 0 != parse_integer(nil)
210
+ # @see parse_string
143
211
  def self.parse_integer(str, opt={})
144
212
  s = parse_string str, opt
145
213
  if s.empty?
@@ -149,12 +217,30 @@ module Rsmart::ETL
149
217
  end
150
218
  end
151
219
 
220
+ # Helper method which finds the value by column :name and mutates the SQL statement accordingly.
221
+ # @param [CSV::Row] row the CSV Row being parsed
222
+ # @param [String] insert_str the left side of the insert statement (i.e. columns)
223
+ # @param [String] values_str the right side of the insert statement (i.e. values)
224
+ # @param [Hash] opt options Hash will be passed through to #parse_integer.
225
+ # @option opt [String] :name the name of the field being parsed. Required.
226
+ # @return [void]
227
+ # @raise [ArgumentError] :name is required.
228
+ # @see parse_integer
229
+ # @see mutate_sql_stmt!
152
230
  def self.parse_integer!(row, insert_str, values_str, opt={})
153
231
  raise ArgumentError, "opt[:name] is required!" unless opt[:name]
154
232
  i = parse_integer( row[ to_symbol( opt[:name] ) ], opt )
155
233
  mutate_sql_stmt! insert_str, opt[:name], values_str, i
156
234
  end
157
235
 
236
+ # Parse a Float from a String.
237
+ # @note Note the behavioral difference versus #to_f.
238
+ # @param [String] str the String to be parsed.
239
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
240
+ # @return [Float, nil] the parsed Float. nil or empty inputs will return nil by default.
241
+ # @example Unlike #to_f, nil or empty inputs will return nil by default
242
+ # nil == parse_float(nil) && nil == parse_float('') && 0.0 != parse_float(nil)
243
+ # @see parse_string
158
244
  def self.parse_float(str, opt={})
159
245
  s = parse_string str, opt
160
246
  if s.empty?
@@ -164,9 +250,14 @@ module Rsmart::ETL
164
250
  end
165
251
  end
166
252
 
167
- # Useful for parsing "flag" like values. Always returns upcase for consistency.
168
- # Assumes :strict :length of 1 by default.
169
- def self.parse_flag(str, opt={})
253
+ # Useful for parsing "flag" like values; i.e. usually single characters.
254
+ # @param [String] str the String to be parsed.
255
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
256
+ # @option opt [Integer] :length the maximum supported length of the field.
257
+ # @option opt [Boolean] :upcase if true upcase the results.
258
+ # @return [String] the parsed "flag".
259
+ # @see parse_string
260
+ def self.parse_flag(str, opt={ length: 1, upcase: true })
170
261
  opt[:length] = 1 if opt[:length].nil?
171
262
  opt[:upcase] = true if opt[:upcase].nil?
172
263
  retval = parse_string str, opt
@@ -174,32 +265,22 @@ module Rsmart::ETL
174
265
  return retval
175
266
  end
176
267
 
177
- # Designed specifically for actv_ind, but could be used on *any*
178
- # fields that matches /^(Y|N)$/i.
179
- def self.parse_actv_ind(str, opt={})
180
- # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
181
- opt[:name] = "actv_ind" if opt[:name].nil?
182
- opt[:default] = "Y" if opt[:default].nil?
183
- opt[:valid_values] = /^(Y|N)$/i if opt[:valid_values].nil?
184
- return parse_flag str, opt
185
- end
186
-
187
- # Designed specifically for actv_ind, but could be used on *any*
188
- # fields that matches /^(Y|N)$/i.
189
- def self.parse_actv_ind!(row, insert_str, values_str, opt={})
190
- # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
191
- opt[:name] = "actv_ind" if opt[:name].nil?
192
- actv_ind = Rsmart::ETL::parse_actv_ind row[ to_symbol( opt[:name] ) ]
193
- Rsmart::ETL::mutate_sql_stmt! insert_str, opt[:name], values_str, actv_ind
194
- end
195
-
196
268
  # Parse common command line options for CSV --> SQL transformations.
269
+ # @param [String] executable the name of the script from which we are executing. See example.
270
+ # @param [Array<String>] args the command line args.
271
+ # @option opt [String] :csv_filename the input file from which the CSV will be read.
272
+ # Defaults to the first element of args Array.
273
+ # @option opt [String] :sql_filename the output file to which the SQL will be written.
274
+ # @option opt [Hash] :csv_options the options that will be used by the CSV parser.
275
+ # @return [Hash] a Hash containing the parsed command line results.
276
+ # @example The most common usage:
277
+ # opt = Rsmart::ETL.parse_csv_command_line_options (File.basename $0), ARGF.argv
197
278
  def self.parse_csv_command_line_options(
198
279
  executable, args, opt={ csv_options: { headers: :first_row,
199
280
  header_converters: :symbol,
200
281
  skip_blanks: true,
201
- col_sep: ",", # comma by default
202
- quote_char: '"', # double quote by default
282
+ col_sep: ",",
283
+ quote_char: '"'
203
284
  }
204
285
  } )
205
286
  optparse = OptionParser.new do |opts|
@@ -208,10 +289,10 @@ module Rsmart::ETL
208
289
  opt[:sql_filename] = f
209
290
  end
210
291
  opts.on( '-s [separator_character]' ,'--separator [separator_character]', 'The character that separates each column of the CSV file.') do |s|
211
- opt[:col_sep] = s
292
+ opt[:csv_options][:col_sep] = s
212
293
  end
213
294
  opts.on( '-q [quote_character]' ,'--quote [quote_character]', 'The character used to quote fields.') do |q|
214
- opt[:quote_char] = q
295
+ opt[:csv_options][:quote_char] = q
215
296
  end
216
297
  opts.on( '-h', '--help', 'Display this screen' ) do
217
298
  puts opts
@@ -187,4 +187,23 @@ module Rsmart::ETL::GRM
187
187
  return Rsmart::ETL::parse_flag str, opt
188
188
  end
189
189
 
190
+ # Designed specifically for actv_ind, but could be used on *any*
191
+ # field that matches /^(Y|N)$/i.
192
+ def self.parse_actv_ind(str, opt={})
193
+ # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
194
+ opt[:name] = "actv_ind" if opt[:name].nil?
195
+ opt[:default] = "Y" if opt[:default].nil?
196
+ opt[:valid_values] = /^(Y|N)$/i if opt[:valid_values].nil?
197
+ return Rsmart::ETL::parse_flag str, opt
198
+ end
199
+
200
+ # Designed specifically for actv_ind, but could be used on *any*
201
+ # field that matches /^(Y|N)$/i.
202
+ def self.parse_actv_ind!(row, insert_str, values_str, opt={})
203
+ # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
204
+ opt[:name] = "actv_ind" if opt[:name].nil?
205
+ actv_ind = parse_actv_ind row[ Rsmart::ETL::to_symbol( opt[:name] ) ]
206
+ Rsmart::ETL::mutate_sql_stmt! insert_str, opt[:name], values_str, actv_ind
207
+ end
208
+
190
209
  end
@@ -15,5 +15,5 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
 
17
17
  module Rsmart
18
- VERSION = "0.5"
18
+ VERSION = "0.6"
19
19
  end
@@ -342,7 +342,7 @@ RSpec.describe "Rsmart::ETL" do
342
342
  it "Modifies the insert_str and values_str based on a CSV::Row match" do
343
343
  insert_str = ""; values_str = "";
344
344
  row = CSV::Row.new(['actv_ind'.to_sym], ['Y'], true)
345
- ETL.parse_actv_ind!(row, insert_str, values_str)
345
+ GRM.parse_actv_ind!(row, insert_str, values_str)
346
346
  expect(insert_str).to eq("ACTV_IND,")
347
347
  expect(values_str).to eq("'Y',")
348
348
  end
@@ -350,7 +350,7 @@ RSpec.describe "Rsmart::ETL" do
350
350
  it "allows for lowercase input Strings" do
351
351
  insert_str = ""; values_str = "";
352
352
  row = CSV::Row.new(['actv_ind'.to_sym], ['n'], true)
353
- ETL.parse_actv_ind!(row, insert_str, values_str)
353
+ GRM.parse_actv_ind!(row, insert_str, values_str)
354
354
  expect(insert_str).to eq("ACTV_IND,")
355
355
  expect(values_str).to eq("'N',")
356
356
  end
@@ -358,12 +358,12 @@ RSpec.describe "Rsmart::ETL" do
358
358
  it "Returns a default value of 'Y' and does not raise an TextParseError if nil or empty" do
359
359
  insert_str = ""; values_str = "";
360
360
  row = CSV::Row.new(['actv_ind'.to_sym], [nil], true)
361
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
361
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
362
362
  expect(insert_str).to eq("ACTV_IND,")
363
363
  expect(values_str).to eq("'Y',")
364
364
  insert_str = ""; values_str = "";
365
365
  row = CSV::Row.new(['actv_ind'.to_sym], [''], true)
366
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
366
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
367
367
  expect(insert_str).to eq("ACTV_IND,")
368
368
  expect(values_str).to eq("'Y',")
369
369
  end
@@ -371,13 +371,13 @@ RSpec.describe "Rsmart::ETL" do
371
371
  it "Raises an TextParseError if not a valid 'Y/N' value" do
372
372
  insert_str = ""; values_str = "";
373
373
  row = CSV::Row.new(['actv_ind'.to_sym], ["Q"], true)
374
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
374
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
375
375
  end
376
376
 
377
377
  it "Raises an TextParseError if length exceeds 1 characters" do
378
378
  insert_str = ""; values_str = "";
379
379
  row = CSV::Row.new(['actv_ind'.to_sym], ["x" * 2], true)
380
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
380
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
381
381
  end
382
382
  end
383
383
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rsmart_toolbox
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.6'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lance Speelmon
@@ -30,7 +30,7 @@ cert_chain:
30
30
  sKRWzEtHFamxQaIspOja5O4oQKiCbWa90fEuIoCtwyy1rQtL9VKoDTs4vZASXNuc
31
31
  F/lEyekXSjN36uTtlt4LkKLn/k7k5gRbt4+C9Q==
32
32
  -----END CERTIFICATE-----
33
- date: 2014-08-17 00:00:00.000000000 Z
33
+ date: 2014-08-18 00:00:00.000000000 Z
34
34
  dependencies:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: builder
metadata.gz.sig CHANGED
Binary file