rsmart_toolbox 0.5 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b2e7e3f98bbbd519f2f57c8c5a9da33cca0a3de3
4
- data.tar.gz: c90a54912dc4d9c536337697a13c028bbdfb8011
3
+ metadata.gz: 27fabd26f64bc774e3d4bbfefd30d7cc519bd561
4
+ data.tar.gz: 888167314f2c75e27f67461cdf8592f9f394440f
5
5
  SHA512:
6
- metadata.gz: 34892dcd8adf786319d6d4261fc291bda5cdeb4903c78ed8fcc74b0de74cc734c192886ce809b2c9a91e40f7916b21941214be55a32c7832787dbccc92a56b05
7
- data.tar.gz: 6ec1f8e88e4a45aee20efb607c05169dee3994262093367915abc8a25ca138f850aa043672fa353d2c33a3b90db41ac208043d22e4cfdbfb36fc235831bb2065
6
+ metadata.gz: e75b6da705654223a6ff1b8f36eb9d049c46511448d7eb8da9d2512f27b82b63f4c006d15dc85c03c592b7e713e7ec41687da6156581eba30754d4719294cb79
7
+ data.tar.gz: 3fc723d8d3595c88425cf35fdcd2a518f5c67bf2f71153ce9702a097660f84c75522a40ea7f8d20e2a35bf87041567bf57d33527e538df34e5bcf42505979597
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -21,7 +21,8 @@ module Rsmart::ETL
21
21
  class TextParseError < StandardError
22
22
  end
23
23
 
24
- # Responds to String or Exception.
24
+ # @param [String, Exception] e the error to handle
25
+ # @return [Exception] an Exception with a message formatted with $INPUT_LINE_NUMBER.
25
26
  def self.error(e)
26
27
  if e.kind_of? String
27
28
  # default to TextParseError
@@ -33,7 +34,8 @@ module Rsmart::ETL
33
34
  raise ArgumentError, "Unsupported error type: #{e.class}"
34
35
  end
35
36
 
36
- # Responds to String or Exception.
37
+ # @param [String, Exception] e the warning to handle
38
+ # @return [Exception] an Exception with a message formatted with $INPUT_LINE_NUMBER.
37
39
  def self.warning(e)
38
40
  if e.kind_of? String
39
41
  # default to TextParseError
@@ -45,7 +47,14 @@ module Rsmart::ETL
45
47
  raise ArgumentError, "Unsupported error type: #{e.class}"
46
48
  end
47
49
 
48
- # Test to see if subject is a member of valid_values Array
50
+ # @param [String, #match] subject used for validity checking.
51
+ # @param [Array<Object>, Regexp] valid_values all of the possible valid values.
52
+ # @option opt [Boolean] :case_sensitive performs case sensitive matching
53
+ # @return [Boolean] true if the subject matches valid_values.
54
+ # FYI for case insensitive matching, each element of valid_values must respond to #casecmp.
55
+ # @raise [ArgumentError] if valid_values is nil or empty.
56
+ # @raise [ArgumentError] case insensitive matching only works for objects
57
+ # that respond to #casecmp; primarily String objects.
49
58
  def self.valid_value(subject, valid_values, opt={})
50
59
  raise ArgumentError, "valid_values must not be nil!" if valid_values.nil?
51
60
  if valid_values.kind_of? Regexp
@@ -54,7 +63,7 @@ module Rsmart::ETL
54
63
  if valid_values.kind_of? Array
55
64
  raise ArgumentError, "valid_values must have at least one element!" unless valid_values.length > 0
56
65
  if opt[:case_sensitive] == false # case insensitive comparison requested
57
- raise ArgumentError, "case_sensitive only supported for Strings!" unless subject.kind_of?(String)
66
+ raise ArgumentError, "Object must respond to #casecmp" unless subject.respond_to? 'casecmp'
58
67
  valid_values.each do |valid_value|
59
68
  return true if valid_value.casecmp(subject) == 0
60
69
  end
@@ -64,6 +73,10 @@ module Rsmart::ETL
64
73
  return false
65
74
  end
66
75
 
76
+ # @param [String] str String to be matched against well known boolean patterns.
77
+ # @option opt [Boolean] :default the default return value if str is empty.
78
+ # @return [Boolean] the result of matching the str input against well known boolean patterns.
79
+ # @raise [TextParseError] if none of the known boolean patterns could be matched.
67
80
  def self.parse_boolean(str, opt={})
68
81
  return true if str == true
69
82
  return false if str == false
@@ -79,22 +92,34 @@ module Rsmart::ETL
79
92
  raise Rsmart::ETL::error TextParseError.new "invalid value for Boolean: '#{str}'"
80
93
  end
81
94
 
82
- # Simply here to help ensure we consistently apply the same encoding options.
83
- def self.encode(str, opt={} )
95
+ # @param [String] str the String to be encoded and invalid characters replaced with valid characters.
96
+ # @option opt [String] :encoding the character encoding to use.
97
+ # @return [String] the result of encoding the String and replacing invalid characters with valid characters.
98
+ def self.encode(str, opt={ encoding: "UTF-8" } )
84
99
  opt[:encoding] = "UTF-8" if opt[:encoding].nil?
85
100
  str.encode( opt[:encoding], :invalid => :replace,
86
101
  :undef => :replace, :replace => "" )
87
102
  end
88
103
 
89
- # Matches MRI CSV specification:
104
+ # Matches the MRI CSV specification:
90
105
  # The header String is downcased, spaces are replaced with underscores,
91
106
  # non-word characters are dropped, and finally to_sym() is called.
107
+ # @param [String] str the String to be symbolized.
108
+ # @return [Symbol] String is downcased, spaces are replaced with underscores,
109
+ # non-word characters are dropped
110
+ # @raise [ArgumentError] if str is nil or empty.
92
111
  def self.to_symbol(str)
93
112
  raise ArgumentError, "Illegal symbol name: '#{str}'" if str.nil? || str.empty?
94
113
  encode( str.downcase.gsub(/\s+/, "_").gsub(/\W+/, "") ).to_sym
95
114
  end
96
115
 
97
- # DRY up some common string manipulation
116
+ # Mutates insert_str and values_str with column_name and value respectively.
117
+ # Proper SQL value quoting will be performed based on object type.
118
+ # @param [String] insert_str the left side of the insert statement (i.e. columns)
119
+ # @param [String] column_name the column name to append to insert_str.
120
+ # @param [String] values_str the right side of the insert statement (i.e. values)
121
+ # @param [Object] value the value to append to values_str. Must respond to #to_s.
122
+ # @return [void]
98
123
  def self.mutate_sql_stmt!(insert_str, column_name, values_str, value)
99
124
  insert_str.concat "#{column_name.upcase},"
100
125
  # TODO what are all of the valid types that should not be quoted?
@@ -103,8 +128,13 @@ module Rsmart::ETL
103
128
  else
104
129
  values_str.concat "'#{value}',"
105
130
  end
131
+ return nil
106
132
  end
107
133
 
134
+ # Prepares a String for a SQL statement where single quotes need to be escaped.
135
+ # @param [String] str the String to be escaped.
136
+ # @return [String, nil] the resulting String with single quotes escaped with a backslash.
137
+ # If a nil is passed, nil is returned.
108
138
  def self.escape_single_quotes(str)
109
139
  if str.nil?
110
140
  return nil
@@ -112,14 +142,31 @@ module Rsmart::ETL
112
142
  return str.to_s.gsub("'", "\\\\'")
113
143
  end
114
144
 
115
- def self.parse_string(str, opt={})
116
- opt[:strict] = true if opt[:strict].nil?
145
+ # @param [String] str the String to be parsed.
146
+ # @option opt [String, #to_s] :default the default return value if str is empty. Must respond to #to_s
147
+ # @option opt [Boolean] :escape_single_quotes escape single quote characters.
148
+ # @option opt [Integer] :length raise a TextParseError if str.length > :length.
149
+ # @option opt [String] :name the name of the field being parsed. Used only for error handling.
150
+ # @option opt [Boolean] :required raise a TextParseError if str is empty.
151
+ # @option opt [Boolean] :strict strict length checking will produce errors instead of warnings.
152
+ # @option opt [Array<Object>, Regexp] :valid_values all of the possible valid values.
153
+ # @return [String] the parsed results. nil or empty inputs will return the empty String by default (i.e. '').
154
+ # @raise [TextParseError] if the field is :required and found to be empty.
155
+ # @raise [TextParseError] if str.length > :length && :strict
156
+ # @raise [TextParseError] if str does not match :valid_values
157
+ # @example nil or empty inputs will return the empty String by default
158
+ # '' == parse_string(nil) && '' == parse_string('')
159
+ # @see valid_value
160
+ # @see escape_single_quotes
161
+ def self.parse_string(str, opt={ strict: true, required: false, escape_single_quotes: true })
162
+ opt[:strict] = true if opt[:strict].nil?
163
+ opt[:escape_single_quotes] = true if opt[:escape_single_quotes].nil?
117
164
  retval = encode str.to_s.strip
118
165
  if opt[:required] && retval.empty?
119
166
  raise Rsmart::ETL::error TextParseError.new "Required data element '#{opt[:name]}' not found: '#{str}'"
120
167
  end
121
168
  if opt[:default] && retval.empty?
122
- retval = opt[:default]
169
+ retval = opt[:default].to_s
123
170
  end
124
171
  if opt[:length] && retval.length > opt[:length].to_i
125
172
  detail = "#{opt[:name]}.length > #{opt[:length]}: '#{str}'-->'#{str[0..(opt[:length] - 1)]}'"
@@ -131,15 +178,36 @@ module Rsmart::ETL
131
178
  if opt[:valid_values] && ! valid_value(retval, opt[:valid_values], opt)
132
179
  raise Rsmart::ETL::error TextParseError.new "Illegal #{opt[:name]}: value '#{str}' not found in: #{opt[:valid_values]}"
133
180
  end
134
- return escape_single_quotes retval
181
+ if opt[:escape_single_quotes]
182
+ retval = escape_single_quotes retval
183
+ end
184
+ return retval
135
185
  end
136
186
 
187
+ # Helper method which finds the value by column :name and mutates the SQL statement accordingly.
188
+ # @param [CSV::Row] row the CSV Row being parsed
189
+ # @param [String] insert_str the left side of the insert statement (i.e. columns)
190
+ # @param [String] values_str the right side of the insert statement (i.e. values)
191
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
192
+ # @option opt [String] :name the name of the field being parsed. Required.
193
+ # @return [void]
194
+ # @raise [ArgumentError] :name is required.
195
+ # @see parse_string
196
+ # @see mutate_sql_stmt!
137
197
  def self.parse_string!(row, insert_str, values_str, opt={})
138
198
  raise ArgumentError, "opt[:name] is required!" unless opt[:name]
139
199
  str = parse_string( row[ to_symbol( opt[:name] ) ], opt )
140
200
  mutate_sql_stmt! insert_str, opt[:name], values_str, str
141
201
  end
142
202
 
203
+ # Parse an Integer from a String.
204
+ # @note Note the behavioral difference versus #to_i.
205
+ # @param [String] str the String to be parsed.
206
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
207
+ # @return [Integer, nil] the parsed Integer. nil or empty inputs will return nil by default.
208
+ # @example Unlike #to_i, nil or empty inputs will return nil by default
209
+ # nil == parse_integer(nil) && nil == parse_integer('') && 0 != parse_integer(nil)
210
+ # @see parse_string
143
211
  def self.parse_integer(str, opt={})
144
212
  s = parse_string str, opt
145
213
  if s.empty?
@@ -149,12 +217,30 @@ module Rsmart::ETL
149
217
  end
150
218
  end
151
219
 
220
+ # Helper method which finds the value by column :name and mutates the SQL statement accordingly.
221
+ # @param [CSV::Row] row the CSV Row being parsed
222
+ # @param [String] insert_str the left side of the insert statement (i.e. columns)
223
+ # @param [String] values_str the right side of the insert statement (i.e. values)
224
+ # @param [Hash] opt options Hash will be passed through to #parse_integer.
225
+ # @option opt [String] :name the name of the field being parsed. Required.
226
+ # @return [void]
227
+ # @raise [ArgumentError] :name is required.
228
+ # @see parse_integer
229
+ # @see mutate_sql_stmt!
152
230
  def self.parse_integer!(row, insert_str, values_str, opt={})
153
231
  raise ArgumentError, "opt[:name] is required!" unless opt[:name]
154
232
  i = parse_integer( row[ to_symbol( opt[:name] ) ], opt )
155
233
  mutate_sql_stmt! insert_str, opt[:name], values_str, i
156
234
  end
157
235
 
236
+ # Parse a Float from a String.
237
+ # @note Note the behavioral difference versus #to_f.
238
+ # @param [String] str the String to be parsed.
239
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
240
+ # @return [Float, nil] the parsed Float. nil or empty inputs will return nil by default.
241
+ # @example Unlike #to_f, nil or empty inputs will return nil by default
242
+ # nil == parse_float(nil) && nil == parse_float('') && 0.0 != parse_float(nil)
243
+ # @see parse_string
158
244
  def self.parse_float(str, opt={})
159
245
  s = parse_string str, opt
160
246
  if s.empty?
@@ -164,9 +250,14 @@ module Rsmart::ETL
164
250
  end
165
251
  end
166
252
 
167
- # Useful for parsing "flag" like values. Always returns upcase for consistency.
168
- # Assumes :strict :length of 1 by default.
169
- def self.parse_flag(str, opt={})
253
+ # Useful for parsing "flag" like values; i.e. usually single characters.
254
+ # @param [String] str the String to be parsed.
255
+ # @param [Hash] opt options Hash will be passed through to #parse_string.
256
+ # @option opt [Integer] :length the maximum supported length of the field.
257
+ # @option opt [Boolean] :upcase if true upcase the results.
258
+ # @return [String] the parsed "flag".
259
+ # @see parse_string
260
+ def self.parse_flag(str, opt={ length: 1, upcase: true })
170
261
  opt[:length] = 1 if opt[:length].nil?
171
262
  opt[:upcase] = true if opt[:upcase].nil?
172
263
  retval = parse_string str, opt
@@ -174,32 +265,22 @@ module Rsmart::ETL
174
265
  return retval
175
266
  end
176
267
 
177
- # Designed specifically for actv_ind, but could be used on *any*
178
- # fields that matches /^(Y|N)$/i.
179
- def self.parse_actv_ind(str, opt={})
180
- # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
181
- opt[:name] = "actv_ind" if opt[:name].nil?
182
- opt[:default] = "Y" if opt[:default].nil?
183
- opt[:valid_values] = /^(Y|N)$/i if opt[:valid_values].nil?
184
- return parse_flag str, opt
185
- end
186
-
187
- # Designed specifically for actv_ind, but could be used on *any*
188
- # fields that matches /^(Y|N)$/i.
189
- def self.parse_actv_ind!(row, insert_str, values_str, opt={})
190
- # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
191
- opt[:name] = "actv_ind" if opt[:name].nil?
192
- actv_ind = Rsmart::ETL::parse_actv_ind row[ to_symbol( opt[:name] ) ]
193
- Rsmart::ETL::mutate_sql_stmt! insert_str, opt[:name], values_str, actv_ind
194
- end
195
-
196
268
  # Parse common command line options for CSV --> SQL transformations.
269
+ # @param [String] executable the name of the script from which we are executing. See example.
270
+ # @param [Array<String>] args the command line args.
271
+ # @option opt [String] :csv_filename the input file from which the CSV will be read.
272
+ # Defaults to the first element of args Array.
273
+ # @option opt [String] :sql_filename the output file to which the SQL will be written.
274
+ # @option opt [Hash] :csv_options the options that will be used by the CSV parser.
275
+ # @return [Hash] a Hash containing the parsed command line results.
276
+ # @example The most common usage:
277
+ # opt = Rsmart::ETL.parse_csv_command_line_options (File.basename $0), ARGF.argv
197
278
  def self.parse_csv_command_line_options(
198
279
  executable, args, opt={ csv_options: { headers: :first_row,
199
280
  header_converters: :symbol,
200
281
  skip_blanks: true,
201
- col_sep: ",", # comma by default
202
- quote_char: '"', # double quote by default
282
+ col_sep: ",",
283
+ quote_char: '"'
203
284
  }
204
285
  } )
205
286
  optparse = OptionParser.new do |opts|
@@ -208,10 +289,10 @@ module Rsmart::ETL
208
289
  opt[:sql_filename] = f
209
290
  end
210
291
  opts.on( '-s [separator_character]' ,'--separator [separator_character]', 'The character that separates each column of the CSV file.') do |s|
211
- opt[:col_sep] = s
292
+ opt[:csv_options][:col_sep] = s
212
293
  end
213
294
  opts.on( '-q [quote_character]' ,'--quote [quote_character]', 'The character used to quote fields.') do |q|
214
- opt[:quote_char] = q
295
+ opt[:csv_options][:quote_char] = q
215
296
  end
216
297
  opts.on( '-h', '--help', 'Display this screen' ) do
217
298
  puts opts
@@ -187,4 +187,23 @@ module Rsmart::ETL::GRM
187
187
  return Rsmart::ETL::parse_flag str, opt
188
188
  end
189
189
 
190
+ # Designed specifically for actv_ind, but could be used on *any*
191
+ # field that matches /^(Y|N)$/i.
192
+ def self.parse_actv_ind(str, opt={})
193
+ # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
194
+ opt[:name] = "actv_ind" if opt[:name].nil?
195
+ opt[:default] = "Y" if opt[:default].nil?
196
+ opt[:valid_values] = /^(Y|N)$/i if opt[:valid_values].nil?
197
+ return Rsmart::ETL::parse_flag str, opt
198
+ end
199
+
200
+ # Designed specifically for actv_ind, but could be used on *any*
201
+ # field that matches /^(Y|N)$/i.
202
+ def self.parse_actv_ind!(row, insert_str, values_str, opt={})
203
+ # `ACTV_IND` varchar(1) COLLATE utf8_bin DEFAULT 'Y',
204
+ opt[:name] = "actv_ind" if opt[:name].nil?
205
+ actv_ind = parse_actv_ind row[ Rsmart::ETL::to_symbol( opt[:name] ) ]
206
+ Rsmart::ETL::mutate_sql_stmt! insert_str, opt[:name], values_str, actv_ind
207
+ end
208
+
190
209
  end
@@ -15,5 +15,5 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
 
17
17
  module Rsmart
18
- VERSION = "0.5"
18
+ VERSION = "0.6"
19
19
  end
@@ -342,7 +342,7 @@ RSpec.describe "Rsmart::ETL" do
342
342
  it "Modifies the insert_str and values_str based on a CSV::Row match" do
343
343
  insert_str = ""; values_str = "";
344
344
  row = CSV::Row.new(['actv_ind'.to_sym], ['Y'], true)
345
- ETL.parse_actv_ind!(row, insert_str, values_str)
345
+ GRM.parse_actv_ind!(row, insert_str, values_str)
346
346
  expect(insert_str).to eq("ACTV_IND,")
347
347
  expect(values_str).to eq("'Y',")
348
348
  end
@@ -350,7 +350,7 @@ RSpec.describe "Rsmart::ETL" do
350
350
  it "allows for lowercase input Strings" do
351
351
  insert_str = ""; values_str = "";
352
352
  row = CSV::Row.new(['actv_ind'.to_sym], ['n'], true)
353
- ETL.parse_actv_ind!(row, insert_str, values_str)
353
+ GRM.parse_actv_ind!(row, insert_str, values_str)
354
354
  expect(insert_str).to eq("ACTV_IND,")
355
355
  expect(values_str).to eq("'N',")
356
356
  end
@@ -358,12 +358,12 @@ RSpec.describe "Rsmart::ETL" do
358
358
  it "Returns a default value of 'Y' and does not raise an TextParseError if nil or empty" do
359
359
  insert_str = ""; values_str = "";
360
360
  row = CSV::Row.new(['actv_ind'.to_sym], [nil], true)
361
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
361
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
362
362
  expect(insert_str).to eq("ACTV_IND,")
363
363
  expect(values_str).to eq("'Y',")
364
364
  insert_str = ""; values_str = "";
365
365
  row = CSV::Row.new(['actv_ind'.to_sym], [''], true)
366
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
366
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.not_to raise_error
367
367
  expect(insert_str).to eq("ACTV_IND,")
368
368
  expect(values_str).to eq("'Y',")
369
369
  end
@@ -371,13 +371,13 @@ RSpec.describe "Rsmart::ETL" do
371
371
  it "Raises an TextParseError if not a valid 'Y/N' value" do
372
372
  insert_str = ""; values_str = "";
373
373
  row = CSV::Row.new(['actv_ind'.to_sym], ["Q"], true)
374
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
374
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
375
375
  end
376
376
 
377
377
  it "Raises an TextParseError if length exceeds 1 characters" do
378
378
  insert_str = ""; values_str = "";
379
379
  row = CSV::Row.new(['actv_ind'.to_sym], ["x" * 2], true)
380
- expect { ETL.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
380
+ expect { GRM.parse_actv_ind!(row, insert_str, values_str) }.to raise_error(TextParseError)
381
381
  end
382
382
  end
383
383
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rsmart_toolbox
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.6'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lance Speelmon
@@ -30,7 +30,7 @@ cert_chain:
30
30
  sKRWzEtHFamxQaIspOja5O4oQKiCbWa90fEuIoCtwyy1rQtL9VKoDTs4vZASXNuc
31
31
  F/lEyekXSjN36uTtlt4LkKLn/k7k5gRbt4+C9Q==
32
32
  -----END CERTIFICATE-----
33
- date: 2014-08-17 00:00:00.000000000 Z
33
+ date: 2014-08-18 00:00:00.000000000 Z
34
34
  dependencies:
35
35
  - !ruby/object:Gem::Dependency
36
36
  name: builder
metadata.gz.sig CHANGED
Binary file