kuali_toolbox 0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,410 @@
1
+ # KualiCo's client library and command-line tool to help interact with KualiCo's cloud APIs.
2
+ # Copyright (C) 2014-2015 KualiCo, Inc.
3
+
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU Affero General Public License for more details.
13
+
14
+ # You should have received a copy of the GNU Affero General Public License
15
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+
17
+ require "kuali_toolbox"
18
+
19
# KualiCo extract, transform and load methods.
#
# The namespace is opened with nested +module+ statements so that this file
# does not raise a NameError when KualiCo has not been defined yet.
module KualiCo
  module ETL

    # Any text parsing related errors will use this Exception.
    class TextParseError < StandardError
    end

    # Default pattern accepted by parse_date: the empty String or a yyyy-mm-dd date.
    # NOTE(review): the date alternative is not anchored, so surrounding text is
    # accepted as well -- confirm whether that is intended before tightening it.
    DATE_PATTERN = /^$|(\d{4}\-\d{2}\-\d{2}){1}/

    # Default pattern accepted by parse_datetime and parse_timestamp.
    # NOTE(review): the whitespace after the date is mandatory while the time is
    # optional, so a bare (stripped) date never matches -- confirm the intent.
    DATETIME_PATTERN = /^$|(\d{4}\-\d{2}\-\d{2}){1}\s(\d{2}:\d{2}:\d{2})?/

    private_constant :DATE_PATTERN, :DATETIME_PATTERN

    # Prepares an Exception for consistent error handling.
    # @param [String, Exception] e the error to handle
    # @return [Exception] an Exception whose message is prefixed with "ERROR: Line <n>: ".
    #   A String is wrapped in a TextParseError; an Exception keeps its own class.
    # @raise [ArgumentError] if an invalid argument is passed.
    def self.error(e)
      tag_with_line_number "ERROR", e
    end

    # Prepares an Exception for consistent warning handling.
    # @param [String, Exception] e the warning to handle
    # @return [Exception] an Exception whose message is prefixed with "WARN: Line <n>: ".
    #   A String is wrapped in a TextParseError; an Exception keeps its own class.
    # @raise [ArgumentError] if an invalid argument is passed.
    def self.warning(e)
      tag_with_line_number "WARN", e
    end

    # Shared implementation of error and warning.
    # @param [String] level the prefix to use; i.e. "ERROR" or "WARN".
    # @param [String, Exception] e the message or Exception to decorate.
    # @return [Exception] the decorated Exception.
    # @raise [ArgumentError] if e is neither a String nor an Exception.
    def self.tag_with_line_number(level, e)
      unless e.kind_of?(String) || e.kind_of?(Exception)
        raise ArgumentError, "Unsupported error type: #{e.class}"
      end
      # $. is the variable that 'English' aliases as $INPUT_LINE_NUMBER; using it
      # directly keeps the line number available even if 'English' was never loaded.
      message = "#{level}: Line #{$.}: #{e}"
      # default to TextParseError for plain Strings
      e.kind_of?(String) ? TextParseError.new(message) : e.exception(message)
    end
    private_class_method :tag_with_line_number

    # Fills in default option values without touching the caller's Hash.
    # @param [Hash] opt the options supplied by the caller.
    # @param [Hash] defaults the values to use for keys that are missing or nil in opt.
    # @return [Hash] a new Hash; opt itself is never modified.
    def self.apply_defaults(opt, defaults)
      defaults.merge(opt) { |_key, default, given| given.nil? ? default : given }
    end
    private_class_method :apply_defaults

    # Tests whether the subject matches one of the valid values.
    # @param [String, #match] subject used for validity checking.
    # @param [Array<Object>, Regexp] valid_values all of the possible valid values.
    # @option opt [Boolean] :case_sensitive pass false to additionally compare with #casecmp.
    # @return [Boolean] true if the subject matches valid_values.
    # @raise [ArgumentError] if valid_values is nil or empty.
    # @raise [ArgumentError] if a case insensitive comparison is requested for a
    #   subject that does not respond to #casecmp; primarily String objects do.
    def self.valid_value(subject, valid_values, opt={})
      raise ArgumentError, "valid_values must not be nil!" if valid_values.nil?
      if valid_values.kind_of? Regexp
        return true if subject =~ valid_values
      end
      if valid_values.kind_of? Array
        raise ArgumentError, "valid_values must have at least one element!" if valid_values.empty?
        if opt[:case_sensitive] == false # case insensitive comparison requested
          raise ArgumentError, "Object must respond to #casecmp" unless subject.respond_to? :casecmp
          # Elements that cannot be compared with #casecmp are skipped here and
          # left to the == comparison below instead of raising NoMethodError.
          return true if valid_values.any? do |candidate|
            candidate.respond_to?(:casecmp) && candidate.casecmp(subject) == 0
          end
        end
        return true if valid_values.include? subject # default to == equality
      end
      false
    end

    # Matches the input against a set of well known boolean patterns.
    # @param [String] str String to be matched against well known boolean patterns.
    # @option opt [Boolean] :default the default return value if str is empty.
    # @return [Boolean, nil] the result of matching the str input against well known
    #   boolean patterns; nil when str is empty and no :default was given.
    # @raise [TextParseError] if none of the known boolean patterns could be matched.
    def self.parse_boolean(str, opt={})
      return true  if str == true
      return false if str == false
      b = parse_string str, opt
      # \A and \z anchor the whole value; ^ and $ would accept any single matching
      # line inside a multi-line value.
      return true  if b =~ /\A(active|a|true|t|yes|y|1)\z/i
      return false if b =~ /\A(inactive|i|false|f|no|n|0)\z/i
      return opt[:default] if b.empty? # nil when no :default was supplied
      raise error(TextParseError.new("invalid value for Boolean: '#{str}'"))
    end

    # Encodes the input String and replaces invalid or undefined characters.
    # @param [String] str the String to be encoded and invalid characters removed.
    # @option opt [String] :encoding the character encoding to use. Defaults to "UTF-8".
    # @return [String] the result of encoding the String; invalid and undefined
    #   characters are replaced with the empty String.
    # @see String#encode
    def self.encode(str, opt={ encoding: "UTF-8" } )
      target = opt[:encoding] || "UTF-8"
      str.encode( target, :invalid => :replace, :undef => :replace, :replace => "" )
    end

    # The header String is downcased, runs of whitespace are replaced with an
    # underscore, non-word characters are dropped, and finally to_sym() is called.
    # NOTE(review): this was written to match the :symbol header converter of
    # MRI's CSV library; presumably newer csv releases normalise headers
    # differently -- verify against the csv version in use.
    # @param [String] str the String to be symbolized.
    # @return [Symbol] the symbolized String.
    # @raise [ArgumentError] if str is nil or empty.
    def self.to_symbol(str)
      raise ArgumentError, "Illegal symbol name: '#{str}'" if str.nil? || str.empty?
      encode( str.downcase.gsub(/\s+/, "_").gsub(/\W+/, "") ).to_sym
    end

    # Mutates two sides of a SQL insert statement: insert_str and values_str with column_name and value respectively.
    # Integers are appended unquoted; every other value is wrapped in single quotes.
    # NOTE(review): nil is therefore written as '' rather than NULL -- confirm
    # that this is what the generated SQL should contain.
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] column_name the column name to append to insert_str; it is upcased.
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Object] value the value to append to values_str. Must respond to #to_s.
    # @return [void]
    def self.mutate_sql_stmt!(insert_str, column_name, values_str, value)
      insert_str << "#{column_name.upcase},"
      # TODO what are all of the valid types that should not be quoted?
      values_str << (value.kind_of?(Integer) ? "#{value}," : "'#{value}',")
      nil
    end

    # Prepares a String for a SQL statement where single quotes need to be escaped.
    # @param [String] str the String to be escaped.
    # @return [String, nil] the resulting String with single quotes escaped with a backslash.
    #   If a nil is passed, nil is returned.
    def self.escape_single_quotes(str)
      return nil if str.nil?
      # gsub interprets backslashes in the replacement, hence the doubled escape:
      # each ' becomes \'
      str.to_s.gsub("'", "\\\\'")
    end

    # Parses a string using common parsing behavior with options. This method forms the foundation
    # of all the other parsing methods. The opt Hash passed in is never modified.
    # @param [String] str the String to be parsed.
    # @option opt [String, #to_s] :default the default return value if str is empty. Must respond to #to_s
    # @option opt [Boolean] :escape_single_quotes escape single quote characters. Defaults to true.
    # @option opt [Integer] :length the maximum length; see :strict.
    # @option opt [String] :name the name of the field being parsed. Used only for error handling.
    # @option opt [Boolean] :required raise a TextParseError if str is empty.
    # @option opt [Boolean] :strict when true (the default) a value longer than :length raises;
    #   when false a warning is written to standard error and the value is returned unchanged.
    # @option opt [Array<Object>, Regexp] :valid_values all of the possible valid values.
    # @return [String] the parsed results. nil or empty inputs will return the empty String by default(i.e. '').
    # @raise [TextParseError] if the field is :required and found to be empty.
    # @raise [TextParseError] if str.length > :length && :strict
    # @raise [TextParseError] if str does not match :valid_values
    # @example nil or empty inputs will return the empty String by default
    #   '' == parse_string(nil) && '' == parse_string('')
    # @see valid_value
    # @see escape_single_quotes
    def self.parse_string(str, opt={ strict: true, required: false, escape_single_quotes: true })
      opt = apply_defaults(opt, { strict: true, escape_single_quotes: true })
      retval = encode str.to_s.strip
      # The :required check runs before :default is applied.
      if opt[:required] && retval.empty?
        raise error(TextParseError.new("Required data element '#{opt[:name]}' not found: '#{str}'"))
      end
      if opt[:default] && retval.empty?
        retval = opt[:default].to_s
      end
      if opt[:length]
        limit = opt[:length].to_i
        if retval.length > limit
          detail = "#{opt[:name]}.length > #{opt[:length]}: '#{str}'-->'#{str.to_s[0, limit]}'"
          if opt[:strict]
            raise error(TextParseError.new("Data exceeds maximum field length: #{detail}"))
          end
          # Report the problem on standard error; previously the warning object
          # was built and then dropped. The value itself is returned unchanged.
          warn warning("Data will be truncated: #{detail}").message
        end
      end
      if opt[:valid_values] && !valid_value(retval, opt[:valid_values], opt)
        raise error(TextParseError.new("Illegal #{opt[:name]}: value '#{str}' not found in: #{opt[:valid_values]}"))
      end
      retval = escape_single_quotes(retval) if opt[:escape_single_quotes]
      retval
    end

    # Shared implementation of the parse_*! helpers: looks up the column named
    # opt[:name] in row, parses it and appends the column and value to the statement.
    # @param [Symbol] parser the name of the parse method to apply to the column value.
    # @param [CSV::Row, #[]] row the row being parsed; indexed with to_symbol(opt[:name]).
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Hash] opt options Hash passed through to the parser.
    # @return [void]
    # @raise [ArgumentError] :name is required.
    def self.parse_column!(parser, row, insert_str, values_str, opt)
      raise ArgumentError, "opt[:name] is required!" unless opt[:name]
      value = send parser, row[ to_symbol( opt[:name] ) ], opt
      mutate_sql_stmt! insert_str, opt[:name], values_str, value
    end
    private_class_method :parse_column!

    # Helper method which finds the value by column :name and mutates the SQL statement accordingly.
    # @param [CSV::Row] row the CSV Row being parsed
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Hash] opt options Hash will be passed through to #parse_string.
    # @option opt [String] :name the name of the field being parsed. Required.
    # @return [void]
    # @raise [ArgumentError] :name is required.
    # @see parse_string
    # @see mutate_sql_stmt!
    def self.parse_string!(row, insert_str, values_str, opt={})
      parse_column! :parse_string, row, insert_str, values_str, opt
    end

    # Parse an Integer from a String.
    # @note Note the behavioral difference versus #to_i.
    # @param [String] str the String to be parsed.
    # @param [Hash] opt options Hash will be passed through to #parse_string.
    # @return [Integer, nil] the parsed Integer. nil or empty inputs will return nil by default.
    # @example Unlike #to_i, nil or empty inputs will return nil by default
    #   nil == parse_integer(nil) && nil == parse_integer('') && 0 != parse_integer(nil)
    # @see parse_string
    def self.parse_integer(str, opt={})
      s = parse_string str, opt
      s.empty? ? nil : s.to_i
    end

    # Helper method which finds the value by column :name and mutates the SQL statement accordingly.
    # @param [CSV::Row] row the CSV Row being parsed
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Hash] opt options Hash will be passed through to #parse_integer.
    # @option opt [String] :name the name of the field being parsed. Required.
    # @return [void]
    # @raise [ArgumentError] :name is required.
    # @see parse_integer
    # @see mutate_sql_stmt!
    def self.parse_integer!(row, insert_str, values_str, opt={})
      parse_column! :parse_integer, row, insert_str, values_str, opt
    end

    # Parse a Float from a String.
    # @note Note the behavioral difference versus #to_f.
    # @param [String] str the String to be parsed.
    # @param [Hash] opt options Hash will be passed through to #parse_string.
    # @return [Float, nil] the parsed Float. nil or empty inputs will return nil by default.
    # @example Unlike #to_f, nil or empty inputs will return nil by default
    #   nil == parse_float(nil) && nil == parse_float('') && 0.0 != parse_float(nil)
    # @see parse_string
    def self.parse_float(str, opt={})
      s = parse_string str, opt
      s.empty? ? nil : s.to_f
    end

    # Parse a SQL date from a String.
    # @param [String] str the String to be parsed.
    # @param [Hash] opt options Hash will be passed through to #parse_string.
    # @option opt [Array<Object>, Regexp] :valid_values overrides the default date pattern.
    # @return [String] the parsed date. nil or empty inputs will return '' by default.
    # @raise [TextParseError] if str does not match :valid_values.
    # @see parse_string
    def self.parse_date(str, opt={ valid_values: DATE_PATTERN })
      parse_string str, apply_defaults(opt, { valid_values: DATE_PATTERN })
    end

    # Helper method for #parse_date which finds the value by column :name and mutates the SQL statement accordingly.
    # @param [CSV::Row] row the CSV Row being parsed
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Hash] opt options Hash will be passed through to #parse_date.
    # @option opt [String] :name the name of the field being parsed. Required.
    # @return [void]
    # @raise [ArgumentError] :name is required.
    # @see parse_date
    # @see mutate_sql_stmt!
    def self.parse_date!(row, insert_str, values_str, opt={})
      parse_column! :parse_date, row, insert_str, values_str, opt
    end

    # Parse a SQL datetime from a String.
    # @param [String] str the String to be parsed.
    # @param [Hash] opt options Hash will be passed through to #parse_string.
    # @option opt [Array<Object>, Regexp] :valid_values overrides the default datetime pattern.
    # @return [String] the parsed datetime. nil or empty inputs will return '' by default.
    # @raise [TextParseError] if str does not match :valid_values.
    # @see parse_string
    def self.parse_datetime(str, opt={ valid_values: DATETIME_PATTERN })
      parse_string str, apply_defaults(opt, { valid_values: DATETIME_PATTERN })
    end

    # Helper method for #parse_datetime which finds the value by column :name and mutates the SQL statement accordingly.
    # @param [CSV::Row] row the CSV Row being parsed
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Hash] opt options Hash will be passed through to #parse_datetime.
    # @option opt [String] :name the name of the field being parsed. Required.
    # @return [void]
    # @raise [ArgumentError] :name is required.
    # @see parse_datetime
    # @see mutate_sql_stmt!
    def self.parse_datetime!(row, insert_str, values_str, opt={})
      parse_column! :parse_datetime, row, insert_str, values_str, opt
    end

    # Parse a SQL timestamp from a String. Timestamps are validated exactly like datetimes.
    # @param [String] str the String to be parsed.
    # @param [Hash] opt options Hash will be passed through to #parse_datetime.
    # @return [String] the parsed timestamp. nil or empty inputs will return '' by default.
    # @see parse_datetime
    def self.parse_timestamp(str, opt={ valid_values: DATETIME_PATTERN })
      parse_datetime str, opt
    end

    # Helper method for #parse_timestamp which finds the value by column :name and mutates the SQL statement accordingly.
    # @param [CSV::Row] row the CSV Row being parsed
    # @param [String] insert_str the left side of the insert statement (i.e. columns)
    # @param [String] values_str the right side of the insert statement (i.e. values)
    # @param [Hash] opt options Hash will be passed through to #parse_timestamp.
    # @option opt [String] :name the name of the field being parsed. Required.
    # @return [void]
    # @raise [ArgumentError] :name is required.
    # @see parse_timestamp
    # @see mutate_sql_stmt!
    def self.parse_timestamp!(row, insert_str, values_str, opt={})
      parse_column! :parse_timestamp, row, insert_str, values_str, opt
    end

    # Useful for parsing "flag" like values; i.e. usually single characters.
    # The opt Hash passed in is never modified.
    # @param [String] str the String to be parsed.
    # @param [Hash] opt options Hash will be passed through to #parse_string.
    # @option opt [Integer] :length the maximum supported length of the field. Defaults to 1.
    # @option opt [Boolean] :upcase if true (the default) upcase the results.
    # @return [String] the parsed "flag".
    # @see parse_string
    def self.parse_flag(str, opt={ length: 1, upcase: true })
      opt = apply_defaults(opt, { length: 1, upcase: true })
      retval = parse_string str, opt
      retval = retval.upcase if opt[:upcase] == true
      retval
    end

    # Parse common command line options for CSV --> SQL transformations.
    #
    # Options are parsed out of args (and removed from it), after which the first
    # remaining argument is used as the CSV filename. Options may therefore appear
    # before or after the filename. Prints the usage and exits with status 1 when
    # -h is given or when no CSV filename can be determined.
    # @param [String] executable the name of the script from which we are executing. See example.
    # @param [Array<String>] args the command line args; recognised options are removed in place.
    # @option opt [String] :csv_filename the input file from which the CSV will be read.
    #   Defaults to the first non-option element of args Array.
    # @option opt [String] :sql_filename the output file to which the SQL will be written.
    #   Defaults to the CSV filename with its extension replaced by ".sql".
    # @option opt [Hash] :csv_options the options that will be used by the CSV parser.
    # @return [Hash] opt, updated with the parsed command line results.
    # @example The most common usage:
    #   opt = KualiCo::ETL.parse_csv_command_line_options (File.basename $0), ARGF.argv
    def self.parse_csv_command_line_options(
        executable, args, opt={ csv_options: { headers: :first_row,
                                               header_converters: :symbol,
                                               skip_blanks: true,
                                               col_sep: ",",
                                               quote_char: '"'
                                             }
                              } )
      optparse = OptionParser.new do |opts|
        opts.banner = "Usage: #{executable} [options] csv_file"
        opts.on( '-o' ,'--output SQL_FILE_OUTPUT', 'The file the SQL data will be writen to... (defaults to <csv_file>.sql)') do |f|
          opt[:sql_filename] = f
        end
        opts.on( '-s' ,'--separator SEPARATOR_CHARACTER', 'The character that separates each column of the CSV file.') do |s|
          # a caller supplied opt may not contain :csv_options yet
          (opt[:csv_options] ||= {})[:col_sep] = s
        end
        opts.on( '-q' ,'--quote QUOTE_CHARACTER', 'The character used to quote fields.') do |q|
          (opt[:csv_options] ||= {})[:quote_char] = q
        end
        opts.on( '-h', '--help', 'Display this screen' ) do
          puts opts
          exit 1
        end
      end

      # Parse the args that were handed in rather than whatever ARGV contains,
      # and only then pick the filename so that a leading option is not mistaken for it.
      remaining = optparse.parse! args
      opt[:csv_filename] = remaining[0] unless opt[:csv_filename]
      if opt[:csv_filename].nil? || opt[:csv_filename].empty?
        puts optparse
        exit 1
      end

      # construct a sensible default output filename
      unless opt[:sql_filename]
        file_extension = File.extname opt[:csv_filename]
        dir_name = File.dirname opt[:csv_filename]
        base_name = File.basename opt[:csv_filename], file_extension
        opt[:sql_filename] = File.join dir_name, "#{base_name}.sql"
      end

      opt
    end

  end
end
@@ -0,0 +1,20 @@
1
+ # KualiCo's client library and command-line tool to help interact with KualiCo's cloud APIs.
2
+ # Copyright (C) 2014-2015 KualiCo, Inc.
3
+
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU Affero General Public License for more details.
13
+
14
+ # You should have received a copy of the GNU Affero General Public License
15
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+
17
module KualiCo
  # The gem version number. Frozen so that the shared constant cannot be
  # modified in place by code that reads it.
  VERSION = "0.18".freeze
end
@@ -0,0 +1,23 @@
1
+ # KualiCo's client library and command-line tool to help interact with KualiCo's cloud APIs.
2
+ # Copyright (C) 2014-2015 KualiCo, Inc.
3
+
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, either version 3 of the License, or
7
+ # (at your option) any later version.
8
+
9
+ # This program is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ # GNU Affero General Public License for more details.
13
+
14
+ # You should have received a copy of the GNU Affero General Public License
15
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+
17
+ require 'csv'
18
+ require 'optparse'
19
+ require 'pp'
20
+ require "kuali_toolbox/version"
21
+
22
# Root namespace of the toolbox. The body is intentionally empty here; this
# declaration only makes sure the KualiCo module exists.
module KualiCo; end