mtsql-1.11.17-py3-none-any.whl → mtsql-1.11.19-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mt/sql/redshift/__init__.py CHANGED
@@ -37,7 +37,7 @@ registry.register(
 )
 
 registry.register(
-    "rs+redshift_connector",
+    "mtsql_redshift",
     "mt.sql.redshift.dialect",
    "RedshiftDialect_redshift_connector",
 )
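
In practice, the rename changes the URL scheme used to request this dialect. A minimal sketch, assuming the `registry.register` call above runs when `mt.sql.redshift` is imported (the host and credentials are hypothetical):

```python
import sqlalchemy as sa

# Assumed: importing the package executes the registry.register() call above,
# which binds the dialect class to the "mtsql_redshift" URL scheme.
import mt.sql.redshift  # noqa: F401

# The old "rs+redshift_connector" scheme no longer resolves after this change.
engine = sa.create_engine(
    'mtsql_redshift://user:secret@cluster.example.com:5439/dev'  # hypothetical DSN
)
```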
mt/sql/redshift/commands.py ADDED
@@ -0,0 +1,1040 @@
+import enum
+import numbers
+import re
+import warnings
+try:
+    from collections.abc import Iterable
+except ImportError:
+    from collections import Iterable
+
+import sqlalchemy as sa
+from sqlalchemy import exc as sa_exc
+from sqlalchemy.ext import compiler as sa_compiler
+from sqlalchemy.sql import expression as sa_expression
+
+
+# At the time of this implementation, no specification for a session token was
+# found. After looking at a few session tokens they appear to be the same as
+# the aws_secret_access_key pattern, but much longer. An example token can be
+# found here:
+# https://docs.aws.amazon.com/STS/latest/APIReference/API_GetSessionToken.html
+# The regexes for access keys can be found here:
+# https://blogs.aws.amazon.com/security/blog/tag/key+rotation
+# The pattern of IAM role ARNs can be found here:
+# http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html#arn-syntax-iam
+
+ACCESS_KEY_ID_RE = re.compile('[A-Z0-9]{20}')
+SECRET_ACCESS_KEY_RE = re.compile('[A-Za-z0-9/+=]{40}')
+TOKEN_RE = re.compile('[A-Za-z0-9/+=]+')
+AWS_PARTITIONS = frozenset({'aws', 'aws-cn', 'aws-us-gov'})
+AWS_ACCOUNT_ID_RE = re.compile('[0-9]{12}')
+IAM_ROLE_NAME_RE = re.compile('[A-Za-z0-9+=,.@\-_]{1,64}')  # noqa
+IAM_ROLE_ARN_RE = re.compile('arn:(aws|aws-cn|aws-us-gov):iam::'
+                             '[0-9]{12}:role/[A-Za-z0-9+=,.@\-_]{1,64}')  # noqa
+
+
+def _process_aws_credentials(access_key_id=None, secret_access_key=None,
+                             session_token=None, aws_partition='aws',
+                             aws_account_id=None, iam_role_name=None,
+                             iam_role_arns=None):
+    uses_iam_role = aws_account_id is not None and iam_role_name is not None
+    uses_iam_roles = iam_role_arns is not None
+    uses_key = access_key_id is not None and secret_access_key is not None
+
+    if uses_iam_role + uses_iam_roles + uses_key > 1:
+        raise TypeError(
+            'Either access key based credentials or role based credentials '
+            'should be specified, but not both'
+        )
+
+    credentials = None
+
+    if aws_account_id is not None and iam_role_name is not None:
+        if aws_partition not in AWS_PARTITIONS:
+            raise ValueError('invalid AWS partition')
+        if not AWS_ACCOUNT_ID_RE.match(aws_account_id):
+            raise ValueError(
+                'invalid AWS account ID; does not match {pattern}'.format(
+                    pattern=AWS_ACCOUNT_ID_RE.pattern,
+                )
+            )
+        elif not IAM_ROLE_NAME_RE.match(iam_role_name):
+            raise ValueError(
+                'invalid IAM role name; does not match {pattern}'.format(
+                    pattern=IAM_ROLE_NAME_RE.pattern,
+                )
+            )
+
+        credentials = 'aws_iam_role=arn:{0}:iam::{1}:role/{2}'.format(
+            aws_partition,
+            aws_account_id,
+            iam_role_name,
+        )
+
+    if iam_role_arns is not None:
+        if isinstance(iam_role_arns, str):
+            iam_role_arns = [iam_role_arns]
+        if not isinstance(iam_role_arns, list):
+            raise ValueError('iam_role_arns must be a list')
+        for arn in iam_role_arns:
+            if not IAM_ROLE_ARN_RE.match(arn):
+                raise ValueError(
+                    'invalid IAM role ARN; does not match {pattern}'.format(
+                        pattern=IAM_ROLE_ARN_RE.pattern,
+                    )
+                )
+
+        credentials = 'aws_iam_role=' + ','.join(iam_role_arns)
+
+    if access_key_id is not None and secret_access_key is not None:
+        if not ACCESS_KEY_ID_RE.match(access_key_id):
+            raise ValueError(
+                'invalid access_key_id; does not match {pattern}'.format(
+                    pattern=ACCESS_KEY_ID_RE.pattern,
+                )
+            )
+        if not SECRET_ACCESS_KEY_RE.match(secret_access_key):
+            raise ValueError(
+                'invalid secret_access_key; does not match {pattern}'.format(
+                    pattern=SECRET_ACCESS_KEY_RE.pattern,
+                )
+            )
+
+        credentials = 'aws_access_key_id={0};aws_secret_access_key={1}'.format(
+            access_key_id,
+            secret_access_key,
+        )
+
+        if session_token is not None:
+            if not TOKEN_RE.match(session_token):
+                raise ValueError(
+                    'invalid session_token; does not match {pattern}'.format(
+                        pattern=TOKEN_RE.pattern,
+                    )
+                )
+            credentials += ';token={0}'.format(session_token)
+
+    if credentials is None:
+        raise TypeError(
+            'Either access key based credentials or role based credentials '
+            'should be specified'
+        )
+
+    return credentials
+
+
+def _process_fixed_width(spec):
+    return ','.join(('{0}:{1:d}'.format(col, width) for col, width in spec))
+
+
+class _ExecutableClause(sa_expression.Executable,
+                        sa_expression.ClauseElement):
+    pass
+
+
+class AlterTableAppendCommand(_ExecutableClause):
+    """
+    Prepares an `ALTER TABLE APPEND` statement to efficiently move data from
+    one table to another, much faster than an INSERT INTO ... SELECT.
+
+    CAUTION: This moves the underlying storage blocks from the source table to
+    the target table, so the source table will be *empty* after this command
+    finishes.
+
+    See the documentation for additional restrictions and other information:
+    https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE_APPEND.html
+
+    Parameters
+    ----------
+
+    source: sqlalchemy.Table
+        The table to move data from. Must be an existing permanent table.
+    target: sqlalchemy.Table
+        The table to move data into. Must be an existing permanent table.
+    ignore_extra: bool, optional
+        If the source table includes columns not present in the target table,
+        discard those columns. Mutually exclusive with `fill_target`.
+    fill_target: bool, optional
+        If the target table includes columns not present in the source table,
+        fill those columns with the default column value or NULL. Mutually
+        exclusive with `ignore_extra`.
+    """
+    def __init__(self, source, target, ignore_extra=False, fill_target=False):
+        if ignore_extra and fill_target:
+            raise ValueError(
+                '"ignore_extra" cannot be used with "fill_target".')
+
+        self.source = source
+        self.target = target
+        self.ignore_extra = ignore_extra
+        self.fill_target = fill_target
+
+
+@sa_compiler.compiles(AlterTableAppendCommand)
+def visit_alter_table_append_command(element, compiler, **kw):
+    """
+    Returns the actual SQL query for the AlterTableAppendCommand class.
+    """
+    if element.ignore_extra:
+        fill_option = 'IGNOREEXTRA'
+    elif element.fill_target:
+        fill_option = 'FILLTARGET'
+    else:
+        fill_option = ''
+
+    query_text = \
+        'ALTER TABLE {target} APPEND FROM {source} {fill_option}'.format(
+            target=compiler.preparer.format_table(element.target),
+            source=compiler.preparer.format_table(element.source),
+            fill_option=fill_option,
+        )
+    return compiler.process(sa.text(query_text), **kw)
+
+
+class UnloadFromSelect(_ExecutableClause):
+    """
+    Prepares a Redshift UNLOAD statement to unload a query result to Amazon S3
+    https://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD_command_examples.html
+
+    Parameters
+    ----------
+    select: sqlalchemy.sql.selectable.Selectable
+        The selectable Core Table Expression query to unload from.
+    unload_location: str
+        The Amazon S3 location where the file will be created, or a manifest
+        file if the `manifest` option is used
+    access_key_id: str, optional
+        Access Key. Required unless you supply role-based credentials
+        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
+    secret_access_key: str, optional
+        Secret Access Key ID. Required unless you supply role-based credentials
+        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
+    session_token : str, optional
+    iam_role_arns : str or list of strings, optional
+        Either a single arn or a list of arns of roles to assume when unloading.
+        Required unless you supply key based credentials (``access_key_id`` and
+        ``secret_access_key``) or (``aws_account_id`` and ``iam_role_name``)
+        separately.
+    aws_partition: str, optional
+        AWS partition to use with role-based credentials. Defaults to
+        ``'aws'``. Not applicable when using key based credentials
+        (``access_key_id`` and ``secret_access_key``) or role arns
+        (``iam_role_arns``) directly.
+    aws_account_id: str, optional
+        AWS account ID for role-based credentials. Required unless you supply
+        key based credentials (``access_key_id`` and ``secret_access_key``)
+        or role arns (``iam_role_arns``) directly.
+    iam_role_name: str, optional
+        IAM role name for role-based credentials. Required unless you supply
+        key based credentials (``access_key_id`` and ``secret_access_key``)
+        or role arns (``iam_role_arns``) directly.
+    manifest: bool, optional
+        Boolean value denoting whether data_location is a manifest file.
+    delimiter: File delimiter, optional
+        defaults to '|'
+    fixed_width: iterable of (str, int), optional
+        List of (column name, length) pairs to control fixed-width output.
+    encrypted: bool, optional
+        Write to encrypted S3 key.
+    gzip: bool, optional
+        Create file using GZIP compression.
+    add_quotes: bool, optional
+        Quote fields so that fields containing the delimiter can be
+        distinguished.
+    null: str, optional
+        Write null values as the given string. Defaults to ''.
+    escape: bool, optional
+        For CHAR and VARCHAR columns in delimited unload files, an escape
+        character (``\\``) is placed before every occurrence of the following
+        characters: ``\\r``, ``\\n``, ``\\``, the specified delimiter string.
+        If `add_quotes` is specified, ``"`` and ``'`` are also escaped.
+    allow_overwrite: bool, optional
+        Overwrite the key at unload_location in the S3 bucket.
+    parallel: bool, optional
+        If disabled unload sequentially as one file.
+    header: bool, optional
+        Boolean value denoting whether to add header line
+        containing column names at the top of each output file.
+        Text transformation options, such as delimiter, add_quotes,
+        and escape, also apply to the header line.
+        `header` can't be used with fixed_width.
+    region: str, optional
+        The AWS region where the target S3 bucket is located, if the Redshift
+        cluster isn't in the same region as the S3 bucket.
+    max_file_size: int, optional
+        Maximum size (in bytes) of files to create in S3. This must be between
+        5 * 1024**2 and 6.24 * 1024**3. Note that Redshift appears to round
+        to the nearest KiB.
+    format : Format, optional
+        Indicates the type of file to unload to.
+    """
+
+    def __init__(self, select, unload_location, access_key_id=None,
+                 secret_access_key=None, session_token=None,
+                 aws_partition='aws', aws_account_id=None, iam_role_name=None,
+                 manifest=False, delimiter=None, fixed_width=None,
+                 encrypted=False, gzip=False, add_quotes=False, null=None,
+                 escape=False, allow_overwrite=False, parallel=True,
+                 header=False, region=None, max_file_size=None,
+                 format=None, iam_role_arns=None):
+
+        if delimiter is not None and len(delimiter) != 1:
+            raise ValueError(
+                '"delimiter" parameter must be a single character'
+            )
+
+        if header and fixed_width is not None:
+            raise ValueError(
+                "'header' cannot be used with 'fixed_width'"
+            )
+
+        credentials = _process_aws_credentials(
+            access_key_id=access_key_id,
+            secret_access_key=secret_access_key,
+            session_token=session_token,
+            aws_partition=aws_partition,
+            aws_account_id=aws_account_id,
+            iam_role_name=iam_role_name,
+            iam_role_arns=iam_role_arns,
+        )
+
+        self.select = select
+        self.unload_location = unload_location
+        self.credentials = credentials
+        self.manifest = manifest
+        self.header = header
+        self.format = _check_enum(Format, format)
+        self.delimiter = delimiter
+        self.fixed_width = fixed_width
+        self.encrypted = encrypted
+        self.gzip = gzip
+        self.add_quotes = add_quotes
+        self.null = null
+        self.escape = escape
+        self.allow_overwrite = allow_overwrite
+        self.parallel = parallel
+        self.region = region
+        self.max_file_size = max_file_size
+
+
+@sa_compiler.compiles(UnloadFromSelect)
+def visit_unload_from_select(element, compiler, **kw):
+    """Returns the actual sql query for the UnloadFromSelect class."""
+
+    template = """
+       UNLOAD (:select) TO :unload_location
+       CREDENTIALS :credentials
+       {manifest}
+       {header}
+       {format}
+       {delimiter}
+       {encrypted}
+       {fixed_width}
+       {gzip}
+       {add_quotes}
+       {null}
+       {escape}
+       {allow_overwrite}
+       {parallel}
+       {region}
+       {max_file_size}
+    """
+    el = element
+
+    if el.format is None:
+        format_ = ''
+    elif el.format == Format.csv:
+        format_ = 'FORMAT AS {}'.format(el.format.value)
+        if el.delimiter is not None or el.fixed_width is not None:
+            raise ValueError(
+                'CSV format cannot be used with delimiter or fixed_width')
+    elif el.format == Format.parquet:
+        format_ = 'FORMAT AS {}'.format(el.format.value)
+        if any((
+            el.delimiter, el.fixed_width, el.add_quotes, el.escape, el.null,
+            el.header, el.gzip
+        )):
+            raise ValueError(
+                "Parquet format can't be used with `delimiter`, `fixed_width`,"
+                ' `add_quotes`, `escape`, `null`, `header`, or `gzip`.'
+            )
+    else:
+        raise ValueError(
+            'Only CSV and Parquet formats are currently supported.'
+        )
+
+    qs = template.format(
+        manifest='MANIFEST' if el.manifest else '',
+        header='HEADER' if el.header else '',
+        format=format_,
+        delimiter=(
+            'DELIMITER AS :delimiter' if el.delimiter is not None else ''
+        ),
+        encrypted='ENCRYPTED' if el.encrypted else '',
+        fixed_width='FIXEDWIDTH AS :fixed_width' if el.fixed_width else '',
+        gzip='GZIP' if el.gzip else '',
+        add_quotes='ADDQUOTES' if el.add_quotes else '',
+        escape='ESCAPE' if el.escape else '',
+        null='NULL AS :null_as' if el.null is not None else '',
+        allow_overwrite='ALLOWOVERWRITE' if el.allow_overwrite else '',
+        parallel='PARALLEL OFF' if not el.parallel else '',
+        region='REGION :region' if el.region is not None else '',
+        max_file_size=(
+            'MAXFILESIZE :max_file_size MB'
+            if el.max_file_size is not None else ''
+        ),
+    )
+
+    query = sa.text(qs)
+
+    if el.delimiter is not None:
+        query = query.bindparams(sa.bindparam(
+            'delimiter', value=element.delimiter, type_=sa.String,
+        ))
+
+    if el.fixed_width:
+        query = query.bindparams(sa.bindparam(
+            'fixed_width',
+            value=_process_fixed_width(el.fixed_width),
+            type_=sa.String,
+        ))
+
+    if el.null is not None:
+        query = query.bindparams(sa.bindparam(
+            'null_as', value=el.null, type_=sa.String
+        ))
+
+    if el.region is not None:
+        query = query.bindparams(sa.bindparam(
+            'region', value=el.region, type_=sa.String
+        ))
+
+    if el.max_file_size is not None:
+        max_file_size_mib = float(el.max_file_size) / 1024 / 1024
+        query = query.bindparams(sa.bindparam(
+            'max_file_size', value=max_file_size_mib, type_=sa.Float
+        ))
+
+    return compiler.process(
+        query.bindparams(
+            sa.bindparam('credentials', value=el.credentials, type_=sa.String),
+            sa.bindparam(
+                'unload_location', value=el.unload_location, type_=sa.String,
+            ),
+            sa.bindparam(
+                'select',
+                value=compiler.process(
+                    el.select,
+                    literal_binds=True,
+                ),
+                type_=sa.String,
+            ),
+        ),
+        **kw
+    )
+
+
+class Format(enum.Enum):
+    csv = 'CSV'
+    json = 'JSON'
+    avro = 'AVRO'
+    orc = 'ORC'
+    parquet = 'PARQUET'
+    fixed_width = 'FIXEDWIDTH'
+
+
+class Compression(enum.Enum):
+    gzip = 'GZIP'
+    lzop = 'LZOP'
+    bzip2 = 'BZIP2'
+
+
+class Encoding(enum.Enum):
+    utf8 = 'UTF8'
+    utf16 = 'UTF16'
+    utf16le = 'UTF16LE'
+    utf16be = 'UTF16BE'
+
+
+def _check_enum(Enum, val):
+    if val is None:
+        return
+
+    cleaned = Enum(val)
+    if cleaned is not val:
+        tpl = '{val!r} should be {cleaned!r}, an instance of {Enum!r}'
+        msg = tpl.format(val=val, cleaned=cleaned, Enum=Enum)
+        warnings.warn(msg, DeprecationWarning)
+
+    return cleaned
+
+
+class CopyCommand(_ExecutableClause):
+    """
+    Prepares a Redshift COPY statement.
+
+    Parameters
+    ----------
+    to : sqlalchemy.Table or iterable of sqlalchemy.ColumnElement
+        The table or columns to copy data into
+    data_location : str
+        The Amazon S3 location from where to copy, or a manifest file if
+        the `manifest` option is used
+    access_key_id: str, optional
+        Access Key. Required unless you supply role-based credentials
+        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
+    secret_access_key: str, optional
+        Secret Access Key ID. Required unless you supply role-based credentials
+        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
+    session_token : str, optional
+    iam_role_arns : str or list of strings, optional
+        Either a single arn or a list of arns of roles to assume when loading.
+        Required unless you supply key based credentials (``access_key_id`` and
+        ``secret_access_key``) or (``aws_account_id`` and ``iam_role_name``)
+        separately.
+    aws_partition: str, optional
+        AWS partition to use with role-based credentials. Defaults to
+        ``'aws'``. Not applicable when using key based credentials
+        (``access_key_id`` and ``secret_access_key``) or role arns
+        (``iam_role_arns``) directly.
+    aws_account_id: str, optional
+        AWS account ID for role-based credentials. Required unless you supply
+        key based credentials (``access_key_id`` and ``secret_access_key``)
+        or role arns (``iam_role_arns``) directly.
+    iam_role_name: str, optional
+        IAM role name for role-based credentials. Required unless you supply
+        key based credentials (``access_key_id`` and ``secret_access_key``)
+        or role arns (``iam_role_arns``) directly.
+    format : Format, optional
+        Indicates the type of file to copy from
+    quote : str, optional
+        Specifies the character to be used as the quote character when using
+        ``format=Format.csv``. The default is a double quotation mark ( ``"`` )
+    delimiter : Field delimiter, optional
+        defaults to ``|``
+    path_file : str, optional
+        Specifies an Amazon S3 location to a JSONPaths file to explicitly map
+        Avro or JSON data elements to columns.
+        defaults to ``'auto'``
+    fixed_width: iterable of (str, int), optional
+        List of (column name, length) pairs to control fixed-width output.
+    compression : Compression, optional
+        indicates the type of compression of the file to copy
+    accept_any_date : bool, optional
+        Allows any date format, including invalid formats such as
+        ``00/00/00 00:00:00``, to be loaded as NULL without generating an error
+        defaults to False
+    accept_inv_chars : str, optional
+        Enables loading of data into VARCHAR columns even if the data contains
+        invalid UTF-8 characters. When specified each invalid UTF-8 byte is
+        replaced by the specified replacement character
+    blanks_as_null : bool, optional
+        Boolean value denoting whether to load VARCHAR fields with whitespace
+        only values as NULL instead of whitespace
+    date_format : str, optional
+        Specifies the date format. If you want Amazon Redshift to automatically
+        recognize and convert the date format in your source data, specify
+        ``'auto'``
+    empty_as_null : bool, optional
+        Boolean value denoting whether to load VARCHAR fields with empty
+        values as NULL instead of empty string
+    encoding : Encoding, optional
+        Specifies the encoding type of the load data defaults to
+        ``Encoding.utf8``
+    escape : bool, optional
+        When this parameter is specified, the backslash character (``\\``) in
+        input data is treated as an escape character. The character that
+        immediately follows the backslash character is loaded into the table
+        as part of the current column value, even if it is a character that
+        normally serves a special purpose
+    explicit_ids : bool, optional
+        Override the autogenerated IDENTITY column values with explicit values
+        from the source data files for the tables
+    fill_record : bool, optional
+        Allows data files to be loaded when contiguous columns are missing at
+        the end of some of the records. The missing columns are filled with
+        either zero-length strings or NULLs, as appropriate for the data types
+        of the columns in question.
+    ignore_blank_lines : bool, optional
+        Ignores blank lines that only contain a line feed in a data file and
+        does not try to load them
+    ignore_header : int, optional
+        Integer value of number of lines to skip at the start of each file
+    dangerous_null_delimiter : str, optional
+        Optional string value denoting what to interpret as a NULL value from
+        the file. Note that this parameter *is not properly quoted* due to a
+        difference between redshift's and postgres's COPY commands
+        interpretation of strings. For example, null bytes must be passed to
+        redshift's ``NULL`` verbatim as ``'\\0'`` whereas postgres's ``NULL``
+        accepts ``'\\x00'``.
+    remove_quotes : bool, optional
+        Removes surrounding quotation marks from strings in the incoming data.
+        All characters within the quotation marks, including delimiters, are
+        retained.
+    roundec : bool, optional
+        Rounds up numeric values when the scale of the input value is greater
+        than the scale of the column
+    time_format : str, optional
+        Specifies the time format. If you want Amazon Redshift to automatically
+        recognize and convert the time format in your source data, specify
+        ``'auto'``
+    trim_blanks : bool, optional
+        Removes the trailing white space characters from a VARCHAR string
+    truncate_columns : bool, optional
+        Truncates data in columns to the appropriate number of characters so
+        that it fits the column specification
+    comp_rows : int, optional
+        Specifies the number of rows to be used as the sample size for
+        compression analysis
+    comp_update : bool, optional
+        Controls whether compression encodings are automatically applied.
+        If omitted or None, COPY applies automatic compression only if the
+        target table is empty and all the table columns either have RAW
+        encoding or no encoding.
+        If True COPY applies automatic compression if the table is empty, even
+        if the table columns already have encodings other than RAW.
+        If False automatic compression is disabled
+    max_error : int, optional
+        If the load returns the ``max_error`` number of errors or greater, the
+        load fails
+        defaults to 100000
+    no_load : bool, optional
+        Checks the validity of the data file without actually loading the data
+    stat_update : bool, optional
+        Update statistics automatically regardless of whether the table is
+        initially empty
+    manifest : bool, optional
+        Boolean value denoting whether data_location is a manifest file.
+    region: str, optional
+        The AWS region where the target S3 bucket is located, if the Redshift
+        cluster isn't in the same region as the S3 bucket.
+    """
+
+    def __init__(self, to, data_location, access_key_id=None,
+                 secret_access_key=None, session_token=None,
+                 aws_partition='aws', aws_account_id=None, iam_role_name=None,
+                 format=None, quote=None,
+                 path_file='auto', delimiter=None, fixed_width=None,
+                 compression=None, accept_any_date=False,
+                 accept_inv_chars=None, blanks_as_null=False, date_format=None,
+                 empty_as_null=False, encoding=None, escape=False,
+                 explicit_ids=False, fill_record=False,
+                 ignore_blank_lines=False, ignore_header=None,
+                 dangerous_null_delimiter=None, remove_quotes=False,
+                 roundec=False, time_format=None, trim_blanks=False,
+                 truncate_columns=False, comp_rows=None, comp_update=None,
+                 max_error=None, no_load=False, stat_update=None,
+                 manifest=False, region=None, iam_role_arns=None):
+
+        credentials = _process_aws_credentials(
+            access_key_id=access_key_id,
+            secret_access_key=secret_access_key,
+            session_token=session_token,
+            aws_partition=aws_partition,
+            aws_account_id=aws_account_id,
+            iam_role_name=iam_role_name,
+            iam_role_arns=iam_role_arns,
+        )
+
+        if delimiter is not None and len(delimiter) != 1:
+            raise ValueError('"delimiter" parameter must be a single '
+                             'character')
+
+        if ignore_header is not None:
+            if not isinstance(ignore_header, numbers.Integral):
+                raise TypeError(
+                    '"ignore_header" parameter should be an integer'
+                )
+
+        table = None
+        columns = []
+        if isinstance(to, Iterable):
+            for column in to:
+                if table is not None and table != column.table:
+                    raise ValueError(
+                        'All columns must come from the same table: '
+                        '%s comes from %s not %s' % (
+                            column, column.table, table
+                        ),
+                    )
+                columns.append(column)
+                table = column.table
+        else:
+            table = to
+
+        self.table = table
+        self.columns = columns
+        self.data_location = data_location
+        self.credentials = credentials
+        self.format = _check_enum(Format, format)
+        self.quote = quote
+        self.path_file = path_file
+        self.delimiter = delimiter
+        self.fixed_width = fixed_width
+        self.compression = _check_enum(Compression, compression)
+        self.manifest = manifest
+        self.accept_any_date = accept_any_date
+        self.accept_inv_chars = accept_inv_chars
+        self.blanks_as_null = blanks_as_null
+        self.date_format = date_format
+        self.empty_as_null = empty_as_null
+        self.encoding = _check_enum(Encoding, encoding)
+        self.escape = escape
+        self.explicit_ids = explicit_ids
+        self.fill_record = fill_record
+        self.ignore_blank_lines = ignore_blank_lines
+        self.ignore_header = ignore_header
+        self.dangerous_null_delimiter = dangerous_null_delimiter
+        self.remove_quotes = remove_quotes
+        self.roundec = roundec
+        self.time_format = time_format
+        self.trim_blanks = trim_blanks
+        self.truncate_columns = truncate_columns
+        self.comp_rows = comp_rows
+        self.comp_update = comp_update
+        self.max_error = max_error
+        self.no_load = no_load
+        self.stat_update = stat_update
+        self.region = region
+
+
+@sa_compiler.compiles(CopyCommand)
+def visit_copy_command(element, compiler, **kw):
+    """
+    Returns the actual sql query for the CopyCommand class.
+    """
+    qs = """COPY {table}{columns} FROM :data_location
+    WITH CREDENTIALS AS :credentials
+    {format}
+    {parameters}"""
+    parameters = []
+    bindparams = [
+        sa.bindparam(
+            'data_location',
+            value=element.data_location,
+            type_=sa.String,
+        ),
+        sa.bindparam(
+            'credentials',
+            value=element.credentials,
+            type_=sa.String,
+        ),
+    ]
+
+    if element.format == Format.csv:
+        format_ = 'FORMAT AS CSV'
+        if element.quote is not None:
+            format_ += ' QUOTE AS :quote_character'
+            bindparams.append(sa.bindparam(
+                'quote_character',
+                value=element.quote,
+                type_=sa.String,
+            ))
+    elif element.format == Format.json:
+        format_ = 'FORMAT AS JSON AS :json_option'
+        bindparams.append(sa.bindparam(
+            'json_option',
+            value=element.path_file,
+            type_=sa.String,
+        ))
+    elif element.format == Format.avro:
+        format_ = 'FORMAT AS AVRO AS :avro_option'
+        bindparams.append(sa.bindparam(
+            'avro_option',
+            value=element.path_file,
+            type_=sa.String,
+        ))
+    elif element.format == Format.orc:
+        format_ = 'FORMAT AS ORC'
+    elif element.format == Format.parquet:
+        format_ = 'FORMAT AS PARQUET'
+    elif element.format == Format.fixed_width and element.fixed_width is None:
+        raise sa_exc.CompileError(
+            "'fixed_width' argument required for format 'FIXEDWIDTH'.")
+    else:
+        format_ = ''
+
+    if element.delimiter is not None:
+        parameters.append('DELIMITER AS :delimiter_char')
+        bindparams.append(sa.bindparam(
+            'delimiter_char',
+            value=element.delimiter,
+            type_=sa.String,
+        ))
+
+    if element.fixed_width is not None:
+        parameters.append('FIXEDWIDTH AS :fixedwidth_spec')
+        bindparams.append(sa.bindparam(
+            'fixedwidth_spec',
+            value=_process_fixed_width(element.fixed_width),
+            type_=sa.String,
+        ))
+
+    if element.compression is not None:
+        parameters.append(Compression(element.compression).value)
+
+    if element.manifest:
+        parameters.append('MANIFEST')
+
+    if element.accept_any_date:
+        parameters.append('ACCEPTANYDATE')
+
+    if element.accept_inv_chars is not None:
+        parameters.append('ACCEPTINVCHARS AS :replacement_char')
+        bindparams.append(sa.bindparam(
+            'replacement_char',
+            value=element.accept_inv_chars,
+            type_=sa.String
+        ))
+
+    if element.blanks_as_null:
+        parameters.append('BLANKSASNULL')
+
+    if element.date_format is not None:
+        parameters.append('DATEFORMAT AS :dateformat_string')
+        bindparams.append(sa.bindparam(
+            'dateformat_string',
+            value=element.date_format,
+            type_=sa.String,
+        ))
+
+    if element.empty_as_null:
+        parameters.append('EMPTYASNULL')
+
+    if element.encoding is not None:
+        parameters.append('ENCODING AS ' + Encoding(element.encoding).value)
+
+    if element.escape:
+        parameters.append('ESCAPE')
+
+    if element.explicit_ids:
+        parameters.append('EXPLICIT_IDS')
+
+    if element.fill_record:
+        parameters.append('FILLRECORD')
+
+    if element.ignore_blank_lines:
+        parameters.append('IGNOREBLANKLINES')
+
+    if element.ignore_header is not None:
+        parameters.append('IGNOREHEADER AS :number_rows')
+        bindparams.append(sa.bindparam(
+            'number_rows',
+            value=element.ignore_header,
+            type_=sa.Integer,
+        ))
+
+    if element.dangerous_null_delimiter is not None:
+        parameters.append("NULL AS '%s'" % element.dangerous_null_delimiter)
+
+    if element.remove_quotes:
+        parameters.append('REMOVEQUOTES')
+
+    if element.roundec:
+        parameters.append('ROUNDEC')
+
+    if element.time_format is not None:
+        parameters.append('TIMEFORMAT AS :timeformat_string')
+        bindparams.append(sa.bindparam(
+            'timeformat_string',
+            value=element.time_format,
+            type_=sa.String,
+        ))
+
+    if element.trim_blanks:
+        parameters.append('TRIMBLANKS')
+
+    if element.truncate_columns:
+        parameters.append('TRUNCATECOLUMNS')
+
+    if element.comp_rows:
+        parameters.append('COMPROWS :numrows')
+        bindparams.append(sa.bindparam(
+            'numrows',
+            value=element.comp_rows,
+            type_=sa.Integer,
+        ))
+
+    if element.comp_update:
+        parameters.append('COMPUPDATE ON')
+    elif element.comp_update is not None:
+        parameters.append('COMPUPDATE OFF')
+
+    if element.max_error is not None:
+        parameters.append('MAXERROR AS :error_count')
+        bindparams.append(sa.bindparam(
+            'error_count',
+            value=element.max_error,
+            type_=sa.Integer,
+        ))
+
+    if element.no_load:
+        parameters.append('NOLOAD')
+
+    if element.stat_update:
+        parameters.append('STATUPDATE ON')
+    elif element.stat_update is not None:
+        parameters.append('STATUPDATE OFF')
+
+    if element.region is not None:
+        parameters.append('REGION :region')
+        bindparams.append(sa.bindparam(
+            'region',
+            value=element.region,
+            type_=sa.String
+        ))
+
+    columns = ' (%s)' % ', '.join(
+        compiler.preparer.format_column(column) for column in element.columns
+    ) if element.columns else ''
+
+    qs = qs.format(
+        table=compiler.preparer.format_table(element.table),
+        columns=columns,
+        format=format_,
+        parameters='\n'.join(parameters)
+    )
+
+    return compiler.process(sa.text(qs).bindparams(*bindparams), **kw)
+
+
+class CreateLibraryCommand(_ExecutableClause):
+    """Prepares a Redshift CREATE LIBRARY statement.
+    https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_LIBRARY.html
+
+    Parameters
+    ----------
+    library_name: str, required
+        The name of the library to install.
+    location: str, required
+        The location of the library file. Must be either an HTTP/HTTPS URL or
+        an S3 location.
+    access_key_id: str, optional
+        Access Key. Required unless you supply role-based credentials
+        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
+    secret_access_key: str, optional
+        Secret Access Key ID. Required unless you supply role-based credentials
+        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
+    session_token : str, optional
+    iam_role_arns : str or list of strings, optional
+        Either a single arn or a list of arns of roles to assume.
+        Required unless you supply key based credentials (``access_key_id`` and
+        ``secret_access_key``) or (``aws_account_id`` and ``iam_role_name``)
+        separately.
+    aws_partition: str, optional
+        AWS partition to use with role-based credentials. Defaults to
+        ``'aws'``. Not applicable when using key based credentials
+        (``access_key_id`` and ``secret_access_key``) or role arns
+        (``iam_role_arns``) directly.
+    aws_account_id: str, optional
+        AWS account ID for role-based credentials. Required unless you supply
+        key based credentials (``access_key_id`` and ``secret_access_key``)
+        or role arns (``iam_role_arns``) directly.
+    iam_role_name: str, optional
+        IAM role name for role-based credentials. Required unless you supply
+        key based credentials (``access_key_id`` and ``secret_access_key``)
+        or role arns (``iam_role_arns``) directly.
+    replace: bool, optional, default False
+        Controls the presence of ``OR REPLACE`` in the compiled statement. See
+        the command documentation for details.
+    region: str, optional
+        The AWS region where the library's S3 bucket is located, if the
+        Redshift cluster isn't in the same region as the S3 bucket.
+    """
+    def __init__(self, library_name, location, access_key_id=None,
+                 secret_access_key=None, session_token=None,
+                 aws_account_id=None, iam_role_name=None, replace=False,
+                 region=None, iam_role_arns=None):
+        self.library_name = library_name
+        self.location = location
+        self.credentials = _process_aws_credentials(
+            access_key_id=access_key_id,
+            secret_access_key=secret_access_key,
+            session_token=session_token,
+            aws_account_id=aws_account_id,
+            iam_role_name=iam_role_name,
+            iam_role_arns=iam_role_arns,
+        )
+        self.replace = replace
+        self.region = region
+
+
+@sa_compiler.compiles(CreateLibraryCommand)
+def visit_create_library_command(element, compiler, **kw):
+    """
+    Returns the actual sql query for the CreateLibraryCommand class.
+    """
+    query = """
+        CREATE {or_replace} LIBRARY {name}
+        LANGUAGE plpythonu
+        FROM :location
+        WITH CREDENTIALS AS :credentials
+        {region}
+    """
+    bindparams = [
+        sa.bindparam(
+            'location',
+            value=element.location,
+            type_=sa.String,
+        ),
+        sa.bindparam(
+            'credentials',
+            value=element.credentials,
+            type_=sa.String,
+        ),
+    ]
+
+    if element.region is not None:
+        bindparams.append(sa.bindparam(
+            'region',
+            value=element.region,
+            type_=sa.String,
+        ))
+
+    quoted_lib_name = compiler.preparer.quote_identifier(element.library_name)
+    query = query.format(name=quoted_lib_name,
+                         or_replace='OR REPLACE' if element.replace else '',
+                         region='REGION :region' if element.region else '')
+    return compiler.process(sa.text(query).bindparams(*bindparams), **kw)
+
+
+class RefreshMaterializedView(_ExecutableClause):
+    """
+    Prepares a Redshift REFRESH MATERIALIZED VIEW statement.
+    See:
+    docs.aws.amazon.com/redshift/latest/dg/materialized-view-refresh-sql-command
+
+    This reruns the query underlying the view to ensure the materialized data
+    is up to date.
+
+    >>> import sqlalchemy as sa
+    >>> from sqlalchemy_redshift.dialect import RefreshMaterializedView
+    >>> engine = sa.create_engine('redshift+psycopg2://example')
+    >>> refresh = RefreshMaterializedView('materialized_view_of_users')
+    >>> print(refresh.compile(engine))
+    <BLANKLINE>
+    REFRESH MATERIALIZED VIEW materialized_view_of_users
+    <BLANKLINE>
+    <BLANKLINE>
+
+    This can be included in any execute() statement.
+    """
+    def __init__(self, name):
+        """
+        Builds the Executable/ClauseElement that represents the refresh command
+
+        Parameters
+        ----------
+        name: str, required
+            The name of the view to refresh
+        """
+        self.name = name
+
+
+@sa_compiler.compiles(RefreshMaterializedView)
+def compile_refresh_materialized_view(element, compiler, **kw):
+    """
+    Formats and returns the refresh statement for materialized views.
+    """
+    text = "REFRESH MATERIALIZED VIEW {name}"
+    return text.format(name=element.name)
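
For orientation, here is a minimal usage sketch of the two most commonly used commands in the module added above. The import path follows the `mt/sql/redshift/commands.py` entry in RECORD and is assumed to be importable; the table, S3 locations, and role ARN are hypothetical:

```python
import sqlalchemy as sa

# Import path inferred from the wheel's RECORD; assumed importable.
from mt.sql.redshift.commands import CopyCommand, Format, UnloadFromSelect

users = sa.table('users', sa.column('id'), sa.column('name'))  # hypothetical table

# UNLOAD a query result to S3 as Parquet, assuming an IAM role.
unload = UnloadFromSelect(
    select=sa.select(users),
    unload_location='s3://example-bucket/users_',           # hypothetical bucket
    iam_role_arns='arn:aws:iam::123456789012:role/Loader',  # hypothetical ARN
    format=Format.parquet,
)

# COPY delimited files back into the table, skipping one header row per file.
copy = CopyCommand(
    to=users,
    data_location='s3://example-bucket/users.manifest',     # hypothetical manifest
    iam_role_arns='arn:aws:iam::123456789012:role/Loader',
    format=Format.csv,
    ignore_header=1,
    manifest=True,
)

# Both are Executable clauses: pass them to Connection.execute(), or render
# the SQL offline with str(clause.compile(dialect=engine.dialect)).
```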
mt/sql/version.py CHANGED
@@ -1,5 +1,5 @@
 MAJOR_VERSION = 1
 MINOR_VERSION = 11
-PATCH_VERSION = 17
+PATCH_VERSION = 19
 version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
 __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version']
mtsql-1.11.17.dist-info/METADATA → mtsql-1.11.19.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mtsql
-Version: 1.11.17
+Version: 1.11.19
 Summary: Extra Python modules to deal with the interaction between pandas dataframes and remote SQL servers, for Minh-Tri Pham
 Home-page: https://github.com/inteplus/mtsql
 Author: ['Minh-Tri Pham']
mtsql-1.11.17.dist-info/RECORD → mtsql-1.11.19.dist-info/RECORD
@@ -3,14 +3,15 @@ mt/sql/base.py,sha256=9aTVudlH9_HEq_v7uHV6p6asDuxMidQwAlCRSXDofvY,11967
 mt/sql/mysql.py,sha256=n2ENDctdUqZuSaDAcrqZYtPtawq3Wx4dOPCRsCB5Q4w,4894
 mt/sql/psql.py,sha256=AmXdDVRbvzK7hWK8kysrdiXkAUwSdWmh_OqlWdoNOig,66578
 mt/sql/sqlite.py,sha256=T2ak_hhNi_zRfpg_gp8JhNHn7D2kl4i-Ey6-9ANMtz0,8678
-mt/sql/version.py,sha256=aINC4XD2_fqhs2RYyDA6mLdPRCjYyqKgPOFgJNuhzPA,208
-mt/sql/redshift/__init__.py,sha256=RuP0MA6EuEO5UtVyGH3pou6oG0dRxCVZ7gCERWYjF5U,1116
+mt/sql/version.py,sha256=tyY0dIFdgmr7xvJ3BSpJm8kjZ4P75BSzhCQ9Ap9niF0,208
+mt/sql/redshift/__init__.py,sha256=OKRr1xTKbddzbXTsE_zrAkTI2kKi6CdemIpKC8M5gUI,1109
+mt/sql/redshift/commands.py,sha256=mzb7JGtCaVvwUQ_wzGDOIwpMTmrxTdRyXzMXHvpKYu8,39701
 mt/sql/redshift/ddl.py,sha256=1B6TfbKbMPdwxNjUpoa5kIpfEI6Ikow5g6lyFPYjcV8,9972
 mt/sql/redshift/dialect.py,sha256=bpXgVeckx79ogX_amN0_ZmWSVasrhX3U7pyHexVsktE,54868
 mt/sql/redshift/main.py,sha256=6dwnwNJ1F0_V9o2oqrSOkyN_pAMrgE01CCoqAjoyOME,17116
 mt/sql/redshift/redshift-ca-bundle.crt,sha256=532qYkOpQOstFE0mdXE1GVtL3v00XDKgZNTr6gK5-KE,8621
-mtsql-1.11.17.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
-mtsql-1.11.17.dist-info/METADATA,sha256=uEHNtF1hVroNTjt9XO8yCGAvCuXOgMldpBDVe5bawgs,675
-mtsql-1.11.17.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mtsql-1.11.17.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
-mtsql-1.11.17.dist-info/RECORD,,
+mtsql-1.11.19.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
+mtsql-1.11.19.dist-info/METADATA,sha256=z32mxPAiBzaths-bLANmka_lJz7DD12R6RYvRLt1o0M,675
+mtsql-1.11.19.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mtsql-1.11.19.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
+mtsql-1.11.19.dist-info/RECORD,,