mtsql 1.11.16-py3-none-any.whl → 1.11.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mt/sql/redshift/__init__.py +26 -0
- mt/sql/redshift/commands.py +1040 -0
- mt/sql/version.py +1 -1
- {mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/METADATA +1 -1
- {mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/RECORD +8 -7
- {mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/LICENSE +0 -0
- {mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/WHEEL +0 -0
- {mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/top_level.txt +0 -0
mt/sql/redshift/__init__.py
CHANGED
@@ -1,3 +1,6 @@
+from pkg_resources import DistributionNotFound, get_distribution, parse_version
+from sqlalchemy.dialects import registry  # noqa
+
 from .main import *
 
 __api__ = [
@@ -15,3 +18,26 @@ __api__ = [
     "drop_column",
     "conform",
 ]
+
+
+for package in ["psycopg2", "psycopg2-binary", "psycopg2cffi"]:
+    try:
+        if get_distribution(package).parsed_version < parse_version("2.5"):
+            raise ImportError("Minimum required version for psycopg2 is 2.5")
+        break
+    except DistributionNotFound:
+        pass
+
+registry.register("rs", "mt.sql.redshift.dialect", "RedshiftDialect_psycopg2")
+registry.register("rs.psycopg2", "mt.sql.redshift.dialect", "RedshiftDialect_psycopg2")
+registry.register(
+    "rs+psycopg2cffi",
+    "mt.sql.redshift.dialect",
+    "RedshiftDialect_psycopg2cffi",
+)
+
+registry.register(
+    "rs+redshift_connector",
+    "mt.sql.redshift.dialect",
+    "RedshiftDialect_redshift_connector",
+)
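The registrations above expose the dialect under the "rs" prefix, so a standard SQLAlchemy URL can select it once mt.sql.redshift is imported. A minimal sketch, assuming a reachable cluster; the host, port, credentials, and database below are illustrative placeholders, not values from this package:

import sqlalchemy as sa
import mt.sql.redshift  # noqa: F401 -- runs the registry.register calls above

# "rs+psycopg2" resolves to mt.sql.redshift.dialect.RedshiftDialect_psycopg2
# via the "rs.psycopg2" registration shown in the diff.
engine = sa.create_engine(
    "rs+psycopg2://user:secret@example-cluster.redshift.amazonaws.com:5439/dev"
)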
mt/sql/redshift/commands.py
ADDED
@@ -0,0 +1,1040 @@
import enum
import numbers
import re
import warnings
try:
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable

import sqlalchemy as sa
from sqlalchemy import exc as sa_exc
from sqlalchemy.ext import compiler as sa_compiler
from sqlalchemy.sql import expression as sa_expression


# At the time of this implementation, no specification for a session token was
# found. After looking at a few session tokens they appear to be the same as
# the aws_secret_access_key pattern, but much longer. An example token can be
# found here:
# https://docs.aws.amazon.com/STS/latest/APIReference/API_GetSessionToken.html
# The regexs for access keys can be found here:
# https://blogs.aws.amazon.com/security/blog/tag/key+rotation
# The pattern of IAM role ARNs can be found here:
# http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html#arn-syntax-iam

ACCESS_KEY_ID_RE = re.compile('[A-Z0-9]{20}')
SECRET_ACCESS_KEY_RE = re.compile('[A-Za-z0-9/+=]{40}')
TOKEN_RE = re.compile('[A-Za-z0-9/+=]+')
AWS_PARTITIONS = frozenset({'aws', 'aws-cn', 'aws-us-gov'})
AWS_ACCOUNT_ID_RE = re.compile('[0-9]{12}')
IAM_ROLE_NAME_RE = re.compile('[A-Za-z0-9+=,.@\-_]{1,64}')  # noqa
IAM_ROLE_ARN_RE = re.compile('arn:(aws|aws-cn|aws-us-gov):iam::'
                             '[0-9]{12}:role/[A-Za-z0-9+=,.@\-_]{1,64}')  # noqa


def _process_aws_credentials(access_key_id=None, secret_access_key=None,
                             session_token=None, aws_partition='aws',
                             aws_account_id=None, iam_role_name=None,
                             iam_role_arns=None):
    uses_iam_role = aws_account_id is not None and iam_role_name is not None
    uses_iam_roles = iam_role_arns is not None
    uses_key = access_key_id is not None and secret_access_key is not None

    if uses_iam_role + uses_iam_roles + uses_key > 1:
        raise TypeError(
            'Either access key based credentials or role based credentials '
            'should be specified, but not both'
        )

    credentials = None

    if aws_account_id is not None and iam_role_name is not None:
        if aws_partition not in AWS_PARTITIONS:
            raise ValueError('invalid AWS partition')
        if not AWS_ACCOUNT_ID_RE.match(aws_account_id):
            raise ValueError(
                'invalid AWS account ID; does not match {pattern}'.format(
                    pattern=AWS_ACCOUNT_ID_RE.pattern,
                )
            )
        elif not IAM_ROLE_NAME_RE.match(iam_role_name):
            raise ValueError(
                'invalid IAM role name; does not match {pattern}'.format(
                    pattern=IAM_ROLE_NAME_RE.pattern,
                )
            )

        credentials = 'aws_iam_role=arn:{0}:iam::{1}:role/{2}'.format(
            aws_partition,
            aws_account_id,
            iam_role_name,
        )

    if iam_role_arns is not None:
        if isinstance(iam_role_arns, str):
            iam_role_arns = [iam_role_arns]
        if not isinstance(iam_role_arns, list):
            raise ValueError('iam_role_arns must be a list')
        for arn in iam_role_arns:
            if not IAM_ROLE_ARN_RE.match(arn):
                raise ValueError(
                    'invalid AWS account ID; does not match {pattern}'.format(
                        pattern=IAM_ROLE_ARN_RE.pattern,
                    )
                )

        credentials = 'aws_iam_role=' + ','.join(iam_role_arns)

    if access_key_id is not None and secret_access_key is not None:
        if not ACCESS_KEY_ID_RE.match(access_key_id):
            raise ValueError(
                'invalid access_key_id; does not match {pattern}'.format(
                    pattern=ACCESS_KEY_ID_RE.pattern,
                )
            )
        if not SECRET_ACCESS_KEY_RE.match(secret_access_key):
            raise ValueError(
                'invalid secret_access_key; does not match {pattern}'.format(
                    pattern=SECRET_ACCESS_KEY_RE.pattern,
                )
            )

        credentials = 'aws_access_key_id={0};aws_secret_access_key={1}'.format(
            access_key_id,
            secret_access_key,
        )

        if session_token is not None:
            if not TOKEN_RE.match(session_token):
                raise ValueError(
                    'invalid session_token; does not match {pattern}'.format(
                        pattern=TOKEN_RE.pattern,
                    )
                )
            credentials += ';token={0}'.format(session_token)

    if credentials is None:
        raise TypeError(
            'Either access key based credentials or role based credentials '
            'should be specified'
        )

    return credentials


def _process_fixed_width(spec):
    return ','.join(('{0}:{1:d}'.format(col, width) for col, width in spec))


class _ExecutableClause(sa_expression.Executable,
                        sa_expression.ClauseElement):
    pass


class AlterTableAppendCommand(_ExecutableClause):
    """
    Prepares an `ALTER TABLE APPEND` statement to efficiently move data from
    one table to another, much faster than an INSERT INTO ... SELECT.

    CAUTION: This moves the underlying storage blocks from the source table to
    the target table, so the source table will be *empty* after this command
    finishes.

    See the documentation for additional restrictions and other information:
    https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE_APPEND.html

    Parameters
    ----------

    source: sqlalchemy.Table
        The table to move data from. Must be an existing permanent table.
    target: sqlalchemy.Table
        The table to move data into. Must be an existing permanent table.
    ignore_extra: bool, optional
        If the source table includes columns not present in the target table,
        discard those columns. Mutually exclusive with `fill_target`.
    fill_target: bool, optional
        If the target table includes columns not present in the source table,
        fill those columns with the default column value or NULL. Mutually
        exclusive with `ignore_extra`.
    """
    def __init__(self, source, target, ignore_extra=False, fill_target=False):
        if ignore_extra and fill_target:
            raise ValueError(
                '"ignore_extra" cannot be used with "fill_target".')

        self.source = source
        self.target = target
        self.ignore_extra = ignore_extra
        self.fill_target = fill_target


@sa_compiler.compiles(AlterTableAppendCommand)
def visit_alter_table_append_command(element, compiler, **kw):
    """
    Returns the actual SQL query for the AlterTableAppendCommand class.
    """
    if element.ignore_extra:
        fill_option = 'IGNOREEXTRA'
    elif element.fill_target:
        fill_option = 'FILLTARGET'
    else:
        fill_option = ''

    query_text = \
        'ALTER TABLE {target} APPEND FROM {source} {fill_option}'.format(
            target=compiler.preparer.format_table(element.target),
            source=compiler.preparer.format_table(element.source),
            fill_option=fill_option,
        )
    return compiler.process(sa.text(query_text), **kw)


class UnloadFromSelect(_ExecutableClause):
    """
    Prepares a Redshift unload statement to drop a query to Amazon S3
    https://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD_command_examples.html

    Parameters
    ----------
    select: sqlalchemy.sql.selectable.Selectable
        The selectable Core Table Expression query to unload from.
    unload_location: str
        The Amazon S3 location where the file will be created, or a manifest
        file if the `manifest` option is used
    access_key_id: str, optional
        Access Key. Required unless you supply role-based credentials
        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
    secret_access_key: str, optional
        Secret Access Key ID. Required unless you supply role-based credentials
        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
    session_token : str, optional
    iam_role_arns : str or list of strings, optional
        Either a single arn or a list of arns of roles to assume when unloading
        Required unless you supply key based credentials (``access_key_id`` and
        ``secret_access_key``) or (``aws_account_id`` and ``iam_role_name``)
        separately.
    aws_partition: str, optional
        AWS partition to use with role-based credentials. Defaults to
        ``'aws'``. Not applicable when using key based credentials
        (``access_key_id`` and ``secret_access_key``) or role arns
        (``iam_role_arns``) directly.
    aws_account_id: str, optional
        AWS account ID for role-based credentials. Required unless you supply
        key based credentials (``access_key_id`` and ``secret_access_key``)
        or role arns (``iam_role_arns``) directly.
    iam_role_name: str, optional
        IAM role name for role-based credentials. Required unless you supply
        key based credentials (``access_key_id`` and ``secret_access_key``)
        or role arns (``iam_role_arns``) directly.
    manifest: bool, optional
        Boolean value denoting whether data_location is a manifest file.
    delimiter: File delimiter, optional
        defaults to '|'
    fixed_width: iterable of (str, int), optional
        List of (column name, length) pairs to control fixed-width output.
    encrypted: bool, optional
        Write to encrypted S3 key.
    gzip: bool, optional
        Create file using GZIP compression.
    add_quotes: bool, optional
        Quote fields so that fields containing the delimiter can be
        distinguished.
    null: str, optional
        Write null values as the given string. Defaults to ''.
    escape: bool, optional
        For CHAR and VARCHAR columns in delimited unload files, an escape
        character (``\\``) is placed before every occurrence of the following
        characters: ``\\r``, ``\\n``, ``\\``, the specified delimiter string.
        If `add_quotes` is specified, ``"`` and ``'`` are also escaped.
    allow_overwrite: bool, optional
        Overwrite the key at unload_location in the S3 bucket.
    parallel: bool, optional
        If disabled unload sequentially as one file.
    header: bool, optional
        Boolean value denoting whether to add header line
        containing column names at the top of each output file.
        Text transformation options, such as delimiter, add_quotes,
        and escape, also apply to the header line.
        `header` can't be used with fixed_width.
    region: str, optional
        The AWS region where the target S3 bucket is located, if the Redshift
        cluster isn't in the same region as the S3 bucket.
    max_file_size: int, optional
        Maximum size (in bytes) of files to create in S3. This must be between
        5 * 1024**2 and 6.24 * 1024**3. Note that Redshift appears to round
        to the nearest KiB.
    format : Format, optional
        Indicates the type of file to unload to.
    """

    def __init__(self, select, unload_location, access_key_id=None,
                 secret_access_key=None, session_token=None,
                 aws_partition='aws', aws_account_id=None, iam_role_name=None,
                 manifest=False, delimiter=None, fixed_width=None,
                 encrypted=False, gzip=False, add_quotes=False, null=None,
                 escape=False, allow_overwrite=False, parallel=True,
                 header=False, region=None, max_file_size=None,
                 format=None, iam_role_arns=None):

        if delimiter is not None and len(delimiter) != 1:
            raise ValueError(
                '"delimiter" parameter must be a single character'
            )

        if header and fixed_width is not None:
            raise ValueError(
                "'header' cannot be used with 'fixed_width'"
            )

        credentials = _process_aws_credentials(
            access_key_id=access_key_id,
            secret_access_key=secret_access_key,
            session_token=session_token,
            aws_partition=aws_partition,
            aws_account_id=aws_account_id,
            iam_role_name=iam_role_name,
            iam_role_arns=iam_role_arns,
        )

        self.select = select
        self.unload_location = unload_location
        self.credentials = credentials
        self.manifest = manifest
        self.header = header
        self.format = _check_enum(Format, format)
        self.delimiter = delimiter
        self.fixed_width = fixed_width
        self.encrypted = encrypted
        self.gzip = gzip
        self.add_quotes = add_quotes
        self.null = null
        self.escape = escape
        self.allow_overwrite = allow_overwrite
        self.parallel = parallel
        self.region = region
        self.max_file_size = max_file_size


@sa_compiler.compiles(UnloadFromSelect)
def visit_unload_from_select(element, compiler, **kw):
    """Returns the actual sql query for the UnloadFromSelect class."""

    template = """
       UNLOAD (:select) TO :unload_location
       CREDENTIALS :credentials
       {manifest}
       {header}
       {format}
       {delimiter}
       {encrypted}
       {fixed_width}
       {gzip}
       {add_quotes}
       {null}
       {escape}
       {allow_overwrite}
       {parallel}
       {region}
       {max_file_size}
    """
    el = element

    if el.format is None:
        format_ = ''
    elif el.format == Format.csv:
        format_ = 'FORMAT AS {}'.format(el.format.value)
        if el.delimiter is not None or el.fixed_width is not None:
            raise ValueError(
                'CSV format cannot be used with delimiter or fixed_width')
    elif el.format == Format.parquet:
        format_ = 'FORMAT AS {}'.format(el.format.value)
        if any((
            el.delimiter, el.fixed_width, el.add_quotes, el.escape, el.null,
            el.header, el.gzip
        )):
            raise ValueError(
                "Parquet format can't be used with `delimiter`, `fixed_width`,"
                ' `add_quotes`, `escape`, `null`, `header`, or `gzip`.'
            )
    else:
        raise ValueError(
            'Only CSV and Parquet formats are currently supported.'
        )

    qs = template.format(
        manifest='MANIFEST' if el.manifest else '',
        header='HEADER' if el.header else '',
        format=format_,
        delimiter=(
            'DELIMITER AS :delimiter' if el.delimiter is not None else ''
        ),
        encrypted='ENCRYPTED' if el.encrypted else '',
        fixed_width='FIXEDWIDTH AS :fixed_width' if el.fixed_width else '',
        gzip='GZIP' if el.gzip else '',
        add_quotes='ADDQUOTES' if el.add_quotes else '',
        escape='ESCAPE' if el.escape else '',
        null='NULL AS :null_as' if el.null is not None else '',
        allow_overwrite='ALLOWOVERWRITE' if el.allow_overwrite else '',
        parallel='PARALLEL OFF' if not el.parallel else '',
        region='REGION :region' if el.region is not None else '',
        max_file_size=(
            'MAXFILESIZE :max_file_size MB'
            if el.max_file_size is not None else ''
        ),
    )

    query = sa.text(qs)

    if el.delimiter is not None:
        query = query.bindparams(sa.bindparam(
            'delimiter', value=element.delimiter, type_=sa.String,
        ))

    if el.fixed_width:
        query = query.bindparams(sa.bindparam(
            'fixed_width',
            value=_process_fixed_width(el.fixed_width),
            type_=sa.String,
        ))

    if el.null is not None:
        query = query.bindparams(sa.bindparam(
            'null_as', value=el.null, type_=sa.String
        ))

    if el.region is not None:
        query = query.bindparams(sa.bindparam(
            'region', value=el.region, type_=sa.String
        ))

    if el.max_file_size is not None:
        max_file_size_mib = float(el.max_file_size) / 1024 / 1024
        query = query.bindparams(sa.bindparam(
            'max_file_size', value=max_file_size_mib, type_=sa.Float
        ))

    return compiler.process(
        query.bindparams(
            sa.bindparam('credentials', value=el.credentials, type_=sa.String),
            sa.bindparam(
                'unload_location', value=el.unload_location, type_=sa.String,
            ),
            sa.bindparam(
                'select',
                value=compiler.process(
                    el.select,
                    literal_binds=True,
                ),
                type_=sa.String,
            ),
        ),
        **kw
    )


class Format(enum.Enum):
    csv = 'CSV'
    json = 'JSON'
    avro = 'AVRO'
    orc = 'ORC'
    parquet = 'PARQUET'
    fixed_width = 'FIXEDWIDTH'


class Compression(enum.Enum):
    gzip = 'GZIP'
    lzop = 'LZOP'
    bzip2 = 'BZIP2'


class Encoding(enum.Enum):
    utf8 = 'UTF8'
    utf16 = 'UTF16'
    utf16le = 'UTF16LE'
    utf16be = 'UTF16BE'


def _check_enum(Enum, val):
    if val is None:
        return

    cleaned = Enum(val)
    if cleaned is not val:
        tpl = '{val!r} should be, {cleaned!r}, an instance of {Enum!r}'
        msg = tpl.format(val=val, cleaned=cleaned, Enum=Enum)
        warnings.warn(msg, DeprecationWarning)

    return cleaned


class CopyCommand(_ExecutableClause):
    """
    Prepares a Redshift COPY statement.

    Parameters
    ----------
    to : sqlalchemy.Table or iterable of sqlalchemy.ColumnElement
        The table or columns to copy data into
    data_location : str
        The Amazon S3 location from where to copy, or a manifest file if
        the `manifest` option is used
    access_key_id: str, optional
        Access Key. Required unless you supply role-based credentials
        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
    secret_access_key: str, optional
        Secret Access Key ID. Required unless you supply role-based credentials
        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
    session_token : str, optional
    iam_role_arns : str or list of strings, optional
        Either a single arn or a list of arns of roles to assume when unloading
        Required unless you supply key based credentials (``access_key_id`` and
        ``secret_access_key``) or (``aws_account_id`` and ``iam_role_name``)
        separately.
    aws_partition: str, optional
        AWS partition to use with role-based credentials. Defaults to
        ``'aws'``. Not applicable when using key based credentials
        (``access_key_id`` and ``secret_access_key``) or role arns
        (``iam_role_arns``) directly.
    aws_account_id: str, optional
        AWS account ID for role-based credentials. Required unless you supply
        key based credentials (``access_key_id`` and ``secret_access_key``)
        or role arns (``iam_role_arns``) directly.
    iam_role_name: str, optional
        IAM role name for role-based credentials. Required unless you supply
        key based credentials (``access_key_id`` and ``secret_access_key``)
        or role arns (``iam_role_arns``) directly.
    format : Format, optional
        Indicates the type of file to copy from
    quote : str, optional
        Specifies the character to be used as the quote character when using
        ``format=Format.csv``. The default is a double quotation mark ( ``"`` )
    delimiter : Field delimiter, optional
        defaults to ``|``
    path_file : str, optional
        Specifies an Amazon S3 location to a JSONPaths file to explicitly map
        Avro or JSON data elements to columns.
        defaults to ``'auto'``
    fixed_width: iterable of (str, int), optional
        List of (column name, length) pairs to control fixed-width output.
    compression : Compression, optional
        indicates the type of compression of the file to copy
    accept_any_date : bool, optional
        Allows any date format, including invalid formats such as
        ``00/00/00 00:00:00``, to be loaded as NULL without generating an error
        defaults to False
    accept_inv_chars : str, optional
        Enables loading of data into VARCHAR columns even if the data contains
        invalid UTF-8 characters. When specified each invalid UTF-8 byte is
        replaced by the specified replacement character
    blanks_as_null : bool, optional
        Boolean value denoting whether to load VARCHAR fields with whitespace
        only values as NULL instead of whitespace
    date_format : str, optional
        Specified the date format. If you want Amazon Redshift to automatically
        recognize and convert the date format in your source data, specify
        ``'auto'``
    empty_as_null : bool, optional
        Boolean value denoting whether to load VARCHAR fields with empty
        values as NULL instead of empty string
    encoding : Encoding, optional
        Specifies the encoding type of the load data defaults to
        ``Encoding.utf8``
    escape : bool, optional
        When this parameter is specified, the backslash character (``\\``) in
        input data is treated as an escape character. The character that
        immediately follows the backslash character is loaded into the table
        as part of the current column value, even if it is a character that
        normally serves a special purpose
    explicit_ids : bool, optional
        Override the autogenerated IDENTITY column values with explicit values
        from the source data files for the tables
    fill_record : bool, optional
        Allows data files to be loaded when contiguous columns are missing at
        the end of some of the records. The missing columns are filled with
        either zero-length strings or NULLs, as appropriate for the data types
        of the columns in question.
    ignore_blank_lines : bool, optional
        Ignores blank lines that only contain a line feed in a data file and
        does not try to load them
    ignore_header : int, optional
        Integer value of number of lines to skip at the start of each file
    dangerous_null_delimiter : str, optional
        Optional string value denoting what to interpret as a NULL value from
        the file. Note that this parameter *is not properly quoted* due to a
        difference between redshift's and postgres's COPY commands
        interpretation of strings. For example, null bytes must be passed to
        redshift's ``NULL`` verbatim as ``'\\0'`` whereas postgres's ``NULL``
        accepts ``'\\x00'``.
    remove_quotes : bool, optional
        Removes surrounding quotation marks from strings in the incoming data.
        All characters within the quotation marks, including delimiters, are
        retained.
    roundec : bool, optional
        Rounds up numeric values when the scale of the input value is greater
        than the scale of the column
    time_format : str, optional
        Specified the date format. If you want Amazon Redshift to automatically
        recognize and convert the time format in your source data, specify
        ``'auto'``
    trim_blanks : bool, optional
        Removes the trailing white space characters from a VARCHAR string
    truncate_columns : bool, optional
        Truncates data in columns to the appropriate number of characters so
        that it fits the column specification
    comp_rows : int, optional
        Specifies the number of rows to be used as the sample size for
        compression analysis
    comp_update : bool, optional
        Controls whether compression encodings are automatically applied.
        If omitted or None, COPY applies automatic compression only if the
        target table is empty and all the table columns either have RAW
        encoding or no encoding.
        If True COPY applies automatic compression if the table is empty, even
        if the table columns already have encodings other than RAW.
        If False automatic compression is disabled
    max_error : int, optional
        If the load returns the ``max_error`` number of errors or greater, the
        load fails
        defaults to 100000
    no_load : bool, optional
        Checks the validity of the data file without actually loading the data
    stat_update : bool, optional
        Update statistics automatically regardless of whether the table is
        initially empty
    manifest : bool, optional
        Boolean value denoting whether data_location is a manifest file.
    region: str, optional
        The AWS region where the target S3 bucket is located, if the Redshift
        cluster isn't in the same region as the S3 bucket.
    """

    def __init__(self, to, data_location, access_key_id=None,
                 secret_access_key=None, session_token=None,
                 aws_partition='aws', aws_account_id=None, iam_role_name=None,
                 format=None, quote=None,
                 path_file='auto', delimiter=None, fixed_width=None,
                 compression=None, accept_any_date=False,
                 accept_inv_chars=None, blanks_as_null=False, date_format=None,
                 empty_as_null=False, encoding=None, escape=False,
                 explicit_ids=False, fill_record=False,
                 ignore_blank_lines=False, ignore_header=None,
                 dangerous_null_delimiter=None, remove_quotes=False,
                 roundec=False, time_format=None, trim_blanks=False,
                 truncate_columns=False, comp_rows=None, comp_update=None,
                 max_error=None, no_load=False, stat_update=None,
                 manifest=False, region=None, iam_role_arns=None):

        credentials = _process_aws_credentials(
            access_key_id=access_key_id,
            secret_access_key=secret_access_key,
            session_token=session_token,
            aws_partition=aws_partition,
            aws_account_id=aws_account_id,
            iam_role_name=iam_role_name,
            iam_role_arns=iam_role_arns,
        )

        if delimiter is not None and len(delimiter) != 1:
            raise ValueError('"delimiter" parameter must be a single '
                             'character')

        if ignore_header is not None:
            if not isinstance(ignore_header, numbers.Integral):
                raise TypeError(
                    '"ignore_header" parameter should be an integer'
                )

        table = None
        columns = []
        if isinstance(to, Iterable):
            for column in to:
                if table is not None and table != column.table:
                    raise ValueError(
                        'All columns must come from the same table: '
                        '%s comes from %s not %s' % (
                            column, column.table, table
                        ),
                    )
                columns.append(column)
                table = column.table
        else:
            table = to

        self.table = table
        self.columns = columns
        self.data_location = data_location
        self.credentials = credentials
        self.format = _check_enum(Format, format)
        self.quote = quote
        self.path_file = path_file
        self.delimiter = delimiter
        self.fixed_width = fixed_width
        self.compression = _check_enum(Compression, compression)
        self.manifest = manifest
        self.accept_any_date = accept_any_date
        self.accept_inv_chars = accept_inv_chars
        self.blanks_as_null = blanks_as_null
        self.date_format = date_format
        self.empty_as_null = empty_as_null
        self.encoding = _check_enum(Encoding, encoding)
        self.escape = escape
        self.explicit_ids = explicit_ids
        self.fill_record = fill_record
        self.ignore_blank_lines = ignore_blank_lines
        self.ignore_header = ignore_header
        self.dangerous_null_delimiter = dangerous_null_delimiter
        self.remove_quotes = remove_quotes
        self.roundec = roundec
        self.time_format = time_format
        self.trim_blanks = trim_blanks
        self.truncate_columns = truncate_columns
        self.comp_rows = comp_rows
        self.comp_update = comp_update
        self.max_error = max_error
        self.no_load = no_load
        self.stat_update = stat_update
        self.region = region


@sa_compiler.compiles(CopyCommand)
def visit_copy_command(element, compiler, **kw):
    """
    Returns the actual sql query for the CopyCommand class.
    """
    qs = """COPY {table}{columns} FROM :data_location
    WITH CREDENTIALS AS :credentials
    {format}
    {parameters}"""
    parameters = []
    bindparams = [
        sa.bindparam(
            'data_location',
            value=element.data_location,
            type_=sa.String,
        ),
        sa.bindparam(
            'credentials',
            value=element.credentials,
            type_=sa.String,
        ),
    ]

    if element.format == Format.csv:
        format_ = 'FORMAT AS CSV'
        if element.quote is not None:
            format_ += ' QUOTE AS :quote_character'
            bindparams.append(sa.bindparam(
                'quote_character',
                value=element.quote,
                type_=sa.String,
            ))
    elif element.format == Format.json:
        format_ = 'FORMAT AS JSON AS :json_option'
        bindparams.append(sa.bindparam(
            'json_option',
            value=element.path_file,
            type_=sa.String,
        ))
    elif element.format == Format.avro:
        format_ = 'FORMAT AS AVRO AS :avro_option'
        bindparams.append(sa.bindparam(
            'avro_option',
            value=element.path_file,
            type_=sa.String,
        ))
    elif element.format == Format.orc:
        format_ = 'FORMAT AS ORC'
    elif element.format == Format.parquet:
        format_ = 'FORMAT AS PARQUET'
    elif element.format == Format.fixed_width and element.fixed_width is None:
        raise sa_exc.CompileError(
            "'fixed_width' argument required for format 'FIXEDWIDTH'.")
    else:
        format_ = ''

    if element.delimiter is not None:
        parameters.append('DELIMITER AS :delimiter_char')
        bindparams.append(sa.bindparam(
            'delimiter_char',
            value=element.delimiter,
            type_=sa.String,
        ))

    if element.fixed_width is not None:
        parameters.append('FIXEDWIDTH AS :fixedwidth_spec')
        bindparams.append(sa.bindparam(
            'fixedwidth_spec',
            value=_process_fixed_width(element.fixed_width),
            type_=sa.String,
        ))

    if element.compression is not None:
        parameters.append(Compression(element.compression).value)

    if element.manifest:
        parameters.append('MANIFEST')

    if element.accept_any_date:
        parameters.append('ACCEPTANYDATE')

    if element.accept_inv_chars is not None:
        parameters.append('ACCEPTINVCHARS AS :replacement_char')
        bindparams.append(sa.bindparam(
            'replacement_char',
            value=element.accept_inv_chars,
            type_=sa.String
        ))

    if element.blanks_as_null:
        parameters.append('BLANKSASNULL')

    if element.date_format is not None:
        parameters.append('DATEFORMAT AS :dateformat_string')
        bindparams.append(sa.bindparam(
            'dateformat_string',
            value=element.date_format,
            type_=sa.String,
        ))

    if element.empty_as_null:
        parameters.append('EMPTYASNULL')

    if element.encoding is not None:
        parameters.append('ENCODING AS ' + Encoding(element.encoding).value)

    if element.escape:
        parameters.append('ESCAPE')

    if element.explicit_ids:
        parameters.append('EXPLICIT_IDS')

    if element.fill_record:
        parameters.append('FILLRECORD')

    if element.ignore_blank_lines:
        parameters.append('IGNOREBLANKLINES')

    if element.ignore_header is not None:
        parameters.append('IGNOREHEADER AS :number_rows')
        bindparams.append(sa.bindparam(
            'number_rows',
            value=element.ignore_header,
            type_=sa.Integer,
        ))

    if element.dangerous_null_delimiter is not None:
        parameters.append("NULL AS '%s'" % element.dangerous_null_delimiter)

    if element.remove_quotes:
        parameters.append('REMOVEQUOTES')

    if element.roundec:
        parameters.append('ROUNDEC')

    if element.time_format is not None:
        parameters.append('TIMEFORMAT AS :timeformat_string')
        bindparams.append(sa.bindparam(
            'timeformat_string',
            value=element.time_format,
            type_=sa.String,
        ))

    if element.trim_blanks:
        parameters.append('TRIMBLANKS')

    if element.truncate_columns:
        parameters.append('TRUNCATECOLUMNS')

    if element.comp_rows:
        parameters.append('COMPROWS :numrows')
        bindparams.append(sa.bindparam(
            'numrows',
            value=element.comp_rows,
            type_=sa.Integer,
        ))

    if element.comp_update:
        parameters.append('COMPUPDATE ON')
    elif element.comp_update is not None:
        parameters.append('COMPUPDATE OFF')

    if element.max_error is not None:
        parameters.append('MAXERROR AS :error_count')
        bindparams.append(sa.bindparam(
            'error_count',
            value=element.max_error,
            type_=sa.Integer,
        ))

    if element.no_load:
        parameters.append('NOLOAD')

    if element.stat_update:
        parameters.append('STATUPDATE ON')
    elif element.stat_update is not None:
        parameters.append('STATUPDATE OFF')

    if element.region is not None:
        parameters.append('REGION :region')
        bindparams.append(sa.bindparam(
            'region',
            value=element.region,
            type_=sa.String
        ))

    columns = ' (%s)' % ', '.join(
        compiler.preparer.format_column(column) for column in element.columns
    ) if element.columns else ''

    qs = qs.format(
        table=compiler.preparer.format_table(element.table),
        columns=columns,
        format=format_,
        parameters='\n'.join(parameters)
    )

    return compiler.process(sa.text(qs).bindparams(*bindparams), **kw)


class CreateLibraryCommand(_ExecutableClause):
    """Prepares a Redshift CREATE LIBRARY statement.
    https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_LIBRARY.html

    Parameters
    ----------
    library_name: str, required
        The name of the library to install.
    location: str, required
        The location of the library file. Must be either a HTTP/HTTPS URL or an
        S3 location.
    access_key_id: str, optional
        Access Key. Required unless you supply role-based credentials
        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
    secret_access_key: str, optional
        Secret Access Key ID. Required unless you supply role-based credentials
        (``aws_account_id`` and ``iam_role_name`` or ``iam_role_arns``)
    session_token : str, optional
    iam_role_arns : str or list of strings, optional
        Either a single arn or a list of arns of roles to assume when unloading
        Required unless you supply key based credentials (``access_key_id`` and
        ``secret_access_key``) or (``aws_account_id`` and ``iam_role_name``)
        separately.
    aws_partition: str, optional
        AWS partition to use with role-based credentials. Defaults to
        ``'aws'``. Not applicable when using key based credentials
        (``access_key_id`` and ``secret_access_key``) or role arns
        (``iam_role_arns``) directly.
    aws_account_id: str, optional
        AWS account ID for role-based credentials. Required unless you supply
        key based credentials (``access_key_id`` and ``secret_access_key``)
        or role arns (``iam_role_arns``) directly.
    iam_role_name: str, optional
        IAM role name for role-based credentials. Required unless you supply
        key based credentials (``access_key_id`` and ``secret_access_key``)
        or role arns (``iam_role_arns``) directly.
    replace: bool, optional, default False
        Controls the presence of ``OR REPLACE`` in the compiled statement. See
        the command documentation for details.
    region: str, optional
        The AWS region where the library's S3 bucket is located, if the
        Redshift cluster isn't in the same region as the S3 bucket.
    """
    def __init__(self, library_name, location, access_key_id=None,
                 secret_access_key=None, session_token=None,
                 aws_account_id=None, iam_role_name=None, replace=False,
                 region=None, iam_role_arns=None):
        self.library_name = library_name
        self.location = location
        self.credentials = _process_aws_credentials(
            access_key_id=access_key_id,
            secret_access_key=secret_access_key,
            session_token=session_token,
            aws_account_id=aws_account_id,
            iam_role_name=iam_role_name,
            iam_role_arns=iam_role_arns,
        )
        self.replace = replace
        self.region = region


@sa_compiler.compiles(CreateLibraryCommand)
def visit_create_library_command(element, compiler, **kw):
    """
    Returns the actual sql query for the CreateLibraryCommand class.
    """
    query = """
        CREATE {or_replace} LIBRARY {name}
        LANGUAGE pythonplu
        FROM :location
        WITH CREDENTIALS AS :credentials
        {region}
    """
    bindparams = [
        sa.bindparam(
            'location',
            value=element.location,
            type_=sa.String,
        ),
        sa.bindparam(
            'credentials',
            value=element.credentials,
            type_=sa.String,
        ),
    ]

    if element.region is not None:
        bindparams.append(sa.bindparam(
            'region',
            value=element.region,
            type_=sa.String,
        ))

    quoted_lib_name = compiler.preparer.quote_identifier(element.library_name)
    query = query.format(name=quoted_lib_name,
                         or_replace='OR REPLACE' if element.replace else '',
                         region='REGION :region' if element.region else '')
    return compiler.process(sa.text(query).bindparams(*bindparams), **kw)


class RefreshMaterializedView(_ExecutableClause):
    """
    Prepares a Redshift REFRESH MATERIALIZED VIEW statement.
    SEE:
    docs.aws.amazon.com/redshift/latest/dg/materialized-view-refresh-sql-command

    This reruns the query underlying the view to ensure the materialized data
    is up to date.

    >>> import sqlalchemy as sa
    >>> from sqlalchemy_redshift.dialect import RefreshMaterializedView
    >>> engine = sa.create_engine('redshift+psycopg2://example')
    >>> refresh = RefreshMaterializedView('materialized_view_of_users')
    >>> print(refresh.compile(engine))
    <BLANKLINE>
    REFRESH MATERIALIZED VIEW materialized_view_of_users
    <BLANKLINE>
    <BLANKLINE>

    This can be included in any execute() statement.
    """
    def __init__(self, name):
        """
        Builds the Executable/ClauseElement that represents the refresh command

        Parameters
        ----------
        name: str, required
            The name of the view to refresh
        """
        self.name = name


@sa_compiler.compiles(RefreshMaterializedView)
def compile_refresh_materialized_view(element, compiler, **kw):
    """
    Formats and returns the refresh statement for materialized views.
    """
    text = "REFRESH MATERIALIZED VIEW {name}"
    return text.format(name=element.name)
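The new commands module appears to be adapted from the sqlalchemy-redshift project (its doctest still imports from sqlalchemy_redshift.dialect): each class is an Executable clause that compiles to a Redshift-specific statement when executed on a connection. A minimal usage sketch, assuming an engine created with the dialect registered above; the table definition, S3 paths, and IAM role ARN are illustrative placeholders only:

import sqlalchemy as sa
from mt.sql.redshift.commands import CopyCommand, Format, UnloadFromSelect

meta = sa.MetaData()
users = sa.Table(
    "users", meta,
    sa.Column("id", sa.Integer),
    sa.Column("name", sa.String(64)),
)

# COPY a CSV file from S3 into the table, skipping one header row.
copy = CopyCommand(
    to=users,
    data_location="s3://example-bucket/users/data.csv",
    iam_role_arns="arn:aws:iam::123456789012:role/ExampleRedshiftRole",
    format=Format.csv,
    ignore_header=1,
)

# UNLOAD a query back to S3 as Parquet files (SQLAlchemy 1.4+ select() style).
unload = UnloadFromSelect(
    select=sa.select(users),
    unload_location="s3://example-bucket/exports/users_",
    iam_role_arns="arn:aws:iam::123456789012:role/ExampleRedshiftRole",
    format=Format.parquet,
)

# engine = sa.create_engine("rs+psycopg2://...")
# with engine.begin() as conn:
#     conn.execute(copy)
#     conn.execute(unload)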
mt/sql/version.py
CHANGED
{mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mtsql
-Version: 1.11.16
+Version: 1.11.18
 Summary: Extra Python modules to deal with the interaction between pandas dataframes and remote SQL servers, for Minh-Tri Pham
 Home-page: https://github.com/inteplus/mtsql
 Author: ['Minh-Tri Pham']
{mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/RECORD
CHANGED
@@ -3,14 +3,15 @@ mt/sql/base.py,sha256=9aTVudlH9_HEq_v7uHV6p6asDuxMidQwAlCRSXDofvY,11967
 mt/sql/mysql.py,sha256=n2ENDctdUqZuSaDAcrqZYtPtawq3Wx4dOPCRsCB5Q4w,4894
 mt/sql/psql.py,sha256=AmXdDVRbvzK7hWK8kysrdiXkAUwSdWmh_OqlWdoNOig,66578
 mt/sql/sqlite.py,sha256=T2ak_hhNi_zRfpg_gp8JhNHn7D2kl4i-Ey6-9ANMtz0,8678
-mt/sql/version.py,sha256=
-mt/sql/redshift/__init__.py,sha256=
+mt/sql/version.py,sha256=gfPfLM5HeoiuODbiAI1qDaG4fqYR1qGw1WekUZ8ZpTo,208
+mt/sql/redshift/__init__.py,sha256=RuP0MA6EuEO5UtVyGH3pou6oG0dRxCVZ7gCERWYjF5U,1116
+mt/sql/redshift/commands.py,sha256=mzb7JGtCaVvwUQ_wzGDOIwpMTmrxTdRyXzMXHvpKYu8,39701
 mt/sql/redshift/ddl.py,sha256=1B6TfbKbMPdwxNjUpoa5kIpfEI6Ikow5g6lyFPYjcV8,9972
 mt/sql/redshift/dialect.py,sha256=bpXgVeckx79ogX_amN0_ZmWSVasrhX3U7pyHexVsktE,54868
 mt/sql/redshift/main.py,sha256=6dwnwNJ1F0_V9o2oqrSOkyN_pAMrgE01CCoqAjoyOME,17116
 mt/sql/redshift/redshift-ca-bundle.crt,sha256=532qYkOpQOstFE0mdXE1GVtL3v00XDKgZNTr6gK5-KE,8621
-mtsql-1.11.
-mtsql-1.11.
-mtsql-1.11.
-mtsql-1.11.
-mtsql-1.11.
+mtsql-1.11.18.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
+mtsql-1.11.18.dist-info/METADATA,sha256=8mAoLMUhkidBfQWCE1fJnBvW28gWBUPIKn4j9Yg6bzA,675
+mtsql-1.11.18.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mtsql-1.11.18.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
+mtsql-1.11.18.dist-info/RECORD,,
{mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/LICENSE: file without changes
{mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/WHEEL: file without changes
{mtsql-1.11.16.dist-info → mtsql-1.11.18.dist-info}/top_level.txt: file without changes