AWSGlueDataplanePython 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awsglue/README.md +37 -0
- awsglue/__init__.py +15 -0
- awsglue/context.py +690 -0
- awsglue/data_sink.py +49 -0
- awsglue/data_source.py +49 -0
- awsglue/dataframe_transforms/__init__.py +17 -0
- awsglue/dataframe_transforms/apply_mapping.py +76 -0
- awsglue/dataframereader.py +41 -0
- awsglue/dataframewriter.py +21 -0
- awsglue/devutils.py +236 -0
- awsglue/dynamicframe.py +669 -0
- awsglue/functions.py +31 -0
- awsglue/glue_shell.py +38 -0
- awsglue/gluetypes.py +461 -0
- awsglue/job.py +59 -0
- awsglue/scripts/__init__.py +12 -0
- awsglue/scripts/activate_etl_connector.py +362 -0
- awsglue/scripts/connector_activation_util.py +38 -0
- awsglue/scripts/crawler_redo_from_backup.py +75 -0
- awsglue/scripts/crawler_undo.py +121 -0
- awsglue/scripts/scripts_utils.py +106 -0
- awsglue/streaming_data_source.py +28 -0
- awsglue/transforms/__init__.py +47 -0
- awsglue/transforms/apply_mapping.py +72 -0
- awsglue/transforms/coalesce.py +66 -0
- awsglue/transforms/collection_transforms.py +155 -0
- awsglue/transforms/drop_nulls.py +85 -0
- awsglue/transforms/dynamicframe_filter.py +66 -0
- awsglue/transforms/dynamicframe_map.py +72 -0
- awsglue/transforms/errors_as_dynamicframe.py +45 -0
- awsglue/transforms/field_transforms.py +469 -0
- awsglue/transforms/relationalize.py +105 -0
- awsglue/transforms/repartition.py +61 -0
- awsglue/transforms/resolve_choice.py +85 -0
- awsglue/transforms/transform.py +92 -0
- awsglue/transforms/unbox.py +112 -0
- awsglue/transforms/union.py +66 -0
- awsglue/transforms/unnest_frame.py +75 -0
- awsglue/utils.py +159 -0
- awsgluedataplanepython-5.0.0.dist-info/METADATA +178 -0
- awsgluedataplanepython-5.0.0.dist-info/RECORD +45 -0
- awsgluedataplanepython-5.0.0.dist-info/WHEEL +5 -0
- awsgluedataplanepython-5.0.0.dist-info/licenses/LICENSE.txt +96 -0
- awsgluedataplanepython-5.0.0.dist-info/licenses/NOTICE.txt +3 -0
- awsgluedataplanepython-5.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
from awsglue.utils import _global_args
|
|
15
|
+
|
|
16
|
+
class Relationalize(GlueTransform):
    """
    Relationalize a DynamicFrame, i.e. produce a collection of frames that are
    generated by unnesting nested columns and pivoting array columns. A pivoted
    array column can be joined to the root table using the join key generated
    in the unnest phase.

    :param frame: DynamicFrame to relationalize
    :param staging_path: path to store partitions of pivoted tables in csv format. Pivoted tables are read back from
        this path. When omitted, the 'TempDir' entry of the global job args is used instead.
    :param name: name for the root table
    :param options: dict of optional parameters for relationalize
    :param transformation_ctx: context key to retrieve metadata about the current transformation
    :param info: String, any string to be associated with errors in this transformation.
    :param stageThreshold: Long, number of errors in the given transformation for which the processing needs to error out.
    :param totalThreshold: Long, total number of errors up to and including this transformation
        for which the processing needs to error out.
    :return: DynamicFrameCollection
    :raises RuntimeError: if staging_path is omitted and the global args hold no non-empty 'TempDir'
    """

    # TODO: Make staging_path a mandatory argument
    def __call__(self, frame, staging_path=None, name='roottable', options=None, transformation_ctx="", info="",
                 stageThreshold=0, totalThreshold=0):
        options = options or {}
        # TODO: Remove special handling of staging_path and make it mandatory after TempDir is made a mandatory argument
        # We are directly accessing the args variable assuming that it is available in the global scope. This is to
        # maintain backward compatibility with the relationalize call that did not have the mandatory staging_path arg
        if staging_path is None:
            # A missing 'TempDir' key is treated like an empty one so the caller
            # gets the descriptive RuntimeError below rather than a bare KeyError.
            try:
                temp_dir = _global_args['TempDir']
            except KeyError:
                temp_dir = None
            if temp_dir is None or temp_dir == "":
                raise RuntimeError("Unable to set staging_path using args "+str(_global_args))
            staging_path = temp_dir
        return frame.relationalize(name, staging_path, options, transformation_ctx, info, stageThreshold, totalThreshold)

    @classmethod
    def describeArgs(cls):
        """Return the list of argument descriptors for this transform, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame to relationalize",
             "optional": False,
             "defaultValue": None},
            {"name": "staging_path",
             "type": "String",
             "description": "path to store partitions of pivoted tables in csv format",
             "optional": True,
             "defaultValue": None},
            {"name": "name",
             "type": "String",
             "description": "Name of the root table",
             "optional": True,
             "defaultValue": "roottable"},
            {"name": "options",
             "type": "Dictionary",
             "description": "dict of optional parameters for relationalize",
             "optional": True,
             "defaultValue": "{}"},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "Flatten nested schema and pivot out array columns from the flattened frame"

    @classmethod
    def describeErrors(cls):
        """Return the list of error descriptors; this transform declares none."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a descriptor of the transform's return value."""
        return {"type": "DynamicFrameCollection",
                "description": "DynamicFrameCollection resulting from Relationalize call"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class Repartition(GlueTransform):
    """Transform that repartitions a DynamicFrame by delegating to ``frame.repartition``."""

    def __call__(self, frame, num_partitions, transformation_ctx="", info="",
                 stageThreshold=0, totalThreshold=0):
        """
        Repartition a DynamicFrame.

        :param frame: DynamicFrame to repartition
        :param num_partitions: number of partitions
        :param transformation_ctx: a unique string that is used to identify stats / state information
        :param info: any string to be associated with errors in the transformation
        :param stageThreshold: max number of errors in the transformation until processing will error out
        :param totalThreshold: max number of errors total until processing will error out
        :return: the repartitioned DynamicFrame (whatever ``frame.repartition`` returns)
        """
        return frame.repartition(num_partitions, transformation_ctx, info, stageThreshold, totalThreshold)

    @classmethod
    def describeArgs(cls):
        """Return the list of argument descriptors for this transform, in call order."""
        return [
            # `frame` is the first required parameter of __call__, so it is
            # described here like in every other transform.
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame to repartition",
             "optional": False,
             "defaultValue": None},
            # num_partitions is a count, not a frame.
            {"name": "num_partitions",
             "type": "Integer",
             "description": "Number of partitions",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "Repartitions a DynamicFrame."

    @classmethod
    def describeErrors(cls):
        """Return the list of error descriptors; this transform declares none."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a descriptor of the transform's return value."""
        return {"type": "DynamicFrame",
                "description": "The repartitioned DynamicFrame."}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class ResolveChoice(GlueTransform):
    """Transform that resolves choice types by delegating to ``frame.resolveChoice``."""

    def __call__(self, frame, specs=None, choice="", database=None, table_name=None,
                 transformation_ctx="", info="", stageThreshold=0, totalThreshold=0,
                 catalog_id=None):
        """
        Resolve choice types in a DynamicFrame.

        :param frame: DynamicFrame to transform
        :param specs: list of specs (path, action)
        :param choice: resolve choice option
        :param database: Glue catalog database name, required for MATCH_CATALOG choice
        :param table_name: Glue catalog table name, required for MATCH_CATALOG choice
        :param transformation_ctx: a unique string that is used to identify stats / state information
        :param info: any string to be associated with errors in the transformation
        :param stageThreshold: max number of errors in the transformation until processing will error out
        :param totalThreshold: max number of errors total until processing will error out
        :param catalog_id: catalog id used for match_catalog
        :return: whatever ``frame.resolveChoice`` returns (described as a DynamicFrame by describeReturn)
        """
        # All arguments are forwarded positionally, in the order resolveChoice is called with.
        forwarded = (specs, choice, database, table_name, transformation_ctx,
                     info, stageThreshold, totalThreshold, catalog_id)
        return frame.resolveChoice(*forwarded)

    @classmethod
    def describeArgs(cls):
        """Return the list of argument descriptors for this transform, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "DynamicFrame to transform",
             "optional": False,
             "defaultValue": None},
            {"name": "specs",
             "type": "List",
             "description": "List of specs (path, action)",
             "optional": True,
             "defaultValue": None},
            {"name": "choice",
             "type": "String",
             "description": "resolve choice option",
             "optional": True,
             "defaultValue": ""},
            {"name": "database",
             "type": "String",
             "description": "Glue catalog database name, required for MATCH_CATALOG choice",
             "optional": True,
             "defaultValue": ""},
            {"name": "table_name",
             "type": "String",
             "description": "Glue catalog table name, required for MATCH_CATALOG choice",
             "optional": True,
             "defaultValue": ""},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
            {"name": "catalog_id",
             "type": "String",
             "description": "Catalog id for match_catalog id.",
             "optional": True,
             "defaultValue": "accountId"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "Resolve choice type in this DynamicFrame."

    @classmethod
    def describeErrors(cls):
        """Return the list of error descriptors; this transform declares none."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a descriptor of the transform's return value."""
        return {"type": "DynamicFrame",
                "description": "DynamicFrame after resolving choice type."}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
class GlueTransform(object):
    """Common base class of every Glue transform.

    A concrete transform subclasses GlueTransform and implements __call__.
    The ``name`` classmethod may be overridden; by default it is the class
    name. The describe* classmethods raise NotImplementedError until a
    subclass overrides them.
    """

    @classmethod
    def apply(cls, *args, **kwargs):
        """Instantiate the transform and invoke it with the given arguments."""
        return cls()(*args, **kwargs)

    @classmethod
    def name(cls):
        """Return the transform's name (the class name unless overridden)."""
        return cls.__name__

    @classmethod
    def describeArgs(cls):
        """
        Describe the arguments accepted by the transform.

        Returns: a list of dictionaries, one per argument, shaped like
          [{"name": "<name of argument>",
            "type": "<type of argument>",
            "description": "<description of argument>",
            "optional": "<Boolean>",
            "defaultValue": "<String default value or None>"}, ...]
        Raises: NotImplementedError unless overridden by the transform
        """
        message = "describeArgs method not implemented for Transform {}".format(cls.__name__)
        raise NotImplementedError(message)

    @classmethod
    def describeReturn(cls):
        """
        Describe the value returned by the transform.

        Returns: a dictionary shaped like
          {"type": "<return type>",
           "description": "<description of output>"}
        Raises: NotImplementedError unless overridden by the transform
        """
        message = "describeReturn method not implemented for Transform {}".format(cls.__name__)
        raise NotImplementedError(message)

    @classmethod
    def describeTransform(cls):
        """
        Describe the transform itself.

        Returns: a string describing the transform, e.g.
          "Base class for all Glue Transforms"
        Raises: NotImplementedError unless overridden by the transform
        """
        message = "describeTransform method not implemented for Transform {}".format(cls.__name__)
        raise NotImplementedError(message)

    @classmethod
    def describeErrors(cls):
        """
        Describe the errors the transform may raise.

        Returns: a list of dictionaries, one per possible error, shaped like
          [{"type": "<type of error>",
            "description": "<description of error>"}]
        Raises: NotImplementedError unless overridden by the transform
        """
        message = "describeErrors method not implemented for Transform {}".format(cls.__name__)
        raise NotImplementedError(message)

    @classmethod
    def describe(cls):
        """Collect name, args, return, description and errors into one dictionary."""
        # The describe* hooks are evaluated in this order: args, returns,
        # description, raises.
        details = {"name": cls.name(),
                   "args": cls.describeArgs(),
                   "returns": cls.describeReturn(),
                   "description": cls.describeTransform(),
                   "raises": cls.describeErrors(),
                   "location": "internal"}
        return {"transform": details}

    def __eq__(self, other):
        """Two transforms are equal when `other` is an instance of this class with equal attributes."""
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __hash__(self):
        """Hash the instance attributes, sorted so the result is order-independent."""
        attributes = sorted(self.__dict__.items())
        return hash(tuple(attributes))

    def __repr__(self):
        """Return ``<Transform: NAME>`` using the transform's name."""
        return "<Transform: {}>".format(self.name())
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class Unbox(GlueTransform):
    """Transform that unboxes a string field by delegating to ``frame.unbox``."""

    def __call__(self, frame, path, format, transformation_ctx="", info="",
                 stageThreshold=0, totalThreshold=0, **options):
        """
        Unbox a string field.

        :param frame: DynamicFrame on which to call unbox
        :param path: full path to the StringNode you want to unbox
        :param format: "avro" or "json"
        :param transformation_ctx: a unique string that is used to identify stats / state information
        :param info: String, any string to be associated with errors in this transformation.
        :param stageThreshold: Long, number of errors in the given transformation for which the processing
            needs to error out.
        :param totalThreshold: Long, total number of errors up to and including this transformation
            for which the processing needs to error out.
        :param options: extra keyword options forwarded unchanged to ``frame.unbox``:
            separator: String,
            escaper: String,
            skipFirst: Boolean,
            withSchema: String, schema string should always be created by using StructType.json()
            withHeader: Boolean
        :return: whatever ``frame.unbox`` returns (described as a DynamicFrame by describeReturn)
        """
        positional = (path, format, transformation_ctx, info, stageThreshold, totalThreshold)
        return frame.unbox(*positional, **options)

    @classmethod
    def describeArgs(cls):
        """Return the list of argument descriptors for this transform, including the keyword options."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame on which to call Unbox",
             "optional": False,
             "defaultValue": None},
            {"name": "path",
             "type": "String",
             "description": "full path to the StringNode to unbox",
             "optional": False,
             "defaultValue": None},
            {"name": "format",
             "type": "String",
             "description": "file format -- \"avro\" or \"json\" only",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
            {"name": "separator",
             "type": "String",
             "description": "separator token",
             "optional": True,
             "defaultValue": "None, but individual readers may have their own defaults"},
            {"name": "escaper",
             "type": "String",
             "description": "escape token",
             "optional": True,
             "defaultValue": "None, but individual readers may have their own defaults"},
            {"name": "skipFirst",
             "type": "Boolean",
             "description": "whether to skip the first line of data",
             "optional": True,
             "defaultValue": "None, but individual readers may have their own defaults"},
            {"name": "withSchema",
             "type": "String",
             "description": "schema for data to unbox, should always be created by using StructType.json()",
             "optional": True,
             "defaultValue": "None, but individual readers may have their own defaults"},
            {"name": "withHeader",
             "type": "Boolean",
             "description": "whether data being unpacked includes a header",
             "optional": True,
             "defaultValue": "None, but individual readers may have their own defaults"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "unbox a string field"

    @classmethod
    def describeErrors(cls):
        """Return the list of error descriptors; this transform declares none."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a descriptor of the transform's return value."""
        return {"type": "DynamicFrame",
                "description": "new DynamicFrame with unboxed DynamicRecords"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class Union(GlueTransform):
    """Transform that unions two DynamicFrames by delegating to ``frame1.union``."""

    def __call__(self, frame1, frame2, transformation_ctx="",
                 info="", stageThreshold=0, totalThreshold=0):
        """
        Union two DynamicFrames.

        :param frame1: first DynamicFrame to union
        :param frame2: second DynamicFrame to union
        :param transformation_ctx: a unique string that is used to identify stats / state information
        :param info: any string to be associated with errors in the transformation
        :param stageThreshold: max number of errors in the transformation until processing will error out
        :param totalThreshold: max number of errors total until processing will error out
        :return: whatever ``frame1.union`` returns (described by describeReturn as a DynamicFrame
            containing all records from both inputs)
        """
        # Forward the second frame and the declared parameters positionally,
        # the same way the other transforms delegate to their frame method.
        # NOTE(review): assumes DynamicFrame.union takes (other_frame, transformation_ctx,
        # info, stageThreshold, totalThreshold) in this order - confirm against dynamicframe.py.
        return frame1.union(frame2, transformation_ctx, info, stageThreshold, totalThreshold)

    @classmethod
    def describeArgs(cls):
        """Return the list of argument descriptors for this transform, in call order."""
        return [
            {"name": "frame1",
             "type": "DynamicFrame",
             "description": "First DynamicFrame to union.",
             "optional": False,
             "defaultValue": None},
            {"name": "frame2",
             "type": "DynamicFrame",
             "description": "Second DynamicFrame to union.",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "Union two DynamicFrames."

    @classmethod
    def describeErrors(cls):
        """Return the list of error descriptors; this transform declares none."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a descriptor of the transform's return value."""
        return {"type": "DynamicFrame",
                "description": "DynamicFrame containing all records from both input DynamicFrames."}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class UnnestFrame(GlueTransform):
    """
    Unnest a dynamic frame, i.e. flatten nested objects to top level elements.
    Join keys are also generated for array objects.
    """

    def __call__(self, frame, transformation_ctx="", info="", stageThreshold=0, totalThreshold=0):
        """
        Unnest a dynamic frame, i.e. flatten nested objects to top level elements.
        Join keys are also generated for array objects.

        :param frame: DynamicFrame, the dynamic frame to unnest
        :param transformation_ctx: a unique string that is used to identify stats / state information
        :param info: String, any string to be associated with errors in this transformation.
        :param stageThreshold: Long, number of errors in the given transformation for which the processing
            needs to error out.
        :param totalThreshold: Long, total number of errors up to and including this transformation
            for which the processing needs to error out.
        :return: a new unnested dynamic frame
        """
        unnested = frame.unnest(transformation_ctx, info, stageThreshold, totalThreshold)
        return unnested

    @classmethod
    def describeArgs(cls):
        """Return the list of argument descriptors for this transform, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame to unnest",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "unnest a dynamic frame. i.e. flatten nested objects to top level elements."

    @classmethod
    def describeErrors(cls):
        """Return the list of error descriptors; this transform declares none."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a descriptor of the transform's return value."""
        return {"type": "DynamicFrame",
                "description": "new unnested DynamicFrame"}
|