AWSGlueDataplanePython 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awsglue/README.md +37 -0
- awsglue/__init__.py +15 -0
- awsglue/context.py +690 -0
- awsglue/data_sink.py +49 -0
- awsglue/data_source.py +49 -0
- awsglue/dataframe_transforms/__init__.py +17 -0
- awsglue/dataframe_transforms/apply_mapping.py +76 -0
- awsglue/dataframereader.py +41 -0
- awsglue/dataframewriter.py +21 -0
- awsglue/devutils.py +236 -0
- awsglue/dynamicframe.py +669 -0
- awsglue/functions.py +31 -0
- awsglue/glue_shell.py +38 -0
- awsglue/gluetypes.py +461 -0
- awsglue/job.py +59 -0
- awsglue/scripts/__init__.py +12 -0
- awsglue/scripts/activate_etl_connector.py +362 -0
- awsglue/scripts/connector_activation_util.py +38 -0
- awsglue/scripts/crawler_redo_from_backup.py +75 -0
- awsglue/scripts/crawler_undo.py +121 -0
- awsglue/scripts/scripts_utils.py +106 -0
- awsglue/streaming_data_source.py +28 -0
- awsglue/transforms/__init__.py +47 -0
- awsglue/transforms/apply_mapping.py +72 -0
- awsglue/transforms/coalesce.py +66 -0
- awsglue/transforms/collection_transforms.py +155 -0
- awsglue/transforms/drop_nulls.py +85 -0
- awsglue/transforms/dynamicframe_filter.py +66 -0
- awsglue/transforms/dynamicframe_map.py +72 -0
- awsglue/transforms/errors_as_dynamicframe.py +45 -0
- awsglue/transforms/field_transforms.py +469 -0
- awsglue/transforms/relationalize.py +105 -0
- awsglue/transforms/repartition.py +61 -0
- awsglue/transforms/resolve_choice.py +85 -0
- awsglue/transforms/transform.py +92 -0
- awsglue/transforms/unbox.py +112 -0
- awsglue/transforms/union.py +66 -0
- awsglue/transforms/unnest_frame.py +75 -0
- awsglue/utils.py +159 -0
- awsgluedataplanepython-5.0.0.dist-info/METADATA +178 -0
- awsgluedataplanepython-5.0.0.dist-info/RECORD +45 -0
- awsgluedataplanepython-5.0.0.dist-info/WHEEL +5 -0
- awsgluedataplanepython-5.0.0.dist-info/licenses/LICENSE.txt +96 -0
- awsgluedataplanepython-5.0.0.dist-info/licenses/NOTICE.txt +3 -0
- awsgluedataplanepython-5.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Map(GlueTransform):
    """Transform wrapper around the ``map`` method of a DynamicFrame.

    Calling an instance forwards every argument, positionally and in the
    same order, to ``frame.map`` and hands back whatever that call returns.
    """

    def __call__(self, frame, f, preservesPartitioning=False,
                 transformation_ctx="", info="", stageThreshold=0,
                 totalThreshold=0):
        """Apply ``f`` to ``frame`` through ``frame.map``.

        :param frame: the DynamicFrame to apply the Map function to
        :param f: function to apply on records in the DynamicFrame
        :param preservesPartitioning: whether to preserve the partitioning
            in the DynamicFrame
        :param transformation_ctx: a unique string that is used to identify
            stats / state information
        :param info: any string to be associated with errors in the
            transformation
        :param stageThreshold: max number of errors in the transformation
            until processing will error out
        :param totalThreshold: max number of errors total until processing
            will error out
        :return: the value returned by ``frame.map``
        """
        # Keep the positional order expected by frame.map.
        forwarded = (f, preservesPartitioning, transformation_ctx, info,
                     stageThreshold, totalThreshold)
        return frame.map(*forwarded)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame to apply the Map function",
             "optional": False,
             "defaultValue": None},
            {"name": "f",
             "type": "Function",
             "description": "Function to apply on records in the DynamicFrame. The function takes a DynamicRecord as an argument and returns a DynamicRecord",
             "optional": False,
             "defaultValue": None},
            {"name": "preservesPartitioning",
             "type": "Boolean",
             "description": "Whether to preserve the partitioning in the DynamicFrame.",
             "optional": True,
             "defaultValue": False},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        summary = "Builds a new DynamicFrame by applying a function to all records in the input DynamicFrame"
        return summary

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        result_info = {
            "type": "DynamicFrame",
            "description": "New DynamicFrame with DynamicRecords as a result of a function",
        }
        return result_info
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class ErrorsAsDynamicFrame(GlueTransform):
    """Transform wrapper around ``frame.errorsAsDynamicFrame()``."""

    def __call__(self, frame):
        """
        Return a DynamicFrame holding the error records leading up to the
        source DynamicFrame, nested in the returned DynamicFrame.

        :param frame: source DynamicFrame
        :return: the value returned by ``frame.errorsAsDynamicFrame()``
        """
        error_frame = frame.errorsAsDynamicFrame()
        return error_frame

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame on which to call errorsAsDynamicFrame",
             "optional": False,
             "defaultValue": None},
        ]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        summary = "Get error records leading up to the source DynmaicFrame"
        return summary

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        result_info = {
            "type": "DynamicFrame",
            "description": "new DynamicFrame with error DynamicRecords",
        }
        return result_info
|
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
# Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# Licensed under the Amazon Software License (the "License"). You may not use
|
|
3
|
+
# this file except in compliance with the License. A copy of the License is
|
|
4
|
+
# located at
|
|
5
|
+
#
|
|
6
|
+
# http://aws.amazon.com/asl/
|
|
7
|
+
#
|
|
8
|
+
# or in the "license" file accompanying this file. This file is distributed
|
|
9
|
+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
|
|
10
|
+
# or implied. See the License for the specific language governing
|
|
11
|
+
# permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
from awsglue.transforms import GlueTransform
|
|
14
|
+
|
|
15
|
+
class RenameField(GlueTransform):
    """
    Rename a node within a DynamicFrame.

    :param frame: DynamicFrame
    :param old_name: String, full path to the node you want to rename
    :param new_name: String, new name including full path
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :param info: String, any string to be associated with errors in this
                 transformation
    :param stageThreshold: Long, number of errors in the given transformation
                           for which the processing needs to error out
    :param totalThreshold: Long, total number of errors up to and including
                           this transformation for which the processing needs
                           to error out
    :return: DynamicFrame
    """

    def __call__(self, frame, old_name, new_name, transformation_ctx="",
                 info="", stageThreshold=0, totalThreshold=0):
        """Forward all arguments positionally to ``frame.rename_field``."""
        forwarded = (old_name, new_name, transformation_ctx, info,
                     stageThreshold, totalThreshold)
        return frame.rename_field(*forwarded)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame on which to rename a field",
             "optional": False,
             "defaultValue": None},
            {"name": "old_name",
             "type": "String",
             "description": "Full path to the node to rename",
             "optional": False,
             "defaultValue": None},
            {"name": "new_name",
             "type": "String",
             "description": "New name, including full path",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        summary = "Rename a node within a DynamicFrame"
        return summary

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        result_info = {
            "type": "DynamicFrame",
            "description": "new DynamicFrame with indicated field renamed",
        }
        return result_info
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class DropFields(GlueTransform):
    """
    Drop fields within a DynamicFrame.

    :param frame: DynamicFrame
    :param paths: List of Strings, each the full path to a node you want
                  to drop
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :param info: String, any string to be associated with errors in this
                 transformation
    :param stageThreshold: Long, number of errors in the given transformation
                           for which the processing needs to error out
    :param totalThreshold: Long, total number of errors up to and including
                           this transformation for which the processing needs
                           to error out
    :return: DynamicFrame
    """

    def __call__(self, frame, paths, transformation_ctx="", info="",
                 stageThreshold=0, totalThreshold=0):
        """Forward all arguments positionally to ``frame.drop_fields``."""
        forwarded = (paths, transformation_ctx, info, stageThreshold,
                     totalThreshold)
        return frame.drop_fields(*forwarded)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame from which to drop fields",
             "optional": False,
             "defaultValue": None},
            {"name": "paths",
             "type": "List[String]",
             "description": "full paths corresponding to nodes to drop",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        summary = "Drop fields from a DynamicFrame"
        return summary

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        result_info = {
            "type": "DynamicFrame",
            "description": "new DynamicFrame without indicated fields",
        }
        return result_info
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class SelectFields(GlueTransform):
    """
    Get fields within a DynamicFrame.

    :param frame: DynamicFrame
    :param paths: List of Strings, each the full path to a node you want
                  to get
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :param info: String, any string to be associated with errors in this
                 transformation
    :param stageThreshold: Long, number of errors in the given transformation
                           for which the processing needs to error out
    :param totalThreshold: Long, total number of errors up to and including
                           this transformation for which the processing needs
                           to error out
    :return: DynamicFrame
    """

    def __call__(self, frame, paths, transformation_ctx="", info="",
                 stageThreshold=0, totalThreshold=0):
        """Forward all arguments positionally to ``frame.select_fields``."""
        forwarded = (paths, transformation_ctx, info, stageThreshold,
                     totalThreshold)
        return frame.select_fields(*forwarded)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        return [
            {"name": "frame",
             "type": "DynamicFrame",
             "description": "The DynamicFrame from which to select fields",
             "optional": False,
             "defaultValue": None},
            {"name": "paths",
             "type": "List[String]",
             "description": "full paths corresponding to nodes to select",
             "optional": False,
             "defaultValue": None},
            {"name": "transformation_ctx",
             "type": "String",
             "description": "A unique string that is used to identify stats / state information",
             "optional": True,
             "defaultValue": ""},
            {"name": "info",
             "type": "String",
             "description": "Any string to be associated with errors in the transformation",
             "optional": True,
             "defaultValue": "\"\""},
            {"name": "stageThreshold",
             "type": "Integer",
             "description": "Max number of errors in the transformation until processing will error out",
             "optional": True,
             "defaultValue": "0"},
            {"name": "totalThreshold",
             "type": "Integer",
             "description": "Max number of errors total until processing will error out.",
             "optional": True,
             "defaultValue": "0"},
        ]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        summary = "Select fields from a DynamicFrame"
        return summary

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        result_info = {
            "type": "DynamicFrame",
            "description": "new DynamicFrame with only indicated fields",
        }
        return result_info
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class SplitFields(GlueTransform):
    """
    Split fields within a DynamicFrame.

    :param frame: DynamicFrame
    :param paths: List of Strings, each the full path to a node that you
                  would like to split into a new frame
    :param name1: String, name for the frame that is split off; when None it
                  is derived from the source frame name with a "1" suffix
    :param name2: String, name for the frame holding the remaining fields;
                  when None it is derived from the source frame name with a
                  "2" suffix
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :param info: String, any string to be associated with errors in this
                 transformation
    :param stageThreshold: Long, number of errors in the given transformation
                           for which the processing needs to error out
    :param totalThreshold: Long, total number of errors up to and including
                           this transformation for which the processing needs
                           to error out
    :return: DynamicFrameCollection with two DynamicFrames, the first
             containing all the fields that you have split off, and the
             second containing the remaining fields
    """

    def __call__(self, frame, paths, name1 = None, name2 = None, transformation_ctx = "", info = "", stageThreshold = 0, totalThreshold = 0):
        """Derive any missing output names, then call ``frame.split_fields``."""
        # Incorporate the existing DynamicFrame name into the new names.
        # Truthiness covers both an empty name and a missing (None) name,
        # where the previous len() check raised TypeError on None.
        frame_name = frame.name if frame.name else "frame"

        # Identity comparison is the correct test for the None sentinel.
        if name1 is None:
            name1 = frame_name + "1"
        if name2 is None:
            name2 = frame_name + "2"

        return frame.split_fields(paths, name1, name2, transformation_ctx, info, stageThreshold, totalThreshold)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        arg1 = {"name": "frame",
                "type": "DynamicFrame",
                "description": "DynamicFrame from which to split fields",
                "optional": False,
                "defaultValue": None}
        arg2 = {"name": "paths",
                "type": "List[String]",
                "description": "full paths corresponding to nodes to split into new DynamicFrame",
                "optional": False,
                "defaultValue": None}
        # The advertised names must match the keyword names accepted by
        # __call__ (name1 / name2); they were previously listed as
        # frame1 / frame2, which are not valid keywords.
        arg3 = {"name": "name1",
                "type": "String",
                "description": "name for the dynamic frame to be split off",
                "optional": True,
                "defaultValue": "frame1"}
        arg4 = {"name": "name2",
                "type": "String",
                "description": "name for the dynamic frame remains on original",
                "optional": True,
                "defaultValue": "frame2"}
        arg5 = {"name": "transformation_ctx",
                "type": "String",
                "description": "A unique string that is used to identify stats / state information",
                "optional": True,
                "defaultValue": ""}
        arg6 = {"name": "info",
                "type": "String",
                "description": "Any string to be associated with errors in the transformation",
                "optional": True,
                "defaultValue": "\"\""}
        arg7 = {"name": "stageThreshold",
                "type": "Integer",
                "description": "Max number of errors in the transformation until processing will error out",
                "optional": True,
                "defaultValue": "0"}
        arg8 = {"name": "totalThreshold",
                "type": "Integer",
                "description": "Max number of errors total until processing will error out.",
                "optional": True,
                "defaultValue": "0"}

        return [arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        return "Split fields within a DynamicFrame"

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        desc = "[new DynamicFrame with only indicated fields, new DynamicFrame without indicated fields]"
        return {"type": "DynamicFrameCollection",
                "description": desc}
|
|
300
|
+
|
|
301
|
+
class SplitRows(GlueTransform):
    """
    Split rows within a DynamicFrame.

    :param frame: DynamicFrame
    :param comparison_dict: a dictionary where the key is the path to a
        column and the value is another dictionary mapping comparators to
        the value to which the column will be compared, e.g.
        {"age": {">": 10, "<": 20}} will give back rows where age is between
        10 and 20 exclusive, split from rows that do not meet this criteria
    :param name1: String, name for the frame that is split off
    :param name2: String, name for the frame holding the remaining rows
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :param info: String, any string to be associated with errors in this
                 transformation; None is treated as ""
    :param stageThreshold: Long, number of errors in the given transformation
                           for which the processing needs to error out
    :param totalThreshold: Long, total number of errors up to and including
                           this transformation for which the processing needs
                           to error out
    :return: A DynamicFrameCollection with two DynamicFrames, the first
             containing all the rows that you have split off, and the second
             containing the remaining rows
    """

    def __call__(self, frame, comparison_dict, name1 = "frame1", name2 = "frame2", transformation_ctx = "", info = None, stageThreshold = 0, totalThreshold = 0):
        """Normalize ``info`` and forward everything to ``frame.split_rows``."""
        # split_rows is always handed a string for info, never None.
        info = info or ""
        return frame.split_rows(comparison_dict, name1, name2, transformation_ctx, info, stageThreshold, totalThreshold)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        arg1 = {"name": "frame",
                "type": "DynamicFrame",
                "description": "DynamicFrame from which to split rows",
                "optional": False,
                "defaultValue": None}
        # Adjacent literals are concatenated at compile time; the previous
        # backslash continuations inside one literal embedded the source
        # indentation into the description text.
        arg2 = {"name": "comparison_dict",
                "type": "Dictionary, {String 'path to node': {String 'operator': String or Integer 'value'}}",
                "description": ("{paths to columns: {comparators: value to which each the column will be compared.}} "
                                "Example: {'age': {'>': 10, '<': 20}} will give back rows where age is between 10 and 20 exclusive, "
                                "and rows where this criteria is not met"),
                "optional": False,
                "defaultValue": None}
        # The advertised names must match the keyword names accepted by
        # __call__ (name1 / name2); they were previously listed as
        # frame1 / frame2, which are not valid keywords.
        arg3 = {"name": "name1",
                "type": "String",
                "description": "name for the dynamic frame to be split off",
                "optional": True,
                "defaultValue": "frame1"}
        arg4 = {"name": "name2",
                "type": "String",
                "description": "name for the dynamic frame remains on original",
                "optional": True,
                "defaultValue": "frame2"}
        arg5 = {"name": "transformation_ctx",
                "type": "String",
                "description": "A unique string that is used to identify stats / state information",
                "optional": True,
                "defaultValue": ""}
        arg6 = {"name": "info",
                "type": "String",
                "description": "Any string to be associated with errors in the transformation",
                "optional": True,
                "defaultValue": None}
        arg7 = {"name": "stageThreshold",
                "type": "Integer",
                "description": "Max number of errors in the transformation until processing will error out",
                "optional": True,
                "defaultValue": "0"}
        arg8 = {"name": "totalThreshold",
                "type": "Integer",
                "description": "Max number of errors total until processing will error out.",
                "optional": True,
                "defaultValue": "0"}

        return [arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        return "Split rows within a DynamicFrame based on comparators"

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        desc = "DynamicFrameCollection[new DynamicFrame with only indicated rows, new DynamicFrame without indicated rows]"
        return {"type": "DynamicFrameCollection",
                "description": desc}
|
|
383
|
+
|
|
384
|
+
class Join(GlueTransform):
    """
    Equality join of two DynamicFrames.

    :param frame1: DynamicFrame to join
    :param frame2: DynamicFrame to join with
    :param keys1: the keys to join on for the first frame
    :param keys2: the keys to join on for the second frame
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :return: the value returned by ``frame1.join``
    """

    def __call__(self, frame1, frame2, keys1, keys2, transformation_ctx = ""):
        """Join ``frame1`` with ``frame2`` through ``frame1.join``."""
        # transformation_ctx used to be accepted and then silently dropped;
        # forward it so the caller's context actually reaches the join.
        # NOTE(review): assumes DynamicFrame.join takes transformation_ctx as
        # its fourth positional parameter -- confirm against dynamicframe.py.
        return frame1.join(keys1, keys2, frame2, transformation_ctx)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        # Each descriptor needs its own name: previously the frame2
        # descriptor overwrote arg1 and the returned list referenced an
        # undefined arg4, so this method always raised NameError.
        arg1 = {"name": "frame1",
                "type": "DynamicFrame",
                "description": "join this DynamicFrame",
                "optional": False,
                "defaultValue": None}

        arg2 = {"name": "frame2",
                "type": "DynamicFrame",
                "description": "join with this DynamicFrame",
                "optional": False,
                "defaultValue": None}

        arg3 = {"name": "keys1",
                "type": "String",
                "description": "The keys to join on for the first frame",
                "optional": False,
                "defaultValue": None}

        arg4 = {"name": "keys2",
                "type": "String",
                "description": "The keys to join on for the second frame",
                "optional": False,
                "defaultValue": None}

        return [arg1, arg2, arg3, arg4]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        return "equality join two dynamic frames DynamicFrames"

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        return {"type": "DynamicFrame",
                "description": "DynamicFrame obtained by joining two frames"}
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
class Spigot(GlueTransform):
    """
    Write sample records to a path destination mid transformation.

    :param frame: DynamicFrame to spigot
    :param path: string, file path to write spigot
    :param options: sampling options (topk -> first k records,
                    prob -> probability of picking any record); optional
    :param transformation_ctx: String, a unique string that is used to
                               identify stats / state information
    :return: the value returned by ``frame.spigot``
    """

    def __call__(self, frame, path, options = None, transformation_ctx = ""):
        """Forward to ``frame.spigot``, substituting ``{}`` for missing options."""
        # describeArgs advertises options as optional with a None default,
        # so the signature now honours that; callers that pass options
        # explicitly are unaffected.
        # NOTE(review): assumes frame.spigot accepts an empty dict as
        # "no options" -- confirm against DynamicFrame.spigot.
        if options is None:
            options = {}
        return frame.spigot(path, options, transformation_ctx)

    @classmethod
    def describeArgs(cls):
        """Return one descriptor dict per argument, in call order."""
        arg1 = {"name": "frame",
                "type": "DynamicFrame",
                "description": "spigot this DynamicFrame",
                "optional": False,
                "defaultValue": None}

        arg2 = {"name": "path",
                "type": "string",
                "description": "file path to write spigot",
                "optional": False,
                "defaultValue": None}

        arg3 = {"name": "options",
                "type": "Json",
                "description": "topk -> first k records, prob -> probability of picking any record",
                "optional": True,
                "defaultValue": None}

        return [arg1, arg2, arg3]

    @classmethod
    def describeTransform(cls):
        """Return the one-line, human-readable summary of this transform."""
        return "write sample records to path destination mid transformation"

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (always empty here)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return a dict describing the type and meaning of the result."""
        return {"type": "DynamicFrame",
                "description": "DynamicFrame is the same as the infput dynamicFrame with an additional write step"}
|