AWSGlueDataplanePython 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. awsglue/README.md +37 -0
  2. awsglue/__init__.py +15 -0
  3. awsglue/context.py +690 -0
  4. awsglue/data_sink.py +49 -0
  5. awsglue/data_source.py +49 -0
  6. awsglue/dataframe_transforms/__init__.py +17 -0
  7. awsglue/dataframe_transforms/apply_mapping.py +76 -0
  8. awsglue/dataframereader.py +41 -0
  9. awsglue/dataframewriter.py +21 -0
  10. awsglue/devutils.py +236 -0
  11. awsglue/dynamicframe.py +669 -0
  12. awsglue/functions.py +31 -0
  13. awsglue/glue_shell.py +38 -0
  14. awsglue/gluetypes.py +461 -0
  15. awsglue/job.py +59 -0
  16. awsglue/scripts/__init__.py +12 -0
  17. awsglue/scripts/activate_etl_connector.py +362 -0
  18. awsglue/scripts/connector_activation_util.py +38 -0
  19. awsglue/scripts/crawler_redo_from_backup.py +75 -0
  20. awsglue/scripts/crawler_undo.py +121 -0
  21. awsglue/scripts/scripts_utils.py +106 -0
  22. awsglue/streaming_data_source.py +28 -0
  23. awsglue/transforms/__init__.py +47 -0
  24. awsglue/transforms/apply_mapping.py +72 -0
  25. awsglue/transforms/coalesce.py +66 -0
  26. awsglue/transforms/collection_transforms.py +155 -0
  27. awsglue/transforms/drop_nulls.py +85 -0
  28. awsglue/transforms/dynamicframe_filter.py +66 -0
  29. awsglue/transforms/dynamicframe_map.py +72 -0
  30. awsglue/transforms/errors_as_dynamicframe.py +45 -0
  31. awsglue/transforms/field_transforms.py +469 -0
  32. awsglue/transforms/relationalize.py +105 -0
  33. awsglue/transforms/repartition.py +61 -0
  34. awsglue/transforms/resolve_choice.py +85 -0
  35. awsglue/transforms/transform.py +92 -0
  36. awsglue/transforms/unbox.py +112 -0
  37. awsglue/transforms/union.py +66 -0
  38. awsglue/transforms/unnest_frame.py +75 -0
  39. awsglue/utils.py +159 -0
  40. awsgluedataplanepython-5.0.0.dist-info/METADATA +178 -0
  41. awsgluedataplanepython-5.0.0.dist-info/RECORD +45 -0
  42. awsgluedataplanepython-5.0.0.dist-info/WHEEL +5 -0
  43. awsgluedataplanepython-5.0.0.dist-info/licenses/LICENSE.txt +96 -0
  44. awsgluedataplanepython-5.0.0.dist-info/licenses/NOTICE.txt +3 -0
  45. awsgluedataplanepython-5.0.0.dist-info/top_level.txt +1 -0
awsglue/functions.py ADDED
@@ -0,0 +1,31 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from pyspark import SparkContext
14
+ from pyspark.sql.column import Column, _to_java_column, _to_seq
15
+
16
def replaceArrayElement(srcCol, replaceCol, idx):
    """Call the JVM ``gluefunctions.replaceArrayElement`` and wrap the result.

    Args:
        srcCol: column handed to the JVM function as its first argument,
            after conversion with ``_to_java_column``.
        replaceCol: column handed over as the second argument, converted
            the same way.
        idx: forwarded to the JVM function unchanged.

    Returns:
        A ``Column`` wrapping whatever the JVM function returns.
    """
    spark_context = SparkContext._active_spark_context
    java_source = _to_java_column(srcCol)
    java_replacement = _to_java_column(replaceCol)
    glue_functions = spark_context._jvm.gluefunctions
    return Column(
        glue_functions.replaceArrayElement(java_source, java_replacement, idx))
20
+
21
def namedStruct(*cols):
    """Call the JVM ``gluefunctions.namedStruct`` on the given columns.

    Args:
        *cols: the columns, given either as separate positional arguments
            or as one list/set holding them.

    Returns:
        A ``Column`` wrapping the JVM function's result.
    """
    spark_context = SparkContext._active_spark_context
    # A lone list or set argument is treated as the collection of columns.
    single_collection = len(cols) == 1 and isinstance(cols[0], (list, set))
    columns = cols[0] if single_collection else cols
    struct_fn = spark_context._jvm.gluefunctions.namedStruct
    return Column(struct_fn(_to_seq(spark_context, columns, _to_java_column)))
27
+
28
def explodeWithIndex(col):
    """Call the JVM ``gluefunctions.explodeWithIndex`` on ``col``.

    Args:
        col: column converted with ``_to_java_column`` before the call.

    Returns:
        The resulting ``Column`` aliased to the two names ``'index'`` and
        ``'val'``.
    """
    explode_fn = SparkContext._active_spark_context._jvm.gluefunctions.explodeWithIndex
    exploded = Column(explode_fn(_to_java_column(col)))
    return exploded.alias('index', 'val')
awsglue/glue_shell.py ADDED
@@ -0,0 +1,38 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from __future__ import print_function
14
+ import platform
15
+ import pyspark
16
+ from pyspark.context import SparkContext
17
+ from pyspark.sql import SQLContext
18
+ from awsglue.context import GlueContext
19
+
20
# Start-up script: build a SparkContext, wrap it in a GlueContext that is
# exposed under the name ``glueContext``, then print a banner and the
# interpreter version.
+ sc = SparkContext()
21
+ # Change to GlueContext
22
+ # TODO: Figure out if/how to use HiveContext
23
+ glueContext = GlueContext(sc)
24
+
# NOTE(review): the banner rows below appear to have lost their column
# alignment in this rendering -- compare with the packaged file before
# editing. The last row also writes its backslashes singly (``\_``) while the
# rows above double them; ``\_`` is not a recognised escape sequence, so
# confirm whether it should be doubled as well.
25
+ welcome_msg = """Welcome to
26
+ ___ _ _______ ________
27
+ / | | / / ___/ / ____/ /_ _____
28
+ / /| | | /| / /\\__ \\ / / __/ / / / / _ \\
29
+ / ___ | |/ |/ /___/ / / /_/ / / /_/ / __/
30
+ /_/ |_|__/|__//____/ \____/_/\____/\___/
31
+ """
32
+
# Print the banner, then the Python version and the first two entries of
# platform.python_build(), then tell the user which name holds the context.
33
+ print(welcome_msg)
34
+ print("Using Python version %s (%s, %s)" % (
35
+ platform.python_version(),
36
+ platform.python_build()[0],
37
+ platform.python_build()[1]))
38
+ print("GlueContext available as glueContext.")
awsglue/gluetypes.py ADDED
@@ -0,0 +1,461 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ import json
14
+ import sys
15
+ from awsglue.utils import iteritems
16
+
17
+
18
# Python 3 has no ``basestring`` or ``unicode`` builtins; alias both to ``str``
# so the isinstance checks further down work on either major version.
# ``sys.version_info`` is compared numerically; comparing the ``sys.version``
# string is lexicographic and would misorder a two-digit major version.
if sys.version_info[0] >= 3:
    basestring = unicode = str
20
+
21
+
22
class DataType(object):
    """Base class of the Glue schema types; carries a ``properties`` dict.

    Args:
        properties: metadata attached to the type. When omitted, every
            instance receives its own new empty dict.
    """

    def __init__(self, properties=None):
        # A ``{}`` default in the signature is created once and then shared by
        # every instance built without an argument, so an edit made through
        # one instance would show up on all the others.
        self.properties = {} if properties is None else properties

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this class with the same attributes."""
        return (isinstance(other, self.__class__) and
                self.__dict__ == other.__dict__)

    def __hash__(self):
        # Only the class takes part in the hash; the instance attributes
        # include dicts, which are not hashable.
        return hash(str(self.__class__))

    @classmethod
    def typeName(cls):
        """Return the class name minus its last four characters, lower-cased.

        For example ``StringType`` gives ``"string"``.
        """
        return cls.__name__[:-4].lower()

    def jsonValue(self):
        """Return ``{"dataType": typeName(), "properties": properties}``."""
        return {"dataType": self.typeName(), "properties": self.properties}
39
+
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Atomic types
44
+ # ---------------------------------------------------------------------------
45
+
46
+ # Note we can't use singletons like Spark does because DataType instances can
47
+ # have properties.
48
+
49
+
50
class AtomicType(DataType):
    """Common base of the non-nested types."""

    def __repr__(self):
        return "{}({})".format(type(self).__name__, self.properties)

    @classmethod
    def fromJsonValue(cls, json_value):
        """Build an instance from a JSON dict.

        Every entry except ``"dataType"`` is passed to the constructor as a
        keyword argument.
        """
        constructor_kwargs = {}
        for key, value in iteritems(json_value):
            if key != "dataType":
                constructor_kwargs[key] = value
        return cls(**constructor_kwargs)
58
+
59
+
60
class BinaryType(AtomicType):
    """Atomic type named ``"binary"``; adds nothing to ``AtomicType``."""
62
+
63
+
64
class BooleanType(AtomicType):
    """Atomic type named ``"boolean"``; adds nothing to ``AtomicType``."""
66
+
67
+
68
class ByteType(AtomicType):
    """Atomic type named ``"byte"``; adds nothing to ``AtomicType``."""
70
+
71
+
72
class DateType(AtomicType):
    """Atomic type named ``"date"``; adds nothing to ``AtomicType``."""
74
+
75
+
76
class DecimalType(AtomicType):
    """Atomic decimal type that records a precision and a scale.

    Args:
        precision: stored as ``self.precision``; defaults to 10.
        scale: stored as ``self.scale``; defaults to 2.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, precision=10, scale=2, properties=None):
        # Create the default dict per call; a ``{}`` written in the signature
        # would be one object shared by all default-constructed instances.
        if properties is None:
            properties = {}
        super(DecimalType, self).__init__(properties)
        self.precision = precision
        self.scale = scale

    def __repr__(self):
        return "DecimalType({}, {}, {})".format(self.precision,
                                                self.scale,
                                                self.properties)

    def jsonValue(self):
        """Return the base JSON dict extended with ``precision`` and ``scale``."""
        json_value = dict(super(DecimalType, self).jsonValue())
        json_value['precision'] = self.precision
        json_value['scale'] = self.scale
        return json_value
90
+
91
+
92
class DoubleType(AtomicType):
    """Atomic type named ``"double"``; adds nothing to ``AtomicType``."""
94
+
95
+
96
class EnumType(AtomicType):
    """Atomic type restricted to a collection of option values.

    Args:
        options: the option values; stored as given.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, options, properties=None):
        # Create the default dict per call rather than sharing one ``{}``
        # between all instances. The base initialiser is invoked once; the
        # original called it a second time to no effect.
        if properties is None:
            properties = {}
        super(EnumType, self).__init__(properties)
        self.options = options

    def __repr__(self):
        # Show at most the first three options, marking any remainder.
        options_str = ",".join(self.options[0:3])
        if len(self.options) > 3:
            options_str = options_str + ",..."
        return "EnumType([{}], {})".format(options_str, self.properties)

    def jsonValue(self):
        """Return the base JSON dict extended with an ``options`` list."""
        # The original built this dict but never returned it, so callers
        # always received None.
        json_value = dict(super(EnumType, self).jsonValue())
        json_value['options'] = list(self.options)
        return json_value
111
+
112
+
113
class FloatType(AtomicType):
    """Atomic type named ``"float"``; adds nothing to ``AtomicType``."""
115
+
116
+
117
class IntegerType(AtomicType):
    """Atomic integer type whose type name is fixed to ``"int"``."""

    @classmethod
    def typeName(cls):
        # Overrides the inherited rule, which would derive "integer" from the
        # class name.
        return "int"
121
+
122
+
123
class LongType(AtomicType):
    """Atomic type named ``"long"``; adds nothing to ``AtomicType``."""
125
+
126
+
127
class NullType(AtomicType):
    """Atomic type named ``"null"``; adds nothing to ``AtomicType``.

    ``mergeDataTypes`` returns the other operand when one side is a
    ``NullType``.
    """
129
+
130
+
131
class ShortType(AtomicType):
    """Atomic type named ``"short"``; adds nothing to ``AtomicType``."""
133
+
134
+
135
class StringType(AtomicType):
    """Atomic type named ``"string"``; adds nothing to ``AtomicType``."""
137
+
138
+
139
class TimestampType(AtomicType):
    """Atomic type named ``"timestamp"``; adds nothing to ``AtomicType``."""
141
+
142
class TimestampNTZType(AtomicType):
    """Atomic type named ``"timestampntz"``; adds nothing to ``AtomicType``."""
144
+
145
+
146
class UnknownType(AtomicType):
    """Atomic type named ``"unknown"``; adds nothing to ``AtomicType``.

    ``mergeDataTypes`` returns the other operand when one side is an
    ``UnknownType``.
    """
148
+
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # Collection types
152
+ # ---------------------------------------------------------------------------
153
+
154
class ArrayType(DataType):
    """Collection type whose elements all share one ``DataType``.

    Args:
        elementType: type of the elements. When omitted, a new
            ``UnknownType`` is created for this instance.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, elementType=None, properties=None):
        # Defaults are created per call: objects written in the signature are
        # built once and shared, so changing the default element type's
        # properties on one array would affect every other default array.
        if elementType is None:
            elementType = UnknownType()
        if properties is None:
            properties = {}
        assert isinstance(elementType, DataType),\
            "elementType should be DataType. Got " + str(elementType.__class__)
        super(ArrayType, self).__init__(properties)
        self.elementType = elementType

    def __repr__(self):
        return "ArrayType({}, {})".format(self.elementType, self.properties)

    def jsonValue(self):
        """Return the base JSON dict extended with the element type's JSON."""
        json_value = dict(super(ArrayType, self).jsonValue())
        json_value["elementType"] = self.elementType.jsonValue()
        return json_value

    @classmethod
    def fromJsonValue(cls, json_value):
        """Rebuild an instance from a dict shaped like ``jsonValue()`` output."""
        element_type = _deserialize_json_value(json_value["elementType"])
        return cls(elementType=element_type,
                   properties=json_value.get('properties', {}))
174
+
175
class SetType(DataType):
    """Collection type whose members all share one ``DataType``.

    Args:
        elementType: type of the members. When omitted, a new
            ``UnknownType`` is created for this instance.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, elementType=None, properties=None):
        # Defaults are created per call: objects written in the signature are
        # built once and shared between all default-constructed instances.
        if elementType is None:
            elementType = UnknownType()
        if properties is None:
            properties = {}
        assert isinstance(elementType, DataType), \
            "elementType should be DataType. Got " + str(elementType.__class__)
        super(SetType, self).__init__(properties)
        self.elementType = elementType

    def __repr__(self):
        return "SetType({}, {})".format(self.elementType, self.properties)

    def jsonValue(self):
        """Return the base JSON dict extended with the element type's JSON."""
        json_value = dict(super(SetType, self).jsonValue())
        json_value["elementType"] = self.elementType.jsonValue()
        return json_value

    @classmethod
    def fromJsonValue(cls, json_value):
        """Rebuild an instance from a dict shaped like ``jsonValue()`` output."""
        element_type = _deserialize_json_value(json_value["elementType"])
        return cls(elementType=element_type,
                   properties=json_value.get('properties', {}))
195
+
196
+
197
class ChoiceType(DataType):
    """A type that may be any one of several alternatives.

    The alternatives live in ``self.choices``, a dict keyed by each
    alternative's ``typeName()``, so at most one alternative per type name
    is held.

    Args:
        choices: iterable of ``DataType`` alternatives; none when omitted.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, choices=None, properties=None):
        # Defaults are created per call instead of sharing the objects a
        # ``[]`` / ``{}`` in the signature would bind once.
        if properties is None:
            properties = {}
        super(ChoiceType, self).__init__(properties)
        self.choices = {}
        for choice in (() if choices is None else choices):
            self.add(choice)

    def __repr__(self):
        # Order by type name so the text does not depend on dict ordering.
        sorted_values = sorted(self.choices.values(),
                               key=lambda x: x.typeName())
        choice_str = "[{}]".format(",".join([str(c) for c in sorted_values]))

        return "ChoiceType({}, {})".format(choice_str, self.properties)

    def add(self, new_choice):
        """Register ``new_choice``.

        Raises:
            ValueError: if an alternative with the same type name is present.
        """
        if new_choice.typeName() in self.choices:
            raise ValueError("Attempting to insert duplicate choice",
                             new_choice)
        self.choices[new_choice.typeName()] = new_choice

    def merge(self, new_choices):
        """Merge one alternative, or a list of them, into the existing ones.

        Each incoming alternative is combined with the current one of the
        same type name through ``mergeDataTypes``.
        """
        if not isinstance(new_choices, list):
            new_choices = [new_choices]
        for choice in new_choices:
            existing = self.choices.get(choice.typeName(), UnknownType())
            self.choices[choice.typeName()] = mergeDataTypes(existing, choice)

    def jsonValue(self):
        """Return the base JSON dict extended with a ``choices`` list."""
        json_value = dict(super(ChoiceType, self).jsonValue())
        json_value["choices"] = [v.jsonValue() for v in self.choices.values()]
        return json_value

    @classmethod
    def fromJsonValue(cls, json_value):
        """Rebuild an instance from a dict shaped like ``jsonValue()`` output."""
        choices = [_deserialize_json_value(c) for c in json_value["choices"]]
        return cls(choices=choices, properties=json_value.get('properties', {}))
234
+
235
+
236
class MapType(DataType):
    """Map type whose values all share one ``DataType``.

    Args:
        valueType: type of the map values. When omitted, a new
            ``UnknownType`` is created for this instance.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, valueType=None, properties=None):
        # Defaults are created per call: objects written in the signature are
        # built once and shared between all default-constructed instances.
        if valueType is None:
            valueType = UnknownType()
        if properties is None:
            properties = {}
        assert isinstance(valueType, DataType), "valueType should be DataType"
        super(MapType, self).__init__(properties)
        self.valueType = valueType

    def __repr__(self):
        return "MapType({}, {})".format(self.valueType, self.properties)

    def jsonValue(self):
        """Return the base JSON dict extended with the value type's JSON."""
        json_value = dict(super(MapType, self).jsonValue())
        json_value["valueType"] = self.valueType.jsonValue()
        return json_value

    @classmethod
    def fromJsonValue(cls, json_value):
        """Rebuild an instance from a dict shaped like ``jsonValue()`` output."""
        return cls(valueType=_deserialize_json_value(json_value["valueType"]),
                   properties=json_value.get('properties', {}))
254
+
255
+
256
class Field(object):
    """A named member of a struct: a name, a ``DataType`` and a properties dict.

    Args:
        name: field name; must be a string.
        dataType: the field's ``DataType``.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, name, dataType, properties=None):
        assert isinstance(dataType, DataType),\
            "dataType should be DataType. Got " + str(dataType.__class__)
        assert isinstance(name, basestring),\
            "Field name must be a string. Got " + str(name.__class__)

        # Note this only applies in Python 2.7 if the name is type unicode. In that case
        # we return a str (bytestring) encoded as utf-8. This is the same behavior as
        # pyspark.sql.types.StructField. Since we are serializing as utf-8 encoded JSON,
        # the correct values should be preserved when this gets mapped to Scala.
        if not isinstance(name, str):
            name = name.encode('utf-8')
        self.name = name
        self.dataType = dataType
        # Created per call; a ``{}`` in the signature would be shared by every
        # field built without explicit properties.
        self.properties = {} if properties is None else properties

    def __eq__(self, other):
        """Equal when ``other`` is a Field with the same name and data type.

        ``properties`` does not take part in the comparison.
        """
        # Defer to the other operand for non-Field objects instead of failing
        # with AttributeError on ``other.name``.
        if not isinstance(other, Field):
            return NotImplemented
        return (self.name == other.name and
                self.dataType == other.dataType)

    def __repr__(self):
        return "Field({}, {}, {})".format(self.name, self.dataType,
                                          self.properties)

    def jsonValue(self):
        """Return ``{"name", "container", "properties"}``; the type goes under ``container``."""
        return {"name": self.name,
                "container": self.dataType.jsonValue(),
                "properties": self.properties}

    @classmethod
    def fromJsonValue(cls, json_value):
        """Rebuild a Field from a dict shaped like ``jsonValue()`` output."""
        return cls(json_value["name"],
                   _deserialize_json_value(json_value["container"]),
                   json_value.get("properties", {}))
292
+
293
+
294
class StructType(DataType):
    """Record type made of an ordered list of ``Field`` objects.

    ``self.fields`` keeps the fields in order and ``self.field_map`` indexes
    the same fields by name.

    Args:
        fields: list of ``Field``. When omitted, a new empty list is created
            for this instance.
        properties: metadata dict. When omitted, every instance receives its
            own new empty dict.
    """

    def __init__(self, fields=None, properties=None):
        # ``add`` appends to ``self.fields``. With a ``[]`` default in the
        # signature that append would land in the one shared default list and
        # every later ``StructType()`` would start out with those fields.
        if fields is None:
            fields = []
        if properties is None:
            properties = {}
        super(StructType, self).__init__(properties)
        assert all(isinstance(f, Field) for f in fields),\
            "fields should be a list of Field"
        self.fields = fields
        self.field_map = {field.name: field for field in fields}

    def __iter__(self):
        """Iterate over the fields in order."""
        return iter(self.fields)

    def __repr__(self):
        return "StructType([{}], {})".format(
            ",".join([str(f) for f in self.fields]), self.properties)

    def add(self, field):
        """Append ``field`` and index it by name."""
        assert isinstance(field, Field), "field must be of type Field"
        self.fields.append(field)
        self.field_map[field.name] = field

    def hasField(self, field):
        """Return whether a field with this name exists; accepts a name or a Field."""
        if isinstance(field, Field):
            field = field.name
        return field in self.field_map

    def getField(self, field):
        """Return the field with this name; accepts a name or a Field.

        Raises:
            KeyError: if no field has that name.
        """
        if isinstance(field, Field):
            field = field.name
        return self.field_map[field]

    def jsonValue(self):
        """Return the base JSON dict extended with a ``fields`` list."""
        json_value = dict(super(StructType, self).jsonValue())
        json_value["fields"] = [f.jsonValue() for f in self.fields]
        return json_value

    @classmethod
    def fromJsonValue(cls, json_value):
        """Rebuild an instance from a dict shaped like ``jsonValue()`` output."""
        return cls([Field.fromJsonValue(f) for f in json_value["fields"]],
                   json_value.get("properties", {}))
333
+
334
+
335
class EntityType(DataType):
    """Placeholder type: constructing one always raises ``NotImplementedError``."""

    def __init__(self, entity, base_type, properties):
        message = "EntityTypes not yet supported in Tape."
        raise NotImplementedError(message)
338
+
339
+
340
+ # ---------------------------------------------------------------------------
341
+ # Utility methods
342
+ # ---------------------------------------------------------------------------
343
+
344
# Type classes grouped by kind, and lookup tables from typeName() to class.
_atomic_types = [
    BinaryType, BooleanType, ByteType, DateType, DecimalType,
    DoubleType, EnumType, FloatType, IntegerType, LongType, NullType,
    ShortType, StringType, TimestampType, TimestampNTZType, UnknownType,
]

_complex_types = [ArrayType, ChoiceType, MapType, StructType, SetType]

_atomic_type_map = {t.typeName(): t for t in _atomic_types}  # type: ignore

_complex_type_map = {t.typeName(): t for t in _complex_types}

# Used by _deserialize_json_value to resolve a "dataType" string to its class.
_all_type_map = {t.typeName(): t for t in _atomic_types + _complex_types}  # type: ignore
359
+
360
+
361
def _deserialize_json_string(json_str):
    """Parse ``json_str`` as JSON and build the type object it describes."""
    parsed = json.loads(json_str)
    return _deserialize_json_value(parsed)
363
+
364
+
365
def _deserialize_json_value(json_val):
    """Build a type object from its JSON dict.

    The ``"dataType"`` entry selects the class from ``_all_type_map``; that
    class's ``fromJsonValue`` does the construction.

    Raises:
        AssertionError: if ``json_val`` is not a dict.
        KeyError: if ``"dataType"`` is missing or names no known type.
    """
    assert isinstance(json_val, dict), "Json value must be dictionary"
    type_class = _all_type_map[json_val["dataType"]]
    return type_class.fromJsonValue(json_val)
369
+
370
def _serialize_schema(schema):
    """Return the JSON text of ``schema.jsonValue()``."""
    json_value = schema.jsonValue()
    return json.dumps(json_value)
372
+
373
def _make_choice(s1, s2):
    """Combine two types into a new ``ChoiceType``.

    Either argument may itself be a ``ChoiceType``, in which case its
    alternatives are used. Alternatives that share a type name are combined
    with ``mergeDataTypes``; the rest are carried over unchanged.

    Args:
        s1: left type; its ``properties`` are given to the result.
        s2: right type.

    Returns:
        A new ``ChoiceType``; neither argument is modified.
    """
    if isinstance(s1, ChoiceType):
        left_types = s1.choices
    else:
        left_types = {s1.typeName(): s1}

    # Work on a copy: the loop below writes into this dict, and writing into
    # ``s2.choices`` directly would silently change the caller's ``s2``.
    if isinstance(s2, ChoiceType):
        merged_types = dict(s2.choices)
    else:
        merged_types = {s2.typeName(): s2}

    for typecode, datatype in iteritems(left_types):
        if typecode in merged_types:
            merged_types[typecode] = mergeDataTypes(datatype,
                                                    merged_types[typecode])
        else:
            merged_types[typecode] = datatype

    return ChoiceType(merged_types.values(), s1.properties)
392
+
393
+
394
+ # Simple Python merge implementation. This is less efficient than the Scala
395
+ # version and should be used primarily for interactive manipulation.
396
+ # Has similar limitations to the Scala version -- does not merge properties,
397
+ # for instance.
398
def mergeDataTypes(s1, s2):
    """Return a type that covers both ``s1`` and ``s2``.

    Rules, applied in order:
      * an ``UnknownType`` or ``NullType`` on one side yields the other side;
      * if either side is a ``ChoiceType``, or the two classes differ, the
        result comes from ``_make_choice``;
      * two structs are merged field by field, keeping ``s1``'s properties;
      * two arrays or two maps merge their element / value types;
      * two enums concatenate their options;
      * anything else yields ``s1``.
    """
    if isinstance(s1, (UnknownType, NullType)):
        return s2
    if isinstance(s2, (UnknownType, NullType)):
        return s1
    if (isinstance(s1, ChoiceType) or isinstance(s2, ChoiceType)
            or type(s1) != type(s2)):
        return _make_choice(s1, s2)

    if isinstance(s1, StructType):
        merged_fields = []
        # Walk s1 first: shared fields get merged types, s1-only fields are
        # copied as they are.
        for field in s1:
            if s2.hasField(field):
                merged_type = mergeDataTypes(field.dataType,
                                             s2.getField(field).dataType)
            else:
                merged_type = field.dataType
            merged_fields.append(
                Field(field.name, merged_type, field.properties))

        # Then append the fields that only s2 has.
        for field in s2:
            if not s1.hasField(field):
                merged_fields.append(
                    Field(field.name, field.dataType, field.properties))
        return StructType(merged_fields, s1.properties)

    if isinstance(s1, ArrayType):
        return ArrayType(mergeDataTypes(s1.elementType, s2.elementType))
    if isinstance(s1, MapType):
        return MapType(mergeDataTypes(s1.valueType, s2.valueType))
    if isinstance(s1, EnumType):
        return EnumType(s1.options + s2.options)
    return s1
436
+
437
+
438
def _create_dynamic_record(dynamicRecord):
    """Wrap a mapping in a ``DynamicRecord``.

    Values whose type is exactly ``dict`` are wrapped in ``DynamicRecord``
    too; only this first level of values is converted.
    """
    wrapped = {
        key: DynamicRecord(value) if type(value) == dict else value
        for key, value in dynamicRecord.items()
    }
    return DynamicRecord(wrapped)
446
+
447
+
448
def _revert_to_dict(dynamicRecord):
    """Recursively rebuild dicts and lists as plain ``dict`` / ``list`` objects.

    Any dict (including dict subclasses) becomes a plain dict, any list a
    plain list, with their contents converted the same way; every other
    value is returned unchanged.
    """
    if isinstance(dynamicRecord, list):
        return [_revert_to_dict(item) for item in dynamicRecord]
    if isinstance(dynamicRecord, dict):
        return {key: _revert_to_dict(value)
                for key, value in iteritems(dynamicRecord)}
    return dynamicRecord
455
+
456
class _DynamicRecordFieldError(AttributeError, KeyError):
    """Raised when attribute-style access names a key the record lacks.

    It is an ``AttributeError`` so that ``hasattr``, ``getattr`` with a
    default and ``copy.deepcopy`` treat the name as absent, and still a
    ``KeyError`` so existing ``except KeyError`` handlers keep working.
    """


class DynamicRecord(dict):
    """A dict whose entries can also be read and written as attributes."""

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails. A bare KeyError
        # escaping from here breaks every protocol that probes for optional
        # attributes, so convert it.
        try:
            return self[attr]
        except KeyError:
            raise _DynamicRecordFieldError(attr)

    def __setattr__(self, attr, value):
        # Attribute assignment stores a dict entry, not an instance attribute.
        self[attr] = value
awsglue/job.py ADDED
@@ -0,0 +1,59 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+ from py4j.java_gateway import java_import # type: ignore
13
class Job:
    """Python-side handle on the JVM ``Job`` object.

    The class methods return lists of command-line argument strings. An
    instance forwards ``init``, ``isInitialized`` and ``commit`` to the JVM
    ``Job`` reached through a ``GlueContext`` or a ``SparkSession``.
    """

    @classmethod
    def continuation_options(cls):
        """Return ``--continuation-option`` and the ``continuation-*`` strings listed with it."""
        return [
            '--continuation-option',
            'continuation-enabled',
            'continuation-readonly',
            'continuation-ignore',
        ]

    @classmethod
    def job_bookmark_options(cls):
        """Return ``--job-bookmark-option`` and the ``job-bookmark-*`` strings listed with it."""
        return [
            '--job-bookmark-option',
            'job-bookmark-enable',
            'job-bookmark-pause',
            'job-bookmark-disable',
        ]

    @classmethod
    def job_bookmark_range_options(cls):
        """Return the two job-bookmark range flags."""
        return ['--job-bookmark-from', '--job-bookmark-to']

    @classmethod
    def id_params(cls):
        """Return the job identification flags."""
        return [
            '--JOB_NAME',
            '--JOB_ID',
            '--JOB_RUN_ID',
            '--SECURITY_CONFIGURATION',
        ]

    @classmethod
    def encryption_type_options(cls):
        """Return ``--encryption-type`` and the ``sse-s3`` string listed with it."""
        return ['--encryption-type', 'sse-s3']

    @classmethod
    def data_lineage_options(cls):
        """Return the data-lineage flag."""
        return ['--enable-data-lineage']

    def __init__(self, glue_context_or_spark_session):
        """Bind to the JVM ``Job`` object.

        Args:
            glue_context_or_spark_session: a ``GlueContext`` or a
                ``SparkSession``.

        Raises:
            Exception: if the argument is neither of those types.
        """
        # Imported here rather than at module level, as in the original.
        from pyspark.sql import SparkSession
        from awsglue.context import GlueContext

        source = glue_context_or_spark_session
        if isinstance(source, GlueContext):
            self._job = source._jvm.Job
            self._glue_context = source
            self._spark_session = source.sparkSession
            return

        if isinstance(source, SparkSession):
            # On this path the Job class is imported into the JVM view
            # before it is looked up.
            java_import(source._jvm, "com.amazonaws.services.glue.util.Job")
            self._job = source._jvm.Job
            self._glue_context = None
            self._spark_session = source
            return

        raise Exception("cannot init Job instance given input parameter type: " + str(type(source)))

    def init(self, job_name, args = {}):
        """Forward ``job_name``, the Java Spark session and ``args`` to the JVM ``Job.init``."""
        java_session = self._spark_session._jsparkSession
        self._job.init(job_name, java_session, args)

    def isInitialized(self):
        """Return the JVM ``Job.isInitialized()`` result."""
        initialized = self._job.isInitialized()
        return initialized

    def commit(self):
        """Call the JVM ``Job.commit()``."""
        self._job.commit()
59
+
@@ -0,0 +1,12 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+