AWSGlueDataplanePython 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. awsglue/README.md +37 -0
  2. awsglue/__init__.py +15 -0
  3. awsglue/context.py +690 -0
  4. awsglue/data_sink.py +49 -0
  5. awsglue/data_source.py +49 -0
  6. awsglue/dataframe_transforms/__init__.py +17 -0
  7. awsglue/dataframe_transforms/apply_mapping.py +76 -0
  8. awsglue/dataframereader.py +41 -0
  9. awsglue/dataframewriter.py +21 -0
  10. awsglue/devutils.py +236 -0
  11. awsglue/dynamicframe.py +669 -0
  12. awsglue/functions.py +31 -0
  13. awsglue/glue_shell.py +38 -0
  14. awsglue/gluetypes.py +461 -0
  15. awsglue/job.py +59 -0
  16. awsglue/scripts/__init__.py +12 -0
  17. awsglue/scripts/activate_etl_connector.py +362 -0
  18. awsglue/scripts/connector_activation_util.py +38 -0
  19. awsglue/scripts/crawler_redo_from_backup.py +75 -0
  20. awsglue/scripts/crawler_undo.py +121 -0
  21. awsglue/scripts/scripts_utils.py +106 -0
  22. awsglue/streaming_data_source.py +28 -0
  23. awsglue/transforms/__init__.py +47 -0
  24. awsglue/transforms/apply_mapping.py +72 -0
  25. awsglue/transforms/coalesce.py +66 -0
  26. awsglue/transforms/collection_transforms.py +155 -0
  27. awsglue/transforms/drop_nulls.py +85 -0
  28. awsglue/transforms/dynamicframe_filter.py +66 -0
  29. awsglue/transforms/dynamicframe_map.py +72 -0
  30. awsglue/transforms/errors_as_dynamicframe.py +45 -0
  31. awsglue/transforms/field_transforms.py +469 -0
  32. awsglue/transforms/relationalize.py +105 -0
  33. awsglue/transforms/repartition.py +61 -0
  34. awsglue/transforms/resolve_choice.py +85 -0
  35. awsglue/transforms/transform.py +92 -0
  36. awsglue/transforms/unbox.py +112 -0
  37. awsglue/transforms/union.py +66 -0
  38. awsglue/transforms/unnest_frame.py +75 -0
  39. awsglue/utils.py +159 -0
  40. awsgluedataplanepython-5.0.0.dist-info/METADATA +178 -0
  41. awsgluedataplanepython-5.0.0.dist-info/RECORD +45 -0
  42. awsgluedataplanepython-5.0.0.dist-info/WHEEL +5 -0
  43. awsgluedataplanepython-5.0.0.dist-info/licenses/LICENSE.txt +96 -0
  44. awsgluedataplanepython-5.0.0.dist-info/licenses/NOTICE.txt +3 -0
  45. awsgluedataplanepython-5.0.0.dist-info/top_level.txt +1 -0
awsglue/data_sink.py ADDED
@@ -0,0 +1,49 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from awsglue.dynamicframe import DynamicFrame, DynamicFrameCollection
14
+ from awsglue.utils import makeOptions, callsite
15
+ from pyspark.sql import DataFrame
16
+
17
class DataSink(object):
    """Python-side wrapper that forwards write requests to a JVM sink object.

    Every method delegates to the wrapped ``j_sink``; this class only adapts
    Python arguments and re-wraps the JVM results in Python objects.
    """

    def __init__(self, j_sink, sql_ctx):
        """Store the JVM sink handle and the context used to wrap results.

        :param j_sink: JVM-side sink object all calls are forwarded to.
        :param sql_ctx: context object; its ``_sc`` attribute is used to build
            JVM option objects and it is passed on when wrapping results.
        """
        self._jsink = j_sink
        self._sql_ctx = sql_ctx

    def setFormat(self, format, **options):
        """Set the output format, converting ``options`` with ``makeOptions``."""
        self._jsink.setFormat(format, makeOptions(self._sql_ctx._sc, options))

    def setAccumulableSize(self, size):
        """Forward ``size`` to the JVM sink's ``setAccumulableSize``."""
        self._jsink.setAccumulableSize(size)

    def setCatalogInfo(self, catalogDatabase, catalogTableName, catalogId=""):
        """Forward the catalog database, table name and catalog id to the sink."""
        self._jsink.setCatalogInfo(catalogDatabase, catalogTableName, catalogId)

    def writeFrame(self, dynamic_frame, info=""):
        """Write one DynamicFrame and return the frame produced by the sink.

        The returned DynamicFrame is named ``<input name>_errors`` and shares
        the input frame's ``glue_ctx``.
        """
        j_result = self._jsink.pyWriteDynamicFrame(dynamic_frame._jdf, callsite(), info)
        return DynamicFrame(j_result, dynamic_frame.glue_ctx, dynamic_frame.name + "_errors")

    def writeDataFrame(self, data_frame, glue_context, info=""):
        """Write a Spark DataFrame and return the sink's result as a DataFrame."""
        j_result = self._jsink.pyWriteDataFrame(
            data_frame._jdf, glue_context._glue_scala_context, callsite(), info)
        return DataFrame(j_result, self._sql_ctx)

    def write(self, dynamic_frame_or_dfc, info=""):
        """Write a DynamicFrame or every frame of a DynamicFrameCollection.

        :param dynamic_frame_or_dfc: DynamicFrame or DynamicFrameCollection.
        :param info: string passed through to the JVM sink for each write.
        :return: the result of ``writeFrame`` for a single frame, or a
            DynamicFrameCollection of those results for a collection.
        :raises TypeError: if the argument is neither of the supported types.
        """
        if isinstance(dynamic_frame_or_dfc, DynamicFrame):
            return self.writeFrame(dynamic_frame_or_dfc, info)

        if isinstance(dynamic_frame_or_dfc, DynamicFrameCollection):
            # Pass ``info`` along for every member so a collection write
            # carries the same information as a single-frame write.
            res_frames = [self.writeFrame(frame, info)
                          for frame in dynamic_frame_or_dfc.values()]
            return DynamicFrameCollection(res_frames, self._sql_ctx)

        raise TypeError("dynamic_frame_or_dfc must be an instance of "
                        "DynamicFrame or DynamicFrameCollection. Got "
                        + str(type(dynamic_frame_or_dfc)))
awsglue/data_source.py ADDED
@@ -0,0 +1,49 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from awsglue.dynamicframe import DynamicFrame
14
+ from awsglue.utils import makeOptions, callsite
15
+ from pyspark.sql import DataFrame
16
+
17
class DataSource(object):
    """Python-side wrapper that reads frames from a JVM source object."""

    def __init__(self, j_source, sql_ctx, name):
        """Keep the JVM source handle, the wrapping context and the frame name."""
        self._jsource = j_source
        self._sql_ctx = sql_ctx
        self.name = name

    def setFormat(self, format, **options):
        """Set the input format; the current call site is added to the options."""
        options["callSite"] = callsite()
        java_options = makeOptions(self._sql_ctx._sc, options)
        self._jsource.setFormat(format, java_options)

    def getFrame(self, **options):
        """Read the source into a DynamicFrame.

        Recognised options are ``minPartitions`` and ``targetPartitions``.
        When only one of them is given the other takes the same value; when
        the resolved ``minPartitions`` is None the source is read without
        partition arguments.
        """
        if 'minPartitions' in options:
            min_parts = options['minPartitions']
            target_parts = options.get('targetPartitions', min_parts)
        else:
            # Absent key and explicit None both resolve to None here.
            min_parts = target_parts = options.get('targetPartitions')

        if min_parts is not None:
            jframe = self._jsource.getDynamicFrame(min_parts, target_parts)
        else:
            jframe = self._jsource.getDynamicFrame()

        return DynamicFrame(jframe, self._sql_ctx, self.name)

    def getSampleFrame(self, num, **options):
        """Read a sample via ``getSampleDynamicFrame(num, options)``."""
        java_options = makeOptions(self._sql_ctx._sc, options)
        sampled = self._jsource.getSampleDynamicFrame(num, java_options)
        return DynamicFrame(sampled, self._sql_ctx, self.name)

    def getDataFrame(self):
        """Read the source as a Spark DataFrame."""
        return DataFrame(self._jsource.getDataFrame(), self._sql_ctx)
@@ -0,0 +1,17 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from .apply_mapping import ApplyMapping
14
+
15
# Every transform class exposed by this package.
ALL_TRANSFORMS = {ApplyMapping}

# The public API is the class name of each registered transform.
__all__ = [registered.__name__ for registered in ALL_TRANSFORMS]
@@ -0,0 +1,76 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from py4j.java_gateway import java_import # type: ignore
14
+ from pyspark.sql.dataframe import DataFrame
15
+
16
class ApplyMapping():
    """DataFrame transform that applies a list of declarative column mappings."""

    @staticmethod
    def apply(frame, mappings):
        """Apply ``mappings`` to ``frame`` through the JVM ApplyMapping transform.

        :param frame: DataFrame to transform; its ``sql_ctx._jvm`` and ``_jdf``
            attributes are used to reach the JVM.
        :param mappings: a single mapping tuple or an iterable of mapping
            tuples, each of length 2, 3 or 4.
        :return: a DataFrame wrapping the JVM result.
        :raises TypeError: if a mapping is not a tuple.
        :raises ValueError: if a mapping tuple has an unsupported length.
        """
        jvm = frame.sql_ctx._jvm

        def _to_java_mapping(mapping_tup):
            if not isinstance(mapping_tup, tuple):
                # str() is required: concatenating the raw object would fail
                # with an unrelated "can only concatenate str" error.
                raise TypeError("Mapping must be specified as a tuple. Got " +
                                str(mapping_tup))

            arity = len(mapping_tup)
            if arity not in (2, 3, 4):
                raise ValueError("Mapping tuple must be of length 2, 3, or 4. "
                                 "Got tuple of length " + str(arity))

            # scala.Tuple2 / Tuple3 / Tuple4, selected by the tuple's length.
            java_tuple = getattr(jvm.scala, "Tuple%d" % arity)
            return java_tuple.apply(*mapping_tup)

        # Accept a bare tuple as shorthand for a one-element list.
        if isinstance(mappings, tuple):
            mappings = [mappings]

        mappings_seq = jvm.PythonUtils.toSeq([_to_java_mapping(m) for m in mappings])

        java_import(jvm, "com.amazonaws.services.glue.dataframeTransforms.ApplyMapping")

        return DataFrame(jvm.ApplyMapping.apply(frame._jdf, mappings_seq), frame.sql_ctx)

    @classmethod
    def describeArgs(cls):
        """Return metadata dictionaries describing the arguments of ``apply``."""
        arg1 = {"name": "frame",
                "type": "DataFrame",
                "description": "DataFrame to transform",
                "optional": False,
                "defaultValue": None}
        # NOTE(review): "type" says DataFrame although the description says the
        # argument is a list of tuples - confirm with consumers before changing.
        arg2 = {"name": "mappings",
                "type": "DataFrame",
                "description": "List of mapping tuples (source col, source type, target col, target type)",
                "optional": False,
                "defaultValue": None}

        return [arg1, arg2]

    @classmethod
    def describeTransform(cls):
        """Return a one-line description of the transform."""
        return "Apply a declarative mapping to this DataFrame."

    @classmethod
    def describeErrors(cls):
        """Return the list of described errors (none are declared)."""
        return []

    @classmethod
    def describeReturn(cls):
        """Return metadata describing the value produced by ``apply``."""
        return {"type": "DataFrame",
                "description": "DataFrame after applying mappings."}
76
+
@@ -0,0 +1,41 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
class DataFrameReader(object):
    """Reader facade that delegates DataFrame creation to a glue context."""

    def __init__(self, glue_context):
        self._glue_context = glue_context

    def from_catalog(self, database = None, table_name = None, redshift_tmp_dir = "", transformation_ctx = "", push_down_predicate = "", additional_options = {}, catalog_id = None, **kwargs):
        """Create a DataFrame for the given catalog database and table name.

        The database may be given either as ``database`` or as the keyword
        ``name_space``, but not both. Delegates to
        ``glue_context.create_data_frame_from_catalog``; remaining keyword
        arguments are forwarded unchanged.
        """
        uses_name_space = "name_space" in kwargs

        if uses_name_space and database is not None:
            raise Exception("Parameter name_space and database are both specified, choose one.")
        if not uses_name_space and database is None:
            raise Exception("Parameter name_space or database is missing.")

        # name_space is consumed here so it is not forwarded a second time.
        db = kwargs.pop("name_space") if uses_name_space else database

        if table_name is None:
            raise Exception("Parameter table_name is missing.")

        return self._glue_context.create_data_frame_from_catalog(
            db, table_name, redshift_tmp_dir, transformation_ctx,
            push_down_predicate, additional_options, catalog_id, **kwargs)

    def from_options(self, connection_type, connection_options={},
                     format=None, format_options={}, transformation_ctx="", push_down_predicate = "", **kwargs):
        """Create a DataFrame from an explicit connection and format.

        Delegates to ``glue_context.create_data_frame_from_options``.
        """
        create = self._glue_context.create_data_frame_from_options
        return create(connection_type, connection_options, format,
                      format_options, transformation_ctx, push_down_predicate,
                      **kwargs)
@@ -0,0 +1,21 @@
1
class DataFrameWriter(object):
    """Writer facade that delegates DataFrame writes to a glue context."""

    def __init__(self, glue_context):
        self._glue_context = glue_context

    def from_catalog(self, frame, database=None, table_name=None, redshift_tmp_dir="", transformation_ctx="",
                     additional_options={}, catalog_id=None, **kwargs):
        """Write a DataFrame to the given catalog database and table name.

        The database may be given either as ``database`` or as the keyword
        ``name_space``, but not both. Delegates to
        ``glue_context.write_data_frame_from_catalog``.
        """
        uses_name_space = "name_space" in kwargs

        if uses_name_space and database is not None:
            raise Exception("Parameter name_space and database are both specified, choose one.")
        if not uses_name_space and database is None:
            raise Exception("Parameter name_space or database is missing.")

        db = kwargs.pop("name_space") if uses_name_space else database

        if table_name is None:
            raise Exception("Parameter table_name is missing.")

        # NOTE(review): any remaining **kwargs are accepted but not forwarded
        # to the glue context - confirm whether that is intentional.
        write = self._glue_context.write_data_frame_from_catalog
        return write(frame, db, table_name, redshift_tmp_dir,
                     transformation_ctx, additional_options, catalog_id)
awsglue/devutils.py ADDED
@@ -0,0 +1,236 @@
1
+ # Copyright 2016-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # Licensed under the Amazon Software License (the "License"). You may not use
3
+ # this file except in compliance with the License. A copy of the License is
4
+ # located at
5
+ #
6
+ # http://aws.amazon.com/asl/
7
+ #
8
+ # or in the "license" file accompanying this file. This file is distributed
9
+ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express
10
+ # or implied. See the License for the specific language governing
11
+ # permissions and limitations under the License.
12
+
13
+ from __future__ import print_function
14
+ import boto3
15
+ import os
16
+ import logging
17
+ import copy
18
+ from datetime import datetime
19
+
20
+
21
class ExecutionProperty:
    """Holds the ``maxConcurrentRuns`` setting of a job."""

    def __init__(self, maxConcurrentRuns=1):
        self.maxConcurrentRuns = maxConcurrentRuns

    def __repr__(self):
        # Dict-like rendering: {'maxConcurrentRuns': <value>}
        return "{{'maxConcurrentRuns': {}}}".format(str(self.maxConcurrentRuns))

    def as_dict(self):
        """Return the setting as a one-entry dictionary."""
        return dict(maxConcurrentRuns=self.maxConcurrentRuns)
30
+
31
+
32
class Command:
    """Holds a job command: its name and the location of its script."""

    def __init__(self, name, scriptLocation):
        self.name = name
        self.scriptLocation = scriptLocation

    def __repr__(self):
        # Dict-like rendering with both values quoted. The closing quote of
        # the name must come before the comma separating the two entries.
        return ("{'name': '" + str(self.name) +
                "', 'scriptLocation': '" + str(self.scriptLocation) + "'}")

    def as_dict(self):
        """Return the command as a dictionary with its two fields."""
        return {'name': self.name, 'scriptLocation': self.scriptLocation}
42
+
43
+
44
class Connections:
    """Holds the list of connections attached to a job."""

    def __init__(self, connections=None):
        # A fresh list per instance: a literal [] default would be one list
        # object shared (and mutated) across every default-constructed instance.
        self.connections = [] if connections is None else connections

    def __repr__(self):
        return "{'connections': " + str(self.connections) + "}"

    def as_dict(self):
        """Return the connections wrapped in a one-entry dictionary."""
        return {'connections': self.connections}
53
+
54
+
55
class Job:
    """In-memory description of a job and its dictionary representations."""

    def __init__(self):
        # Plain string fields start empty.
        self.name = ''
        self.description = ''
        self.logUri = ''
        self.role = ''
        # Structured fields start from their own defaults.
        self.executionProperty = ExecutionProperty()
        self.command = Command("glueetl", "UNKNOWN")
        self.defaultArguments = {}
        self.connections = Connections()
        self.maxRetries = 1
        self.allocatedCapacity = 1
        self.createdOn = datetime.now()
        self.lastModifiedOn = datetime.now()

    def __repr__(self):
        def bare(key, value):
            return "'" + key + "': " + str(value)

        def quoted(key, value):
            return "'" + key + "': '" + str(value) + "'"

        entries = [
            bare('command', self.command),
            bare('connections', self.connections),
            bare('createdOn', self.createdOn),
            quoted('description', self.description),
            bare('defaultArguments', self.defaultArguments),
            bare('executionProperty', self.executionProperty),
            bare('lastModifiedOn', self.lastModifiedOn),
            quoted('logUri', self.logUri),
            bare('maxRetries', self.maxRetries),
            quoted('name', self.name),
            quoted('role', self.role),
        ]
        # Every entry, including the last, is followed by ",\n".
        return "{" + "".join(entry + ",\n" for entry in entries) + "}"

    def as_dict(self):
        """Return the job as a dictionary.

        ``connections`` is included only when at least one connection is set
        and ``description`` only when it is non-empty.
        """
        job_dict = {'command': self.command.as_dict()}
        if len(self.connections.connections) > 0:
            job_dict['connections'] = self.connections.as_dict()
        job_dict['createdOn'] = self.createdOn
        if len(self.description) > 0:
            job_dict['description'] = self.description
        job_dict.update([
            ('defaultArguments', self.defaultArguments),
            ('executionProperty', self.executionProperty.as_dict()),
            ('lastModifiedOn', self.lastModifiedOn),
            ('logUri', self.logUri),
            ('maxRetries', self.maxRetries),
            ('name', self.name),
            ('role', self.role),
        ])
        return job_dict

    def as_job_create_dict(self):
        """Return a deep copy of ``as_dict()`` without the timestamp fields."""
        excluded = ('createdOn', 'lastModifiedOn')
        snapshot = copy.deepcopy(self.as_dict())
        return {key: value for key, value in snapshot.items() if key not in excluded}

    def as_job_update_dict(self):
        """Return a deep copy of ``as_dict()`` without name and timestamps."""
        excluded = ('name', 'createdOn', 'lastModifiedOn')
        snapshot = copy.deepcopy(self.as_dict())
        return {key: value for key, value in snapshot.items() if key not in excluded}
113
+
114
+
115
class GlueJobUtils:
    """Helper for listing, creating, updating and deleting jobs via boto3.

    NOTE(review): the request parameters and response keys used here are
    lower camel case (``jobName``, ``nextToken``, ``jobs``), whereas the public
    boto3 Glue client documents PascalCase names (``JobName``, ``NextToken``,
    ``Jobs``). Confirm which service model this class is expected to run
    against before relying on it.
    """

    def __init__(self, glue_context):
        """Build the S3 resource and Glue client from JVM-provided settings.

        :param glue_context: object whose ``_jvm.AWSConnectionUtils`` supplies
            the proxy URL, the Glue endpoint and the region.
        """
        connection_utils = glue_context._jvm.AWSConnectionUtils
        proxy_url = connection_utils.getGlueProxyUrl()
        glue_endpoint = connection_utils.getGlueEndpoint()
        region = connection_utils.getRegion()
        # s3 service calls are not allowed through the proxy for the moment, so we use the s3 vpc endpoint instead
        self.s3 = boto3.resource('s3')
        # Boto does not have an API to set proxy information. It uses environment variables to look up proxy information.
        # The first 8 characters are skipped before checking for 'null'
        # (presumably the length of an "https://" scheme prefix - TODO confirm).
        if not proxy_url[8:].startswith('null'):
            os.environ['https_proxy'] = proxy_url
        self.glue = boto3.client('glue', endpoint_url=glue_endpoint, region_name=region)

    @staticmethod
    def _warn_missing(field, job_name):
        """Log that ``field`` was absent from the response for ``job_name``."""
        logging.warning('%s is missing in job response for job %s', field, job_name)

    def _glue_job_response_to_job(self, response_job):
        """Convert a job dictionary from a service response into a ``Job``.

        ``name`` is mandatory (a missing key raises ``KeyError``); every other
        field is optional and a warning is logged when it is absent, leaving
        the ``Job`` default in place.
        """
        job = Job()
        job.name = response_job['name']

        def copy_plain_fields(fields):
            # Fields copied verbatim from the response onto the job.
            for field in fields:
                try:
                    setattr(job, field, response_job[field])
                except KeyError:
                    self._warn_missing(field, job.name)

        copy_plain_fields(('description', 'defaultArguments', 'logUri', 'role'))

        # Structured fields: a missing outer key or a missing inner key are
        # both reported as the outer field being missing.
        try:
            execution_property_dict = response_job['executionProperty']
            job.executionProperty = ExecutionProperty(execution_property_dict['maxConcurrentRuns'])
        except KeyError:
            self._warn_missing('executionProperty', job.name)

        try:
            command_dict = response_job['command']
            job.command = Command(command_dict['name'], command_dict['scriptLocation'])
        except KeyError:
            self._warn_missing('command', job.name)

        try:
            connections_dict = response_job['connections']
            job.connections = Connections(connections_dict['connections'])
        except KeyError:
            self._warn_missing('connections', job.name)

        copy_plain_fields(('maxRetries', 'createdOn', 'lastModifiedOn'))

        return job

    def get_jobs(self, nextToken=''):
        """List jobs, returning ``{'jobs': [Job, ...]}`` plus ``NextToken`` if present."""
        response = self.glue.get_jobs(nextToken=nextToken)
        list_jobs_response = {}
        try:
            list_jobs_response['NextToken'] = response['NextToken']
        except KeyError:
            logging.info('NextToken is not present in get_jobs response')
        list_jobs_response['jobs'] = [self._glue_job_response_to_job(j) for j in response['jobs']]
        return list_jobs_response

    def get_job(self, jobName):
        """Fetch one job by name and convert it into a ``Job``."""
        response = self.glue.get_job(jobName=jobName)
        return self._glue_job_response_to_job(response['job'])

    def _get_bucket_prefix_from_s3_url(self, s3_url):
        """Split ``s3://bucket/prefix`` into ``{'bucket': ..., 'prefix': ...}``.

        :raises Exception: if the URL does not start with ``s3://``, has no
            prefix after the bucket, or ends with ``/``.
        """
        if not s3_url.startswith('s3://'):
            raise Exception('s3 url for scriptLocation should start with s3:// but given %s' % s3_url)
        url_parts = s3_url[5:].split('/', 1)
        if len(url_parts) != 2:
            raise Exception('s3 url for scriptLocation does not include a prefix: %s' % s3_url)
        if url_parts[1].endswith('/'):
            # Double-quoted so the embedded '/' stays inside the message
            # instead of being parsed as a division between two strings.
            raise Exception("s3 url for scriptLocation should not end with '/': %s" % s3_url)
        return {'bucket': url_parts[0], 'prefix': url_parts[1]}

    def _upload_file_to_s3(self, s3_url, file):
        """Upload the local ``file`` to ``s3_url``; skipped when ``file`` is empty."""
        if len(file) == 0:
            logging.warning('script file is not specified, skipping upload of script to s3')
        else:
            s3_parts = self._get_bucket_prefix_from_s3_url(s3_url)
            # Context manager ensures the handle is closed even if put() fails.
            with open(file, 'rb') as script:
                self.s3.Object(s3_parts['bucket'], s3_parts['prefix']).put(Body=script)

    def create_job(self, job, file=''):
        """Upload the script (if given) and create the job.

        Best effort: any failure is printed and logged, and ``None`` is
        returned instead of raising.
        """
        try:
            self._upload_file_to_s3(job.command.scriptLocation, file)
            return self.glue.create_job(**job.as_job_create_dict())
        except Exception as inst:
            print(inst)
            logging.error('Failed to create job')

    def update_job(self, job, file=''):
        """Upload the script (if given) and update the job.

        Best effort: any failure is printed and logged, and ``None`` is
        returned instead of raising.
        """
        try:
            self._upload_file_to_s3(job.command.scriptLocation, file)
            return self.glue.update_job(jobName=job.name, jobUpdate=job.as_job_update_dict())
        except Exception as inst:
            print(inst)
            logging.error('Failed to update job')

    def delete_job(self, jobName):
        """Delete the job with the given name and return the client response."""
        return self.glue.delete_job(jobName=jobName)
236
+