AWSGlueDataplanePython 0.0.2__tar.gz → 5.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. awsgluedataplanepython-5.0.0/.github/workflows/publish.yml +34 -0
  2. awsgluedataplanepython-5.0.0/.gitignore +8 -0
  3. awsgluedataplanepython-5.0.0/AWSGlueDataplanePython.egg-info/PKG-INFO +178 -0
  4. awsgluedataplanepython-5.0.0/AWSGlueDataplanePython.egg-info/SOURCES.txt +57 -0
  5. awsgluedataplanepython-5.0.0/AWSGlueDataplanePython.egg-info/top_level.txt +1 -0
  6. awsgluedataplanepython-5.0.0/LICENSE.txt +96 -0
  7. awsgluedataplanepython-5.0.0/NOTICE.txt +3 -0
  8. awsgluedataplanepython-5.0.0/PKG-INFO +178 -0
  9. awsgluedataplanepython-5.0.0/README.md +71 -0
  10. awsgluedataplanepython-5.0.0/THIRD-PARTY-LICENSES +4087 -0
  11. awsgluedataplanepython-5.0.0/awsglue/README.md +37 -0
  12. awsgluedataplanepython-5.0.0/awsglue/__init__.py +15 -0
  13. awsgluedataplanepython-5.0.0/awsglue/context.py +690 -0
  14. awsgluedataplanepython-5.0.0/awsglue/data_sink.py +49 -0
  15. awsgluedataplanepython-5.0.0/awsglue/data_source.py +49 -0
  16. awsgluedataplanepython-5.0.0/awsglue/dataframe_transforms/__init__.py +17 -0
  17. awsgluedataplanepython-5.0.0/awsglue/dataframe_transforms/apply_mapping.py +76 -0
  18. awsgluedataplanepython-5.0.0/awsglue/dataframereader.py +41 -0
  19. awsgluedataplanepython-5.0.0/awsglue/dataframewriter.py +21 -0
  20. awsgluedataplanepython-5.0.0/awsglue/devutils.py +236 -0
  21. awsgluedataplanepython-5.0.0/awsglue/dynamicframe.py +669 -0
  22. awsgluedataplanepython-5.0.0/awsglue/functions.py +31 -0
  23. awsgluedataplanepython-5.0.0/awsglue/glue_shell.py +38 -0
  24. awsgluedataplanepython-5.0.0/awsglue/gluetypes.py +461 -0
  25. awsgluedataplanepython-5.0.0/awsglue/job.py +59 -0
  26. awsgluedataplanepython-5.0.0/awsglue/scripts/__init__.py +12 -0
  27. awsgluedataplanepython-5.0.0/awsglue/scripts/activate_etl_connector.py +362 -0
  28. awsgluedataplanepython-5.0.0/awsglue/scripts/connector_activation_util.py +38 -0
  29. awsgluedataplanepython-5.0.0/awsglue/scripts/crawler_redo_from_backup.py +75 -0
  30. awsgluedataplanepython-5.0.0/awsglue/scripts/crawler_undo.py +121 -0
  31. awsgluedataplanepython-5.0.0/awsglue/scripts/scripts_utils.py +106 -0
  32. awsgluedataplanepython-5.0.0/awsglue/streaming_data_source.py +28 -0
  33. awsgluedataplanepython-5.0.0/awsglue/transforms/__init__.py +47 -0
  34. awsgluedataplanepython-5.0.0/awsglue/transforms/apply_mapping.py +72 -0
  35. awsgluedataplanepython-5.0.0/awsglue/transforms/coalesce.py +66 -0
  36. awsgluedataplanepython-5.0.0/awsglue/transforms/collection_transforms.py +155 -0
  37. awsgluedataplanepython-5.0.0/awsglue/transforms/drop_nulls.py +85 -0
  38. awsgluedataplanepython-5.0.0/awsglue/transforms/dynamicframe_filter.py +66 -0
  39. awsgluedataplanepython-5.0.0/awsglue/transforms/dynamicframe_map.py +72 -0
  40. awsgluedataplanepython-5.0.0/awsglue/transforms/errors_as_dynamicframe.py +45 -0
  41. awsgluedataplanepython-5.0.0/awsglue/transforms/field_transforms.py +469 -0
  42. awsgluedataplanepython-5.0.0/awsglue/transforms/relationalize.py +105 -0
  43. awsgluedataplanepython-5.0.0/awsglue/transforms/repartition.py +61 -0
  44. awsgluedataplanepython-5.0.0/awsglue/transforms/resolve_choice.py +85 -0
  45. awsgluedataplanepython-5.0.0/awsglue/transforms/transform.py +92 -0
  46. awsgluedataplanepython-5.0.0/awsglue/transforms/unbox.py +112 -0
  47. awsgluedataplanepython-5.0.0/awsglue/transforms/union.py +66 -0
  48. awsgluedataplanepython-5.0.0/awsglue/transforms/unnest_frame.py +75 -0
  49. awsgluedataplanepython-5.0.0/awsglue/utils.py +159 -0
  50. awsgluedataplanepython-5.0.0/bin/glue-setup.sh +29 -0
  51. awsgluedataplanepython-5.0.0/bin/gluepyspark +5 -0
  52. awsgluedataplanepython-5.0.0/bin/gluepytest +5 -0
  53. awsgluedataplanepython-5.0.0/bin/gluesparksubmit +5 -0
  54. awsgluedataplanepython-5.0.0/pom.xml +54 -0
  55. awsgluedataplanepython-5.0.0/pyproject.toml +16 -0
  56. awsgluedataplanepython-5.0.0/setup.py +11 -0
  57. AWSGlueDataplanePython-0.0.2/AWSGlueDataplanePython.egg-info/PKG-INFO +0 -13
  58. AWSGlueDataplanePython-0.0.2/AWSGlueDataplanePython.egg-info/SOURCES.txt +0 -5
  59. AWSGlueDataplanePython-0.0.2/PKG-INFO +0 -13
  60. AWSGlueDataplanePython-0.0.2/setup.py +0 -18
  61. {AWSGlueDataplanePython-0.0.2 → awsgluedataplanepython-5.0.0}/AWSGlueDataplanePython.egg-info/dependency_links.txt +0 -0
  62. /AWSGlueDataplanePython-0.0.2/AWSGlueDataplanePython.egg-info/top_level.txt → /awsgluedataplanepython-5.0.0/AWSGlueDataplanePython.egg-info/not-zip-safe +0 -0
  63. {AWSGlueDataplanePython-0.0.2 → awsgluedataplanepython-5.0.0}/setup.cfg +0 -0
@@ -0,0 +1,34 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: actions/setup-python@v5
14
+ with:
15
+ python-version: "3.11"
16
+ - run: pip install build
17
+ - run: python -m build
18
+ - uses: actions/upload-artifact@v4
19
+ with:
20
+ name: dist
21
+ path: dist/
22
+
23
+ publish:
24
+ needs: build
25
+ runs-on: ubuntu-latest
26
+ environment: pypi
27
+ permissions:
28
+ id-token: write
29
+ steps:
30
+ - uses: actions/download-artifact@v4
31
+ with:
32
+ name: dist
33
+ path: dist/
34
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,8 @@
1
+ *.pyc
2
+ PyGlue.zip
3
+ conf/
4
+ jars/
5
+ /jarsv1/
6
+ derby.log
7
+ metastore_db/
8
+ *.DS_Store
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: AWSGlueDataplanePython
3
+ Version: 5.0.0
4
+ Summary: AWS Glue Python library for local ETL script development
5
+ License: Amazon Software License 1.0
6
+
7
+ This Amazon Software License ("License") governs your use, reproduction, and
8
+ distribution of the accompanying software as specified below.
9
+
10
+ 1. Definitions
11
+
12
+ "Licensor" means any person or entity that distributes its Work.
13
+
14
+ "Software" means the original work of authorship made available under this
15
+ License.
16
+
17
+ "Work" means the Software and any additions to or derivative works of the
18
+ Software that are made available under this License.
19
+
20
+ The terms "reproduce," "reproduction," "derivative works," and
21
+ "distribution" have the meaning as provided under U.S. copyright law;
22
+ provided, however, that for the purposes of this License, derivative works
23
+ shall not include works that remain separable from, or merely link (or bind
24
+ by name) to the interfaces of, the Work.
25
+
26
+ Works, including the Software, are "made available" under this License by
27
+ including in or with the Work either (a) a copyright notice referencing the
28
+ applicability of this License to the Work, or (b) a copy of this License.
29
+
30
+ 2. License Grants
31
+
32
+ 2.1 Copyright Grant. Subject to the terms and conditions of this License,
33
+ each Licensor grants to you a perpetual, worldwide, non-exclusive,
34
+ royalty-free, copyright license to reproduce, prepare derivative works of,
35
+ publicly display, publicly perform, sublicense and distribute its Work and
36
+ any resulting derivative works in any form.
37
+
38
+ 2.2 Patent Grant. Subject to the terms and conditions of this License, each
39
+ Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free
40
+ patent license to make, have made, use, sell, offer for sale, import, and
41
+ otherwise transfer its Work, in whole or in part. The foregoing license
42
+ applies only to the patent claims licensable by Licensor that would be
43
+ infringed by Licensor's Work (or portion thereof) individually and
44
+ excluding any combinations with any other materials or technology.
45
+
46
+ 3. Limitations
47
+
48
+ 3.1 Redistribution. You may reproduce or distribute the Work only if
49
+ (a) you do so under this License, (b) you include a complete copy of this
50
+ License with your distribution, and (c) you retain without modification
51
+ any copyright, patent, trademark, or attribution notices that are present
52
+ in the Work.
53
+
54
+ 3.2 Derivative Works. You may specify that additional or different terms
55
+ apply to the use, reproduction, and distribution of your derivative works
56
+ of the Work ("Your Terms") only if (a) Your Terms provide that the use
57
+ limitation in Section 3.3 applies to your derivative works, and (b) you
58
+ identify the specific derivative works that are subject to Your Terms.
59
+ Notwithstanding Your Terms, this License (including the redistribution
60
+ requirements in Section 3.1) will continue to apply to the Work itself.
61
+
62
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be
63
+ used or intended for use with the web services, computing platforms or
64
+ applications provided by Amazon.com, Inc. or its affiliates, including
65
+ Amazon Web Services, Inc.
66
+
67
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim against
68
+ any Licensor (including any claim, cross-claim or counterclaim in a
69
+ lawsuit) to enforce any patents that you allege are infringed by any Work,
70
+ then your rights under this License from such Licensor (including the
71
+ grants in Sections 2.1 and 2.2) will terminate immediately.
72
+
73
+ 3.5 Trademarks. This License does not grant any rights to use any
74
+ Licensor's or its affiliates' names, logos, or trademarks, except as
75
+ necessary to reproduce the notices described in this License.
76
+
77
+ 3.6 Termination. If you violate any term of this License, then your rights
78
+ under this License (including the grants in Sections 2.1 and 2.2) will
79
+ terminate immediately.
80
+
81
+ 4. Disclaimer of Warranty.
82
+
83
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
84
+ EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
85
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
86
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
87
+ THIS LICENSE. SOME STATES' CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN
88
+ IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU.
89
+
90
+ 5. Limitation of Liability.
91
+
92
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
93
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
94
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
95
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR
96
+ RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING
97
+ BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS
98
+ OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES
99
+ OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF
100
+ SUCH DAMAGES.
101
+
102
+ Requires-Python: <3.12,>=3.11
103
+ Description-Content-Type: text/markdown
104
+ License-File: LICENSE.txt
105
+ License-File: NOTICE.txt
106
+ Dynamic: license-file
107
+
108
+ # aws-glue-libs
109
+
110
+ This repository supports python libraries for local development of glue pyspark batch jobs. Glue streaming is supported in the separate repository [aws-glue-streaming-libs](https://github.com/awslabs/aws-glue-streaming-libs).
111
+
112
+ ## Contents
113
+ This repository contains:
114
+ * `awsglue` - the Python library you can use to author [AWS Glue](https://aws.amazon.com/glue) ETL jobs. This library extends [Apache Spark](https://spark.apache.org/) with additional data types and operations for ETL workflows. It's an interface for the Glue ETL library in Python.
115
+ * `bin` - this directory hosts several executables that allow you to run the Python library locally or open up a PySpark shell to run Glue Spark code interactively.
116
+
117
+ ## Python versions by Glue Version
118
+
119
+ Different Glue versions support different Python versions. The following table is for your reference; it also includes the associated repository branch for each Glue version.
120
+
121
+ | Glue Version | Python 3 Version | aws-glue-libs branch |
122
+ |---|---|----------------------|
123
+ | 2.0 | 3.7 | glue-2.0 |
124
+ | 3.0 | 3.7 | glue-3.0 |
125
+ | 4.0 | 3.10 | glue-4.0 |
126
+ | 5.0 | 3.11 | glue-5.0 |
127
+
128
+ You may refer to AWS Glue's official [release notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html) for more information
129
+
130
+ ## Setup guide
131
+
132
+ If you haven't already, please refer to the [official AWS Glue Python local development documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-libraries.html#develop-local-python) for the official setup documentation. The following is a summary of the AWS documentation:
133
+
134
+ The `awsglue` library provides only the Python interface to the Glue Spark runtime; you need the Glue ETL jar to run it locally. The jar is now available via the Maven build system in an S3-backed Maven repository. Here are the steps to set up your dev environment locally.
135
+
136
+ 1. install Apache Maven from the following location: https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-common/apache-maven-3.6.0-bin.tar.gz
137
+ 1. use `copy-dependencies` target in Apache Maven to download the jar from S3 to your local dev environment.
138
+ 1. download and extract the Apache Spark distribution based on the Glue version you're using:
139
+ * Glue version 2.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-2.0/spark-2.4.3-bin-hadoop2.8.tgz`
140
+ * Glue version 3.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-3.0/spark-3.1.1-amzn-0-bin-3.2.1-amzn-3.tgz`
141
+ * Glue version 4.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-4.0/spark-3.3.0-amzn-1-bin-3.3.3-amzn-0.tgz`
142
+ * Glue version 5.0: download the Apache Spark 3.5.4 distribution from `https://spark.apache.org`
143
+ 1. export the `SPARK_HOME` environment variable to the extracted location of the above Spark distribution. For example:
144
+ ```
145
+ Glue version 2.0: export SPARK_HOME=/home/$USER/spark-2.4.3-bin-hadoop2.8
146
+ Glue version 3.0: export SPARK_HOME=/home/$USER/spark-3.1.1-amzn-0-bin-3.2.1-amzn-3
147
+ Glue version 4.0: export SPARK_HOME=/home/$USER/spark-3.3.0-amzn-1-bin-3.3.3-amzn-0
148
+ Glue version 5.0: export SPARK_HOME=/home/$USER/spark-3.5.4-bin-hadoop3
149
+ ```
150
+ 1. now you can run the executables in the `bin` directory to start a Glue Shell or submit a Glue Spark application.
151
+ ```
152
+ Glue shell: ./bin/gluepyspark
153
+ Glue submit: ./bin/gluesparksubmit
154
+ pytest: ./bin/gluepytest
155
+ ```
156
+ (The `gluepytest` script assumes that the pytest module is installed and available in the `PATH` env variable)
157
+
158
+ ## Licensing
159
+
160
+ The libraries in this repository are licensed under the [Amazon Software License](http://aws.amazon.com/asl/) (the "License"). They may not be used except in compliance with the License, a copy of which is included here in the LICENSE file.
161
+
162
+ ---
163
+
164
+ # Release Notes
165
+
166
+ ## July 26 2023
167
+ * According to [AWS Glue version support policy](https://docs.aws.amazon.com/glue/latest/dg/glue-version-support-policy.html), branches for Glue 0.9 and 1.0 are removed as they are already deprecated.
168
+
169
+
170
+ ## August 27 2021
171
+ * The master branch has been modified from representing Glue 0.9 to Glue 3.0. We have also created a glue-0.9 branch to reflect the former state of the master branch with Glue 0.9. To rename your local clone of the older master branch and point to the glue-0.9 branch, you may use the following commands:
172
+ ```
173
+ git branch -m master glue-0.9
174
+ git fetch origin
175
+ git branch -u origin/glue-0.9 glue-0.9
176
+ git remote set-head origin -a
177
+ ```
178
+
@@ -0,0 +1,57 @@
1
+ .gitignore
2
+ LICENSE.txt
3
+ NOTICE.txt
4
+ README.md
5
+ THIRD-PARTY-LICENSES
6
+ pom.xml
7
+ pyproject.toml
8
+ setup.py
9
+ .github/workflows/publish.yml
10
+ AWSGlueDataplanePython.egg-info/PKG-INFO
11
+ AWSGlueDataplanePython.egg-info/SOURCES.txt
12
+ AWSGlueDataplanePython.egg-info/dependency_links.txt
13
+ AWSGlueDataplanePython.egg-info/not-zip-safe
14
+ AWSGlueDataplanePython.egg-info/top_level.txt
15
+ awsglue/README.md
16
+ awsglue/__init__.py
17
+ awsglue/context.py
18
+ awsglue/data_sink.py
19
+ awsglue/data_source.py
20
+ awsglue/dataframereader.py
21
+ awsglue/dataframewriter.py
22
+ awsglue/devutils.py
23
+ awsglue/dynamicframe.py
24
+ awsglue/functions.py
25
+ awsglue/glue_shell.py
26
+ awsglue/gluetypes.py
27
+ awsglue/job.py
28
+ awsglue/streaming_data_source.py
29
+ awsglue/utils.py
30
+ awsglue/dataframe_transforms/__init__.py
31
+ awsglue/dataframe_transforms/apply_mapping.py
32
+ awsglue/scripts/__init__.py
33
+ awsglue/scripts/activate_etl_connector.py
34
+ awsglue/scripts/connector_activation_util.py
35
+ awsglue/scripts/crawler_redo_from_backup.py
36
+ awsglue/scripts/crawler_undo.py
37
+ awsglue/scripts/scripts_utils.py
38
+ awsglue/transforms/__init__.py
39
+ awsglue/transforms/apply_mapping.py
40
+ awsglue/transforms/coalesce.py
41
+ awsglue/transforms/collection_transforms.py
42
+ awsglue/transforms/drop_nulls.py
43
+ awsglue/transforms/dynamicframe_filter.py
44
+ awsglue/transforms/dynamicframe_map.py
45
+ awsglue/transforms/errors_as_dynamicframe.py
46
+ awsglue/transforms/field_transforms.py
47
+ awsglue/transforms/relationalize.py
48
+ awsglue/transforms/repartition.py
49
+ awsglue/transforms/resolve_choice.py
50
+ awsglue/transforms/transform.py
51
+ awsglue/transforms/unbox.py
52
+ awsglue/transforms/union.py
53
+ awsglue/transforms/unnest_frame.py
54
+ bin/glue-setup.sh
55
+ bin/gluepyspark
56
+ bin/gluepytest
57
+ bin/gluesparksubmit
@@ -0,0 +1,96 @@
1
+ Amazon Software License 1.0
2
+
3
+ This Amazon Software License ("License") governs your use, reproduction, and
4
+ distribution of the accompanying software as specified below.
5
+
6
+ 1. Definitions
7
+
8
+ "Licensor" means any person or entity that distributes its Work.
9
+
10
+ "Software" means the original work of authorship made available under this
11
+ License.
12
+
13
+ "Work" means the Software and any additions to or derivative works of the
14
+ Software that are made available under this License.
15
+
16
+ The terms "reproduce," "reproduction," "derivative works," and
17
+ "distribution" have the meaning as provided under U.S. copyright law;
18
+ provided, however, that for the purposes of this License, derivative works
19
+ shall not include works that remain separable from, or merely link (or bind
20
+ by name) to the interfaces of, the Work.
21
+
22
+ Works, including the Software, are "made available" under this License by
23
+ including in or with the Work either (a) a copyright notice referencing the
24
+ applicability of this License to the Work, or (b) a copy of this License.
25
+
26
+ 2. License Grants
27
+
28
+ 2.1 Copyright Grant. Subject to the terms and conditions of this License,
29
+ each Licensor grants to you a perpetual, worldwide, non-exclusive,
30
+ royalty-free, copyright license to reproduce, prepare derivative works of,
31
+ publicly display, publicly perform, sublicense and distribute its Work and
32
+ any resulting derivative works in any form.
33
+
34
+ 2.2 Patent Grant. Subject to the terms and conditions of this License, each
35
+ Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free
36
+ patent license to make, have made, use, sell, offer for sale, import, and
37
+ otherwise transfer its Work, in whole or in part. The foregoing license
38
+ applies only to the patent claims licensable by Licensor that would be
39
+ infringed by Licensor's Work (or portion thereof) individually and
40
+ excluding any combinations with any other materials or technology.
41
+
42
+ 3. Limitations
43
+
44
+ 3.1 Redistribution. You may reproduce or distribute the Work only if
45
+ (a) you do so under this License, (b) you include a complete copy of this
46
+ License with your distribution, and (c) you retain without modification
47
+ any copyright, patent, trademark, or attribution notices that are present
48
+ in the Work.
49
+
50
+ 3.2 Derivative Works. You may specify that additional or different terms
51
+ apply to the use, reproduction, and distribution of your derivative works
52
+ of the Work ("Your Terms") only if (a) Your Terms provide that the use
53
+ limitation in Section 3.3 applies to your derivative works, and (b) you
54
+ identify the specific derivative works that are subject to Your Terms.
55
+ Notwithstanding Your Terms, this License (including the redistribution
56
+ requirements in Section 3.1) will continue to apply to the Work itself.
57
+
58
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be
59
+ used or intended for use with the web services, computing platforms or
60
+ applications provided by Amazon.com, Inc. or its affiliates, including
61
+ Amazon Web Services, Inc.
62
+
63
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim against
64
+ any Licensor (including any claim, cross-claim or counterclaim in a
65
+ lawsuit) to enforce any patents that you allege are infringed by any Work,
66
+ then your rights under this License from such Licensor (including the
67
+ grants in Sections 2.1 and 2.2) will terminate immediately.
68
+
69
+ 3.5 Trademarks. This License does not grant any rights to use any
70
+ Licensor's or its affiliates' names, logos, or trademarks, except as
71
+ necessary to reproduce the notices described in this License.
72
+
73
+ 3.6 Termination. If you violate any term of this License, then your rights
74
+ under this License (including the grants in Sections 2.1 and 2.2) will
75
+ terminate immediately.
76
+
77
+ 4. Disclaimer of Warranty.
78
+
79
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
80
+ EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
81
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
82
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
83
+ THIS LICENSE. SOME STATES' CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN
84
+ IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU.
85
+
86
+ 5. Limitation of Liability.
87
+
88
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
89
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
90
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
91
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR
92
+ RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING
93
+ BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS
94
+ OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES
95
+ OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF
96
+ SUCH DAMAGES.
@@ -0,0 +1,3 @@
1
+ aws-glue-libs
2
+ Copyright 2016-2026 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: AWSGlueDataplanePython
3
+ Version: 5.0.0
4
+ Summary: AWS Glue Python library for local ETL script development
5
+ License: Amazon Software License 1.0
6
+
7
+ This Amazon Software License ("License") governs your use, reproduction, and
8
+ distribution of the accompanying software as specified below.
9
+
10
+ 1. Definitions
11
+
12
+ "Licensor" means any person or entity that distributes its Work.
13
+
14
+ "Software" means the original work of authorship made available under this
15
+ License.
16
+
17
+ "Work" means the Software and any additions to or derivative works of the
18
+ Software that are made available under this License.
19
+
20
+ The terms "reproduce," "reproduction," "derivative works," and
21
+ "distribution" have the meaning as provided under U.S. copyright law;
22
+ provided, however, that for the purposes of this License, derivative works
23
+ shall not include works that remain separable from, or merely link (or bind
24
+ by name) to the interfaces of, the Work.
25
+
26
+ Works, including the Software, are "made available" under this License by
27
+ including in or with the Work either (a) a copyright notice referencing the
28
+ applicability of this License to the Work, or (b) a copy of this License.
29
+
30
+ 2. License Grants
31
+
32
+ 2.1 Copyright Grant. Subject to the terms and conditions of this License,
33
+ each Licensor grants to you a perpetual, worldwide, non-exclusive,
34
+ royalty-free, copyright license to reproduce, prepare derivative works of,
35
+ publicly display, publicly perform, sublicense and distribute its Work and
36
+ any resulting derivative works in any form.
37
+
38
+ 2.2 Patent Grant. Subject to the terms and conditions of this License, each
39
+ Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free
40
+ patent license to make, have made, use, sell, offer for sale, import, and
41
+ otherwise transfer its Work, in whole or in part. The foregoing license
42
+ applies only to the patent claims licensable by Licensor that would be
43
+ infringed by Licensor's Work (or portion thereof) individually and
44
+ excluding any combinations with any other materials or technology.
45
+
46
+ 3. Limitations
47
+
48
+ 3.1 Redistribution. You may reproduce or distribute the Work only if
49
+ (a) you do so under this License, (b) you include a complete copy of this
50
+ License with your distribution, and (c) you retain without modification
51
+ any copyright, patent, trademark, or attribution notices that are present
52
+ in the Work.
53
+
54
+ 3.2 Derivative Works. You may specify that additional or different terms
55
+ apply to the use, reproduction, and distribution of your derivative works
56
+ of the Work ("Your Terms") only if (a) Your Terms provide that the use
57
+ limitation in Section 3.3 applies to your derivative works, and (b) you
58
+ identify the specific derivative works that are subject to Your Terms.
59
+ Notwithstanding Your Terms, this License (including the redistribution
60
+ requirements in Section 3.1) will continue to apply to the Work itself.
61
+
62
+ 3.3 Use Limitation. The Work and any derivative works thereof only may be
63
+ used or intended for use with the web services, computing platforms or
64
+ applications provided by Amazon.com, Inc. or its affiliates, including
65
+ Amazon Web Services, Inc.
66
+
67
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim against
68
+ any Licensor (including any claim, cross-claim or counterclaim in a
69
+ lawsuit) to enforce any patents that you allege are infringed by any Work,
70
+ then your rights under this License from such Licensor (including the
71
+ grants in Sections 2.1 and 2.2) will terminate immediately.
72
+
73
+ 3.5 Trademarks. This License does not grant any rights to use any
74
+ Licensor's or its affiliates' names, logos, or trademarks, except as
75
+ necessary to reproduce the notices described in this License.
76
+
77
+ 3.6 Termination. If you violate any term of this License, then your rights
78
+ under this License (including the grants in Sections 2.1 and 2.2) will
79
+ terminate immediately.
80
+
81
+ 4. Disclaimer of Warranty.
82
+
83
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
84
+ EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
85
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
86
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
87
+ THIS LICENSE. SOME STATES' CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN
88
+ IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU.
89
+
90
+ 5. Limitation of Liability.
91
+
92
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
93
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
94
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
95
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR
96
+ RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING
97
+ BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS
98
+ OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES
99
+ OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF
100
+ SUCH DAMAGES.
101
+
102
+ Requires-Python: <3.12,>=3.11
103
+ Description-Content-Type: text/markdown
104
+ License-File: LICENSE.txt
105
+ License-File: NOTICE.txt
106
+ Dynamic: license-file
107
+
108
+ # aws-glue-libs
109
+
110
+ This repository supports python libraries for local development of glue pyspark batch jobs. Glue streaming is supported in the separate repository [aws-glue-streaming-libs](https://github.com/awslabs/aws-glue-streaming-libs).
111
+
112
+ ## Contents
113
+ This repository contains:
114
+ * `awsglue` - the Python library you can use to author [AWS Glue](https://aws.amazon.com/glue) ETL jobs. This library extends [Apache Spark](https://spark.apache.org/) with additional data types and operations for ETL workflows. It's an interface for the Glue ETL library in Python.
115
+ * `bin` - this directory hosts several executables that allow you to run the Python library locally or open up a PySpark shell to run Glue Spark code interactively.
116
+
117
+ ## Python versions by Glue Version
118
+
119
+ Different Glue versions support different Python versions. The following table is for your reference; it also includes the associated repository branch for each Glue version.
120
+
121
+ | Glue Version | Python 3 Version | aws-glue-libs branch |
122
+ |---|---|----------------------|
123
+ | 2.0 | 3.7 | glue-2.0 |
124
+ | 3.0 | 3.7 | glue-3.0 |
125
+ | 4.0 | 3.10 | glue-4.0 |
126
+ | 5.0 | 3.11 | glue-5.0 |
127
+
128
+ You may refer to AWS Glue's official [release notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html) for more information
129
+
130
+ ## Setup guide
131
+
132
+ If you haven't already, please refer to the [official AWS Glue Python local development documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-libraries.html#develop-local-python) for the official setup documentation. The following is a summary of the AWS documentation:
133
+
134
+ The `awsglue` library provides only the Python interface to the Glue Spark runtime; you need the Glue ETL jar to run it locally. The jar is now available via the Maven build system in an S3-backed Maven repository. Here are the steps to set up your dev environment locally.
135
+
136
+ 1. install Apache Maven from the following location: https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-common/apache-maven-3.6.0-bin.tar.gz
137
+ 1. use `copy-dependencies` target in Apache Maven to download the jar from S3 to your local dev environment.
138
+ 1. download and extract the Apache Spark distribution based on the Glue version you're using:
139
+ * Glue version 2.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-2.0/spark-2.4.3-bin-hadoop2.8.tgz`
140
+ * Glue version 3.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-3.0/spark-3.1.1-amzn-0-bin-3.2.1-amzn-3.tgz`
141
+ * Glue version 4.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-4.0/spark-3.3.0-amzn-1-bin-3.3.3-amzn-0.tgz`
142
+ * Glue version 5.0: download the Apache Spark 3.5.4 distribution from `https://spark.apache.org`
143
+ 1. export the `SPARK_HOME` environmental variable to the extracted location of the above Spark distribution. For example:
144
+ ```
145
+ Glue version 2.0: export SPARK_HOME=/home/$USER/spark-2.4.3-bin-hadoop2.8
146
+ Glue version 3.0: export SPARK_HOME=/home/$USER/spark-3.1.1-amzn-0-bin-3.2.1-amzn-3
147
+ Glue version 4.0: export SPARK_HOME=/home/$USER/spark-3.3.0-amzn-1-bin-3.3.3-amzn-0
148
+ Glue version 5.0: export SPARK_HOME=/home/$USER/spark-3.5.4-bin-hadoop3
149
+ ```
150
+ 1. now you can run the executables in the `bin` directory to start a Glue Shell or submit a Glue Spark application.
151
+ ```
152
+ Glue shell: ./bin/gluepyspark
153
+ Glue submit: ./bin/gluesparksubmit
154
+ pytest: ./bin/gluepytest
155
+ ```
156
+ (The `gluepytest` script assumes that the pytest module is installed and available in the `PATH` env variable)
157
+
158
+ ## Licensing
159
+
160
+ The libraries in this repository are licensed under the [Amazon Software License](http://aws.amazon.com/asl/) (the "License"). They may not be used except in compliance with the License, a copy of which is included here in the LICENSE file.
161
+
162
+ ---
163
+
164
+ # Release Notes
165
+
166
+ ## July 26 2023
167
+ * According to [AWS Glue version support policy](https://docs.aws.amazon.com/glue/latest/dg/glue-version-support-policy.html), branches for Glue 0.9 and 1.0 are removed as they are already deprecated.
168
+
169
+
170
+ ## August 27 2021
171
+ * The master branch has been modified from representing Glue 0.9 to Glue 3.0. We have also created a glue-0.9 branch to reflect the former state of the master branch with Glue 0.9. To rename your local clone of the older master branch and point to the glue-0.9 branch, you may use the following commands:
172
+ ```
173
+ git branch -m master glue-0.9
174
+ git fetch origin
175
+ git branch -u origin/glue-0.9 glue-0.9
176
+ git remote set-head origin -a
177
+ ```
178
+
@@ -0,0 +1,71 @@
1
+ # aws-glue-libs
2
+
3
+ This repository provides Python libraries for local development of AWS Glue PySpark batch jobs. Glue streaming is supported in the separate repository [aws-glue-streaming-libs](https://github.com/awslabs/aws-glue-streaming-libs).
4
+
5
+ ## Contents
6
+ This repository contains:
7
+ * `awsglue` - the Python library you can use to author [AWS Glue](https://aws.amazon.com/glue) ETL jobs. This library extends [Apache Spark](https://spark.apache.org/) with additional data types and operations for ETL workflows. It's an interface for the Glue ETL library in Python.
8
+ * `bin` - this directory hosts several executables that allow you to run the Python library locally or open up a PySpark shell to run Glue Spark code interactively.
9
+
10
+ ## Python versions by Glue Version
11
+
12
+ Different Glue versions support different Python versions. The table below is for your reference; it also lists the associated repository branch for each Glue version.
13
+
14
+ | Glue Version | Python 3 Version | aws-glue-libs branch |
15
+ |---|---|----------------------|
16
+ | 2.0 | 3.7 | glue-2.0 |
17
+ | 3.0 | 3.7 | glue-3.0 |
18
+ | 4.0 | 3.10 | glue-4.0 |
19
+ | 5.0 | 3.11 | glue-5.0 |
20
+
21
+ You may refer to AWS Glue's official [release notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html) for more information.
22
+
23
+ ## Setup guide
24
+
25
+ If you haven't already, please refer to the [official AWS Glue Python local development documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-libraries.html#develop-local-python) for the official setup documentation. The following is a summary of the AWS documentation:
26
+
27
+ The `awsglue` library provides only the Python interface to the Glue Spark runtime; you need the Glue ETL jar to run it locally. The jar is now available via the Maven build system in an S3-backed Maven repository. Here are the steps to set up your dev environment locally.
28
+
29
+ 1. install Apache Maven from the following location: https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-common/apache-maven-3.6.0-bin.tar.gz
30
+ 1. use the `copy-dependencies` target in Apache Maven to download the jar from S3 to your local dev environment.
31
+ 1. download and extract the Apache Spark distribution based on the Glue version you're using:
32
+ * Glue version 2.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-2.0/spark-2.4.3-bin-hadoop2.8.tgz`
33
+ * Glue version 3.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-3.0/spark-3.1.1-amzn-0-bin-3.2.1-amzn-3.tgz`
34
+ * Glue version 4.0: `https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-4.0/spark-3.3.0-amzn-1-bin-3.3.3-amzn-0.tgz`
35
+ * Glue version 5.0: download the Apache Spark 3.5.4 distribution from `https://spark.apache.org`
36
+ 1. export the `SPARK_HOME` environment variable, pointing it to the extracted location of the above Spark distribution. For example:
37
+ ```
38
+ Glue version 2.0: export SPARK_HOME=/home/$USER/spark-2.4.3-bin-hadoop2.8
39
+ Glue version 3.0: export SPARK_HOME=/home/$USER/spark-3.1.1-amzn-0-bin-3.2.1-amzn-3
40
+ Glue version 4.0: export SPARK_HOME=/home/$USER/spark-3.3.0-amzn-1-bin-3.3.3-amzn-0
41
+ Glue version 5.0: export SPARK_HOME=/home/$USER/spark-3.5.4-bin-hadoop3
42
+ ```
43
+ 1. now you can run the executables in the `bin` directory to start a Glue Shell or submit a Glue Spark application.
44
+ ```
45
+ Glue shell: ./bin/gluepyspark
46
+ Glue submit: ./bin/gluesparksubmit
47
+ pytest: ./bin/gluepytest
48
+ ```
49
+ (The `gluepytest` script assumes that the pytest module is installed and available in the `PATH` env variable)
50
+
51
+ ## Licensing
52
+
53
+ The libraries in this repository are licensed under the [Amazon Software License](http://aws.amazon.com/asl/) (the "License"). They may not be used except in compliance with the License, a copy of which is included here in the LICENSE file.
54
+
55
+ ---
56
+
57
+ # Release Notes
58
+
59
+ ## July 26 2023
60
+ * According to [AWS Glue version support policy](https://docs.aws.amazon.com/glue/latest/dg/glue-version-support-policy.html), branches for Glue 0.9 and 1.0 are removed as they are already deprecated.
61
+
62
+
63
+ ## August 27 2021
64
+ * The master branch has been changed from representing Glue 0.9 to representing Glue 3.0. We have also created a glue-0.9 branch to reflect the former state of the master branch with Glue 0.9. To rename your local clone of the older master branch and point it to the glue-0.9 branch, you may use the following commands:
65
+ ```
66
+ git branch -m master glue-0.9
67
+ git fetch origin
68
+ git branch -u origin/glue-0.9 glue-0.9
69
+ git remote set-head origin -a
70
+ ```
71
+