apache-gravitino 0.1.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. apache-gravitino-0.1.0.dev1/MANIFEST.in +22 -0
  2. apache-gravitino-0.1.0.dev1/PKG-INFO +159 -0
  3. apache-gravitino-0.1.0.dev1/README.md +120 -0
  4. apache-gravitino-0.1.0.dev1/apache_gravitino.egg-info/PKG-INFO +159 -0
  5. apache-gravitino-0.1.0.dev1/apache_gravitino.egg-info/SOURCES.txt +101 -0
  6. apache-gravitino-0.1.0.dev1/apache_gravitino.egg-info/dependency_links.txt +1 -0
  7. apache-gravitino-0.1.0.dev1/apache_gravitino.egg-info/requires.txt +22 -0
  8. apache-gravitino-0.1.0.dev1/apache_gravitino.egg-info/top_level.txt +1 -0
  9. apache-gravitino-0.1.0.dev1/gravitino/__init__.py +30 -0
  10. apache-gravitino-0.1.0.dev1/gravitino/api/__init__.py +18 -0
  11. apache-gravitino-0.1.0.dev1/gravitino/api/audit.py +59 -0
  12. apache-gravitino-0.1.0.dev1/gravitino/api/auditable.py +33 -0
  13. apache-gravitino-0.1.0.dev1/gravitino/api/catalog.py +149 -0
  14. apache-gravitino-0.1.0.dev1/gravitino/api/catalog_change.py +269 -0
  15. apache-gravitino-0.1.0.dev1/gravitino/api/fileset.py +114 -0
  16. apache-gravitino-0.1.0.dev1/gravitino/api/fileset_change.py +314 -0
  17. apache-gravitino-0.1.0.dev1/gravitino/api/metalake.py +59 -0
  18. apache-gravitino-0.1.0.dev1/gravitino/api/metalake_change.py +130 -0
  19. apache-gravitino-0.1.0.dev1/gravitino/api/schema.py +47 -0
  20. apache-gravitino-0.1.0.dev1/gravitino/api/schema_change.py +153 -0
  21. apache-gravitino-0.1.0.dev1/gravitino/api/supports_schemas.py +139 -0
  22. apache-gravitino-0.1.0.dev1/gravitino/auth/__init__.py +18 -0
  23. apache-gravitino-0.1.0.dev1/gravitino/auth/auth_constants.py +26 -0
  24. apache-gravitino-0.1.0.dev1/gravitino/auth/auth_data_provider.py +47 -0
  25. apache-gravitino-0.1.0.dev1/gravitino/auth/default_oauth2_token_provider.py +136 -0
  26. apache-gravitino-0.1.0.dev1/gravitino/auth/oauth2_token_provider.py +75 -0
  27. apache-gravitino-0.1.0.dev1/gravitino/auth/simple_auth_provider.py +56 -0
  28. apache-gravitino-0.1.0.dev1/gravitino/catalog/__init__.py +18 -0
  29. apache-gravitino-0.1.0.dev1/gravitino/catalog/base_schema_catalog.py +249 -0
  30. apache-gravitino-0.1.0.dev1/gravitino/catalog/fileset_catalog.py +285 -0
  31. apache-gravitino-0.1.0.dev1/gravitino/client/__init__.py +18 -0
  32. apache-gravitino-0.1.0.dev1/gravitino/client/gravitino_admin_client.py +136 -0
  33. apache-gravitino-0.1.0.dev1/gravitino/client/gravitino_client.py +95 -0
  34. apache-gravitino-0.1.0.dev1/gravitino/client/gravitino_client_base.py +153 -0
  35. apache-gravitino-0.1.0.dev1/gravitino/client/gravitino_metalake.py +202 -0
  36. apache-gravitino-0.1.0.dev1/gravitino/client/gravitino_version.py +84 -0
  37. apache-gravitino-0.1.0.dev1/gravitino/constants/__init__.py +18 -0
  38. apache-gravitino-0.1.0.dev1/gravitino/constants/doc.py +22 -0
  39. apache-gravitino-0.1.0.dev1/gravitino/constants/error.py +71 -0
  40. apache-gravitino-0.1.0.dev1/gravitino/constants/root.py +24 -0
  41. apache-gravitino-0.1.0.dev1/gravitino/constants/timeout.py +20 -0
  42. apache-gravitino-0.1.0.dev1/gravitino/constants/version.py +32 -0
  43. apache-gravitino-0.1.0.dev1/gravitino/dto/__init__.py +18 -0
  44. apache-gravitino-0.1.0.dev1/gravitino/dto/audit_dto.py +78 -0
  45. apache-gravitino-0.1.0.dev1/gravitino/dto/catalog_dto.py +72 -0
  46. apache-gravitino-0.1.0.dev1/gravitino/dto/dto_converters.py +88 -0
  47. apache-gravitino-0.1.0.dev1/gravitino/dto/fileset_dto.py +58 -0
  48. apache-gravitino-0.1.0.dev1/gravitino/dto/metalake_dto.py +79 -0
  49. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/__init__.py +18 -0
  50. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/catalog_create_request.py +67 -0
  51. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/catalog_update_request.py +128 -0
  52. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/catalog_updates_request.py +50 -0
  53. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/fileset_create_request.py +64 -0
  54. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/fileset_update_request.py +167 -0
  55. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/fileset_updates_request.py +41 -0
  56. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/metalake_create_request.py +53 -0
  57. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/metalake_update_request.py +144 -0
  58. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/metalake_updates_request.py +50 -0
  59. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/oauth2_client_credential_request.py +33 -0
  60. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/schema_create_request.py +46 -0
  61. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/schema_update_request.py +99 -0
  62. apache-gravitino-0.1.0.dev1/gravitino/dto/requests/schema_updates_request.py +49 -0
  63. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/__init__.py +18 -0
  64. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/base_response.py +40 -0
  65. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/catalog_list_response.py +36 -0
  66. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/catalog_response.py +50 -0
  67. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/drop_response.py +34 -0
  68. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/entity_list_response.py +46 -0
  69. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/error_response.py +79 -0
  70. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/fileset_response.py +51 -0
  71. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/metalake_list_response.py +53 -0
  72. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/metalake_response.py +50 -0
  73. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/oauth2_error_response.py +40 -0
  74. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/oauth2_token_response.py +55 -0
  75. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/schema_response.py +51 -0
  76. apache-gravitino-0.1.0.dev1/gravitino/dto/responses/version_response.py +53 -0
  77. apache-gravitino-0.1.0.dev1/gravitino/dto/schema_dto.py +60 -0
  78. apache-gravitino-0.1.0.dev1/gravitino/dto/version_dto.py +44 -0
  79. apache-gravitino-0.1.0.dev1/gravitino/exceptions/__init__.py +18 -0
  80. apache-gravitino-0.1.0.dev1/gravitino/exceptions/base.py +103 -0
  81. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/__init__.py +18 -0
  82. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/error_handler.py +40 -0
  83. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/fileset_error_handler.py +43 -0
  84. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/metalake_error_handler.py +44 -0
  85. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/oauth_error_handler.py +58 -0
  86. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/rest_error_handler.py +40 -0
  87. apache-gravitino-0.1.0.dev1/gravitino/exceptions/handlers/schema_error_handler.py +49 -0
  88. apache-gravitino-0.1.0.dev1/gravitino/filesystem/__init__.py +18 -0
  89. apache-gravitino-0.1.0.dev1/gravitino/filesystem/gvfs.py +720 -0
  90. apache-gravitino-0.1.0.dev1/gravitino/filesystem/gvfs_config.py +29 -0
  91. apache-gravitino-0.1.0.dev1/gravitino/name_identifier.py +146 -0
  92. apache-gravitino-0.1.0.dev1/gravitino/namespace.py +139 -0
  93. apache-gravitino-0.1.0.dev1/gravitino/rest/__init__.py +18 -0
  94. apache-gravitino-0.1.0.dev1/gravitino/rest/rest_message.py +52 -0
  95. apache-gravitino-0.1.0.dev1/gravitino/rest/rest_utils.py +27 -0
  96. apache-gravitino-0.1.0.dev1/gravitino/typing.py +25 -0
  97. apache-gravitino-0.1.0.dev1/gravitino/utils/__init__.py +20 -0
  98. apache-gravitino-0.1.0.dev1/gravitino/utils/http_client.py +262 -0
  99. apache-gravitino-0.1.0.dev1/gravitino/version.ini +21 -0
  100. apache-gravitino-0.1.0.dev1/requirements-dev.txt +31 -0
  101. apache-gravitino-0.1.0.dev1/requirements.txt +25 -0
  102. apache-gravitino-0.1.0.dev1/setup.cfg +4 -0
  103. apache-gravitino-0.1.0.dev1/setup.py +55 -0
@@ -0,0 +1,22 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+
19
+ include requirements.txt
20
+ include requirements-dev.txt
21
+ include README.md
22
+ include gravitino/version.ini
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.1
2
+ Name: apache-gravitino
3
+ Version: 0.1.0.dev1
4
+ Summary: Python lib/client for Apache Gravitino
5
+ Home-page: https://github.com/apache/gravitino
6
+ Author: apache-gravitino
7
+ Author-email: dev@gravitino.apache.org
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ Requires-Dist: requests==2.32.2
19
+ Requires-Dist: dataclasses-json==0.6.6
20
+ Requires-Dist: readerwriterlock==1.0.9
21
+ Requires-Dist: fsspec==2024.3.1
22
+ Requires-Dist: pyarrow==15.0.2
23
+ Requires-Dist: cachetools==5.3.3
24
+ Provides-Extra: dev
25
+ Requires-Dist: requests==2.32.2; extra == "dev"
26
+ Requires-Dist: dataclasses-json==0.6.6; extra == "dev"
27
+ Requires-Dist: pylint==3.2.2; extra == "dev"
28
+ Requires-Dist: black==24.4.2; extra == "dev"
29
+ Requires-Dist: twine==5.1.1; extra == "dev"
30
+ Requires-Dist: coverage==7.5.1; extra == "dev"
31
+ Requires-Dist: pandas==2.0.3; extra == "dev"
32
+ Requires-Dist: pyarrow==15.0.2; extra == "dev"
33
+ Requires-Dist: llama-index==0.10.40; extra == "dev"
34
+ Requires-Dist: tenacity==8.3.0; extra == "dev"
35
+ Requires-Dist: cachetools==5.3.3; extra == "dev"
36
+ Requires-Dist: readerwriterlock==1.0.9; extra == "dev"
37
+ Requires-Dist: docker==7.1.0; extra == "dev"
38
+ Requires-Dist: pyjwt[crypto]==2.8.0; extra == "dev"
39
+
40
+ ## Introduction
41
+
42
+ Apache Gravitino is a high-performance, geo-distributed, and federated metadata lake.
43
+ It manages the metadata directly in different sources, types, and regions, also provides users
44
+ the unified metadata access for data and AI assets.
45
+
46
+ Gravitino Python client helps data scientists easily manage metadata using Python language.
47
+
48
+ ![gravitino-python-client-introduction](https://raw.githubusercontent.com/apache/gravitino/main/docs/assets/gravitino-python-client-introduction.png)
49
+
50
+ ## Use Guidance
51
+
52
+ You can use Gravitino Python client library with Spark, PyTorch, Tensorflow, Ray and Python environment.
53
+
54
+ First of all, you must have a Gravitino server set up and running. You can refer to the document
55
+ [How to install Gravitino](https://datastrato.ai/docs/latest/how-to-install/) to build Gravitino server from source code and
56
+ install it locally.
57
+
58
+ ### Apache Gravitino Python client API
59
+
60
+ ```shell
61
+ pip install apache-gravitino
62
+ ```
63
+
64
+ 1. [Manage metalake using Gravitino Python API](https://datastrato.ai/docs/latest/manage-metalake-using-gravitino/?language=python)
65
+ 2. [Manage fileset metadata using Gravitino Python API](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/?language=python)
66
+
67
+ ### Apache Gravitino Fileset Example
68
+
69
+ We offer a playground environment to help you quickly understand how to use Gravitino Python
70
+ client to manage non-tabular data on HDFS via Fileset in Gravitino. You can refer to the
71
+ document [How to use the playground#Launch AI components of playground](https://datastrato.ai/docs/latest/how-to-use-the-playground/#launch-ai-components-of-playground)
72
+ to launch a Gravitino server, HDFS and Jupyter notebook environment in your local Docker environment.
73
+
74
+ Once the playground Docker environment has started, you can directly open
75
+ `http://localhost:8888/lab/tree/gravitino-fileset-example.ipynb` in the browser and run the example.
76
+
77
+ The [gravitino-fileset-example](https://github.com/apache/gravitino-playground/blob/main/init/jupyter/gravitino-fileset-example.ipynb)
78
+ contains the following code snippets:
79
+
80
+ 1. Install HDFS Python client.
81
+ 2. Create a HDFS client to connect HDFS and to do some test operations.
82
+ 3. Install Gravitino Python client.
83
+ 4. Initialize Gravitino admin client and create a Gravitino metalake.
84
+ 5. Initialize Gravitino client and list metalakes.
85
+ 6. Create a Gravitino `Catalog` and special `type` is `Catalog.Type.FILESET` and `provider` is
86
+ [hadoop](https://datastrato.ai/docs/latest/hadoop-catalog/)
87
+ 7. Create a Gravitino `Schema` with the `location` pointed to a HDFS path, and use `hdfs client` to
88
+ check if the schema location is successfully created in HDFS.
89
+ 8. Create a `Fileset` with `type` is [Fileset.Type.MANAGED](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/#fileset-operations),
90
+ use `hdfs client` to check if the fileset location was successfully created in HDFS.
91
+ 9. Drop this `Fileset.Type.MANAGED` type fileset and check if the fileset location was
92
+ successfully deleted in HDFS.
93
+ 10. Create a `Fileset` with `type` is [Fileset.Type.EXTERNAL](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/#fileset-operations)
94
+ and `location` pointing to an existing HDFS path.
95
+ 11. Drop this `Fileset.Type.EXTERNAL` type fileset and check if the fileset location was
96
+ not deleted in HDFS.
97
+
98
+ ## How to develop Apache Gravitino Python Client
99
+
100
+ You can use any IDE to develop Gravitino Python Client. Directly open the client-python module project in the IDE.
101
+
102
+ ### Prerequisites
103
+
104
+ + Python 3.8+
105
+ + Refer to [How to build Gravitino](https://datastrato.ai/docs/latest/how-to-build/#prerequisites) to have necessary build
106
+ environment ready for building.
107
+
108
+ ### Build and testing
109
+
110
+ 1. Clone the Gravitino project.
111
+
112
+ ```shell
113
+ git clone git@github.com:apache/gravitino.git
114
+ ```
115
+
116
+ 2. Build the Gravitino Python client module
117
+
118
+ ```shell
119
+ ./gradlew :clients:client-python:build
120
+ ```
121
+
122
+ 3. Run unit tests
123
+
124
+ ```shell
125
+ ./gradlew :clients:client-python:test -PskipITs
126
+ ```
127
+
128
+ 4. Run integration tests
129
+
130
+ Because Python client connects to Gravitino Server to run integration tests,
131
+ it runs the `./gradlew compileDistribution -x test` command automatically to compile the
132
+ Gravitino project in the `distribution` directory. When you run integration tests via Gradle
133
+ command or IDE, Gravitino integration test framework (`integration_test_env.py`)
134
+ will start and stop Gravitino server automatically.
135
+
136
+ ```shell
137
+ ./gradlew :clients:client-python:test
138
+ ```
139
+
140
+ 5. Distribute the Gravitino Python client module
141
+
142
+ ```shell
143
+ ./gradlew :clients:client-python:distribution
144
+ ```
145
+
146
+ 6. Deploy the Gravitino Python client to https://pypi.org/project/apache-gravitino/
147
+
148
+ ```shell
149
+ ./gradlew :clients:client-python:deploy
150
+ ```
151
+
152
+ ## Resources
153
+
154
+ + Official website https://gravitino.apache.org/ (coming soon)
155
+ + Project home on GitHub: https://github.com/apache/gravitino/
156
+ + Playground with Docker: https://github.com/apache/gravitino-playground
157
+ + User documentation: https://datastrato.ai/docs/
158
+ + Videos on Youtube: https://www.youtube.com/@Datastrato
159
+ Slack Community: https://the-asf.slack.com
@@ -0,0 +1,120 @@
1
+ ## Introduction
2
+
3
+ Apache Gravitino is a high-performance, geo-distributed, and federated metadata lake.
4
+ It manages the metadata directly in different sources, types, and regions, also provides users
5
+ the unified metadata access for data and AI assets.
6
+
7
+ Gravitino Python client helps data scientists easily manage metadata using Python language.
8
+
9
+ ![gravitino-python-client-introduction](https://raw.githubusercontent.com/apache/gravitino/main/docs/assets/gravitino-python-client-introduction.png)
10
+
11
+ ## Use Guidance
12
+
13
+ You can use Gravitino Python client library with Spark, PyTorch, Tensorflow, Ray and Python environment.
14
+
15
+ First of all, you must have a Gravitino server set up and running. You can refer to the document
16
+ [How to install Gravitino](https://datastrato.ai/docs/latest/how-to-install/) to build Gravitino server from source code and
17
+ install it locally.
18
+
19
+ ### Apache Gravitino Python client API
20
+
21
+ ```shell
22
+ pip install apache-gravitino
23
+ ```
24
+
25
+ 1. [Manage metalake using Gravitino Python API](https://datastrato.ai/docs/latest/manage-metalake-using-gravitino/?language=python)
26
+ 2. [Manage fileset metadata using Gravitino Python API](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/?language=python)
27
+
28
+ ### Apache Gravitino Fileset Example
29
+
30
+ We offer a playground environment to help you quickly understand how to use Gravitino Python
31
+ client to manage non-tabular data on HDFS via Fileset in Gravitino. You can refer to the
32
+ document [How to use the playground#Launch AI components of playground](https://datastrato.ai/docs/latest/how-to-use-the-playground/#launch-ai-components-of-playground)
33
+ to launch a Gravitino server, HDFS and Jupyter notebook environment in your local Docker environment.
34
+
35
+ Once the playground Docker environment has started, you can directly open
36
+ `http://localhost:8888/lab/tree/gravitino-fileset-example.ipynb` in the browser and run the example.
37
+
38
+ The [gravitino-fileset-example](https://github.com/apache/gravitino-playground/blob/main/init/jupyter/gravitino-fileset-example.ipynb)
39
+ contains the following code snippets:
40
+
41
+ 1. Install HDFS Python client.
42
+ 2. Create a HDFS client to connect HDFS and to do some test operations.
43
+ 3. Install Gravitino Python client.
44
+ 4. Initialize Gravitino admin client and create a Gravitino metalake.
45
+ 5. Initialize Gravitino client and list metalakes.
46
+ 6. Create a Gravitino `Catalog` and special `type` is `Catalog.Type.FILESET` and `provider` is
47
+ [hadoop](https://datastrato.ai/docs/latest/hadoop-catalog/)
48
+ 7. Create a Gravitino `Schema` with the `location` pointed to a HDFS path, and use `hdfs client` to
49
+ check if the schema location is successfully created in HDFS.
50
+ 8. Create a `Fileset` with `type` is [Fileset.Type.MANAGED](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/#fileset-operations),
51
+ use `hdfs client` to check if the fileset location was successfully created in HDFS.
52
+ 9. Drop this `Fileset.Type.MANAGED` type fileset and check if the fileset location was
53
+ successfully deleted in HDFS.
54
+ 10. Create a `Fileset` with `type` is [Fileset.Type.EXTERNAL](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/#fileset-operations)
55
+ and `location` pointing to an existing HDFS path.
56
+ 11. Drop this `Fileset.Type.EXTERNAL` type fileset and check if the fileset location was
57
+ not deleted in HDFS.
58
+
59
+ ## How to develop Apache Gravitino Python Client
60
+
61
+ You can use any IDE to develop Gravitino Python Client. Directly open the client-python module project in the IDE.
62
+
63
+ ### Prerequisites
64
+
65
+ + Python 3.8+
66
+ + Refer to [How to build Gravitino](https://datastrato.ai/docs/latest/how-to-build/#prerequisites) to have necessary build
67
+ environment ready for building.
68
+
69
+ ### Build and testing
70
+
71
+ 1. Clone the Gravitino project.
72
+
73
+ ```shell
74
+ git clone git@github.com:apache/gravitino.git
75
+ ```
76
+
77
+ 2. Build the Gravitino Python client module
78
+
79
+ ```shell
80
+ ./gradlew :clients:client-python:build
81
+ ```
82
+
83
+ 3. Run unit tests
84
+
85
+ ```shell
86
+ ./gradlew :clients:client-python:test -PskipITs
87
+ ```
88
+
89
+ 4. Run integration tests
90
+
91
+ Because Python client connects to Gravitino Server to run integration tests,
92
+ it runs the `./gradlew compileDistribution -x test` command automatically to compile the
93
+ Gravitino project in the `distribution` directory. When you run integration tests via Gradle
94
+ command or IDE, Gravitino integration test framework (`integration_test_env.py`)
95
+ will start and stop Gravitino server automatically.
96
+
97
+ ```shell
98
+ ./gradlew :clients:client-python:test
99
+ ```
100
+
101
+ 5. Distribute the Gravitino Python client module
102
+
103
+ ```shell
104
+ ./gradlew :clients:client-python:distribution
105
+ ```
106
+
107
+ 6. Deploy the Gravitino Python client to https://pypi.org/project/apache-gravitino/
108
+
109
+ ```shell
110
+ ./gradlew :clients:client-python:deploy
111
+ ```
112
+
113
+ ## Resources
114
+
115
+ + Official website https://gravitino.apache.org/ (coming soon)
116
+ + Project home on GitHub: https://github.com/apache/gravitino/
117
+ + Playground with Docker: https://github.com/apache/gravitino-playground
118
+ + User documentation: https://datastrato.ai/docs/
119
+ + Videos on Youtube: https://www.youtube.com/@Datastrato
120
+ Slack Community: https://the-asf.slack.com
@@ -0,0 +1,159 @@
1
+ Metadata-Version: 2.1
2
+ Name: apache-gravitino
3
+ Version: 0.1.0.dev1
4
+ Summary: Python lib/client for Apache Gravitino
5
+ Home-page: https://github.com/apache/gravitino
6
+ Author: apache-gravitino
7
+ Author-email: dev@gravitino.apache.org
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ Requires-Dist: requests==2.32.2
19
+ Requires-Dist: dataclasses-json==0.6.6
20
+ Requires-Dist: readerwriterlock==1.0.9
21
+ Requires-Dist: fsspec==2024.3.1
22
+ Requires-Dist: pyarrow==15.0.2
23
+ Requires-Dist: cachetools==5.3.3
24
+ Provides-Extra: dev
25
+ Requires-Dist: requests==2.32.2; extra == "dev"
26
+ Requires-Dist: dataclasses-json==0.6.6; extra == "dev"
27
+ Requires-Dist: pylint==3.2.2; extra == "dev"
28
+ Requires-Dist: black==24.4.2; extra == "dev"
29
+ Requires-Dist: twine==5.1.1; extra == "dev"
30
+ Requires-Dist: coverage==7.5.1; extra == "dev"
31
+ Requires-Dist: pandas==2.0.3; extra == "dev"
32
+ Requires-Dist: pyarrow==15.0.2; extra == "dev"
33
+ Requires-Dist: llama-index==0.10.40; extra == "dev"
34
+ Requires-Dist: tenacity==8.3.0; extra == "dev"
35
+ Requires-Dist: cachetools==5.3.3; extra == "dev"
36
+ Requires-Dist: readerwriterlock==1.0.9; extra == "dev"
37
+ Requires-Dist: docker==7.1.0; extra == "dev"
38
+ Requires-Dist: pyjwt[crypto]==2.8.0; extra == "dev"
39
+
40
+ ## Introduction
41
+
42
+ Apache Gravitino is a high-performance, geo-distributed, and federated metadata lake.
43
+ It manages the metadata directly in different sources, types, and regions, also provides users
44
+ the unified metadata access for data and AI assets.
45
+
46
+ Gravitino Python client helps data scientists easily manage metadata using Python language.
47
+
48
+ ![gravitino-python-client-introduction](https://raw.githubusercontent.com/apache/gravitino/main/docs/assets/gravitino-python-client-introduction.png)
49
+
50
+ ## Use Guidance
51
+
52
+ You can use Gravitino Python client library with Spark, PyTorch, Tensorflow, Ray and Python environment.
53
+
54
+ First of all, you must have a Gravitino server set up and running. You can refer to the document
55
+ [How to install Gravitino](https://datastrato.ai/docs/latest/how-to-install/) to build Gravitino server from source code and
56
+ install it locally.
57
+
58
+ ### Apache Gravitino Python client API
59
+
60
+ ```shell
61
+ pip install apache-gravitino
62
+ ```
63
+
64
+ 1. [Manage metalake using Gravitino Python API](https://datastrato.ai/docs/latest/manage-metalake-using-gravitino/?language=python)
65
+ 2. [Manage fileset metadata using Gravitino Python API](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/?language=python)
66
+
67
+ ### Apache Gravitino Fileset Example
68
+
69
+ We offer a playground environment to help you quickly understand how to use Gravitino Python
70
+ client to manage non-tabular data on HDFS via Fileset in Gravitino. You can refer to the
71
+ document [How to use the playground#Launch AI components of playground](https://datastrato.ai/docs/latest/how-to-use-the-playground/#launch-ai-components-of-playground)
72
+ to launch a Gravitino server, HDFS and Jupyter notebook environment in your local Docker environment.
73
+
74
+ Once the playground Docker environment has started, you can directly open
75
+ `http://localhost:8888/lab/tree/gravitino-fileset-example.ipynb` in the browser and run the example.
76
+
77
+ The [gravitino-fileset-example](https://github.com/apache/gravitino-playground/blob/main/init/jupyter/gravitino-fileset-example.ipynb)
78
+ contains the following code snippets:
79
+
80
+ 1. Install HDFS Python client.
81
+ 2. Create a HDFS client to connect HDFS and to do some test operations.
82
+ 3. Install Gravitino Python client.
83
+ 4. Initialize Gravitino admin client and create a Gravitino metalake.
84
+ 5. Initialize Gravitino client and list metalakes.
85
+ 6. Create a Gravitino `Catalog` and special `type` is `Catalog.Type.FILESET` and `provider` is
86
+ [hadoop](https://datastrato.ai/docs/latest/hadoop-catalog/)
87
+ 7. Create a Gravitino `Schema` with the `location` pointed to a HDFS path, and use `hdfs client` to
88
+ check if the schema location is successfully created in HDFS.
89
+ 8. Create a `Fileset` with `type` is [Fileset.Type.MANAGED](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/#fileset-operations),
90
+ use `hdfs client` to check if the fileset location was successfully created in HDFS.
91
+ 9. Drop this `Fileset.Type.MANAGED` type fileset and check if the fileset location was
92
+ successfully deleted in HDFS.
93
+ 10. Create a `Fileset` with `type` is [Fileset.Type.EXTERNAL](https://datastrato.ai/docs/latest/manage-fileset-metadata-using-gravitino/#fileset-operations)
94
+ and `location` pointing to an existing HDFS path.
95
+ 11. Drop this `Fileset.Type.EXTERNAL` type fileset and check if the fileset location was
96
+ not deleted in HDFS.
97
+
98
+ ## How to develop Apache Gravitino Python Client
99
+
100
+ You can use any IDE to develop Gravitino Python Client. Directly open the client-python module project in the IDE.
101
+
102
+ ### Prerequisites
103
+
104
+ + Python 3.8+
105
+ + Refer to [How to build Gravitino](https://datastrato.ai/docs/latest/how-to-build/#prerequisites) to have necessary build
106
+ environment ready for building.
107
+
108
+ ### Build and testing
109
+
110
+ 1. Clone the Gravitino project.
111
+
112
+ ```shell
113
+ git clone git@github.com:apache/gravitino.git
114
+ ```
115
+
116
+ 2. Build the Gravitino Python client module
117
+
118
+ ```shell
119
+ ./gradlew :clients:client-python:build
120
+ ```
121
+
122
+ 3. Run unit tests
123
+
124
+ ```shell
125
+ ./gradlew :clients:client-python:test -PskipITs
126
+ ```
127
+
128
+ 4. Run integration tests
129
+
130
+ Because Python client connects to Gravitino Server to run integration tests,
131
+ it runs the `./gradlew compileDistribution -x test` command automatically to compile the
132
+ Gravitino project in the `distribution` directory. When you run integration tests via Gradle
133
+ command or IDE, Gravitino integration test framework (`integration_test_env.py`)
134
+ will start and stop Gravitino server automatically.
135
+
136
+ ```shell
137
+ ./gradlew :clients:client-python:test
138
+ ```
139
+
140
+ 5. Distribute the Gravitino Python client module
141
+
142
+ ```shell
143
+ ./gradlew :clients:client-python:distribution
144
+ ```
145
+
146
+ 6. Deploy the Gravitino Python client to https://pypi.org/project/apache-gravitino/
147
+
148
+ ```shell
149
+ ./gradlew :clients:client-python:deploy
150
+ ```
151
+
152
+ ## Resources
153
+
154
+ + Official website https://gravitino.apache.org/ (coming soon)
155
+ + Project home on GitHub: https://github.com/apache/gravitino/
156
+ + Playground with Docker: https://github.com/apache/gravitino-playground
157
+ + User documentation: https://datastrato.ai/docs/
158
+ + Videos on Youtube: https://www.youtube.com/@Datastrato
159
+ Slack Community: https://the-asf.slack.com
@@ -0,0 +1,101 @@
1
+ MANIFEST.in
2
+ README.md
3
+ requirements-dev.txt
4
+ requirements.txt
5
+ setup.py
6
+ apache_gravitino.egg-info/PKG-INFO
7
+ apache_gravitino.egg-info/SOURCES.txt
8
+ apache_gravitino.egg-info/dependency_links.txt
9
+ apache_gravitino.egg-info/requires.txt
10
+ apache_gravitino.egg-info/top_level.txt
11
+ gravitino/__init__.py
12
+ gravitino/name_identifier.py
13
+ gravitino/namespace.py
14
+ gravitino/typing.py
15
+ gravitino/version.ini
16
+ gravitino/api/__init__.py
17
+ gravitino/api/audit.py
18
+ gravitino/api/auditable.py
19
+ gravitino/api/catalog.py
20
+ gravitino/api/catalog_change.py
21
+ gravitino/api/fileset.py
22
+ gravitino/api/fileset_change.py
23
+ gravitino/api/metalake.py
24
+ gravitino/api/metalake_change.py
25
+ gravitino/api/schema.py
26
+ gravitino/api/schema_change.py
27
+ gravitino/api/supports_schemas.py
28
+ gravitino/auth/__init__.py
29
+ gravitino/auth/auth_constants.py
30
+ gravitino/auth/auth_data_provider.py
31
+ gravitino/auth/default_oauth2_token_provider.py
32
+ gravitino/auth/oauth2_token_provider.py
33
+ gravitino/auth/simple_auth_provider.py
34
+ gravitino/catalog/__init__.py
35
+ gravitino/catalog/base_schema_catalog.py
36
+ gravitino/catalog/fileset_catalog.py
37
+ gravitino/client/__init__.py
38
+ gravitino/client/gravitino_admin_client.py
39
+ gravitino/client/gravitino_client.py
40
+ gravitino/client/gravitino_client_base.py
41
+ gravitino/client/gravitino_metalake.py
42
+ gravitino/client/gravitino_version.py
43
+ gravitino/constants/__init__.py
44
+ gravitino/constants/doc.py
45
+ gravitino/constants/error.py
46
+ gravitino/constants/root.py
47
+ gravitino/constants/timeout.py
48
+ gravitino/constants/version.py
49
+ gravitino/dto/__init__.py
50
+ gravitino/dto/audit_dto.py
51
+ gravitino/dto/catalog_dto.py
52
+ gravitino/dto/dto_converters.py
53
+ gravitino/dto/fileset_dto.py
54
+ gravitino/dto/metalake_dto.py
55
+ gravitino/dto/schema_dto.py
56
+ gravitino/dto/version_dto.py
57
+ gravitino/dto/requests/__init__.py
58
+ gravitino/dto/requests/catalog_create_request.py
59
+ gravitino/dto/requests/catalog_update_request.py
60
+ gravitino/dto/requests/catalog_updates_request.py
61
+ gravitino/dto/requests/fileset_create_request.py
62
+ gravitino/dto/requests/fileset_update_request.py
63
+ gravitino/dto/requests/fileset_updates_request.py
64
+ gravitino/dto/requests/metalake_create_request.py
65
+ gravitino/dto/requests/metalake_update_request.py
66
+ gravitino/dto/requests/metalake_updates_request.py
67
+ gravitino/dto/requests/oauth2_client_credential_request.py
68
+ gravitino/dto/requests/schema_create_request.py
69
+ gravitino/dto/requests/schema_update_request.py
70
+ gravitino/dto/requests/schema_updates_request.py
71
+ gravitino/dto/responses/__init__.py
72
+ gravitino/dto/responses/base_response.py
73
+ gravitino/dto/responses/catalog_list_response.py
74
+ gravitino/dto/responses/catalog_response.py
75
+ gravitino/dto/responses/drop_response.py
76
+ gravitino/dto/responses/entity_list_response.py
77
+ gravitino/dto/responses/error_response.py
78
+ gravitino/dto/responses/fileset_response.py
79
+ gravitino/dto/responses/metalake_list_response.py
80
+ gravitino/dto/responses/metalake_response.py
81
+ gravitino/dto/responses/oauth2_error_response.py
82
+ gravitino/dto/responses/oauth2_token_response.py
83
+ gravitino/dto/responses/schema_response.py
84
+ gravitino/dto/responses/version_response.py
85
+ gravitino/exceptions/__init__.py
86
+ gravitino/exceptions/base.py
87
+ gravitino/exceptions/handlers/__init__.py
88
+ gravitino/exceptions/handlers/error_handler.py
89
+ gravitino/exceptions/handlers/fileset_error_handler.py
90
+ gravitino/exceptions/handlers/metalake_error_handler.py
91
+ gravitino/exceptions/handlers/oauth_error_handler.py
92
+ gravitino/exceptions/handlers/rest_error_handler.py
93
+ gravitino/exceptions/handlers/schema_error_handler.py
94
+ gravitino/filesystem/__init__.py
95
+ gravitino/filesystem/gvfs.py
96
+ gravitino/filesystem/gvfs_config.py
97
+ gravitino/rest/__init__.py
98
+ gravitino/rest/rest_message.py
99
+ gravitino/rest/rest_utils.py
100
+ gravitino/utils/__init__.py
101
+ gravitino/utils/http_client.py
@@ -0,0 +1,22 @@
1
+ requests==2.32.2
2
+ dataclasses-json==0.6.6
3
+ readerwriterlock==1.0.9
4
+ fsspec==2024.3.1
5
+ pyarrow==15.0.2
6
+ cachetools==5.3.3
7
+
8
+ [dev]
9
+ requests==2.32.2
10
+ dataclasses-json==0.6.6
11
+ pylint==3.2.2
12
+ black==24.4.2
13
+ twine==5.1.1
14
+ coverage==7.5.1
15
+ pandas==2.0.3
16
+ pyarrow==15.0.2
17
+ llama-index==0.10.40
18
+ tenacity==8.3.0
19
+ cachetools==5.3.3
20
+ readerwriterlock==1.0.9
21
+ docker==7.1.0
22
+ pyjwt[crypto]==2.8.0
@@ -0,0 +1,30 @@
1
+ """
2
+ Licensed to the Apache Software Foundation (ASF) under one
3
+ or more contributor license agreements. See the NOTICE file
4
+ distributed with this work for additional information
5
+ regarding copyright ownership. The ASF licenses this file
6
+ to you under the Apache License, Version 2.0 (the
7
+ "License"); you may not use this file except in compliance
8
+ with the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing,
13
+ software distributed under the License is distributed on an
14
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ KIND, either express or implied. See the License for the
16
+ specific language governing permissions and limitations
17
+ under the License.
18
+ """
19
+
20
+ from gravitino.api.catalog import Catalog
21
+ from gravitino.api.schema import Schema
22
+ from gravitino.api.fileset import Fileset
23
+ from gravitino.api.fileset_change import FilesetChange
24
+ from gravitino.api.metalake_change import MetalakeChange
25
+ from gravitino.api.schema_change import SchemaChange
26
+ from gravitino.client.gravitino_client import GravitinoClient
27
+ from gravitino.client.gravitino_admin_client import GravitinoAdminClient
28
+ from gravitino.client.gravitino_metalake import GravitinoMetalake
29
+ from gravitino.name_identifier import NameIdentifier
30
+ from gravitino.filesystem import gvfs
@@ -0,0 +1,18 @@
1
+ """
2
+ Licensed to the Apache Software Foundation (ASF) under one
3
+ or more contributor license agreements. See the NOTICE file
4
+ distributed with this work for additional information
5
+ regarding copyright ownership. The ASF licenses this file
6
+ to you under the Apache License, Version 2.0 (the
7
+ "License"); you may not use this file except in compliance
8
+ with the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing,
13
+ software distributed under the License is distributed on an
14
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ KIND, either express or implied. See the License for the
16
+ specific language governing permissions and limitations
17
+ under the License.
18
+ """