ingestr 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr has been flagged as potentially problematic; see the registry listing for details.
- ingestr/src/destinations.py +61 -1
- ingestr/src/factory.py +2 -0
- ingestr/src/version.py +1 -1
- {ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/METADATA +4 -2
- {ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/RECORD +8 -8
- {ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/WHEEL +0 -0
- {ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/entry_points.txt +0 -0
- {ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/destinations.py
CHANGED
@@ -5,9 +5,10 @@ import json
 import os
 import shutil
 import tempfile
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import parse_qs, quote, urlparse
 
 import dlt
+from dlt.common.configuration.specs import AwsCredentials
 
 
 class GenericSqlDestination:
@@ -194,3 +195,62 @@ class CsvDestination(GenericSqlDestination):
             csv_writer.writerow(json_obj)
 
         shutil.rmtree(self.temp_path)
+
+
+class AthenaDestination:
+    def dlt_dest(self, uri: str, **kwargs):
+        encoded_uri = quote(uri, safe=":/?&=")
+        source_fields = urlparse(encoded_uri)
+        source_params = parse_qs(source_fields.query)
+
+        bucket = source_params.get("bucket", [None])[0]
+        if not bucket:
+            raise ValueError("A bucket is required to connect to Athena.")
+
+        if not bucket.startswith("s3://"):
+            bucket = f"s3://{bucket}"
+
+        query_result_path = source_params.get("query_results_path", [None])[0]
+        if query_result_path:
+            if not query_result_path.startswith("s3://"):
+                query_result_path = f"s3://{query_result_path}"
+        else:
+            query_result_path = bucket
+
+        access_key_id = source_params.get("access_key_id", [None])[0]
+        if not access_key_id:
+            raise ValueError("The AWS access_key_id is required to connect to Athena.")
+
+        secret_access_key = source_params.get("secret_access_key", [None])[0]
+        if not secret_access_key:
+            raise ValueError("The AWS secret_access_key is required to connect Athena")
+
+        work_group = source_params.get("workgroup", [None])[0]
+
+        region_name = source_params.get("region_name", [None])[0]
+        if not region_name:
+            raise ValueError("The region_name is required to connect to Athena.")
+
+        os.environ["DESTINATION__BUCKET_URL"] = bucket
+        os.environ["DESTINATION__CREDENTIALS__AWS_ACCESS_KEY_ID"] = access_key_id
+        os.environ["DESTINATION__CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = (
+            secret_access_key
+        )
+
+        credentials = AwsCredentials(
+            aws_access_key_id=access_key_id,
+            aws_secret_access_key=secret_access_key,
+            region_name=region_name,
+        )
+        return dlt.destinations.athena(
+            query_result_bucket=query_result_path,
+            athena_work_group=work_group,
+            credentials=credentials,
+            destination_name=bucket,
+        )
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
+        return {}
+
+    def post_load(self):
+        pass
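The new AthenaDestination reads everything it needs from the query string of the destination URI: a required bucket, access_key_id, secret_access_key and region_name, plus an optional workgroup and query_results_path. A minimal usage sketch, assuming ingestr 0.10.2 and its dlt dependency are installed; the bucket, credentials and region below are placeholders, not working values:

from ingestr.src.destinations import AthenaDestination

# Hypothetical destination URI; every value here is a placeholder.
uri = (
    "athena://?bucket=my-athena-bucket"
    "&access_key_id=AKIAEXAMPLE"
    "&secret_access_key=EXAMPLESECRET"
    "&region_name=eu-west-1"
    "&workgroup=primary"  # optional; None when omitted
)

dest = AthenaDestination()
# Builds AwsCredentials, exports the DESTINATION__* environment variables shown
# in the diff, and returns a dlt.destinations.athena(...) destination object.
athena_destination = dest.dlt_dest(uri)

Note that the bucket doubles as the query-results location when query_results_path is not given, and that the credentials are also written to environment variables as a side effect.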
ingestr/src/factory.py
CHANGED
@@ -4,6 +4,7 @@ from urllib.parse import urlparse
 from dlt.common.destination import Destination
 
 from ingestr.src.destinations import (
+    AthenaDestination,
     BigQueryDestination,
     CsvDestination,
     DatabricksDestination,
@@ -159,6 +160,7 @@ class SourceDestinationFactory:
             "snowflake": SnowflakeDestination(),
             "synapse": SynapseDestination(),
             "csv": CsvDestination(),
+            "athena": AthenaDestination(),
         }
 
         if self.destination_scheme in match:
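With this mapping in place, the new destination is selected through the athena:// scheme on the destination URI. A hypothetical invocation, assuming ingestr's usual ingest flags (--source-uri, --source-table, --dest-uri, --dest-table) are unchanged in this release; all connection values are placeholders:

ingestr ingest \
    --source-uri 'postgresql://user:password@localhost:5432/mydb' \
    --source-table 'public.events' \
    --dest-uri 'athena://?bucket=my-athena-bucket&access_key_id=AKIAEXAMPLE&secret_access_key=EXAMPLESECRET&region_name=eu-west-1' \
    --dest-table 'raw.events'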
ingestr/src/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.10.1"
+__version__ = "0.10.2"
{ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ingestr
-Version: 0.10.1
+Version: 0.10.2
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -26,13 +26,15 @@ Requires-Dist: pendulum==3.0.0
 Requires-Dist: psycopg2-binary==2.9.10
 Requires-Dist: py-machineid==0.6.0
 Requires-Dist: pyairtable==2.3.3
+Requires-Dist: pyarrow==18.1.0
+Requires-Dist: pyathena==3.9.0
 Requires-Dist: pymongo==4.10.1
 Requires-Dist: pymysql==1.1.1
 Requires-Dist: pyrate-limiter==3.7.0
 Requires-Dist: redshift-connector==2.1.3
 Requires-Dist: rich==13.9.4
 Requires-Dist: rudder-sdk-python==2.1.4
-Requires-Dist: s3fs==2024.
+Requires-Dist: s3fs==2024.10.0
 Requires-Dist: snowflake-sqlalchemy==1.6.1
 Requires-Dist: sqlalchemy-bigquery==1.12.0
 Requires-Dist: sqlalchemy-hana==2.0.0
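The two new pins, pyarrow and pyathena, line up with the new Athena destination, and the s3fs pin changes as well. A quick optional check after upgrading that pip resolved the pinned versions; plain standard-library Python, no ingestr-specific APIs assumed:

from importlib.metadata import version

# Print the installed versions of the Athena-related dependencies pinned above.
for pkg in ("pyarrow", "pyathena", "s3fs"):
    print(pkg, version(pkg))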
{ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
 ingestr/main.py,sha256=Uq0GTfCtlYu94Iw41AWhexZjgNdaEi7SqxkRAl0iTCg,22001
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
-ingestr/src/destinations.py,sha256=
-ingestr/src/factory.py,sha256=
+ingestr/src/destinations.py,sha256=wT76Pi3JBbzfKj2goy4-L_XDPfjyPK6b95zyRxksr9g,8555
+ingestr/src/factory.py,sha256=nYWgWQINQEQKPeELwGY7MCeiOSoCP6JDPozfKKyGNXk,5013
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
 ingestr/src/sources.py,sha256=KhY6AH91zZoSthi7AbFd4_OsrPmxP3Q4ratA7ZscsZU,34810
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
-ingestr/src/version.py,sha256=
+ingestr/src/version.py,sha256=A_AARqtxTOj_AQTpjpgOxNx-UOBio5wYFfZ2mrdMKfs,23
 ingestr/src/adjust/__init__.py,sha256=I_G90D260OPIWCS716k0U4aeztlAieW9zi0R9-oW7TA,3007
 ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -70,8 +70,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.10.
-ingestr-0.10.
-ingestr-0.10.
-ingestr-0.10.
-ingestr-0.10.
+ingestr-0.10.2.dist-info/METADATA,sha256=3_ilZkg36lUCtkfUBRlI2LnVj4Vl5OKq_R8NhvKwWk4,7123
+ingestr-0.10.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+ingestr-0.10.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.10.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.10.2.dist-info/RECORD,,

{ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/WHEEL
File without changes

{ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/entry_points.txt
File without changes

{ingestr-0.10.1.dist-info → ingestr-0.10.2.dist-info}/licenses/LICENSE.md
File without changes