ingestr 0.10.1__py3-none-any.whl → 0.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -288,6 +288,8 @@ def ingest(
288
288
  ),
289
289
  ] = [], # type: ignore
290
290
  ):
291
+
292
+ # TODO(turtledev): can't we move this to the top of this file?
291
293
  import hashlib
292
294
  import tempfile
293
295
  from datetime import datetime
@@ -383,6 +385,15 @@ def ingest(
383
385
  )
384
386
 
385
387
  factory = SourceDestinationFactory(source_uri, dest_uri)
388
+ track(
389
+ "command_running",
390
+ {
391
+ "command": "ingest",
392
+ "source_type": factory.source_scheme,
393
+ "destination_type": factory.destination_scheme,
394
+ },
395
+ )
396
+
386
397
  source = factory.get_source()
387
398
  destination = factory.get_destination()
388
399
 
ingestr/src/destinations.py CHANGED
@@ -5,9 +5,10 @@ import json
5
5
  import os
6
6
  import shutil
7
7
  import tempfile
8
- from urllib.parse import parse_qs, urlparse
8
+ from urllib.parse import parse_qs, quote, urlparse
9
9
 
10
10
  import dlt
11
+ from dlt.common.configuration.specs import AwsCredentials
11
12
 
12
13
 
13
14
  class GenericSqlDestination:
@@ -194,3 +195,62 @@ class CsvDestination(GenericSqlDestination):
194
195
  csv_writer.writerow(json_obj)
195
196
 
196
197
  shutil.rmtree(self.temp_path)
198
+
199
+
200
+ class AthenaDestination:
201
+ def dlt_dest(self, uri: str, **kwargs):
202
+ encoded_uri = quote(uri, safe=":/?&=")
203
+ source_fields = urlparse(encoded_uri)
204
+ source_params = parse_qs(source_fields.query)
205
+
206
+ bucket = source_params.get("bucket", [None])[0]
207
+ if not bucket:
208
+ raise ValueError("A bucket is required to connect to Athena.")
209
+
210
+ if not bucket.startswith("s3://"):
211
+ bucket = f"s3://{bucket}"
212
+
213
+ query_result_path = source_params.get("query_results_path", [None])[0]
214
+ if query_result_path:
215
+ if not query_result_path.startswith("s3://"):
216
+ query_result_path = f"s3://{query_result_path}"
217
+ else:
218
+ query_result_path = bucket
219
+
220
+ access_key_id = source_params.get("access_key_id", [None])[0]
221
+ if not access_key_id:
222
+ raise ValueError("The AWS access_key_id is required to connect to Athena.")
223
+
224
+ secret_access_key = source_params.get("secret_access_key", [None])[0]
225
+ if not secret_access_key:
226
+ raise ValueError("The AWS secret_access_key is required to connect Athena")
227
+
228
+ work_group = source_params.get("workgroup", [None])[0]
229
+
230
+ region_name = source_params.get("region_name", [None])[0]
231
+ if not region_name:
232
+ raise ValueError("The region_name is required to connect to Athena.")
233
+
234
+ os.environ["DESTINATION__BUCKET_URL"] = bucket
235
+ os.environ["DESTINATION__CREDENTIALS__AWS_ACCESS_KEY_ID"] = access_key_id
236
+ os.environ["DESTINATION__CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = (
237
+ secret_access_key
238
+ )
239
+
240
+ credentials = AwsCredentials(
241
+ aws_access_key_id=access_key_id,
242
+ aws_secret_access_key=secret_access_key,
243
+ region_name=region_name,
244
+ )
245
+ return dlt.destinations.athena(
246
+ query_result_bucket=query_result_path,
247
+ athena_work_group=work_group,
248
+ credentials=credentials,
249
+ destination_name=bucket,
250
+ )
251
+
252
+ def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
253
+ return {}
254
+
255
+ def post_load(self):
256
+ pass
ingestr/src/factory.py CHANGED
@@ -4,6 +4,7 @@ from urllib.parse import urlparse
4
4
  from dlt.common.destination import Destination
5
5
 
6
6
  from ingestr.src.destinations import (
7
+ AthenaDestination,
7
8
  BigQueryDestination,
8
9
  CsvDestination,
9
10
  DatabricksDestination,
@@ -159,6 +160,7 @@ class SourceDestinationFactory:
159
160
  "snowflake": SnowflakeDestination(),
160
161
  "synapse": SynapseDestination(),
161
162
  "csv": CsvDestination(),
163
+ "athena": AthenaDestination(),
162
164
  }
163
165
 
164
166
  if self.destination_scheme in match:
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.10.1"
1
+ __version__ = "0.10.3"
ingestr-0.10.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.10.1
3
+ Version: 0.10.3
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -26,13 +26,15 @@ Requires-Dist: pendulum==3.0.0
26
26
  Requires-Dist: psycopg2-binary==2.9.10
27
27
  Requires-Dist: py-machineid==0.6.0
28
28
  Requires-Dist: pyairtable==2.3.3
29
+ Requires-Dist: pyarrow==18.1.0
30
+ Requires-Dist: pyathena==3.9.0
29
31
  Requires-Dist: pymongo==4.10.1
30
32
  Requires-Dist: pymysql==1.1.1
31
33
  Requires-Dist: pyrate-limiter==3.7.0
32
34
  Requires-Dist: redshift-connector==2.1.3
33
35
  Requires-Dist: rich==13.9.4
34
36
  Requires-Dist: rudder-sdk-python==2.1.4
35
- Requires-Dist: s3fs==2024.9.0
37
+ Requires-Dist: s3fs==2024.10.0
36
38
  Requires-Dist: snowflake-sqlalchemy==1.6.1
37
39
  Requires-Dist: sqlalchemy-bigquery==1.12.0
38
40
  Requires-Dist: sqlalchemy-hana==2.0.0
@@ -72,11 +74,20 @@ ingestr is a command-line app that allows you to ingest data from any source int
72
74
  ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
73
75
 
74
76
  ## Installation
77
+ We recommend using [uv](https://github.com/astral-sh/uv) to run `ingestr`.
75
78
 
76
79
  ```
77
- pip install ingestr
80
+ pip install uv
81
+ uvx ingestr
78
82
  ```
79
83
 
84
+ Alternatively, if you'd like to install it globally:
85
+ ```
86
+ uv pip install --system ingestr
87
+ ```
88
+
89
+ While installation with vanilla `pip` is possible, it's an order of magnitude slower.
90
+
80
91
  ## Quickstart
81
92
 
82
93
  ```bash
@@ -102,6 +113,13 @@ You can see the full documentation [here](https://bruin-data.github.io/ingestr/g
102
113
 
103
114
  Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
104
115
 
116
+ ## Contributing
117
+
118
+ Pull requests are welcome. However, please open an issue first to discuss what you would like to change. We may be able to offer you help and feedback regarding any changes you would like to make.
119
+
120
+ > [!NOTE]
121
+ > After cloning `ingestr` make sure to run `make setup` to install githooks.
122
+
105
123
  ## Supported sources & destinations
106
124
 
107
125
  <table>
ingestr-0.10.3.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
1
- ingestr/main.py,sha256=Uq0GTfCtlYu94Iw41AWhexZjgNdaEi7SqxkRAl0iTCg,22001
1
+ ingestr/main.py,sha256=KR43c1BeIZUOjV8XJUvYEyFHi1nRaTcKdrmwAn79hQk,22311
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
- ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
4
- ingestr/src/factory.py,sha256=ft81B-YJgvEROkHAZjMjTIS7IYvle-uZQv45b7-Wfk0,4947
3
+ ingestr/src/destinations.py,sha256=wT76Pi3JBbzfKj2goy4-L_XDPfjyPK6b95zyRxksr9g,8555
4
+ ingestr/src/factory.py,sha256=nYWgWQINQEQKPeELwGY7MCeiOSoCP6JDPozfKKyGNXk,5013
5
5
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
6
6
  ingestr/src/sources.py,sha256=KhY6AH91zZoSthi7AbFd4_OsrPmxP3Q4ratA7ZscsZU,34810
7
7
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
8
- ingestr/src/version.py,sha256=v7Gyp89umFzDtY45tTjCdXqZnQ2RN01AibdYNxEvxYo,23
8
+ ingestr/src/version.py,sha256=0C8KcY1dzs3hdkAre06v0NCQ0Uxcqv6g9a93bRcVLW0,23
9
9
  ingestr/src/adjust/__init__.py,sha256=I_G90D260OPIWCS716k0U4aeztlAieW9zi0R9-oW7TA,3007
10
10
  ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
11
11
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -70,8 +70,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
70
70
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
71
71
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
72
72
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
73
- ingestr-0.10.1.dist-info/METADATA,sha256=FcFG3P5z-yvC3TiZSvf07DLaLVoJsGL3cEDqLqHFCeY,7060
74
- ingestr-0.10.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
75
- ingestr-0.10.1.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
76
- ingestr-0.10.1.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
77
- ingestr-0.10.1.dist-info/RECORD,,
73
+ ingestr-0.10.3.dist-info/METADATA,sha256=bQgQaW4dtrtPYMTyJRyR6OG-6rLobsyxCfIf04quiIA,7688
74
+ ingestr-0.10.3.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
75
+ ingestr-0.10.3.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
76
+ ingestr-0.10.3.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
77
+ ingestr-0.10.3.dist-info/RECORD,,