ingestr 0.12.8__py3-none-any.whl → 0.12.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/appstore/__init__.py +4 -0
- ingestr/src/factory.py +2 -0
- ingestr/src/filesystem/__init__.py +0 -2
- ingestr/src/sources.py +75 -9
- ingestr/src/version.py +1 -1
- {ingestr-0.12.8.dist-info → ingestr-0.12.9.dist-info}/METADATA +2 -1
- {ingestr-0.12.8.dist-info → ingestr-0.12.9.dist-info}/RECORD +10 -10
- {ingestr-0.12.8.dist-info → ingestr-0.12.9.dist-info}/WHEEL +0 -0
- {ingestr-0.12.8.dist-info → ingestr-0.12.9.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.8.dist-info → ingestr-0.12.9.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/appstore/__init__.py
CHANGED
|
@@ -28,6 +28,10 @@ def app_store(
|
|
|
28
28
|
start_date: Optional[datetime] = None,
|
|
29
29
|
end_date: Optional[datetime] = None,
|
|
30
30
|
) -> Iterable[DltResource]:
|
|
31
|
+
if start_date and start_date.tzinfo is not None:
|
|
32
|
+
start_date = start_date.replace(tzinfo=None)
|
|
33
|
+
if end_date and end_date.tzinfo is not None:
|
|
34
|
+
end_date = end_date.replace(tzinfo=None)
|
|
31
35
|
for resource in RESOURCES:
|
|
32
36
|
yield dlt.resource(
|
|
33
37
|
get_analytics_reports,
|
ingestr/src/factory.py
CHANGED
|
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
|
|
|
25
25
|
ChessSource,
|
|
26
26
|
DynamoDBSource,
|
|
27
27
|
FacebookAdsSource,
|
|
28
|
+
GCSSource,
|
|
28
29
|
GitHubSource,
|
|
29
30
|
GoogleAnalyticsSource,
|
|
30
31
|
GoogleSheetsSource,
|
|
@@ -124,6 +125,7 @@ class SourceDestinationFactory:
|
|
|
124
125
|
"tiktok": TikTokSource,
|
|
125
126
|
"googleanalytics": GoogleAnalyticsSource,
|
|
126
127
|
"appstore": AppleAppStoreSource,
|
|
128
|
+
"gs": GCSSource,
|
|
127
129
|
}
|
|
128
130
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
129
131
|
"bigquery": BigQueryDestination,
|
|
ingestr/src/filesystem/__init__.py
CHANGED
|
@@ -39,8 +39,6 @@ def readers(
|
|
|
39
39
|
filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
|
|
40
40
|
filesystem_resource.apply_hints(
|
|
41
41
|
incremental=dlt.sources.incremental("modification_date"),
|
|
42
|
-
range_end="closed",
|
|
43
|
-
range_start="closed",
|
|
44
42
|
)
|
|
45
43
|
return (
|
|
46
44
|
filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
|
ingestr/src/sources.py
CHANGED
|
@@ -17,6 +17,8 @@ from typing import (
|
|
|
17
17
|
from urllib.parse import ParseResult, parse_qs, quote, urlparse
|
|
18
18
|
|
|
19
19
|
import dlt
|
|
20
|
+
import gcsfs # type: ignore
|
|
21
|
+
import s3fs # type: ignore
|
|
20
22
|
import pendulum
|
|
21
23
|
from dlt.common.configuration.specs import (
|
|
22
24
|
AwsCredentials,
|
|
@@ -1091,19 +1093,17 @@ class S3Source:
|
|
|
1091
1093
|
bucket_name = parsed_uri.hostname
|
|
1092
1094
|
if not bucket_name:
|
|
1093
1095
|
raise ValueError(
|
|
1094
|
-
"Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name
|
|
1096
|
+
"Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
|
|
1095
1097
|
)
|
|
1096
1098
|
bucket_url = f"s3://{bucket_name}"
|
|
1097
1099
|
|
|
1098
|
-
path_to_file = parsed_uri.path.lstrip("/")
|
|
1100
|
+
path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
|
|
1099
1101
|
if not path_to_file:
|
|
1100
|
-
raise ValueError(
|
|
1101
|
-
"Invalid S3 URI: The file path is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
|
|
1102
|
-
)
|
|
1102
|
+
raise ValueError("--source-table must be specified")
|
|
1103
1103
|
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1104
|
+
fs = s3fs.S3FileSystem(
|
|
1105
|
+
key=access_key_id[0],
|
|
1106
|
+
secret=secret_access_key[0],
|
|
1107
1107
|
)
|
|
1108
1108
|
|
|
1109
1109
|
file_extension = path_to_file.split(".")[-1]
|
|
@@ -1119,7 +1119,7 @@ class S3Source:
|
|
|
1119
1119
|
)
|
|
1120
1120
|
|
|
1121
1121
|
return readers(
|
|
1122
|
-
bucket_url
|
|
1122
|
+
bucket_url, fs, path_to_file
|
|
1123
1123
|
).with_resources(endpoint)
|
|
1124
1124
|
|
|
1125
1125
|
|
|
@@ -1503,3 +1503,69 @@ class AppleAppStoreSource:
|
|
|
1503
1503
|
raise UnsupportedResourceError(table, "AppStore")
|
|
1504
1504
|
|
|
1505
1505
|
return src.with_resources(table)
|
|
1506
|
+
|
|
1507
|
+
|
|
1508
|
+
class GCSSource:
|
|
1509
|
+
def handles_incrementality(self) -> bool:
|
|
1510
|
+
return True
|
|
1511
|
+
|
|
1512
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
1513
|
+
if kwargs.get("incremental_key"):
|
|
1514
|
+
raise ValueError(
|
|
1515
|
+
"GCS takes care of incrementality on its own, you should not provide incremental_key"
|
|
1516
|
+
)
|
|
1517
|
+
|
|
1518
|
+
parsed_uri = urlparse(uri)
|
|
1519
|
+
params = parse_qs(parsed_uri.query)
|
|
1520
|
+
credentials_path = params.get("credentials_path")
|
|
1521
|
+
credentials_base64 = params.get("credentials_base64")
|
|
1522
|
+
credentials_available = any(
|
|
1523
|
+
map(
|
|
1524
|
+
lambda x: x is not None,
|
|
1525
|
+
[credentials_path, credentials_base64],
|
|
1526
|
+
)
|
|
1527
|
+
)
|
|
1528
|
+
if credentials_available is False:
|
|
1529
|
+
raise MissingValueError("credentials_path or credentials_base64", "GCS")
|
|
1530
|
+
|
|
1531
|
+
bucket_name = parsed_uri.hostname
|
|
1532
|
+
if not bucket_name:
|
|
1533
|
+
raise ValueError(
|
|
1534
|
+
"Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
|
|
1535
|
+
)
|
|
1536
|
+
bucket_url = f"gs://{bucket_name}/"
|
|
1537
|
+
|
|
1538
|
+
path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
|
|
1539
|
+
if not path_to_file:
|
|
1540
|
+
raise ValueError("--source-table must be specified")
|
|
1541
|
+
|
|
1542
|
+
credentials = None
|
|
1543
|
+
if credentials_path:
|
|
1544
|
+
credentials = credentials_path[0]
|
|
1545
|
+
else:
|
|
1546
|
+
credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
|
|
1547
|
+
|
|
1548
|
+
# There's a compatiblity issue between google-auth, dlt and gcsfs
|
|
1549
|
+
# that makes it difficult to use google.oauth2.service_account.Credentials
|
|
1550
|
+
# (The RECOMMENDED way of passing service account credentials)
|
|
1551
|
+
# directly with gcsfs. As a workaround, we construct the GCSFileSystem
|
|
1552
|
+
# and pass it directly to filesystem.readers.
|
|
1553
|
+
fs = gcsfs.GCSFileSystem(
|
|
1554
|
+
token=credentials,
|
|
1555
|
+
)
|
|
1556
|
+
|
|
1557
|
+
file_extension = path_to_file.split(".")[-1]
|
|
1558
|
+
if file_extension == "csv":
|
|
1559
|
+
endpoint = "read_csv"
|
|
1560
|
+
elif file_extension == "jsonl":
|
|
1561
|
+
endpoint = "read_jsonl"
|
|
1562
|
+
elif file_extension == "parquet":
|
|
1563
|
+
endpoint = "read_parquet"
|
|
1564
|
+
else:
|
|
1565
|
+
raise ValueError(
|
|
1566
|
+
"GCS Source only supports specific formats files: csv, jsonl, parquet"
|
|
1567
|
+
)
|
|
1568
|
+
|
|
1569
|
+
return readers(
|
|
1570
|
+
bucket_url, fs, path_to_file
|
|
1571
|
+
).with_resources(endpoint)
|
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.12.8"
|
|
1
|
+
__version__ = "0.12.9"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.8
|
|
3
|
+
Version: 0.12.9
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
|
|
|
22
22
|
Requires-Dist: duckdb-engine==0.13.5
|
|
23
23
|
Requires-Dist: duckdb==1.1.3
|
|
24
24
|
Requires-Dist: facebook-business==20.0.0
|
|
25
|
+
Requires-Dist: gcsfs==2024.10.0
|
|
25
26
|
Requires-Dist: google-analytics-data==0.18.16
|
|
26
27
|
Requires-Dist: google-api-python-client==2.130.0
|
|
27
28
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
@@ -2,18 +2,18 @@ ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
|
|
|
2
2
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
3
|
ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
|
|
4
4
|
ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
|
|
5
|
-
ingestr/src/factory.py,sha256=
|
|
5
|
+
ingestr/src/factory.py,sha256=oNF9dovovLG34xLgRZ5fbyA_XSHxEuTW27s1cb35KDM,4622
|
|
6
6
|
ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
|
|
7
|
-
ingestr/src/sources.py,sha256=
|
|
7
|
+
ingestr/src/sources.py,sha256=JoO-IQ_eB4Ia1fC1GWs6N74l9A3tXQT-Fj0uNBiSI_Y,55978
|
|
8
8
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
9
9
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
10
|
-
ingestr/src/version.py,sha256=
|
|
10
|
+
ingestr/src/version.py,sha256=FSGqM7DffUSCa5R2rqVlNo-yNzBd6cgAXS1_0tElLy0,23
|
|
11
11
|
ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
|
|
12
12
|
ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
|
|
13
13
|
ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
|
|
14
14
|
ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
|
|
15
15
|
ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
|
|
16
|
-
ingestr/src/appstore/__init__.py,sha256=
|
|
16
|
+
ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
|
|
17
17
|
ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
|
|
18
18
|
ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
|
|
19
19
|
ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
|
|
@@ -30,7 +30,7 @@ ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24U
|
|
|
30
30
|
ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
|
|
31
31
|
ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
|
|
32
32
|
ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
|
|
33
|
-
ingestr/src/filesystem/__init__.py,sha256=
|
|
33
|
+
ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
|
|
34
34
|
ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
|
|
35
35
|
ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
|
|
36
36
|
ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
|
|
@@ -91,8 +91,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
91
91
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
92
92
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
93
93
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
94
|
-
ingestr-0.12.
|
|
95
|
-
ingestr-0.12.
|
|
96
|
-
ingestr-0.12.
|
|
97
|
-
ingestr-0.12.
|
|
98
|
-
ingestr-0.12.
|
|
94
|
+
ingestr-0.12.9.dist-info/METADATA,sha256=p7RGcw0cnHPU93RLIPWOkMtj36Ax9BnA7bPSKIQ3pfg,8056
|
|
95
|
+
ingestr-0.12.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
96
|
+
ingestr-0.12.9.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
97
|
+
ingestr-0.12.9.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
98
|
+
ingestr-0.12.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|