ingestr 0.12.8__py3-none-any.whl → 0.12.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -28,6 +28,10 @@ def app_store(
28
28
  start_date: Optional[datetime] = None,
29
29
  end_date: Optional[datetime] = None,
30
30
  ) -> Iterable[DltResource]:
31
+ if start_date and start_date.tzinfo is not None:
32
+ start_date = start_date.replace(tzinfo=None)
33
+ if end_date and end_date.tzinfo is not None:
34
+ end_date = end_date.replace(tzinfo=None)
31
35
  for resource in RESOURCES:
32
36
  yield dlt.resource(
33
37
  get_analytics_reports,
ingestr/src/factory.py CHANGED
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
25
25
  ChessSource,
26
26
  DynamoDBSource,
27
27
  FacebookAdsSource,
28
+ GCSSource,
28
29
  GitHubSource,
29
30
  GoogleAnalyticsSource,
30
31
  GoogleSheetsSource,
@@ -124,6 +125,7 @@ class SourceDestinationFactory:
124
125
  "tiktok": TikTokSource,
125
126
  "googleanalytics": GoogleAnalyticsSource,
126
127
  "appstore": AppleAppStoreSource,
128
+ "gs": GCSSource,
127
129
  }
128
130
  destinations: Dict[str, Type[DestinationProtocol]] = {
129
131
  "bigquery": BigQueryDestination,
@@ -39,8 +39,6 @@ def readers(
39
39
  filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
40
40
  filesystem_resource.apply_hints(
41
41
  incremental=dlt.sources.incremental("modification_date"),
42
- range_end="closed",
43
- range_start="closed",
44
42
  )
45
43
  return (
46
44
  filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
ingestr/src/sources.py CHANGED
@@ -17,6 +17,8 @@ from typing import (
17
17
  from urllib.parse import ParseResult, parse_qs, quote, urlparse
18
18
 
19
19
  import dlt
20
+ import gcsfs # type: ignore
21
+ import s3fs # type: ignore
20
22
  import pendulum
21
23
  from dlt.common.configuration.specs import (
22
24
  AwsCredentials,
@@ -1091,19 +1093,17 @@ class S3Source:
1091
1093
  bucket_name = parsed_uri.hostname
1092
1094
  if not bucket_name:
1093
1095
  raise ValueError(
1094
- "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
1096
+ "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
1095
1097
  )
1096
1098
  bucket_url = f"s3://{bucket_name}"
1097
1099
 
1098
- path_to_file = parsed_uri.path.lstrip("/")
1100
+ path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1099
1101
  if not path_to_file:
1100
- raise ValueError(
1101
- "Invalid S3 URI: The file path is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
1102
- )
1102
+ raise ValueError("--source-table must be specified")
1103
1103
 
1104
- aws_credentials = AwsCredentials(
1105
- aws_access_key_id=access_key_id[0],
1106
- aws_secret_access_key=TSecretStrValue(secret_access_key[0]),
1104
+ fs = s3fs.S3FileSystem(
1105
+ key=access_key_id[0],
1106
+ secret=secret_access_key[0],
1107
1107
  )
1108
1108
 
1109
1109
  file_extension = path_to_file.split(".")[-1]
@@ -1119,7 +1119,7 @@ class S3Source:
1119
1119
  )
1120
1120
 
1121
1121
  return readers(
1122
- bucket_url=bucket_url, credentials=aws_credentials, file_glob=path_to_file
1122
+ bucket_url, fs, path_to_file
1123
1123
  ).with_resources(endpoint)
1124
1124
 
1125
1125
 
@@ -1503,3 +1503,69 @@ class AppleAppStoreSource:
1503
1503
  raise UnsupportedResourceError(table, "AppStore")
1504
1504
 
1505
1505
  return src.with_resources(table)
1506
+
1507
+
1508
+ class GCSSource:
1509
+ def handles_incrementality(self) -> bool:
1510
+ return True
1511
+
1512
+ def dlt_source(self, uri: str, table: str, **kwargs):
1513
+ if kwargs.get("incremental_key"):
1514
+ raise ValueError(
1515
+ "GCS takes care of incrementality on its own, you should not provide incremental_key"
1516
+ )
1517
+
1518
+ parsed_uri = urlparse(uri)
1519
+ params = parse_qs(parsed_uri.query)
1520
+ credentials_path = params.get("credentials_path")
1521
+ credentials_base64 = params.get("credentials_base64")
1522
+ credentials_available = any(
1523
+ map(
1524
+ lambda x: x is not None,
1525
+ [credentials_path, credentials_base64],
1526
+ )
1527
+ )
1528
+ if credentials_available is False:
1529
+ raise MissingValueError("credentials_path or credentials_base64", "GCS")
1530
+
1531
+ bucket_name = parsed_uri.hostname
1532
+ if not bucket_name:
1533
+ raise ValueError(
1534
+ "Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
1535
+ )
1536
+ bucket_url = f"gs://{bucket_name}/"
1537
+
1538
+ path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
1539
+ if not path_to_file:
1540
+ raise ValueError("--source-table must be specified")
1541
+
1542
+ credentials = None
1543
+ if credentials_path:
1544
+ credentials = credentials_path[0]
1545
+ else:
1546
+ credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
1547
+
1548
+ # There's a compatibility issue between google-auth, dlt and gcsfs
1549
+ # that makes it difficult to use google.oauth2.service_account.Credentials
1550
+ # (The RECOMMENDED way of passing service account credentials)
1551
+ # directly with gcsfs. As a workaround, we construct the GCSFileSystem
1552
+ # and pass it directly to filesystem.readers.
1553
+ fs = gcsfs.GCSFileSystem(
1554
+ token=credentials,
1555
+ )
1556
+
1557
+ file_extension = path_to_file.split(".")[-1]
1558
+ if file_extension == "csv":
1559
+ endpoint = "read_csv"
1560
+ elif file_extension == "jsonl":
1561
+ endpoint = "read_jsonl"
1562
+ elif file_extension == "parquet":
1563
+ endpoint = "read_parquet"
1564
+ else:
1565
+ raise ValueError(
1566
+ "GCS Source only supports specific formats files: csv, jsonl, parquet"
1567
+ )
1568
+
1569
+ return readers(
1570
+ bucket_url, fs, path_to_file
1571
+ ).with_resources(endpoint)
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.12.8"
1
+ __version__ = "0.12.9"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.8
3
+ Version: 0.12.9
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
22
22
  Requires-Dist: duckdb-engine==0.13.5
23
23
  Requires-Dist: duckdb==1.1.3
24
24
  Requires-Dist: facebook-business==20.0.0
25
+ Requires-Dist: gcsfs==2024.10.0
25
26
  Requires-Dist: google-analytics-data==0.18.16
26
27
  Requires-Dist: google-api-python-client==2.130.0
27
28
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
@@ -2,18 +2,18 @@ ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
4
4
  ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
5
- ingestr/src/factory.py,sha256=jjxieXpSK02tNcg7f_t5xxqs49EnI739smRLX8qLsUU,4582
5
+ ingestr/src/factory.py,sha256=oNF9dovovLG34xLgRZ5fbyA_XSHxEuTW27s1cb35KDM,4622
6
6
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
7
- ingestr/src/sources.py,sha256=dMXTfykbAZTN8SNpOWJbtl10krdJfg12S13at3Z4L38,53647
7
+ ingestr/src/sources.py,sha256=JoO-IQ_eB4Ia1fC1GWs6N74l9A3tXQT-Fj0uNBiSI_Y,55978
8
8
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
9
9
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
10
- ingestr/src/version.py,sha256=F7xxYe0dXryqS1cGEXFikx8AI7-UsZzdi89hJdyx-b0,23
10
+ ingestr/src/version.py,sha256=FSGqM7DffUSCa5R2rqVlNo-yNzBd6cgAXS1_0tElLy0,23
11
11
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
12
12
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
13
13
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
14
14
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
15
15
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
16
- ingestr/src/appstore/__init__.py,sha256=s39r3YUjdfStA6lBcPzqQzestiojC3U41LB3F6Y8gG0,4538
16
+ ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
17
17
  ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
18
18
  ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
19
19
  ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
@@ -30,7 +30,7 @@ ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24U
30
30
  ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
31
31
  ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
32
32
  ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
33
- ingestr/src/filesystem/__init__.py,sha256=hcN_sO356ChTPyg72AufrikdkFBBIScTdxtGfDm-W0E,4221
33
+ ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
34
34
  ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
35
35
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
36
36
  ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
@@ -91,8 +91,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
91
91
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
92
92
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
93
93
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
94
- ingestr-0.12.8.dist-info/METADATA,sha256=zbhdTjqZrWDsmnXTxy1tfC79Q75vzHc-7UWLM62vocQ,8024
95
- ingestr-0.12.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
96
- ingestr-0.12.8.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
97
- ingestr-0.12.8.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
98
- ingestr-0.12.8.dist-info/RECORD,,
94
+ ingestr-0.12.9.dist-info/METADATA,sha256=p7RGcw0cnHPU93RLIPWOkMtj36Ax9BnA7bPSKIQ3pfg,8056
95
+ ingestr-0.12.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
96
+ ingestr-0.12.9.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
97
+ ingestr-0.12.9.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
98
+ ingestr-0.12.9.dist-info/RECORD,,