ingestr 0.13.33__py3-none-any.whl → 0.13.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +14 -6
- ingestr/src/hubspot/__init__.py +11 -2
- ingestr/src/hubspot/helpers.py +0 -4
- ingestr/src/sources.py +25 -11
- {ingestr-0.13.33.dist-info → ingestr-0.13.35.dist-info}/METADATA +2 -2
- {ingestr-0.13.33.dist-info → ingestr-0.13.35.dist-info}/RECORD +11 -11
- {ingestr-0.13.33.dist-info → ingestr-0.13.35.dist-info}/WHEEL +0 -0
- {ingestr-0.13.33.dist-info → ingestr-0.13.35.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.33.dist-info → ingestr-0.13.35.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/airtable/__init__.py
CHANGED
|
@@ -9,7 +9,7 @@ import pyairtable
|
|
|
9
9
|
from dlt.sources import DltResource
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
@dlt.source
|
|
12
|
+
@dlt.source(max_table_nesting=1)
|
|
13
13
|
def airtable_source(
|
|
14
14
|
base_id: str = dlt.config.value,
|
|
15
15
|
table_names: Optional[List[str]] = dlt.config.value,
|
|
@@ -50,12 +50,13 @@ def airtable_resource(
|
|
|
50
50
|
It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
|
|
51
51
|
table (Dict[str, Any]): Metadata about an airtable, does not contain the actual records
|
|
52
52
|
"""
|
|
53
|
+
|
|
53
54
|
primary_key_id = table["primaryFieldId"]
|
|
54
55
|
primary_key_field = [
|
|
55
56
|
field for field in table["fields"] if field["id"] == primary_key_id
|
|
56
57
|
][0]
|
|
57
58
|
table_name: str = table["name"]
|
|
58
|
-
primary_key: List[str] = [f"fields__{primary_key_field['name']}"]
|
|
59
|
+
primary_key: List[str] = [f"fields__{primary_key_field['name']}".lower()]
|
|
59
60
|
air_table = api.table(base_id, table["id"])
|
|
60
61
|
|
|
61
62
|
# Table.iterate() supports rich customization options, such as chunk size, fields, cell format, timezone, locale, and view
|
ingestr/src/buildinfo.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "v0.13.33"
|
|
1
|
+
version = "v0.13.35"
|
ingestr/src/destinations.py
CHANGED
|
@@ -235,12 +235,19 @@ class AthenaDestination:
|
|
|
235
235
|
if not bucket.startswith("s3://"):
|
|
236
236
|
bucket = f"s3://{bucket}"
|
|
237
237
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
238
|
+
bucket = bucket.rstrip("/")
|
|
239
|
+
|
|
240
|
+
dest_table = kwargs.get("dest_table", None)
|
|
241
|
+
if not dest_table:
|
|
242
|
+
raise ValueError("A destination table is required to connect to Athena.")
|
|
243
|
+
|
|
244
|
+
dest_table_fields = dest_table.split(".")
|
|
245
|
+
if len(dest_table_fields) != 2:
|
|
246
|
+
raise ValueError(
|
|
247
|
+
f"Table name must be in the format <schema>.<table>, given: {dest_table}"
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
query_result_path = f"{bucket}/{dest_table_fields[0]}_staging/metadata"
|
|
244
251
|
|
|
245
252
|
access_key_id = source_params.get("access_key_id", [None])[0]
|
|
246
253
|
secret_access_key = source_params.get("secret_access_key", [None])[0]
|
|
@@ -285,6 +292,7 @@ class AthenaDestination:
|
|
|
285
292
|
region_name=region_name,
|
|
286
293
|
),
|
|
287
294
|
destination_name=bucket,
|
|
295
|
+
force_iceberg=True,
|
|
288
296
|
)
|
|
289
297
|
|
|
290
298
|
def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
|
ingestr/src/hubspot/__init__.py
CHANGED
|
@@ -197,11 +197,18 @@ def hubspot(
|
|
|
197
197
|
api_key: str = api_key,
|
|
198
198
|
custom_object_name: str = custom_object,
|
|
199
199
|
) -> Iterator[TDataItems]:
|
|
200
|
-
|
|
200
|
+
custom_objects = fetch_data_raw(CRM_SCHEMAS_ENDPOINT, api_key)
|
|
201
201
|
object_type_id = None
|
|
202
|
+
associations = None
|
|
203
|
+
if ":" in custom_object_name:
|
|
204
|
+
fields = custom_object_name.split(":")
|
|
205
|
+
if len(fields) == 2:
|
|
206
|
+
custom_object_name = fields[0]
|
|
207
|
+
associations = fields[1]
|
|
202
208
|
|
|
203
209
|
custom_object_lowercase = custom_object_name.lower()
|
|
204
|
-
|
|
210
|
+
|
|
211
|
+
for custom_object in custom_objects["results"]:
|
|
205
212
|
if custom_object["name"].lower() == custom_object_lowercase:
|
|
206
213
|
object_type_id = custom_object["objectTypeId"]
|
|
207
214
|
break
|
|
@@ -223,6 +230,8 @@ def hubspot(
|
|
|
223
230
|
props = ",".join(sorted(list(set(props))))
|
|
224
231
|
|
|
225
232
|
custom_object_endpoint = f"crm/v3/objects/{object_type_id}/?properties={props}"
|
|
233
|
+
if associations:
|
|
234
|
+
custom_object_endpoint += f"&associations={associations}"
|
|
226
235
|
|
|
227
236
|
"""Hubspot custom object details resource"""
|
|
228
237
|
yield from fetch_data(custom_object_endpoint, api_key, resource_name="custom")
|
ingestr/src/hubspot/helpers.py
CHANGED
|
@@ -148,10 +148,6 @@ def fetch_data(
|
|
|
148
148
|
"updatedAt": _result.get("updatedAt", ""),
|
|
149
149
|
}
|
|
150
150
|
)
|
|
151
|
-
elif resource_name == "custom":
|
|
152
|
-
_objects.append(
|
|
153
|
-
_result.get("properties", ""),
|
|
154
|
-
)
|
|
155
151
|
else:
|
|
156
152
|
_obj = _result.get("properties", _result)
|
|
157
153
|
if "id" not in _obj and "id" in _result:
|
ingestr/src/sources.py
CHANGED
|
@@ -722,7 +722,7 @@ class FacebookAdsSource:
|
|
|
722
722
|
).with_resources("facebook_insights")
|
|
723
723
|
else:
|
|
724
724
|
raise ValueError(
|
|
725
|
-
"
|
|
725
|
+
f"Resource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
726
726
|
)
|
|
727
727
|
|
|
728
728
|
return facebook_ads_source(
|
|
@@ -804,11 +804,16 @@ class HubspotSource:
|
|
|
804
804
|
|
|
805
805
|
if table.startswith("custom:"):
|
|
806
806
|
fields = table.split(":", 2)
|
|
807
|
-
if len(fields) != 2:
|
|
807
|
+
if len(fields) != 2 and len(fields) != 3:
|
|
808
808
|
raise ValueError(
|
|
809
|
-
"Invalid Hubspot custom table format. Expected format: custom:<custom_object_type>"
|
|
809
|
+
"Invalid Hubspot custom table format. Expected format: custom:<custom_object_type> or custom:<custom_object_type>:<associations>"
|
|
810
810
|
)
|
|
811
|
-
|
|
811
|
+
|
|
812
|
+
if len(fields) == 2:
|
|
813
|
+
endpoint = fields[1]
|
|
814
|
+
else:
|
|
815
|
+
endpoint = f"{fields[1]}:{fields[2]}"
|
|
816
|
+
|
|
812
817
|
return hubspot(
|
|
813
818
|
api_key=api_key[0],
|
|
814
819
|
custom_object=endpoint,
|
|
@@ -847,22 +852,31 @@ class AirtableSource:
|
|
|
847
852
|
if not table:
|
|
848
853
|
raise ValueError("Source table is required to connect to Airtable")
|
|
849
854
|
|
|
850
|
-
tables = table.split(",")
|
|
851
|
-
|
|
852
855
|
source_parts = urlparse(uri)
|
|
853
856
|
source_fields = parse_qs(source_parts.query)
|
|
854
|
-
base_id = source_fields.get("base_id")
|
|
855
857
|
access_token = source_fields.get("access_token")
|
|
856
858
|
|
|
857
|
-
if not
|
|
859
|
+
if not access_token:
|
|
858
860
|
raise ValueError(
|
|
859
|
-
"
|
|
861
|
+
"access_token in the URI is required to connect to Airtable"
|
|
860
862
|
)
|
|
861
863
|
|
|
864
|
+
base_id = source_fields.get("base_id", [None])[0]
|
|
865
|
+
clean_table = table
|
|
866
|
+
|
|
867
|
+
table_fields = table.split("/")
|
|
868
|
+
if len(table_fields) == 2:
|
|
869
|
+
clean_table = table_fields[1]
|
|
870
|
+
if not base_id:
|
|
871
|
+
base_id = table_fields[0]
|
|
872
|
+
|
|
873
|
+
if not base_id:
|
|
874
|
+
raise ValueError("base_id in the URI is required to connect to Airtable")
|
|
875
|
+
|
|
862
876
|
from ingestr.src.airtable import airtable_source
|
|
863
877
|
|
|
864
878
|
return airtable_source(
|
|
865
|
-
base_id=base_id
|
|
879
|
+
base_id=base_id, table_names=[clean_table], access_token=access_token[0]
|
|
866
880
|
)
|
|
867
881
|
|
|
868
882
|
|
|
@@ -1175,7 +1189,7 @@ class ZendeskSource:
|
|
|
1175
1189
|
).with_resources(table)
|
|
1176
1190
|
else:
|
|
1177
1191
|
raise ValueError(
|
|
1178
|
-
"
|
|
1192
|
+
f"Resource '{table}' is not supported for Zendesk source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
1179
1193
|
)
|
|
1180
1194
|
|
|
1181
1195
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.33
|
|
3
|
+
Version: 0.13.35
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -46,7 +46,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
|
|
|
46
46
|
Requires-Dist: dataclasses-json==0.6.7
|
|
47
47
|
Requires-Dist: decorator==5.2.1
|
|
48
48
|
Requires-Dist: deprecation==2.1.0
|
|
49
|
-
Requires-Dist: dlt==1.
|
|
49
|
+
Requires-Dist: dlt==1.10.0
|
|
50
50
|
Requires-Dist: dnspython==2.7.0
|
|
51
51
|
Requires-Dist: duckdb-engine==0.17.0
|
|
52
52
|
Requires-Dist: duckdb==1.2.1
|
|
@@ -2,21 +2,21 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
|
|
|
2
2
|
ingestr/main.py,sha256=mRlGSqi2sHcZ2AKlwn5MqoMvFxXlSjcZxmPJr76rmRk,25187
|
|
3
3
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
4
4
|
ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
|
|
5
|
-
ingestr/src/buildinfo.py,sha256
|
|
6
|
-
ingestr/src/destinations.py,sha256=
|
|
5
|
+
ingestr/src/buildinfo.py,sha256=-fdK0r3dEieckm9FbunVnN7VEWpVvtyhbo9bq89u0Es,21
|
|
6
|
+
ingestr/src/destinations.py,sha256=Z79f01BSmEaXnQno2IQVt4Th4dmD-BiOQXlibZJ5sTw,13180
|
|
7
7
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
8
8
|
ingestr/src/factory.py,sha256=659h_sVRBhtPv2dvtOK8tf3PtUhlK3KsWLrb20_iQKw,5333
|
|
9
9
|
ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
|
|
10
10
|
ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
11
11
|
ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
|
|
12
12
|
ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
|
|
13
|
-
ingestr/src/sources.py,sha256=
|
|
13
|
+
ingestr/src/sources.py,sha256=Xinebylg-PqzyQ-r2wFukqhsPsv611fEoTvTWY1L-B4,76461
|
|
14
14
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
15
15
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
16
16
|
ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
|
|
17
17
|
ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
|
|
18
18
|
ingestr/src/adjust/adjust_helpers.py,sha256=IHSS94A7enOWkZ8cP5iW3RdYt0Xl3qZGAmDc1Xy4qkI,3802
|
|
19
|
-
ingestr/src/airtable/__init__.py,sha256=
|
|
19
|
+
ingestr/src/airtable/__init__.py,sha256=XzRsS39xszUlh_s7P1_zq5v8vLfjz3m-NtTPaa8TTZU,2818
|
|
20
20
|
ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
|
|
21
21
|
ingestr/src/applovin_max/__init__.py,sha256=ZrxOUSirGxkGDmM9wsQO3anwNVzqtoCwN_OuCXfPkXE,3285
|
|
22
22
|
ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
|
|
@@ -62,8 +62,8 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
|
|
|
62
62
|
ingestr/src/google_sheets/helpers/data_processing.py,sha256=RNt2MYfdJhk4bRahnQVezpNg2x9z0vx60YFq2ukZ8vI,11004
|
|
63
63
|
ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
|
|
64
64
|
ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
|
|
65
|
-
ingestr/src/hubspot/__init__.py,sha256=
|
|
66
|
-
ingestr/src/hubspot/helpers.py,sha256=
|
|
65
|
+
ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
|
|
66
|
+
ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
|
|
67
67
|
ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
|
|
68
68
|
ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
|
|
69
69
|
ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
|
|
@@ -122,8 +122,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
122
122
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
123
123
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
124
124
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
125
|
-
ingestr-0.13.
|
|
126
|
-
ingestr-0.13.
|
|
127
|
-
ingestr-0.13.
|
|
128
|
-
ingestr-0.13.
|
|
129
|
-
ingestr-0.13.
|
|
125
|
+
ingestr-0.13.35.dist-info/METADATA,sha256=HazXK_VyPcaappMDArhp7cBeRRaVc1oOTzgo3S7Gtr0,13575
|
|
126
|
+
ingestr-0.13.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
127
|
+
ingestr-0.13.35.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
128
|
+
ingestr-0.13.35.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
129
|
+
ingestr-0.13.35.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|