ingestr 0.13.32__py3-none-any.whl → 0.13.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ingestr might be problematic.

ingestr/main.py CHANGED
@@ -264,6 +264,13 @@ def ingest(
             envvar=["YIELD_LIMIT", "INGESTR_YIELD_LIMIT"],
         ),
     ] = None,  # type: ignore
+    staging_bucket: Annotated[
+        Optional[str],
+        typer.Option(
+            help="The staging bucket to be used for the ingestion, must be prefixed with 'gs://' or 's3://'",
+            envvar=["STAGING_BUCKET", "INGESTR_STAGING_BUCKET"],
+        ),
+    ] = None,  # type: ignore
 ):
     import hashlib
     import tempfile
@@ -272,8 +279,6 @@ def ingest(
     import dlt
     import humanize
     import typer
-    from dlt.common.data_types import TDataType
-    from dlt.common.destination import Destination
     from dlt.common.pipeline import LoadInfo
     from dlt.common.runtime.collector import Collector, LogCollector
     from dlt.common.schema.typing import TColumnSchema
@@ -318,7 +323,7 @@ def ingest(
         return (source_table, dest_table)

     def validate_loader_file_format(
-        dlt_dest: Destination, loader_file_format: Optional[LoaderFileFormat]
+        dlt_dest, loader_file_format: Optional[LoaderFileFormat]
     ):
         if (
             loader_file_format
@@ -330,9 +335,11 @@ def ingest(
             )
             raise typer.Abort()

-    def parse_columns(columns: list[str]) -> dict[str, TDataType]:
+    def parse_columns(columns: list[str]) -> dict:
         from typing import cast, get_args

+        from dlt.common.data_types import TDataType
+
         possible_types = get_args(TDataType)

         types: dict[str, TDataType] = {}
@@ -417,7 +424,9 @@ def ingest(
         pipelines_dir = tempfile.mkdtemp()
         is_pipelines_dir_temp = True

-    dlt_dest = destination.dlt_dest(uri=dest_uri, dest_table=dest_table)
+    dlt_dest = destination.dlt_dest(
+        uri=dest_uri, dest_table=dest_table, staging_bucket=staging_bucket
+    )
     validate_loader_file_format(dlt_dest, loader_file_format)

     if partition_by:
@@ -566,6 +575,7 @@ def ingest(
         **destination.dlt_run_params(
             uri=dest_uri,
             table=dest_table,
+            staging_bucket=staging_bucket,
         ),
         write_disposition=write_disposition,  # type: ignore
         primary_key=(primary_key if primary_key and len(primary_key) > 0 else None),  # type: ignore
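The new option can be supplied either on the command line (presumably exposed as --staging-bucket, the flag name typer derives from the parameter above) or through the STAGING_BUCKET / INGESTR_STAGING_BUCKET environment variables. A minimal sketch of a run that uses it; the bucket, connection strings, and table names below are hypothetical placeholders:

import os
import subprocess

# Only the env var name comes from the diff above; all values are placeholders.
os.environ["INGESTR_STAGING_BUCKET"] = "gs://example-staging-bucket"

subprocess.run(
    [
        "ingestr", "ingest",
        "--source-uri", "postgresql://user:pass@localhost:5432/mydb",
        "--source-table", "public.events",
        "--dest-uri", "bigquery://example-project?credentials_path=service_account.json",
        "--dest-table", "analytics.events",
    ],
    check=True,
)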
ingestr/src/airtable/__init__.py CHANGED
@@ -55,7 +55,7 @@ def airtable_resource(
             field for field in table["fields"] if field["id"] == primary_key_id
         ][0]
         table_name: str = table["name"]
-        primary_key: List[str] = [primary_key_field["name"]]
+        primary_key: List[str] = [f"fields__{primary_key_field['name']}"]
         air_table = api.table(base_id, table["id"])

         # Table.iterate() supports rich customization options, such as chunk size, fields, cell format, timezone, locale, and view
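The fields__ prefix reflects how dlt flattens the nested Airtable record: each record arrives roughly as {"id": ..., "createdTime": ..., "fields": {...}}, and nested dict keys become columns joined with a double underscore, so the primary-key hint has to point at the flattened column name. A rough illustration of that flattening, with a hypothetical record; the function below is a simplified stand-in, not dlt's actual normalizer:

record = {"id": "rec123", "fields": {"Name": "Widget", "Price": 10}}  # hypothetical record

def flatten(obj, prefix=""):
    # Simplified sketch of nested-dict flattening with "__" separators.
    out = {}
    for key, value in obj.items():
        name = f"{prefix}__{key}" if prefix else key
        if isinstance(value, dict):
            out.update(flatten(value, name))
        else:
            out[name] = value
    return out

print(flatten(record))  # {'id': 'rec123', 'fields__Name': 'Widget', 'fields__Price': 10}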
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
-version = "v0.13.32"
+version = "v0.13.34"
ingestr/src/destinations.py CHANGED
@@ -60,6 +60,22 @@ class BigQueryDestination:
                 base64.b64decode(credentials_base64[0]).decode("utf-8")
             )

+        staging_bucket = kwargs.get("staging_bucket", None)
+        if staging_bucket:
+            if not staging_bucket.startswith("gs://"):
+                raise ValueError("Staging bucket must start with gs://")
+
+            os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = staging_bucket
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__PROJECT_ID"] = (
+                credentials.get("project_id", None)
+            )
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__PRIVATE_KEY"] = (
+                credentials.get("private_key", None)
+            )
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__CLIENT_EMAIL"] = (
+                credentials.get("client_email", None)
+            )
+
         project_id = None
         if source_fields.hostname:
             project_id = source_fields.hostname
@@ -83,6 +99,10 @@ class BigQueryDestination:
             "table_name": table_fields[-1],
         }

+        staging_bucket = kwargs.get("staging_bucket", None)
+        if staging_bucket:
+            res["staging"] = "filesystem"
+
         return res

     def post_load(self):
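For context, "staging": "filesystem" plugs into dlt's staging mechanism: load files are first written to the bucket configured via DESTINATION__FILESYSTEM__BUCKET_URL and then loaded into BigQuery from there. A minimal sketch of the equivalent plain-dlt setup, assuming the same env var; the bucket, pipeline, and dataset names are hypothetical:

import os
import dlt

# Hypothetical bucket; mirrors the env var set in the diff above.
os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = "gs://example-staging-bucket"

pipeline = dlt.pipeline(
    pipeline_name="example_pipeline",  # hypothetical name
    destination="bigquery",
    staging="filesystem",              # stage load files in the bucket, then load into BigQuery
    dataset_name="analytics",          # hypothetical dataset
)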
ingestr/src/filters.py CHANGED
@@ -1,6 +1,3 @@
-from dlt.common.libs.sql_alchemy import Table
-
-
 def cast_set_to_list(row):
     # this handles just the sqlalchemy backend for now
     if isinstance(row, dict):
@@ -32,6 +29,8 @@ def handle_mysql_empty_dates(row):


 def table_adapter_exclude_columns(cols: list[str]):
+    from dlt.common.libs.sql_alchemy import Table
+
     def excluder(table: Table):
         cols_to_remove = [col for col in table._columns if col.name in cols]  # type: ignore
         for col in cols_to_remove:
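This is the same pattern applied throughout the release: imports of dlt submodules and source-specific packages are moved from module level into the function or constructor that needs them, which defers the import cost until that code path actually runs. A generic sketch of the pattern, with a hypothetical heavy dependency:

def summarize(rows):
    # Imported only when summarize() is called, so importing this module stays
    # cheap for code paths that never need the dependency.
    import pandas as pd  # hypothetical heavy dependency

    return pd.DataFrame(rows).describe()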
ingestr/src/hubspot/__init__.py CHANGED
@@ -197,11 +197,18 @@ def hubspot(
         api_key: str = api_key,
         custom_object_name: str = custom_object,
     ) -> Iterator[TDataItems]:
-        get_custom_object = fetch_data_raw(CRM_SCHEMAS_ENDPOINT, api_key)
+        custom_objects = fetch_data_raw(CRM_SCHEMAS_ENDPOINT, api_key)
         object_type_id = None
+        associations = None
+        if ":" in custom_object_name:
+            fields = custom_object_name.split(":")
+            if len(fields) == 2:
+                custom_object_name = fields[0]
+                associations = fields[1]

         custom_object_lowercase = custom_object_name.lower()
-        for custom_object in get_custom_object["results"]:
+
+        for custom_object in custom_objects["results"]:
             if custom_object["name"].lower() == custom_object_lowercase:
                 object_type_id = custom_object["objectTypeId"]
                 break
@@ -223,6 +230,8 @@ def hubspot(
         props = ",".join(sorted(list(set(props))))

         custom_object_endpoint = f"crm/v3/objects/{object_type_id}/?properties={props}"
+        if associations:
+            custom_object_endpoint += f"&associations={associations}"

         """Hubspot custom object details resource"""
         yield from fetch_data(custom_object_endpoint, api_key, resource_name="custom")
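Combined with the HubspotSource change further down in sources.py, a custom-object table can now optionally carry an associations segment. A small sketch of the new convention, using a hypothetical custom object "orders" associated with "contacts":

# "custom:<custom_object_type>" or "custom:<custom_object_type>:<associations>"
table = "custom:orders:contacts"  # hypothetical table argument

fields = table.split(":", 2)
custom_object = fields[1]                               # "orders"
associations = fields[2] if len(fields) == 3 else None  # "contacts" or None
# The associations value ends up appended to the CRM request as
# "&associations=<associations>", as shown in the hunk above.
print(custom_object, associations)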
ingestr/src/hubspot/helpers.py CHANGED
@@ -148,10 +148,6 @@ def fetch_data(
                     "updatedAt": _result.get("updatedAt", ""),
                 }
             )
-        elif resource_name == "custom":
-            _objects.append(
-                _result.get("properties", ""),
-            )
         else:
             _obj = _result.get("properties", _result)
             if "id" not in _obj and "id" in _result:
ingestr/src/partition.py CHANGED
@@ -1,7 +1,6 @@
 from typing import Dict

 from dlt.common.schema.typing import TColumnSchema
-from dlt.destinations.adapters import athena_adapter, athena_partition
 from dlt.sources import DltResource, DltSource

 import ingestr.src.resource as resource
@@ -12,6 +11,8 @@ def apply_athena_hints(
     partition_column: str,
     additional_hints: Dict[str, TColumnSchema] = {},
 ) -> None:
+    from dlt.destinations.adapters import athena_adapter, athena_partition
+
     def _apply_partition_hint(resource: DltResource) -> None:
         columns = resource.columns if resource.columns else {}

ingestr/src/sources.py CHANGED
@@ -13,91 +13,37 @@ from typing import (
     List,
     Literal,
     Optional,
+    TypeAlias,
     Union,
 )
 from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse

-import dlt
 import pendulum
-from dlt.common.configuration.specs import (
-    AwsCredentials,
-)
-from dlt.common.libs.sql_alchemy import (
-    Engine,
-    MetaData,
-)
 from dlt.common.time import ensure_pendulum_datetime
-from dlt.common.typing import TDataItem, TSecretStrValue
 from dlt.extract import Incremental
+from dlt.sources import incremental as dlt_incremental
 from dlt.sources.credentials import (
     ConnectionStringCredentials,
 )
-from dlt.sources.sql_database import sql_table
-from dlt.sources.sql_database.helpers import TableLoader
-from dlt.sources.sql_database.schema_types import (
-    ReflectionLevel,
-    SelectAny,
-    Table,
-    TTypeAdapter,
-)
-from sqlalchemy import Column
-from sqlalchemy import types as sa

 from ingestr.src import blob
-from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
-from ingestr.src.adjust.adjust_helpers import parse_filters
-from ingestr.src.applovin import applovin_source
-from ingestr.src.applovin_max import applovin_max_source
-from ingestr.src.arrow import memory_mapped_arrow
-from ingestr.src.chess import source
 from ingestr.src.errors import (
     InvalidBlobTableError,
     MissingValueError,
     UnsupportedResourceError,
 )
-from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
-from ingestr.src.filesystem import readers
-from ingestr.src.filters import table_adapter_exclude_columns
-from ingestr.src.frankfurter import frankfurter_source
-from ingestr.src.frankfurter.helpers import validate_dates
-from ingestr.src.github import github_reactions, github_repo_events, github_stargazers
-from ingestr.src.gorgias import gorgias_source
-from ingestr.src.hubspot import hubspot
-from ingestr.src.kafka import kafka_consumer
-from ingestr.src.kafka.helpers import KafkaCredentials
-from ingestr.src.klaviyo._init_ import klaviyo_source
-from ingestr.src.linkedin_ads import linked_in_ads_source
-from ingestr.src.linkedin_ads.dimension_time_enum import (
-    Dimension,
-    TimeGranularity,
-)
-from ingestr.src.notion import notion_databases
-from ingestr.src.personio import personio_source
-from ingestr.src.shopify import shopify_source
-from ingestr.src.slack import slack_source
-from ingestr.src.sql_database.callbacks import (
-    chained_query_adapter_callback,
-    custom_query_variable_subsitution,
-    limit_callback,
-    type_adapter_callback,
-)
 from ingestr.src.table_definition import TableDefinition, table_string_to_dataclass
-from ingestr.src.tiktok_ads import tiktok_source
-from ingestr.src.time import isotime
-from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
-from ingestr.src.zendesk.helpers.credentials import (
-    ZendeskCredentialsOAuth,
-    ZendeskCredentialsToken,
-)
-
-TableBackend = Literal["sqlalchemy", "pyarrow", "pandas", "connectorx"]
-TQueryAdapter = Callable[[SelectAny, Table], SelectAny]


 class SqlSource:
     table_builder: Callable

-    def __init__(self, table_builder=sql_table) -> None:
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from dlt.sources.sql_database import sql_table
+
+            table_builder = sql_table
+
         self.table_builder = table_builder

     def handles_incrementality(self) -> bool:
@@ -112,7 +58,7 @@ class SqlSource:
         if kwargs.get("incremental_key"):
             start_value = kwargs.get("interval_start")
             end_value = kwargs.get("interval_end")
-            incremental = dlt.sources.incremental(
+            incremental = dlt_incremental(
                 kwargs.get("incremental_key", ""),
                 initial_value=start_value,
                 end_value=end_value,
@@ -167,6 +113,27 @@ class SqlSource:
         if uri.startswith("db2://"):
             uri = uri.replace("db2://", "db2+ibm_db://")

+        from dlt.common.libs.sql_alchemy import (
+            Engine,
+            MetaData,
+        )
+        from dlt.sources.sql_database.schema_types import (
+            ReflectionLevel,
+            SelectAny,
+            Table,
+            TTypeAdapter,
+        )
+        from sqlalchemy import Column
+        from sqlalchemy import types as sa
+
+        from ingestr.src.filters import table_adapter_exclude_columns
+        from ingestr.src.sql_database.callbacks import (
+            chained_query_adapter_callback,
+            custom_query_variable_subsitution,
+            limit_callback,
+            type_adapter_callback,
+        )
+
         query_adapters = []
         if kwargs.get("sql_limit"):
             query_adapters.append(
@@ -185,6 +152,13 @@ class SqlSource:
             defer_table_reflect = True
             query_value = table.split(":", 1)[1]

+        TableBackend: TypeAlias = Literal[
+            "sqlalchemy", "pyarrow", "pandas", "connectorx"
+        ]
+        TQueryAdapter: TypeAlias = Callable[[SelectAny, Table], SelectAny]
+        import dlt
+        from dlt.common.typing import TDataItem
+
         # this is a very hacky version of the table_rows function. it is built this way to go around the dlt's table loader.
         # I didn't want to write a full fledged sqlalchemy source for now, and wanted to benefit from the existing stuff to begin with.
         # this is by no means a production ready solution, but it works for now.
@@ -235,6 +209,8 @@ class SqlSource:
                 *cols,
             )

+            from dlt.sources.sql_database.helpers import TableLoader
+
             loader = TableLoader(
                 engine,
                 backend,
@@ -277,7 +253,12 @@ class SqlSource:
 class ArrowMemoryMappedSource:
     table_builder: Callable

-    def __init__(self, table_builder=memory_mapped_arrow) -> None:
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from ingestr.src.arrow import memory_mapped_arrow
+
+            table_builder = memory_mapped_arrow
+
         self.table_builder = table_builder

     def handles_incrementality(self) -> bool:
@@ -289,7 +270,7 @@ class ArrowMemoryMappedSource:
         start_value = kwargs.get("interval_start")
         end_value = kwargs.get("interval_end")

-        incremental = dlt.sources.incremental(
+        incremental = dlt_incremental(
             kwargs.get("incremental_key", ""),
             initial_value=start_value,
             end_value=end_value,
@@ -341,7 +322,7 @@ class MongoDbSource:
         start_value = kwargs.get("interval_start")
         end_value = kwargs.get("interval_end")

-        incremental = dlt.sources.incremental(
+        incremental = dlt_incremental(
             kwargs.get("incremental_key", ""),
             initial_value=start_value,
             end_value=end_value,
@@ -366,7 +347,7 @@ class LocalCsvSource:

     def dlt_source(self, uri: str, table: str, **kwargs):
         def csv_file(
-            incremental: Optional[dlt.sources.incremental[Any]] = None,
+            incremental: Optional[dlt_incremental[Any]] = None,
         ):
             file_path = uri.split("://")[1]
             myFile = open(file_path, "r")
@@ -408,11 +389,13 @@ class LocalCsvSource:
             if page:
                 yield page

-        return dlt.resource(
+        from dlt import resource
+
+        return resource(
             csv_file,
             merge_key=kwargs.get("merge_key"),  # type: ignore
         )(
-            incremental=dlt.sources.incremental(
+            incremental=dlt_incremental(
                 kwargs.get("incremental_key", ""),
                 initial_value=kwargs.get("interval_start"),
                 end_value=kwargs.get("interval_end"),
@@ -428,7 +411,12 @@ class LocalCsvSource:
 class NotionSource:
     table_builder: Callable

-    def __init__(self, table_builder=notion_databases) -> None:
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from ingestr.src.notion import notion_databases
+
+            table_builder = notion_databases
+
         self.table_builder = table_builder

     def handles_incrementality(self) -> bool:
@@ -488,6 +476,8 @@ class ShopifySource:
                 f"Table name '{table}' is not supported for Shopify source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )

+        from ingestr.src.shopify import shopify_source
+
         return shopify_source(
             private_app_password=api_key[0],
             shop_url=f"https://{source_fields.netloc}",
@@ -532,6 +522,8 @@ class GorgiasSource:
         if kwargs.get("interval_end"):
             date_args["end_date"] = kwargs.get("interval_end")

+        from ingestr.src.gorgias import gorgias_source
+
         return gorgias_source(
             domain=source_fields.netloc,
             email=email[0],
@@ -629,6 +621,8 @@ class ChessSource:
                 f"Resource '{table}' is not supported for Chess source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )

+        from ingestr.src.chess import source
+
         return source(players=list_players, **date_args).with_resources(
             table_mapping[table]
         )
@@ -713,6 +707,11 @@ class FacebookAdsSource:
                 "access_token and accound_id are required to connect to Facebook Ads."
             )

+        from ingestr.src.facebook_ads import (
+            facebook_ads_source,
+            facebook_insights_source,
+        )
+
         endpoint = None
         if table in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
             endpoint = table
@@ -723,7 +722,7 @@ class FacebookAdsSource:
             ).with_resources("facebook_insights")
         else:
             raise ValueError(
-                "fResource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
+                f"Resource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )

         return facebook_ads_source(
@@ -770,6 +769,8 @@ class SlackSource:
         if kwargs.get("interval_end"):
             date_args["end_date"] = kwargs.get("interval_end")

+        from ingestr.src.slack import slack_source
+
         return slack_source(
             access_token=api_key[0],
             table_per_channel=False,
@@ -799,13 +800,20 @@ class HubspotSource:

         endpoint = None

+        from ingestr.src.hubspot import hubspot
+
         if table.startswith("custom:"):
             fields = table.split(":", 2)
-            if len(fields) != 2:
+            if len(fields) != 2 and len(fields) != 3:
                 raise ValueError(
-                    "Invalid Hubspot custom table format. Expected format: custom:<custom_object_type>"
+                    "Invalid Hubspot custom table format. Expected format: custom:<custom_object_type> or custom:<custom_object_type>:<associations>"
                 )
-            endpoint = fields[1]
+
+            if len(fields) == 2:
+                endpoint = fields[1]
+            else:
+                endpoint = f"{fields[1]}:{fields[2]}"
+
         return hubspot(
             api_key=api_key[0],
             custom_object=endpoint,
@@ -905,6 +913,9 @@ class KlaviyoSource:
             )

         start_date = kwargs.get("interval_start") or "2000-01-01"
+
+        from ingestr.src.klaviyo import klaviyo_source
+
         return klaviyo_source(
             api_key=api_key[0],
             start_date=start_date,
@@ -938,6 +949,9 @@ class KafkaSource:
             raise ValueError("group_id in the URI is required to connect to kafka")

         start_date = kwargs.get("interval_start")
+        from ingestr.src.kafka import kafka_consumer
+        from ingestr.src.kafka.helpers import KafkaCredentials
+
         return kafka_consumer(
             topics=[table],
             credentials=KafkaCredentials(
@@ -993,6 +1007,9 @@ class AdjustSource:
         if kwargs.get("interval_end"):
             end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))

+        from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
+        from ingestr.src.adjust.adjust_helpers import parse_filters
+
         dimensions = None
         metrics = None
         filters = []
@@ -1099,6 +1116,12 @@ class ZendeskSource:
         if not subdomain:
             raise ValueError("Subdomain is required to connect with Zendesk")

+        from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
+        from ingestr.src.zendesk.helpers.credentials import (
+            ZendeskCredentialsOAuth,
+            ZendeskCredentialsToken,
+        )
+
         if not source_fields.username and source_fields.password:
             oauth_token = source_fields.password
             if not oauth_token:
@@ -1157,7 +1180,7 @@ class ZendeskSource:
             ).with_resources(table)
         else:
             raise ValueError(
-                "fResource '{table}' is not supported for Zendesk source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
+                f"Resource '{table}' is not supported for Zendesk source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )


@@ -1206,6 +1229,8 @@ class S3Source:
                 "S3 Source only supports specific formats files: csv, jsonl, parquet"
             )

+        from ingestr.src.filesystem import readers
+
         return readers(bucket_url, fs, path_to_file).with_resources(endpoint)


@@ -1300,6 +1325,8 @@ class TikTokSource:
         filter_name = list(filters.keys())[0]
         filter_value = list(map(int, filters[list(filters.keys())[0]]))

+        from ingestr.src.tiktok_ads import tiktok_source
+
         return tiktok_source(
             start_date=start_date,
             end_date=end_date,
@@ -1348,9 +1375,12 @@ class AsanaSource:
                 f"Resource '{table}' is not supported for Asana source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )

-        dlt.secrets["sources.asana_source.access_token"] = access_token[0]
+        import dlt
+
         from ingestr.src.asana_source import asana_source

+        dlt.secrets["sources.asana_source.access_token"] = access_token[0]
+
         src = asana_source()
         src.workspaces.add_filter(lambda w: w["gid"] == workspace)
         return src.with_resources(table)
@@ -1396,6 +1426,9 @@ class DynamoDBSource:
         if not secret_key:
             raise ValueError("secret_access_key is required to connect to Dynamodb")

+        from dlt.common.configuration.specs import AwsCredentials
+        from dlt.common.typing import TSecretStrValue
+
         creds = AwsCredentials(
             aws_access_key_id=access_key[0],
             aws_secret_access_key=TSecretStrValue(secret_key[0]),
@@ -1406,8 +1439,11 @@ class DynamoDBSource:
         incremental = None
         incremental_key = kwargs.get("incremental_key")

+        from ingestr.src.dynamodb import dynamodb
+        from ingestr.src.time import isotime
+
         if incremental_key:
-            incremental = dlt.sources.incremental(
+            incremental = dlt_incremental(
                 incremental_key.strip(),
                 initial_value=isotime(kwargs.get("interval_start")),
                 end_value=isotime(kwargs.get("interval_end")),
@@ -1415,8 +1451,6 @@ class DynamoDBSource:
                 range_start="closed",
             )

-        from ingestr.src.dynamodb import dynamodb
-
         # bug: we never validate table.
         return dynamodb(table, creds, incremental)

@@ -1517,6 +1551,12 @@ class GitHubSource:

         access_token = source_fields.get("access_token", [""])[0]

+        from ingestr.src.github import (
+            github_reactions,
+            github_repo_events,
+            github_stargazers,
+        )
+
         if table in ["issues", "pull_requests"]:
             return github_reactions(
                 owner=owner, name=repo, access_token=access_token
@@ -1667,6 +1707,8 @@ class GCSSource:
                 "GCS Source only supports specific formats files: csv, jsonl, parquet"
             )

+        from ingestr.src.filesystem import readers
+
         return readers(bucket_url, fs, path_to_file).with_resources(endpoint)


@@ -1818,6 +1860,12 @@ class LinkedInAdsSource:
                 "'date' or 'month' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
             )

+        from ingestr.src.linkedin_ads import linked_in_ads_source
+        from ingestr.src.linkedin_ads.dimension_time_enum import (
+            Dimension,
+            TimeGranularity,
+        )
+
         if "date" in dimensions:
             time_granularity = TimeGranularity.daily
             dimensions.remove("date")
@@ -1876,6 +1924,8 @@ class AppLovinSource:
             custom_report = table
             table = "custom_report"

+        from ingestr.src.applovin import applovin_source
+
         src = applovin_source(
             api_key[0],
             start_date.strftime("%Y-%m-%d"),
@@ -1943,6 +1993,8 @@ class ApplovinMaxSource:

         end_date = interval_end.date() if interval_end is not None else None

+        from ingestr.src.applovin_max import applovin_max_source
+
         return applovin_max_source(
             start_date=start_date,
             end_date=end_date,
@@ -2020,6 +2072,8 @@ class PersonioSource:
         ]:
             raise UnsupportedResourceError(table, "Personio")

+        from ingestr.src.personio import personio_source
+
         return personio_source(
             client_id=client_id[0],
             client_secret=client_secret[0],
@@ -2054,14 +2108,17 @@ class KinesisSource:
         if start_date is not None:
             # the resource will read all messages after this timestamp.
             start_date = ensure_pendulum_datetime(start_date)
+
+        from dlt.common.configuration.specs import AwsCredentials
+
+        from ingestr.src.kinesis import kinesis_stream
+
         credentials = AwsCredentials(
             aws_access_key_id=aws_access_key_id[0],
             aws_secret_access_key=aws_secret_access_key[0],
             region_name=region_name[0],
         )

-        from ingestr.src.kinesis import kinesis_stream
-
         return kinesis_stream(
             stream_name=table, credentials=credentials, initial_at_timestamp=start_date
         )
@@ -2109,6 +2166,9 @@ class FrankfurterSource:
     def dlt_source(self, uri: str, table: str, **kwargs):
         # start and end dates only assigned and validated for exchange_rates table
         # Note: if an end date but no start date is provided, start date and end date will be set to current date
+        from ingestr.src.frankfurter import frankfurter_source
+        from ingestr.src.frankfurter.helpers import validate_dates
+
         if table == "exchange_rates":
             if kwargs.get("interval_start"):
                 start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
ingestr/src/telemetry/event.py CHANGED
@@ -1,13 +1,4 @@
 import os
-import platform
-
-import machineid
-import rudderstack.analytics as rudder_analytics  # type: ignore
-
-from ingestr.src.version import __version__  # type: ignore
-
-rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
-rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"


 def track(event_name, event_properties: dict):
@@ -16,6 +7,16 @@ def track(event_name, event_properties: dict):
     ):
         return

+    import platform
+
+    import machineid
+    import rudderstack.analytics as rudder_analytics  # type: ignore
+
+    from ingestr.src.version import __version__  # type: ignore
+
+    rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
+    rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"
+
     try:
         if not event_properties:
             event_properties = {}
ingestr-0.13.34.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.32
+Version: 0.13.34
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
ingestr-0.13.34.dist-info/RECORD CHANGED
@@ -1,22 +1,22 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
-ingestr/main.py,sha256=yPEn1FvfjCdWZe0PZqgKB79GQiwUy_qH3QelET13RCE,24870
+ingestr/main.py,sha256=mRlGSqi2sHcZ2AKlwn5MqoMvFxXlSjcZxmPJr76rmRk,25187
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=mvCtlkSChRqpUCkS3LXuEwmkyyBr5oK4zK-YUERhKf8,21
-ingestr/src/destinations.py,sha256=_jRB_0rc-HNb5uvy30ZuFSm_NpK_8I9OJHmwbt0jxCM,12057
+ingestr/src/buildinfo.py,sha256=i3Tz80qXUH6VzMC8jzlySZd05zyoaaBcvoyLd2q-wKg,21
+ingestr/src/destinations.py,sha256=0fEwLY78SQDXbHcX4iz4Xc7H8FXN-QhVJL9uoUTZOs4,12924
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
 ingestr/src/factory.py,sha256=659h_sVRBhtPv2dvtOK8tf3PtUhlK3KsWLrb20_iQKw,5333
-ingestr/src/filters.py,sha256=5LNpBgm8FJXdrFHGyM7dLVyphKykSpPk7yuQAZ8GML4,1133
+ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
-ingestr/src/partition.py,sha256=E0WHqh1FTheQAIVK_-jWUx0dgyYZCD1VxlAm362gao4,964
+ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=gWWYlj3cyscernMrCePDR8Q_zJF5wXFwGM6x0LVRbV4,74921
+ingestr/src/sources.py,sha256=uRERygJ41y0MNXF3-FJvHr4btxlEM93ZeWr_Liz3N2M,76181
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
 ingestr/src/adjust/adjust_helpers.py,sha256=IHSS94A7enOWkZ8cP5iW3RdYt0Xl3qZGAmDc1Xy4qkI,3802
-ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
+ingestr/src/airtable/__init__.py,sha256=mdzeaq0g12HR8gbhtVR_aS_5GVWPZn6XD-zHUE5FunI,2788
 ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
 ingestr/src/applovin_max/__init__.py,sha256=ZrxOUSirGxkGDmM9wsQO3anwNVzqtoCwN_OuCXfPkXE,3285
 ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
@@ -62,14 +62,14 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
 ingestr/src/google_sheets/helpers/data_processing.py,sha256=RNt2MYfdJhk4bRahnQVezpNg2x9z0vx60YFq2ukZ8vI,11004
 ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
 ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
-ingestr/src/hubspot/__init__.py,sha256=QheZb_F2TEBdzd29SEgkU3AMdIIF7Gpj-t27EXnSIZ4,11448
-ingestr/src/hubspot/helpers.py,sha256=4aVOSzIsQV3RemqRJEJLu7BWMdcOuubwNvrjuMu87rg,8045
+ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
+ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=u5ThH1y8uObZKXgIo71em1UnX6MsVHWOjcf1jKqKbE8,6205
 ingestr/src/kinesis/helpers.py,sha256=aF0GCDKSectaaW8XPdERY_6bUs0ky19dcBs24ZFn-o0,2473
-ingestr/src/klaviyo/_init_.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
+ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
 ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
@@ -104,7 +104,7 @@ ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG
 ingestr/src/stripe_analytics/__init__.py,sha256=0HCL0qsrh_si1RR3a4k9XS94VWQ4v9aG7CqXF-V-57M,4593
 ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
 ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
-ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
+ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
 ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
 ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=jmWHvZzN1Vt_PWrJkgq5a2wIwon-OBEzXoZx0jEy-74,3905
@@ -122,8 +122,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.32.dist-info/METADATA,sha256=RK2X2A37bphtuSAtyffInwt0ev8kKgx7VeZMDILrwsM,13574
-ingestr-0.13.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ingestr-0.13.32.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
-ingestr-0.13.32.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
-ingestr-0.13.32.dist-info/RECORD,,
+ingestr-0.13.34.dist-info/METADATA,sha256=84NPfN9LSTGrw79p3116CXH9BZGjnXgEvglsXpVhEY0,13574
+ingestr-0.13.34.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.34.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.34.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.34.dist-info/RECORD,,