ingestr 0.13.53__py3-none-any.whl → 0.13.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.53"
1
+ version = "v0.13.55"
@@ -476,7 +476,7 @@ class SqliteDestination(GenericSqlDestination):
476
476
 
477
477
  def dlt_run_params(self, uri: str, table: str, **kwargs):
478
478
  return {
479
- #https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
479
+ # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
480
480
  "dataset_name": "main",
481
481
  "table_name": table,
482
482
  }
@@ -495,6 +495,3 @@ class MySqlDestination(GenericSqlDestination):
495
495
  "dataset_name": database,
496
496
  "table_name": table,
497
497
  }
498
-
499
-
500
-
@@ -116,6 +116,8 @@ def facebook_insights_source(
116
116
  batch_size: int = 50,
117
117
  request_timeout: int = 300,
118
118
  app_api_version: str = None,
119
+ start_date: pendulum.DateTime | None = None,
120
+ end_date: pendulum.DateTime | None = None,
119
121
  ) -> DltResource:
120
122
  """Incrementally loads insight reports with defined granularity level, fields, breakdowns etc.
121
123
 
@@ -148,27 +150,32 @@ def facebook_insights_source(
148
150
  account_id, access_token, request_timeout, app_api_version
149
151
  )
150
152
 
151
- # we load with a defined lag
152
- initial_load_start_date = pendulum.today().subtract(days=initial_load_past_days)
153
- initial_load_start_date_str = initial_load_start_date.isoformat()
153
+ if start_date is None:
154
+ start_date = pendulum.today().subtract(days=initial_load_past_days)
155
+
156
+ columns = {}
157
+ for field in fields:
158
+ if field in INSIGHT_FIELDS_TYPES:
159
+ columns[field] = INSIGHT_FIELDS_TYPES[field]
154
160
 
155
161
  @dlt.resource(
156
162
  primary_key=INSIGHTS_PRIMARY_KEY,
157
163
  write_disposition="merge",
158
- columns=INSIGHT_FIELDS_TYPES,
164
+ columns=columns,
159
165
  )
160
166
  def facebook_insights(
161
167
  date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
162
168
  "date_start",
163
- initial_value=initial_load_start_date_str,
169
+ initial_value=start_date.isoformat(),
170
+ end_value=end_date.isoformat() if end_date else None,
164
171
  range_end="closed",
165
172
  range_start="closed",
173
+ lag=attribution_window_days_lag * 24 * 60 * 60, # Convert days to seconds
166
174
  ),
167
175
  ) -> Iterator[TDataItems]:
168
- start_date = get_start_date(date_start, attribution_window_days_lag)
176
+ start_date = get_start_date(date_start)
169
177
  end_date = pendulum.now()
170
178
 
171
- # fetch insights in incremental day steps
172
179
  while start_date <= end_date:
173
180
  query = {
174
181
  "level": level,
@@ -193,7 +200,10 @@ def facebook_insights_source(
193
200
  }
194
201
  ],
195
202
  }
196
- job = execute_job(account.get_insights(params=query, is_async=True))
203
+ job = execute_job(
204
+ account.get_insights(params=query, is_async=True),
205
+ insights_max_async_sleep_seconds=10,
206
+ )
197
207
  yield list(map(process_report_item, job.get_result()))
198
208
  start_date = start_date.add(days=time_increment_days)
199
209
 
@@ -31,14 +31,13 @@ from .settings import (
31
31
 
32
32
  def get_start_date(
33
33
  incremental_start_date: dlt.sources.incremental[str],
34
- attribution_window_days_lag: int = 7,
35
34
  ) -> pendulum.DateTime:
36
35
  """
37
36
  Get the start date for incremental loading of Facebook Insights data.
38
37
  """
39
38
  start_date: pendulum.DateTime = ensure_pendulum_datetime(
40
39
  incremental_start_date.start_value
41
- ).subtract(days=attribution_window_days_lag)
40
+ )
42
41
 
43
42
  # facebook forgets insights so trim the lag and warn
44
43
  min_start_date = pendulum.today().subtract(
@@ -65,7 +64,6 @@ def process_report_item(item: AbstractObject) -> DictStrAny:
65
64
  for pki in INSIGHTS_PRIMARY_KEY:
66
65
  if pki not in d:
67
66
  d[pki] = "no_" + pki
68
-
69
67
  return d
70
68
 
71
69
 
@@ -138,7 +136,7 @@ def execute_job(
138
136
  ) -> AbstractCrudObject:
139
137
  status: str = None
140
138
  time_start = time.time()
141
- sleep_time = 10
139
+ sleep_time = 3
142
140
  while status != "Job Completed":
143
141
  duration = time.time() - time_start
144
142
  job = job.api_get()
@@ -112,6 +112,8 @@ DEFAULT_INSIGHT_FIELDS = (
112
112
  "social_spend",
113
113
  "spend",
114
114
  "website_ctr",
115
+ "conversions",
116
+ "video_thruplay_watched_actions",
115
117
  )
116
118
 
117
119
  TInsightsLevels = Literal["account", "campaign", "adset", "ad"]
@@ -0,0 +1,39 @@
1
+ from typing import Dict
2
+
3
+ import dlt
4
+ from dlt.common.configuration.inject import with_config
5
+ from dlt.sources.helpers import requests
6
+
7
+
8
+ @with_config(sections=("sources", "facebook_ads"))
9
+ def debug_access_token(
10
+ access_token: str = dlt.secrets.value,
11
+ client_id: str = dlt.secrets.value,
12
+ client_secret: str = dlt.secrets.value,
13
+ ) -> str:
14
+ """Debugs the `access_token` providing info on expiration time, scopes etc. If arguments are not provided, `dlt` will inject them from configuration"""
15
+ debug_url = f"https://graph.facebook.com/debug_token?input_token={access_token}&access_token={client_id}|{client_secret}"
16
+ response = requests.get(debug_url)
17
+ data: Dict[str, str] = response.json()
18
+
19
+ if "error" in data:
20
+ raise Exception(f"Error debugging token: {data['error']}")
21
+
22
+ return data["data"]
23
+
24
+
25
+ @with_config(sections=("sources", "facebook_ads"))
26
+ def get_long_lived_token(
27
+ access_token: str = dlt.secrets.value,
28
+ client_id: str = dlt.secrets.value,
29
+ client_secret: str = dlt.secrets.value,
30
+ ) -> str:
31
+ """Gets the long lived access token (60 days) from `access_token`. If arguments are not provided, `dlt` will inject them from configuration"""
32
+ exchange_url = f"https://graph.facebook.com/v13.0/oauth/access_token?grant_type=fb_exchange_token&client_id={client_id}&client_secret={client_secret}&fb_exchange_token={access_token}"
33
+ response = requests.get(exchange_url)
34
+ data: Dict[str, str] = response.json()
35
+
36
+ if "error" in data:
37
+ raise Exception(f"Error refreshing token: {data['error']}")
38
+
39
+ return data["access_token"]
ingestr/src/factory.py CHANGED
@@ -47,11 +47,13 @@ from ingestr.src.sources import (
47
47
  KlaviyoSource,
48
48
  LinkedInAdsSource,
49
49
  LocalCsvSource,
50
+ MixpanelSource,
50
51
  MongoDbSource,
51
52
  NotionSource,
52
53
  PersonioSource,
53
54
  PhantombusterSource,
54
55
  PipedriveSource,
56
+ QuickBooksSource,
55
57
  S3Source,
56
58
  SalesforceSource,
57
59
  SFTPSource,
@@ -140,6 +142,7 @@ class SourceDestinationFactory:
140
142
  "hubspot": HubspotSource,
141
143
  "airtable": AirtableSource,
142
144
  "klaviyo": KlaviyoSource,
145
+ "mixpanel": MixpanelSource,
143
146
  "appsflyer": AppsflyerSource,
144
147
  "kafka": KafkaSource,
145
148
  "adjust": AdjustSource,
@@ -166,6 +169,7 @@ class SourceDestinationFactory:
166
169
  "elasticsearch": ElasticsearchSource,
167
170
  "attio": AttioSource,
168
171
  "solidgate": SolidgateSource,
172
+ "quickbooks": QuickBooksSource,
169
173
  "smartsheet": SmartsheetSource,
170
174
  "sftp": SFTPSource,
171
175
  }
@@ -0,0 +1,62 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+
8
+ from .client import MixpanelClient
9
+
10
+
11
+ @dlt.source(max_table_nesting=0)
12
+ def mixpanel_source(
13
+ username: str,
14
+ password: str,
15
+ project_id: str,
16
+ server: str,
17
+ start_date: pendulum.DateTime,
18
+ end_date: pendulum.DateTime | None = None,
19
+ ) -> Iterable[DltResource]:
20
+ client = MixpanelClient(username, password, project_id, server)
21
+
22
+ @dlt.resource(write_disposition="merge", name="events", primary_key="distinct_id")
23
+ def events(
24
+ date=dlt.sources.incremental(
25
+ "time",
26
+ initial_value=start_date.int_timestamp,
27
+ end_value=end_date.int_timestamp if end_date else None,
28
+ range_end="closed",
29
+ range_start="closed",
30
+ ),
31
+ ) -> Iterable[TDataItem]:
32
+ if date.end_value is None:
33
+ end_dt = pendulum.now(tz="UTC")
34
+ else:
35
+ end_dt = pendulum.from_timestamp(date.end_value)
36
+
37
+ start_dt = pendulum.from_timestamp(date.last_value)
38
+
39
+ yield from client.fetch_events(
40
+ start_dt,
41
+ end_dt,
42
+ )
43
+
44
+ @dlt.resource(write_disposition="merge", primary_key="distinct_id", name="profiles")
45
+ def profiles(
46
+ last_seen=dlt.sources.incremental(
47
+ "last_seen",
48
+ initial_value=start_date,
49
+ end_value=end_date,
50
+ range_end="closed",
51
+ range_start="closed",
52
+ ),
53
+ ) -> Iterable[TDataItem]:
54
+ if last_seen.end_value is None:
55
+ end_dt = pendulum.now(tz="UTC")
56
+ else:
57
+ end_dt = last_seen.end_value
58
+
59
+ start_dt = last_seen.last_value
60
+ yield from client.fetch_profiles(start_dt, end_dt)
61
+
62
+ return events, profiles
@@ -0,0 +1,99 @@
1
+ import json
2
+ from typing import Iterable
3
+
4
+ import pendulum
5
+ from dlt.sources.helpers.requests import Client
6
+
7
+
8
+ class MixpanelClient:
9
+ def __init__(self, username: str, password: str, project_id: str, server: str):
10
+ self.username = username
11
+ self.password = password
12
+ self.project_id = project_id
13
+ self.server = server
14
+ self.session = Client(raise_for_status=False).session
15
+
16
+ def fetch_events(
17
+ self, start_date: pendulum.DateTime, end_date: pendulum.DateTime
18
+ ) -> Iterable[dict]:
19
+ if self.server == "us":
20
+ server = "data"
21
+ elif self.server == "in":
22
+ server = "data-in"
23
+ else:
24
+ server = "data-eu"
25
+
26
+ url = f"https://{server}.mixpanel.com/api/2.0/export/"
27
+ params = {
28
+ "project_id": self.project_id,
29
+ "from_date": start_date.format("YYYY-MM-DD"),
30
+ "to_date": end_date.format("YYYY-MM-DD"),
31
+ }
32
+ headers = {
33
+ "accept": "text/plain",
34
+ }
35
+ from requests.auth import HTTPBasicAuth
36
+
37
+ auth = HTTPBasicAuth(self.username, self.password)
38
+ resp = self.session.get(url, params=params, headers=headers, auth=auth)
39
+ resp.raise_for_status()
40
+ for line in resp.iter_lines():
41
+ if line:
42
+ data = json.loads(line.decode())
43
+ if "properties" in data:
44
+ for key, value in data["properties"].items():
45
+ if key.startswith("$"):
46
+ data[key[1:]] = value
47
+ else:
48
+ data[key] = value
49
+ del data["properties"]
50
+ yield data
51
+
52
+ def fetch_profiles(
53
+ self, start_date: pendulum.DateTime, end_date: pendulum.DateTime
54
+ ) -> Iterable[dict]:
55
+ if self.server == "us":
56
+ server = ""
57
+ elif self.server == "in":
58
+ server = "in."
59
+ else:
60
+ server = "eu."
61
+ url = f"https://{server}mixpanel.com/api/query/engage"
62
+ headers = {
63
+ "accept": "application/json",
64
+ "content-type": "application/x-www-form-urlencoded",
65
+ }
66
+ from requests.auth import HTTPBasicAuth
67
+
68
+ auth = HTTPBasicAuth(self.username, self.password)
69
+ page = 0
70
+ session_id = None
71
+ while True:
72
+ params = {"project_id": self.project_id, "page": str(page)}
73
+ if session_id:
74
+ params["session_id"] = session_id
75
+ start_str = start_date.format("YYYY-MM-DDTHH:mm:ss")
76
+ end_str = end_date.format("YYYY-MM-DDTHH:mm:ss")
77
+ where = f'properties["$last_seen"] >= "{start_str}" and properties["$last_seen"] <= "{end_str}"'
78
+ params["where"] = where
79
+ resp = self.session.post(url, params=params, headers=headers, auth=auth)
80
+
81
+ resp.raise_for_status()
82
+ data = resp.json()
83
+
84
+ for result in data.get("results", []):
85
+ for key, value in result["$properties"].items():
86
+ if key.startswith("$"):
87
+ if key == "$last_seen":
88
+ result["last_seen"] = pendulum.parse(value)
89
+ else:
90
+ result[key[1:]] = value
91
+ result["distinct_id"] = result["$distinct_id"]
92
+ del result["$properties"]
93
+ del result["$distinct_id"]
94
+ yield result
95
+ if not data.get("results"):
96
+ break
97
+ session_id = data.get("session_id", session_id)
98
+
99
+ page += 1
@@ -0,0 +1,117 @@
1
+ """QuickBooks source built on top of python-quickbooks."""
2
+
3
+ from typing import Iterable, Iterator, List, Optional
4
+
5
+ import dlt
6
+ import pendulum
7
+ from dlt.common.time import ensure_pendulum_datetime
8
+ from dlt.common.typing import TDataItem
9
+ from dlt.sources import DltResource
10
+ from intuitlib.client import AuthClient # type: ignore
11
+
12
+ from quickbooks import QuickBooks # type: ignore
13
+
14
+
15
+ @dlt.source(name="quickbooks", max_table_nesting=0)
16
+ def quickbooks_source(
17
+ company_id: str,
18
+ start_date: pendulum.DateTime,
19
+ object: str,
20
+ end_date: pendulum.DateTime | None,
21
+ client_id: str,
22
+ client_secret: str,
23
+ refresh_token: str,
24
+ environment: str = "production",
25
+ minor_version: Optional[str] = None,
26
+ ) -> Iterable[DltResource]:
27
+ """Create dlt resources for QuickBooks objects.
28
+
29
+ Parameters
30
+ ----------
31
+ company_id: str
32
+ QuickBooks company id (realm id).
33
+ client_id: str
34
+ OAuth client id.
35
+ client_secret: str
36
+ OAuth client secret.
37
+ refresh_token: str
38
+ OAuth refresh token.
39
+ access_token: Optional[str]
40
+ Optional access token. If not provided the library will refresh using the
41
+ provided refresh token.
42
+ environment: str
43
+ Either ``"production"`` or ``"sandbox"``.
44
+ minor_version: Optional[str]
45
+ QuickBooks API minor version if needed.
46
+ """
47
+
48
+ auth_client = AuthClient(
49
+ client_id=client_id,
50
+ client_secret=client_secret,
51
+ environment=environment,
52
+ # redirect_uri is not used since we authenticate using refresh token which skips the step of redirect callback.
53
+ # as redirect_uri is required param, we are passing empty string.
54
+ redirect_uri="",
55
+ )
56
+
57
+ # https://help.developer.intuit.com/s/article/Validity-of-Refresh-Token
58
+ client = QuickBooks(
59
+ auth_client=auth_client,
60
+ refresh_token=refresh_token,
61
+ company_id=company_id,
62
+ minorversion=minor_version,
63
+ )
64
+
65
+ def fetch_object(
66
+ obj_name: str,
67
+ updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
68
+ "lastupdatedtime",
69
+ initial_value=start_date, # type: ignore
70
+ end_value=end_date, # type: ignore
71
+ range_start="closed",
72
+ range_end="closed",
73
+ allow_external_schedulers=True,
74
+ ),
75
+ ) -> Iterator[List[TDataItem]]:
76
+ start_pos = 1
77
+
78
+ end_dt = updated_at.end_value or pendulum.now(tz="UTC")
79
+ start_dt = ensure_pendulum_datetime(str(updated_at.last_value)).in_tz("UTC")
80
+
81
+ start_str = start_dt.isoformat()
82
+ end_str = end_dt.isoformat()
83
+
84
+ where_clause = f"WHERE MetaData.LastUpdatedTime >= '{start_str}' AND MetaData.LastUpdatedTime < '{end_str}'"
85
+ while True:
86
+ query = (
87
+ f"SELECT * FROM {obj_name} {where_clause} "
88
+ f"ORDERBY MetaData.LastUpdatedTime ASC STARTPOSITION {start_pos} MAXRESULTS 1000"
89
+ )
90
+
91
+ result = client.query(query)
92
+
93
+ items = result.get("QueryResponse", {}).get(obj_name.capitalize(), [])
94
+ if not items:
95
+ break
96
+
97
+ for item in items:
98
+ if item.get("MetaData") and item["MetaData"].get("LastUpdatedTime"):
99
+ item["lastupdatedtime"] = ensure_pendulum_datetime(
100
+ item["MetaData"]["LastUpdatedTime"]
101
+ )
102
+ item["id"] = item["Id"]
103
+ del item["Id"]
104
+
105
+ yield item
106
+
107
+ if len(items) < 1000:
108
+ break
109
+
110
+ start_pos += 1000
111
+
112
+ yield dlt.resource(
113
+ fetch_object,
114
+ name=object.lower(),
115
+ write_disposition="merge",
116
+ primary_key="id",
117
+ )(object)
ingestr/src/sources.py CHANGED
@@ -79,7 +79,7 @@ class SqlSource:
79
79
  # clickhouse://<username>:<password>@<host>:<port>?secure=<secure>
80
80
  if uri.startswith("clickhouse://"):
81
81
  parsed_uri = urlparse(uri)
82
-
82
+
83
83
  query_params = parse_qs(parsed_uri.query)
84
84
 
85
85
  if "http_port" in query_params:
@@ -691,8 +691,6 @@ class StripeAnalyticsSource:
691
691
  endpoint,
692
692
  ],
693
693
  stripe_secret_key=api_key[0],
694
- start_date=kwargs.get("interval_start", None),
695
- end_date=kwargs.get("interval_end", None),
696
694
  ).with_resources(endpoint)
697
695
 
698
696
  elif table in INCREMENTAL_ENDPOINTS:
@@ -749,11 +747,64 @@ class FacebookAdsSource:
749
747
  endpoint = None
750
748
  if table in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
751
749
  endpoint = table
752
- elif table in "facebook_insights":
750
+ elif table == "facebook_insights":
753
751
  return facebook_insights_source(
754
752
  access_token=access_token[0],
755
753
  account_id=account_id[0],
754
+ start_date=kwargs.get("interval_start"),
755
+ end_date=kwargs.get("interval_end"),
756
756
  ).with_resources("facebook_insights")
757
+ elif table.startswith("facebook_insights:"):
758
+ # Parse custom breakdowns and metrics from table name
759
+ # Supported formats:
760
+ # facebook_insights:breakdown_type
761
+ # facebook_insights:breakdown_type:metric1,metric2...
762
+ parts = table.split(":")
763
+
764
+ if len(parts) < 2 or len(parts) > 3:
765
+ raise ValueError(
766
+ "Invalid facebook_insights format. Expected: facebook_insights:breakdown_type or facebook_insights:breakdown_type:metric1,metric2..."
767
+ )
768
+
769
+ breakdown_type = parts[1].strip()
770
+ if not breakdown_type:
771
+ raise ValueError(
772
+ "Breakdown type must be provided in format: facebook_insights:breakdown_type"
773
+ )
774
+
775
+ # Validate breakdown type against available options from settings
776
+ import typing
777
+
778
+ from ingestr.src.facebook_ads.settings import TInsightsBreakdownOptions
779
+
780
+ # Get valid breakdown options from the type definition
781
+ valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
782
+
783
+ if breakdown_type not in valid_breakdowns:
784
+ raise ValueError(
785
+ f"Invalid breakdown type '{breakdown_type}'. Valid options: {', '.join(valid_breakdowns)}"
786
+ )
787
+
788
+ source_kwargs = {
789
+ "access_token": access_token[0],
790
+ "account_id": account_id[0],
791
+ "start_date": kwargs.get("interval_start"),
792
+ "end_date": kwargs.get("interval_end"),
793
+ "breakdowns": breakdown_type,
794
+ }
795
+
796
+ # If custom metrics are provided, parse them
797
+ if len(parts) == 3:
798
+ fields = [f.strip() for f in parts[2].split(",") if f.strip()]
799
+ if not fields:
800
+ raise ValueError(
801
+ "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
802
+ )
803
+ source_kwargs["fields"] = fields
804
+
805
+ return facebook_insights_source(**source_kwargs).with_resources(
806
+ "facebook_insights"
807
+ )
757
808
  else:
758
809
  raise ValueError(
759
810
  f"Resource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
@@ -965,6 +1016,57 @@ class KlaviyoSource:
965
1016
  ).with_resources(resource)
966
1017
 
967
1018
 
1019
+ class MixpanelSource:
1020
+ def handles_incrementality(self) -> bool:
1021
+ return True
1022
+
1023
+ def dlt_source(self, uri: str, table: str, **kwargs):
1024
+ if kwargs.get("incremental_key"):
1025
+ raise ValueError(
1026
+ "Mixpanel takes care of incrementality on its own, you should not provide incremental_key"
1027
+ )
1028
+
1029
+ parsed = urlparse(uri)
1030
+ params = parse_qs(parsed.query)
1031
+ username = params.get("username")
1032
+ password = params.get("password")
1033
+ project_id = params.get("project_id")
1034
+ server = params.get("server", ["eu"])
1035
+
1036
+ if not username or not password or not project_id:
1037
+ raise ValueError(
1038
+ "username, password, project_id are required to connect to Mixpanel"
1039
+ )
1040
+
1041
+ if table not in ["events", "profiles"]:
1042
+ raise ValueError(
1043
+ f"Resource '{table}' is not supported for Mixpanel source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
1044
+ )
1045
+
1046
+ start_date = kwargs.get("interval_start")
1047
+ if start_date:
1048
+ start_date = ensure_pendulum_datetime(start_date).in_timezone("UTC")
1049
+ else:
1050
+ start_date = pendulum.datetime(2020, 1, 1).in_timezone("UTC")
1051
+
1052
+ end_date = kwargs.get("interval_end")
1053
+ if end_date:
1054
+ end_date = ensure_pendulum_datetime(end_date).in_timezone("UTC")
1055
+ else:
1056
+ end_date = pendulum.now().in_timezone("UTC")
1057
+
1058
+ from ingestr.src.mixpanel import mixpanel_source
1059
+
1060
+ return mixpanel_source(
1061
+ username=username[0],
1062
+ password=password[0],
1063
+ project_id=project_id[0],
1064
+ start_date=start_date,
1065
+ end_date=end_date,
1066
+ server=server[0],
1067
+ ).with_resources(table)
1068
+
1069
+
968
1070
  class KafkaSource:
969
1071
  def handles_incrementality(self) -> bool:
970
1072
  return False
@@ -2536,3 +2638,73 @@ class SFTPSource:
2536
2638
 
2537
2639
  dlt_source_resource = readers(bucket_url, fs, file_glob)
2538
2640
  return dlt_source_resource.with_resources(endpoint)
2641
+
2642
+
2643
+ class QuickBooksSource:
2644
+ def handles_incrementality(self) -> bool:
2645
+ return True
2646
+
2647
+ # quickbooks://?company_id=<company_id>&client_id=<client_id>&client_secret=<client_secret>&refresh_token=<refresh>&access_token=<access_token>&environment=<env>&minor_version=<version>
2648
+ def dlt_source(self, uri: str, table: str, **kwargs):
2649
+ parsed_uri = urlparse(uri)
2650
+
2651
+ params = parse_qs(parsed_uri.query)
2652
+ company_id = params.get("company_id")
2653
+ client_id = params.get("client_id")
2654
+ client_secret = params.get("client_secret")
2655
+ refresh_token = params.get("refresh_token")
2656
+ environment = params.get("environment", ["production"])
2657
+ minor_version = params.get("minor_version", [None])
2658
+
2659
+ if not client_id or not client_id[0].strip():
2660
+ raise MissingValueError("client_id", "QuickBooks")
2661
+
2662
+ if not client_secret or not client_secret[0].strip():
2663
+ raise MissingValueError("client_secret", "QuickBooks")
2664
+
2665
+ if not refresh_token or not refresh_token[0].strip():
2666
+ raise MissingValueError("refresh_token", "QuickBooks")
2667
+
2668
+ if not company_id or not company_id[0].strip():
2669
+ raise MissingValueError("company_id", "QuickBooks")
2670
+
2671
+ if environment[0] not in ["production", "sandbox"]:
2672
+ raise ValueError(
2673
+ "Invalid environment. Must be either 'production' or 'sandbox'."
2674
+ )
2675
+
2676
+ from ingestr.src.quickbooks import quickbooks_source
2677
+
2678
+ table_name = table.replace(" ", "")
2679
+ table_mapping = {
2680
+ "customers": "customer",
2681
+ "invoices": "invoice",
2682
+ "accounts": "account",
2683
+ "vendors": "vendor",
2684
+ "payments": "payment",
2685
+ }
2686
+ if table_name in table_mapping:
2687
+ table_name = table_mapping[table_name]
2688
+
2689
+ start_date = kwargs.get("interval_start")
2690
+ if start_date is None:
2691
+ start_date = ensure_pendulum_datetime("2025-01-01").in_tz("UTC")
2692
+ else:
2693
+ start_date = ensure_pendulum_datetime(start_date).in_tz("UTC")
2694
+
2695
+ end_date = kwargs.get("interval_end")
2696
+
2697
+ if end_date is not None:
2698
+ end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
2699
+
2700
+ return quickbooks_source(
2701
+ company_id=company_id[0],
2702
+ start_date=start_date,
2703
+ end_date=end_date,
2704
+ client_id=client_id[0],
2705
+ client_secret=client_secret[0],
2706
+ refresh_token=refresh_token[0],
2707
+ environment=environment[0],
2708
+ minor_version=minor_version[0],
2709
+ object=table_name,
2710
+ ).with_resources(table_name)
@@ -85,12 +85,14 @@ def incremental_stripe_source(
85
85
  created: Optional[Any] = dlt.sources.incremental(
86
86
  "created",
87
87
  initial_value=start_date_unix,
88
+ end_value=transform_date(end_date) if end_date is not None else None,
88
89
  range_end="closed",
89
90
  range_start="closed",
90
91
  ),
91
92
  ) -> Generator[Dict[Any, Any], Any, None]:
92
- start_value = created.last_value
93
- yield from pagination(endpoint, start_date=start_value, end_date=end_date)
93
+ yield from pagination(
94
+ endpoint, start_date=created.last_value, end_date=created.end_value
95
+ )
94
96
 
95
97
  for endpoint in endpoints:
96
98
  yield dlt.resource(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.53
3
+ Version: 0.13.55
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -52,14 +52,17 @@ Requires-Dist: dlt==1.10.0
52
52
  Requires-Dist: dnspython==2.7.0
53
53
  Requires-Dist: duckdb-engine==0.17.0
54
54
  Requires-Dist: duckdb==1.2.1
55
+ Requires-Dist: ecdsa==0.19.1
55
56
  Requires-Dist: elastic-transport==8.17.1
56
57
  Requires-Dist: elasticsearch==8.10.1
58
+ Requires-Dist: enum-compat==0.0.3
57
59
  Requires-Dist: et-xmlfile==2.0.0
58
60
  Requires-Dist: facebook-business==20.0.0
59
61
  Requires-Dist: filelock==3.17.0
60
62
  Requires-Dist: flatten-json==0.1.14
61
63
  Requires-Dist: frozenlist==1.5.0
62
64
  Requires-Dist: fsspec==2025.3.2
65
+ Requires-Dist: future==1.0.0
63
66
  Requires-Dist: gcsfs==2025.3.2
64
67
  Requires-Dist: geojson==3.2.0
65
68
  Requires-Dist: gitdb==4.0.12
@@ -93,6 +96,7 @@ Requires-Dist: ibm-db-sa==0.4.1
93
96
  Requires-Dist: ibm-db==3.2.6
94
97
  Requires-Dist: idna==3.10
95
98
  Requires-Dist: inflection==0.5.1
99
+ Requires-Dist: intuit-oauth==1.2.4
96
100
  Requires-Dist: isodate==0.7.2
97
101
  Requires-Dist: jmespath==1.0.1
98
102
  Requires-Dist: jsonpath-ng==1.7.0
@@ -147,8 +151,11 @@ Requires-Dist: pyparsing==3.2.1
147
151
  Requires-Dist: pyrate-limiter==3.7.0
148
152
  Requires-Dist: python-dateutil==2.9.0.post0
149
153
  Requires-Dist: python-dotenv==1.0.1
154
+ Requires-Dist: python-jose==3.5.0
155
+ Requires-Dist: python-quickbooks==0.9.2
150
156
  Requires-Dist: pytz==2025.1
151
157
  Requires-Dist: pyyaml==6.0.2
158
+ Requires-Dist: rauth==0.7.3
152
159
  Requires-Dist: redshift-connector==2.1.5
153
160
  Requires-Dist: requests-file==2.1.0
154
161
  Requires-Dist: requests-oauthlib==1.3.1
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
2
2
  ingestr/main.py,sha256=GkC1hdq8AVGrvolc95zMfjmibI95p2pmFkbgCOVf-Og,26311
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
5
- ingestr/src/buildinfo.py,sha256=TRZbcB7mFeJZcDA_BqTjZvRtxs0161pLvM9EmkNt90U,21
6
- ingestr/src/destinations.py,sha256=wYE6p_DC9HrQ5KehhgbLxgnkS1P9wE9L21Hw_lgAZ70,16884
5
+ ingestr/src/buildinfo.py,sha256=bdi0-mZhnHheYgs6WuEb8p-RIk_RFAXRCF9HalRfV0k,21
6
+ ingestr/src/destinations.py,sha256=TcxM2rcwHfgMMP6U0yRNcfWKlEzkBbZbqCIDww7lkTY,16882
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
- ingestr/src/factory.py,sha256=wDa3F7xbnQcQwfnQIbxWypuJ1G8GGhRpdssWRPtEO_Q,6020
8
+ ingestr/src/factory.py,sha256=mcjgbmrZr6TvP9fCMQxo-aMGcrb2PqToRcSLp5nldww,6138
9
9
  ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
10
10
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
13
13
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
14
- ingestr/src/sources.py,sha256=sEi-09LXySuhHqUpscHlfQTmQ_7Bgq1GDY_y5mma-sg,87437
14
+ ingestr/src/sources.py,sha256=3ozLt9lhhNANspfjA2vb8u6qjgBJezH8QBV1XKqT1fg,94124
15
15
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
16
16
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
17
17
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -39,10 +39,11 @@ ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k
39
39
  ingestr/src/collector/spinner.py,sha256=_ZUqF5MI43hVIULdjF5s5mrAZbhEFXaiWirQmrv3Yk4,1201
40
40
  ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
41
41
  ingestr/src/elasticsearch/__init__.py,sha256=m-q93HgUmTwGDUwHOjHawstWL06TC3WIX3H05szybrY,2556
42
- ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24UIhJWbac,9286
42
+ ingestr/src/facebook_ads/__init__.py,sha256=a1A5fO1r_FotoH9UET42tamqo_-ftCm9vBrkm5lpjG0,9579
43
43
  ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
44
- ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
45
- ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
44
+ ingestr/src/facebook_ads/helpers.py,sha256=EYqOAPUlhVNxwzjP_CUGjJvAXTq65nJC-v75BfyJKmg,8981
45
+ ingestr/src/facebook_ads/settings.py,sha256=Bsic8RcmH-NfEZ7r_NGospTCmwISK9XaMT5y2NZirtg,4938
46
+ ingestr/src/facebook_ads/utils.py,sha256=ES2ylPoW3j3fjp6OMUgp21n1cG1OktXsmWWMk5vBW_I,1590
46
47
  ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
47
48
  ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
48
49
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
@@ -82,6 +83,8 @@ ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYH
82
83
  ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
83
84
  ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
84
85
  ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
86
+ ingestr/src/mixpanel/__init__.py,sha256=s1QtqMP0BTGW6YtdCabJFWj7lEn7KujzELwGpBOQgfs,1796
87
+ ingestr/src/mixpanel/client.py,sha256=c_reouegOVYBOwHLfgYFwpmkba0Sxro1Zkml07NCYf0,3602
85
88
  ingestr/src/mongodb/__init__.py,sha256=T-RYPS_skl_2gNVfYWWXan2bVQYmm0bFBcCCqG5ejvg,7275
86
89
  ingestr/src/mongodb/helpers.py,sha256=H0GpOK3bPBhFWBEhJZOjywUBdzih6MOpmyVO_cKSN14,24178
87
90
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -99,6 +102,7 @@ ingestr/src/pipedrive/typing.py,sha256=lEMXu4hhAA3XkhVSlBUa-juqyupisd3c-qSQKxFvz
99
102
  ingestr/src/pipedrive/helpers/__init__.py,sha256=UX1K_qnGXB0ShtnBOfp2XuVbK8RRoCK8TdEmIjRckgg,710
100
103
  ingestr/src/pipedrive/helpers/custom_fields_munger.py,sha256=rZ4AjdITHfJE2NNomCR7vMBS1KnWpEGVF6fADwsIHUE,4488
101
104
  ingestr/src/pipedrive/helpers/pages.py,sha256=Klpjw2OnMuhzit3PpiHKsfzGcJ3rQPSQBl3HhE3-6eA,3358
105
+ ingestr/src/quickbooks/__init__.py,sha256=cZUuVCOTGPHTscRj6i0DytO63_fWF-4ieMxoU4PcyTg,3727
102
106
  ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
103
107
  ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
104
108
  ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
@@ -113,7 +117,7 @@ ingestr/src/solidgate/__init__.py,sha256=JdaXvAu5QGuf9-FY294vwCQCEmfrqIld9oqbzqC
113
117
  ingestr/src/solidgate/helpers.py,sha256=oePEc9nnvmN3IaKrfJCvyKL79xdGM0-gRTN3-8tY4Fc,4952
114
118
  ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
119
  ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
116
- ingestr/src/stripe_analytics/__init__.py,sha256=FBkZu5op5Z-FceEi4zG7qcAgZfUYJRPMVPPrPMjvmXw,4502
120
+ ingestr/src/stripe_analytics/__init__.py,sha256=j3Vmvo8G75fJJIF4rUnpGliGTpYQZt372wo-AjGImYs,4581
117
121
  ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
118
122
  ingestr/src/stripe_analytics/settings.py,sha256=ZahhZg3Sq2KnvnDcfSaXO494Csy3tElBDEHnvA1AVmA,2461
119
123
  ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
@@ -135,8 +139,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
135
139
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
136
140
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
137
141
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
138
- ingestr-0.13.53.dist-info/METADATA,sha256=vVMGPiZ4snksSxZGUKgNl0a3bfSxUBeTrWpUG0lHFhw,14902
139
- ingestr-0.13.53.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
140
- ingestr-0.13.53.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
141
- ingestr-0.13.53.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
142
- ingestr-0.13.53.dist-info/RECORD,,
142
+ ingestr-0.13.55.dist-info/METADATA,sha256=WNMM4qLCTDJg4xUnYNefHffB6vidRl4xopoBaaux-FM,15131
143
+ ingestr-0.13.55.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
144
+ ingestr-0.13.55.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
145
+ ingestr-0.13.55.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
146
+ ingestr-0.13.55.dist-info/RECORD,,