ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/elasticsearch/helpers.py ADDED
@@ -0,0 +1,138 @@
+ """Elasticsearch destination helpers"""
+
+ import json
+ import logging
+ from typing import Any, Dict, Iterator
+ from urllib.parse import urlparse
+
+ import dlt
+
+ from elasticsearch import Elasticsearch
+ from elasticsearch.helpers import bulk
+
+ # Suppress Elasticsearch transport logging
+ logging.getLogger("elasticsearch.transport").setLevel(logging.WARNING)
+ logging.getLogger("elastic_transport.transport").setLevel(logging.WARNING)
+
+
+ def process_file_items(file_path: str) -> Iterator[Dict[str, Any]]:
+     """Process items from a file path (JSONL format)."""
+     with open(file_path, "r") as f:
+         for line in f:
+             if line.strip():
+                 doc = json.loads(line.strip())
+                 # Clean DLT metadata
+                 cleaned_doc = {
+                     k: v for k, v in doc.items() if not k.startswith("_dlt_")
+                 }
+                 yield cleaned_doc
+
+
+ def process_iterable_items(items: Any) -> Iterator[Dict[str, Any]]:
+     """Process items from an iterable."""
+     for item in items:
+         if isinstance(item, dict):
+             # Clean DLT metadata
+             cleaned_item = {k: v for k, v in item.items() if not k.startswith("_dlt_")}
+             yield cleaned_item
+
+
+ @dlt.destination(
+     name="elasticsearch",
+     loader_file_format="typed-jsonl",
+     batch_size=1000,
+     naming_convention="snake_case",
+ )
+ def elasticsearch_insert(
+     items, table, connection_string: str = dlt.secrets.value
+ ) -> None:
+     """Insert data into Elasticsearch index.
+
+     Args:
+         items: Data items (file path or iterable)
+         table: Table metadata containing name and schema info
+         connection_string: Elasticsearch connection string
+     """
+     # Parse connection string
+     parsed = urlparse(connection_string)
+
+     # Build Elasticsearch client configuration
+     actual_url = connection_string
+     secure = True  # Default to HTTPS (secure by default)
+
+     if connection_string.startswith("elasticsearch://"):
+         actual_url = connection_string.replace("elasticsearch://", "")
+
+         # Parse to check for query parameters
+         temp_parsed = urlparse("http://" + actual_url)
+         from urllib.parse import parse_qs
+
+         query_params = parse_qs(temp_parsed.query)
+
+         # Check ?secure parameter (defaults to true)
+         if "secure" in query_params:
+             secure = query_params["secure"][0].lower() in ["true", "1", "yes"]
+
+         # Remove query params from URL for ES client
+         actual_url = actual_url.split("?")[0]
+
+         # Add scheme
+         scheme = "https" if secure else "http"
+         actual_url = f"{scheme}://{actual_url}"
+
+     parsed = urlparse(actual_url)
+
+     es_config: Dict[str, Any] = {
+         "hosts": [actual_url],
+         "verify_certs": secure,
+         "ssl_show_warn": False,
+     }
+
+     # Add authentication if present
+     if parsed.username and parsed.password:
+         es_config["http_auth"] = (parsed.username, parsed.password)
+
+     # Get index name from table metadata
+     index_name = table["name"]
+
+     # Connect to Elasticsearch
+     client = Elasticsearch(**es_config)
+
+     # Replace mode: delete existing index if it exists
+     if client.indices.exists(index=index_name):
+         client.indices.delete(index=index_name)
+
+     # Process and insert documents
+     if isinstance(items, str):
+         documents = process_file_items(items)
+     else:
+         documents = process_iterable_items(items)
+
+     # Prepare documents for bulk insert as generator
+     def doc_generator():
+         for doc in documents:
+             es_doc: Dict[str, Any] = {"_index": index_name, "_source": doc.copy()}
+
+             # Use _id if present, otherwise let ES generate one
+             if "_id" in doc:
+                 es_doc["_id"] = str(doc["_id"])
+                 # Remove _id from source since it's metadata
+                 if "_id" in es_doc["_source"]:
+                     del es_doc["_source"]["_id"]
+             elif "id" in doc:
+                 es_doc["_id"] = str(doc["id"])
+
+             yield es_doc
+
+     # Bulk insert
+     try:
+         _, failed_items = bulk(client, doc_generator(), request_timeout=60)
+         if failed_items:
+             failed_count = (
+                 len(failed_items) if isinstance(failed_items, list) else failed_items
+             )
+             raise Exception(
+                 f"Failed to insert {failed_count} documents: {failed_items}"
+             )
+     except Exception as e:
+         raise Exception(f"Elasticsearch bulk insert failed: {str(e)}")
ingestr/src/errors.py CHANGED
@@ -1,3 +1,6 @@
+ import requests
+
+
  class MissingValueError(Exception):
      def __init__(self, value, source):
          super().__init__(f"{value} is required to connect to {source}")
@@ -16,3 +19,8 @@ class InvalidBlobTableError(Exception):
              f"Invalid source table for {source} "
              "Ensure that the table is in the format {bucket-name}/{file glob}"
          )
+
+
+ class HTTPError(Exception):
+     def __init__(self, source: requests.HTTPError):
+         super().__init__(f"HTTP {source.response.status_code}: {source.response.text}")
ingestr/src/facebook_ads/__init__.py CHANGED
@@ -4,6 +4,7 @@ from typing import Iterator, Sequence

  import dlt
  from dlt.common import pendulum
+ from dlt.common.time import ensure_pendulum_datetime
  from dlt.common.typing import TDataItems
  from dlt.sources import DltResource
  from facebook_business.adobjects.ad import Ad
@@ -12,7 +13,6 @@ from .helpers import (
      execute_job,
      get_ads_account,
      get_data_chunked,
-     get_start_date,
      process_report_item,
  )
  from .settings import (
@@ -22,13 +22,8 @@ from .settings import (
      DEFAULT_ADCREATIVE_FIELDS,
      DEFAULT_ADSET_FIELDS,
      DEFAULT_CAMPAIGN_FIELDS,
-     DEFAULT_INSIGHT_FIELDS,
      DEFAULT_LEAD_FIELDS,
      INSIGHT_FIELDS_TYPES,
-     INSIGHTS_BREAKDOWNS_OPTIONS,
-     INSIGHTS_PRIMARY_KEY,
-     INVALID_INSIGHTS_FIELDS,
-     TInsightsBreakdownOptions,
      TInsightsLevels,
  )

@@ -106,16 +101,20 @@ def facebook_insights_source(
      account_id: str = dlt.config.value,
      access_token: str = dlt.secrets.value,
      initial_load_past_days: int = 1,
-     fields: Sequence[str] = DEFAULT_INSIGHT_FIELDS,
-     attribution_window_days_lag: int = 7,
+     dimensions: Sequence[str] = None,
+     fields: Sequence[str] = None,
      time_increment_days: int = 1,
-     breakdowns: TInsightsBreakdownOptions = "ads_insights",
      action_breakdowns: Sequence[str] = ALL_ACTION_BREAKDOWNS,
      level: TInsightsLevels = "ad",
      action_attribution_windows: Sequence[str] = ALL_ACTION_ATTRIBUTION_WINDOWS,
      batch_size: int = 50,
      request_timeout: int = 300,
      app_api_version: str = None,
+     start_date: pendulum.DateTime | None = None,
+     end_date: pendulum.DateTime | None = None,
+     insights_max_wait_to_finish_seconds: int = 60 * 60 * 4,
+     insights_max_wait_to_start_seconds: int = 60 * 30,
+     insights_max_async_sleep_seconds: int = 20,
  ) -> DltResource:
      """Incrementally loads insight reports with defined granularity level, fields, breakdowns etc.

@@ -148,40 +147,54 @@ def facebook_insights_source(
          account_id, access_token, request_timeout, app_api_version
      )

-     # we load with a defined lag
-     initial_load_start_date = pendulum.today().subtract(days=initial_load_past_days)
-     initial_load_start_date_str = initial_load_start_date.isoformat()
+     if start_date is None:
+         start_date = pendulum.today().subtract(days=initial_load_past_days)
+
+     if dimensions is None:
+         dimensions = []
+     if fields is None:
+         fields = []
+
+     columns = {}
+     for field in fields:
+         if field in INSIGHT_FIELDS_TYPES:
+             columns[field] = INSIGHT_FIELDS_TYPES[field]

      @dlt.resource(
-         primary_key=INSIGHTS_PRIMARY_KEY,
          write_disposition="merge",
-         columns=INSIGHT_FIELDS_TYPES,
+         merge_key="date_start",
+         columns=columns,
      )
      def facebook_insights(
          date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
              "date_start",
-             initial_value=initial_load_start_date_str,
+             initial_value=ensure_pendulum_datetime(start_date).start_of("day").date(),
+             end_value=ensure_pendulum_datetime(end_date).end_of("day").date()
+             if end_date
+             else None,
              range_end="closed",
              range_start="closed",
          ),
      ) -> Iterator[TDataItems]:
-         start_date = get_start_date(date_start, attribution_window_days_lag)
-         end_date = pendulum.now()
+         start_date = date_start.last_value
+         if date_start.end_value:
+             end_date_val = pendulum.instance(date_start.end_value)
+
+             end_date = (
+                 end_date_val
+                 if isinstance(end_date_val, pendulum.Date)
+                 else end_date_val.date()
+             )
+         else:
+             end_date = pendulum.now().date()

-         # fetch insights in incremental day steps
          while start_date <= end_date:
              query = {
                  "level": level,
                  "action_breakdowns": list(action_breakdowns),
-                 "breakdowns": list(
-                     INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
-                 ),
+                 "breakdowns": dimensions,
                  "limit": batch_size,
-                 "fields": list(
-                     set(fields)
-                     .union(INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"])
-                     .difference(INVALID_INSIGHTS_FIELDS)
-                 ),
+                 "fields": fields,
                  "time_increment": time_increment_days,
                  "action_attribution_windows": list(action_attribution_windows),
                  "time_ranges": [
@@ -193,8 +206,14 @@ def facebook_insights_source(
                      }
                  ],
              }
-             job = execute_job(account.get_insights(params=query, is_async=True))
-             yield list(map(process_report_item, job.get_result()))
+             job = execute_job(
+                 account.get_insights(params=query, is_async=True),
+                 insights_max_async_sleep_seconds=insights_max_async_sleep_seconds,
+                 insights_max_wait_to_finish_seconds=insights_max_wait_to_finish_seconds,
+                 insights_max_wait_to_start_seconds=insights_max_wait_to_start_seconds,
+             )
+             output = list(map(process_report_item, job.get_result()))
+             yield output
              start_date = start_date.add(days=time_increment_days)

      return facebook_insights
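
The reworked `facebook_insights_source` replaces the fixed `breakdowns` preset and attribution-window lag with explicit `dimensions`, `fields`, and `start_date`/`end_date` bounds, and it now merges on `date_start`. A hedged sketch of a direct invocation; the account ID, token, and destination below are placeholders:

    import dlt
    import pendulum

    from ingestr.src.facebook_ads import facebook_insights_source

    source = facebook_insights_source(
        account_id="1234567890",   # placeholder ad account, normally from dlt config
        access_token="<token>",    # placeholder, normally injected from dlt secrets
        dimensions=["country"],    # passed through as the Insights "breakdowns"
        fields=["spend", "impressions", "clicks"],
        level="campaign",
        start_date=pendulum.datetime(2024, 1, 1),
        end_date=pendulum.datetime(2024, 1, 31),
    )

    pipeline = dlt.pipeline(pipeline_name="fb_insights", destination="duckdb")
    pipeline.run(source)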
ingestr/src/facebook_ads/helpers.py CHANGED
@@ -3,14 +3,13 @@
  import functools
  import itertools
  import time
+ from datetime import datetime
  from typing import Any, Iterator, Sequence

- import dlt
  import humanize
  import pendulum
  from dlt.common import logger
  from dlt.common.configuration.inject import with_config
- from dlt.common.time import ensure_pendulum_datetime
  from dlt.common.typing import DictStrAny, TDataItem, TDataItems
  from dlt.sources.helpers import requests
  from dlt.sources.helpers.requests import Client
@@ -23,49 +22,21 @@ from facebook_business.api import FacebookResponse

  from .exceptions import InsightsJobTimeout
  from .settings import (
-     FACEBOOK_INSIGHTS_RETENTION_PERIOD,
      INSIGHTS_PRIMARY_KEY,
      TFbMethod,
  )


- def get_start_date(
-     incremental_start_date: dlt.sources.incremental[str],
-     attribution_window_days_lag: int = 7,
- ) -> pendulum.DateTime:
-     """
-     Get the start date for incremental loading of Facebook Insights data.
-     """
-     start_date: pendulum.DateTime = ensure_pendulum_datetime(
-         incremental_start_date.start_value
-     ).subtract(days=attribution_window_days_lag)
-
-     # facebook forgets insights so trim the lag and warn
-     min_start_date = pendulum.today().subtract(
-         months=FACEBOOK_INSIGHTS_RETENTION_PERIOD
-     )
-     if start_date < min_start_date:
-         logger.warning(
-             "%s: Start date is earlier than %s months ago, using %s instead. "
-             "For more information, see https://www.facebook.com/business/help/1695754927158071?id=354406972049255",
-             "facebook_insights",
-             FACEBOOK_INSIGHTS_RETENTION_PERIOD,
-             min_start_date,
-         )
-         start_date = min_start_date
-         incremental_start_date.start_value = min_start_date
-
-     # lag the incremental start date by attribution window lag
-     incremental_start_date.start_value = start_date.isoformat()
-     return start_date
-
-
  def process_report_item(item: AbstractObject) -> DictStrAny:
+     if "date_start" in item:
+         item["date_start"] = datetime.strptime(item["date_start"], "%Y-%m-%d").date()
+     if "date_stop" in item:
+         item["date_stop"] = datetime.strptime(item["date_stop"], "%Y-%m-%d").date()
+
      d: DictStrAny = item.export_all_data()
      for pki in INSIGHTS_PRIMARY_KEY:
          if pki not in d:
              d[pki] = "no_" + pki
-
      return d


@@ -138,17 +109,22 @@ def execute_job(
  ) -> AbstractCrudObject:
      status: str = None
      time_start = time.time()
-     sleep_time = 10
+     sleep_time = 3
      while status != "Job Completed":
+         print("-----")
+         print("waiting for job to finish")
          duration = time.time() - time_start
          job = job.api_get()
          status = job["async_status"]
          percent_complete = job["async_percent_completion"]
+         print("async_status", status)
+         print("percent_complete", percent_complete)

          job_id = job["id"]
          logger.info("%s, %d%% done", status, percent_complete)

          if status == "Job Completed":
+             print("job completed")
              return job

          if duration > insights_max_wait_to_start_seconds and percent_complete == 0:
@@ -168,7 +144,7 @@
          raise InsightsJobTimeout(
              "facebook_insights",
              pretty_error_message.format(
-                 job_id, insights_max_wait_to_finish_seconds // 60
+                 job_id, insights_max_wait_to_finish_seconds
              ),
          )

@@ -253,3 +229,49 @@ def notify_on_token_expiration(access_token_expires_at: int = None) -> None:
      logger.error(
          f"Access Token expires in {humanize.precisedelta(pendulum.now() - expires_at)}. Replace the token now!"
      )
+
+
+ def parse_insights_table_to_source_kwargs(table: str) -> DictStrAny:
+     import typing
+
+     from ingestr.src.facebook_ads.settings import (
+         INSIGHTS_BREAKDOWNS_OPTIONS,
+         TInsightsBreakdownOptions,
+         TInsightsLevels,
+     )
+
+     parts = table.split(":")
+
+     source_kwargs = {}
+
+     breakdown_type = parts[1]
+
+     valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
+     if breakdown_type in valid_breakdowns:
+         dimensions = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["breakdowns"]
+         fields = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["fields"]
+         source_kwargs["dimensions"] = dimensions
+         source_kwargs["fields"] = fields
+     else:
+         dimensions = breakdown_type.split(",")
+         valid_levels = list(typing.get_args(TInsightsLevels))
+         level = None
+         for valid_level in reversed(valid_levels):
+             if valid_level in dimensions:
+                 level = valid_level
+                 dimensions.remove(valid_level)
+                 break
+
+         source_kwargs["level"] = level
+         source_kwargs["dimensions"] = dimensions
+
+     # If custom metrics are provided, parse them
+     if len(parts) == 3:
+         fields = [f.strip() for f in parts[2].split(",") if f.strip()]
+         if not fields:
+             raise ValueError(
+                 "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
+             )
+         source_kwargs["fields"] = fields
+
+     return source_kwargs
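
`parse_insights_table_to_source_kwargs` maps a colon-delimited table spec onto the source arguments above. Two illustrative calls, assuming `ads_insights` is still a key of `INSIGHTS_BREAKDOWNS_OPTIONS` (it was the old default preset):

    from ingestr.src.facebook_ads.helpers import parse_insights_table_to_source_kwargs

    # Known preset: dimensions and fields come from INSIGHTS_BREAKDOWNS_OPTIONS.
    parse_insights_table_to_source_kwargs("facebook_insights:ads_insights")
    # -> {"dimensions": [...], "fields": [...]}

    # Custom spec: a level mixed into the dimensions, metrics after the second colon.
    parse_insights_table_to_source_kwargs(
        "facebook_insights:campaign,country:spend,impressions"
    )
    # -> {"level": "campaign", "dimensions": ["country"],
    #     "fields": ["spend", "impressions"]}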
ingestr/src/facebook_ads/settings.py CHANGED
@@ -112,6 +112,8 @@ DEFAULT_INSIGHT_FIELDS = (
      "social_spend",
      "spend",
      "website_ctr",
+     "conversions",
+     "video_thruplay_watched_actions",
  )

  TInsightsLevels = Literal["account", "campaign", "adset", "ad"]
ingestr/src/facebook_ads/utils.py ADDED
@@ -0,0 +1,39 @@
+ from typing import Dict
+
+ import dlt
+ from dlt.common.configuration.inject import with_config
+ from dlt.sources.helpers import requests
+
+
+ @with_config(sections=("sources", "facebook_ads"))
+ def debug_access_token(
+     access_token: str = dlt.secrets.value,
+     client_id: str = dlt.secrets.value,
+     client_secret: str = dlt.secrets.value,
+ ) -> str:
+     """Debugs the `access_token`, providing info on expiration time, scopes, etc. If arguments are not provided, `dlt` will inject them from configuration."""
+     debug_url = f"https://graph.facebook.com/debug_token?input_token={access_token}&access_token={client_id}|{client_secret}"
+     response = requests.get(debug_url)
+     data: Dict[str, str] = response.json()
+
+     if "error" in data:
+         raise Exception(f"Error debugging token: {data['error']}")
+
+     return data["data"]
+
+
+ @with_config(sections=("sources", "facebook_ads"))
+ def get_long_lived_token(
+     access_token: str = dlt.secrets.value,
+     client_id: str = dlt.secrets.value,
+     client_secret: str = dlt.secrets.value,
+ ) -> str:
+     """Gets a long-lived access token (60 days) from `access_token`. If arguments are not provided, `dlt` will inject them from configuration."""
+     exchange_url = f"https://graph.facebook.com/v13.0/oauth/access_token?grant_type=fb_exchange_token&client_id={client_id}&client_secret={client_secret}&fb_exchange_token={access_token}"
+     response = requests.get(exchange_url)
+     data: Dict[str, str] = response.json()
+
+     if "error" in data:
+         raise Exception(f"Error refreshing token: {data['error']}")
+
+     return data["access_token"]
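
The new `utils.py` helpers wrap the Graph API token-debug and token-exchange endpoints. A sketch of calling `get_long_lived_token` directly; the credential values are placeholders that `@with_config` would normally inject from the `sources.facebook_ads` secrets section:

    from ingestr.src.facebook_ads.utils import get_long_lived_token

    long_lived_token = get_long_lived_token(
        access_token="<short-lived-token>",  # placeholder
        client_id="<app-id>",                # placeholder
        client_secret="<app-secret>",        # placeholder
    )
    # The exchanged token is valid for roughly 60 days per the
    # fb_exchange_token flow.
    print(long_lived_token)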