ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,111 @@
1
+ from typing import Any, Dict, Iterator, Optional
2
+
3
+ import dlt
4
+ import pendulum
5
+ import requests
6
+
7
+ LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
8
+
9
+
10
def _graphql(
    api_key: str, query: str, variables: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """POST a GraphQL document to the Linear endpoint and return its ``data``.

    Args:
        api_key: Value placed verbatim in the ``Authorization`` header.
        query: GraphQL document to execute.
        variables: Query variables; a falsy value is sent as an empty object.

    Returns:
        The ``data`` member of the decoded JSON response.

    Raises:
        ValueError: If the decoded response contains an ``errors`` key; the
            message is the string form of that value.

    Any exception raised by ``response.raise_for_status()`` propagates as is.
    """
    body = {"query": query, "variables": variables if variables else {}}
    response = requests.post(
        LINEAR_GRAPHQL_ENDPOINT,
        json=body,
        headers={"Authorization": api_key, "Content-Type": "application/json"},
    )
    response.raise_for_status()

    result = response.json()
    if "errors" not in result:
        return result["data"]
    raise ValueError(str(result["errors"]))
24
+
25
+
26
def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
    """Yield every node of a cursor-paginated GraphQL connection.

    The query is executed repeatedly with a ``cursor`` variable (``None`` for
    the first page). ``root`` names the key of the response data that holds
    the connection, which must expose ``nodes`` and ``pageInfo``.
    """
    after: Optional[str] = None
    while True:
        page = _graphql(api_key, query, {"cursor": after})[root]
        yield from page["nodes"]

        page_info = page["pageInfo"]
        if page_info["hasNextPage"]:
            # Continue from where the page that was just consumed ended.
            after = page_info["endCursor"]
        else:
            return
35
+
36
+
37
def _get_date_range(updated_at, start_date):
    """Derive the (start, end) window from an incremental cursor object.

    Args:
        updated_at: Object exposing ``last_value`` and ``end_value``.
        start_date: Fallback that is parsed when ``last_value`` is falsy.

    Returns:
        A ``(start, end)`` tuple. ``start`` is the parsed ``last_value`` (or
        the parsed ``start_date``); ``end`` is the parsed ``end_value`` or,
        when that is falsy, the current time in UTC.
    """
    window_start = pendulum.parse(updated_at.last_value or start_date)
    window_end = (
        pendulum.parse(updated_at.end_value)
        if updated_at.end_value
        else pendulum.now(tz="UTC")
    )
    return window_start, window_end
50
+
51
+
52
def _paginated_resource(
    api_key: str, query: str, query_field: str, updated_at, start_date
) -> Iterator[Dict[str, Any]]:
    """Yield normalized items whose ``updatedAt`` lies in the active window.

    The window comes from ``_get_date_range`` and both bounds are inclusive.
    All pages are fetched; the date filter is applied locally to each item.
    """
    window_start, window_end = _get_date_range(updated_at, start_date)

    for record in _paginate(api_key, query, query_field):
        # Parse the timestamp once and reuse it for both bounds.
        modified = pendulum.parse(record["updatedAt"])
        if modified >= window_start and modified <= window_end:
            yield normalize_dictionaries(record)
62
+
63
+
64
def _create_paginated_resource(
    resource_name: str,
    query: str,
    query_field: str,
    api_key: str,
    start_date,
    end_date=None,
):
    """Build a merge-disposition dlt resource for one paginated Linear query.

    Args:
        resource_name: Name given to the dlt resource.
        query: GraphQL document passed through to the pagination helper.
        query_field: Key of the response data that holds the connection.
        api_key: Linear API key.
        start_date: Object with ``isoformat()``; its ISO string seeds the
            incremental cursor.
        end_date: Optional object with ``isoformat()``; when truthy its ISO
            string becomes the cursor's end value.

    Returns:
        The decorated resource function (primary key ``id``, cursor field
        ``updatedAt`` with closed range on both ends).
    """
    incremental_cursor = dlt.sources.incremental(
        "updatedAt",
        initial_value=start_date.isoformat(),
        end_value=end_date.isoformat() if end_date else None,
        range_start="closed",
        range_end="closed",
    )

    @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
    def paginated_resource(
        updated_at: dlt.sources.incremental[str] = incremental_cursor,
    ) -> Iterator[Dict[str, Any]]:
        rows = _paginated_resource(api_key, query, query_field, updated_at, start_date)
        yield from (normalize_dictionaries(row) for row in rows)

    return paginated_resource
90
+
91
+
92
def normalize_dictionaries(item: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten the dict-valued fields of ``item`` into a shallow copy.

    For every top-level value that is a dict:
      * if it has an ``id`` key, the field is replaced by ``{field}_id``
        holding that id (checked first, so it wins over ``nodes``);
      * otherwise, if it has a ``nodes`` key, the field is replaced by the
        value of ``nodes``.
    Other values, including dicts with neither key, are kept unchanged.
    The input mapping itself is not modified.
    """
    flattened = item.copy()

    # Walk a snapshot of the original entries so the copy can be edited freely.
    for field, content in tuple(item.items()):
        if not isinstance(content, dict):
            continue
        if "id" in content:
            flattened[f"{field}_id"] = content["id"]
            flattened.pop(field)
        elif "nodes" in content:
            flattened[field] = content["nodes"]

    return flattened
@@ -18,7 +18,6 @@ def retry_on_limit(
18
18
 
19
19
  def create_client() -> requests.Session:
20
20
  return Client(
21
- request_timeout=10.0,
22
21
  raise_for_status=False,
23
22
  retry_condition=retry_on_limit,
24
23
  request_max_attempts=12,
ingestr/src/loader.py ADDED
@@ -0,0 +1,69 @@
1
+ import csv
2
+ import gzip
3
+ import json
4
+ import subprocess
5
+ from contextlib import contextmanager
6
+ from typing import Generator
7
+
8
+ from pyarrow.parquet import ParquetFile # type: ignore
9
+
10
+ PARQUET_BATCH_SIZE = 64
11
+
12
+
13
class UnsupportedLoaderFileFormat(Exception):
    """Raised when a loader file's detected type matches no known reader."""
15
+
16
+
17
def load_dlt_file(filepath: str) -> Generator:
    """Read a dlt loader file and yield its data items.

    The file type is detected by running the external ``file -b`` command on
    ``filepath``; its trimmed output selects the reader via ``factory``.

    Raises:
        subprocess.CalledProcessError: If ``file`` exits with a non-zero
            status (the command is run with ``check=True``).
    """
    probe = subprocess.run(
        ["file", "-b", filepath],
        check=True,
        capture_output=True,
        text=True,
    )
    detected = probe.stdout.strip()

    with factory(detected, filepath) as rows:
        yield from rows
32
+
33
+
34
def factory(filetype: str, filepath: str):
    """Return the reader context manager matching a ``file -b`` description.

    Prefixes are tested in order: ``gzip`` (JSON lines), ``CSV``, then
    ``Apache Parquet``.

    Raises:
        UnsupportedLoaderFileFormat: If ``filetype`` starts with none of the
            known prefixes; the description is passed as the message.
    """
    # ???(turtledev): can dlt produce non-gzipped jsonl files?
    readers = (
        ("gzip", jsonlfile),
        ("CSV", csvfile),
        ("Apache Parquet", parquetfile),
    )
    for prefix, opener in readers:
        if filetype.startswith(prefix):
            return opener(filepath)
    raise UnsupportedLoaderFileFormat(filetype)
44
+
45
+
46
@contextmanager
def jsonlfile(filepath: str):
    """Context manager yielding an iterator over a gzipped JSON-lines file.

    Each line is decoded, stripped and parsed with ``json.loads``. The
    iterator is only usable while the context is open, because the
    underlying gzip stream is closed on exit.
    """
    with gzip.open(filepath) as stream:
        yield (json.loads(raw.decode().strip()) for raw in stream)
54
+
55
+
56
@contextmanager
def csvfile(filepath: str):
    """Context manager yielding a ``csv.DictReader`` over ``filepath``.

    The reader is only usable while the context is open, because the file
    is closed on exit.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires. Without it, universal-newline translation
    # rewrites "\r\n" inside quoted fields before the parser sees them.
    with open(filepath, "r", newline="") as fd:
        yield csv.DictReader(fd)
60
+
61
+
62
@contextmanager
def parquetfile(filepath: str):
    """Context manager yielding an iterator of rows from a Parquet file.

    Rows are produced batch by batch (``PARQUET_BATCH_SIZE`` rows per batch),
    each batch converted with ``to_pylist()``. The iterator is only usable
    while the context is open, because the file is closed on exit.
    """

    def rows(parquet: ParquetFile):
        for chunk in parquet.iter_batches(PARQUET_BATCH_SIZE):
            yield from chunk.to_pylist()

    with open(filepath, "rb") as handle:
        source = ParquetFile(handle)
        yield rows(source)
@@ -0,0 +1,126 @@
1
+ """
2
+ Mailchimp source for data extraction via REST API.
3
+
4
+ This source provides access to Mailchimp account data.
5
+ """
6
+
7
+ from typing import Any, Iterable, Iterator
8
+
9
+ import dlt
10
+ from dlt.sources import DltResource
11
+
12
+ from ingestr.src.http_client import create_client
13
+ from ingestr.src.mailchimp.helpers import (
14
+ create_merge_resource,
15
+ create_nested_resource,
16
+ create_replace_resource,
17
+ )
18
+ from ingestr.src.mailchimp.settings import (
19
+ MERGE_ENDPOINTS,
20
+ NESTED_ENDPOINTS,
21
+ REPLACE_ENDPOINTS,
22
+ )
23
+
24
+
25
@dlt.source(max_table_nesting=0, name="mailchimp_source")
def mailchimp_source(
    api_key: str,
    server: str,
) -> Iterable[DltResource]:
    """
    Mailchimp data source.

    Builds one ``account`` resource plus one resource per entry of
    ``MERGE_ENDPOINTS``, ``REPLACE_ENDPOINTS`` and ``NESTED_ENDPOINTS``.

    Args:
        api_key: Mailchimp API key, used as the password of the basic-auth pair
        server: Server prefix (e.g., 'us10') used to build the API host name

    Returns:
        Tuple of all created resources, ``account`` first
    """
    base_url = f"https://{server}.api.mailchimp.com/3.0"
    session = create_client()
    auth = ("anystring", api_key)

    @dlt.resource(name="account", write_disposition="replace")
    def fetch_account() -> Iterator[dict[str, Any]]:
        """
        Fetch account information from the API root.

        Table format: account (no parameters needed)
        """
        response = session.get(f"{base_url}/", auth=auth)
        response.raise_for_status()
        yield response.json()

    # Every factory takes the same three leading arguments; the rest of each
    # call comes straight from the endpoint tuple, in declaration order.
    shared = (base_url, session, auth)
    resources = [fetch_account]

    # Merge resources (with incremental loading)
    resources.extend(
        create_merge_resource(*shared, *endpoint) for endpoint in MERGE_ENDPOINTS
    )

    # Replace resources (without incremental loading)
    resources.extend(
        create_replace_resource(*shared, *endpoint) for endpoint in REPLACE_ENDPOINTS
    )

    # Nested resources (depend on parent resources)
    resources.extend(
        create_nested_resource(*shared, *endpoint) for endpoint in NESTED_ENDPOINTS
    )

    return tuple(resources)
@@ -0,0 +1,226 @@
1
+ """
2
+ Helper functions for Mailchimp source.
3
+ """
4
+
5
+ from typing import Any, Iterator
6
+
7
+ import dlt
8
+
9
+
10
+ def fetch_paginated(
11
+ session,
12
+ url: str,
13
+ auth: tuple,
14
+ data_key: str | None = None,
15
+ ) -> Iterator[dict[str, Any]]:
16
+ """
17
+ Helper function to fetch paginated data from Mailchimp API.
18
+
19
+ Args:
20
+ session: HTTP session
21
+ url: API endpoint URL
22
+ auth: Authentication tuple
23
+ data_key: Key in response containing the data array (if None, return whole response)
24
+
25
+ Yields:
26
+ Individual items from the paginated response
27
+ """
28
+ offset = 0
29
+ count = 1000 # Maximum allowed by Mailchimp
30
+
31
+ while True:
32
+ params = {"count": count, "offset": offset}
33
+ response = session.get(url, auth=auth, params=params)
34
+ response.raise_for_status()
35
+ data = response.json()
36
+
37
+ # Extract items from response
38
+ if data_key and data_key in data:
39
+ items = data[data_key]
40
+ elif isinstance(data, list):
41
+ items = data
42
+ else:
43
+ # If no data_key specified and response is dict, yield the whole response
44
+ yield data
45
+ break
46
+
47
+ if not items:
48
+ break
49
+
50
+ yield from items
51
+
52
+ # Check if we've received fewer items than requested (last page)
53
+ if len(items) < count:
54
+ break
55
+
56
+ offset += count
57
+
58
+
59
def create_merge_resource(
    base_url: str,
    session,
    auth: tuple,
    name: str,
    path: str,
    key: str,
    pk: str,
    ik: str,
):
    """
    Build a merge-disposition DLT resource for one top-level endpoint.

    The resource declares an incremental cursor on ``ik`` but does not pass
    it to the API: every run pages through the full endpoint.

    Args:
        base_url: Base API URL
        session: HTTP session
        auth: Authentication tuple
        name: Resource name
        path: API endpoint path, appended to ``base_url``
        key: Data key in response
        pk: Primary key field
        ik: Incremental key field

    Returns:
        DLT resource function
    """
    endpoint_url = f"{base_url}/{path}"
    cursor = dlt.sources.incremental(ik, initial_value=None)

    @dlt.resource(name=name, write_disposition="merge", primary_key=pk)
    def fetch_data(
        updated_at: dlt.sources.incremental[str] = cursor,
    ) -> Iterator[dict[str, Any]]:
        yield from fetch_paginated(session, endpoint_url, auth, data_key=key)

    return fetch_data
100
+
101
+
102
def create_replace_resource(
    base_url: str,
    session,
    auth: tuple,
    name: str,
    path: str,
    key: str,
    pk: str | None,
):
    """
    Build a replace-disposition DLT resource for one top-level endpoint.

    Args:
        base_url: Base API URL
        session: HTTP session
        auth: Authentication tuple
        name: Resource name
        path: API endpoint path, appended to ``base_url``
        key: Data key in response
        pk: Primary key field; when None no primary key is passed to dlt

    Returns:
        DLT resource function
    """

    def fetch_data() -> Iterator[dict[str, Any]]:
        yield from fetch_paginated(session, f"{base_url}/{path}", auth, data_key=key)

    # Only mention primary_key when one was configured.
    resource_options: dict[str, Any] = {"name": name, "write_disposition": "replace"}
    if pk is not None:
        resource_options["primary_key"] = pk

    return dlt.resource(fetch_data, **resource_options)
145
+
146
+
147
def create_nested_resource(
    base_url: str,
    session,
    auth: tuple,
    parent_resource_name: str,
    parent_path: str,
    parent_key: str,
    parent_id_field: str,
    nested_name: str,
    nested_path: str,
    nested_key: str | None,
    pk: str | None,
):
    """
    Build a replace-disposition DLT resource that is fetched once per parent.

    Parents are listed with ``fetch_paginated``. For each parent with a truthy
    id, a single GET is sent to the nested endpoint. Every yielded record gets
    a ``{parent_resource_name}_id`` field holding the parent id.

    NOTE(review): the nested GET sends no count/offset parameters, so only
    the API's default page is read. Confirm that is enough for these endpoints.

    Args:
        base_url: Base API URL
        session: HTTP session
        auth: Authentication tuple
        parent_resource_name: Name of the parent resource
        parent_path: Parent API endpoint path
        parent_key: Data key in parent response
        parent_id_field: Field name for parent ID
        nested_name: Nested resource name
        nested_path: Nested API endpoint path (with {id} placeholder)
        nested_key: Data key in nested response (None to return whole response)
        pk: Primary key field; when None no primary key is passed to dlt

    Returns:
        DLT resource function
    """
    parent_ref = f"{parent_resource_name}_id"

    def fetch_nested_data() -> Iterator[dict[str, Any]]:
        parents = fetch_paginated(
            session, f"{base_url}/{parent_path}", auth, data_key=parent_key
        )

        for parent in parents:
            parent_id = parent.get(parent_id_field)
            if not parent_id:
                continue

            response = session.get(
                f"{base_url}/{nested_path.format(id=parent_id)}", auth=auth
            )
            response.raise_for_status()
            payload = response.json()

            # Work out which records to emit: the keyed list, the keyed
            # single object, or the whole response.
            if nested_key and nested_key in payload:
                nested = payload[nested_key]
                records = nested if isinstance(nested, list) else [nested]
            else:
                records = [payload]

            for record in records:
                # Add parent reference
                record[parent_ref] = parent_id
                yield record

    # Only mention primary_key when one was configured.
    resource_options: dict[str, Any] = {
        "name": nested_name,
        "write_disposition": "replace",
    }
    if pk is not None:
        resource_options["primary_key"] = pk

    return dlt.resource(fetch_nested_data, **resource_options)
@@ -0,0 +1,164 @@
1
+ """
2
+ Mailchimp API endpoint configurations.
3
+ """
4
+
5
+ # Endpoints with merge disposition (have both primary_key and incremental_key)
6
+ # Format: (resource_name, endpoint_path, data_key, primary_key, incremental_key)
7
# Each entry becomes one merge-disposition resource; the fields are passed
# positionally, in this order, to the merge resource factory.
MERGE_ENDPOINTS: list[tuple[str, str, str, str, str]] = [
    ("audiences", "lists", "lists", "id", "date_created"),
    ("automations", "automations", "automations", "id", "create_time"),
    ("campaigns", "campaigns", "campaigns", "id", "create_time"),
    ("connected_sites", "connected-sites", "sites", "id", "updated_at"),
    # Incremental key given as a dotted path into the nested last_message object.
    ("conversations", "conversations", "conversations", "id", "last_message.timestamp"),
    ("ecommerce_stores", "ecommerce/stores", "stores", "id", "updated_at"),
    ("facebook_ads", "facebook-ads", "facebook_ads", "id", "updated_at"),
    ("landing_pages", "landing-pages", "landing_pages", "id", "updated_at"),
    ("reports", "reports", "reports", "id", "send_time"),
]
18
+
19
+ # Endpoints with replace disposition
20
+ # Format: (resource_name, endpoint_path, data_key, primary_key)
21
+ REPLACE_ENDPOINTS: list[tuple[str, str, str, str | None]] = [
22
+ ("account_exports", "account-exports", "exports", None),
23
+ ("authorized_apps", "authorized-apps", "apps", "id"),
24
+ ("batches", "batches", "batches", None),
25
+ ("campaign_folders", "campaign-folders", "folders", "id"),
26
+ ("chimp_chatter", "activity-feed/chimp-chatter", "chimp_chatter", None),
27
+ ]
28
+
29
+ # Nested endpoints (depend on parent resources)
30
+ # Format: (parent_name, parent_path, parent_key, parent_id_field, nested_name, nested_path, nested_key, pk)
31
+ NESTED_ENDPOINTS: list[tuple[str, str, str, str, str, str, str | None, str | None]] = [
32
+ # Reports nested endpoints
33
+ (
34
+ "reports",
35
+ "reports",
36
+ "reports",
37
+ "id",
38
+ "reports_advice",
39
+ "reports/{id}/advice",
40
+ None,
41
+ None,
42
+ ),
43
+ (
44
+ "reports",
45
+ "reports",
46
+ "reports",
47
+ "id",
48
+ "reports_domain_performance",
49
+ "reports/{id}/domain-performance",
50
+ "domains",
51
+ None,
52
+ ),
53
+ (
54
+ "reports",
55
+ "reports",
56
+ "reports",
57
+ "id",
58
+ "reports_locations",
59
+ "reports/{id}/locations",
60
+ "locations",
61
+ None,
62
+ ),
63
+ (
64
+ "reports",
65
+ "reports",
66
+ "reports",
67
+ "id",
68
+ "reports_sent_to",
69
+ "reports/{id}/sent-to",
70
+ "sent_to",
71
+ None,
72
+ ),
73
+ (
74
+ "reports",
75
+ "reports",
76
+ "reports",
77
+ "id",
78
+ "reports_sub_reports",
79
+ "reports/{id}/sub-reports",
80
+ None,
81
+ None,
82
+ ),
83
+ (
84
+ "reports",
85
+ "reports",
86
+ "reports",
87
+ "id",
88
+ "reports_unsubscribed",
89
+ "reports/{id}/unsubscribed",
90
+ "unsubscribes",
91
+ None,
92
+ ),
93
+ # Lists/Audiences nested endpoints
94
+ (
95
+ "audiences",
96
+ "lists",
97
+ "lists",
98
+ "id",
99
+ "lists_activity",
100
+ "lists/{id}/activity",
101
+ "activity",
102
+ None,
103
+ ),
104
+ (
105
+ "audiences",
106
+ "lists",
107
+ "lists",
108
+ "id",
109
+ "lists_clients",
110
+ "lists/{id}/clients",
111
+ "clients",
112
+ None,
113
+ ),
114
+ (
115
+ "audiences",
116
+ "lists",
117
+ "lists",
118
+ "id",
119
+ "lists_growth_history",
120
+ "lists/{id}/growth-history",
121
+ "history",
122
+ None,
123
+ ),
124
+ (
125
+ "audiences",
126
+ "lists",
127
+ "lists",
128
+ "id",
129
+ "lists_interest_categories",
130
+ "lists/{id}/interest-categories",
131
+ "categories",
132
+ None,
133
+ ),
134
+ (
135
+ "audiences",
136
+ "lists",
137
+ "lists",
138
+ "id",
139
+ "lists_locations",
140
+ "lists/{id}/locations",
141
+ "locations",
142
+ None,
143
+ ),
144
+ (
145
+ "audiences",
146
+ "lists",
147
+ "lists",
148
+ "id",
149
+ "lists_merge_fields",
150
+ "lists/{id}/merge-fields",
151
+ "merge_fields",
152
+ None,
153
+ ),
154
+ (
155
+ "audiences",
156
+ "lists",
157
+ "lists",
158
+ "id",
159
+ "lists_segments",
160
+ "lists/{id}/segments",
161
+ "segments",
162
+ None,
163
+ ),
164
+ ]