ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,114 @@
1
+ """Readers for HTTP file sources"""
2
+
3
+ import io
4
+ from typing import Any, Iterator, Optional
5
+ from urllib.parse import urlparse
6
+
7
+ import requests
8
+ from dlt.sources import TDataItems
9
+
10
+
11
class HttpReader:
    """Download a single file over HTTP and yield its rows in chunks.

    Supported formats are ``csv``, ``json`` (a JSON document or JSON Lines)
    and ``parquet``. The whole response body is loaded into memory before it
    is parsed.
    """

    _SUPPORTED_FORMATS = ("csv", "json", "parquet")

    def __init__(self, url: str, file_format: Optional[str] = None):
        """Create a reader for *url*.

        Args:
            url: Location of the file to download.
            file_format: One of ``csv``, ``json``, ``parquet`` (case
                insensitive; ``jsonl`` is accepted as an alias of ``json``).
                When omitted or empty the format is inferred from the URL
                path extension.

        Raises:
            ValueError: If the format is unsupported or cannot be inferred.
        """
        self.url = url
        if file_format:
            normalized = file_format.strip().lower()
            # ".jsonl" URLs are already mapped to the JSON reader by
            # _infer_format, so accept the explicit spelling as well.
            self.file_format = "json" if normalized == "jsonl" else normalized
        else:
            self.file_format = self._infer_format(url)

        if self.file_format not in self._SUPPORTED_FORMATS:
            raise ValueError(
                f"Unsupported file format: {self.file_format}. "
                "Supported formats: csv, json, parquet"
            )

    def _infer_format(self, url: str) -> str:
        """Infer the file format from the extension of the URL path.

        Only the path component is inspected, so query strings and fragments
        do not interfere. Raises ValueError for unknown extensions.
        """
        path = urlparse(url).path.lower()

        if path.endswith(".csv"):
            return "csv"
        if path.endswith((".json", ".jsonl")):
            return "json"
        if path.endswith(".parquet"):
            return "parquet"
        raise ValueError(
            f"Cannot infer file format from URL: {url}. "
            "Please specify file_format parameter."
        )

    def _download_file(self) -> bytes:
        """Download the file and return the raw response body.

        Raises whatever ``raise_for_status`` raises for non-success responses.
        """
        # The context manager closes the streamed response even when
        # raise_for_status() raises before the body has been consumed.
        with requests.get(self.url, stream=True, timeout=30) as response:
            response.raise_for_status()
            return response.content

    def read_file(self, **kwargs: Any) -> Iterator[TDataItems]:
        """Download the file and yield its rows as lists of records.

        Keyword arguments are forwarded to the format-specific reader
        (for example ``chunksize``).
        """
        content = self._download_file()

        if self.file_format == "csv":
            yield from self._read_csv(content, **kwargs)
        elif self.file_format == "json":
            yield from self._read_json(content, **kwargs)
        elif self.file_format == "parquet":
            yield from self._read_parquet(content, **kwargs)

    def _read_csv(
        self, content: bytes, chunksize: int = 10000, **pandas_kwargs: Any
    ) -> Iterator[TDataItems]:
        """Parse CSV bytes with pandas and yield ``chunksize`` rows at a time.

        Extra keyword arguments are passed to ``pandas.read_csv`` and take
        precedence over the defaults set here.
        """
        import pandas as pd  # type: ignore

        kwargs = {"header": "infer", "chunksize": chunksize, **pandas_kwargs}

        file_obj = io.BytesIO(content)
        for df in pd.read_csv(file_obj, **kwargs):
            yield df.to_dict(orient="records")

    def _read_json(
        self, content: bytes, chunksize: int = 1000, **kwargs: Any
    ) -> Iterator[TDataItems]:
        """Parse a JSON document or a JSON Lines file and yield record lists.

        The payload is first parsed as one JSON document, so pretty-printed
        (multi-line) JSON is handled. Only when that fails and the payload
        has several non-empty lines is it read as JSON Lines. A top-level
        list is yielded in slices of ``chunksize``; any other value is
        yielded as a single-item list.
        """
        from dlt.common import json

        text = content.decode("utf-8")

        jsonl_lines = None
        try:
            data = json.loads(text)
        except ValueError:
            # NOTE(review): relies on the JSON backend's decode error being a
            # ValueError subclass (true for stdlib json, orjson, simplejson).
            jsonl_lines = [line for line in text.split("\n") if line.strip()]
            if len(jsonl_lines) < 2:
                # Not JSON Lines either: surface the original decode error.
                raise

        if jsonl_lines is not None:
            lines_chunk = []
            for line in jsonl_lines:
                lines_chunk.append(json.loads(line))
                if len(lines_chunk) >= chunksize:
                    yield lines_chunk
                    lines_chunk = []
            if lines_chunk:
                yield lines_chunk
            return

        if isinstance(data, list):
            for i in range(0, len(data), chunksize):
                yield data[i : i + chunksize]
        else:
            yield [data]

    def _read_parquet(
        self, content: bytes, chunksize: int = 10000, **kwargs: Any
    ) -> Iterator[TDataItems]:
        """Parse Parquet bytes and yield batches of up to ``chunksize`` rows."""
        from pyarrow import parquet as pq  # type: ignore

        file_obj = io.BytesIO(content)
        parquet_file = pq.ParquetFile(file_obj)

        for batch in parquet_file.iter_batches(batch_size=chunksize):
            yield batch.to_pylist()
@@ -0,0 +1,24 @@
1
+ import requests
2
+ from dlt.sources.helpers.requests import Client
3
+
4
+
5
def create_client(retry_status_codes: list[int] | None = None) -> requests.Session:
    """Return a session that retries requests on the given status codes.

    Args:
        retry_status_codes: HTTP status codes that should trigger a retry.
            When ``None`` only 502 is retried.

    Returns:
        The ``session`` attribute of a dlt ``Client`` configured with up to
        12 attempts, a backoff factor of 10, and ``raise_for_status``
        disabled.
    """
    codes = [502] if retry_status_codes is None else retry_status_codes
    client = Client(
        raise_for_status=False,
        retry_condition=retry_on_status_code(codes),
        request_max_attempts=12,
        request_backoff_factor=10,
    )
    return client.session
14
+
15
+
16
def retry_on_status_code(retry_status_codes: list[int]):
    """Build a retry predicate that matches responses by status code.

    The returned callable takes ``(response, exception)`` and returns True
    only when a response is present and its ``status_code`` is one of
    ``retry_status_codes``.
    """

    def retry_on_limit(
        response: requests.Response | None, exception: BaseException | None
    ) -> bool:
        # Without a response there is no status code to compare; never retry.
        return response is not None and response.status_code in retry_status_codes

    return retry_on_limit
@@ -32,10 +32,16 @@ from dlt.common import pendulum
32
32
  from dlt.common.typing import TDataItems
33
33
  from dlt.sources import DltResource
34
34
 
35
- from .helpers import _get_property_names, fetch_data, fetch_property_history
35
+ from .helpers import (
36
+ _get_property_names,
37
+ fetch_data,
38
+ fetch_data_raw,
39
+ fetch_property_history,
40
+ )
36
41
  from .settings import (
37
42
  ALL,
38
43
  CRM_OBJECT_ENDPOINTS,
44
+ CRM_SCHEMAS_ENDPOINT,
39
45
  DEFAULT_COMPANY_PROPS,
40
46
  DEFAULT_CONTACT_PROPS,
41
47
  DEFAULT_DEAL_PROPS,
@@ -55,6 +61,7 @@ def hubspot(
55
61
  api_key: str = dlt.secrets.value,
56
62
  include_history: bool = False,
57
63
  include_custom_props: bool = True,
64
+ custom_object: str = None,
58
65
  ) -> Sequence[DltResource]:
59
66
  """
60
67
  A DLT source that retrieves data from the HubSpot API using the
@@ -86,7 +93,6 @@ def hubspot(
86
93
  def companies(
87
94
  api_key: str = api_key,
88
95
  include_history: bool = include_history,
89
- props: Sequence[str] = DEFAULT_COMPANY_PROPS,
90
96
  include_custom_props: bool = include_custom_props,
91
97
  ) -> Iterator[TDataItems]:
92
98
  """Hubspot companies resource"""
@@ -94,7 +100,7 @@ def hubspot(
94
100
  "company",
95
101
  api_key,
96
102
  include_history=include_history,
97
- props=props,
103
+ props=DEFAULT_COMPANY_PROPS,
98
104
  include_custom_props=include_custom_props,
99
105
  )
100
106
 
@@ -102,7 +108,6 @@ def hubspot(
102
108
  def contacts(
103
109
  api_key: str = api_key,
104
110
  include_history: bool = include_history,
105
- props: Sequence[str] = DEFAULT_CONTACT_PROPS,
106
111
  include_custom_props: bool = include_custom_props,
107
112
  ) -> Iterator[TDataItems]:
108
113
  """Hubspot contacts resource"""
@@ -110,7 +115,7 @@ def hubspot(
110
115
  "contact",
111
116
  api_key,
112
117
  include_history,
113
- props,
118
+ DEFAULT_CONTACT_PROPS,
114
119
  include_custom_props,
115
120
  )
116
121
 
@@ -118,7 +123,6 @@ def hubspot(
118
123
  def deals(
119
124
  api_key: str = api_key,
120
125
  include_history: bool = include_history,
121
- props: Sequence[str] = DEFAULT_DEAL_PROPS,
122
126
  include_custom_props: bool = include_custom_props,
123
127
  ) -> Iterator[TDataItems]:
124
128
  """Hubspot deals resource"""
@@ -126,7 +130,7 @@ def hubspot(
126
130
  "deal",
127
131
  api_key,
128
132
  include_history,
129
- props,
133
+ DEFAULT_DEAL_PROPS,
130
134
  include_custom_props,
131
135
  )
132
136
 
@@ -134,7 +138,6 @@ def hubspot(
134
138
  def tickets(
135
139
  api_key: str = api_key,
136
140
  include_history: bool = include_history,
137
- props: Sequence[str] = DEFAULT_TICKET_PROPS,
138
141
  include_custom_props: bool = include_custom_props,
139
142
  ) -> Iterator[TDataItems]:
140
143
  """Hubspot tickets resource"""
@@ -142,7 +145,7 @@ def hubspot(
142
145
  "ticket",
143
146
  api_key,
144
147
  include_history,
145
- props,
148
+ DEFAULT_TICKET_PROPS,
146
149
  include_custom_props,
147
150
  )
148
151
 
@@ -150,7 +153,6 @@ def hubspot(
150
153
  def products(
151
154
  api_key: str = api_key,
152
155
  include_history: bool = include_history,
153
- props: Sequence[str] = DEFAULT_PRODUCT_PROPS,
154
156
  include_custom_props: bool = include_custom_props,
155
157
  ) -> Iterator[TDataItems]:
156
158
  """Hubspot products resource"""
@@ -158,15 +160,21 @@ def hubspot(
158
160
  "product",
159
161
  api_key,
160
162
  include_history,
161
- props,
163
+ DEFAULT_PRODUCT_PROPS,
162
164
  include_custom_props,
163
165
  )
164
166
 
167
+ @dlt.resource(name="schemas", write_disposition="merge", primary_key="id")
168
+ def schemas(
169
+ api_key: str = api_key,
170
+ ) -> Iterator[TDataItems]:
171
+ """Hubspot schemas resource"""
172
+ yield from fetch_data(CRM_SCHEMAS_ENDPOINT, api_key, resource_name="schemas")
173
+
165
174
  @dlt.resource(name="quotes", write_disposition="replace")
166
175
  def quotes(
167
176
  api_key: str = api_key,
168
177
  include_history: bool = include_history,
169
- props: Sequence[str] = DEFAULT_QUOTE_PROPS,
170
178
  include_custom_props: bool = include_custom_props,
171
179
  ) -> Iterator[TDataItems]:
172
180
  """Hubspot quotes resource"""
@@ -174,11 +182,55 @@ def hubspot(
174
182
  "quote",
175
183
  api_key,
176
184
  include_history,
177
- props,
185
+ DEFAULT_QUOTE_PROPS,
178
186
  include_custom_props,
179
187
  )
180
188
 
181
- return companies, contacts, deals, tickets, products, quotes
189
+ @dlt.resource(write_disposition="merge", primary_key="hs_object_id")
190
+ def custom(
191
+ api_key: str = api_key,
192
+ custom_object_name: str = custom_object,
193
+ ) -> Iterator[TDataItems]:
194
+ custom_objects = fetch_data_raw(CRM_SCHEMAS_ENDPOINT, api_key)
195
+ object_type_id = None
196
+ associations = None
197
+ if ":" in custom_object_name:
198
+ fields = custom_object_name.split(":")
199
+ if len(fields) == 2:
200
+ custom_object_name = fields[0]
201
+ associations = fields[1]
202
+
203
+ custom_object_lowercase = custom_object_name.lower()
204
+
205
+ for custom_object in custom_objects["results"]:
206
+ if custom_object["name"].lower() == custom_object_lowercase:
207
+ object_type_id = custom_object["objectTypeId"]
208
+ break
209
+
210
+ # sometimes people use the plural name of the object type by accident, we should try to match that if we can
211
+ if "labels" in custom_object:
212
+ if custom_object_lowercase == custom_object["labels"]["plural"].lower():
213
+ object_type_id = custom_object["objectTypeId"]
214
+ break
215
+
216
+ if object_type_id is None:
217
+ raise ValueError(f"There is no such custom object as {custom_object_name}")
218
+ custom_object_properties = f"crm/v3/properties/{object_type_id}"
219
+
220
+ props_pages = fetch_data(custom_object_properties, api_key)
221
+ props = []
222
+ for page in props_pages:
223
+ props.extend([prop["name"] for prop in page])
224
+ props = ",".join(sorted(list(set(props))))
225
+
226
+ custom_object_endpoint = f"crm/v3/objects/{object_type_id}/?properties={props}"
227
+ if associations:
228
+ custom_object_endpoint += f"&associations={associations}"
229
+
230
+ """Hubspot custom object details resource"""
231
+ yield from fetch_data(custom_object_endpoint, api_key, resource_name="custom")
232
+
233
+ return companies, contacts, deals, tickets, products, quotes, schemas, custom
182
234
 
183
235
 
184
236
  def crm_objects(
@@ -199,15 +251,6 @@ def crm_objects(
199
251
 
200
252
  props = ",".join(sorted(list(set(props))))
201
253
 
202
- if len(props) > 2000:
203
- raise ValueError(
204
- "Your request to Hubspot is too long to process. "
205
- "Maximum allowed query length is 2000 symbols, while "
206
- f"your list of properties `{props[:200]}`... is {len(props)} "
207
- "symbols long. Use the `props` argument of the resource to "
208
- "set the list of properties to extract from the endpoint."
209
- )
210
-
211
254
  params = {"properties": props, "limit": 100}
212
255
 
213
256
  yield from fetch_data(CRM_OBJECT_ENDPOINTS[object_type], api_key, params=params)
@@ -90,7 +90,10 @@ def fetch_property_history(
90
90
 
91
91
 
92
92
  def fetch_data(
93
- endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
93
+ endpoint: str,
94
+ api_key: str,
95
+ params: Optional[Dict[str, Any]] = None,
96
+ resource_name: str = None,
94
97
  ) -> Iterator[List[Dict[str, Any]]]:
95
98
  """
96
99
  Fetch data from HUBSPOT endpoint using a specified API key and yield the properties of each result.
@@ -127,32 +130,50 @@ def fetch_data(
127
130
  # Parse the API response and yield the properties of each result
128
131
  # Parse the response JSON data
129
132
  _data = r.json()
133
+
130
134
  # Yield the properties of each result in the API response
131
135
  while _data is not None:
132
136
  if "results" in _data:
133
137
  _objects: List[Dict[str, Any]] = []
134
138
  for _result in _data["results"]:
135
- _obj = _result.get("properties", _result)
136
- if "id" not in _obj and "id" in _result:
137
- # Move id from properties to top level
138
- _obj["id"] = _result["id"]
139
- if "associations" in _result:
140
- for association in _result["associations"]:
141
- __values = [
142
- {
143
- "value": _obj["hs_object_id"],
144
- f"{association}_id": __r["id"],
145
- }
146
- for __r in _result["associations"][association]["results"]
147
- ]
148
-
149
- # remove duplicates from list of dicts
150
- __values = [
151
- dict(t) for t in {tuple(d.items()) for d in __values}
152
- ]
153
-
154
- _obj[association] = __values
155
- _objects.append(_obj)
139
+ if resource_name == "schemas":
140
+ _objects.append(
141
+ {
142
+ "name": _result["labels"].get("singular", ""),
143
+ "objectTypeId": _result.get("objectTypeId", ""),
144
+ "id": _result.get("id", ""),
145
+ "fullyQualifiedName": _result.get("fullyQualifiedName", ""),
146
+ "properties": _result.get("properties", ""),
147
+ "createdAt": _result.get("createdAt", ""),
148
+ "updatedAt": _result.get("updatedAt", ""),
149
+ }
150
+ )
151
+ else:
152
+ _obj = _result.get("properties", _result)
153
+ if "id" not in _obj and "id" in _result:
154
+ # Move id from properties to top level
155
+ _obj["id"] = _result["id"]
156
+
157
+ if "associations" in _result:
158
+ for association in _result["associations"]:
159
+ __values = [
160
+ {
161
+ "value": _obj["hs_object_id"],
162
+ f"{association}_id": __r["id"],
163
+ }
164
+ for __r in _result["associations"][association][
165
+ "results"
166
+ ]
167
+ ]
168
+
169
+ # remove duplicates from list of dicts
170
+ __values = [
171
+ dict(t) for t in {tuple(d.items()) for d in __values}
172
+ ]
173
+
174
+ _obj[association] = __values
175
+
176
+ _objects.append(_obj)
156
177
  yield _objects
157
178
 
158
179
  # Follow pagination links if they exist
@@ -186,3 +207,12 @@ def _get_property_names(api_key: str, object_type: str) -> List[str]:
186
207
  properties.extend([prop["name"] for prop in page])
187
208
 
188
209
  return properties
210
+
211
+
212
def fetch_data_raw(
    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
) -> Any:
    """Issue one GET request against a HubSpot endpoint and return the JSON body.

    No pagination is followed and the payload is returned as decoded, without
    any reshaping.

    Args:
        endpoint: Endpoint path, resolved to a full URL with ``get_url``.
        api_key: API key used to build the request headers.
        params: Optional query parameters for the request.

    Returns:
        The decoded JSON response body (not an iterator of pages).
    """
    url = get_url(endpoint)
    headers = _get_headers(api_key)
    r = requests.get(url, headers=headers, params=params)
    return r.json()
@@ -5,15 +5,22 @@ from dlt.common import pendulum
5
5
  STARTDATE = pendulum.datetime(year=2000, month=1, day=1)
6
6
 
7
7
  CRM_CONTACTS_ENDPOINT = (
8
- "/crm/v3/objects/contacts?associations=deals,products,tickets,quotes"
8
+ "/crm/v3/objects/contacts?associations=companies,deals,products,tickets,quotes"
9
9
  )
10
- CRM_COMPANIES_ENDPOINT = (
11
- "/crm/v3/objects/companies?associations=contacts,deals,products,tickets,quotes"
10
+ CRM_COMPANIES_ENDPOINT = "/crm/v3/objects/companies?associations=products"
11
+ CRM_DEALS_ENDPOINT = (
12
+ "/crm/v3/objects/deals?associations=companies,contacts,products,tickets,quotes"
12
13
  )
13
- CRM_DEALS_ENDPOINT = "/crm/v3/objects/deals"
14
- CRM_PRODUCTS_ENDPOINT = "/crm/v3/objects/products"
15
- CRM_TICKETS_ENDPOINT = "/crm/v3/objects/tickets"
16
- CRM_QUOTES_ENDPOINT = "/crm/v3/objects/quotes"
14
+ CRM_PRODUCTS_ENDPOINT = (
15
+ "/crm/v3/objects/products?associations=companies,contacts,deals,tickets,quotes"
16
+ )
17
+ CRM_TICKETS_ENDPOINT = (
18
+ "/crm/v3/objects/tickets?associations=companies,contacts,deals,products,quotes"
19
+ )
20
+ CRM_QUOTES_ENDPOINT = (
21
+ "/crm/v3/objects/quotes?associations=companies,contacts,deals,products,tickets"
22
+ )
23
+ CRM_SCHEMAS_ENDPOINT = "/crm/v3/schemas"
17
24
 
18
25
  CRM_OBJECT_ENDPOINTS = {
19
26
  "contact": CRM_CONTACTS_ENDPOINT,
@@ -0,0 +1,46 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+
8
+ from .client import InfluxClient
9
+
10
+
11
@dlt.source(max_table_nesting=0)
def influxdb_source(
    measurement: str,
    host: str,
    org: str,
    bucket: str,
    token: str = dlt.secrets.value,
    secure: bool = True,
    start_date: pendulum.DateTime = pendulum.datetime(2024, 1, 1),
    end_date: pendulum.DateTime | None = None,
) -> Iterable[DltResource]:
    """Source exposing one InfluxDB measurement as an incremental resource.

    Args:
        measurement: Measurement to read; also used as the resource name.
        host: URL handed to the InfluxDB client.
        org: Organization handed to the InfluxDB client.
        bucket: Bucket that is queried.
        token: Authentication token.
        secure: Passed to the client as ``verify_ssl``.
        start_date: Initial value of the incremental ``time`` cursor.
        end_date: Optional end value of the cursor; when absent the query
            runs up to the current time.

    Returns:
        The resource that streams the measurement's records.
    """
    client = InfluxClient(
        url=host,
        token=token,
        org=org,
        bucket=bucket,
        verify_ssl=secure,
    )

    @dlt.resource(name=measurement)
    def fetch_table(
        timestamp=dlt.sources.incremental(
            "time",
            initial_value=start_date,
            end_value=end_date,
            range_start="closed",
            range_end="closed",
        ),
    ) -> Iterable[TDataItem]:
        # Resume from the stored cursor when there is one, else from start_date.
        lower = start_date if timestamp.last_value is None else timestamp.last_value
        start = lower.isoformat()
        # An open-ended load stops at "now".
        upper = pendulum.now() if timestamp.end_value is None else timestamp.end_value
        end = upper.isoformat()
        yield from client.fetch_measurement(measurement, start, end)

    return fetch_table
@@ -0,0 +1,34 @@
1
+ from typing import Any, Dict, Iterable
2
+
3
+ from influxdb_client import InfluxDBClient # type: ignore
4
+
5
+
6
+ class InfluxClient:
7
+ def __init__(
8
+ self, url: str, token: str, org: str, bucket: str, verify_ssl: bool = True
9
+ ) -> None:
10
+ self.client = InfluxDBClient(
11
+ url=url, token=token, org=org, verify_ssl=verify_ssl
12
+ )
13
+ self.bucket = bucket
14
+
15
+ def fetch_measurement(
16
+ self, measurement: str, start: str, end: str | None = None
17
+ ) -> Iterable[Dict[str, Any]]:
18
+ query = f'from(bucket: "{self.bucket}") |> range(start: {start}'
19
+ if end:
20
+ query += f", stop: {end}"
21
+ query += f') |> filter(fn: (r) => r["_measurement"] == "{measurement}")'
22
+ query_api = self.client.query_api()
23
+
24
+ for record in query_api.query_stream(query):
25
+ cleaned_record = {}
26
+ exclude_keys = {"result", "table", "_start", "_stop"}
27
+ for key, value in record.values.items():
28
+ if key in exclude_keys:
29
+ continue
30
+ if key.startswith("_"):
31
+ cleaned_record[key[1:]] = value
32
+ else:
33
+ cleaned_record[key] = value
34
+ yield cleaned_record
@@ -0,0 +1,142 @@
1
+ """
2
+ Intercom source implementation for data ingestion.
3
+
4
+ This module provides DLT sources for retrieving data from Intercom API endpoints
5
+ including contacts, companies, conversations, tickets, and more.
6
+ """
7
+
8
+ from typing import Optional, Sequence
9
+
10
+ import dlt
11
+ from dlt.common.time import ensure_pendulum_datetime
12
+ from dlt.common.typing import TAnyDateTime
13
+ from dlt.sources import DltResource, DltSource
14
+
15
+ from .helpers import (
16
+ IntercomAPIClient,
17
+ IntercomCredentialsAccessToken,
18
+ TIntercomCredentials,
19
+ convert_datetime_to_timestamp,
20
+ create_resource_from_config,
21
+ transform_company,
22
+ transform_contact,
23
+ transform_conversation,
24
+ )
25
+ from .helpers import (
26
+ IntercomCredentialsOAuth as IntercomCredentialsOAuth,
27
+ )
28
+ from .settings import (
29
+ DEFAULT_START_DATE,
30
+ RESOURCE_CONFIGS,
31
+ )
32
+
33
+
34
@dlt.source(name="intercom", max_table_nesting=0)
def intercom_source(
    credentials: TIntercomCredentials = dlt.secrets.value,
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
) -> Sequence[DltResource]:
    """
    DLT source that builds one resource per entry in ``RESOURCE_CONFIGS``.

    Args:
        credentials: Intercom API credentials used to create the API client.
            Defaults to dlt.secrets.value.
        start_date: Lower bound for loading. A falsy value falls back to
            ``DEFAULT_START_DATE``.
        end_date: Optional upper bound for loading. When not provided no
            end timestamp is passed to the resources.

    Returns:
        List of DLT resources, in the iteration order of ``RESOURCE_CONFIGS``.

    Example:
        >>> source = intercom_source(
        ...     credentials=IntercomCredentialsAccessToken(
        ...         access_token="your_token",
        ...         region="us"
        ...     ),
        ...     start_date=datetime(2024, 1, 1)
        ... )
    """
    api_client = IntercomAPIClient(credentials)

    # Normalize both bounds to pendulum datetimes; the start always has a value.
    window_start = ensure_pendulum_datetime(start_date or DEFAULT_START_DATE)
    window_end = ensure_pendulum_datetime(end_date) if end_date else None

    # The resources receive the bounds as timestamps.
    start_timestamp = convert_datetime_to_timestamp(window_start)
    end_timestamp = None
    if window_end:
        end_timestamp = convert_datetime_to_timestamp(window_end)

    # Lookup table handed to the resource factory together with each config.
    transform_functions = {
        "transform_contact": transform_contact,
        "transform_company": transform_company,
        "transform_conversation": transform_conversation,
    }

    # Each factory call returns a resource function; call it to get the resource.
    return [
        create_resource_from_config(
            resource_name,
            config,
            api_client,
            start_timestamp,
            end_timestamp,
            transform_functions,
        )()
        for resource_name, config in RESOURCE_CONFIGS.items()
    ]
109
+
110
+
111
def intercom(
    api_key: str,
    region: str = "us",
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
) -> DltSource:
    """
    Create the Intercom source from a plain access token.

    Wraps ``api_key`` and ``region`` in ``IntercomCredentialsAccessToken`` and
    delegates to ``intercom_source``.

    Args:
        api_key: Intercom API access token.
        region: Data region passed to the credentials object. Defaults to "us".
        start_date: Start date forwarded to ``intercom_source``.
        end_date: Optional end date forwarded to ``intercom_source``.

    Returns:
        The source returned by ``intercom_source``.

    Example:
        >>> source = intercom(
        ...     api_key="your_access_token",
        ...     region="us",
        ...     start_date=datetime(2024, 1, 1)
        ... )
    """
    token_credentials = IntercomCredentialsAccessToken(
        access_token=api_key,
        region=region,
    )
    return intercom_source(
        credentials=token_credentials,
        start_date=start_date,
        end_date=end_date,
    )