ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,279 @@
1
+ """
2
+ Configuration settings and constants for Intercom API integration.
3
+ """
4
+
5
+ from datetime import datetime
6
+ from typing import Dict, List, Tuple
7
+
# API Version - REQUIRED for all requests
API_VERSION = "2.14"

# Default start date for incremental loading.
# NOTE: this is a naive datetime (no tzinfo is attached).
DEFAULT_START_DATE = datetime(2020, 1, 1)

# Pagination settings
DEFAULT_PAGE_SIZE = 150
MAX_PAGE_SIZE = 150  # Intercom's maximum
SCROLL_EXPIRY_SECONDS = 60  # Scroll sessions expire after 1 minute

# Rate limiting settings
RATE_LIMIT_PER_10_SECONDS = 166
# Presumably the wait (in seconds) used when a rate-limit response carries
# no explicit retry hint - confirm against the client code.
RATE_LIMIT_RETRY_AFTER_DEFAULT = 10

# Regional API endpoints, keyed by region code -> API base URL
REGIONAL_ENDPOINTS = {
    "us": "https://api.intercom.io",
    "eu": "https://api.eu.intercom.io",
    "au": "https://api.au.intercom.io",
}
29
+
# Resource configuration for automatic generation
# Format: resource_name -> config dict
#
# Keys that appear in the entries below:
#   type            - one of "search", "pagination", "tickets", "simple"
#   incremental     - True/False flag for incremental loading
#   transform_func  - name of a transform function as a string, or None
#   columns         - column hints, e.g. {"tags": {"data_type": "json"}}
#   endpoint        - API path (only on "pagination" and "simple" entries)
#   data_key        - response key (only on "pagination" and "simple" entries)
#   pagination_type - "cursor" or "simple"
#   params          - extra request parameters, or None
# How each key is consumed is decided by code outside this module.
RESOURCE_CONFIGS = {
    # Search-based incremental resources
    "contacts": {
        "type": "search",
        "incremental": True,
        "transform_func": "transform_contact",
        "columns": {
            "custom_attributes": {"data_type": "json"},
            "tags": {"data_type": "json"},
        },
    },
    "conversations": {
        "type": "search",
        "incremental": True,
        "transform_func": "transform_conversation",
        "columns": {
            "custom_attributes": {"data_type": "json"},
            "tags": {"data_type": "json"},
        },
    },
    # Pagination-based incremental resources
    "companies": {
        "type": "pagination",
        "endpoint": "/companies",
        "data_key": "data",
        "pagination_type": "cursor",
        "incremental": True,
        "transform_func": "transform_company",
        "params": {"per_page": 50},
        "columns": {
            "custom_attributes": {"data_type": "json"},
            "tags": {"data_type": "json"},
        },
    },
    "articles": {
        "type": "pagination",
        "endpoint": "/articles",
        "data_key": "data",
        "pagination_type": "cursor",
        "incremental": True,
        "transform_func": None,
        "params": None,
        "columns": {},
    },
    # Special case - tickets
    "tickets": {
        "type": "tickets",
        "incremental": True,
        "transform_func": None,
        "columns": {
            "ticket_attributes": {"data_type": "json"},
        },
    },
    # Simple replace resources (non-incremental)
    "tags": {
        "type": "simple",
        "endpoint": "/tags",
        "data_key": "data",
        "pagination_type": "simple",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "segments": {
        "type": "simple",
        "endpoint": "/segments",
        "data_key": "segments",
        "pagination_type": "cursor",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "teams": {
        "type": "simple",
        "endpoint": "/teams",
        "data_key": "teams",
        "pagination_type": "simple",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "admins": {
        "type": "simple",
        "endpoint": "/admins",
        "data_key": "admins",
        "pagination_type": "simple",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "data_attributes": {
        "type": "simple",
        "endpoint": "/data_attributes",
        "data_key": "data",
        "pagination_type": "cursor",
        "incremental": False,
        "transform_func": None,
        "columns": {
            # The only entry here that pins a column type and nullability.
            "id": {"data_type": "bigint", "nullable": True},
        },
    },
}
134
+
# Core endpoints with their configuration (kept for backwards compatibility)
# Format: (endpoint_path, data_key, supports_incremental, pagination_type)
# NOTE: "collections" is listed here but has no entry in RESOURCE_CONFIGS.
CORE_ENDPOINTS: Dict[str, Tuple[str, str, bool, str]] = {
    "contacts": ("/contacts", "data", True, "cursor"),
    "companies": ("/companies", "data", True, "cursor"),
    "conversations": ("/conversations", "conversations", True, "cursor"),
    "tickets": ("/tickets", "tickets", True, "cursor"),
    "admins": ("/admins", "admins", False, "simple"),
    "teams": ("/teams", "teams", False, "simple"),
    "tags": ("/tags", "data", False, "simple"),
    "segments": ("/segments", "segments", False, "cursor"),
    "articles": ("/articles", "data", True, "cursor"),
    "collections": ("/help_center/collections", "data", False, "cursor"),
    "data_attributes": ("/data_attributes", "data", False, "cursor"),
}

# Incremental endpoints using search API (name -> search path)
SEARCH_ENDPOINTS: Dict[str, str] = {
    "contacts_search": "/contacts/search",
    "companies_search": "/companies/search",
    "conversations_search": "/conversations/search",
}

# Special endpoints requiring different handling
SCROLL_ENDPOINTS: List[str] = [
    "companies",  # Can use scroll for large exports
]

# Event tracking endpoint
EVENTS_ENDPOINT = "/events"

# Ticket fields endpoint for custom field mapping.
# Contains a "{ticket_type_id}" placeholder to be filled in before use.
TICKET_FIELDS_ENDPOINT = "/ticket_types/{ticket_type_id}/attributes"
168
+
# Default fields to retrieve for each resource type

# Field names for contacts
DEFAULT_CONTACT_FIELDS = [
    "id",
    "type",
    "external_id",
    "email",
    "phone",
    "name",
    "created_at",
    "updated_at",
    "signed_up_at",
    "last_seen_at",
    "last_contacted_at",
    "last_email_opened_at",
    "last_email_clicked_at",
    "browser",
    "browser_language",
    "browser_version",
    "location",
    "os",
    "role",
    "custom_attributes",
    "tags",
    "companies",
]

# Field names for companies
DEFAULT_COMPANY_FIELDS = [
    "id",
    "type",
    "company_id",
    "name",
    "plan",
    "size",
    "website",
    "industry",
    "created_at",
    "updated_at",
    "monthly_spend",
    "session_count",
    "user_count",
    "custom_attributes",
    "tags",
]

# Field names for conversations
DEFAULT_CONVERSATION_FIELDS = [
    "id",
    "type",
    "created_at",
    "updated_at",
    "waiting_since",
    "snoozed_until",
    "state",
    "open",
    "read",
    "priority",
    "admin_assignee_id",
    "team_assignee_id",
    "tags",
    "conversation_rating",
    "source",
    "contacts",
    "teammates",
    "custom_attributes",
    "first_contact_reply",
    "sla_applied",
    "statistics",
    "conversation_parts",
]

# Field names for tickets
DEFAULT_TICKET_FIELDS = [
    "id",
    "type",
    "ticket_id",
    "category",
    "ticket_attributes",
    "ticket_state",
    "ticket_type",
    "created_at",
    "updated_at",
    "ticket_parts",
    "contacts",
    "admin_assignee_id",
    "team_assignee_id",
    "open",
    "snoozed_until",
]
255
+
# Resources that support custom attributes
SUPPORTS_CUSTOM_ATTRIBUTES = [
    "contacts",
    "companies",
    "conversations",
]

# Maximum limits
MAX_CUSTOM_ATTRIBUTES_PER_RESOURCE = 100
MAX_EVENT_TYPES_PER_WORKSPACE = 120
MAX_CONVERSATION_PARTS = 500
MAX_SEARCH_RESULTS = 10000

# Field type mapping for custom attributes:
# Intercom attribute type name -> dlt data type name.
# "date" and "datetime" both map to "timestamp"; "object" and "list" to "json".
INTERCOM_TO_DLT_TYPE_MAPPING = {
    "string": "text",
    "integer": "bigint",
    "float": "double",
    "boolean": "bool",
    "date": "timestamp",
    "datetime": "timestamp",
    "object": "json",
    "list": "json",
}
@@ -0,0 +1,159 @@
1
+ import math
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import Any, Dict, Iterable, List, Optional
5
+
6
+ import dlt
7
+ from dlt.sources.rest_api import RESTAPIConfig, rest_api_resources
8
+
# Supported metric names mapped to their base API path segment.
# "dnssec_adoption" and "dnssec_tld_adoption" intentionally share the same
# base path; get_metric_cfg appends a different sub-path for each
# ("/domains/..." vs "/country/...") when options are supplied.
METRICS: Dict[str, str] = {
    "dnssec_adoption": "dnssec/adoption",
    "dnssec_tld_adoption": "dnssec/adoption",
    "dnssec_validation": "dnssec/validation",
    "http": "http",
    "http3": "http3",
    "https": "https",
    "ipv6": "ipv6",
    "net_loss": "net-loss",
    "resilience": "resilience",
    "roa": "roa",
    "rov": "rov",
    "tls": "tls",
    "tls13": "tls13",
}
24
+
25
+
@dlt.source
def pulse_source(
    token: str,
    start_date: str,
    metric: str,
    opts: List[str],
    end_date: Optional[str] = None,
) -> Iterable[dlt.sources.DltResource]:
    """Yield the dlt REST API resource for a single Pulse metric.

    Args:
        token: Bearer token placed in the ``Authorization`` header.
        start_date: Initial value of the incremental cursor.
        metric: Metric name; also used as the resource name.
        opts: Metric-specific options, checked by ``validate`` and
            interpreted by ``get_metric_cfg``.
        end_date: Optional end of the incremental range; when given it is
            also sent as the ``end_date`` query parameter.

    Raises:
        ValueError: Propagated from ``validate`` / ``get_metric_cfg``.
    """
    validate(metric, opts)
    metric_cfg = get_metric_cfg(metric, opts, start_date)

    # Query string: the incremental start placeholder first, then whatever
    # the metric configuration adds, then the optional end date.
    query_params: Dict[str, Any] = {"start_date": "{incremental.start_value}"}
    query_params.update(metric_cfg.params)
    if end_date is not None:
        query_params["end_date"] = end_date

    incremental_cfg: Dict[str, Any] = {
        "cursor_path": "date",
        "start_param": "start_date",
        "end_param": "end_date",
        "initial_value": start_date,
        "end_value": end_date,
        "range_start": "closed",
        "range_end": "closed",
    }

    endpoint: Dict[str, Any] = {
        "path": metric_cfg.path,
        "params": query_params,
        "incremental": incremental_cfg,
        "paginator": "single_page",
    }

    metric_resource = {
        "name": metric,
        "write_disposition": "merge",
        "primary_key": "date",
        "columns": {"date": {"data_type": "date"}},
        "endpoint": endpoint,
    }

    config: RESTAPIConfig = {
        "client": {
            "base_url": "https://pulse.internetsociety.org/api/",
            "headers": {"Authorization": f"Bearer {token}"},
        },
        "resource_defaults": {
            "write_disposition": "merge",
            "primary_key": "date",
        },
        "resources": [metric_resource],  # type:ignore
    }

    generated = rest_api_resources(config)
    if metric == "net_loss":
        # Stamp every net_loss row with the requested start date
        # (presumably because the API rows carry no "date" of their own,
        # which the merge key and cursor need - confirm against the API).
        generated[0].add_map(add_date(start_date))
    yield from generated
82
+
83
+
@dataclass
class MetricCfg:
    """Request settings resolved for one metric by ``get_metric_cfg``."""

    # Endpoint path; pulse_source passes it as the resource endpoint "path".
    path: str
    # Extra query parameters merged into the endpoint "params".
    params: Dict[str, Any]
88
+
89
+
def get_metric_cfg(metric: str, opts: List[str], start_date: str) -> MetricCfg:
    """Resolve the API path and query parameters for a metric.

    Args:
        metric: Metric name; must be a key of ``METRICS``.
        opts: Metric-specific options. With no options the bare metric path
            is returned. Otherwise the last option is used as the country
            (or domain, for ``dnssec_adoption``) and earlier options carry
            metric-specific flags such as ``topsites``, the IP version
            (``roa``) or the shutdown type (``net_loss``).
        start_date: Date in ``YYYY-MM-DD`` form. Only parsed for the
            ``resilience`` metric, to derive the year and quarter.

    Returns:
        The ``MetricCfg`` holding the endpoint path and extra query params.

    Raises:
        ValueError: If the metric is unknown, if options are supplied for a
            metric that has no option handling here, or if ``start_date``
            cannot be parsed for ``resilience``.
        IndexError: If ``net_loss`` is given fewer than two options
            (``validate`` is expected to reject that beforehand).
    """
    path = METRICS.get(metric)
    if path is None:
        raise ValueError(f"Unknown metric '{metric}'.")
    if len(opts) == 0:
        return MetricCfg(path=path, params={})

    if metric == "https":
        return MetricCfg(
            path=f"{path}/country/{opts[-1]}",
            params={
                "topsites": "topsites" in opts,
            },
        )
    elif metric in ["dnssec_validation", "dnssec_tld_adoption"]:
        return MetricCfg(path=f"{path}/country/{opts[-1]}", params={})
    elif metric == "dnssec_adoption":
        return MetricCfg(path=f"{path}/domains/{opts[-1]}", params={})
    elif metric == "ipv6":
        if "topsites" in opts:
            return MetricCfg(path=path, params={"topsites": True})
        return MetricCfg(path=f"{path}/country/{opts[-1]}", params={})
    elif metric == "roa":
        # Two options: <ip_version>, <country>. One option: <ip_version>.
        if len(opts) > 1:
            return MetricCfg(
                path=f"{path}/country/{opts[-1]}", params={"ip_version": opts[-2]}
            )
        return MetricCfg(path=path, params={"ip_version": opts[-1]})
    elif metric == "net_loss":
        return MetricCfg(
            path=path,
            params={
                "country": opts[-1],
                "shutdown_type": opts[-2],
            },
        )
    elif metric == "resilience":
        date = datetime.strptime(start_date, "%Y-%m-%d")
        return MetricCfg(
            path=path,
            params={
                "country": opts[-1],
                "year": date.year,
                # Calendar quarter 1-4: Jan-Mar -> 1, ..., Oct-Dec -> 4.
                # The previous floor(month / 4) + 1 put July in Q2 and
                # October/November in Q3.
                "quarter": (date.month - 1) // 3 + 1,
            },
        )
    else:
        raise ValueError(
            f"Unsupported metric '{metric}' with options {opts}. "
            "Please check the metric and options."
        )
141
+
142
+
def add_date(start_date: str):
    """Build a row mapper that sets each item's ``"date"`` to *start_date*.

    The returned callable mutates the item it receives and returns that
    same object.
    """

    def transform(item: dict):
        item.update({"date": start_date})
        return item

    return transform
149
+
150
+
def validate(metric: str, opts: List[str]) -> None:
    """Reject option lists that a metric cannot accept.

    Args:
        metric: Metric name.
        opts: Options supplied for the metric.

    Raises:
        ValueError: If ``net_loss`` does not get exactly two options, or if
            any option is passed to ``http``, ``http3``, ``tls``, ``tls13``
            or ``rov``.
    """
    option_count = len(opts)
    option_free_metrics = ("http", "http3", "tls", "tls13", "rov")

    if metric == "net_loss":
        if option_count != 2:
            message = (
                "For 'net_loss' metric, two options are required: "
                "'shutdown_type' and 'country'."
            )
            raise ValueError(message)
    elif option_count > 0 and metric in option_free_metrics:
        raise ValueError(f"metric '{metric}' does not support options. ")