ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,128 @@
1
+ """
2
+ Allium source for data extraction via REST API.
3
+
4
+ This source provides access to Allium blockchain data via asynchronous query execution.
5
+ """
6
+
7
+ import time
8
+ from typing import Any, Iterator
9
+
10
+ import dlt
11
+
12
+ from ingestr.src.http_client import create_client
13
+
14
+
15
@dlt.source(max_table_nesting=0, name="allium_source")
def allium_source(
    api_key: str,
    query_id: str,
    parameters: dict[str, Any] | None = None,
    limit: int | None = None,
    compute_profile: str | None = None,
) -> Any:
    """
    Allium data source for blockchain data extraction.

    Starts an asynchronous run of a saved Allium query, polls the run until it
    finishes, and exposes the result rows as a single ``query_results``
    resource that replaces the destination table on every load.

    Args:
        api_key: Allium API key, sent in the ``X-API-Key`` header.
        query_id: The query ID to execute (e.g., 'abc123').
        parameters: Optional parameters for the query
            (e.g., {'start_date': '2025-02-01', 'end_date': '2025-02-02'}).
            An empty dict is sent when omitted.
        limit: Limit the number of rows in the result (max 250,000). Only
            sent when not None.
        compute_profile: Compute profile identifier. Only sent when not None.

    Returns:
        A one-element tuple holding the ``query_results`` resource.
    """
    base_url = "https://api.allium.so/api/v1/explorer"
    session = create_client()
    headers = {"X-API-Key": api_key}

    @dlt.resource(
        name="query_results",
        write_disposition="replace",
    )
    def fetch_query_results() -> Iterator[dict[str, Any]]:
        """
        Run the query and yield its result rows.

        Steps:
            1. Start an async query execution.
            2. Poll the run status until it succeeds, fails, or times out.
            3. Fetch the results and yield the ``data`` list as one item.

        Raises:
            ValueError: If the run cannot be started, the run reports
                ``failed``, or an unrecognised status is returned.
            TimeoutError: If the run is still in progress after the polling
                budget is exhausted.
        """
        # Step 1: Start async query execution
        run_config: dict[str, Any] = {}
        if limit is not None:
            run_config["limit"] = limit
        if compute_profile is not None:
            run_config["compute_profile"] = compute_profile

        run_response = session.post(
            f"{base_url}/queries/{query_id}/run-async",
            json={"parameters": parameters or {}, "run_config": run_config},
            headers=headers,
        )

        # An error page (proxy/gateway HTML, empty body) is not valid JSON.
        # Report the HTTP status and body instead of a bare decode error,
        # using the same exception type as every other start failure.
        try:
            run_data = run_response.json()
        except ValueError as exc:
            raise ValueError(
                "Failed to start query execution: "
                f"HTTP {run_response.status_code}: {run_response.text}"
            ) from exc

        if not isinstance(run_data, dict) or "run_id" not in run_data:
            raise ValueError(f"Failed to start query execution: {run_data}")

        run_id = run_data["run_id"]

        # Step 2: Poll for completion
        max_retries = 8640  # Max 12 hours of waiting with 5-second intervals
        poll_interval = 5  # seconds

        for _ in range(max_retries):
            status_response = session.get(
                f"{base_url}/query-runs/{run_id}/status",
                headers=headers,
            )
            status_response.raise_for_status()
            status_data = status_response.json()

            # The endpoint may answer with a bare status string or an object
            # carrying a "status" key; anything else falls through to the
            # "Unknown status" error below instead of crashing on .get().
            if isinstance(status_data, dict):
                status = status_data.get("status")
            else:
                status = status_data

            if status == "success":
                break
            if status == "failed":
                error_msg = (
                    status_data.get("error", "Unknown error")
                    if isinstance(status_data, dict)
                    else "Unknown error"
                )
                raise ValueError(f"Query execution failed: {error_msg}")
            if status not in ("pending", "running", "queued"):
                raise ValueError(f"Unknown status: {status}")

            time.sleep(poll_interval)
        else:
            # Loop ended without a break: the run never reached "success".
            raise TimeoutError(
                f"Query execution timed out after {max_retries * poll_interval} seconds"
            )

        # Step 3: Fetch results
        results_response = session.get(
            f"{base_url}/query-runs/{run_id}/results",
            headers=headers,
            params={"f": "json"},
        )
        results_response.raise_for_status()
        query_output = results_response.json()

        # The whole row list is yielded as a single item.
        yield query_output.get("data", [])

    return (fetch_query_results,)
@@ -0,0 +1,277 @@
1
+ """Anthropic source for loading Claude Code usage analytics and other Anthropic API data."""
2
+
3
+ from typing import Any, Dict, Iterator, Optional, Sequence
4
+
5
+ import dlt
6
+ import pendulum
7
+ from dlt.sources import DltResource
8
+
9
+ from .helpers import (
10
+ fetch_api_keys,
11
+ fetch_claude_code_usage,
12
+ fetch_cost_report,
13
+ fetch_invites,
14
+ fetch_organization_info,
15
+ fetch_usage_report,
16
+ fetch_users,
17
+ fetch_workspace_members,
18
+ fetch_workspaces,
19
+ )
20
+
21
+
22
@dlt.source(max_table_nesting=0)
def anthropic_source(
    api_key: str,
    initial_start_date: Optional[pendulum.DateTime] = None,
    end_date: Optional[pendulum.DateTime] = None,
) -> Sequence[DltResource]:
    """
    Load data from Anthropic APIs.

    Resources returned, in order: ``claude_code_usage``, ``usage_report``,
    ``cost_report``, ``organization``, ``workspaces``, ``api_keys``,
    ``invites``, ``users`` and ``workspace_members``.

    Args:
        api_key: Anthropic Admin API key (starts with sk-ant-admin...)
        initial_start_date: Start date for data retrieval (defaults to 2023-01-01)
        end_date: Optional end date for data retrieval; when omitted the
            current time is used as the upper bound.

    Returns:
        Sequence of DLT resources with Anthropic data
    """

    # Fall back to 2023-01-01 when the caller supplies no start date.
    if initial_start_date is None:
        start_date: pendulum.DateTime = pendulum.datetime(2023, 1, 1)
    else:
        start_date = initial_start_date

    # The incremental cursor works on date strings; no end date means open-ended.
    end_value_str = end_date.to_date_string() if end_date is not None else None

    def _to_datetime(raw: Optional[str]) -> pendulum.DateTime:
        """Parse *raw* into a DateTime; empty or unusable values become "now"."""
        parsed = pendulum.parse(raw) if raw else pendulum.now()
        # DateTime must be tested first because it is also a Date.
        if isinstance(parsed, pendulum.DateTime):
            return parsed
        if isinstance(parsed, pendulum.Date):
            return pendulum.datetime(parsed.year, parsed.month, parsed.day)
        return pendulum.now()

    @dlt.resource(
        name="claude_code_usage",
        write_disposition="merge",
        primary_key=["date", "actor_type", "actor_id", "terminal_type"],
    )
    def claude_code_usage(
        date: dlt.sources.incremental[str] = dlt.sources.incremental(
            "date",
            initial_value=start_date.to_date_string(),
            end_value=end_value_str,
        ),
    ) -> Iterator[Dict[str, Any]]:
        """
        Load Claude Code usage analytics one day at a time.

        The range starts at the incremental cursor's last value (or its
        initial value on the first run) and ends, inclusively, at the
        cursor's end value or today when no end value is set. Each day's
        records come from ``fetch_claude_code_usage`` and are yielded as-is.
        """
        day = _to_datetime(date.last_value or date.initial_value)
        last_day = _to_datetime(date.end_value)

        # Walk the range day by day, both endpoints included.
        while day.date() <= last_day.date():
            yield from fetch_claude_code_usage(api_key, day.to_date_string())
            day = day.add(days=1)

    @dlt.resource(
        name="usage_report",
        write_disposition="merge",
        primary_key=["bucket", "api_key_id", "workspace_id", "model", "service_tier"],
    )
    def usage_report() -> Iterator[Dict[str, Any]]:
        """
        Load usage report records for the configured window.

        The window runs from the source start date to ``end_date`` (or now),
        passed as ISO 8601 strings, using hourly buckets.
        """
        window_end = end_date if end_date else pendulum.now()
        yield from fetch_usage_report(
            api_key,
            starting_at=start_date.to_iso8601_string(),
            ending_at=window_end.to_iso8601_string(),
            bucket_width="1h",  # Hourly buckets by default
        )

    @dlt.resource(
        name="cost_report",
        write_disposition="merge",
        primary_key=["bucket", "workspace_id", "description"],
    )
    def cost_report() -> Iterator[Dict[str, Any]]:
        """
        Load cost report records for the configured window.

        Uses the same window as the usage report: source start date to
        ``end_date`` (or now), passed as ISO 8601 strings.
        """
        window_end = end_date if end_date else pendulum.now()
        yield from fetch_cost_report(
            api_key,
            starting_at=start_date.to_iso8601_string(),
            ending_at=window_end.to_iso8601_string(),
        )

    @dlt.resource(
        name="organization",
        write_disposition="replace",
    )
    def organization() -> Iterator[Dict[str, Any]]:
        """Yield the organization record, or nothing when the lookup is empty."""
        details = fetch_organization_info(api_key)
        if not details:
            return
        yield details

    @dlt.resource(
        name="workspaces",
        write_disposition="replace",
        primary_key=["id"],
    )
    def workspaces() -> Iterator[Dict[str, Any]]:
        """Yield every workspace returned by ``fetch_workspaces``."""
        yield from fetch_workspaces(api_key)

    @dlt.resource(
        name="api_keys",
        write_disposition="replace",
        primary_key=["id"],
    )
    def api_keys() -> Iterator[Dict[str, Any]]:
        """Yield every API key record returned by ``fetch_api_keys``."""
        yield from fetch_api_keys(api_key)

    @dlt.resource(
        name="invites",
        write_disposition="replace",
        primary_key=["id"],
    )
    def invites() -> Iterator[Dict[str, Any]]:
        """Yield every invite returned by ``fetch_invites``."""
        yield from fetch_invites(api_key)

    @dlt.resource(
        name="users",
        write_disposition="replace",
        primary_key=["id"],
    )
    def users() -> Iterator[Dict[str, Any]]:
        """Yield every user returned by ``fetch_users``."""
        yield from fetch_users(api_key)

    @dlt.resource(
        name="workspace_members",
        write_disposition="replace",
        primary_key=["workspace_id", "user_id"],
    )
    def workspace_members() -> Iterator[Dict[str, Any]]:
        """
        Yield the members of every workspace.

        Workspaces are listed first; those without a truthy ``id`` are skipped.
        """
        for workspace in fetch_workspaces(api_key):
            workspace_id = workspace.get("id")
            if not workspace_id:
                continue
            yield from fetch_workspace_members(api_key, workspace_id)

    return [
        claude_code_usage,
        usage_report,
        cost_report,
        organization,
        workspaces,
        api_keys,
        invites,
        users,
        workspace_members,
    ]