ingestr 0.13.93__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -46,7 +46,7 @@ def adjust_source(
     filters: Optional[dict] = None,
 ) -> Sequence[DltResource]:
     @dlt.resource(write_disposition="merge", merge_key="day")
-    def campaigns():
+    def campaigns() -> DltResource:
         adjust_api = AdjustAPI(api_key=api_key)
         yield from adjust_api.fetch_report_data(
             start_date=start_date,
@@ -57,12 +57,12 @@ def adjust_source(
     )

     @dlt.resource(write_disposition="replace", primary_key="id")
-    def events():
+    def events() -> DltResource:
         adjust_api = AdjustAPI(api_key=api_key)
         yield adjust_api.fetch_events()

     @dlt.resource(write_disposition="merge", merge_key="day")
-    def creatives():
+    def creatives() -> DltResource:
         adjust_api = AdjustAPI(api_key=api_key)
         yield from adjust_api.fetch_report_data(
             start_date=start_date,
@@ -95,7 +95,7 @@ def adjust_source(
         primary_key=dimensions,
         columns=type_hints,
     )
-    def custom():
+    def custom() -> DltResource:
         adjust_api = AdjustAPI(api_key=api_key)
         yield from adjust_api.fetch_report_data(
             start_date=start_date,
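
These hunks only add `-> DltResource` return annotations to the nested resource functions in the Adjust source; the runtime behavior is unchanged. As a minimal, self-contained sketch of the pattern (illustrative only, not ingestr code; the Adjust API client is omitted), the `dlt.resource` decorator wraps the annotated generator function into a `DltResource` object:

import dlt
from dlt.sources import DltResource

@dlt.resource(write_disposition="merge", merge_key="day")
def campaigns() -> DltResource:
    # The decorated name is a DltResource; the annotation documents what the
    # decorator produces, while the body stays a plain dict generator.
    yield {"day": "2024-01-01", "network": "organic", "installs": 42}

# Resources can be passed straight to a pipeline:
pipeline = dlt.pipeline(pipeline_name="adjust_demo", destination="duckdb", dataset_name="demo")
pipeline.run(campaigns)

The remainder of the diff adds a new Anthropic source module (all lines added):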
@@ -0,0 +1,277 @@
+"""Anthropic source for loading Claude Code usage analytics and other Anthropic API data."""
+
+from typing import Any, Dict, Iterator, Optional, Sequence
+
+import dlt
+import pendulum
+from dlt.sources import DltResource
+
+from .helpers import (
+    fetch_api_keys,
+    fetch_claude_code_usage,
+    fetch_cost_report,
+    fetch_invites,
+    fetch_organization_info,
+    fetch_usage_report,
+    fetch_users,
+    fetch_workspace_members,
+    fetch_workspaces,
+)
+
+
+@dlt.source(max_table_nesting=0)
+def anthropic_source(
+    api_key: str,
+    initial_start_date: Optional[pendulum.DateTime] = None,
+    end_date: Optional[pendulum.DateTime] = None,
+) -> Sequence[DltResource]:
+    """
+    Load data from Anthropic APIs.
+
+    Currently supports:
+    - Claude Code Usage Analytics
+
+    Args:
+        api_key: Anthropic Admin API key (starts with sk-ant-admin...)
+        initial_start_date: Start date for data retrieval (defaults to 2023-01-01)
+        end_date: Optional end date for data retrieval
+
+    Returns:
+        Sequence of DLT resources with Anthropic data
+    """
+
+    # Default start date to 2023-01-01 if not provided
+    start_date: pendulum.DateTime = (
+        initial_start_date
+        if initial_start_date is not None
+        else pendulum.datetime(2023, 1, 1)
+    )
+
+    # Prepare end_value for incremental
+    end_value_str = None
+    if end_date is not None:
+        end_value_str = end_date.to_date_string()
+
+    @dlt.resource(
+        name="claude_code_usage",
+        write_disposition="merge",
+        primary_key=["date", "actor_type", "actor_id", "terminal_type"],
+    )
+    def claude_code_usage(
+        date: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "date",
+            initial_value=start_date.to_date_string(),
+            end_value=end_value_str,
+        ),
+    ) -> Iterator[Dict[str, Any]]:
+        """
+        Load Claude Code usage analytics data incrementally by date.
+
+        Yields flattened records with:
+        - date: The date of the usage data
+        - actor_type: Type of actor (user_actor or api_actor)
+        - actor_id: Email address or API key name
+        - organization_id: Organization UUID
+        - customer_type: api or subscription
+        - terminal_type: Terminal/environment type
+        - Core metrics (sessions, lines of code, commits, PRs)
+        - Tool actions (accepted/rejected counts by tool)
+        - Model usage and costs
+        """
+
+        # Get the date range from the incremental state
+        start_value = date.last_value if date.last_value else date.initial_value
+        start_date_parsed = (
+            pendulum.parse(start_value) if start_value else pendulum.now()
+        )
+
+        # Ensure we have a DateTime object
+        if isinstance(start_date_parsed, pendulum.DateTime):
+            start_date = start_date_parsed
+        elif isinstance(start_date_parsed, pendulum.Date):
+            start_date = pendulum.datetime(
+                start_date_parsed.year, start_date_parsed.month, start_date_parsed.day
+            )
+        else:
+            start_date = pendulum.now()
+
+        end_filter = pendulum.now()
+        if date.end_value:
+            end_filter_parsed = pendulum.parse(date.end_value)
+            # Ensure we have a DateTime object
+            if isinstance(end_filter_parsed, pendulum.DateTime):
+                end_filter = end_filter_parsed
+            elif isinstance(end_filter_parsed, pendulum.Date):
+                end_filter = pendulum.datetime(
+                    end_filter_parsed.year,
+                    end_filter_parsed.month,
+                    end_filter_parsed.day,
+                )
+
+        # Iterate through each day in the range
+        current_date = start_date
+        while current_date.date() <= end_filter.date():
+            # Fetch data for the current date
+            for record in fetch_claude_code_usage(
+                api_key, current_date.to_date_string()
+            ):
+                yield record
+
+            # Move to the next day
+            current_date = current_date.add(days=1)
+
+    @dlt.resource(
+        name="usage_report",
+        write_disposition="merge",
+        primary_key=["bucket", "api_key_id", "workspace_id", "model", "service_tier"],
+    )
+    def usage_report() -> Iterator[Dict[str, Any]]:
+        """
+        Load usage report data from the messages endpoint.
+
+        Yields records with token usage and server tool usage metrics.
+        """
+
+        # Convert dates to ISO format with timezone
+        start_iso = start_date.to_iso8601_string()
+        end_iso = (
+            end_date.to_iso8601_string()
+            if end_date
+            else pendulum.now().to_iso8601_string()
+        )
+
+        for record in fetch_usage_report(
+            api_key,
+            starting_at=start_iso,
+            ending_at=end_iso,
+            bucket_width="1h",  # Hourly buckets by default
+        ):
+            yield record
+
+    @dlt.resource(
+        name="cost_report",
+        write_disposition="merge",
+        primary_key=["bucket", "workspace_id", "description"],
+    )
+    def cost_report() -> Iterator[Dict[str, Any]]:
+        """
+        Load cost report data.
+
+        Yields records with cost breakdowns by workspace and description.
+        """
+
+        # Convert dates to ISO format with timezone
+        start_iso = start_date.to_iso8601_string()
+        end_iso = (
+            end_date.to_iso8601_string()
+            if end_date
+            else pendulum.now().to_iso8601_string()
+        )
+
+        for record in fetch_cost_report(
+            api_key,
+            starting_at=start_iso,
+            ending_at=end_iso,
+        ):
+            yield record
+
+    @dlt.resource(
+        name="organization",
+        write_disposition="replace",
+    )
+    def organization() -> Iterator[Dict[str, Any]]:
+        """
+        Load organization information.
+
+        Yields a single record with organization details.
+        """
+        org_info = fetch_organization_info(api_key)
+        if org_info:
+            yield org_info
+
+    @dlt.resource(
+        name="workspaces",
+        write_disposition="replace",
+        primary_key=["id"],
+    )
+    def workspaces() -> Iterator[Dict[str, Any]]:
+        """
+        Load all workspaces in the organization.
+
+        Yields records with workspace details including name, type, and creation date.
+        """
+        for workspace in fetch_workspaces(api_key):
+            yield workspace
+
+    @dlt.resource(
+        name="api_keys",
+        write_disposition="replace",
+        primary_key=["id"],
+    )
+    def api_keys() -> Iterator[Dict[str, Any]]:
+        """
+        Load all API keys in the organization.
+
+        Yields records with API key details including name, status, and creation date.
+        """
+        for api_key_record in fetch_api_keys(api_key):
+            yield api_key_record
+
+    @dlt.resource(
+        name="invites",
+        write_disposition="replace",
+        primary_key=["id"],
+    )
+    def invites() -> Iterator[Dict[str, Any]]:
+        """
+        Load all pending invites in the organization.
+
+        Yields records with invite details including email, role, and expiration.
+        """
+        for invite in fetch_invites(api_key):
+            yield invite
+
+    @dlt.resource(
+        name="users",
+        write_disposition="replace",
+        primary_key=["id"],
+    )
+    def users() -> Iterator[Dict[str, Any]]:
+        """
+        Load all users in the organization.
+
+        Yields records with user details including email, name, and role.
+        """
+        for user in fetch_users(api_key):
+            yield user
+
+    @dlt.resource(
+        name="workspace_members",
+        write_disposition="replace",
+        primary_key=["workspace_id", "user_id"],
+    )
+    def workspace_members() -> Iterator[Dict[str, Any]]:
+        """
+        Load workspace members for all workspaces.
+
+        Yields records with workspace membership details.
+        """
+        # First get all workspaces
+        for workspace in fetch_workspaces(api_key):
+            workspace_id = workspace.get("id")
+            if workspace_id:
+                # Get members for each workspace
+                for member in fetch_workspace_members(api_key, workspace_id):
+                    yield member
+
+    return [
+        claude_code_usage,
+        usage_report,
+        cost_report,
+        organization,
+        workspaces,
+        api_keys,
+        invites,
+        users,
+        workspace_members,
+    ]
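
For orientation, here is a minimal sketch of loading the new source with a plain dlt pipeline. Only the `anthropic_source(api_key, initial_start_date, end_date)` signature and the resource names come from the diff above; the import path, destination, and dataset name are assumptions for illustration:

import dlt
import pendulum

# Hypothetical import path; the diff does not show where the module lives inside ingestr.
from ingestr.src.anthropic import anthropic_source

pipeline = dlt.pipeline(
    pipeline_name="anthropic_usage",
    destination="duckdb",  # any dlt destination; duckdb keeps the sketch self-contained
    dataset_name="anthropic",
)

source = anthropic_source(
    api_key="sk-ant-admin...",  # Anthropic Admin API key, per the module docstring
    initial_start_date=pendulum.datetime(2024, 1, 1),
)

# Load a subset of the nine resources, e.g. only the Claude Code analytics table.
info = pipeline.run(source.with_resources("claude_code_usage"))
print(info)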