ingestr 0.13.80__py3-none-any.whl → 0.13.82__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.80"
1
+ version = "v0.13.82"
ingestr/src/facebook_ads/__init__.py CHANGED
@@ -22,12 +22,8 @@ from .settings import (
22
22
  DEFAULT_ADCREATIVE_FIELDS,
23
23
  DEFAULT_ADSET_FIELDS,
24
24
  DEFAULT_CAMPAIGN_FIELDS,
25
- DEFAULT_INSIGHT_FIELDS,
26
25
  DEFAULT_LEAD_FIELDS,
27
26
  INSIGHT_FIELDS_TYPES,
28
- INSIGHTS_BREAKDOWNS_OPTIONS,
29
- INVALID_INSIGHTS_FIELDS,
30
- TInsightsBreakdownOptions,
31
27
  TInsightsLevels,
32
28
  )
33
29
 
@@ -105,10 +101,9 @@ def facebook_insights_source(
105
101
  account_id: str = dlt.config.value,
106
102
  access_token: str = dlt.secrets.value,
107
103
  initial_load_past_days: int = 1,
108
- fields: Sequence[str] = DEFAULT_INSIGHT_FIELDS,
109
- attribution_window_days_lag: int = 7,
104
+ dimensions: Sequence[str] = None,
105
+ fields: Sequence[str] = None,
110
106
  time_increment_days: int = 1,
111
- breakdowns: TInsightsBreakdownOptions = "ads_insights",
112
107
  action_breakdowns: Sequence[str] = ALL_ACTION_BREAKDOWNS,
113
108
  level: TInsightsLevels = "ad",
114
109
  action_attribution_windows: Sequence[str] = ALL_ACTION_ATTRIBUTION_WINDOWS,
@@ -155,6 +150,11 @@ def facebook_insights_source(
155
150
  if start_date is None:
156
151
  start_date = pendulum.today().subtract(days=initial_load_past_days)
157
152
 
153
+ if dimensions is None:
154
+ dimensions = []
155
+ if fields is None:
156
+ fields = []
157
+
158
158
  columns = {}
159
159
  for field in fields:
160
160
  if field in INSIGHT_FIELDS_TYPES:
@@ -187,15 +187,9 @@ def facebook_insights_source(
187
187
  query = {
188
188
  "level": level,
189
189
  "action_breakdowns": list(action_breakdowns),
190
- "breakdowns": list(
191
- INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
192
- ),
190
+ "breakdowns": dimensions,
193
191
  "limit": batch_size,
194
- "fields": list(
195
- set(fields)
196
- .union(INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"])
197
- .difference(INVALID_INSIGHTS_FIELDS)
198
- ),
192
+ "fields": fields,
199
193
  "time_increment": time_increment_days,
200
194
  "action_attribution_windows": list(action_attribution_windows),
201
195
  "time_ranges": [
ingestr/src/facebook_ads/helpers.py CHANGED
@@ -229,3 +229,49 @@ def notify_on_token_expiration(access_token_expires_at: int = None) -> None:
229
229
  logger.error(
230
230
  f"Access Token expires in {humanize.precisedelta(pendulum.now() - expires_at)}. Replace the token now!"
231
231
  )
232
+
233
+
234
+ def parse_insights_table_to_source_kwargs(table: str) -> DictStrAny:
235
+ import typing
236
+
237
+ from ingestr.src.facebook_ads.settings import (
238
+ INSIGHTS_BREAKDOWNS_OPTIONS,
239
+ TInsightsBreakdownOptions,
240
+ TInsightsLevels,
241
+ )
242
+
243
+ parts = table.split(":")
244
+
245
+ source_kwargs = {}
246
+
247
+ breakdown_type = parts[1]
248
+
249
+ valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
250
+ if breakdown_type in valid_breakdowns:
251
+ dimensions = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["breakdowns"]
252
+ fields = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]["fields"]
253
+ source_kwargs["dimensions"] = dimensions
254
+ source_kwargs["fields"] = fields
255
+ else:
256
+ dimensions = breakdown_type.split(",")
257
+ valid_levels = list(typing.get_args(TInsightsLevels))
258
+ level = None
259
+ for valid_level in reversed(valid_levels):
260
+ if valid_level in dimensions:
261
+ level = valid_level
262
+ dimensions.remove(valid_level)
263
+ break
264
+
265
+ source_kwargs["level"] = level
266
+ source_kwargs["dimensions"] = dimensions
267
+
268
+ # If custom metrics are provided, parse them
269
+ if len(parts) == 3:
270
+ fields = [f.strip() for f in parts[2].split(",") if f.strip()]
271
+ if not fields:
272
+ raise ValueError(
273
+ "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
274
+ )
275
+ source_kwargs["fields"] = fields
276
+
277
+ return source_kwargs
ingestr/src/linear/__init__.py CHANGED
@@ -2,24 +2,11 @@ from typing import Any, Dict, Iterable, Iterator
2
2
 
3
3
  import dlt
4
4
  import pendulum
5
+ import requests
5
6
 
6
- from .helpers import _paginate, normalize_dictionaries
7
+ from .helpers import _graphql, normalize_dictionaries, _get_date_range, _create_paginated_resource
7
8
 
8
9
 
9
- def _get_date_range(updated_at, start_date):
10
- """Extract current start and end dates from incremental state."""
11
- if updated_at.last_value:
12
- current_start_date = pendulum.parse(updated_at.last_value)
13
- else:
14
- current_start_date = pendulum.parse(start_date)
15
-
16
- if updated_at.end_value:
17
- current_end_date = pendulum.parse(updated_at.end_value)
18
- else:
19
- current_end_date = pendulum.now(tz="UTC")
20
-
21
- return current_start_date, current_end_date
22
-
23
10
  ISSUES_QUERY = """
24
11
  query Issues($cursor: String) {
25
12
  issues(first: 50, after: $cursor) {
@@ -29,55 +16,409 @@ query Issues($cursor: String) {
29
16
  description
30
17
  createdAt
31
18
  updatedAt
32
- creator { id }
33
- assignee { id}
34
- state { id}
35
- labels { nodes { id } }
36
- cycle { id}
37
- project { id }
38
- subtasks: children { nodes { id title } }
39
- comments(first: 250) { nodes { id body } }
19
+ archivedAt
20
+ addedToCycleAt
21
+ autoArchivedAt
22
+ autoClosedAt
23
+ boardOrder
24
+ branchName
25
+ canceledAt
26
+ completedAt
27
+ customerTicketCount
28
+ descriptionState
29
+ dueDate
30
+ estimate
31
+ identifier
32
+ integrationSourceType
33
+ labelIds
34
+ number
35
+ previousIdentifiers
40
36
  priority
41
- attachments { nodes { id } }
42
- subscribers { nodes { id } }
37
+ priorityLabel
38
+ prioritySortOrder
39
+ reactionData
40
+ slaBreachesAt
41
+ slaHighRiskAt
42
+ slaMediumRiskAt
43
+ slaStartedAt
44
+ slaType
45
+ snoozedUntilAt
46
+ sortOrder
47
+ startedAt
48
+ startedTriageAt
49
+ subIssueSortOrder
50
+ triagedAt
51
+ url
52
+
53
+ creator { id }
54
+ assignee { id }
55
+ botActor { id name type }
56
+ cycle { id }
57
+ delegate { id }
58
+ externalUserCreator { id }
59
+ favorite { id }
60
+ lastAppliedTemplate { id }
61
+ parent { id }
62
+ projectMilestone { id }
63
+ recurringIssueTemplate { id }
64
+ snoozedBy { id }
65
+ sourceComment { id }
66
+ state { id }
67
+
68
+ labels(first: 250) {
69
+ nodes {
70
+ id
71
+ }
72
+ }
43
73
  }
44
74
  pageInfo { hasNextPage endCursor }
45
75
  }
46
76
  }
47
77
  """
48
78
 
49
- PROJECTS_QUERY = """
50
- query Projects($cursor: String) {
51
- projects(first: 50, after: $cursor) {
79
+ ATTACHMENTS_QUERY = """
80
+ query Attachments($cursor: String) {
81
+ attachments(first: 50, after: $cursor) {
52
82
  nodes {
53
83
  id
54
- name
84
+ archivedAt
85
+ bodyData
86
+ createdAt
87
+ groupBySource
88
+ metadata
89
+ sourceType
90
+ subtitle
91
+ title
92
+ updatedAt
93
+ url
94
+
95
+ creator { id }
96
+ externalUserCreator { id }
97
+ issue { id }
98
+ originalIssue { id }
99
+ }
100
+ pageInfo { hasNextPage endCursor }
101
+ }
102
+ }
103
+ """
104
+
105
+ COMMENTS_QUERY = """
106
+ query Comments($cursor: String) {
107
+ comments(first: 50, after: $cursor) {
108
+ nodes {
109
+ id
110
+ archivedAt
111
+ body
112
+ bodyData
113
+ createdAt
114
+ editedAt
115
+ quotedText
116
+ reactionData
117
+ resolvedAt
118
+ threadSummary
119
+ updatedAt
120
+ url
121
+
122
+ botActor { id }
123
+ documentContent { id }
124
+ externalThread { id }
125
+ externalUser { id }
126
+ initiativeUpdate { id }
127
+ issue { id }
128
+ parent { id }
129
+ post { id }
130
+ projectUpdate { id }
131
+ resolvingComment { id }
132
+ resolvingUser { id }
133
+ user { id }
134
+ }
135
+ pageInfo { hasNextPage endCursor }
136
+ }
137
+ }
138
+ """
139
+
140
+ CYCLES_QUERY = """
141
+ query Cycles($cursor: String) {
142
+ cycles(first: 50, after: $cursor) {
143
+ nodes {
144
+ id
145
+ archivedAt
146
+ autoArchivedAt
147
+ completedAt
148
+ completedIssueCountHistory
149
+ completedScopeHistory
150
+ createdAt
55
151
  description
152
+ endsAt
153
+ inProgressScopeHistory
154
+ issueCountHistory
155
+ name
156
+ number
157
+ progress
158
+ scopeHistory
159
+ startsAt
160
+ updatedAt
161
+ }
162
+ pageInfo { hasNextPage endCursor }
163
+ }
164
+ }
165
+ """
166
+
167
+ DOCUMENTS_QUERY = """
168
+ query Documents($cursor: String) {
169
+ documents(first: 50, after: $cursor) {
170
+ nodes {
171
+ id
172
+ archivedAt
173
+ color
56
174
  createdAt
175
+ icon
176
+ slugId
177
+ title
57
178
  updatedAt
179
+
180
+ creator { id }
181
+ updatedBy { id }
182
+ }
183
+ pageInfo { hasNextPage endCursor }
184
+ }
185
+ }
186
+ """
187
+
188
+ EXTERNAL_USERS_QUERY = """
189
+ query ExternalUsers($cursor: String) {
190
+ externalUsers(first: 50, after: $cursor) {
191
+ nodes {
192
+ id
193
+ archivedAt
194
+ avatarUrl
195
+ createdAt
196
+ displayName
197
+ email
198
+ lastSeen
199
+ name
200
+ updatedAt
201
+
202
+ organization { id }
203
+ }
204
+ pageInfo { hasNextPage endCursor }
205
+ }
206
+ }
207
+ """
208
+
209
+ INITIATIVES_QUERY = """
210
+ query Initiatives($cursor: String) {
211
+ initiatives(first: 50, after: $cursor) {
212
+ nodes {
213
+ id
214
+ archivedAt
215
+ color
216
+ completedAt
217
+ content
218
+ createdAt
219
+ description
220
+ frequencyResolution
58
221
  health
59
- priority
222
+ healthUpdatedAt
223
+ icon
224
+ name
225
+ slugId
226
+ sortOrder
227
+ startedAt
228
+ status
60
229
  targetDate
61
- lead { id }
230
+ targetDateResolution
231
+ trashed
232
+ updateReminderFrequency
233
+ updateReminderFrequencyInWeeks
234
+ updateRemindersDay
235
+ updateRemindersHour
236
+ updatedAt
237
+
238
+ creator { id }
239
+ documentContent { id }
240
+ integrationsSettings { id }
241
+ lastUpdate { id }
242
+ organization { id }
243
+ owner { id }
244
+ parentInitiative { id }
62
245
  }
63
246
  pageInfo { hasNextPage endCursor }
64
247
  }
65
248
  }
66
249
  """
67
250
 
68
- TEAMS_QUERY = """
69
- query Teams($cursor: String) {
70
- teams(first: 50, after: $cursor) {
251
+
252
+
253
+ INITIATIVE_TO_PROJECTS_QUERY = """
254
+ query InitiativeToProjects($cursor: String) {
255
+ initiativeToProjects(first: 50, after: $cursor) {
71
256
  nodes {
72
257
  id
258
+ archivedAt
259
+ createdAt
260
+ sortOrder
261
+ updatedAt
262
+
263
+ initiative { id }
264
+ }
265
+ pageInfo { hasNextPage endCursor }
266
+ }
267
+ }
268
+ """
269
+
270
+ PROJECT_MILESTONES_QUERY = """
271
+ query ProjectMilestones($cursor: String) {
272
+ projectMilestones(first: 50, after: $cursor) {
273
+ nodes {
274
+ id
275
+ archivedAt
276
+ createdAt
277
+ currentProgress
278
+ description
279
+ descriptionState
73
280
  name
74
- key
281
+ progress
282
+ progressHistory
283
+ sortOrder
284
+ status
285
+ targetDate
286
+ updatedAt
287
+
288
+ documentContent { id }
289
+ }
290
+ pageInfo { hasNextPage endCursor }
291
+ }
292
+ }
293
+ """
294
+
295
+ PROJECT_STATUSES_QUERY = """
296
+ query ProjectStatuses($cursor: String) {
297
+ projectStatuses(first: 50, after: $cursor) {
298
+ nodes {
299
+ id
300
+ archivedAt
301
+ color
302
+ createdAt
75
303
  description
304
+ indefinite
305
+ name
306
+ position
307
+ type
76
308
  updatedAt
309
+ }
310
+ pageInfo { hasNextPage endCursor }
311
+ }
312
+ }
313
+ """
314
+
315
+ INTEGRATIONS_QUERY = """
316
+ query Integrations($cursor: String) {
317
+ integrations(first: 50, after: $cursor) {
318
+ nodes {
319
+ id
320
+ archivedAt
77
321
  createdAt
78
- memberships { nodes { id } }
79
- members { nodes { id } }
80
- projects { nodes { id } }
322
+ service
323
+ updatedAt
324
+
325
+ creator { id }
326
+ organization { id }
327
+ }
328
+ pageInfo { hasNextPage endCursor }
329
+ }
330
+ }
331
+ """
332
+
333
+
334
+ LABELS_QUERY = """
335
+ query IssueLabels($cursor: String) {
336
+ issueLabels(first: 50, after: $cursor) {
337
+ nodes {
338
+ id
339
+ archivedAt
340
+ color
341
+ createdAt
342
+ description
343
+ name
344
+ updatedAt
345
+
346
+ creator { id }
347
+ organization { id }
348
+ parent { id }
349
+ }
350
+ pageInfo { hasNextPage endCursor }
351
+ }
352
+ }
353
+ """
354
+
355
+
356
+ ORGANIZATION_QUERY = """
357
+ query Organization {
358
+ viewer {
359
+ organization {
360
+ id
361
+ name
362
+ createdAt
363
+ updatedAt
364
+ archivedAt
365
+ logoUrl
366
+ allowMembersToInvite
367
+ allowedAuthServices
368
+ createdIssueCount
369
+ customerCount
370
+ customersEnabled
371
+ deletionRequestedAt
372
+ gitBranchFormat
373
+ gitLinkbackMessagesEnabled
374
+ gitPublicLinkbackMessagesEnabled
375
+ logoUrl
376
+ periodUploadVolume
377
+ previousUrlKeys
378
+ roadmapEnabled
379
+ samlEnabled
380
+ scimEnabled
381
+ }
382
+ }
383
+ }
384
+ """
385
+
386
+
387
+ PROJECT_UPDATES_QUERY = """
388
+ query ProjectUpdates($cursor: String) {
389
+ projectUpdates(first: 50, after: $cursor) {
390
+ nodes {
391
+ id
392
+ archivedAt
393
+ body
394
+ bodyData
395
+ createdAt
396
+ diffMarkdown
397
+ health
398
+ updatedAt
399
+ url
400
+
401
+ user { id }
402
+ }
403
+ pageInfo { hasNextPage endCursor }
404
+ }
405
+ }
406
+ """
407
+
408
+
409
+
410
+ TEAM_MEMBERSHIPS_QUERY = """
411
+ query TeamMemberships($cursor: String) {
412
+ teamMemberships(first: 50, after: $cursor) {
413
+ nodes {
414
+ id
415
+ archivedAt
416
+ createdAt
417
+ owner
418
+ sortOrder
419
+ updatedAt
420
+
421
+ user { id }
81
422
  }
82
423
  pageInfo { hasNextPage endCursor }
83
424
  }
@@ -89,11 +430,29 @@ query Users($cursor: String) {
89
430
  users(first: 50, after: $cursor) {
90
431
  nodes {
91
432
  id
92
- name
433
+ active
434
+ admin
435
+ archivedAt
436
+ avatarUrl
437
+ calendarHash
438
+ createdAt
439
+ createdIssueCount
440
+ description
441
+ disableReason
93
442
  displayName
94
443
  email
95
- createdAt
444
+ guest
445
+ inviteHash
446
+ lastSeen
447
+ name
448
+ statusEmoji
449
+ statusLabel
450
+ statusUntilAt
451
+ timezone
96
452
  updatedAt
453
+ url
454
+
455
+ organization { id }
97
456
  }
98
457
  pageInfo { hasNextPage endCursor }
99
458
  }
@@ -102,15 +461,14 @@ query Users($cursor: String) {
102
461
  WORKFLOW_STATES_QUERY = """
103
462
  query WorkflowStates($cursor: String) {
104
463
  workflowStates(first: 50, after: $cursor) {
105
- nodes {
464
+ nodes {
465
+ id
106
466
  archivedAt
107
467
  color
108
468
  createdAt
109
- id
110
- inheritedFrom { id }
469
+ description
111
470
  name
112
471
  position
113
- team { id }
114
472
  type
115
473
  updatedAt
116
474
  }
@@ -118,6 +476,48 @@ query WorkflowStates($cursor: String) {
118
476
  }
119
477
  }
120
478
  """
479
+ PROJECTS_QUERY = """
480
+ query Projects($cursor: String) {
481
+ projects(first: 50, after: $cursor) {
482
+ nodes {
483
+ id
484
+ name
485
+ description
486
+ createdAt
487
+ updatedAt
488
+ health
489
+ priority
490
+ targetDate
491
+ lead { id }
492
+ }
493
+ pageInfo { hasNextPage endCursor }
494
+ }
495
+ }
496
+ """
497
+
498
+
499
+ # Paginated resources configuration
500
+ PAGINATED_RESOURCES = [
501
+ ("issues", ISSUES_QUERY, "issues"),
502
+ ("users", USERS_QUERY, "users"),
503
+ ("workflow_states", WORKFLOW_STATES_QUERY, "workflowStates"),
504
+ ("cycles", CYCLES_QUERY, "cycles"),
505
+ ("attachments", ATTACHMENTS_QUERY, "attachments"),
506
+ ("comments", COMMENTS_QUERY, "comments"),
507
+ ("documents", DOCUMENTS_QUERY, "documents"),
508
+ ("external_users", EXTERNAL_USERS_QUERY, "externalUsers"),
509
+ ("initiative", INITIATIVES_QUERY, "initiatives"),
510
+ ("integrations", INTEGRATIONS_QUERY, "integrations"),
511
+ ("labels", LABELS_QUERY, "issueLabels"),
512
+ ("project_updates", PROJECT_UPDATES_QUERY, "projectUpdates"),
513
+ ("team_memberships", TEAM_MEMBERSHIPS_QUERY, "teamMemberships"),
514
+ ("initiative_to_project", INITIATIVE_TO_PROJECTS_QUERY, "initiativeToProjects"),
515
+ ("project_milestone", PROJECT_MILESTONES_QUERY, "projectMilestones"),
516
+ ("project_status", PROJECT_STATUSES_QUERY, "projectStatuses"),
517
+ ("projects", PROJECTS_QUERY, "projects"),
518
+ ]
519
+
520
+
121
521
 
122
522
  @dlt.source(name="linear", max_table_nesting=0)
123
523
  def linear_source(
@@ -125,25 +525,9 @@ def linear_source(
125
525
  start_date: pendulum.DateTime,
126
526
  end_date: pendulum.DateTime | None = None,
127
527
  ) -> Iterable[dlt.sources.DltResource]:
128
- @dlt.resource(name="issues", primary_key="id", write_disposition="merge")
129
- def issues(
130
- updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
131
- "updatedAt",
132
- initial_value=start_date.isoformat(),
133
- end_value=end_date.isoformat() if end_date else None,
134
- range_start="closed",
135
- range_end="closed",
136
- ),
137
- ) -> Iterator[Dict[str, Any]]:
138
- current_start_date, current_end_date = _get_date_range(updated_at, start_date)
139
-
140
- for item in _paginate(api_key, ISSUES_QUERY, "issues"):
141
- if pendulum.parse(item["updatedAt"]) >= current_start_date:
142
- if pendulum.parse(item["updatedAt"]) <= current_end_date:
143
- yield normalize_dictionaries(item)
144
528
 
145
- @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
146
- def projects(
529
+ @dlt.resource(name="organization", primary_key="id", write_disposition="merge")
530
+ def organization(
147
531
  updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
148
532
  "updatedAt",
149
533
  initial_value=start_date.isoformat(),
@@ -154,62 +538,20 @@ def linear_source(
154
538
  ) -> Iterator[Dict[str, Any]]:
155
539
  current_start_date, current_end_date = _get_date_range(updated_at, start_date)
156
540
 
157
- for item in _paginate(api_key, PROJECTS_QUERY, "projects"):
158
- if pendulum.parse(item["updatedAt"]) >= current_start_date:
541
+ data = _graphql(api_key, ORGANIZATION_QUERY)
542
+ if "viewer" in data and "organization" in data["viewer"]:
543
+ item = data["viewer"]["organization"]
544
+ if item and pendulum.parse(item["updatedAt"]) >= current_start_date:
159
545
  if pendulum.parse(item["updatedAt"]) <= current_end_date:
160
546
  yield normalize_dictionaries(item)
161
547
 
162
- @dlt.resource(name="teams", primary_key="id", write_disposition="merge")
163
- def teams(
164
- updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
165
- "updatedAt",
166
- initial_value=start_date.isoformat(),
167
- end_value=end_date.isoformat() if end_date else None,
168
- range_start="closed",
169
- range_end="closed",
170
- ),
171
- ) -> Iterator[Dict[str, Any]]:
172
- print(start_date)
173
- current_start_date, current_end_date = _get_date_range(updated_at, start_date)
174
- print(current_start_date)
175
-
176
- for item in _paginate(api_key, TEAMS_QUERY, "teams"):
177
- if pendulum.parse(item["updatedAt"]) >= current_start_date:
178
- if pendulum.parse(item["updatedAt"]) <= current_end_date:
179
- yield normalize_dictionaries(item)
180
-
181
- @dlt.resource(name="users", primary_key="id", write_disposition="merge")
182
- def users(
183
- updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
184
- "updatedAt",
185
- initial_value=start_date.isoformat(),
186
- end_value=end_date.isoformat() if end_date else None,
187
- range_start="closed",
188
- range_end="closed",
189
- ),
190
- ) -> Iterator[Dict[str, Any]]:
191
- current_start_date, current_end_date = _get_date_range(updated_at, start_date)
192
-
193
- for item in _paginate(api_key, USERS_QUERY, "users"):
194
- if pendulum.parse(item["updatedAt"]) >= current_start_date:
195
- if pendulum.parse(item["updatedAt"]) <= current_end_date:
196
- yield normalize_dictionaries(item)
197
-
198
- @dlt.resource(name="workflow_states", primary_key="id", write_disposition="merge")
199
- def workflow_states(
200
- updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
201
- "updatedAt",
202
- initial_value=start_date.isoformat(),
203
- end_value=end_date.isoformat() if end_date else None,
204
- range_start="closed",
205
- range_end="closed",
206
- ),
207
- ) -> Iterator[Dict[str, Any]]:
208
- current_start_date, current_end_date = _get_date_range(updated_at, start_date)
209
-
210
- for item in _paginate(api_key, WORKFLOW_STATES_QUERY, "workflowStates"):
211
- if pendulum.parse(item["updatedAt"]) >= current_start_date:
212
- if pendulum.parse(item["updatedAt"]) <= current_end_date:
213
- yield normalize_dictionaries(item)
214
- return [issues, projects, teams, users, workflow_states]
215
-
548
+ # Create paginated resources dynamically
549
+ paginated_resources = [
550
+ _create_paginated_resource(resource_name, query, query_field, api_key, start_date, end_date)
551
+ for resource_name, query, query_field in PAGINATED_RESOURCES
552
+ ]
553
+
554
+ return [
555
+ *paginated_resources,
556
+ organization,
557
+ ]
ingestr/src/linear/helpers.py CHANGED
@@ -1,6 +1,8 @@
1
1
  from typing import Any, Dict, Iterator, Optional
2
2
 
3
3
  import requests
4
+ import pendulum
5
+ import dlt
4
6
 
5
7
  LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
6
8
 
@@ -34,22 +36,66 @@ def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
34
36
 
35
37
 
36
38
 
39
+ def _get_date_range(updated_at, start_date):
40
+ """Extract current start and end dates from incremental state."""
41
+ if updated_at.last_value:
42
+ current_start_date = pendulum.parse(updated_at.last_value)
43
+ else:
44
+ current_start_date = pendulum.parse(start_date)
45
+
46
+ if updated_at.end_value:
47
+ current_end_date = pendulum.parse(updated_at.end_value)
48
+ else:
49
+ current_end_date = pendulum.now(tz="UTC")
50
+
51
+ return current_start_date, current_end_date
52
+
53
+
54
+ def _paginated_resource(api_key: str, query: str, query_field: str, updated_at, start_date) -> Iterator[Dict[str, Any]]:
55
+ """Helper function for paginated resources with date filtering."""
56
+ current_start_date, current_end_date = _get_date_range(updated_at, start_date)
57
+
58
+ for item in _paginate(api_key, query, query_field):
59
+ if pendulum.parse(item["updatedAt"]) >= current_start_date:
60
+ if pendulum.parse(item["updatedAt"]) <= current_end_date:
61
+ yield normalize_dictionaries(item)
62
+
63
+
64
+ def _create_paginated_resource(resource_name: str, query: str, query_field: str, api_key: str, start_date, end_date = None):
65
+ """Factory function to create paginated resources dynamically."""
66
+ @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
67
+ def paginated_resource(
68
+ updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
69
+ "updatedAt",
70
+ initial_value=start_date.isoformat(),
71
+ end_value=end_date.isoformat() if end_date else None,
72
+ range_start="closed",
73
+ range_end="closed",
74
+ ),
75
+ ) -> Iterator[Dict[str, Any]]:
76
+ for item in _paginated_resource(api_key, query, query_field, updated_at, start_date):
77
+ yield normalize_dictionaries(item)
78
+
79
+ return paginated_resource
80
+
81
+
37
82
  def normalize_dictionaries(item: Dict[str, Any]) -> Dict[str, Any]:
38
83
  """
39
84
  Automatically normalize dictionary fields by detecting their structure:
40
85
  - Convert nested objects with 'id' field to {field_name}_id
41
86
  - Convert objects with 'nodes' field to arrays
87
+
42
88
  """
43
89
  normalized_item = item.copy()
44
-
90
+
45
91
  for key, value in list(normalized_item.items()):
46
92
  if isinstance(value, dict):
47
93
  # If the dict has an 'id' field, replace with {key}_id
48
- if 'id' in value:
49
- normalized_item[f"{key}_id"] = value['id']
94
+ if "id" in value:
95
+ normalized_item[f"{key}_id"] = value["id"]
50
96
  del normalized_item[key]
51
97
  # If the dict has 'nodes' field, extract the nodes array
52
- elif 'nodes' in value:
53
- normalized_item[key] = value['nodes']
54
-
98
+ elif "nodes" in value:
99
+ normalized_item[key] = value["nodes"]
100
+
55
101
  return normalized_item
ingestr/src/sources.py CHANGED
@@ -1056,33 +1056,16 @@ class FacebookAdsSource:
1056
1056
  # Validate breakdown type against available options from settings
1057
1057
  import typing
1058
1058
 
1059
- from ingestr.src.facebook_ads.settings import TInsightsBreakdownOptions
1060
-
1061
- # Get valid breakdown options from the type definition
1062
- valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
1063
-
1064
- if breakdown_type not in valid_breakdowns:
1065
- raise ValueError(
1066
- f"Invalid breakdown type '{breakdown_type}'. Valid options: {', '.join(valid_breakdowns)}"
1067
- )
1059
+ from ingestr.src.facebook_ads.helpers import parse_insights_table_to_source_kwargs
1068
1060
 
1069
1061
  source_kwargs = {
1070
1062
  "access_token": access_token[0],
1071
1063
  "account_id": account_id[0],
1072
1064
  "start_date": kwargs.get("interval_start"),
1073
1065
  "end_date": kwargs.get("interval_end"),
1074
- "breakdowns": breakdown_type,
1075
1066
  }
1076
1067
 
1077
- # If custom metrics are provided, parse them
1078
- if len(parts) == 3:
1079
- fields = [f.strip() for f in parts[2].split(",") if f.strip()]
1080
- if not fields:
1081
- raise ValueError(
1082
- "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
1083
- )
1084
- source_kwargs["fields"] = fields
1085
-
1068
+ source_kwargs.update(parse_insights_table_to_source_kwargs(table))
1086
1069
  return facebook_insights_source(**source_kwargs).with_resources(
1087
1070
  "facebook_insights"
1088
1071
  )
@@ -3257,7 +3240,27 @@ class LinearSource:
3257
3240
  if api_key is None:
3258
3241
  raise MissingValueError("api_key", "Linear")
3259
3242
 
3260
- if table not in ["issues", "projects", "teams", "users", "workflow_states"]:
3243
+ if table not in [
3244
+ "issues",
3245
+ "projects",
3246
+ "teams",
3247
+ "users",
3248
+ "workflow_states",
3249
+ "cycles",
3250
+ "attachments",
3251
+ "comments",
3252
+ "documents",
3253
+ "external_users",
3254
+ "initiative",
3255
+ "integrations",
3256
+ "labels",
3257
+ "organization",
3258
+ "project_updates",
3259
+ "team_memberships",
3260
+ "initiative_to_project",
3261
+ "project_milestone",
3262
+ "project_status",
3263
+ ]:
3261
3264
  raise UnsupportedResourceError(table, "Linear")
3262
3265
 
3263
3266
  start_date = kwargs.get("interval_start")
ingestr-0.13.82.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.80
3
+ Version: 0.13.82
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
ingestr-0.13.82.dist-info/RECORD CHANGED
@@ -2,7 +2,7 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
2
2
  ingestr/main.py,sha256=qoWHNcHh0-xVnyQxbQ-SKuTxPb1RNV3ENkCpqO7CLrk,26694
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=K8l-cnsoOmTtSy1GdxNMRLZxsjHoYiJsCc5rvXeCpHE,21
5
+ ingestr/src/buildinfo.py,sha256=S1Cgqn9Xc0avTk0r_rJ8fMXucqD7c5O68FsKjSWj2Pc,21
6
6
  ingestr/src/destinations.py,sha256=M2Yni6wiWcrvZ8EPJemidqxN156l0rehgCc7xuil7mo,22840
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
8
  ingestr/src/factory.py,sha256=rF5Ry4o4t8KulSPBtrd7ZKCI_0TH1DAetG0zs9H7oik,6792
@@ -11,7 +11,7 @@ ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,73
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
13
13
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
14
- ingestr/src/sources.py,sha256=guwCdKQDvT2XMYeR2O3nJ9kZ-wLCSDpOex4UH0luG5k,119966
14
+ ingestr/src/sources.py,sha256=Yaej1Iy75JPOccw6jYL6kbhzYFq33t4YelGT-NiAA5I,119661
15
15
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
16
16
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
17
17
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -41,9 +41,9 @@ ingestr/src/clickup/helpers.py,sha256=RzDKMUAHccuDhocIQ2ToBXfCERo8CBJqA3t-IPltBC
41
41
  ingestr/src/collector/spinner.py,sha256=_ZUqF5MI43hVIULdjF5s5mrAZbhEFXaiWirQmrv3Yk4,1201
42
42
  ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
43
43
  ingestr/src/elasticsearch/__init__.py,sha256=m-q93HgUmTwGDUwHOjHawstWL06TC3WIX3H05szybrY,2556
44
- ingestr/src/facebook_ads/__init__.py,sha256=bX6lnf0LxIcOyZHDVA9FL5iKhgnQ0f5Hfma4eXcQuIk,10094
44
+ ingestr/src/facebook_ads/__init__.py,sha256=15GiovITANe0al5MI6WWLdl3LDmdBd1YpkUWBV3g6bk,9715
45
45
  ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
46
- ingestr/src/facebook_ads/helpers.py,sha256=Oh9-LepxxBRnPXQZMbbNOSbxg9T8a4nmiLSt22GPt6E,8233
46
+ ingestr/src/facebook_ads/helpers.py,sha256=c-WG008yU_zIdhFwljtqE2jfjVYuaVoNKldxcnJN3U4,9761
47
47
  ingestr/src/facebook_ads/settings.py,sha256=Bsic8RcmH-NfEZ7r_NGospTCmwISK9XaMT5y2NZirtg,4938
48
48
  ingestr/src/facebook_ads/utils.py,sha256=ES2ylPoW3j3fjp6OMUgp21n1cG1OktXsmWWMk5vBW_I,1590
49
49
  ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
@@ -85,8 +85,8 @@ ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0
85
85
  ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
86
86
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
87
87
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
88
- ingestr/src/linear/__init__.py,sha256=XWgWiDJi87OFHnsOjTq4ZSAdtMcPdplBYC8fJM-6dMA,6607
89
- ingestr/src/linear/helpers.py,sha256=y8XhEDBVnxMmSzzyrS0_RnPwtNJIRuKM4Kw3wW9p6UM,1796
88
+ ingestr/src/linear/__init__.py,sha256=G2x4HaRl8WWReDJ5HElnNTfVIybdztrIB3acl1ickyo,11003
89
+ ingestr/src/linear/helpers.py,sha256=pPqtJrY3D3U2IaqTsD6ChvwMLYiUVtmQ8wTvetz5U7A,3640
90
90
  ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
91
91
  ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
92
92
  ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
@@ -151,8 +151,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
151
151
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
152
152
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
153
153
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
154
- ingestr-0.13.80.dist-info/METADATA,sha256=HF4xO0hXDV_gn389MIuX_-GhahKPaz8GOMIMt1If4x0,15182
155
- ingestr-0.13.80.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
156
- ingestr-0.13.80.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
157
- ingestr-0.13.80.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
158
- ingestr-0.13.80.dist-info/RECORD,,
154
+ ingestr-0.13.82.dist-info/METADATA,sha256=4edZaTP7apukcZRUY4J00G99O3d7Qxz37csXXCNhaww,15182
155
+ ingestr-0.13.82.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
156
+ ingestr-0.13.82.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
157
+ ingestr-0.13.82.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
158
+ ingestr-0.13.82.dist-info/RECORD,,