ingestr 0.13.93__py3-none-any.whl → 0.13.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic; consult the package registry's advisory page for more details.

@@ -224,7 +224,7 @@ def resource(
224
224
  def custom_report_from_spec(spec: str) -> EndpointResource:
225
225
  parts = spec.split(":")
226
226
  if len(parts) != 4:
227
- raise InvalidCustomReportError()
227
+ raise InvalidCustomReportError()
228
228
 
229
229
  _, endpoint, report, dims = parts
230
230
  report_type = ReportType(report.strip())
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.93"
1
+ version = "v0.13.94"
@@ -25,7 +25,6 @@ from ingestr.src.loader import load_dlt_file
25
25
 
26
26
  class GenericSqlDestination:
27
27
  def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
28
-
29
28
  if uri.startswith("databricks://"):
30
29
  p = urlparse(uri)
31
30
  q = parse_qs(p.query)
@@ -37,8 +36,8 @@ class GenericSqlDestination:
37
36
  "table_name": table,
38
37
  }
39
38
  return res
40
-
41
- table_fields = table.split(".")
39
+
40
+ table_fields = table.split(".")
42
41
  if len(table_fields) != 2:
43
42
  raise ValueError("Table name must be in the format <schema>.<table>")
44
43
 
@@ -290,7 +289,6 @@ class DatabricksDestination(GenericSqlDestination):
290
289
  http_path = q.get("http_path", [None])[0]
291
290
  catalog = q.get("catalog", [None])[0]
292
291
  schema = q.get("schema", [None])[0]
293
-
294
292
 
295
293
  creds = {
296
294
  "access_token": access_token,
@@ -299,14 +297,12 @@ class DatabricksDestination(GenericSqlDestination):
299
297
  "catalog": catalog,
300
298
  "schema": schema,
301
299
  }
302
-
300
+
303
301
  return dlt.destinations.databricks(
304
302
  credentials=creds,
305
303
  **kwargs,
306
304
  )
307
-
308
305
 
309
-
310
306
 
311
307
  class SynapseDestination(GenericSqlDestination):
312
308
  def dlt_dest(self, uri: str, **kwargs):
@@ -0,0 +1,607 @@
1
"""Docebo source for ingestr."""

import json
from typing import Any, Dict, Iterator, Optional

import dlt
from dlt.sources import DltResource

from .client import DoceboClient
from .helpers import normalize_date_field, normalize_docebo_dates


@dlt.source(name="docebo", max_table_nesting=0)
def docebo_source(
    base_url: str,
    client_id: str,
    client_secret: str,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> list[DltResource]:
    """
    Docebo source for fetching data from Docebo LMS API.

    Args:
        base_url: The base URL of your Docebo instance (e.g., https://yourcompany.docebosaas.com)
        client_id: OAuth2 client ID
        client_secret: OAuth2 client secret
        username: Username for authentication
        password: Password for authentication

    Yields:
        DltResource: Resources available from Docebo API
    """

    # Initialize client once for all resources
    client = DoceboClient(
        base_url=base_url,
        client_id=client_id,
        client_secret=client_secret,
        username=username,
        password=password,
    )

    @dlt.resource(
        name="users",
        write_disposition="replace",
        primary_key="user_id",
        columns={
            "user_id": {"data_type": "text", "nullable": False},
            "username": {"data_type": "text", "nullable": True},
            "first_name": {"data_type": "text", "nullable": True},
            "last_name": {"data_type": "text", "nullable": True},
            "email": {"data_type": "text", "nullable": True},
            "uuid": {"data_type": "text", "nullable": True},
            "is_manager": {"data_type": "bool", "nullable": True},
            "fullname": {"data_type": "text", "nullable": True},
            "last_access_date": {"data_type": "timestamp", "nullable": True},
            "last_update": {"data_type": "timestamp", "nullable": True},
            "creation_date": {"data_type": "timestamp", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "avatar": {"data_type": "text", "nullable": True},
            "language": {"data_type": "text", "nullable": True},
            "lang_code": {"data_type": "text", "nullable": True},
            "level": {"data_type": "text", "nullable": True},
            "email_validation_status": {"data_type": "text", "nullable": True},
            "send_notification": {"data_type": "text", "nullable": True},
            "newsletter_optout": {"data_type": "text", "nullable": True},
            "encoded_username": {"data_type": "text", "nullable": True},
            "timezone": {"data_type": "text", "nullable": True},
            "active_subordinates_count": {"data_type": "bigint", "nullable": True},
            "expired": {"data_type": "bool", "nullable": True},
            "multidomains": {"data_type": "json", "nullable": True},
            "manager_names": {"data_type": "json", "nullable": True},
            "managers": {"data_type": "json", "nullable": True},
            "actions": {"data_type": "json", "nullable": True},
        },
    )
    def users() -> Iterator[Dict[str, Any]]:
        """Fetch all users from Docebo."""
        for users_batch in client.fetch_users():
            # Apply normalizer to each user and yield individually
            for user in users_batch:
                yield normalize_docebo_dates(user)

    @dlt.resource(
        name="courses",
        write_disposition="replace",
        primary_key="id_course",
        parallelized=True,
        columns={
            "id_course": {"data_type": "bigint", "nullable": False},
            "name": {"data_type": "text", "nullable": True},
            "uidCourse": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "date_last_updated": {"data_type": "date", "nullable": True},
            "course_type": {"data_type": "text", "nullable": True},
            "selling": {"data_type": "bool", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "slug_name": {"data_type": "text", "nullable": True},
            "image": {"data_type": "text", "nullable": True},
            "duration": {"data_type": "bigint", "nullable": True},
            "language": {"data_type": "text", "nullable": True},
            "language_label": {"data_type": "text", "nullable": True},
            "multi_languages": {"data_type": "json", "nullable": True},
            "price": {"data_type": "text", "nullable": True},
            "is_new": {"data_type": "text", "nullable": True},
            "is_opened": {"data_type": "text", "nullable": True},
            "rating_option": {"data_type": "text", "nullable": True},
            "current_rating": {"data_type": "bigint", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "img_url": {"data_type": "text", "nullable": True},
            "can_rate": {"data_type": "bool", "nullable": True},
            "can_self_unenroll": {"data_type": "bool", "nullable": True},
            "start_date": {"data_type": "date", "nullable": True},
            "end_date": {"data_type": "date", "nullable": True},
            "category": {"data_type": "json", "nullable": True},
            "enrollment_policy": {"data_type": "bigint", "nullable": True},
            "max_attempts": {"data_type": "bigint", "nullable": True},
            "available_seats": {"data_type": "json", "nullable": True},
            "is_affiliate": {"data_type": "bool", "nullable": True},
            "partner_fields": {"data_type": "text", "nullable": True},
            "partner_data": {"data_type": "json", "nullable": True},
            "affiliate_price": {"data_type": "text", "nullable": True},
        },
    )
    def courses() -> Iterator[Dict[str, Any]]:
        """Fetch all courses from Docebo."""
        # NOTE: the docstring previously followed a debug print(), making it a
        # no-op string expression; the print and commented-out batching code
        # have been removed.
        for courses_batch in client.fetch_courses(page_size=1000):
            for course in courses_batch:
                yield normalize_docebo_dates(course)

    # Phase 1: Core User and Organization Resources
    @dlt.resource(
        name="user_fields",
        write_disposition="replace",
        primary_key="id",
        columns={
            "id": {"data_type": "bigint", "nullable": False},
            "name": {"data_type": "text", "nullable": True},
            "type": {"data_type": "text", "nullable": True},
            "mandatory": {"data_type": "bool", "nullable": True},
            "show_on_detail": {"data_type": "bool", "nullable": True},
            "show_in_filter": {"data_type": "bool", "nullable": True},
            "options": {"data_type": "json", "nullable": True},
            "ref_area": {"data_type": "bigint", "nullable": True},
            "is_valid": {"data_type": "bool", "nullable": True},
            "sequence": {"data_type": "bigint", "nullable": True},
        },
    )
    def user_fields() -> Iterator[Dict[str, Any]]:
        """Fetch all user field definitions from Docebo."""
        for fields_batch in client.fetch_user_fields():
            for field in fields_batch:
                yield normalize_docebo_dates(field)

    @dlt.resource(
        name="branches",
        write_disposition="replace",
        primary_key="id_org",
        columns={
            "id_org": {"data_type": "bigint", "nullable": False},
            "id_parent": {"data_type": "bigint", "nullable": True},
            "lft": {"data_type": "bigint", "nullable": True},
            "rgt": {"data_type": "bigint", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "translation": {"data_type": "json", "nullable": True},
            "external_id": {"data_type": "text", "nullable": True},
            "actions": {"data_type": "json", "nullable": True},
        },
    )
    def branches() -> Iterator[Dict[str, Any]]:
        """Fetch all branches/organizational units from Docebo."""
        for branches_batch in client.fetch_branches():
            for branch in branches_batch:
                yield normalize_docebo_dates(branch)

    # Phase 2: Group Management
    @dlt.resource(
        name="groups",
        write_disposition="replace",
        primary_key="group_id",
        columns={
            "group_id": {"data_type": "bigint", "nullable": False},
            "name": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "language": {"data_type": "text", "nullable": True},
            "total_members": {"data_type": "bigint", "nullable": True},
            "id_branch": {"data_type": "bigint", "nullable": True},
            "enrollment_rules": {"data_type": "json", "nullable": True},
            "enrollment_rules_options": {"data_type": "json", "nullable": True},
            "member_fields": {"data_type": "json", "nullable": True},
            "is_default": {"data_type": "bool", "nullable": True},
            "creation_date": {"data_type": "timestamp", "nullable": True},
            "last_update": {"data_type": "timestamp", "nullable": True},
        },
    )
    def groups() -> Iterator[Dict[str, Any]]:
        """Fetch all groups/audiences from Docebo."""
        for groups_batch in client.fetch_groups():
            for group in groups_batch:
                yield normalize_docebo_dates(group)

    @dlt.resource(
        name="group_members",
        write_disposition="replace",
        primary_key=["group_id", "user_id"],
        columns={
            "group_id": {"data_type": "bigint", "nullable": False},
            "user_id": {"data_type": "text", "nullable": False},
            "username": {"data_type": "text", "nullable": True},
            "first_name": {"data_type": "text", "nullable": True},
            "last_name": {"data_type": "text", "nullable": True},
            "email": {"data_type": "text", "nullable": True},
            "level": {"data_type": "text", "nullable": True},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
        },
    )
    def group_members() -> Iterator[Dict[str, Any]]:
        """Fetch all group members for all groups."""
        for members_batch in client.fetch_all_group_members():
            for member in members_batch:
                yield normalize_docebo_dates(member)

    # Phase 3: Advanced Course Resources
    @dlt.resource(
        name="course_fields",
        write_disposition="replace",
        primary_key="field_id",
        columns={
            "field_id": {"data_type": "bigint", "nullable": False},
            "type_field": {"data_type": "text", "nullable": True},
            "name_field": {"data_type": "text", "nullable": True},
            "is_mandatory": {"data_type": "bool", "nullable": True},
            "show_on_course_details": {"data_type": "bool", "nullable": True},
            "show_on_course_filter": {"data_type": "bool", "nullable": True},
            "options": {"data_type": "json", "nullable": True},
            "sequence": {"data_type": "bigint", "nullable": True},
        },
    )
    def course_fields() -> Iterator[Dict[str, Any]]:
        """Fetch all course field definitions from Docebo."""
        for fields_batch in client.fetch_course_fields():
            for field in fields_batch:
                yield normalize_docebo_dates(field)

    @dlt.transformer(
        name="learning_objects",
        data_from=courses,
        write_disposition="replace",
        primary_key=["course_id", "id_org"],
        parallelized=True,
        columns={
            "course_id": {"data_type": "bigint", "nullable": False},
            "id_org": {"data_type": "bigint", "nullable": False},
            "object_id": {"data_type": "bigint", "nullable": True},
            "lo_code": {"data_type": "text", "nullable": True},
            "lo_name": {"data_type": "text", "nullable": True},
            "lo_type": {"data_type": "text", "nullable": True},
            "lo_visibility": {"data_type": "text", "nullable": True},
            "lo_link": {"data_type": "text", "nullable": True},
            "lo_thumbnail": {"data_type": "text", "nullable": True},
            "mobile_compatibility": {"data_type": "text", "nullable": True},
            "lo_external_source_url": {"data_type": "text", "nullable": True},
            "created_by": {"data_type": "text", "nullable": True},
            "creation_date": {"data_type": "timestamp", "nullable": True},
            "duration": {"data_type": "bigint", "nullable": True},
        },
    )
    def learning_objects(course_item: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
        """Fetch the learning objects (LOs) belonging to each course."""
        course_id = course_item.get("id_course")
        if course_id:
            los_endpoint = f"learn/v1/courses/{course_id}/los"
            for lo_batch in client.get_paginated_data(los_endpoint):
                for lo in lo_batch:
                    # Add course_id to learning object if not present
                    if "course_id" not in lo:
                        lo["course_id"] = course_id
                    yield normalize_docebo_dates(lo)

    # Phase 4: Learning Plans
    @dlt.resource(
        name="learning_plans",
        write_disposition="replace",
        primary_key="learning_plan_id",
        columns={
            "learning_plan_id": {"data_type": "bigint", "nullable": False},
            "uuid": {"data_type": "text", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "title": {"data_type": "text", "nullable": True},
            "thumbnail_url": {"data_type": "text", "nullable": True},
            "price": {"data_type": "text", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "is_published": {"data_type": "bool", "nullable": True},
            "is_publishable": {"data_type": "bool", "nullable": True},
            "assigned_courses_count": {"data_type": "bigint", "nullable": True},
            "assigned_enrollments_count": {"data_type": "bigint", "nullable": True},
            "assigned_catalogs_count": {"data_type": "bigint", "nullable": True},
            "assigned_channels_count": {"data_type": "bigint", "nullable": True},
            "created_on": {"data_type": "timestamp", "nullable": True},
            "created_by": {"data_type": "json", "nullable": True},
            "updated_on": {"data_type": "timestamp", "nullable": True},
            "updated_by": {"data_type": "json", "nullable": True},
        },
    )
    def learning_plans() -> Iterator[Dict[str, Any]]:
        """Fetch all learning plans from Docebo."""
        for plans_batch in client.fetch_learning_plans():
            for plan in plans_batch:
                yield normalize_docebo_dates(plan)

    @dlt.resource(
        name="learning_plan_enrollments",
        write_disposition="replace",
        primary_key=["id_path", "id_user"],
        columns={
            "id_path": {"data_type": "bigint", "nullable": False},
            "id_user": {"data_type": "text", "nullable": False},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
            "completion_date": {"data_type": "timestamp", "nullable": True},
            "enrollment_status": {"data_type": "text", "nullable": True},
            "score_given": {"data_type": "double", "nullable": True},
            "total_credits": {"data_type": "bigint", "nullable": True},
            "total_time": {"data_type": "bigint", "nullable": True},
            "completed_courses": {"data_type": "bigint", "nullable": True},
            "total_courses": {"data_type": "bigint", "nullable": True},
        },
    )
    def learning_plan_enrollments() -> Iterator[Dict[str, Any]]:
        """Fetch all learning plan enrollments."""
        for enrollments_batch in client.fetch_learning_plan_enrollments():
            for enrollment in enrollments_batch:
                yield normalize_docebo_dates(enrollment)

    @dlt.resource(
        name="learning_plan_course_enrollments",
        write_disposition="replace",
        primary_key=["learning_plan_id", "course_id", "user_id"],
        columns={
            "learning_plan_id": {"data_type": "bigint", "nullable": False},
            "course_id": {"data_type": "bigint", "nullable": False},
            "user_id": {"data_type": "text", "nullable": False},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
            "completion_date": {"data_type": "timestamp", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "score": {"data_type": "double", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "total_time": {"data_type": "bigint", "nullable": True},
        },
    )
    def learning_plan_course_enrollments() -> Iterator[Dict[str, Any]]:
        """Fetch course enrollments for all learning plans."""
        for enrollments_batch in client.fetch_all_learning_plan_course_enrollments():
            for enrollment in enrollments_batch:
                yield normalize_docebo_dates(enrollment)

    # Phase 5: Enrollments
    @dlt.resource(
        name="course_enrollments",
        write_disposition="replace",
        primary_key=["course_id", "user_id"],
        columns={
            "course_id": {"data_type": "bigint", "nullable": False},
            "user_id": {"data_type": "text", "nullable": False},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
            "completion_date": {"data_type": "timestamp", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "level": {"data_type": "text", "nullable": True},
            "score_given": {"data_type": "double", "nullable": True},
            "score_total": {"data_type": "double", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "total_time": {"data_type": "bigint", "nullable": True},
            "expire_date": {"data_type": "timestamp", "nullable": True},
            "certificate_id": {"data_type": "text", "nullable": True},
        },
    )
    def course_enrollments() -> Iterator[Dict[str, Any]]:
        """Fetch enrollments for all courses."""
        for enrollments_batch in client.fetch_all_course_enrollments():
            for enrollment in enrollments_batch:
                yield normalize_docebo_dates(enrollment)

    # Additional Resources
    @dlt.resource(
        name="sessions",
        write_disposition="replace",
        primary_key=["course_id", "session_id"],
        columns={
            "course_id": {"data_type": "bigint", "nullable": False},
            "session_id": {"data_type": "bigint", "nullable": False},
            "name": {"data_type": "text", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "date_start": {"data_type": "timestamp", "nullable": True},
            "date_end": {"data_type": "timestamp", "nullable": True},
            "instructor": {"data_type": "text", "nullable": True},
            "location": {"data_type": "text", "nullable": True},
            "classroom": {"data_type": "text", "nullable": True},
            "max_participants": {"data_type": "bigint", "nullable": True},
            "enrolled_users": {"data_type": "bigint", "nullable": True},
            "waiting_users": {"data_type": "bigint", "nullable": True},
            "session_type": {"data_type": "text", "nullable": True},
            "timezone": {"data_type": "text", "nullable": True},
            "attendance_type": {"data_type": "text", "nullable": True},
        },
    )
    def sessions() -> Iterator[Dict[str, Any]]:
        """Fetch all ILT/classroom sessions."""
        for sessions_batch in client.fetch_sessions():
            for session in sessions_batch:
                yield normalize_docebo_dates(session)

    @dlt.resource(
        name="categories",
        write_disposition="replace",
        primary_key="id_cat",
        columns={
            "id_cat": {"data_type": "bigint", "nullable": False},
            "code": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "id_parent": {"data_type": "bigint", "nullable": True},
            "lft": {"data_type": "bigint", "nullable": True},
            "rgt": {"data_type": "bigint", "nullable": True},
            "is_active": {"data_type": "bool", "nullable": True},
            "translations": {"data_type": "json", "nullable": True},
        },
    )
    def categories() -> Iterator[Dict[str, Any]]:
        """Fetch all course categories."""
        for categories_batch in client.fetch_categories():
            for category in categories_batch:
                yield normalize_docebo_dates(category)

    @dlt.resource(
        name="certifications",
        write_disposition="replace",
        primary_key="id_cert",
        columns={
            "id_cert": {"data_type": "bigint", "nullable": False},
            "code": {"data_type": "text", "nullable": True},
            "title": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "type": {"data_type": "text", "nullable": True},
            "validity_type": {"data_type": "text", "nullable": True},
            "validity_days": {"data_type": "bigint", "nullable": True},
            "renewal_available": {"data_type": "bool", "nullable": True},
            "renewal_days_before": {"data_type": "bigint", "nullable": True},
            "meta_language": {"data_type": "text", "nullable": True},
            "meta_language_label": {"data_type": "text", "nullable": True},
            "created_on": {"data_type": "timestamp", "nullable": True},
            "updated_on": {"data_type": "timestamp", "nullable": True},
        },
    )
    def certifications() -> Iterator[Dict[str, Any]]:
        """Fetch all certifications."""
        for certifications_batch in client.fetch_certifications():
            for cert in certifications_batch:
                yield normalize_docebo_dates(cert)

    @dlt.resource(
        name="external_training",
        write_disposition="replace",
        primary_key="external_training_id",
        columns={
            "external_training_id": {"data_type": "bigint", "nullable": False},
            "user_id": {"data_type": "text", "nullable": True},
            "title": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "training_type": {"data_type": "text", "nullable": True},
            "provider": {"data_type": "text", "nullable": True},
            "date_from": {"data_type": "date", "nullable": True},
            "date_to": {"data_type": "date", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "score": {"data_type": "double", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "certificate_file": {"data_type": "text", "nullable": True},
            "created_on": {"data_type": "timestamp", "nullable": True},
            "updated_on": {"data_type": "timestamp", "nullable": True},
        },
    )
    def external_training() -> Iterator[Dict[str, Any]]:
        """Fetch all external training records."""
        for training_batch in client.fetch_external_training():
            for training in training_batch:
                yield normalize_docebo_dates(training)

    # Survey Resources - Using transformer chain for parallelization

    # Transformer that filters learning_objects for polls only
    @dlt.transformer(
        data_from=learning_objects,
        write_disposition="replace",
        name="polls",
        parallelized=True,
        primary_key=["poll_id", "course_id"],
        columns={
            "poll_id": {"data_type": "bigint", "nullable": False},
            "course_id": {"data_type": "bigint", "nullable": False},
            "poll_title": {"data_type": "text", "nullable": True},
            "object_type": {"data_type": "text", "nullable": True},
            "lo_type": {"data_type": "text", "nullable": True},
        },
    )
    def polls(lo_item: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
        """Filter learning objects to get only polls."""
        # Check if this learning object is a poll
        if lo_item.get("object_type") == "poll" or lo_item.get("lo_type") == "poll":
            poll_id = lo_item["id_resource"]
            course_id = lo_item.get("course_id")

            if poll_id and course_id:
                yield normalize_docebo_dates(
                    {
                        "poll_id": poll_id,
                        "course_id": course_id,
                        "poll_title": lo_item.get("title")
                        or lo_item.get("lo_name")
                        or "",
                        "object_type": lo_item.get("object_type"),
                        "lo_type": lo_item.get("lo_type"),
                    }
                )

    # Transformer that fetches survey answers for each poll
    @dlt.transformer(
        data_from=polls,
        write_disposition="replace",
        parallelized=True,
        name="survey_answers",
        columns={
            "course_id": {"data_type": "bigint", "nullable": False},
            "poll_id": {"data_type": "bigint", "nullable": False},
            "poll_title": {"data_type": "text", "nullable": True},
            "question_id": {"data_type": "bigint", "nullable": False},
            "question_type": {"data_type": "text", "nullable": True},
            "question_title": {"data_type": "text", "nullable": True},
            "answer": {"data_type": "text", "nullable": True},
            "date": {"data_type": "timestamp", "nullable": True},
        },
    )
    def survey_answers(poll_item: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
        """Fetch all survey answers for a specific poll.

        Raises:
            ValueError: if the survey payload is missing or has malformed
                "answers"/"questions" sections.
        """
        poll_id = poll_item["poll_id"]
        course_id = poll_item["course_id"]
        poll_title = poll_item["poll_title"]

        if not poll_id or not course_id:
            return

        survey_data = client.fetch_survey_answers_for_poll(poll_id, course_id)
        if not survey_data:
            return

        # Explicit raises instead of `assert`: asserts are stripped under
        # `python -O`, which would silently disable this payload validation.
        if "answers" not in survey_data:
            raise ValueError("no answers in survey data " + json.dumps(survey_data))
        if not isinstance(survey_data["answers"], list):
            raise ValueError("answers is not a list")
        if "questions" not in survey_data:
            raise ValueError("no questions in survey data")
        if not isinstance(survey_data["questions"], dict):
            raise ValueError("questions is not a dict")

        questions = survey_data["questions"]
        answers = survey_data["answers"]

        for answer in answers:
            if "answers" not in answer:
                continue
            date = normalize_date_field(answer.get("date"))

            answer_data = answer.get("answers", {})
            for question_id, answer_list in answer_data.items():
                # `answer_value` avoids shadowing the outer `answer` loop variable
                for answer_value in answer_list:
                    yield {
                        "course_id": course_id,
                        "poll_id": poll_id,
                        "poll_title": poll_title,
                        "question_id": question_id,
                        "question_type": questions[question_id].get("type_quest"),
                        "question_title": questions[question_id].get("title_quest"),
                        "answer": answer_value,
                        "date": date,
                    }

    return [
        users,
        courses,
        user_fields,
        branches,
        groups,
        group_members,
        course_fields,
        learning_objects,
        learning_plans,
        learning_plan_enrollments,
        learning_plan_course_enrollments,
        course_enrollments,
        sessions,
        categories,
        certifications,
        external_training,
        polls,
        survey_answers,
    ]