ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,589 @@
1
+ """Docebo source for ingestr."""
2
+
3
+ import json
4
+ from typing import Any, Dict, Iterator, Optional
5
+
6
+ import dlt
7
+ from dlt.sources import DltResource
8
+
9
+ from .client import DoceboClient
10
+ from .helpers import normalize_date_field, normalize_docebo_dates
11
+
12
+
13
@dlt.source(name="docebo", max_table_nesting=0)
def docebo_source(
    base_url: str,
    client_id: str,
    client_secret: str,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> list[DltResource]:
    """
    Docebo source for fetching data from Docebo LMS API.

    A single ``DoceboClient`` is created and shared by every resource
    defined below. All resources use the ``replace`` write disposition.

    Args:
        base_url: The base URL of your Docebo instance (e.g., https://yourcompany.docebosaas.com)
        client_id: OAuth2 client ID
        client_secret: OAuth2 client secret
        username: Username for authentication
        password: Password for authentication

    Returns:
        list[DltResource]: The resources and transformers available from
        the Docebo API, in the order they are defined in this function.
    """

    # Initialize client once for all resources
    client = DoceboClient(
        base_url=base_url,
        client_id=client_id,
        client_secret=client_secret,
        username=username,
        password=password,
    )

    def _normalized_records(batches: Any) -> Iterator[Dict[str, Any]]:
        """Flatten an iterable of record batches, normalizing each record's dates."""
        for batch in batches:
            for record in batch:
                yield normalize_docebo_dates(record)

    @dlt.resource(
        name="users",
        write_disposition="replace",
        columns={
            "user_id": {"data_type": "text", "nullable": True},
            "username": {"data_type": "text", "nullable": True},
            "first_name": {"data_type": "text", "nullable": True},
            "last_name": {"data_type": "text", "nullable": True},
            "email": {"data_type": "text", "nullable": True},
            "uuid": {"data_type": "text", "nullable": True},
            "is_manager": {"data_type": "bool", "nullable": True},
            "fullname": {"data_type": "text", "nullable": True},
            "last_access_date": {"data_type": "timestamp", "nullable": True},
            "last_update": {"data_type": "timestamp", "nullable": True},
            "creation_date": {"data_type": "timestamp", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "avatar": {"data_type": "text", "nullable": True},
            "language": {"data_type": "text", "nullable": True},
            "lang_code": {"data_type": "text", "nullable": True},
            "level": {"data_type": "text", "nullable": True},
            "email_validation_status": {"data_type": "text", "nullable": True},
            "send_notification": {"data_type": "text", "nullable": True},
            "newsletter_optout": {"data_type": "text", "nullable": True},
            "encoded_username": {"data_type": "text", "nullable": True},
            "timezone": {"data_type": "text", "nullable": True},
            "active_subordinates_count": {"data_type": "bigint", "nullable": True},
            "expired": {"data_type": "bool", "nullable": True},
            "multidomains": {"data_type": "json", "nullable": True},
            "manager_names": {"data_type": "json", "nullable": True},
            "managers": {"data_type": "json", "nullable": True},
            "actions": {"data_type": "json", "nullable": True},
        },
    )
    def users() -> Iterator[Dict[str, Any]]:
        """Fetch all users from Docebo."""
        yield from _normalized_records(client.fetch_users())

    @dlt.resource(
        name="courses",
        write_disposition="replace",
        parallelized=True,
        columns={
            "id_course": {"data_type": "bigint", "nullable": True},
            "name": {"data_type": "text", "nullable": True},
            "uidCourse": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "date_last_updated": {"data_type": "date", "nullable": True},
            "course_type": {"data_type": "text", "nullable": True},
            "selling": {"data_type": "bool", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "slug_name": {"data_type": "text", "nullable": True},
            "image": {"data_type": "text", "nullable": True},
            "duration": {"data_type": "bigint", "nullable": True},
            "language": {"data_type": "text", "nullable": True},
            "language_label": {"data_type": "text", "nullable": True},
            "multi_languages": {"data_type": "json", "nullable": True},
            "price": {"data_type": "text", "nullable": True},
            "is_new": {"data_type": "text", "nullable": True},
            "is_opened": {"data_type": "text", "nullable": True},
            "rating_option": {"data_type": "text", "nullable": True},
            "current_rating": {"data_type": "bigint", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "img_url": {"data_type": "text", "nullable": True},
            "can_rate": {"data_type": "bool", "nullable": True},
            "can_self_unenroll": {"data_type": "bool", "nullable": True},
            "start_date": {"data_type": "date", "nullable": True},
            "end_date": {"data_type": "date", "nullable": True},
            "category": {"data_type": "json", "nullable": True},
            "enrollment_policy": {"data_type": "bigint", "nullable": True},
            "max_attempts": {"data_type": "bigint", "nullable": True},
            "available_seats": {"data_type": "json", "nullable": True},
            "is_affiliate": {"data_type": "bool", "nullable": True},
            "partner_fields": {"data_type": "text", "nullable": True},
            "partner_data": {"data_type": "json", "nullable": True},
            "affiliate_price": {"data_type": "text", "nullable": True},
        },
    )
    def courses() -> Iterator[Dict[str, Any]]:
        """Fetch all courses from Docebo."""
        # The leftover debug print that preceded this docstring was removed:
        # it wrote to stdout on every run and made the docstring a dead
        # string expression.
        yield from _normalized_records(client.fetch_courses(page_size=1000))

    @dlt.resource(
        name="user_fields",
        write_disposition="replace",
        primary_key="id",
        columns={
            "id": {"data_type": "bigint", "nullable": True},
            "name": {"data_type": "text", "nullable": True},
            "type": {"data_type": "text", "nullable": True},
            "mandatory": {"data_type": "bool", "nullable": True},
            "show_on_detail": {"data_type": "bool", "nullable": True},
            "show_in_filter": {"data_type": "bool", "nullable": True},
            "options": {"data_type": "json", "nullable": True},
            "ref_area": {"data_type": "bigint", "nullable": True},
            "is_valid": {"data_type": "bool", "nullable": True},
            "sequence": {"data_type": "bigint", "nullable": True},
        },
    )
    def user_fields() -> Iterator[Dict[str, Any]]:
        """Fetch all user field definitions from Docebo."""
        yield from _normalized_records(client.fetch_user_fields())

    @dlt.resource(
        name="branches",
        write_disposition="replace",
        columns={
            "id_org": {"data_type": "bigint", "nullable": True},
            "id_parent": {"data_type": "bigint", "nullable": True},
            "lft": {"data_type": "bigint", "nullable": True},
            "rgt": {"data_type": "bigint", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "translation": {"data_type": "json", "nullable": True},
            "external_id": {"data_type": "text", "nullable": True},
            "actions": {"data_type": "json", "nullable": True},
        },
    )
    def branches() -> Iterator[Dict[str, Any]]:
        """Fetch all branches/organizational units from Docebo."""
        yield from _normalized_records(client.fetch_branches())

    # Phase 2: Group Management
    @dlt.resource(
        name="groups",
        write_disposition="replace",
        primary_key="group_id",
        columns={
            "group_id": {"data_type": "bigint", "nullable": True},
            "name": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "language": {"data_type": "text", "nullable": True},
            "total_members": {"data_type": "bigint", "nullable": True},
            "id_branch": {"data_type": "bigint", "nullable": True},
            "enrollment_rules": {"data_type": "json", "nullable": True},
            "enrollment_rules_options": {"data_type": "json", "nullable": True},
            "member_fields": {"data_type": "json", "nullable": True},
            "is_default": {"data_type": "bool", "nullable": True},
            "creation_date": {"data_type": "timestamp", "nullable": True},
            "last_update": {"data_type": "timestamp", "nullable": True},
        },
    )
    def groups() -> Iterator[Dict[str, Any]]:
        """Fetch all groups/audiences from Docebo."""
        yield from _normalized_records(client.fetch_groups())

    @dlt.resource(
        name="group_members",
        write_disposition="replace",
        primary_key=["group_id", "user_id"],
        columns={
            "group_id": {"data_type": "bigint", "nullable": True},
            "user_id": {"data_type": "text", "nullable": True},
            "username": {"data_type": "text", "nullable": True},
            "first_name": {"data_type": "text", "nullable": True},
            "last_name": {"data_type": "text", "nullable": True},
            "email": {"data_type": "text", "nullable": True},
            "level": {"data_type": "text", "nullable": True},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
        },
    )
    def group_members() -> Iterator[Dict[str, Any]]:
        """Fetch all group members for all groups."""
        yield from _normalized_records(client.fetch_all_group_members())

    # Phase 3: Advanced Course Resources
    @dlt.resource(
        name="course_fields",
        write_disposition="replace",
        primary_key="field_id",
        columns={
            "field_id": {"data_type": "bigint", "nullable": True},
            "type_field": {"data_type": "text", "nullable": True},
            "name_field": {"data_type": "text", "nullable": True},
            "is_mandatory": {"data_type": "bool", "nullable": True},
            "show_on_course_details": {"data_type": "bool", "nullable": True},
            "show_on_course_filter": {"data_type": "bool", "nullable": True},
            "options": {"data_type": "json", "nullable": True},
            "sequence": {"data_type": "bigint", "nullable": True},
        },
    )
    def course_fields() -> Iterator[Dict[str, Any]]:
        """Fetch all course field definitions from Docebo."""
        yield from _normalized_records(client.fetch_course_fields())

    @dlt.transformer(
        name="learning_objects",
        data_from=courses,
        write_disposition="replace",
        parallelized=True,
        columns={
            "course_id": {"data_type": "bigint", "nullable": True},
            "id_org": {"data_type": "bigint", "nullable": True},
            "object_id": {"data_type": "bigint", "nullable": True},
            "lo_code": {"data_type": "text", "nullable": True},
            "lo_name": {"data_type": "text", "nullable": True},
            "lo_type": {"data_type": "text", "nullable": True},
            "lo_visibility": {"data_type": "text", "nullable": True},
            "lo_link": {"data_type": "text", "nullable": True},
            "lo_thumbnail": {"data_type": "text", "nullable": True},
            "mobile_compatibility": {"data_type": "text", "nullable": True},
            "lo_external_source_url": {"data_type": "text", "nullable": True},
            "created_by": {"data_type": "text", "nullable": True},
            "creation_date": {"data_type": "timestamp", "nullable": True},
            "duration": {"data_type": "bigint", "nullable": True},
        },
    )
    def learning_objects(course_item: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
        """Fetch the learning objects of one course yielded by ``courses``.

        Courses without a truthy ``id_course`` produce no output.
        """
        course_id = course_item.get("id_course")
        if not course_id:
            return

        los_endpoint = f"learn/v1/courses/{course_id}/los"
        for lo_batch in client.get_paginated_data(los_endpoint):
            for lo in lo_batch:
                # Add course_id to learning object if not present
                if "course_id" not in lo:
                    lo["course_id"] = course_id
                yield normalize_docebo_dates(lo)

    # Phase 4: Learning Plans
    @dlt.resource(
        name="learning_plans",
        write_disposition="replace",
        columns={
            "learning_plan_id": {"data_type": "bigint", "nullable": True},
            "uuid": {"data_type": "text", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "title": {"data_type": "text", "nullable": True},
            "thumbnail_url": {"data_type": "text", "nullable": True},
            "price": {"data_type": "text", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "is_published": {"data_type": "bool", "nullable": True},
            "is_publishable": {"data_type": "bool", "nullable": True},
            "assigned_courses_count": {"data_type": "bigint", "nullable": True},
            "assigned_enrollments_count": {"data_type": "bigint", "nullable": True},
            "assigned_catalogs_count": {"data_type": "bigint", "nullable": True},
            "assigned_channels_count": {"data_type": "bigint", "nullable": True},
            "created_on": {"data_type": "timestamp", "nullable": True},
            "created_by": {"data_type": "json", "nullable": True},
            "updated_on": {"data_type": "timestamp", "nullable": True},
            "updated_by": {"data_type": "json", "nullable": True},
        },
    )
    def learning_plans() -> Iterator[Dict[str, Any]]:
        """Fetch all learning plans from Docebo."""
        yield from _normalized_records(client.fetch_learning_plans())

    @dlt.resource(
        name="learning_plan_enrollments",
        write_disposition="replace",
        columns={
            "id_path": {"data_type": "bigint", "nullable": True},
            "id_user": {"data_type": "text", "nullable": True},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
            "completion_date": {"data_type": "timestamp", "nullable": True},
            "enrollment_status": {"data_type": "text", "nullable": True},
            "score_given": {"data_type": "double", "nullable": True},
            "total_credits": {"data_type": "bigint", "nullable": True},
            "total_time": {"data_type": "bigint", "nullable": True},
            "completed_courses": {"data_type": "bigint", "nullable": True},
            "total_courses": {"data_type": "bigint", "nullable": True},
        },
    )
    def learning_plan_enrollments() -> Iterator[Dict[str, Any]]:
        """Fetch all learning plan enrollments."""
        yield from _normalized_records(client.fetch_learning_plan_enrollments())

    @dlt.resource(
        name="learning_plan_course_enrollments",
        write_disposition="replace",
        columns={
            "learning_plan_id": {"data_type": "bigint", "nullable": True},
            "course_id": {"data_type": "bigint", "nullable": True},
            "user_id": {"data_type": "text", "nullable": True},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
            "completion_date": {"data_type": "timestamp", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "score": {"data_type": "double", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "total_time": {"data_type": "bigint", "nullable": True},
        },
    )
    def learning_plan_course_enrollments() -> Iterator[Dict[str, Any]]:
        """Fetch course enrollments for all learning plans."""
        yield from _normalized_records(
            client.fetch_all_learning_plan_course_enrollments()
        )

    # Phase 5: Enrollments
    @dlt.resource(
        name="course_enrollments",
        write_disposition="replace",
        columns={
            "course_id": {"data_type": "bigint", "nullable": True},
            "user_id": {"data_type": "text", "nullable": True},
            "enrollment_date": {"data_type": "timestamp", "nullable": True},
            "completion_date": {"data_type": "timestamp", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "level": {"data_type": "text", "nullable": True},
            "score_given": {"data_type": "double", "nullable": True},
            "score_total": {"data_type": "double", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "total_time": {"data_type": "bigint", "nullable": True},
            "expire_date": {"data_type": "timestamp", "nullable": True},
            "certificate_id": {"data_type": "text", "nullable": True},
        },
    )
    def course_enrollments() -> Iterator[Dict[str, Any]]:
        """Fetch enrollments for all courses."""
        yield from _normalized_records(client.fetch_all_course_enrollments())

    # Additional Resources
    @dlt.resource(
        name="sessions",
        write_disposition="replace",
        columns={
            "course_id": {"data_type": "bigint", "nullable": True},
            "session_id": {"data_type": "bigint", "nullable": True},
            "name": {"data_type": "text", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "date_start": {"data_type": "timestamp", "nullable": True},
            "date_end": {"data_type": "timestamp", "nullable": True},
            "instructor": {"data_type": "text", "nullable": True},
            "location": {"data_type": "text", "nullable": True},
            "classroom": {"data_type": "text", "nullable": True},
            "max_participants": {"data_type": "bigint", "nullable": True},
            "enrolled_users": {"data_type": "bigint", "nullable": True},
            "waiting_users": {"data_type": "bigint", "nullable": True},
            "session_type": {"data_type": "text", "nullable": True},
            "timezone": {"data_type": "text", "nullable": True},
            "attendance_type": {"data_type": "text", "nullable": True},
        },
    )
    def sessions() -> Iterator[Dict[str, Any]]:
        """Fetch all ILT/classroom sessions."""
        yield from _normalized_records(client.fetch_sessions())

    @dlt.resource(
        name="categories",
        write_disposition="replace",
        columns={
            "id_cat": {"data_type": "bigint", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "id_parent": {"data_type": "bigint", "nullable": True},
            "lft": {"data_type": "bigint", "nullable": True},
            "rgt": {"data_type": "bigint", "nullable": True},
            "is_active": {"data_type": "bool", "nullable": True},
            "translations": {"data_type": "json", "nullable": True},
        },
    )
    def categories() -> Iterator[Dict[str, Any]]:
        """Fetch all course categories."""
        yield from _normalized_records(client.fetch_categories())

    @dlt.resource(
        name="certifications",
        write_disposition="replace",
        columns={
            "id_cert": {"data_type": "bigint", "nullable": True},
            "code": {"data_type": "text", "nullable": True},
            "title": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "type": {"data_type": "text", "nullable": True},
            "validity_type": {"data_type": "text", "nullable": True},
            "validity_days": {"data_type": "bigint", "nullable": True},
            "renewal_available": {"data_type": "bool", "nullable": True},
            "renewal_days_before": {"data_type": "bigint", "nullable": True},
            "meta_language": {"data_type": "text", "nullable": True},
            "meta_language_label": {"data_type": "text", "nullable": True},
            "created_on": {"data_type": "timestamp", "nullable": True},
            "updated_on": {"data_type": "timestamp", "nullable": True},
        },
    )
    def certifications() -> Iterator[Dict[str, Any]]:
        """Fetch all certifications."""
        yield from _normalized_records(client.fetch_certifications())

    @dlt.resource(
        name="external_training",
        write_disposition="replace",
        columns={
            "external_training_id": {"data_type": "bigint", "nullable": True},
            "user_id": {"data_type": "text", "nullable": True},
            "title": {"data_type": "text", "nullable": True},
            "description": {"data_type": "text", "nullable": True},
            "training_type": {"data_type": "text", "nullable": True},
            "provider": {"data_type": "text", "nullable": True},
            "date_from": {"data_type": "date", "nullable": True},
            "date_to": {"data_type": "date", "nullable": True},
            "credits": {"data_type": "bigint", "nullable": True},
            "score": {"data_type": "double", "nullable": True},
            "status": {"data_type": "text", "nullable": True},
            "certificate_file": {"data_type": "text", "nullable": True},
            "created_on": {"data_type": "timestamp", "nullable": True},
            "updated_on": {"data_type": "timestamp", "nullable": True},
        },
    )
    def external_training() -> Iterator[Dict[str, Any]]:
        """Fetch all external training records."""
        yield from _normalized_records(client.fetch_external_training())

    # Survey Resources - Using transformer chain for parallelization

    # Transformer that filters learning_objects for polls only
    @dlt.transformer(
        data_from=learning_objects,
        write_disposition="replace",
        name="polls",
        parallelized=True,
        columns={
            "poll_id": {"data_type": "bigint", "nullable": True},
            "course_id": {"data_type": "bigint", "nullable": True},
            "poll_title": {"data_type": "text", "nullable": True},
            "object_type": {"data_type": "text", "nullable": True},
            "lo_type": {"data_type": "text", "nullable": True},
        },
    )
    def polls(lo_item: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
        """Filter learning objects to get only polls."""
        # Check if this learning object is a poll
        if lo_item.get("object_type") != "poll" and lo_item.get("lo_type") != "poll":
            return

        # Use .get so a poll lacking "id_resource" is skipped by the guard
        # below instead of raising KeyError before the guard is reached.
        poll_id = lo_item.get("id_resource")
        course_id = lo_item.get("course_id")
        if not (poll_id and course_id):
            return

        yield normalize_docebo_dates(
            {
                "poll_id": poll_id,
                "course_id": course_id,
                "poll_title": lo_item.get("title") or lo_item.get("lo_name") or "",
                "object_type": lo_item.get("object_type"),
                "lo_type": lo_item.get("lo_type"),
            }
        )

    # Transformer that fetches survey answers for each poll
    @dlt.transformer(
        data_from=polls,
        write_disposition="replace",
        parallelized=True,
        name="survey_answers",
        columns={
            "course_id": {"data_type": "bigint", "nullable": True},
            "poll_id": {"data_type": "bigint", "nullable": True},
            "poll_title": {"data_type": "text", "nullable": True},
            "question_id": {"data_type": "bigint", "nullable": True},
            "question_type": {"data_type": "text", "nullable": True},
            "question_title": {"data_type": "text", "nullable": True},
            "answer": {"data_type": "text", "nullable": True},
            "date": {"data_type": "timestamp", "nullable": True},
        },
    )
    def survey_answers(poll_item: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
        """Fetch all survey answers for a specific poll.

        Yields one row per (response, question, answer value). Polls for
        which the client returns nothing falsy-wise produce no rows.
        """
        poll_id = poll_item["poll_id"]
        course_id = poll_item["course_id"]
        poll_title = poll_item["poll_title"]

        if not poll_id or not course_id:
            return

        survey_data = client.fetch_survey_answers_for_poll(poll_id, course_id)
        if not survey_data:
            return

        # NOTE(review): these asserts validate the API payload shape but are
        # stripped when Python runs with -O; kept as-is so the raised
        # exception type does not change for existing callers.
        assert "answers" in survey_data, "no answers in survey data " + json.dumps(
            survey_data
        )
        assert isinstance(survey_data["answers"], list), "answers is not a list"
        assert "questions" in survey_data, "no questions in survey data"
        assert isinstance(survey_data["questions"], dict), "questions is not a dict"

        questions = survey_data["questions"]

        for response in survey_data["answers"]:
            if "answers" not in response:
                continue
            date = normalize_date_field(response.get("date"))

            for question_id, answer_values in response["answers"].items():
                # Distinct name for the inner loop variable: the original
                # reused `answer`, shadowing the enclosing response record.
                for answer_value in answer_values:
                    yield {
                        "course_id": course_id,
                        "poll_id": poll_id,
                        "poll_title": poll_title,
                        "question_id": question_id,
                        "question_type": questions[question_id].get("type_quest"),
                        "question_title": questions[question_id].get("title_quest"),
                        "answer": answer_value,
                        "date": date,
                    }

    return [
        users,
        courses,
        user_fields,
        branches,
        groups,
        group_members,
        course_fields,
        learning_objects,
        learning_plans,
        learning_plan_enrollments,
        learning_plan_course_enrollments,
        course_enrollments,
        sessions,
        categories,
        certifications,
        external_training,
        polls,
        survey_answers,  # Standalone survey resource
    ]