omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,345 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Fetches Personio Employees, Absences, Attendances."""
16
+
17
+ from typing import Iterable, Optional
18
+
19
+ import dlt
20
+ from dlt.common import pendulum
21
+ from dlt.common.time import ensure_pendulum_datetime
22
+ from dlt.common.typing import TAnyDateTime, TDataItem
23
+ from dlt.sources import DltResource
24
+
25
+ from .helpers import PersonioAPI
26
+
27
+
28
@dlt.source(name="personio", max_table_nesting=0)
def personio_source(
    start_date: TAnyDateTime,
    end_date: Optional[TAnyDateTime] = None,
    client_id: str = dlt.secrets.value,
    client_secret: str = dlt.secrets.value,
    items_per_page: int = 200,
) -> Iterable[DltResource]:
    """
    The source for the Personio pipeline.

    Available resources are employees, absence_types, absences, attendances,
    projects, document_categories and custom_reports_list, plus the
    transformers employees_absences_balance (fed by employees) and
    custom_reports (fed by custom_reports_list).

    Args:
        start_date: The first day to fetch attendances for. Only the
            attendances resource uses it.
        end_date: The last day to fetch attendances for. When omitted, the
            attendances resource falls back to the current time.
        client_id: The client ID of your app.
        client_secret: The client secret of your app.
        items_per_page: The max number of items to fetch per page. Defaults to 200.
    Returns:
        Iterable: A list of DltResource objects representing the data resources.
    """

    client = PersonioAPI(client_id, client_secret)

    def _format_cursor(last_value: Optional[pendulum.DateTime]) -> Optional[str]:
        """Render an incremental cursor as the timestamp string sent to the API.

        Returns None when there is no saved cursor yet, so the corresponding
        query parameter carries no value.
        """
        if not last_value:
            return None
        return last_value.format("YYYY-MM-DDTHH:mm:ss")

    @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
    def employees(
        updated_at: dlt.sources.incremental[
            pendulum.DateTime
        ] = dlt.sources.incremental(
            "last_modified_at", initial_value=None, allow_external_schedulers=True
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
        """
        The resource for employees, supports incremental loading and pagination.

        Args:
            updated_at: The saved state of the last 'last_modified_at' value.
            items_per_page: The max number of items to fetch per page. Defaults to 200.

        Returns:
            Iterable: A generator of employees.
        """

        def convert_item(item: TDataItem) -> TDataItem:
            """Flattens an employee's attribute objects into one name -> value row."""
            attributes = item.get("attributes", {})
            output = {}
            for value in attributes.values():
                # Prefer the universal id as the column name; attributes
                # without one fall back to their lower-cased label.
                name = value["universal_id"]
                if not name:
                    label: str = value["label"].replace(" ", "_")
                    name = label.lower()

                if value["type"] == "date" and value["value"]:
                    output[name] = ensure_pendulum_datetime(value["value"])
                else:
                    output[name] = value["value"]
            return output

        params = {
            "limit": items_per_page,
            "updated_since": _format_cursor(updated_at.last_value),
        }

        pages = client.get_pages("company/employees", params=params)
        for page in pages:
            yield [convert_item(item) for item in page]

    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
    def absence_types(items_per_page: int = items_per_page) -> Iterable[TDataItem]:
        """
        The resource for absence types (time-off-types), supports pagination.

        Args:
            items_per_page: The max number of items to fetch per page. Defaults to 200.

        Returns:
            Iterable: A generator of absence types.
        """

        pages = client.get_pages(
            "company/time-off-types", params={"limit": items_per_page}
        )

        for page in pages:
            yield [item.get("attributes", {}) for item in page]

    @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
    def absences(
        updated_at: dlt.sources.incremental[
            pendulum.DateTime
        ] = dlt.sources.incremental(
            "updated_at", initial_value=None, allow_external_schedulers=True
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
        """
        The resource for absence (time-offs), supports incremental loading and pagination.

        Args:
            updated_at: The saved state of the last 'updated_at' value.
            items_per_page: The max number of items to fetch per page. Defaults to 200.

        Returns:
            Iterable: A generator of absences.
        """
        params = {
            "limit": items_per_page,
            "updated_since": _format_cursor(updated_at.last_value),
        }

        def convert_item(item: TDataItem) -> TDataItem:
            """Extracts an absence's attributes and parses its timestamps."""
            output = item.get("attributes", {})
            output["created_at"] = ensure_pendulum_datetime(output["created_at"])
            output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
            return output

        pages = client.get_pages(
            "company/time-offs",
            params=params,
            offset_by_page=True,
        )

        for page in pages:
            yield [convert_item(item) for item in page]

    @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
    def attendances(
        start_date: TAnyDateTime = start_date,
        end_date: Optional[TAnyDateTime] = end_date,
        updated_at: dlt.sources.incremental[
            pendulum.DateTime
        ] = dlt.sources.incremental(
            "updated_at", initial_value=None, allow_external_schedulers=True
        ),
        items_per_page: int = items_per_page,
    ) -> Iterable[TDataItem]:
        """
        The resource for attendances, supports incremental loading and pagination.

        Args:
            start_date: The start date to fetch attendances from.
            end_date: The end date to fetch attendances to. Defaults to now.
            updated_at: The saved state of the last 'updated_at' value.
            items_per_page: The max number of items to fetch per page. Defaults to 200.

        Returns:
            Iterable: A generator of attendances.
        """

        end_date = end_date or pendulum.now()

        params = {
            "limit": items_per_page,
            "start_date": ensure_pendulum_datetime(start_date).to_date_string(),
            "end_date": ensure_pendulum_datetime(end_date).to_date_string(),
            "updated_from": _format_cursor(updated_at.last_value),
            "includePending": True,
        }
        pages = client.get_pages(
            "company/attendances",
            params=params,
        )

        def convert_item(item: TDataItem) -> TDataItem:
            """Converts an attendance item."""
            output = dict(id=item["id"], **item.get("attributes"))
            output["date"] = ensure_pendulum_datetime(output["date"]).date()
            output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
            return output

        for page in pages:
            yield [convert_item(item) for item in page]

    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
    def projects() -> Iterable[TDataItem]:
        """
        The resource for projects.

        Returns:
            Iterable: A generator of projects.
        """

        pages = client.get_pages("company/attendances/projects")

        def convert_item(item: TDataItem) -> TDataItem:
            """Converts a project item."""
            output = dict(id=item["id"], **item.get("attributes"))
            output["created_at"] = ensure_pendulum_datetime(output["created_at"])
            output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
            return output

        for page in pages:
            yield [convert_item(item) for item in page]

    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
    def document_categories() -> Iterable[TDataItem]:
        """
        The resource for document_categories.

        Returns:
            Iterable: A generator of document_categories.
        """

        pages = client.get_pages("company/document-categories")

        def convert_item(item: TDataItem) -> TDataItem:
            """Converts a document_categories item."""
            output = dict(id=item["id"], **item.get("attributes"))
            return output

        for page in pages:
            yield [convert_item(item) for item in page]

    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
    def custom_reports_list() -> Iterable[TDataItem]:
        """
        The resource for the list of custom reports.

        Returns:
            Iterable: A generator of custom report descriptions.
        """

        pages = client.get_pages("company/custom-reports/reports")

        for page in pages:
            yield [item.get("attributes", {}) for item in page]

    @dlt.transformer(
        data_from=employees,
        write_disposition="merge",
        primary_key=["employee_id", "id"],
    )
    @dlt.defer
    def employees_absences_balance(employees_item: TDataItem) -> Iterable[TDataItem]:
        """
        The transformer for employees_absences_balance.

        Args:
            employees_item: The employee data.

        Returns:
            Iterable: A generator of employees_absences_balance for each employee.
        """
        for employee in employees_item:
            employee_id = employee["id"]
            pages = client.get_pages(
                f"company/employees/{employee_id}/absences/balance",
            )

            for page in pages:
                # Tag every balance row with the employee it belongs to.
                yield [dict(employee_id=employee_id, **i) for i in page]

    @dlt.transformer(
        data_from=custom_reports_list,
        write_disposition="merge",
        primary_key=["report_id", "item_id"],
    )
    @dlt.defer
    def custom_reports(
        custom_reports_item: TDataItem, items_per_page: int = items_per_page
    ) -> Iterable[TDataItem]:
        """
        The transformer for custom reports, supports pagination.

        Args:
            custom_reports_item: The custom_report data.
            items_per_page: The max number of items to fetch per page. Defaults to 200.

        Returns:
            Iterable: A generator of the items of each custom report.
        """

        def convert_item(item: TDataItem, report_id: str) -> TDataItem:
            """Flattens one custom report item into a row keyed by attribute id."""
            attributes = item.pop("attributes")
            # With "attributes" removed, the first remaining value of the
            # item is used as its identifier.
            output = dict(report_id=report_id, item_id=list(item.values())[0])
            for value in attributes:
                name = value["attribute_id"]
                if value["data_type"] == "date" and value["value"]:
                    output[name] = ensure_pendulum_datetime(value["value"])
                else:
                    output[name] = value["value"]
            return output

        for custom_report in custom_reports_item:
            report_id = custom_report["id"]
            pages = client.get_pages(
                f"company/custom-reports/reports/{report_id}",
                params={"limit": items_per_page},
                offset_by_page=True,
            )

            for page in pages:
                for report in page:
                    report_items = report.get("attributes", {}).get("items", [])
                    yield [convert_item(item, report_id) for item in report_items]

    return (
        employees,
        absence_types,
        absences,
        attendances,
        projects,
        document_categories,
        employees_absences_balance,
        custom_reports_list,
        custom_reports,
    )
@@ -0,0 +1,100 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Personio source helpers"""
16
+
17
+ from typing import Any, Iterable, Optional
18
+ from urllib.parse import urljoin
19
+
20
+ from dlt.common.typing import Dict, TDataItems
21
+ from dlt.sources.helpers import requests
22
+
23
+
24
class PersonioAPI:
    """A Personio API client."""

    base_url = "https://api.personio.de/v1/"

    def __init__(self, client_id: str, client_secret: str) -> None:
        """
        Store the credentials and request an access token right away.

        Args:
            client_id: The client ID of your app.
            client_secret: The client secret of your app.
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.access_token = self.get_token()

    def get_token(self) -> str:
        """Get an access token from Personio.

        Returns:
            The access token.
        """
        headers = {"Content-Type": "application/json", "Accept": "application/json"}
        data = {"client_id": self.client_id, "client_secret": self.client_secret}
        url = urljoin(self.base_url, "auth")
        response = requests.request("POST", url, headers=headers, json=data)
        json_response = response.json()
        token: str = json_response["data"]["token"]
        return token

    def get_pages(
        self,
        resource: str,
        params: Optional[Dict[str, Any]] = None,
        offset_by_page: bool = False,
    ) -> Iterable[TDataItems]:
        """Get all pages from Personio using requests.

        Both an "offset" and a "page" query parameter are sent with every
        request; they start at 1 when ``offset_by_page`` is True and at 0
        otherwise.

        Args:
            resource: The resource to get pages for (e.g. employees, absences, attendances).
            params: The parameters for the resource. The dict is copied, so the
                caller's object is never modified.
            offset_by_page (bool): If True, offset increases by 1 per page; else, it
                increases by the "limit" parameter, or by the number of items just
                received when no limit was given.

        Yields:
            List of data items from the page
        """
        # Copy so the pagination counters below never leak into the caller's dict.
        params = dict(params) if params else {}
        headers = {"Authorization": f"Bearer {self.access_token}"}
        first_index = int(offset_by_page)
        params.update({"offset": first_index, "page": first_index})
        url = urljoin(self.base_url, resource)
        starts_from_zero = False
        while True:
            response = requests.get(url, headers=headers, params=params)
            json_response = response.json()
            data = json_response["data"]
            # Get an item list from the page
            yield data

            # Responses without pagination metadata are treated as single-page.
            metadata = json_response.get("metadata")
            if not metadata:
                break

            total_pages = metadata.get("total_pages")
            current_page = metadata.get("current_page")
            if current_page == 0:
                # Zero-based page numbering: the last page is total_pages - 1.
                starts_from_zero = True

            if current_page >= (total_pages - int(starts_from_zero)) or not data:
                break

            if offset_by_page:
                params["offset"] += 1
            else:
                # Without an explicit limit, advance by what was actually received
                # instead of failing on the missing key.
                params["offset"] += params.get("limit", len(data))
            params["page"] += 1
@@ -0,0 +1,65 @@
1
+ from typing import Iterable, Optional
2
+
3
+ import dlt
4
+ import pendulum
5
+ import requests
6
+ from dlt.common.typing import TAnyDateTime, TDataItem
7
+ from dlt.sources import DltResource
8
+ from dlt.sources.helpers.requests import Client
9
+
10
+ from omniload.src.phantombuster.client import PhantombusterClient
11
+
12
+
13
def retry_on_limit(
    response: Optional[requests.Response], exception: Optional[BaseException]
) -> bool:
    """Tell the retrying client whether a request should be attempted again.

    Only a received response with HTTP status 429 triggers a retry; a missing
    response or any other status does not. ``exception`` is accepted as part
    of the callback signature but is not inspected.
    """
    return response is not None and response.status_code == 429
19
+
20
+
21
def create_client() -> requests.Session:
    """Build the HTTP session used for Phantombuster calls.

    The underlying client does not raise on error statuses by itself and
    retries up to 12 attempts, with a backoff factor of 2, whenever
    ``retry_on_limit`` asks for a retry.
    """
    retrying_client = Client(
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
        request_backoff_factor=2,
    )
    return retrying_client.session
28
+
29
+
30
@dlt.source(max_table_nesting=0)
def phantombuster_source(
    api_key: str, agent_id: str, start_date: TAnyDateTime, end_date: TAnyDateTime | None
) -> Iterable[DltResource]:
    """Expose finished Phantombuster containers of one agent as a dlt source.

    Args:
        api_key: The Phantombuster API key handed to the client.
        agent_id: The agent whose containers are fetched.
        start_date: Initial value of the ``ended_at`` incremental cursor.
        end_date: End value of the cursor; when it is None the current UTC
            time is used as the upper bound.

    Returns:
        The ``completed_phantoms`` resource.
    """
    api = PhantombusterClient(api_key)

    @dlt.resource(
        write_disposition="merge",
        primary_key="container_id",
        columns={
            "partition_dt": {"data_type": "date", "partition": True},
        },
    )
    def completed_phantoms(
        dateTime=(
            dlt.sources.incremental(
                "ended_at",
                initial_value=start_date,
                end_value=end_date,
                range_start="closed",
                range_end="closed",
            )
        ),
    ) -> Iterable[TDataItem]:
        """Yield the containers that ended inside the incremental window."""
        # An open-ended window is closed at "now" in UTC.
        window_end = (
            pendulum.now(tz="UTC") if dateTime.end_value is None else dateTime.end_value
        )
        window_start = dateTime.last_value

        yield api.fetch_containers_result(
            create_client(), agent_id, start_date=window_start, end_date=window_end
        )

    return completed_phantoms
@@ -0,0 +1,87 @@
1
+ from typing import Union
2
+
3
+ import pendulum
4
+ import requests
5
+
6
+
7
class PhantombusterClient:
    """Small client for the Phantombuster v2 container endpoints."""

    def __init__(self, api_key: str):
        """
        Args:
            api_key: The key sent in the ``X-Phantombuster-Key-1`` header.
        """
        self.api_key = api_key

    def _get_headers(self):
        """Return the authentication and content-negotiation headers."""
        return {
            "X-Phantombuster-Key-1": self.api_key,
            "accept": "application/json",
        }

    def fetch_containers_result(
        self,
        session: requests.Session,
        agent_id: str,
        start_date: pendulum.DateTime,
        end_date: pendulum.DateTime,
    ):
        """Yield one row per finalized container that ended within the window.

        Containers are listed page by page (100 per request), moving the
        ``beforeEndedAt`` cursor to the smallest ``endedAt`` seen so far. For
        every container whose ``endedAt`` lies in ``[start_date, end_date]``
        the result object is fetched and a row is produced.

        Args:
            session: The HTTP session used for all requests.
            agent_id: The agent whose containers are listed.
            start_date: Inclusive lower bound for ``endedAt``.
            end_date: Inclusive upper bound for ``endedAt``.

        Yields:
            Dicts with the keys ``container_id``, ``container``, ``result``,
            ``partition_dt`` and ``ended_at``.

        Raises:
            requests.HTTPError: If listing the containers returns an error status.
        """
        url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
        before_ended_at = None
        limit = 100

        # The window is compared against ``endedAt`` in epoch milliseconds.
        started_at = start_date.int_timestamp * 1000 + start_date.microsecond // 1000
        ended_at = end_date.int_timestamp * 1000 + end_date.microsecond // 1000

        while True:
            params: dict[str, Union[str, int, float, bytes, None]] = {
                "agentId": agent_id,
                "limit": limit,
                "mode": "finalized",
            }

            if before_ended_at:
                params["beforeEndedAt"] = before_ended_at

            response = session.get(url=url, headers=self._get_headers(), params=params)
            # Fail loudly on an error status instead of trying to read a
            # container list out of an error payload.
            response.raise_for_status()
            data = response.json()
            containers = data.get("containers", [])

            cursor_before_page = before_ended_at
            for container in containers:
                container_ended_at = container.get("endedAt")
                if container_ended_at is None:
                    # Without an end time the container can neither be placed
                    # in the window nor be used to advance the cursor.
                    continue

                if before_ended_at is None or before_ended_at > container_ended_at:
                    before_ended_at = container_ended_at

                if container_ended_at < started_at or container_ended_at > ended_at:
                    continue

                try:
                    result = self.fetch_result_object(session, container["id"])
                except requests.RequestException as e:
                    # Best effort: report the failure and move on to the next one.
                    print(f"Error fetching result for container {container['id']}: {e}")
                    continue

                ended_at_datetime = pendulum.from_timestamp(
                    container_ended_at / 1000, tz="UTC"
                )
                yield {
                    "container_id": container["id"],
                    "container": container,
                    "result": result,
                    "partition_dt": ended_at_datetime.date(),
                    "ended_at": ended_at_datetime,
                }

            if not data.get("maxLimitReached"):
                break
            if before_ended_at == cursor_before_page:
                # The cursor did not move, so the next request would be identical
                # to this one; stop instead of looping forever.
                break

    def fetch_result_object(self, session: requests.Session, container_id: str):
        """Fetch and return the decoded result object of one container.

        Args:
            session: The HTTP session used for the request.
            container_id: The id of the container.

        Raises:
            requests.HTTPError: If the response has an error status.
        """
        result_url = (
            "https://api.phantombuster.com/api/v2/containers/fetch-result-object"
        )
        params = {"id": container_id}
        response = session.get(result_url, headers=self._get_headers(), params=params)
        response.raise_for_status()

        return response.json()
@@ -0,0 +1,82 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.time import ensure_pendulum_datetime
6
+ from dlt.common.typing import TDataItem
7
+ from dlt.sources import DltResource
8
+ from dlt.sources.helpers import requests
9
+
10
+
11
@dlt.source(name="pinterest", max_table_nesting=0)
def pinterest_source(
    start_date: pendulum.DateTime,
    access_token: str,
    page_size: int = 200,
    end_date: pendulum.DateTime | None = None,
) -> Iterable[DltResource]:
    """Expose Pinterest pins and boards as incremental dlt resources.

    Args:
        start_date: Initial value of the ``created_at`` cursor; items created
            at or before the effective start are skipped.
        access_token: Bearer token sent with every request.
        page_size: Value of the ``page_size`` query parameter. Defaults to 200.
        end_date: End value of the cursor; when None, the current UTC time is
            used as the upper bound.

    Returns:
        The ``pins`` and ``boards`` resources.
    """
    session = requests.Session()
    session.headers.update({"Authorization": f"Bearer {access_token}"})
    base_url = "https://api.pinterest.com/v5"

    def fetch_data(
        endpoint: str,
        start_dt: pendulum.DateTime,
        end_dt: pendulum.DateTime,
    ) -> Iterable[TDataItem]:
        """Page through ``endpoint`` and yield items created in (start_dt, end_dt]."""
        url = f"{base_url}/{endpoint}"
        query = {"page_size": page_size}
        while True:
            resp = session.get(url, params=query)
            resp.raise_for_status()
            payload = resp.json()

            for entry in payload.get("items") or []:
                created = ensure_pendulum_datetime(entry["created_at"])
                if created <= start_dt or created > end_dt:
                    continue
                # Hand the parsed timestamp downstream instead of the raw value.
                entry["created_at"] = created
                yield entry

            next_bookmark = payload.get("bookmark")
            if not next_bookmark:
                return
            # The bookmark of this page selects the next one.
            query["bookmark"] = next_bookmark

    def _window(cursor):
        """Resolve the (start, end) bounds for one resource run."""
        lower = cursor.last_value or start_date
        upper = pendulum.now("UTC") if end_date is None else cursor.end_value
        return lower, upper

    @dlt.resource(write_disposition="merge", primary_key="id")
    def pins(
        datetime=dlt.sources.incremental(
            "created_at",
            initial_value=start_date,
            end_value=end_date,
        ),
    ) -> Iterable[TDataItem]:
        """Yield pins created inside the incremental window."""
        lower, upper = _window(datetime)
        yield from fetch_data("pins", lower, upper)

    @dlt.resource(write_disposition="merge", primary_key="id")
    def boards(
        datetime=dlt.sources.incremental(
            "created_at",
            initial_value=start_date,
            end_value=end_date,
        ),
    ) -> Iterable[TDataItem]:
        """Yield boards created inside the incremental window."""
        lower, upper = _window(datetime)
        yield from fetch_data("boards", lower, upper)

    return pins, boards