ingestr 0.13.9__py3-none-any.whl → 0.13.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/src/applovin/__init__.py +8 -7
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +4 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +85 -0
- ingestr/src/salesforce/__init__.py +149 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/sources.py +118 -5
- {ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/METADATA +27 -16
- {ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/RECORD +13 -9
- {ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/WHEEL +0 -0
- {ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/applovin/__init__.py
CHANGED
@@ -1,10 +1,10 @@
-from datetime import datetime,
+from datetime import datetime, timedelta, timezone
 from enum import Enum
 from typing import Dict, List, Optional
-from requests import Response
 
 import dlt
 from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
+from requests import Response
 
 
 class InvalidCustomReportError(Exception):
@@ -13,9 +13,11 @@ class InvalidCustomReportError(Exception):
             "Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}"
         )
 
+
 class ClientError(Exception):
     pass
 
+
 TYPE_HINTS = {
     "application_is_hidden": {"data_type": "bool"},
     "average_cpa": {"data_type": "double"},
@@ -119,7 +121,6 @@ def applovin_source(
     end_date: Optional[str],
     custom: Optional[str],
 ):
-
     backfill = False
     if end_date is None:
         backfill = True
@@ -127,7 +128,7 @@ def applovin_source(
         # use the greatest of yesterday and start_date
         end_date = max(
             datetime.now(timezone.utc) - timedelta(days=1),
-            datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
+            datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc),
         ).strftime("%Y-%m-%d")
 
     config: RESTAPIConfig = {
@@ -157,7 +158,7 @@ def applovin_source(
                 "paginator": "single_page",
                 "response_actions": [
                     http_error_handler,
-                ]
+                ],
             },
         },
         "resources": [
@@ -177,8 +178,7 @@ def applovin_source(
                 "advertiser-probabilistic-report",
                 "probabilisticReport",
                 exclude(
-                    REPORT_SCHEMA[ReportType.ADVERTISER],
-                    PROBABILISTIC_REPORT_EXCLUDE
+                    REPORT_SCHEMA[ReportType.ADVERTISER], PROBABILISTIC_REPORT_EXCLUDE
                 ),
                 ReportType.ADVERTISER,
             ),
@@ -256,6 +256,7 @@ def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
 def build_type_hints(cols: List[str]) -> dict:
     return {col: TYPE_HINTS[col] for col in cols if col in TYPE_HINTS}
 
+
 def http_error_handler(resp: Response):
     if not resp.ok:
         raise ClientError(f"HTTP Status {resp.status_code}: {resp.text}")
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.9"
+version = "v0.13.11"
ingestr/src/factory.py
CHANGED
@@ -42,12 +42,14 @@ from ingestr.src.sources import (
     MongoDbSource,
     NotionSource,
     S3Source,
+    SalesforceSource,
     ShopifySource,
     SlackSource,
     SqlSource,
     StripeAnalyticsSource,
     TikTokSource,
     ZendeskSource,
+    PersonioSource,
 )
 
 SQL_SOURCE_SCHEMES = [
@@ -136,6 +138,8 @@ class SourceDestinationFactory:
         "linkedinads": LinkedInAdsSource,
         "applovin": AppLovinSource,
         "applovinmax": ApplovinMaxSource,
+        "salesforce": SalesforceSource,
+        "personio": PersonioSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
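The two new registry entries mean that source URIs with the `salesforce://` and `personio://` schemes now resolve to the new source classes. A minimal sketch of that lookup follows; it is not part of the diff, and the placeholder URI and the small helper function are illustrative only:

from urllib.parse import urlparse

from ingestr.src.sources import PersonioSource, SalesforceSource

# Mirrors the mapping added to SourceDestinationFactory above.
sources = {
    "salesforce": SalesforceSource,
    "personio": PersonioSource,
}

def pick_source(uri: str):
    # The URI scheme selects the source class, e.g. "salesforce" or "personio".
    return sources[urlparse(uri).scheme]()

print(pick_source("salesforce://?username=u&password=p&token=t"))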
ingestr/src/personio/__init__.py
ADDED
@@ -0,0 +1,331 @@
+"""Fetches Personio Employees, Absences, Attendances."""
+
+from typing import Iterable, Optional
+
+import dlt
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TAnyDateTime, TDataItem
+from dlt.sources import DltResource
+
+from .helpers import PersonioAPI
+
+
+@dlt.source(name="personio", max_table_nesting=0)
+def personio_source(
+    start_date: TAnyDateTime,
+    end_date: Optional[TAnyDateTime] = None,
+    client_id: str = dlt.secrets.value,
+    client_secret: str = dlt.secrets.value,
+    items_per_page: int = 200,
+) -> Iterable[DltResource]:
+    """
+    The source for the Personio pipeline. Available resources are employees, absences, and attendances.
+
+    Args:
+        client_id: The client ID of your app.
+        client_secret: The client secret of your app.
+        items_per_page: The max number of items to fetch per page. Defaults to 200.
+    Returns:
+        Iterable: A list of DltResource objects representing the data resources.
+    """
+
+    client = PersonioAPI(client_id, client_secret)
+
+    @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
+    def employees(
+        updated_at: dlt.sources.incremental[
+            pendulum.DateTime
+        ] = dlt.sources.incremental(
+            "last_modified_at", initial_value=None, allow_external_schedulers=True
+        ),
+        items_per_page: int = items_per_page,
+    ) -> Iterable[TDataItem]:
+        """
+        The resource for employees, supports incremental loading and pagination.
+
+        Args:
+            updated_at: The saved state of the last 'last_modified_at' value.
+            items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+        Returns:
+            Iterable: A generator of employees.
+        """
+
+        def convert_item(item: TDataItem) -> TDataItem:
+            """Converts an employee item."""
+            attributes = item.get("attributes", {})
+            output = {}
+            for value in attributes.values():
+                name = value["universal_id"]
+                if not name:
+                    label: str = value["label"].replace(" ", "_")
+                    name = label.lower()
+
+                if value["type"] == "date" and value["value"]:
+                    output[name] = ensure_pendulum_datetime(value["value"])
+                else:
+                    output[name] = value["value"]
+            return output
+
+        if updated_at.last_value:
+            last_value = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
+        else:
+            last_value = None
+
+        params = {"limit": items_per_page, "updated_since": last_value}
+
+        pages = client.get_pages("company/employees", params=params)
+        for page in pages:
+            yield [convert_item(item) for item in page]
+
+    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+    def absence_types(items_per_page: int = items_per_page) -> Iterable[TDataItem]:
+        """
+        The resource for absence types (time-off-types), supports pagination.
+
+        Args:
+            items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+        Returns:
+            Iterable: A generator of absences.
+        """
+
+        pages = client.get_pages(
+            "company/time-off-types", params={"limit": items_per_page}
+        )
+
+        for page in pages:
+            yield [item.get("attributes", {}) for item in page]
+
+    @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
+    def absences(
+        updated_at: dlt.sources.incremental[
+            pendulum.DateTime
+        ] = dlt.sources.incremental(
+            "updated_at", initial_value=None, allow_external_schedulers=True
+        ),
+        items_per_page: int = items_per_page,
+    ) -> Iterable[TDataItem]:
+        """
+        The resource for absence (time-offs), supports incremental loading and pagination.
+
+        Args:
+            updated_at: The saved state of the last 'updated_at' value.
+            items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+        Returns:
+            Iterable: A generator of absences.
+        """
+        if updated_at.last_value:
+            updated_iso = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
+        else:
+            updated_iso = None
+
+        params = {
+            "limit": items_per_page,
+            "updated_since": updated_iso,
+        }
+
+        def convert_item(item: TDataItem) -> TDataItem:
+            output = item.get("attributes", {})
+            output["created_at"] = ensure_pendulum_datetime(output["created_at"])
+            output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
+            return output
+
+        pages = client.get_pages(
+            "company/time-offs",
+            params=params,
+            offset_by_page=True,
+        )
+
+        for page in pages:
+            yield [convert_item(item) for item in page]
+
+    @dlt.resource(primary_key="id", write_disposition="merge", max_table_nesting=0)
+    def attendances(
+        start_date: TAnyDateTime = start_date,
+        end_date: Optional[TAnyDateTime] = end_date,
+        updated_at: dlt.sources.incremental[
+            pendulum.DateTime
+        ] = dlt.sources.incremental(
+            "updated_at", initial_value=None, allow_external_schedulers=True
+        ),
+        items_per_page: int = items_per_page,
+    ) -> Iterable[TDataItem]:
+        """
+        The resource for attendances, supports incremental loading and pagination.
+
+        Args:
+            start_date: The start date to fetch attendances from.
+            end_date: The end date to fetch attendances from. Defaults to now.
+            updated_at: The saved state of the last 'updated_at' value.
+            items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+        Returns:
+            Iterable: A generator of attendances.
+        """
+
+        end_date = end_date or pendulum.now()
+        if updated_at.last_value:
+            updated_iso = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
+        else:
+            updated_iso = None
+
+        params = {
+            "limit": items_per_page,
+            "start_date": ensure_pendulum_datetime(start_date).to_date_string(),
+            "end_date": ensure_pendulum_datetime(end_date).to_date_string(),
+            "updated_from": updated_iso,
+            "includePending": True,
+        }
+        pages = client.get_pages(
+            "company/attendances",
+            params=params,
+        )
+
+        def convert_item(item: TDataItem) -> TDataItem:
+            """Converts an attendance item."""
+            output = dict(id=item["id"], **item.get("attributes"))
+            output["date"] = ensure_pendulum_datetime(output["date"]).date()
+            output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
+            return output
+
+        for page in pages:
+            yield [convert_item(item) for item in page]
+
+    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+    def projects() -> Iterable[TDataItem]:
+        """
+        The resource for projects.
+
+        Returns:
+            Iterable: A generator of projects.
+        """
+
+        pages = client.get_pages("company/attendances/projects")
+
+        def convert_item(item: TDataItem) -> TDataItem:
+            """Converts an attendance item."""
+            output = dict(id=item["id"], **item.get("attributes"))
+            output["created_at"] = ensure_pendulum_datetime(output["created_at"])
+            output["updated_at"] = ensure_pendulum_datetime(output["updated_at"])
+            return output
+
+        for page in pages:
+            yield [convert_item(item) for item in page]
+
+    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+    def document_categories() -> Iterable[TDataItem]:
+        """
+        The resource for document_categories.
+
+        Returns:
+            Iterable: A generator of document_categories.
+        """
+
+        pages = client.get_pages("company/document-categories")
+
+        def convert_item(item: TDataItem) -> TDataItem:
+            """Converts an document_categories item."""
+            output = dict(id=item["id"], **item.get("attributes"))
+            return output
+
+        for page in pages:
+            yield [convert_item(item) for item in page]
+
+    @dlt.resource(primary_key="id", write_disposition="replace", max_table_nesting=0)
+    def custom_reports_list() -> Iterable[TDataItem]:
+        """
+        The resource for custom_reports.
+
+        Returns:
+            Iterable: A generator of custom_reports.
+        """
+
+        pages = client.get_pages("company/custom-reports/reports")
+
+        for page in pages:
+            yield [item.get("attributes", {}) for item in page]
+
+    @dlt.transformer(
+        data_from=employees,
+        write_disposition="merge",
+        primary_key=["employee_id", "id"],
+    )
+    @dlt.defer
+    def employees_absences_balance(employees_item: TDataItem) -> Iterable[TDataItem]:
+        """
+        The transformer for employees_absences_balance.
+
+        Args:
+            employees_item: The employee data.
+
+        Returns:
+            Iterable: A generator of employees_absences_balance for each employee.
+        """
+        for employee in employees_item:
+            employee_id = employee["id"]
+            pages = client.get_pages(
+                f"company/employees/{employee_id}/absences/balance",
+            )
+
+            for page in pages:
+                yield [dict(employee_id=employee_id, **i) for i in page]
+
+    @dlt.transformer(
+        data_from=custom_reports_list,
+        write_disposition="merge",
+        primary_key=["report_id", "item_id"],
+    )
+    @dlt.defer
+    def custom_reports(
+        custom_reports_item: TDataItem, items_per_page: int = items_per_page
+    ) -> Iterable[TDataItem]:
+        """
+        The transformer for custom reports, supports pagination.
+
+        Args:
+            custom_reports_item: The custom_report data.
+            items_per_page: The max number of items to fetch per page. Defaults to 200.
+
+        Returns:
+            Iterable: A generator of employees_absences_balance for each employee.
+        """
+
+        def convert_item(item: TDataItem, report_id: str) -> TDataItem:
+            """Converts an employee item."""
+            attributes = item.pop("attributes")
+            output = dict(report_id=report_id, item_id=list(item.values())[0])
+            for value in attributes:
+                name = value["attribute_id"]
+                if value["data_type"] == "date" and value["value"]:
+                    output[name] = ensure_pendulum_datetime(value["value"])
+                else:
+                    output[name] = value["value"]
+            return output
+
+        for custom_report in custom_reports_item:
+            report_id = custom_report["id"]
+            pages = client.get_pages(
+                f"company/custom-reports/reports/{report_id}",
+                params={"limit": items_per_page},
+                offset_by_page=True,
+            )
+
+            for page in pages:
+                for report in page:
+                    report_items = report.get("attributes", {}).get("items", [])
+                    yield [convert_item(item, report_id) for item in report_items]
+
+    return (
+        employees,
+        absence_types,
+        absences,
+        attendances,
+        projects,
+        document_categories,
+        employees_absences_balance,
+        custom_reports_list,
+        custom_reports,
+    )
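Each resource above reshapes the raw Personio payload before yielding it; the employees resource, for instance, flattens the `attributes` map keyed by `universal_id`, falling back to a lower-cased label. A small sketch of that flattening, using an invented sample record, not part of the diff:

# Sample payload invented for illustration; only the flattening rule is real.
raw = {
    "attributes": {
        "first_name": {"universal_id": "first_name", "label": "First name", "type": "standard", "value": "Ada"},
        "dyn_1234": {"universal_id": "", "label": "Hire Date", "type": "date", "value": "2021-03-01"},
    }
}

flat = {}
for value in raw["attributes"].values():
    # Same fallback as convert_item: use universal_id, else the lower-cased label.
    name = value["universal_id"] or value["label"].replace(" ", "_").lower()
    # The real resource additionally parses "date" typed values with ensure_pendulum_datetime.
    flat[name] = value["value"]

print(flat)  # {'first_name': 'Ada', 'hire_date': '2021-03-01'}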
ingestr/src/personio/helpers.py
ADDED
@@ -0,0 +1,85 @@
+"""Personio source helpers"""
+from typing import Any, Iterable, Optional
+from urllib.parse import urljoin
+
+from dlt.common.typing import Dict, TDataItems
+from dlt.sources.helpers import requests
+
+
+class PersonioAPI:
+    """A Personio API client."""
+
+    base_url = "https://api.personio.de/v1/"
+
+    def __init__(self, client_id: str, client_secret: str) -> None:
+        """
+        Args:
+            client_id: The client ID of your app.
+            client_secret: The client secret of your app.
+        """
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self.access_token = self.get_token()
+
+    def get_token(self) -> str:
+        """Get an access token from Personio.
+
+        Returns:
+            The access token.
+        """
+        headers = {"Content-Type": "application/json", "Accept": "application/json"}
+        data = {"client_id": self.client_id, "client_secret": self.client_secret}
+        url = urljoin(self.base_url, "auth")
+        response = requests.request("POST", url, headers=headers, json=data)
+        json_response = response.json()
+        token: str = json_response["data"]["token"]
+        return token
+
+    def get_pages(
+        self,
+        resource: str,
+        params: Optional[Dict[str, Any]] = None,
+        offset_by_page: bool = False,
+    ) -> Iterable[TDataItems]:
+        """Get all pages from Personio using requests.
+
+        Args:
+            resource: The resource to get pages for (e.g. employees, absences, attendances).
+            params: The parameters for the resource.
+            offset_by_page (bool): If True, offset increases by 1 per page; else, increases by page_size.
+
+        Yields:
+            List of data items from the page
+        """
+        params = params or {}
+        headers = {"Authorization": f"Bearer {self.access_token}"}
+        params.update({"offset": int(offset_by_page), "page": int(offset_by_page)})
+        url = urljoin(self.base_url, resource)
+        starts_from_zero = False
+        while True:
+            response = requests.get(url, headers=headers, params=params)
+            json_response = response.json()
+            # Get an item list from the page
+            yield json_response["data"]
+
+            metadata = json_response.get("metadata")
+            if not metadata:
+                break
+
+            total_pages = metadata.get("total_pages")
+            current_page = metadata.get("current_page")
+            if current_page == 0:
+                starts_from_zero = True
+
+            if (
+                current_page >= (total_pages - int(starts_from_zero))
+                or not json_response["data"]
+            ):
+                break
+
+            if offset_by_page:
+                params["offset"] += 1
+                params["page"] += 1
+            else:
+                params["offset"] += params["limit"]
+                params["page"] += 1
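get_pages walks the paginated Personio endpoints in one of two modes: by record offset (the default, where `offset` grows by `limit` each page) or by page number (`offset_by_page=True`, used above for time-offs and custom reports, where `offset` and `page` start at 1 and grow by 1). A hedged usage sketch, not part of the diff; it needs valid Personio credentials to actually run, and the values below are placeholders:

from ingestr.src.personio.helpers import PersonioAPI

client = PersonioAPI(client_id="<client-id>", client_secret="<client-secret>")

# Record-offset pagination: offset advances by the page limit.
for page in client.get_pages("company/employees", params={"limit": 200}):
    print(len(page), "employees in this page")

# Page-number pagination, as the time-offs resource uses it.
for page in client.get_pages("company/time-offs", params={"limit": 200}, offset_by_page=True):
    print(len(page), "absences in this page")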
ingestr/src/salesforce/__init__.py
ADDED
@@ -0,0 +1,149 @@
+from typing import Iterable
+
+import dlt
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource, incremental
+from simple_salesforce import Salesforce
+
+from .helpers import get_records
+
+
+@dlt.source(name="salesforce")
+def salesforce_source(
+    username: str,
+    password: str,
+    token: str,
+) -> Iterable[DltResource]:
+    """
+    Retrieves data from Salesforce using the Salesforce API.
+
+    Args:
+        username (str): The username for authentication.
+        password (str): The password for authentication.
+        token (str): The security token for authentication.
+
+    Yields:
+        DltResource: Data resources from Salesforce.
+    """
+
+    client = Salesforce(username, password, token)
+
+    # define resources
+    @dlt.resource(write_disposition="replace")
+    def user() -> Iterable[TDataItem]:
+        yield get_records(client, "User")
+
+    @dlt.resource(write_disposition="replace")
+    def user_role() -> Iterable[TDataItem]:
+        yield get_records(client, "UserRole")
+
+    @dlt.resource(write_disposition="merge")
+    def opportunity(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "SystemModstamp", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(
+            client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
+        )
+
+    @dlt.resource(write_disposition="merge")
+    def opportunity_line_item(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "SystemModstamp", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(
+            client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
+        )
+
+    @dlt.resource(write_disposition="merge")
+    def opportunity_contact_role(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "SystemModstamp", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(
+            client,
+            "OpportunityContactRole",
+            last_timestamp.last_value,
+            "SystemModstamp",
+        )
+
+    @dlt.resource(write_disposition="merge")
+    def account(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "LastModifiedDate", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(
+            client, "Account", last_timestamp.last_value, "LastModifiedDate"
+        )
+
+    @dlt.resource(write_disposition="replace")
+    def contact() -> Iterable[TDataItem]:
+        yield get_records(client, "Contact")
+
+    @dlt.resource(write_disposition="replace")
+    def lead() -> Iterable[TDataItem]:
+        yield get_records(client, "Lead")
+
+    @dlt.resource(write_disposition="replace")
+    def campaign() -> Iterable[TDataItem]:
+        yield get_records(client, "Campaign")
+
+    @dlt.resource(write_disposition="merge")
+    def campaign_member(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "SystemModstamp", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(
+            client, "CampaignMember", last_timestamp.last_value, "SystemModstamp"
+        )
+
+    @dlt.resource(write_disposition="replace")
+    def product() -> Iterable[TDataItem]:
+        yield get_records(client, "Product2")
+
+    @dlt.resource(write_disposition="replace")
+    def pricebook() -> Iterable[TDataItem]:
+        yield get_records(client, "Pricebook2")
+
+    @dlt.resource(write_disposition="replace")
+    def pricebook_entry() -> Iterable[TDataItem]:
+        yield get_records(client, "PricebookEntry")
+
+    @dlt.resource(write_disposition="merge")
+    def task(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "SystemModstamp", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")
+
+    @dlt.resource(write_disposition="merge")
+    def event(
+        last_timestamp: incremental[str] = dlt.sources.incremental(
+            "SystemModstamp", initial_value=None
+        ),
+    ) -> Iterable[TDataItem]:
+        yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")
+
+    return (
+        user,
+        user_role,
+        opportunity,
+        opportunity_line_item,
+        opportunity_contact_role,
+        account,
+        contact,
+        lead,
+        campaign,
+        campaign_member,
+        product,
+        pricebook,
+        pricebook_entry,
+        task,
+        event,
+    )
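Further down in this release, SalesforceSource.dlt_source builds this source from URI query parameters and narrows it to a single resource with with_resources. The equivalent direct call looks roughly like the sketch below; it is illustrative only, and the credentials and the duckdb destination are placeholders:

import dlt

from ingestr.src.salesforce import salesforce_source

# Requires real Salesforce credentials to run; values are placeholders.
source = salesforce_source(
    username="<username>", password="<password>", token="<security-token>"
).with_resources("opportunity")

pipeline = dlt.pipeline(pipeline_name="salesforce", destination="duckdb", dataset_name="sfdc")
print(pipeline.run(source))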
ingestr/src/salesforce/helpers.py
ADDED
@@ -0,0 +1,64 @@
+"""Salesforce source helpers"""
+
+from typing import Iterable, Optional
+
+import pendulum
+from dlt.common.typing import TDataItem
+from simple_salesforce import Salesforce
+
+
+def get_records(
+    sf: Salesforce,
+    sobject: str,
+    last_state: Optional[str] = None,
+    replication_key: Optional[str] = None,
+) -> Iterable[TDataItem]:
+    """
+    Retrieves records from Salesforce for a specified sObject.
+
+    Args:
+        sf (Salesforce): An instance of the Salesforce API client.
+        sobject (str): The name of the sObject to retrieve records from.
+        last_state (str, optional): The last known state for incremental loading. Defaults to None.
+        replication_key (str, optional): The replication key for incremental loading. Defaults to None.
+
+    Yields:
+        Dict[TDataItem]: A dictionary representing a record from the Salesforce sObject.
+    """
+
+    # Get all fields for the sobject
+    desc = getattr(sf, sobject).describe()
+    # Salesforce returns compound fields as separate fields, so we need to filter them out
+    compound_fields = {
+        f["compoundFieldName"]
+        for f in desc["fields"]
+        if f["compoundFieldName"] is not None
+    } - {"Name"}
+    # Salesforce returns datetime fields as timestamps, so we need to convert them
+    date_fields = {
+        f["name"] for f in desc["fields"] if f["type"] in ("datetime",) and f["name"]
+    }
+    # If no fields are specified, use all fields except compound fields
+    fields = [f["name"] for f in desc["fields"] if f["name"] not in compound_fields]
+
+    # Generate a predicate to filter records by the replication key
+    predicate, order_by, n_records = "", "", 0
+    if replication_key:
+        if last_state:
+            predicate = f"WHERE {replication_key} > {last_state}"
+        order_by = f"ORDER BY {replication_key} ASC"
+    query = f"SELECT {', '.join(fields)} FROM {sobject} {predicate} {order_by}"
+
+    # Query all records in batches
+    for page in getattr(sf.bulk, sobject).query_all(query, lazy_operation=True):
+        for record in page:
+            # Strip out the attributes field
+            record.pop("attributes", None)
+            for field in date_fields:
+                # Convert Salesforce timestamps to ISO 8601
+                if record.get(field):
+                    record[field] = pendulum.from_timestamp(
+                        record[field] / 1000,
+                    ).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+        yield from page
+        n_records += len(page)
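get_records describes the sObject, drops compound fields, and then issues one SOQL query through the Bulk API, appending a replication-key predicate only for incremental resources. A sketch of the query string it builds follows; the field list and timestamp are invented for illustration:

# Placeholders standing in for whatever describe() returns on the org.
fields = ["Id", "Name", "StageName", "SystemModstamp"]
replication_key = "SystemModstamp"
last_state = "2024-01-01T00:00:00.000+00:00"

predicate = f"WHERE {replication_key} > {last_state}" if last_state else ""
order_by = f"ORDER BY {replication_key} ASC"
query = f"SELECT {', '.join(fields)} FROM Opportunity {predicate} {order_by}"
print(query)
# SELECT Id, Name, StageName, SystemModstamp FROM Opportunity
#   WHERE SystemModstamp > 2024-01-01T00:00:00.000+00:00 ORDER BY SystemModstamp ASC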
ingestr/src/sources.py
CHANGED
@@ -15,7 +15,7 @@ from typing import (
     Optional,
     Union,
 )
-from urllib.parse import ParseResult, parse_qs, quote, urlparse
+from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse
 
 import dlt
 import gcsfs  # type: ignore
@@ -83,6 +83,8 @@ from ingestr.src.linkedin_ads.dimension_time_enum import (
 )
 from ingestr.src.mongodb import mongodb_collection
 from ingestr.src.notion import notion_databases
+from ingestr.src.personio import personio_source
+from ingestr.src.salesforce import salesforce_source
 from ingestr.src.shopify import shopify_source
 from ingestr.src.slack import slack_source
 from ingestr.src.sql_database.callbacks import (
@@ -134,10 +136,46 @@ class SqlSource:
         if uri.startswith("mysql://"):
             uri = uri.replace("mysql://", "mysql+pymysql://")
 
+        # clickhouse://<username>:<password>@<host>:<port>?secure=<secure>
         if uri.startswith("clickhouse://"):
-
-
-
+            parsed_uri = urlparse(uri)
+
+            username = parsed_uri.username
+            if not username:
+                raise ValueError(
+                    "A username is required to connect to the ClickHouse database."
+                )
+
+            password = parsed_uri.password
+            if not password:
+                raise ValueError(
+                    "A password is required to authenticate with the ClickHouse database."
+                )
+
+            host = parsed_uri.hostname
+            if not host:
+                raise ValueError(
+                    "The hostname or IP address of the ClickHouse server is required to establish a connection."
+                )
+
+            port = parsed_uri.port
+            if not port:
+                raise ValueError(
+                    "The TCP port of the ClickHouse server is required to establish a connection."
+                )
+
+            query_params = parse_qs(parsed_uri.query)
+
+            if "http_port" in query_params:
+                del query_params["http_port"]
+
+            if "secure" not in query_params:
+                query_params["secure"] = ["1"]
+
+            uri = parsed_uri._replace(
+                scheme="clickhouse+native",
+                query=urlencode(query_params, doseq=True),
+            ).geturl()
 
         query_adapters = []
         if kwargs.get("sql_limit"):
@@ -1753,7 +1791,7 @@ class AppLovinSource:
     def dlt_source(self, uri: str, table: str, **kwargs):
         if kwargs.get("incremental_key") is not None:
             raise ValueError(
-                "
+                "Applovin takes care of incrementality on its own, you should not provide incremental_key"
             )
 
         parsed_uri = urlparse(uri)
@@ -1833,3 +1871,78 @@ class ApplovinMaxSource:
             api_key=api_key[0],
             application=application[0],
         ).with_resources(table)
+
+
+class SalesforceSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Salesforce takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        params = parse_qs(urlparse(uri).query)
+        creds = {
+            "username": params.get("username", [None])[0],
+            "password": params.get("password", [None])[0],
+            "token": params.get("token", [None])[0],
+        }
+        for k, v in creds.items():
+            if v is None:
+                raise MissingValueError(k, "Salesforce")
+
+        src = salesforce_source(**creds)  # type: ignore
+
+        if table not in src.resources:
+            raise UnsupportedResourceError(table, "Salesforce")
+
+        return src.with_resources(table)
+
+
+class PersonioSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    # applovin://?client_id=123&client_secret=123
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        client_id = params.get("client_id")
+        client_secret = params.get("client_secret")
+
+        interval_start = kwargs.get("interval_start")
+        interval_end = kwargs.get("interval_end")
+
+        interval_start_date = (
+            interval_start if interval_start is not None else "2018-01-01"
+        )
+
+        interval_end_date = (
+            interval_end.strftime("%Y-%m-%d") if interval_end is not None else None
+        )
+
+        if client_id is None:
+            raise MissingValueError("client_id", "Personio")
+        if client_secret is None:
+            raise MissingValueError("client_secret", "Personio")
+        if table not in [
+            "employees",
+            "absences",
+            "absence_types",
+            "attendances",
+            "projects",
+            "document_categories",
+            "employees_absences_balance",
+            "custom_reports_list",
+        ]:
+            raise UnsupportedResourceError(table, "Personio")
+
+        return personio_source(
+            client_id=client_id[0],
+            client_secret=client_secret[0],
+            start_date=interval_start_date,
+            end_date=interval_end_date,
+        ).with_resources(table)
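The new ClickHouse branch above normalizes a clickhouse:// URI into the clickhouse+native SQLAlchemy dialect, dropping any http_port query parameter and defaulting secure to 1. A worked example of that rewrite, not part of the diff; the host, port and credentials are placeholders:

from urllib.parse import parse_qs, urlencode, urlparse

parsed = urlparse("clickhouse://bob:secret@ch.example.com:9000?http_port=8443")

query = parse_qs(parsed.query)
query.pop("http_port", None)       # http_port is dropped
query.setdefault("secure", ["1"])  # secure defaults to 1

uri = parsed._replace(scheme="clickhouse+native", query=urlencode(query, doseq=True)).geturl()
print(uri)  # clickhouse+native://bob:secret@ch.example.com:9000?secure=1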
{ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.9
+Version: 0.13.11
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -18,42 +18,43 @@ Requires-Dist: asana==3.2.3
 Requires-Dist: clickhouse-connect==0.8.14
 Requires-Dist: clickhouse-driver==0.2.9
 Requires-Dist: clickhouse-sqlalchemy==0.2.7
-Requires-Dist: confluent-kafka>=2.
+Requires-Dist: confluent-kafka>=2.8.0
 Requires-Dist: databricks-sql-connector==2.9.3
 Requires-Dist: dataclasses-json==0.6.7
-Requires-Dist: dlt==1.
-Requires-Dist: duckdb-engine==0.
-Requires-Dist: duckdb==1.
+Requires-Dist: dlt==1.6.1
+Requires-Dist: duckdb-engine==0.15.0
+Requires-Dist: duckdb==1.2.0
 Requires-Dist: facebook-business==20.0.0
 Requires-Dist: flatten-json==0.1.14
 Requires-Dist: gcsfs==2024.10.0
 Requires-Dist: google-ads==25.1.0
-Requires-Dist: google-analytics-data==0.18.
+Requires-Dist: google-analytics-data==0.18.17
 Requires-Dist: google-api-python-client==2.130.0
 Requires-Dist: google-cloud-bigquery-storage==2.24.0
-Requires-Dist: mysql-connector-python==9.
+Requires-Dist: mysql-connector-python==9.2.0
 Requires-Dist: pendulum==3.0.0
 Requires-Dist: psutil==6.1.1
 Requires-Dist: psycopg2-binary==2.9.10
 Requires-Dist: py-machineid==0.6.0
 Requires-Dist: pyairtable==2.3.3
 Requires-Dist: pyarrow==18.1.0
-Requires-Dist: pyathena==3.
-Requires-Dist: pymongo==4.
+Requires-Dist: pyathena==3.12.2
+Requires-Dist: pymongo==4.11.1
 Requires-Dist: pymysql==1.1.1
 Requires-Dist: pyrate-limiter==3.7.0
 Requires-Dist: redshift-connector==2.1.5
 Requires-Dist: rich==13.9.4
 Requires-Dist: rudder-sdk-python==2.1.4
 Requires-Dist: s3fs==2024.10.0
+Requires-Dist: simple-salesforce==1.12.6
 Requires-Dist: snowflake-sqlalchemy==1.6.1
-Requires-Dist: sqlalchemy-bigquery==1.12.
+Requires-Dist: sqlalchemy-bigquery==1.12.1
 Requires-Dist: sqlalchemy-hana==2.0.0
 Requires-Dist: sqlalchemy-redshift==0.8.14
 Requires-Dist: sqlalchemy2-stubs==0.0.2a38
 Requires-Dist: sqlalchemy==1.4.52
 Requires-Dist: stripe==10.7.0
-Requires-Dist: tqdm==4.67.
+Requires-Dist: tqdm==4.67.1
 Requires-Dist: typer==0.13.1
 Requires-Dist: types-requests==2.32.0.20240907
 Provides-Extra: odbc
@@ -161,6 +162,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>✅</td>
 </tr>
+<tr>
+<td>DynamoDB</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Local CSV file</td>
 <td>✅</td>
@@ -247,11 +253,6 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>-</td>
 </tr>
-<tr>
-<td>DynamoDB</td>
-<td>✅</td>
-<td>-</td>
-</tr>
 <tr>
 <td>Facebook Ads</td>
 <td>✅</td>
@@ -301,12 +302,22 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>Notion</td>
 <td>✅</td>
 <td>-</td>
+</tr>
+<tr>
+<td>Personio</td>
+<td>✅</td>
+<td>-</td>
 </tr>
 <tr>
 <td>S3</td>
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>Salesforce</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Shopify</td>
 <td>✅</td>
{ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/RECORD
CHANGED
@@ -1,20 +1,20 @@
 ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=LtEZWoUhm5i2aKerdgEpLtNCf3fdhGGMM4td-LRZVbY,1407
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/buildinfo.py,sha256=PnFKBMVizeXpYaYJ6rkY9m_oU0QCJzbLAOJyEQ8gyRg,21
 ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=dOdY4fzeQ-2dgFBGIDFD5ilxpYNfCVqQOureuWzOL-w,5127
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=YlWokgTZoeMQ6PVb9UVU3I99R0cdhkYjEzPf5LNGs30,68582
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
 ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
-ingestr/src/applovin/__init__.py,sha256=
+ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
 ingestr/src/applovin_max/__init__.py,sha256=1NUOeJzRyZZQ95KEirbrlSrk-8SNc9JrlM_5pGgBgHg,2878
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
@@ -74,6 +74,10 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
 ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
+ingestr/src/personio/__init__.py,sha256=CQ8XX8Q8BG-wgoen3emhe_r8Cx414Fux7P8jQNawWvY,11646
+ingestr/src/personio/helpers.py,sha256=OmeMzfg4MVtpI7f75D3-9OGZb8SDsKyz0svNm1zJLTw,2900
+ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
+ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
 ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
 ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
 ingestr/src/shopify/helpers.py,sha256=NfHD6lWXe88ybR0ri-FCQuh2Vf8l5WG0a0FVjmdoSC4,6296
@@ -104,8 +108,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.9.dist-info/METADATA,sha256=
-ingestr-0.13.9.dist-info/WHEEL,sha256=
-ingestr-0.13.9.dist-info/entry_points.txt,sha256=
-ingestr-0.13.9.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.13.9.dist-info/RECORD,,
+ingestr-0.13.11.dist-info/METADATA,sha256=8vjvshEDHgAZEMt3ykbUSlEl_Ky0KtHf6p6vjT6RDGI,9171
+ingestr-0.13.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.11.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.11.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.11.dist-info/RECORD,,
{ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/WHEEL
File without changes
{ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.9.dist-info → ingestr-0.13.11.dist-info}/licenses/LICENSE.md
File without changes