ingestr 0.7.6__py3-none-any.whl → 0.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +11 -1
- ingestr/src/.gitignore +10 -0
- ingestr/src/airtable/__init__.py +69 -0
- ingestr/src/facebook_ads/__init__.py +197 -0
- ingestr/src/facebook_ads/exceptions.py +5 -0
- ingestr/src/facebook_ads/helpers.py +255 -0
- ingestr/src/facebook_ads/settings.py +208 -0
- ingestr/src/factory.py +15 -0
- ingestr/src/kafka/__init__.py +103 -0
- ingestr/src/kafka/helpers.py +227 -0
- ingestr/src/klaviyo/_init_.py +173 -0
- ingestr/src/klaviyo/client.py +212 -0
- ingestr/src/klaviyo/helpers.py +19 -0
- ingestr/src/slack/__init__.py +272 -0
- ingestr/src/slack/helpers.py +204 -0
- ingestr/src/slack/settings.py +22 -0
- ingestr/src/sources.py +222 -1
- ingestr/src/version.py +1 -1
- {ingestr-0.7.6.dist-info → ingestr-0.7.8.dist-info}/METADATA +31 -5
- {ingestr-0.7.6.dist-info → ingestr-0.7.8.dist-info}/RECORD +23 -9
- {ingestr-0.7.6.dist-info → ingestr-0.7.8.dist-info}/WHEEL +0 -0
- {ingestr-0.7.6.dist-info → ingestr-0.7.8.dist-info}/entry_points.txt +0 -0
- {ingestr-0.7.6.dist-info → ingestr-0.7.8.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""Slack source helpers."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Generator, Iterable, List, Optional
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
|
|
6
|
+
import pendulum
|
|
7
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
8
|
+
from dlt.common.typing import Dict, TAnyDateTime, TDataItem
|
|
9
|
+
from dlt.sources.helpers import requests
|
|
10
|
+
from jsonpath_ng.ext import parse # type: ignore
|
|
11
|
+
|
|
12
|
+
from .settings import MAX_PAGE_SIZE, SLACK_API_URL
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SlackApiException(Exception):
    """Base error raised when a Slack API response is not ``ok``."""


class PaidOnlyException(SlackApiException):
    """Raised when Slack rejects a request with the ``paid_only`` error code."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def extract_jsonpath(
    expression: str,
    json_data: TDataItem,
) -> Generator[Any, None, None]:
    """Yield every value of ``json_data`` matched by a JSONPath expression.

    A falsy expression (empty string or ``None``) yields ``json_data`` itself
    as the single item instead of being parsed.
    """
    if expression:
        matches = parse(expression).find(json_data)
        yield from (found.value for found in matches)
    else:
        yield json_data
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def update_jsonpath(expression: str, json_data: TDataItem, value: Any) -> Any:
    """Write ``value`` at the location a JSONPath expression points to.

    The expression is parsed and its ``update_or_create`` result is returned
    as-is.
    """
    return parse(expression).update_or_create(json_data, value)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def ensure_dt_type(dt: TAnyDateTime, to_ts: bool = False) -> Any:
    """Normalise a date-like value to a pendulum datetime or a timestamp.

    Args:
        dt: The value to convert. ``None`` is passed through untouched.
        to_ts: When true, return ``.timestamp()`` of the converted datetime
            rather than the datetime itself.

    Returns:
        Any: ``None``, the converted datetime, or its timestamp.
    """
    if dt is None:
        return None
    converted = ensure_pendulum_datetime(dt)
    return converted.timestamp() if to_ts else converted
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class SlackAPI:
    """
    A Slack API client that can be used to get pages of data from Slack.
    """

    def __init__(
        self,
        access_token: str,
        page_size: int = MAX_PAGE_SIZE,
    ) -> None:
        """
        Args:
            access_token: The token sent as the ``Bearer`` credential with every request.
            page_size: The max number of items to fetch per page. Defaults to MAX_PAGE_SIZE.
        """
        self.access_token = access_token
        self.page_size = page_size

    @property
    def headers(self) -> Dict[str, str]:
        """Generate the headers to use for the request."""
        return {"Authorization": f"Bearer {self.access_token}"}

    def parameters(
        self,
        params: Optional[Dict[str, Any]] = None,
        next_cursor: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Generate the query parameters to use for the request.

        Args:
            params: The query parameters to include in the request.
            next_cursor: The cursor to use to get the next page of results.

        Returns:
            A new dict holding ``params`` plus ``limit`` and, when a cursor is
            given, ``cursor``. The dict passed in is left unmodified.
        """
        # Copy first: writing "limit"/"cursor" into the caller's dict would leak
        # the last cursor into any later request that reuses the same dict.
        query = dict(params or {})
        query["limit"] = self.page_size
        if next_cursor:
            query["cursor"] = next_cursor
        return query

    def url(self, resource: str) -> str:
        """
        Generate the URL to use for the request.

        Args:
            resource: The resource to get pages for (e.g. conversations.list).
        """
        return urljoin(SLACK_API_URL, resource)

    def _get_next_cursor(self, response: Dict[str, Any]) -> Any:
        """
        Get the next cursor from the response.

        Args:
            response: The response from the Slack API.

        Returns:
            The value at ``response_metadata.next_cursor``, or None when the
            response has no such field.
        """
        cursor_jsonpath = "$.response_metadata.next_cursor"
        return next(extract_jsonpath(cursor_jsonpath, response), None)

    def _convert_datetime_fields(
        self, item: Dict[str, Any], datetime_fields: Optional[List[str]]
    ) -> Dict[str, Any]:
        """Convert timestamp fields in the item to pendulum datetime objects.

        The item is modified in place.

        Args:
            item: The item to convert
            datetime_fields: List of fields to convert to pendulum datetime objects.

        Returns:
            The same data item (for convenience)
        """
        if not datetime_fields:
            return item

        for field in datetime_fields:
            # Missing fields and falsy values (None, 0, "") are left untouched.
            if timestamp := next(extract_jsonpath(field, item), None):
                if isinstance(timestamp, str):
                    timestamp = float(timestamp)
                # Values this large are scaled down by 1000 before conversion
                # (presumably millisecond timestamps; confirm against the API).
                if timestamp > 1e10:
                    timestamp = timestamp / 1000
                pendulum_dt = pendulum.from_timestamp(timestamp)
                item = update_jsonpath(field, item, pendulum_dt)
        return item

    def get_pages(
        self,
        resource: str,
        response_path: Optional[str] = None,
        params: Optional[Dict[str, Any]] = None,
        datetime_fields: Optional[List[str]] = None,
        context: Optional[Dict[str, Any]] = None,
    ) -> Iterable[TDataItem]:
        """Get all pages from slack using requests.

        Iterates through all pages and yields the items of each page.

        Args:
            resource: The resource to get pages for (e.g. conversations.list).
            response_path: The path to the list of items in the response JSON.
            params: Query params to include in the request.
            datetime_fields: List of fields to convert to pendulum datetime objects.
            context: Additional context to add to each item.

        Yields:
            List of data items from the page

        Raises:
            PaidOnlyException: If the response error is ``paid_only``.
            SlackApiException: If the response is not ``ok`` for any other reason.
        """
        next_cursor = None

        # Iterate through all pages
        while True:
            # Make the request; parameters() builds a fresh dict for every page
            response = requests.get(
                url=self.url(resource),
                headers=self.headers,
                params=self.parameters(params, next_cursor),
            )
            json_response = response.json()

            # Stop if there was an error
            if not json_response.get("ok"):
                error = json_response.get("error")
                if error == "paid_only":
                    raise PaidOnlyException(
                        "This resource is just available on paid accounts."
                    )
                raise SlackApiException(error)

            # Yield the page converting datetime fields
            output = []
            for item in extract_jsonpath(response_path, json_response):
                item = self._convert_datetime_fields(item, datetime_fields)
                item.update(context or {})
                output.append(item)
            yield output

            # Get the next cursor; a missing or empty cursor ends the pagination
            next_cursor = self._get_next_cursor(json_response)
            if not next_cursor:
                break
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Slack source settings and constants"""
|
|
2
|
+
|
|
3
|
+
from dlt.common import pendulum
|
|
4
|
+
|
|
5
|
+
# Default start date: 1 January 2000.
DEFAULT_START_DATE = pendulum.datetime(2000, 1, 1)

# Base URL that resource names are joined onto.
SLACK_API_URL = "https://slack.com/api/"

# Default number of items requested per page.
MAX_PAGE_SIZE = 1000

# Message fields holding timestamps, given as paths into a message item.
MSG_DATETIME_FIELDS = [
    "ts",
    "thread_ts",
    "latest_reply",
    "blocks.thread_ts",
    "blocks.latest_reply",
    "attachment.thread_ts",
    "attachment.latest_reply",
    "edited.ts",
]

# Timestamp fields for every other resource.
DEFAULT_DATETIME_FIELDS = ["updated", "created"]
|
ingestr/src/sources.py
CHANGED
|
@@ -7,13 +7,19 @@ from urllib.parse import parse_qs, urlparse
|
|
|
7
7
|
|
|
8
8
|
import dlt
|
|
9
9
|
|
|
10
|
+
from ingestr.src.airtable import airtable_source
|
|
10
11
|
from ingestr.src.chess import source
|
|
12
|
+
from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
|
|
11
13
|
from ingestr.src.google_sheets import google_spreadsheet
|
|
12
14
|
from ingestr.src.gorgias import gorgias_source
|
|
13
15
|
from ingestr.src.hubspot import hubspot
|
|
16
|
+
from ingestr.src.kafka import kafka_consumer
|
|
17
|
+
from ingestr.src.kafka.helpers import KafkaCredentials
|
|
18
|
+
from ingestr.src.klaviyo._init_ import klaviyo_source
|
|
14
19
|
from ingestr.src.mongodb import mongodb_collection
|
|
15
20
|
from ingestr.src.notion import notion_databases
|
|
16
21
|
from ingestr.src.shopify import shopify_source
|
|
22
|
+
from ingestr.src.slack import slack_source
|
|
17
23
|
from ingestr.src.sql_database import sql_table
|
|
18
24
|
from ingestr.src.stripe_analytics import stripe_source
|
|
19
25
|
from ingestr.src.table_definition import table_string_to_dataclass
|
|
@@ -344,7 +350,9 @@ class ChessSource:
|
|
|
344
350
|
f"Resource '{table}' is not supported for Chess source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
345
351
|
)
|
|
346
352
|
|
|
347
|
-
return source(players=list_players, **date_args).with_resources(
|
|
353
|
+
return source(players=list_players, **date_args).with_resources(
|
|
354
|
+
table_mapping[table]
|
|
355
|
+
)
|
|
348
356
|
|
|
349
357
|
|
|
350
358
|
class StripeAnalyticsSource:
|
|
@@ -401,6 +409,94 @@ class StripeAnalyticsSource:
|
|
|
401
409
|
).with_resources(endpoint)
|
|
402
410
|
|
|
403
411
|
|
|
412
|
+
class FacebookAdsSource:
    """Builds dlt sources for Facebook Ads from a connection URI."""

    def handles_incrementality(self) -> bool:
        """Return True: an ``incremental_key`` is rejected by ``dlt_source``."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the dlt source for one Facebook Ads table.

        Args:
            uri: Connection URI whose query string carries ``access_token``
                and ``account_id``.
            table: One of ``campaigns``, ``ad_sets``, ``ad_creatives``,
                ``ads``, ``leads`` or ``facebook_insights``.
            **kwargs: Only ``incremental_key`` is inspected; it must be unset.

        Returns:
            The source restricted to the requested resource.

        Raises:
            ValueError: If ``incremental_key`` is given, a credential is
                missing from the URI, or ``table`` is not supported.
        """
        # facebook_ads://?access_token=abcd&account_id=1234
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Facebook Ads takes care of incrementality on its own, you should not provide incremental_key"
            )

        source_params = parse_qs(urlparse(uri).query)
        access_token = source_params.get("access_token")
        account_id = source_params.get("account_id")

        if not access_token or not account_id:
            raise ValueError(
                "access_token and account_id are required to connect to Facebook Ads."
            )

        # Exact comparison: a substring test would also accept "", "insights",
        # "face" and similar partial names.
        if table == "facebook_insights":
            return facebook_insights_source(
                access_token=access_token[0],
                account_id=account_id[0],
            ).with_resources("facebook_insights")

        if table not in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
            raise ValueError(
                f"Resource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        return facebook_ads_source(
            access_token=access_token[0],
            account_id=account_id[0],
        ).with_resources(table)
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
class SlackSource:
    """Builds dlt sources for Slack from a connection URI."""

    def handles_incrementality(self) -> bool:
        """Return True: an ``incremental_key`` is rejected by ``dlt_source``."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the dlt source for one Slack table.

        Args:
            uri: Connection URI whose query string carries ``api_key``.
            table: ``channels``, ``users``, ``access_logs``, or
                ``messages:<channel>[,<channel>...]``.
            **kwargs: ``incremental_key`` must be unset; ``interval_start``
                and ``interval_end`` are forwarded as ``start_date`` and
                ``end_date`` when present.

        Returns:
            The source restricted to the requested resource.

        Raises:
            ValueError: If ``incremental_key`` is given, ``api_key`` is
                missing, the messages table names no channel, or ``table``
                is not supported.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "Slack takes care of incrementality on its own, you should not provide incremental_key"
            )

        # slack://?api_key=<apikey>
        api_key = parse_qs(urlparse(uri).query).get("api_key")
        if not api_key:
            raise ValueError("api_key in the URI is required to connect to Slack")

        msg_channels = None
        if table in ["channels", "users", "access_logs"]:
            endpoint = table
        elif table.startswith("messages"):
            # The channel list follows the first ":"; without it indexing
            # the split result would fail with a bare IndexError.
            table_parts = table.split(":")
            if len(table_parts) < 2 or not table_parts[1]:
                raise ValueError(
                    "Channels are required for the Slack messages table, please provide them as 'messages:<channel1>,<channel2>'"
                )
            msg_channels = table_parts[1].split(",")
            endpoint = "messages"
        else:
            raise ValueError(
                f"Resource '{table}' is not supported for slack source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        date_args = {}
        if kwargs.get("interval_start"):
            date_args["start_date"] = kwargs.get("interval_start")

        if kwargs.get("interval_end"):
            date_args["end_date"] = kwargs.get("interval_end")

        return slack_source(
            access_token=api_key[0],
            table_per_channel=False,
            selected_channels=msg_channels,
            **date_args,
        ).with_resources(endpoint)
|
|
498
|
+
|
|
499
|
+
|
|
404
500
|
class HubspotSource:
|
|
405
501
|
def handles_incrementality(self) -> bool:
|
|
406
502
|
return True
|
|
@@ -431,3 +527,128 @@ class HubspotSource:
|
|
|
431
527
|
return hubspot(
|
|
432
528
|
api_key=api_key[0],
|
|
433
529
|
).with_resources(endpoint)
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
class AirtableSource:
    """Builds dlt sources for Airtable from a connection URI."""

    def handles_incrementality(self) -> bool:
        """Always return True."""
        return True

    # airtable://?access_token=<access_token>&base_id=<base_id>

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Airtable source for a comma-separated list of tables.

        Raises ValueError when ``incremental_key`` is given, when ``table`` is
        empty, or when the URI lacks ``base_id`` or ``access_token``.
        """
        if kwargs.get("incremental_key"):
            raise ValueError("Incremental loads are not supported for Airtable")
        if not table:
            raise ValueError("Source table is required to connect to Airtable")

        table_names = table.split(",")
        query = parse_qs(urlparse(uri).query)
        base_ids = query.get("base_id")
        tokens = query.get("access_token")

        if not (base_ids and tokens):
            raise ValueError(
                "base_id and access_token in the URI are required to connect to Airtable"
            )

        return airtable_source(
            base_id=base_ids[0],
            table_names=table_names,
            access_token=tokens[0],
        )
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
class KlaviyoSource:
    """Builds dlt sources for Klaviyo from a connection URI."""

    def handles_incrementality(self) -> bool:
        """Return True: an ``incremental_key`` is rejected by ``dlt_source``."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Klaviyo source for one supported resource.

        ``api_key`` is read from the URI query string. ``interval_start`` is
        used as the start date and falls back to "2000-01-01" when absent.
        Raises ValueError for an ``incremental_key``, a missing ``api_key`` or
        an unsupported ``table``.
        """
        if kwargs.get("incremental_key"):
            raise ValueError(
                "klaviyo_source takes care of incrementality on its own, you should not provide incremental_key"
            )

        api_key = parse_qs(urlparse(uri).query).get("api_key")
        if not api_key:
            raise ValueError("api_key in the URI is required to connect to klaviyo")

        supported_tables = (
            "events",
            "profiles",
            "campaigns",
            "metrics",
            "tags",
            "coupons",
            "catalog-variants",
            "catalog-categories",
            "catalog-items",
            "forms",
            "lists",
            "images",
            "segments",
            "flows",
            "templates",
        )
        if table not in supported_tables:
            raise ValueError(
                f"Resource '{table}' is not supported for Klaviyo source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
            )

        start_date = kwargs.get("interval_start") or "2000-01-01"
        source = klaviyo_source(
            api_key=api_key[0],
            start_date=start_date,
        )
        return source.with_resources(table)
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
class KafkaSource:
    """Builds the Kafka consumer resource from a connection URI."""

    def handles_incrementality(self) -> bool:
        """Always return False."""
        return False

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create a Kafka consumer that reads the topic named by ``table``.

        ``bootstrap_servers`` and ``group_id`` are required in the URI query
        string (ValueError otherwise). The security settings are optional and
        default to None; ``batch_size`` defaults to 3000 and ``batch_timeout``
        to 3. ``interval_start`` is forwarded as ``start_from``.
        """
        # kafka://?bootstrap_servers=localhost:9092&group_id=test_group&security_protocol=SASL_SSL&sasl_mechanisms=PLAIN&sasl_username=example_username&sasl_password=example_secret
        query = parse_qs(urlparse(uri).query)

        def first_or_none(key):
            # parse_qs maps each key to a list; take its first entry if any.
            values = query.get(key, [])
            return values[0] if len(values) > 0 else None

        bootstrap_servers = query.get("bootstrap_servers")
        if not bootstrap_servers:
            raise ValueError(
                "bootstrap_servers in the URI is required to connect to kafka"
            )

        group_id = query.get("group_id")
        if not group_id:
            raise ValueError("group_id in the URI is required to connect to kafka")

        credentials = KafkaCredentials(
            bootstrap_servers=bootstrap_servers[0],
            group_id=group_id[0],
            security_protocol=first_or_none("security_protocol"),  # type: ignore
            sasl_mechanisms=first_or_none("sasl_mechanisms"),  # type: ignore
            sasl_username=first_or_none("sasl_username"),  # type: ignore
            sasl_password=first_or_none("sasl_password"),  # type: ignore
        )
        return kafka_consumer(
            topics=[table],
            credentials=credentials,
            start_from=kwargs.get("interval_start"),
            batch_size=int(query.get("batch_size", [3000])[0]),
            batch_timeout=int(query.get("batch_timeout", [3])[0]),
        )
|
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.7.6"
|
|
1
|
+
__version__ = "0.7.8"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.7.6
|
|
3
|
+
Version: 0.7.8
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -14,17 +14,20 @@ Classifier: Operating System :: OS Independent
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Topic :: Database
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
|
+
Requires-Dist: confluent-kafka>=2.3.0
|
|
17
18
|
Requires-Dist: cx-oracle==8.3.0
|
|
18
19
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
19
20
|
Requires-Dist: dlt==0.5.1
|
|
20
21
|
Requires-Dist: duckdb-engine==0.11.5
|
|
21
22
|
Requires-Dist: duckdb==0.10.2
|
|
23
|
+
Requires-Dist: facebook-business==20.0.0
|
|
22
24
|
Requires-Dist: google-api-python-client==2.130.0
|
|
23
25
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
24
26
|
Requires-Dist: mysql-connector-python==9.0.0
|
|
25
27
|
Requires-Dist: pendulum==3.0.0
|
|
26
28
|
Requires-Dist: psycopg2-binary==2.9.9
|
|
27
29
|
Requires-Dist: py-machineid==0.5.1
|
|
30
|
+
Requires-Dist: pyairtable==2.3.3
|
|
28
31
|
Requires-Dist: pymongo==4.6.3
|
|
29
32
|
Requires-Dist: pymysql==1.1.0
|
|
30
33
|
Requires-Dist: pyodbc==5.1.0
|
|
@@ -55,7 +58,7 @@ Description-Content-Type: text/markdown
|
|
|
55
58
|
</a>
|
|
56
59
|
</div>
|
|
57
60
|
|
|
58
|
-
|
|
61
|
+
---
|
|
59
62
|
|
|
60
63
|
Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
|
|
61
64
|
|
|
@@ -65,8 +68,8 @@ Ingestr is a command-line application that allows you to ingest data from any so
|
|
|
65
68
|
|
|
66
69
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
67
70
|
|
|
68
|
-
|
|
69
71
|
## Installation
|
|
72
|
+
|
|
70
73
|
```
|
|
71
74
|
pip install ingestr
|
|
72
75
|
```
|
|
@@ -84,15 +87,17 @@ ingestr ingest \
|
|
|
84
87
|
That's it.
|
|
85
88
|
|
|
86
89
|
This command will:
|
|
90
|
+
|
|
87
91
|
- get the table `public.some_data` from the Postgres instance.
|
|
88
92
|
- upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
|
|
89
93
|
|
|
90
94
|
## Documentation
|
|
95
|
+
|
|
91
96
|
You can see the full documentation [here](https://bruin-data.github.io/ingestr/getting-started/quickstart.html).
|
|
92
97
|
|
|
93
98
|
## Community
|
|
94
|
-
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
95
99
|
|
|
100
|
+
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
96
101
|
|
|
97
102
|
## Supported Sources & Destinations
|
|
98
103
|
|
|
@@ -173,10 +178,20 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
173
178
|
<tr>
|
|
174
179
|
<td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
|
|
175
180
|
</tr>
|
|
181
|
+
<tr>
|
|
182
|
+
<td>Airtable</td>
|
|
183
|
+
<td>✅</td>
|
|
184
|
+
<td>-</td>
|
|
185
|
+
</tr>
|
|
176
186
|
<tr>
|
|
177
187
|
<td>Chess.com</td>
|
|
178
188
|
<td>✅</td>
|
|
179
189
|
<td>-</td>
|
|
190
|
+
</tr>
|
|
191
|
+
<tr>
|
|
192
|
+
<td>Facebook Ads</td>
|
|
193
|
+
<td>✅</td>
|
|
194
|
+
<td>-</td>
|
|
180
195
|
</tr>
|
|
181
196
|
<tr>
|
|
182
197
|
<td>Gorgias</td>
|
|
@@ -192,6 +207,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
192
207
|
<td>HubSpot</td>
|
|
193
208
|
<td>✅</td>
|
|
194
209
|
<td>-</td>
|
|
210
|
+
</tr>
|
|
211
|
+
<tr>
|
|
212
|
+
<td>Klaviyo</td>
|
|
213
|
+
<td>✅</td>
|
|
214
|
+
<td>-</td>
|
|
195
215
|
</tr>
|
|
196
216
|
<tr>
|
|
197
217
|
<td>Notion</td>
|
|
@@ -202,6 +222,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
202
222
|
<td>Shopify</td>
|
|
203
223
|
<td>✅</td>
|
|
204
224
|
<td>-</td>
|
|
225
|
+
</tr>
|
|
226
|
+
<tr>
|
|
227
|
+
<td>Slack</td>
|
|
228
|
+
<td>✅</td>
|
|
229
|
+
<td>-</td>
|
|
205
230
|
</tr>
|
|
206
231
|
<tr>
|
|
207
232
|
<td>Stripe</td>
|
|
@@ -213,4 +238,5 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
213
238
|
More to come soon!
|
|
214
239
|
|
|
215
240
|
## Acknowledgements
|
|
216
|
-
|
|
241
|
+
|
|
242
|
+
This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
-
ingestr/main.py,sha256=
|
|
1
|
+
ingestr/main.py,sha256=Hlcb8mUAWoGZr4ZKtQnoEhjLkjroiwx2-J86C6fN37E,17596
|
|
2
|
+
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
2
3
|
ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
|
|
3
|
-
ingestr/src/factory.py,sha256=
|
|
4
|
-
ingestr/src/sources.py,sha256=
|
|
4
|
+
ingestr/src/factory.py,sha256=CTVaFeMVgZO1fC9AKOqx-Wu89l5_YL6GlmvDF-FkAew,4442
|
|
5
|
+
ingestr/src/sources.py,sha256=BlMsajIMcu_oqmU38uqlasXz2vtN_J8yXa24NHFcwJA,22696
|
|
5
6
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
6
|
-
ingestr/src/version.py,sha256=
|
|
7
|
+
ingestr/src/version.py,sha256=uC8wB9mRblQ0jUBAOUyCQLUQJ39MC2xybVLB_8ZsevU,22
|
|
8
|
+
ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
|
|
7
9
|
ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
|
|
8
10
|
ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
|
|
9
11
|
ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
|
|
12
|
+
ingestr/src/facebook_ads/__init__.py,sha256=ZZyogV48gmhDcC3CYQEsC4qT3Q6JI9IOnMff2NS1M-A,9207
|
|
13
|
+
ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
|
|
14
|
+
ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
|
|
15
|
+
ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
|
|
10
16
|
ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
|
|
11
17
|
ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
|
|
12
18
|
ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
|
|
@@ -17,6 +23,11 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
|
|
|
17
23
|
ingestr/src/hubspot/__init__.py,sha256=eSD_lEIEd16YijAtUATFG8FGO8YGPm-MtAk94KKsx6o,9740
|
|
18
24
|
ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
|
|
19
25
|
ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
|
|
26
|
+
ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
|
|
27
|
+
ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
|
|
28
|
+
ingestr/src/klaviyo/_init_.py,sha256=nq2T1p3Xc7yiwGabsZBp2Jy2fa8_n5oxqxBnUGhKOgg,6592
|
|
29
|
+
ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
|
|
30
|
+
ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
|
|
20
31
|
ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
|
|
21
32
|
ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
|
|
22
33
|
ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
|
|
@@ -28,6 +39,9 @@ ingestr/src/shopify/__init__.py,sha256=EWjpvZz7K6Pms7uUoqqkM4Wj0XeE2NrDvVp4BNM8d
|
|
|
28
39
|
ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
|
|
29
40
|
ingestr/src/shopify/helpers.py,sha256=OO_Tw-HwVLnRhwT3vqUWEQEEcWIS9KWE6VDDe8BCC2w,4972
|
|
30
41
|
ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
|
|
42
|
+
ingestr/src/slack/__init__.py,sha256=UfUhkS6FnCKJeXkkJ5QrmdT5nZm5czjtomsQu_x9WUM,9987
|
|
43
|
+
ingestr/src/slack/helpers.py,sha256=08TLK7vhFvH_uekdLVOLF3bTDe1zgH0QxHObXHzk1a8,6545
|
|
44
|
+
ingestr/src/slack/settings.py,sha256=NhKn4y1zokEa5EmIZ05wtj_-I0GOASXZ5V81M1zXCtY,457
|
|
31
45
|
ingestr/src/sql_database/__init__.py,sha256=HEqY6U-YzzbeZ8avIthj-Fatm2C3i3jqYs5DAIAu4Ss,11511
|
|
32
46
|
ingestr/src/sql_database/arrow_helpers.py,sha256=yze1X3A9nUQA4HeuFDDWrfJVkCq8Uo5UyDo_zhJtI60,5699
|
|
33
47
|
ingestr/src/sql_database/helpers.py,sha256=6o8e2_8MIuj3qlo40a2E6ns3gyK18ei1jCePONrMUjI,10191
|
|
@@ -46,8 +60,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
46
60
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
47
61
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
48
62
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
49
|
-
ingestr-0.7.
|
|
50
|
-
ingestr-0.7.
|
|
51
|
-
ingestr-0.7.
|
|
52
|
-
ingestr-0.7.
|
|
53
|
-
ingestr-0.7.
|
|
63
|
+
ingestr-0.7.8.dist-info/METADATA,sha256=JGJ_76vC0icT_tJSYDkbtRXuc_63sgHXJYYIksTSyOE,6561
|
|
64
|
+
ingestr-0.7.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
65
|
+
ingestr-0.7.8.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
66
|
+
ingestr-0.7.8.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
67
|
+
ingestr-0.7.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|