ingestr 0.13.54__py3-none-any.whl → 0.13.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/src/blob.py +24 -0
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/facebook_ads/__init__.py +18 -8
- ingestr/src/facebook_ads/helpers.py +2 -4
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +2 -0
- ingestr/src/sources.py +128 -30
- ingestr/src/stripe_analytics/__init__.py +4 -2
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- {ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/METADATA +1 -1
- {ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/RECORD +16 -13
- {ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/WHEEL +0 -0
- {ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/blob.py
CHANGED
@@ -6,6 +6,10 @@ BucketName: TypeAlias = str
 FileGlob: TypeAlias = str
 
 
+class UnsupportedEndpointError(Exception):
+    pass
+
+
 def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
     """
     parse the URI of a blob storage and
@@ -50,3 +54,23 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
         return "", parts[0]
 
     return parts[0], parts[1]
+
+
+def parse_endpoint(path: str) -> str:
+    """
+    Parse the endpoint kind from the URI.
+
+    kind is a file format. one of [csv, jsonl, parquet]
+    """
+    file_extension = path.split(".")[-1]
+    if file_extension == "gz":
+        file_extension = path.split(".")[-2]
+    if file_extension == "csv":
+        endpoint = "read_csv"
+    elif file_extension == "jsonl":
+        endpoint = "read_jsonl"
+    elif file_extension == "parquet":
+        endpoint = "read_parquet"
+    else:
+        raise UnsupportedEndpointError(f"Unsupported file format: {file_extension}")
+    return endpoint
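Based only on the logic added above, the new helper maps file extensions (including the inner extension of gzipped files) to reader endpoints; a quick illustration with made-up paths:

from ingestr.src import blob

blob.parse_endpoint("exports/events.jsonl")   # -> "read_jsonl"
blob.parse_endpoint("exports/events.csv.gz")  # -> "read_csv"; for .gz the inner extension decides
blob.parse_endpoint("exports/events.xlsx")    # raises blob.UnsupportedEndpointError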
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.54"
+version = "v0.13.56"
ingestr/src/facebook_ads/__init__.py
CHANGED
@@ -116,6 +116,8 @@ def facebook_insights_source(
     batch_size: int = 50,
     request_timeout: int = 300,
     app_api_version: str = None,
+    start_date: pendulum.DateTime | None = None,
+    end_date: pendulum.DateTime | None = None,
 ) -> DltResource:
     """Incrementally loads insight reports with defined granularity level, fields, breakdowns etc.
 
@@ -148,27 +150,32 @@ def facebook_insights_source(
         account_id, access_token, request_timeout, app_api_version
     )
 
-
-
-
+    if start_date is None:
+        start_date = pendulum.today().subtract(days=initial_load_past_days)
+
+    columns = {}
+    for field in fields:
+        if field in INSIGHT_FIELDS_TYPES:
+            columns[field] = INSIGHT_FIELDS_TYPES[field]
 
     @dlt.resource(
         primary_key=INSIGHTS_PRIMARY_KEY,
         write_disposition="merge",
-        columns=
+        columns=columns,
     )
     def facebook_insights(
         date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
             "date_start",
-            initial_value=
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
             range_end="closed",
             range_start="closed",
+            lag=attribution_window_days_lag * 24 * 60 * 60,  # Convert days to seconds
         ),
     ) -> Iterator[TDataItems]:
-        start_date = get_start_date(date_start
+        start_date = get_start_date(date_start)
         end_date = pendulum.now()
 
-        # fetch insights in incremental day steps
         while start_date <= end_date:
             query = {
                 "level": level,
@@ -193,7 +200,10 @@ def facebook_insights_source(
                 }
             ],
         }
-        job = execute_job(
+        job = execute_job(
+            account.get_insights(params=query, is_async=True),
+            insights_max_async_sleep_seconds=10,
+        )
         yield list(map(process_report_item, job.get_result()))
         start_date = start_date.add(days=time_increment_days)
 
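A hedged sketch of how the two new arguments are meant to be used when calling the source directly; the token and account id are placeholders, and per the diff both dates default to None (start falls back to initial_load_past_days, end leaves the range open):

import pendulum

from ingestr.src.facebook_ads import facebook_insights_source

source = facebook_insights_source(
    access_token="FB_ACCESS_TOKEN",            # placeholder
    account_id="1234567890",                   # placeholder
    start_date=pendulum.datetime(2024, 1, 1),  # backfill window start
    end_date=pendulum.datetime(2024, 1, 31),   # bounded backfill; omit for open-ended loads
)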
ingestr/src/facebook_ads/helpers.py
CHANGED
@@ -31,14 +31,13 @@ from .settings import (
 
 def get_start_date(
     incremental_start_date: dlt.sources.incremental[str],
-    attribution_window_days_lag: int = 7,
 ) -> pendulum.DateTime:
     """
     Get the start date for incremental loading of Facebook Insights data.
     """
     start_date: pendulum.DateTime = ensure_pendulum_datetime(
         incremental_start_date.start_value
-    )
+    )
 
     # facebook forgets insights so trim the lag and warn
     min_start_date = pendulum.today().subtract(
@@ -65,7 +64,6 @@ def process_report_item(item: AbstractObject) -> DictStrAny:
     for pki in INSIGHTS_PRIMARY_KEY:
         if pki not in d:
             d[pki] = "no_" + pki
-
     return d
 
 
@@ -138,7 +136,7 @@ def execute_job(
 ) -> AbstractCrudObject:
     status: str = None
     time_start = time.time()
-    sleep_time =
+    sleep_time = 3
     while status != "Job Completed":
         duration = time.time() - time_start
         job = job.api_get()
ingestr/src/facebook_ads/utils.py
ADDED
@@ -0,0 +1,39 @@
+from typing import Dict
+
+import dlt
+from dlt.common.configuration.inject import with_config
+from dlt.sources.helpers import requests
+
+
+@with_config(sections=("sources", "facebook_ads"))
+def debug_access_token(
+    access_token: str = dlt.secrets.value,
+    client_id: str = dlt.secrets.value,
+    client_secret: str = dlt.secrets.value,
+) -> str:
+    """Debugs the `access_token` providing info on expiration time, scopes etc. If arguments are not provided, `dlt` will inject them from configuration"""
+    debug_url = f"https://graph.facebook.com/debug_token?input_token={access_token}&access_token={client_id}|{client_secret}"
+    response = requests.get(debug_url)
+    data: Dict[str, str] = response.json()
+
+    if "error" in data:
+        raise Exception(f"Error debugging token: {data['error']}")
+
+    return data["data"]
+
+
+@with_config(sections=("sources", "facebook_ads"))
+def get_long_lived_token(
+    access_token: str = dlt.secrets.value,
+    client_id: str = dlt.secrets.value,
+    client_secret: str = dlt.secrets.value,
+) -> str:
+    """Gets the long lived access token (60 days) from `access_token`. If arguments are not provided, `dlt` will inject them from configuration"""
+    exchange_url = f"https://graph.facebook.com/v13.0/oauth/access_token?grant_type=fb_exchange_token&client_id={client_id}&client_secret={client_secret}&fb_exchange_token={access_token}"
+    response = requests.get(exchange_url)
+    data: Dict[str, str] = response.json()
+
+    if "error" in data:
+        raise Exception(f"Error refreshing token: {data['error']}")
+
+    return data["access_token"]
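A hedged usage sketch for the new token helper; the three credential strings are placeholders, and per the docstrings dlt can inject them from the sources.facebook_ads secrets section instead:

from ingestr.src.facebook_ads.utils import get_long_lived_token

long_lived = get_long_lived_token(
    access_token="SHORT_LIVED_TOKEN",  # placeholder
    client_id="FB_APP_ID",             # placeholder
    client_secret="FB_APP_SECRET",     # placeholder
)
# Exchanges the short-lived token for one valid for ~60 days, per the docstring above.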
ingestr/src/factory.py
CHANGED
@@ -64,6 +64,7 @@ from ingestr.src.sources import (
     SqlSource,
     StripeAnalyticsSource,
     TikTokSource,
+    TrustpilotSource,
     ZendeskSource,
 )
 
@@ -165,6 +166,7 @@ class SourceDestinationFactory:
         "pipedrive": PipedriveSource,
         "frankfurter": FrankfurterSource,
         "freshdesk": FreshdeskSource,
+        "trustpilot": TrustpilotSource,
         "phantombuster": PhantombusterSource,
         "elasticsearch": ElasticsearchSource,
         "attio": AttioSource,
ingestr/src/sources.py
CHANGED
@@ -747,11 +747,64 @@ class FacebookAdsSource:
         endpoint = None
         if table in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
             endpoint = table
-        elif table
+        elif table == "facebook_insights":
             return facebook_insights_source(
                 access_token=access_token[0],
                 account_id=account_id[0],
+                start_date=kwargs.get("interval_start"),
+                end_date=kwargs.get("interval_end"),
             ).with_resources("facebook_insights")
+        elif table.startswith("facebook_insights:"):
+            # Parse custom breakdowns and metrics from table name
+            # Supported formats:
+            # facebook_insights:breakdown_type
+            # facebook_insights:breakdown_type:metric1,metric2...
+            parts = table.split(":")
+
+            if len(parts) < 2 or len(parts) > 3:
+                raise ValueError(
+                    "Invalid facebook_insights format. Expected: facebook_insights:breakdown_type or facebook_insights:breakdown_type:metric1,metric2..."
+                )
+
+            breakdown_type = parts[1].strip()
+            if not breakdown_type:
+                raise ValueError(
+                    "Breakdown type must be provided in format: facebook_insights:breakdown_type"
+                )
+
+            # Validate breakdown type against available options from settings
+            import typing
+
+            from ingestr.src.facebook_ads.settings import TInsightsBreakdownOptions
+
+            # Get valid breakdown options from the type definition
+            valid_breakdowns = list(typing.get_args(TInsightsBreakdownOptions))
+
+            if breakdown_type not in valid_breakdowns:
+                raise ValueError(
+                    f"Invalid breakdown type '{breakdown_type}'. Valid options: {', '.join(valid_breakdowns)}"
+                )
+
+            source_kwargs = {
+                "access_token": access_token[0],
+                "account_id": account_id[0],
+                "start_date": kwargs.get("interval_start"),
+                "end_date": kwargs.get("interval_end"),
+                "breakdowns": breakdown_type,
+            }
+
+            # If custom metrics are provided, parse them
+            if len(parts) == 3:
+                fields = [f.strip() for f in parts[2].split(",") if f.strip()]
+                if not fields:
+                    raise ValueError(
+                        "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
+                    )
+                source_kwargs["fields"] = fields
+
+            return facebook_insights_source(**source_kwargs).with_resources(
+                "facebook_insights"
+            )
         else:
             raise ValueError(
                 f"Resource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
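A hedged illustration of how the new table-name grammar decomposes, mirroring the split logic above; "country" and the metric names are illustrative values, not a vetted breakdown or metric list:

table = "facebook_insights:country:impressions,clicks"
parts = table.split(":")           # ["facebook_insights", "country", "impressions,clicks"]
breakdown_type = parts[1].strip()  # "country", validated against TInsightsBreakdownOptions
fields = [f.strip() for f in parts[2].split(",") if f.strip()]  # ["impressions", "clicks"]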
@@ -1309,17 +1362,25 @@ class S3Source:
             secret=secret_access_key[0],
         )
 
-
-        if
-            endpoint = "
-
-
-
-
+        endpoint: Optional[str] = None
+        if "#" in table:
+            _, endpoint = table.split("#")
+            if endpoint not in ["csv", "jsonl", "parquet"]:
+                raise ValueError(
+                    "S3 Source only supports specific formats files: csv, jsonl, parquet"
+                )
+            endpoint = f"read_{endpoint}"
         else:
-
-
-
+            try:
+                endpoint = blob.parse_endpoint(path_to_file)
+            except blob.UnsupportedEndpointError:
+                raise ValueError(
+                    "S3 Source only supports specific formats files: csv, jsonl, parquet"
+                )
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to parse endpoint from path: {path_to_file}"
+                ) from e
 
         from ingestr.src.filesystem import readers
 
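An illustration of the new "#" override for S3 table names (bucket and glob are made up): the fragment takes priority over whatever extension the path carries:

table = "my-bucket/exports/*.dat#csv"
path_to_file, fmt = table.split("#")  # fmt == "csv"
endpoint = f"read_{fmt}"              # "read_csv", even though the glob ends in .dat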
@@ -1791,17 +1852,16 @@ class GCSSource:
             token=credentials,
         )
 
-
-
-
-        elif file_extension == "jsonl":
-            endpoint = "read_jsonl"
-        elif file_extension == "parquet":
-            endpoint = "read_parquet"
-        else:
+        try:
+            endpoint = blob.parse_endpoint(path_to_file)
+        except blob.UnsupportedEndpointError:
             raise ValueError(
-                "
+                "S3 Source only supports specific formats files: csv, jsonl, parquet"
             )
+        except Exception as e:
+            raise ValueError(
+                f"Failed to parse endpoint from path: {path_to_file}"
+            ) from e
 
         from ingestr.src.filesystem import readers
 
@@ -2339,6 +2399,47 @@ class FreshdeskSource:
         ).with_resources(table)
 
 
+class TrustpilotSource:
+    # trustpilot://<business_unit_id>?api_key=<api_key>
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        business_unit_id = parsed_uri.netloc
+        params = parse_qs(parsed_uri.query)
+
+        if not business_unit_id:
+            raise MissingValueError("business_unit_id", "Trustpilot")
+
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "Trustpilot")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is None:
+            start_date = ensure_pendulum_datetime("2000-01-01").in_tz("UTC").isoformat()
+        else:
+            start_date = ensure_pendulum_datetime(start_date).in_tz("UTC").isoformat()
+
+        end_date = kwargs.get("interval_end")
+
+        if end_date is not None:
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC").isoformat()
+
+        if table not in ["reviews"]:
+            raise UnsupportedResourceError(table, "Trustpilot")
+
+        from ingestr.src.trustpilot import trustpilot_source
+
+        return trustpilot_source(
+            business_unit_id=business_unit_id,
+            api_key=api_key[0],
+            start_date=start_date,
+            end_date=end_date,
+        ).with_resources(table)
+
+
 class PhantombusterSource:
     def handles_incrementality(self) -> bool:
         return True
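The trustpilot:// URI documented in the class comment decomposes with the standard library exactly as the code above expects; the id and key are placeholders:

from urllib.parse import parse_qs, urlparse

uri = "trustpilot://123456789abcdef?api_key=YOUR_API_KEY"
parsed = urlparse(uri)
business_unit_id = parsed.netloc             # "123456789abcdef"
api_key = parse_qs(parsed.query)["api_key"]  # ["YOUR_API_KEY"]: a list, hence api_key[0] above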
@@ -2569,18 +2670,15 @@ class SFTPSource:
         else:
             file_glob = f"/{table}"
 
-
-
-
-            endpoint = "read_csv"
-        elif file_extension == "jsonl":
-            endpoint = "read_jsonl"
-        elif file_extension == "parquet":
-            endpoint = "read_parquet"
-        else:
+        try:
+            endpoint = blob.parse_endpoint(table)
+        except blob.UnsupportedEndpointError:
             raise ValueError(
-                "
+                "SFTP Source only supports specific formats files: csv, jsonl, parquet"
             )
+        except Exception as e:
+            raise ValueError(f"Failed to parse endpoint from path: {table}") from e
+
         from ingestr.src.filesystem import readers
 
         dlt_source_resource = readers(bucket_url, fs, file_glob)
ingestr/src/stripe_analytics/__init__.py
CHANGED
@@ -85,12 +85,14 @@ def incremental_stripe_source(
         created: Optional[Any] = dlt.sources.incremental(
             "created",
             initial_value=start_date_unix,
+            end_value=transform_date(end_date) if end_date is not None else None,
             range_end="closed",
             range_start="closed",
        ),
     ) -> Generator[Dict[Any, Any], Any, None]:
-
-
+        yield from pagination(
+            endpoint, start_date=created.last_value, end_date=created.end_value
+        )
 
     for endpoint in endpoints:
         yield dlt.resource(
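A hedged sketch of the bounded cursor this change produces; dlt.sources.incremental is the real API used above, but the epoch values here are illustrative:

import dlt

created = dlt.sources.incremental(
    "created",
    initial_value=1704067200,  # illustrative: 2024-01-01 as a unix timestamp
    end_value=1706745600,      # illustrative: 2024-02-01; None keeps the range open-ended
    range_start="closed",
    range_end="closed",
)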
ingestr/src/trustpilot/__init__.py
ADDED
@@ -0,0 +1,48 @@
+"""Trustpilot source for ingesting reviews."""
+
+from typing import Any, Dict, Generator, Iterable
+
+import dlt
+import pendulum
+from dlt.sources import DltResource
+
+from .client import TrustpilotClient
+
+
+@dlt.source()
+def trustpilot_source(
+    business_unit_id: str,
+    start_date: str,
+    end_date: str | None,
+    api_key: str,
+    per_page: int = 1000,
+) -> Iterable[DltResource]:
+    """Return resources for Trustpilot."""
+
+    client = TrustpilotClient(api_key=api_key)
+
+    @dlt.resource(name="reviews", write_disposition="merge", primary_key="id")
+    def reviews(
+        dateTime=(
+            dlt.sources.incremental(
+                "updated_at",
+                initial_value=start_date,
+                end_value=end_date,
+                range_start="closed",
+                range_end="closed",
+            )
+        ),
+    ) -> Generator[Dict[str, Any], None, None]:
+        if end_date is None:
+            end_dt = pendulum.now(tz="UTC").isoformat()
+        else:
+            end_dt = dateTime.end_value
+        start_dt = dateTime.last_value
+        yield from client.paginated_reviews(
+            business_unit_id=business_unit_id,
+            per_page=per_page,
+            updated_since=start_dt,
+            end_date=end_dt,
+        )
+
+    yield reviews
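A hedged end-to-end sketch wiring the new source into a dlt pipeline; the destination, dataset name, business unit id, and API key are placeholders, not part of this release:

import dlt

from ingestr.src.trustpilot import trustpilot_source

pipeline = dlt.pipeline(destination="duckdb", dataset_name="trustpilot")
info = pipeline.run(
    trustpilot_source(
        business_unit_id="123456789abcdef",      # placeholder
        start_date="2024-01-01T00:00:00+00:00",
        end_date=None,                           # None: the resource reads up to "now"
        api_key="YOUR_API_KEY",                  # placeholder
    ).with_resources("reviews")
)
print(info)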
ingestr/src/trustpilot/client.py
ADDED
@@ -0,0 +1,48 @@
+"""Simple Trustpilot API client."""
+
+from typing import Any, Dict, Iterable
+
+import pendulum
+from dlt.sources.helpers import requests
+
+
+class TrustpilotClient:
+    """Client for the Trustpilot public API."""
+
+    def __init__(self, api_key: str) -> None:
+        self.api_key = api_key
+        self.base_url = "https://api.trustpilot.com/v1"
+
+    def _get(self, endpoint: str, params: Dict[str, Any]) -> Dict[str, Any]:
+        params = dict(params)
+        params["apikey"] = self.api_key
+        response = requests.get(f"{self.base_url}{endpoint}", params=params)
+        response.raise_for_status()
+        return response.json()
+
+    def paginated_reviews(
+        self,
+        business_unit_id: str,
+        updated_since: str,
+        end_date: str,
+        per_page: int = 1000,
+    ) -> Iterable[Dict[str, Any]]:
+        page = 1
+        while True:
+            params: Dict[str, Any] = {"perPage": per_page, "page": page}
+            if updated_since:
+                params["updatedSince"] = updated_since
+            data = self._get(f"/business-units/{business_unit_id}/reviews", params)
+            reviews = data.get("reviews", data)
+            if not reviews:
+                break
+            for review in reviews:
+                end_date_dt = pendulum.parse(end_date)
+                review["updated_at"] = review["updatedAt"]
+                review_dt = pendulum.parse(review["updated_at"])
+                if review_dt > end_date_dt:  # type: ignore
+                    continue
+                yield review
+            if len(reviews) < per_page:
+                break
+            page += 1
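The client can also be exercised on its own; a hedged sketch (a real API key and business unit id are required, the values below are placeholders):

import pendulum

from ingestr.src.trustpilot.client import TrustpilotClient

client = TrustpilotClient(api_key="YOUR_API_KEY")  # placeholder
for review in client.paginated_reviews(
    business_unit_id="123456789abcdef",            # placeholder
    updated_since="2024-01-01T00:00:00+00:00",
    end_date=pendulum.now("UTC").isoformat(),
    per_page=100,
):
    print(review["id"], review["updated_at"])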
{ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.54
+Version: 0.13.56
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=GkC1hdq8AVGrvolc95zMfjmibI95p2pmFkbgCOVf-Og,26311
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
-ingestr/src/blob.py,sha256=
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
+ingestr/src/buildinfo.py,sha256=xHWz596_bblLkASY5eAURBFkKuYtb-7IoI3_4X9OIZM,21
 ingestr/src/destinations.py,sha256=TcxM2rcwHfgMMP6U0yRNcfWKlEzkBbZbqCIDww7lkTY,16882
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=R7KzGRQ9tYZ_N-daD9OtnEp0K-DrsP8bUyXWdv4LV4A,6200
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=_1iodwR8UC0MtlnJr6y45eMWCcUwXKXSqJMzYsBizXo,95759
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -39,10 +39,11 @@ ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k
 ingestr/src/collector/spinner.py,sha256=_ZUqF5MI43hVIULdjF5s5mrAZbhEFXaiWirQmrv3Yk4,1201
 ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
 ingestr/src/elasticsearch/__init__.py,sha256=m-q93HgUmTwGDUwHOjHawstWL06TC3WIX3H05szybrY,2556
-ingestr/src/facebook_ads/__init__.py,sha256=
+ingestr/src/facebook_ads/__init__.py,sha256=a1A5fO1r_FotoH9UET42tamqo_-ftCm9vBrkm5lpjG0,9579
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
-ingestr/src/facebook_ads/helpers.py,sha256=
-ingestr/src/facebook_ads/settings.py,sha256=
+ingestr/src/facebook_ads/helpers.py,sha256=EYqOAPUlhVNxwzjP_CUGjJvAXTq65nJC-v75BfyJKmg,8981
+ingestr/src/facebook_ads/settings.py,sha256=Bsic8RcmH-NfEZ7r_NGospTCmwISK9XaMT5y2NZirtg,4938
+ingestr/src/facebook_ads/utils.py,sha256=ES2ylPoW3j3fjp6OMUgp21n1cG1OktXsmWWMk5vBW_I,1590
 ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
@@ -116,13 +117,15 @@ ingestr/src/solidgate/__init__.py,sha256=JdaXvAu5QGuf9-FY294vwCQCEmfrqIld9oqbzqC
 ingestr/src/solidgate/helpers.py,sha256=oePEc9nnvmN3IaKrfJCvyKL79xdGM0-gRTN3-8tY4Fc,4952
 ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
-ingestr/src/stripe_analytics/__init__.py,sha256=
+ingestr/src/stripe_analytics/__init__.py,sha256=j3Vmvo8G75fJJIF4rUnpGliGTpYQZt372wo-AjGImYs,4581
 ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
 ingestr/src/stripe_analytics/settings.py,sha256=ZahhZg3Sq2KnvnDcfSaXO494Csy3tElBDEHnvA1AVmA,2461
 ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
 ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
 ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=jmWHvZzN1Vt_PWrJkgq5a2wIwon-OBEzXoZx0jEy-74,3905
+ingestr/src/trustpilot/__init__.py,sha256=ofhjep4qRPIi8q41qc97QVex8UbWF-Fd7gUsqeQlQX8,1279
+ingestr/src/trustpilot/client.py,sha256=zKYt5C7nrR83Id0KN49EPmtml8MEtlSPlAosEFU3VXY,1616
 ingestr/src/zendesk/__init__.py,sha256=tmJ_jdb6kpwmEKpcv6Im71-bOZI6h-Tcofe18OH4I24,17762
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
@@ -138,8 +141,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.54.dist-info/METADATA,sha256=
-ingestr-0.13.54.dist-info/WHEEL,sha256=
-ingestr-0.13.54.dist-info/entry_points.txt,sha256=
-ingestr-0.13.54.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.13.54.dist-info/RECORD,,
+ingestr-0.13.56.dist-info/METADATA,sha256=YleGPh8oMkcEXHKFXIHIgSyUeu9p53rkynuyC4uiKMw,15131
+ingestr-0.13.56.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.56.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.56.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.56.dist-info/RECORD,,
{ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/WHEEL
File without changes
{ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.54.dist-info → ingestr-0.13.56.dist-info}/licenses/LICENSE.md
File without changes