ingestr 0.13.37__py3-none-any.whl → 0.13.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/main.py +2 -0
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +70 -0
- ingestr/src/factory.py +4 -2
- ingestr/src/frankfurter/__init__.py +25 -12
- ingestr/src/frankfurter/helpers.py +16 -0
- ingestr/src/google_analytics/helpers.py +12 -9
- ingestr/src/phantombuster/__init__.py +32 -5
- ingestr/src/phantombuster/client.py +33 -11
- ingestr/src/sources.py +58 -32
- {ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/METADATA +2 -2
- {ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/RECORD +15 -15
- {ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/WHEEL +0 -0
- {ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED

@@ -34,6 +34,7 @@ PARQUET_SUPPORTED_DESTINATIONS = [
     "snowflake",
     "databricks",
     "synapse",
+    "s3",
 ]

 # these sources would return a JSON for sure, which means they cannot be used with Parquet loader for BigQuery
@@ -485,6 +486,7 @@ def ingest(
     print(
         f"[bold yellow] Primary Key:[/bold yellow] {primary_key if primary_key else 'None'}"
     )
+    print(f"[bold yellow] Pipeline ID:[/bold yellow] {m.hexdigest()}")
     print()

     if not yes:

ingestr/src/buildinfo.py
CHANGED

@@ -1 +1 @@
-version = "v0.13.37"
+version = "v0.13.39"

ingestr/src/destinations.py
CHANGED

@@ -7,11 +7,13 @@ import tempfile
 from urllib.parse import parse_qs, quote, urlparse

 import dlt
+import dlt.destinations.impl.filesystem.filesystem
 from dlt.common.configuration.specs import AwsCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )

+from ingestr.src.errors import MissingValueError
 from ingestr.src.loader import load_dlt_file


@@ -382,3 +384,71 @@ class ClickhouseDestination:

     def post_load(self):
         pass
+
+
+class S3FSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
+    @property
+    def dataset_path(self):
+        # override to remove dataset path
+        return self.bucket_path
+
+
+class S3FS(dlt.destinations.filesystem):
+    @property
+    def client_class(self):
+        return S3FSClient
+
+
+class S3Destination:
+    def dlt_dest(self, uri: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        access_key_id = params.get("access_key_id", [None])[0]
+        if access_key_id is None:
+            raise MissingValueError("access_key_id", "S3")
+
+        secret_access_key = params.get("secret_access_key", [None])[0]
+        if secret_access_key is None:
+            raise MissingValueError("secret_access_key", "S3")
+
+        endpoint_url = params.get("endpoint_url", [None])[0]
+
+        creds = AwsCredentials(
+            aws_access_key_id=access_key_id,
+            aws_secret_access_key=secret_access_key,
+            endpoint_url=endpoint_url,
+        )
+
+        dest_table = self.validate_table(kwargs["dest_table"])
+        table_parts = dest_table.split("/")
+        base_path = "/".join(table_parts[:-1])
+
+        opts = {
+            "bucket_url": f"s3://{base_path}",
+            "credentials": creds,
+            # suppresses dlt warnings about dataset name normalization.
+            # we don't use dataset names in S3 so it's fine to disable this.
+            "enable_dataset_name_normalization": False,
+        }
+        layout = params.get("layout", [None])[0]
+        if layout is not None:
+            opts["layout"] = layout
+
+        return S3FS(**opts)  # type: ignore
+
+    def validate_table(self, table: str):
+        table = table.strip("/ ")
+        if len(table.split("/")) < 2:
+            raise ValueError("Table name must be in the format {bucket-name}/{path}")
+        return table
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        table = self.validate_table(table)
+        table_parts = table.split("/")
+        return {
+            "table_name": table_parts[-1],
+        }
+
+    def post_load(self) -> None:
+        pass

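For orientation, a minimal sketch of how the new destination splits a table argument into a bucket path and a table name, using only the validate_table and dlt_run_params methods shown above (the URI, bucket, and path names are placeholders):

    from ingestr.src.destinations import S3Destination

    dest = S3Destination()

    # "/my-bucket/exports/events/" -> "my-bucket/exports/events"
    table = dest.validate_table("/my-bucket/exports/events/")

    # the last path segment becomes the dlt table name; the segments before it
    # become the s3://my-bucket/exports bucket path used by dlt_dest
    params = dest.dlt_run_params(
        uri="s3://?access_key_id=KEY&secret_access_key=SECRET", table=table
    )
    print(params)  # {'table_name': 'events'}
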
ingestr/src/factory.py
CHANGED

@@ -13,6 +13,7 @@ from ingestr.src.destinations import (
     MsSQLDestination,
     PostgresDestination,
     RedshiftDestination,
+    S3Destination,
     SnowflakeDestination,
     SynapseDestination,
 )
@@ -29,6 +30,7 @@ from ingestr.src.sources import (
     DynamoDBSource,
     FacebookAdsSource,
     FrankfurterSource,
+    FreshdeskSource,
     GCSSource,
     GitHubSource,
     GoogleAdsSource,
@@ -44,6 +46,7 @@ from ingestr.src.sources import (
     MongoDbSource,
     NotionSource,
     PersonioSource,
+    PhantombusterSource,
     PipedriveSource,
     S3Source,
     SalesforceSource,
@@ -53,8 +56,6 @@ from ingestr.src.sources import (
     StripeAnalyticsSource,
     TikTokSource,
     ZendeskSource,
-    FreshdeskSource,
-    PhantombusterSource,
 )

 SQL_SOURCE_SCHEMES = [
@@ -170,6 +171,7 @@ class SourceDestinationFactory:
         "athena": AthenaDestination,
         "clickhouse+native": ClickhouseDestination,
         "clickhouse": ClickhouseDestination,
+        "s3": S3Destination,
     }

     def __init__(self, source_uri: str, destination_uri: str):

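A short illustrative sketch of how a destination URI's scheme keys into the mapping above (assumption: the actual resolution happens inside SourceDestinationFactory, which is only partially shown here):

    from urllib.parse import urlparse

    uri = "s3://?access_key_id=KEY&secret_access_key=SECRET"  # placeholder credentials
    scheme = urlparse(uri).scheme  # -> "s3"
    # destinations[scheme] now resolves to S3Destination
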
ingestr/src/frankfurter/__init__.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Any, Iterator
+from typing import Any, Iterator, Optional

 import dlt
 from dlt.common.pendulum import pendulum
@@ -15,13 +15,13 @@ from ingestr.src.frankfurter.helpers import get_path_with_retry
 def frankfurter_source(
     start_date: TAnyDateTime,
     end_date: TAnyDateTime,
+    base_currency: str,
 ) -> Any:
     """
     A dlt source for the frankfurter.dev API. It groups several resources (in this case frankfurter.dev API endpoints) containing
     various types of data: currencies, latest rates, historical rates.
     """
     date_time = dlt.sources.incremental(
-
         "date",
         initial_value=start_date,
         end_value=end_date,
@@ -31,9 +31,10 @@ def frankfurter_source(

     return (
         currencies(),
-        latest(),
-        exchange_rates(
-
+        latest(base_currency=base_currency),
+        exchange_rates(
+            start_date=date_time, end_date=end_date, base_currency=base_currency
+        ),
     )


@@ -61,29 +62,33 @@ def currencies() -> Iterator[dict]:
         "date": {"data_type": "text"},
         "currency_code": {"data_type": "text"},
         "rate": {"data_type": "double"},
+        "base_currency": {"data_type": "text"},
     },
-    primary_key=["date", "currency_code"],
+    primary_key=["date", "currency_code", "base_currency"],
 )
-def latest() -> Iterator[dict]:
+def latest(base_currency: Optional[str] = "") -> Iterator[dict]:
     """
     Fetches the latest exchange rates and yields them as rows.
     """
     # Base URL
     url = "latest?"

+    if base_currency:
+        url += f"base={base_currency}"
+
     # Fetch data
     data = get_path_with_retry(url)

     # Extract rates and base currency
     rates = data["rates"]
-
     date = pendulum.parse(data["date"])

-    # Add the base currency
+    # Add the base currency with a rate of 1.0
     yield {
         "date": date,
-        "currency_code":
+        "currency_code": base_currency,
         "rate": 1.0,
+        "base_currency": base_currency,
     }

     # Add all currencies and their rates
@@ -92,6 +97,7 @@ def latest() -> Iterator[dict]:
             "date": date,
             "currency_code": currency_code,
             "rate": rate,
+            "base_currency": base_currency,
         }


@@ -101,12 +107,14 @@ def latest() -> Iterator[dict]:
         "date": {"data_type": "text"},
         "currency_code": {"data_type": "text"},
         "rate": {"data_type": "double"},
+        "base_currency": {"data_type": "text"},
     },
-    primary_key=("date", "currency_code"),
+    primary_key=("date", "currency_code", "base_currency"),
 )
 def exchange_rates(
     end_date: TAnyDateTime,
     start_date: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental("date"),
+    base_currency: Optional[str] = "",
 ) -> Iterator[dict]:
     """
     Fetches exchange rates for a specified date range.
@@ -124,6 +132,9 @@ def exchange_rates(
     # Compose the URL
     url = f"{start_date_str}..{end_date_str}?"

+    if base_currency:
+        url += f"base={base_currency}"
+
     # Fetch data from the API
     data = get_path_with_retry(url)

@@ -137,8 +148,9 @@ def exchange_rates(
         # Add the base currency with a rate of 1.0
         yield {
             "date": formatted_date,
-            "currency_code":
+            "currency_code": base_currency,
             "rate": 1.0,
+            "base_currency": base_currency,
         }

         # Add all other currencies and their rates
@@ -147,4 +159,5 @@ def exchange_rates(
             "date": formatted_date,
             "currency_code": currency_code,
             "rate": rate,
+            "base_currency": base_currency,
         }

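As a usage sketch (assuming standard dlt wiring; the pipeline name, destination, and dates are illustrative), the new base_currency argument flows into both the latest and exchange_rates resources and is appended to the request URL as base=<code>:

    import dlt
    import pendulum

    from ingestr.src.frankfurter import frankfurter_source

    pipeline = dlt.pipeline(pipeline_name="fx", destination="duckdb", dataset_name="rates")
    pipeline.run(
        frankfurter_source(
            start_date=pendulum.datetime(2024, 1, 1),
            end_date=pendulum.datetime(2024, 1, 31),
            base_currency="EUR",
        )
    )
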
ingestr/src/frankfurter/helpers.py
CHANGED

@@ -30,3 +30,19 @@ def validate_dates(start_date: datetime, end_date: datetime) -> None:
     # Check if start_date is before end_date
     if start_date > end_date:
         raise ValueError("Interval-end cannot be before interval-start.")
+
+
+def validate_currency(currency_code: str) -> bool:
+    url = "https://api.frankfurter.dev/v1/currencies"
+
+    response = requests.get(url, timeout=5)
+    currencies = response.json()
+
+    if currency_code.upper() in currencies:
+        return True
+    else:
+        supported_currencies = list(currencies.keys())
+        print(
+            f"Invalid base currency '{currency_code}'. Supported currencies are: {supported_currencies}"
+        )
+        return False

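Note that validate_currency calls the live Frankfurter API and upper-cases its input before the membership check, so lowercase codes pass; a quick sketch of the expected behavior:

    from ingestr.src.frankfurter.helpers import validate_currency

    validate_currency("eur")  # True: "EUR" is a key in the /v1/currencies response
    validate_currency("xyz")  # False: prints the list of supported currencies
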
ingestr/src/google_analytics/helpers.py
CHANGED

@@ -149,7 +149,7 @@ def get_report(

         # process request
         processed_response_generator = process_report(response=response)
-
+
         # import pdb; pdb.set_trace()
         yield from processed_response_generator
         offset += per_page
@@ -225,7 +225,9 @@ def _resolve_dimension_value(dimension_name: str, dimension_value: str) -> Any:
     return dimension_value


-def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[MinuteRange]:
+def convert_minutes_ranges_to_minute_range_objects(
+    minutes_ranges: str,
+) -> List[MinuteRange]:
     minutes_ranges = minutes_ranges.strip()
     minutes = minutes_ranges.replace(" ", "").split(",")
     if minutes == "":
@@ -233,7 +235,6 @@ def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[
             "Invalid input. Minutes range should be startminute-endminute format. For example: 1-2,5-6"
         )

-
     minute_range_objects = []
     for min_range in minutes:
         if "-" not in min_range:
@@ -246,14 +247,16 @@ def convert_minutes_ranges_to_minute_range_objects(minutes_ranges: str) -> List[
             raise ValueError(
                 f"Invalid input '{min_range}'. Both start and end minutes must be digits. For example: 1-2,5-6"
             )
-
+
         end_minutes_ago = int(parts[0])
         start_minutes_ago = int(parts[1])
-        minute_range_objects.append(
-
-
-
-
+        minute_range_objects.append(
+            MinuteRange(
+                name=f"{end_minutes_ago}-{start_minutes_ago} minutes ago",
+                start_minutes_ago=start_minutes_ago,
+                end_minutes_ago=end_minutes_ago,
+            )
+        )

     return minute_range_objects

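The reflowed append also makes the parsing rule visible: in an input such as "1-2", the first number is the end of the window and the second is the start, both measured in minutes ago. A sketch of the expected output (MinuteRange comes from the Google Analytics Data API types, per the code above):

    from ingestr.src.google_analytics.helpers import (
        convert_minutes_ranges_to_minute_range_objects,
    )

    ranges = convert_minutes_ranges_to_minute_range_objects("1-2,5-6")
    # ranges[0]: name="1-2 minutes ago", start_minutes_ago=2, end_minutes_ago=1
    # ranges[1]: name="5-6 minutes ago", start_minutes_ago=6, end_minutes_ago=5
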
ingestr/src/phantombuster/__init__.py
CHANGED

@@ -3,7 +3,7 @@ from typing import Iterable, Optional
 import dlt
 import pendulum
 import requests
-from dlt.common.typing import TDataItem
+from dlt.common.typing import TAnyDateTime, TDataItem
 from dlt.sources import DltResource
 from dlt.sources.helpers.requests import Client

@@ -28,11 +28,38 @@ def create_client() -> requests.Session:


 @dlt.source(max_table_nesting=0)
-def phantombuster_source(
+def phantombuster_source(
+    api_key: str, agent_id: str, start_date: TAnyDateTime, end_date: TAnyDateTime | None
+) -> Iterable[DltResource]:
     client = PhantombusterClient(api_key)

-    @dlt.resource(
-
-
+    @dlt.resource(
+        write_disposition="merge",
+        primary_key="container_id",
+        columns={
+            "partition_dt": {"data_type": "date", "partition": True},
+        },
+    )
+    def completed_phantoms(
+        dateTime=(
+            dlt.sources.incremental(
+                "ended_at",
+                initial_value=start_date,
+                end_value=end_date,
+                range_start="closed",
+                range_end="closed",
+            )
+        ),
+    ) -> Iterable[TDataItem]:
+        if dateTime.end_value is None:
+            end_dt = pendulum.now(tz="UTC")
+        else:
+            end_dt = dateTime.end_value
+
+        start_dt = dateTime.last_value
+
+        yield client.fetch_containers_result(
+            create_client(), agent_id, start_date=start_dt, end_date=end_dt
+        )

     return completed_phantoms

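A hypothetical construction of the source as now parameterized (the key and agent id are placeholders); when end_date is None, the resource falls back to pendulum.now(tz="UTC") at extraction time:

    import pendulum

    from ingestr.src.phantombuster import phantombuster_source

    source = phantombuster_source(
        api_key="pb-api-key",      # placeholder
        agent_id="1234567890",     # placeholder
        start_date=pendulum.datetime(2018, 1, 1, tz="UTC"),
        end_date=None,             # open-ended window
    )
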
ingestr/src/phantombuster/client.py
CHANGED

@@ -14,12 +14,22 @@ class PhantombusterClient:
         "accept": "application/json",
     }

-    def fetch_containers_result(
+    def fetch_containers_result(
+        self,
+        session: requests.Session,
+        agent_id: str,
+        start_date: pendulum.DateTime,
+        end_date: pendulum.DateTime,
+    ):
         url = "https://api.phantombuster.com/api/v2/containers/fetch-all/"
         before_ended_at = None
         limit = 100
-
+
+        started_at = start_date.int_timestamp * 1000 + int(
+            start_date.microsecond / 1000
+        )
         ended_at = end_date.int_timestamp * 1000 + int(end_date.microsecond / 1000)
+
         while True:
             params: dict[str, Union[str, int, float, bytes, None]] = {
                 "agentId": agent_id,
@@ -36,23 +46,35 @@ class PhantombusterClient:

             for container in containers:
                 container_ended_at = container.get("endedAt")
-
-
-
-
+
+                if before_ended_at is None or before_ended_at > container_ended_at:
+                    before_ended_at = container_ended_at
+
+                if container_ended_at < started_at or container_ended_at > ended_at:
                     continue
+
                 try:
                     result = self.fetch_result_object(session, container["id"])
-                    partition_dt = pendulum.from_timestamp(
-
+                    partition_dt = pendulum.from_timestamp(
+                        container_ended_at / 1000, tz="UTC"
+                    ).date()
+                    container_ended_at_datetime = pendulum.from_timestamp(
+                        container_ended_at / 1000, tz="UTC"
+                    )
+                    row = {
+                        "container_id": container["id"],
+                        "container": container,
+                        "result": result,
+                        "partition_dt": partition_dt,
+                        "ended_at": container_ended_at_datetime,
+                    }
                     yield row
-
+
                 except requests.RequestException as e:
                     print(f"Error fetching result for container {container['id']}: {e}")
-
+
             if data["maxLimitReached"] is False:
                 break
-

     def fetch_result_object(self, session: requests.Session, container_id: str):
         result_url = (

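The new started_at value mirrors the existing ended_at conversion: the container endedAt values are millisecond epoch timestamps, so both bounds are converted the same way before the comparison. A small sketch of the arithmetic used above:

    import pendulum

    dt = pendulum.datetime(2024, 5, 1, 12, 30, 0, 500000, tz="UTC")
    millis = dt.int_timestamp * 1000 + int(dt.microsecond / 1000)
    print(millis)  # 1714566600500
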
ingestr/src/sources.py
CHANGED

@@ -1492,7 +1492,9 @@ class GoogleAnalyticsSource:

         minute_range_objects = []
         if len(fields) == 4:
-            minute_range_objects = helpers.convert_minutes_ranges_to_minute_range_objects(fields[3])
+            minute_range_objects = (
+                helpers.convert_minutes_ranges_to_minute_range_objects(fields[3])
+            )

         datetime = ""
         resource_name = fields[0].lower()
@@ -2180,6 +2182,18 @@ class FrankfurterSource:
                 "Frankfurter takes care of incrementality on its own, you should not provide incremental_key"
             )

+        from ingestr.src.frankfurter import frankfurter_source
+        from ingestr.src.frankfurter.helpers import validate_currency, validate_dates
+
+        parsed_uri = urlparse(uri)
+        source_params = parse_qs(parsed_uri.query)
+        base_currency = source_params.get("base", [None])[0]
+
+        if not base_currency:
+            base_currency = "USD"
+
+        validate_currency(base_currency)
+
         if kwargs.get("interval_start"):
             start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
         if kwargs.get("interval_end"):
@@ -2190,26 +2204,25 @@ class FrankfurterSource:
             start_date = pendulum.now()
             end_date = pendulum.now()

-        from ingestr.src.frankfurter import frankfurter_source
-        from ingestr.src.frankfurter.helpers import validate_dates
-
         validate_dates(start_date=start_date, end_date=end_date)

         src = frankfurter_source(
             start_date=start_date,
             end_date=end_date,
+            base_currency=base_currency,
         )

         if table not in src.resources:
             raise UnsupportedResourceError(table, "Frankfurter")

         return src.with_resources(table)
-
+
+
 class FreshdeskSource:
-
+    # freshdesk://domain?api_key=<api_key>
     def handles_incrementality(self) -> bool:
         return True
-
+
     def dlt_source(self, uri: str, table: str, **kwargs):
         parsed_uri = urlparse(uri)
         domain = parsed_uri.netloc
@@ -2219,56 +2232,69 @@ class FreshdeskSource:
         if not domain:
             raise MissingValueError("domain", "Freshdesk")

-        if
-            domain = domain.split(
-
+        if "." in domain:
+            domain = domain.split(".")[0]
+
         api_key = params.get("api_key")
         if api_key is None:
             raise MissingValueError("api_key", "Freshdesk")
-
-        if table not in [
+
+        if table not in [
+            "agents",
+            "companies",
+            "contacts",
+            "groups",
+            "roles",
+            "tickets",
+        ]:
             raise UnsupportedResourceError(table, "Freshdesk")
-
+
         from ingestr.src.freshdesk import freshdesk_source
-
+
+        return freshdesk_source(
+            api_secret_key=api_key[0], domain=domain
+        ).with_resources(table)
+

 class PhantombusterSource:
     def handles_incrementality(self) -> bool:
         return True
-
+
     def dlt_source(self, uri: str, table: str, **kwargs):
-        #phantombuster://?api_key=<api_key>
-        #source table = phantom_results:agent_id
+        # phantombuster://?api_key=<api_key>
+        # source table = phantom_results:agent_id
         parsed_uri = urlparse(uri)
         params = parse_qs(parsed_uri.query)
         api_key = params.get("api_key")
         if api_key is None:
             raise MissingValueError("api_key", "Phantombuster")
-
+
         table_fields = table.replace(" ", "").split(":")
         table_name = table_fields[0]
-
+
         agent_id = table_fields[1] if len(table_fields) > 1 else None
-
+
         if table_name not in ["completed_phantoms"]:
             raise UnsupportedResourceError(table_name, "Phantombuster")
-
+
         if not agent_id:
             raise MissingValueError("agent_id", "Phantombuster")
-
+
         start_date = kwargs.get("interval_start")
-        if start_date is
-            start_date = ensure_pendulum_datetime(
+        if start_date is None:
+            start_date = ensure_pendulum_datetime("2018-01-01").in_tz("UTC")
         else:
-            start_date =
+            start_date = ensure_pendulum_datetime(start_date).in_tz("UTC")

         end_date = kwargs.get("interval_end")
-
-        #doesnot support incremental loading
         if end_date is not None:
-            end_date = ensure_pendulum_datetime(end_date)
-
-            end_date = pendulum.now()
-
+            end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
         from ingestr.src.phantombuster import phantombuster_source
-
+
+        return phantombuster_source(
+            api_key=api_key[0],
+            agent_id=agent_id,
+            start_date=start_date,
+            end_date=end_date,
+        ).with_resources(table_name)

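To make the new Frankfurter URI handling concrete, a sketch of the query-parameter logic added above (the URI is a placeholder):

    from urllib.parse import parse_qs, urlparse

    uri = "frankfurter://?base=EUR"
    params = parse_qs(urlparse(uri).query)
    base_currency = params.get("base", [None])[0] or "USD"  # USD is the default
    print(base_currency)  # EUR
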
{ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.37
+Version: 0.13.39
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -74,7 +74,7 @@ Requires-Dist: google-cloud-storage==3.1.0
 Requires-Dist: google-crc32c==1.6.0
 Requires-Dist: google-resumable-media==2.7.2
 Requires-Dist: googleapis-common-protos==1.69.0
-Requires-Dist: greenlet==3.2.
+Requires-Dist: greenlet==3.2.2
 Requires-Dist: grpcio-status==1.62.3
 Requires-Dist: grpcio==1.70.0
 Requires-Dist: hdbcli==2.23.27

{ingestr-0.13.37.dist-info → ingestr-0.13.39.dist-info}/RECORD
CHANGED

@@ -1,16 +1,16 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=Pe_rzwcDRKIYa7baEVUAAPOHyqQbX29RUexMl0F_S1k,25273
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=edyodue-Rkn4zTwWVR9OU0dSsDXVokKw_KKllrI1amM,21
+ingestr/src/destinations.py,sha256=MctbeJUyNr0DRB0XYt2xAbEKkHZ40-nXXEOYCs4KuoE,15420
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=j-FKRBEBZVLT_DEn-SCu9KEvaab3BchEV5hzTjpree8,5511
 ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=vppNI75ucM0EtW2kP5ldKyhc4Pij_hGVmKlZ9DNL4g0,79181
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -42,8 +42,8 @@ ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_
 ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
-ingestr/src/frankfurter/__init__.py,sha256=
-ingestr/src/frankfurter/helpers.py,sha256=
+ingestr/src/frankfurter/__init__.py,sha256=oVi4BiOxPRyckEVrBNunyMAHulPyMgyGRwBbhn-Xz6M,4987
+ingestr/src/frankfurter/helpers.py,sha256=SyrkRTDqvKdQxRHTV5kcSeVG3FEnaK5zxHyNyqtumZ0,1445
 ingestr/src/freshdesk/__init__.py,sha256=uFQW_cJyymxtHQiYb_xjzZAklc487L0n9GkgHgC7yAI,2618
 ingestr/src/freshdesk/freshdesk_client.py,sha256=3z5Yc008ADzRcJWtNc00PwjkLzG-RMI8jVIOOyYA-Rw,4088
 ingestr/src/freshdesk/settings.py,sha256=0Wr_OMnUZcTlry7BmALssLxD2yh686JW4moLNv12Jnw,409
@@ -57,7 +57,7 @@ ingestr/src/google_ads/metrics.py,sha256=tAqpBpm-8l95oPT9cBxMWaEoDTNHVXnqUphYDHW
 ingestr/src/google_ads/predicates.py,sha256=K4wTuqfmJ9ko1RKeHTBDfQO_mUADVyuRqtywBPP-72w,683
 ingestr/src/google_ads/reports.py,sha256=AVY1pPt5yaIFskQe1k5VW2Dhlux3bzewsHlDrdGEems,12686
 ingestr/src/google_analytics/__init__.py,sha256=8b9CBWJFrBpHVRl993Z7J01sKKbYyXEtngdfEUwqlfE,4343
-ingestr/src/google_analytics/helpers.py,sha256=
+ingestr/src/google_analytics/helpers.py,sha256=tM7h_yughca3l7tnS_2XGIBM37mVm-Uewv7tK7XHVbc,10111
 ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
 ingestr/src/google_sheets/__init__.py,sha256=CL0HfY74uxX8-ge0ucI0VhWMYZVAfoX7WRPBitRi-CI,6647
 ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -87,8 +87,8 @@ ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falP
 ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
 ingestr/src/personio/__init__.py,sha256=sHYpoV-rg-kA1YsflctChis0hKcTrL6mka9O0CHV4zA,11638
 ingestr/src/personio/helpers.py,sha256=EKmBN0Lf4R0lc3yqqs7D-RjoZ75E8gPcctt59xwHxrY,2901
-ingestr/src/phantombuster/__init__.py,sha256=
-ingestr/src/phantombuster/client.py,sha256=
+ingestr/src/phantombuster/__init__.py,sha256=8AQTiA8fp1NT8TellQQqwBCl6vGvGwUBLif6LIzgAik,1786
+ingestr/src/phantombuster/client.py,sha256=9zx58sFunXjUNh6jeEYLNfwNxGxX9odifwAmS0E9AaY,3018
 ingestr/src/pipedrive/__init__.py,sha256=iRrxeMwo8_83ptgGnTFTNHV1nYvIsFfg0a3XzugPYeI,6982
 ingestr/src/pipedrive/settings.py,sha256=q119Fy4C5Ip1rMoCILX2BkHV3bwiXC_dW58KIiDUzsY,708
 ingestr/src/pipedrive/typing.py,sha256=lEMXu4hhAA3XkhVSlBUa-juqyupisd3c-qSQKxFvzoE,69
@@ -127,8 +127,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.37.dist-info/METADATA,sha256=
-ingestr-0.13.37.dist-info/WHEEL,sha256=
-ingestr-0.13.37.dist-info/entry_points.txt,sha256=
-ingestr-0.13.37.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.13.37.dist-info/RECORD,,
+ingestr-0.13.39.dist-info/METADATA,sha256=goY5MW5AzJwYQ0cbwTmlNxZgCP1QRSt6ROmBOImESIM,13575
+ingestr-0.13.39.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.39.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.39.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.39.dist-info/RECORD,,

The remaining renamed files (WHEEL, entry_points.txt, licenses/LICENSE.md) contain no changes.