ingestr 0.12.8__py3-none-any.whl → 0.12.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/src/appstore/__init__.py +4 -0
- ingestr/src/factory.py +4 -0
- ingestr/src/filesystem/__init__.py +0 -2
- ingestr/src/linkedin_ads/__init__.py +63 -0
- ingestr/src/linkedin_ads/dimension_time_enum.py +12 -0
- ingestr/src/linkedin_ads/helpers.py +148 -0
- ingestr/src/sources.py +157 -9
- ingestr/src/version.py +1 -1
- {ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/METADATA +2 -1
- {ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/RECORD +13 -10
- {ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/WHEEL +0 -0
- {ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/appstore/__init__.py
CHANGED
@@ -28,6 +28,10 @@ def app_store(
     start_date: Optional[datetime] = None,
     end_date: Optional[datetime] = None,
 ) -> Iterable[DltResource]:
+    if start_date and start_date.tzinfo is not None:
+        start_date = start_date.replace(tzinfo=None)
+    if end_date and end_date.tzinfo is not None:
+        end_date = end_date.replace(tzinfo=None)
     for resource in RESOURCES:
         yield dlt.resource(
             get_analytics_reports,
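The four added lines normalize any timezone-aware start_date/end_date to naive datetimes before the resources are built. A minimal sketch of the failure mode this kind of guard typically prevents (the datetime values are illustrative, not from the package):

import datetime as dt

aware = dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc)
naive = dt.datetime(2025, 6, 1)

# Comparing aware and naive datetimes raises TypeError in Python,
# which is why stripping tzinfo up front is a common normalization.
try:
    aware < naive
except TypeError as exc:
    print(exc)  # can't compare offset-naive and offset-aware datetimes

assert aware.replace(tzinfo=None) < naive  # the normalized comparison works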
ingestr/src/factory.py
CHANGED
@@ -25,6 +25,7 @@ from ingestr.src.sources import (
     ChessSource,
     DynamoDBSource,
     FacebookAdsSource,
+    GCSSource,
     GitHubSource,
     GoogleAnalyticsSource,
     GoogleSheetsSource,
@@ -32,6 +33,7 @@ from ingestr.src.sources import (
     HubspotSource,
     KafkaSource,
     KlaviyoSource,
+    LinkedInAdsSource,
     LocalCsvSource,
     MongoDbSource,
     NotionSource,
@@ -124,6 +126,8 @@ class SourceDestinationFactory:
         "tiktok": TikTokSource,
         "googleanalytics": GoogleAnalyticsSource,
         "appstore": AppleAppStoreSource,
+        "gs": GCSSource,
+        "linkedinads": LinkedInAdsSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
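The factory change is pure registration: two new URI schemes map to the new source classes. A trimmed-down, hypothetical sketch of the dispatch this mapping drives (the key/value pairs mirror the excerpt above; the lookup helper and the string stand-ins for the classes are assumptions for illustration):

from urllib.parse import urlparse

# Stand-in for SourceDestinationFactory.sources above; the real mapping
# holds classes, strings are used here for brevity.
sources = {
    "gs": "GCSSource",
    "linkedinads": "LinkedInAdsSource",
}

def resolve(uri: str) -> str:
    scheme = urlparse(uri).scheme  # 'gs://bucket/...' -> 'gs'
    return sources[scheme]

assert resolve("gs://my-bucket/data.csv") == "GCSSource"
assert resolve("linkedinads://?access_token=TOKEN") == "LinkedInAdsSource"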
ingestr/src/filesystem/__init__.py
CHANGED
@@ -39,8 +39,6 @@ def readers(
     filesystem_resource = filesystem(bucket_url, credentials, file_glob=file_glob)
     filesystem_resource.apply_hints(
         incremental=dlt.sources.incremental("modification_date"),
-        range_end="closed",
-        range_start="closed",
     )
     return (
         filesystem_resource | dlt.transformer(name="read_csv")(_read_csv),
ingestr/src/linkedin_ads/__init__.py
ADDED
@@ -0,0 +1,63 @@
+from typing import Iterable
+
+import dlt
+import pendulum
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+from pendulum import Date
+
+from .dimension_time_enum import Dimension, TimeGranularity
+from .helpers import LinkedInAdsAPI, find_intervals
+
+
+@dlt.source(max_table_nesting=0)
+def linked_in_ads_source(
+    start_date: Date,
+    end_date: Date | None,
+    access_token: str,
+    account_ids: list[str],
+    dimension: Dimension,
+    metrics: list[str],
+    time_granularity: TimeGranularity,
+) -> DltResource:
+    linkedin_api = LinkedInAdsAPI(
+        access_token=access_token,
+        account_ids=account_ids,
+        dimension=dimension,
+        metrics=metrics,
+        time_granularity=time_granularity,
+    )
+
+    if time_granularity == TimeGranularity.daily:
+        primary_key = [dimension.value, "date"]
+        incremental_loading_param = "date"
+    else:
+        primary_key = [dimension.value, "start_date", "end_date"]
+        incremental_loading_param = "start_date"
+
+    @dlt.resource(write_disposition="merge", primary_key=primary_key)
+    def custom_reports(
+        dateTime=(
+            dlt.sources.incremental(
+                incremental_loading_param,
+                initial_value=start_date,
+                end_value=end_date,
+                range_start="closed",
+                range_end="closed",
+            )
+        ),
+    ) -> Iterable[TDataItem]:
+        if dateTime.end_value is None:
+            end_date = pendulum.now().date()
+        else:
+            end_date = dateTime.end_value
+
+        list_of_interval = find_intervals(
+            start_date=dateTime.last_value,
+            end_date=end_date,
+            time_granularity=time_granularity,
+        )
+        for start, end in list_of_interval:
+            yield linkedin_api.fetch_pages(start, end)
+
+    return custom_reports
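For orientation, a hedged sketch of how this source might be wired into a dlt pipeline. The enum member names follow the validation in sources.py below (Dimension["campaign"], TimeGranularity.daily); the token and account id are placeholders:

import dlt
import pendulum

from ingestr.src.linkedin_ads import linked_in_ads_source
from ingestr.src.linkedin_ads.dimension_time_enum import Dimension, TimeGranularity

source = linked_in_ads_source(
    start_date=pendulum.date(2024, 1, 1),
    end_date=None,  # the resource falls back to "today" when unset
    access_token="PLACEHOLDER_TOKEN",  # placeholder, not a real token
    account_ids=["123456789"],  # hypothetical account id
    dimension=Dimension["campaign"],  # member name taken from sources.py's checks
    metrics=["impressions", "clicks", "dateRange", "pivotValues"],
    time_granularity=TimeGranularity.daily,
)

pipeline = dlt.pipeline(pipeline_name="linkedin_ads", destination="duckdb")
pipeline.run(source.with_resources("custom_reports"))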
ingestr/src/linkedin_ads/helpers.py
ADDED
@@ -0,0 +1,148 @@
+from urllib.parse import quote
+
+import pendulum
+import requests
+from dlt.sources.helpers.requests import Client
+from pendulum import Date
+
+from .dimension_time_enum import Dimension, TimeGranularity
+
+
+def retry_on_limit(
+    response: requests.Response | None, exception: BaseException | None
+) -> bool:
+    if response is None:
+        return False
+    return response.status_code == 429
+
+
+def create_client() -> requests.Session:
+    return Client(
+        request_timeout=10.0,
+        raise_for_status=False,
+        retry_condition=retry_on_limit,
+        request_max_attempts=12,
+    ).session
+
+
+def flat_structure(items, pivot: Dimension, time_granularity: TimeGranularity):
+    for item in items:
+        if "pivotValues" in item:
+            if len(item["pivotValues"]) > 1:
+                item[pivot.value.lower()] = item["pivotValues"]
+            else:
+                item[pivot.value.lower()] = item["pivotValues"][0]
+        if "dateRange" in item:
+            start_date = item["dateRange"]["start"]
+            start_dt = pendulum.date(
+                year=start_date["year"],
+                month=start_date["month"],
+                day=start_date["day"],
+            )
+            if time_granularity == TimeGranularity.daily:
+                item["date"] = start_dt
+            else:
+                end_date = item["dateRange"]["end"]
+                end_dt = pendulum.date(
+                    year=end_date["year"],
+                    month=end_date["month"],
+                    day=end_date["day"],
+                )
+                item["start_date"] = start_dt
+                item["end_date"] = end_dt
+
+        del item["dateRange"]
+        del item["pivotValues"]
+
+    return items
+
+
+def find_intervals(start_date: Date, end_date: Date, time_granularity: TimeGranularity):
+    intervals = []
+
+    if start_date > end_date:
+        raise ValueError("Start date must be less than end date")
+
+    while start_date <= end_date:
+        if time_granularity == TimeGranularity.daily:
+            next_date = min(start_date.add(months=6), end_date)
+        else:
+            next_date = min(start_date.add(years=2), end_date)
+
+        intervals.append((start_date, next_date))
+
+        start_date = next_date.add(days=1)
+
+    return intervals
+
+
+def construct_url(
+    start: Date,
+    end: Date,
+    account_ids: list[str],
+    metrics: list[str],
+    dimension: Dimension,
+    time_granularity: TimeGranularity,
+):
+    date_range = f"(start:(year:{start.year},month:{start.month},day:{start.day})"
+    date_range += f",end:(year:{end.year},month:{end.month},day:{end.day}))"
+    accounts = ",".join(
+        [quote(f"urn:li:sponsoredAccount:{account_id}") for account_id in account_ids]
+    )
+    encoded_accounts = f"List({accounts})"
+    dimension_str = dimension.value.upper()
+    time_granularity_str = time_granularity.value
+    metrics_str = ",".join([metric for metric in metrics])
+
+    url = (
+        f"https://api.linkedin.com/rest/adAnalytics?"
+        f"q=analytics&timeGranularity={time_granularity_str}&"
+        f"dateRange={date_range}&accounts={encoded_accounts}&"
+        f"pivot={dimension_str}&fields={metrics_str}"
+    )
+
+    return url
+
+
+class LinkedInAdsAPI:
+    def __init__(
+        self,
+        access_token,
+        time_granularity,
+        account_ids,
+        dimension,
+        metrics,
+    ):
+        self.time_granularity: TimeGranularity = time_granularity
+        self.account_ids: list[str] = account_ids
+        self.dimension: Dimension = dimension
+        self.metrics: list[str] = metrics
+        self.headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Linkedin-Version": "202411",
+            "X-Restli-Protocol-Version": "2.0.0",
+        }
+
+    def fetch_pages(self, start: Date, end: Date):
+        client = create_client()
+        url = construct_url(
+            start=start,
+            end=end,
+            account_ids=self.account_ids,
+            metrics=self.metrics,
+            dimension=self.dimension,
+            time_granularity=self.time_granularity,
+        )
+        response = client.get(url=url, headers=self.headers)
+
+        if response.status_code != 200:
+            error_data = response.json()
+            raise ValueError(f"LinkedIn API Error: {error_data.get('message')}")
+
+        result = response.json()
+        items = result.get("elements", [])
+        yield flat_structure(
+            items=items,
+            pivot=self.dimension,
+            time_granularity=self.time_granularity,
+        )
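find_intervals is the piece that keeps each API call within a bounded window: daily reports are fetched in chunks of at most six months, monthly reports in chunks of at most two years. A small worked example, with the output computed by hand from the logic above:

import pendulum

from ingestr.src.linkedin_ads.dimension_time_enum import TimeGranularity
from ingestr.src.linkedin_ads.helpers import find_intervals

chunks = find_intervals(
    start_date=pendulum.date(2023, 1, 1),
    end_date=pendulum.date(2024, 1, 1),
    time_granularity=TimeGranularity.daily,
)
# Each chunk ends where the next begins, shifted by one day:
# [(Date(2023, 1, 1), Date(2023, 7, 1)), (Date(2023, 7, 2), Date(2024, 1, 1))]
print(chunks)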
ingestr/src/sources.py
CHANGED
@@ -17,6 +17,8 @@ from typing import (
 from urllib.parse import ParseResult, parse_qs, quote, urlparse
 
 import dlt
+import gcsfs  # type: ignore
+import s3fs  # type: ignore
 import pendulum
 from dlt.common.configuration.specs import (
     AwsCredentials,
@@ -67,6 +69,11 @@ from ingestr.src.hubspot import hubspot
 from ingestr.src.kafka import kafka_consumer
 from ingestr.src.kafka.helpers import KafkaCredentials
 from ingestr.src.klaviyo._init_ import klaviyo_source
+from ingestr.src.linkedin_ads import linked_in_ads_source
+from ingestr.src.linkedin_ads.dimension_time_enum import (
+    Dimension,
+    TimeGranularity,
+)
 from ingestr.src.mongodb import mongodb_collection
 from ingestr.src.notion import notion_databases
 from ingestr.src.shopify import shopify_source
@@ -1091,19 +1098,17 @@ class S3Source:
         bucket_name = parsed_uri.hostname
         if not bucket_name:
             raise ValueError(
-                "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name
+                "Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
             )
         bucket_url = f"s3://{bucket_name}"
 
-        path_to_file = parsed_uri.path.lstrip("/")
+        path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
         if not path_to_file:
-            raise ValueError(
-                "Invalid S3 URI: The file path is missing. Ensure your S3 URI follows the format 's3://bucket-name/path/to/file"
-            )
+            raise ValueError("--source-table must be specified")
 
-
-
-
+        fs = s3fs.S3FileSystem(
+            key=access_key_id[0],
+            secret=secret_access_key[0],
         )
 
         file_extension = path_to_file.split(".")[-1]
@@ -1119,7 +1124,7 @@ class S3Source:
         )
 
         return readers(
-            bucket_url
+            bucket_url, fs, path_to_file
         ).with_resources(endpoint)
 
 
@@ -1503,3 +1508,146 @@ class AppleAppStoreSource:
             raise UnsupportedResourceError(table, "AppStore")
 
         return src.with_resources(table)
+
+
+class GCSSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "GCS takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        credentials_path = params.get("credentials_path")
+        credentials_base64 = params.get("credentials_base64")
+        credentials_available = any(
+            map(
+                lambda x: x is not None,
+                [credentials_path, credentials_base64],
+            )
+        )
+        if credentials_available is False:
+            raise MissingValueError("credentials_path or credentials_base64", "GCS")
+
+        bucket_name = parsed_uri.hostname
+        if not bucket_name:
+            raise ValueError(
+                "Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
+            )
+        bucket_url = f"gs://{bucket_name}/"
+
+        path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
+        if not path_to_file:
+            raise ValueError("--source-table must be specified")
+
+        credentials = None
+        if credentials_path:
+            credentials = credentials_path[0]
+        else:
+            credentials = json.loads(base64.b64decode(credentials_base64[0]).decode())  # type: ignore
+
+        # There's a compatiblity issue between google-auth, dlt and gcsfs
+        # that makes it difficult to use google.oauth2.service_account.Credentials
+        # (The RECOMMENDED way of passing service account credentials)
+        # directly with gcsfs. As a workaround, we construct the GCSFileSystem
+        # and pass it directly to filesystem.readers.
+        fs = gcsfs.GCSFileSystem(
+            token=credentials,
+        )
+
+        file_extension = path_to_file.split(".")[-1]
+        if file_extension == "csv":
+            endpoint = "read_csv"
+        elif file_extension == "jsonl":
+            endpoint = "read_jsonl"
+        elif file_extension == "parquet":
+            endpoint = "read_parquet"
+        else:
+            raise ValueError(
+                "GCS Source only supports specific formats files: csv, jsonl, parquet"
+            )
+
+        return readers(
+            bucket_url, fs, path_to_file
+        ).with_resources(endpoint)
+
+
+class LinkedInAdsSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        source_fields = parse_qs(parsed_uri.query)
+
+        access_token = source_fields.get("access_token")
+        if not access_token:
+            raise ValueError("access_token is required to connect to LinkedIn Ads")
+
+        account_ids = source_fields.get("account_ids")
+
+        if not account_ids:
+            raise ValueError("account_ids is required to connect to LinkedIn Ads")
+        account_ids = account_ids[0].replace(" ", "").split(",")
+
+        interval_start = kwargs.get("interval_start")
+        interval_end = kwargs.get("interval_end")
+        start_date = (
+            ensure_pendulum_datetime(interval_start).date()
+            if interval_start
+            else pendulum.datetime(2018, 1, 1).date()
+        )
+        end_date = (
+            ensure_pendulum_datetime(interval_end).date() if interval_end else None
+        )
+
+        fields = table.split(":")
+        if len(fields) != 3:
+            raise ValueError(
+                "Invalid table format. Expected format: custom:<dimensions>:<metrics>"
+            )
+
+        dimensions = fields[1].replace(" ", "").split(",")
+        dimensions = [item for item in dimensions if item.strip()]
+        if (
+            "campaign" not in dimensions
+            and "creative" not in dimensions
+            and "account" not in dimensions
+        ):
+            raise ValueError(
+                "'campaign', 'creative' or 'account' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
+            )
+        if "date" not in dimensions and "month" not in dimensions:
+            raise ValueError(
+                "'date' or 'month' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
+            )
+
+        if "date" in dimensions:
+            time_granularity = TimeGranularity.daily
+            dimensions.remove("date")
+        else:
+            time_granularity = TimeGranularity.monthly
+            dimensions.remove("month")
+
+        dimension = Dimension[dimensions[0]]
+
+        metrics = fields[2].replace(" ", "").split(",")
+        metrics = [item for item in metrics if item.strip()]
+        if "dateRange" not in metrics:
+            metrics.append("dateRange")
+        if "pivotValues" not in metrics:
+            metrics.append("pivotValues")
+
+        return linked_in_ads_source(
+            start_date=start_date,
+            end_date=end_date,
+            access_token=access_token[0],
+            account_ids=account_ids,
+            dimension=dimension,
+            metrics=metrics,
+            time_granularity=time_granularity,
+        ).with_resources("custom_reports")
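Put together, the two new source classes can be exercised directly; a hedged sketch, with placeholder credentials, of the URI and table formats the parsing above accepts (gs://bucket?credentials_path=... with the object path as the source table, and custom:<dimensions>:<metrics> for LinkedIn Ads):

from ingestr.src.sources import GCSSource, LinkedInAdsSource

# GCS: the bucket comes from the URI host, the file path from the URI path
# or, as of this release, from the source table; the extension picks the reader.
gcs_source = GCSSource().dlt_source(
    uri="gs://my-bucket?credentials_path=/path/to/service_account.json",
    table="exports/events.csv",  # hypothetical object path
)

# LinkedIn Ads: query params carry auth, the table encodes the report shape.
# 'date' selects daily granularity; 'campaign' becomes the pivot dimension.
linkedin = LinkedInAdsSource().dlt_source(
    uri="linkedinads://?access_token=PLACEHOLDER&account_ids=123456789",
    table="custom:campaign,date:impressions,clicks",
    interval_start="2024-01-01",
)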
ingestr/src/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.12.8"
+__version__ = "0.12.10"
{ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.12.8
+Version: 0.12.10
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -22,6 +22,7 @@ Requires-Dist: dlt==1.5.0
 Requires-Dist: duckdb-engine==0.13.5
 Requires-Dist: duckdb==1.1.3
 Requires-Dist: facebook-business==20.0.0
+Requires-Dist: gcsfs==2024.10.0
 Requires-Dist: google-analytics-data==0.18.16
 Requires-Dist: google-api-python-client==2.130.0
 Requires-Dist: google-cloud-bigquery-storage==2.24.0
{ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/RECORD
CHANGED
@@ -2,18 +2,18 @@ ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
 ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=kzbJ10fF1xySzarhDfJ2l4_Hm925cglsvzk3MAIMkaI,4687
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=qUs9s_0kCurUJUmbrBCTiPzyLpEtaO-yCOnOYvsftUY,58965
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
-ingestr/src/version.py,sha256=
+ingestr/src/version.py,sha256=037G21EIHmneVX5BgQiyUajkoMsqfZoVvjyP3_6MaDs,24
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
 ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
-ingestr/src/appstore/__init__.py,sha256=
+ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
 ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
 ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
 ingestr/src/appstore/models.py,sha256=tW1JSATHBIxZ6a77-RTCBQptJk6iRC8fWcmx4NW7SVA,1716
@@ -30,7 +30,7 @@ ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24U
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
 ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
 ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
-ingestr/src/filesystem/__init__.py,sha256=
+ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351AjNb0eA,4163
 ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
 ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
 ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
@@ -54,6 +54,9 @@ ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,
 ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
+ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
+ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
+ingestr/src/linkedin_ads/helpers.py,sha256=6jSIp4DF0iUafJWU3Y7DbIJGKRH6hrx4S7zCTDOjNuE,4528
 ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
 ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -91,8 +94,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.12.
-ingestr-0.12.
-ingestr-0.12.
-ingestr-0.12.
-ingestr-0.12.
+ingestr-0.12.10.dist-info/METADATA,sha256=_jr6Mv4lUktQkO2MP3q4RBeHe2RnCy1-4WWGXva6Qbo,8057
+ingestr-0.12.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.12.10.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.12.10.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.12.10.dist-info/RECORD,,
{ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/WHEEL
File without changes
{ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/entry_points.txt
File without changes
{ingestr-0.12.8.dist-info → ingestr-0.12.10.dist-info}/licenses/LICENSE.md
File without changes