omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any, Dict, Iterator, Optional
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _graphql(
    api_key: str,
    query: str,
    variables: Optional[Dict[str, Any]] = None,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """Run one GraphQL request against the Linear endpoint and return its ``data``.

    Args:
        api_key: Value sent verbatim in the ``Authorization`` header.
        query: GraphQL document to execute.
        variables: Variables for the query; ``None`` is sent as an empty object.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely, so a stalled connection would
            hang the whole load.

    Returns:
        The ``data`` member of the decoded JSON payload.

    Raises:
        requests.HTTPError: If the HTTP status signals a failure.
        requests.Timeout: If the server does not answer within ``timeout``.
        ValueError: If the payload carries an ``errors`` member.
    """
    response = requests.post(
        LINEAR_GRAPHQL_ENDPOINT,
        json={"query": query, "variables": variables or {}},
        headers={"Authorization": api_key, "Content-Type": "application/json"},
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()
    if "errors" in payload:
        raise ValueError(str(payload["errors"]))
    return payload["data"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
    """Yield every node of a cursor-paginated connection, one page at a time.

    ``query`` is executed repeatedly with a ``cursor`` variable (``None`` for
    the first page). ``root`` names the field of the response ``data`` that
    holds the connection, which must expose ``nodes`` and ``pageInfo``.
    """
    after: Optional[str] = None
    while True:
        connection = _graphql(api_key, query, {"cursor": after})[root]
        yield from connection["nodes"]

        page_info = connection["pageInfo"]
        if page_info["hasNextPage"]:
            # Only read the end cursor when another page is announced.
            after = page_info["endCursor"]
            continue
        return
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _get_date_range(updated_at, start_date):
    """Derive the (start, end) datetimes to load from the incremental state.

    The start is parsed from ``updated_at.last_value`` when that is truthy,
    otherwise from ``start_date``. The end is parsed from
    ``updated_at.end_value`` when that is truthy, otherwise it is the current
    time in UTC.
    """
    last_seen = updated_at.last_value
    lower_bound = pendulum.parse(last_seen) if last_seen else pendulum.parse(start_date)

    configured_end = updated_at.end_value
    upper_bound = (
        pendulum.parse(configured_end) if configured_end else pendulum.now(tz="UTC")
    )

    return lower_bound, upper_bound
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _paginated_resource(
    api_key: str, query: str, query_field: str, updated_at, start_date
) -> Iterator[Dict[str, Any]]:
    """Yield normalized items whose ``updatedAt`` lies inside the load window.

    Args:
        api_key: Credential forwarded to the paginated query.
        query: GraphQL document to paginate over.
        query_field: Name of the connection field inside the response data.
        updated_at: Incremental state providing ``last_value`` / ``end_value``.
        start_date: Fallback start used when the state has no last value.

    Yields:
        Each item (passed through ``normalize_dictionaries``) whose
        ``updatedAt`` is within the closed interval returned by
        ``_get_date_range``.
    """
    window_start, window_end = _get_date_range(updated_at, start_date)

    for item in _paginate(api_key, query, query_field):
        # Parse the timestamp once per item instead of once per comparison.
        modified_at = pendulum.parse(item["updatedAt"])
        if window_start <= modified_at <= window_end:
            yield normalize_dictionaries(item)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _create_paginated_resource(
    resource_name: str,
    query: str,
    query_field: str,
    api_key: str,
    start_date,
    end_date=None,
):
    """Build a dlt resource that loads one paginated connection incrementally.

    Args:
        resource_name: Name given to the created dlt resource.
        query: GraphQL document to paginate over.
        query_field: Name of the connection field inside the response data.
        api_key: Credential forwarded to the paginated query.
        start_date: Object with ``isoformat()``; seeds the incremental cursor.
        end_date: Optional object with ``isoformat()``; closes the window.

    Returns:
        The decorated resource function. It merges on ``id`` and tracks the
        ``updatedAt`` field with a closed range on both ends.
    """

    @dlt.resource(name=resource_name, primary_key="id", write_disposition="merge")
    def paginated_resource(
        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
            "updatedAt",
            initial_value=start_date.isoformat(),
            end_value=end_date.isoformat() if end_date else None,
            range_start="closed",
            range_end="closed",
        ),
    ) -> Iterator[Dict[str, Any]]:
        # _paginated_resource already yields normalized items. Normalizing a
        # second time here was redundant, and could flatten an "<key>_id"
        # value again if that value happened to be a dict.
        yield from _paginated_resource(
            api_key, query, query_field, updated_at, start_date
        )

    return paginated_resource
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def normalize_dictionaries(item: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of *item* with its top-level dict values flattened.

    For every value that is a dict:
    - if it has an ``id`` key, the field is removed and the id is stored under
      ``"<field>_id"`` instead;
    - otherwise, if it has a ``nodes`` key, the field is replaced by that
      ``nodes`` value.

    All other values are kept as they are. The input mapping is not modified.
    """
    flattened = item.copy()

    for field, nested in item.items():
        if not isinstance(nested, dict):
            continue

        if "id" in nested:
            # Keep only the reference to the nested object.
            flattened[f"{field}_id"] = nested["id"]
            del flattened[field]
        elif "nodes" in nested:
            # Unwrap the connection down to its list of nodes.
            flattened[field] = nested["nodes"]

    return flattened
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
from urllib.parse import quote
|
|
3
|
+
|
|
4
|
+
import dlt
|
|
5
|
+
import pendulum
|
|
6
|
+
from dlt.common.typing import TDataItem
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
from pendulum import Date, DateTime
|
|
9
|
+
|
|
10
|
+
from .dimension_time_enum import Dimension, TimeGranularity
|
|
11
|
+
from .helpers import LinkedInAdsAnalyticsAPI, LinkedInAdsAPI, find_intervals
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dlt.source(max_table_nesting=0)
def linked_in_ads_analytics_source(
    start_date: Date,
    end_date: Date | None,
    access_token: str,
    account_ids: list[str],
    dimension: Dimension,
    metrics: list[str],
    time_granularity: TimeGranularity,
) -> DltResource:
    """dlt source exposing a single ``custom_reports`` analytics resource.

    With daily granularity, rows are merged on ``(dimension, "date")`` and the
    incremental cursor is ``date``. Otherwise rows are merged on
    ``(dimension, "start_date", "end_date")`` and the cursor is ``start_date``.
    """
    if time_granularity == TimeGranularity.daily:
        incremental_loading_param = "date"
        primary_key = [dimension.value, "date"]
    else:
        incremental_loading_param = "start_date"
        primary_key = [dimension.value, "start_date", "end_date"]

    @dlt.resource(write_disposition="merge", primary_key=primary_key)
    def custom_reports(
        dateTime=dlt.sources.incremental(
            incremental_loading_param,
            initial_value=start_date,
            end_value=end_date,
            range_start="closed",
            range_end="closed",
        ),
    ) -> Iterable[TDataItem]:
        linkedin_api = LinkedInAdsAnalyticsAPI(
            access_token=access_token,
            account_ids=account_ids,
            dimension=dimension,
            metrics=metrics,
            time_granularity=time_granularity,
        )

        # An open-ended load runs up to today's date.
        window_end = dateTime.end_value
        if window_end is None:
            window_end = pendulum.now().date()

        intervals = find_intervals(
            start_date=dateTime.last_value,
            end_date=window_end,
            time_granularity=time_granularity,
        )
        for interval_start, interval_end in intervals:
            yield linkedin_api.fetch_pages(interval_start, interval_end)

    return custom_reports
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dlt.source(max_table_nesting=0)
def linked_in_ads_source(
    access_token: str, start_datetime: DateTime, end_datetime: DateTime | None
) -> list[DltResource]:
    """dlt source for LinkedIn Ads entities.

    ``ad_accounts`` is the root resource. Every other resource is a
    transformer fed by ``ad_accounts`` that queries one endpoint per account
    and stamps each returned item with that account's ``account_id``.
    ``lead_form_responses`` is the only incremental (merge) resource; it is
    windowed on ``submittedAt`` in epoch milliseconds.
    """
    linkedin_api = LinkedInAdsAPI(
        access_token=access_token,
    )

    def _account_urn(account_id) -> str:
        # URL-quoted sponsored-account URN used in query parameters.
        return quote(f"urn:li:sponsoredAccount:{account_id}")

    def _stamp_account(pages, account_id):
        # Tag every item of every page with its account, then pass the page on.
        for page in pages:
            for item in page:
                item["account_id"] = account_id

            yield page

    @dlt.resource(write_disposition="replace", primary_key="id")
    def ad_accounts() -> Iterable[TDataItem]:
        yield from linkedin_api.fetch_token_pagination(
            url="https://api.linkedin.com/rest/adAccounts?q=search"
        )

    @dlt.transformer(
        write_disposition="replace",
        primary_key=["user", "account"],
        data_from=ad_accounts,
    )
    def ad_account_users(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/adAccountUsers?q=accounts&accounts=List({encoded_id})"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def campaign_groups(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            url = f"https://api.linkedin.com/rest/adAccounts/{account_id}/adCampaignGroups?q=search"
            yield from _stamp_account(
                linkedin_api.fetch_token_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def campaigns(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            url = f"https://api.linkedin.com/rest/adAccounts/{account_id}/adCampaigns?q=search"
            yield from _stamp_account(
                linkedin_api.fetch_token_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def creatives(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            url = f"https://api.linkedin.com/rest/adAccounts/{account_id}/creatives?q=criteria"
            yield from _stamp_account(
                linkedin_api.fetch_token_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def conversions(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/conversions?q=account&account={encoded_id}"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def lead_forms(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/leadForms?q=owner&owner=(sponsoredAccount:{encoded_id})"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="merge",
        primary_key="id",
        data_from=ad_accounts,
    )
    def lead_form_responses(
        ad_accounts,
        submittedAt=dlt.sources.incremental(
            "submittedAt",
            initial_value=int(start_datetime.int_timestamp * 1000),
            end_value=end_datetime.int_timestamp * 1000 if end_datetime else None,
            range_end="closed" if end_datetime else "open",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        # Both bounds are epoch milliseconds; an open end means "up to now".
        range_start_ms = submittedAt.start_value
        range_end_ms = submittedAt.end_value
        if not range_end_ms:
            range_end_ms = int(pendulum.now(tz="UTC").int_timestamp * 1000)

        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/leadFormResponses?leadType=(leadType:SPONSORED)&q=owner&owner=(sponsoredAccount:{encoded_id})&submittedAtTimeRange=(start:{range_start_ms},end:{range_end_ms})&limitedToTestLeads=false"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def dmp_segments(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/dmpSegments?q=account&account={encoded_id}"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key="id",
        data_from=ad_accounts,
    )
    def insight_tags(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/insightTags?q=account&account={encoded_id}"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    @dlt.transformer(
        write_disposition="replace",
        primary_key=["domainName", "account_id"],
        data_from=ad_accounts,
    )
    def insight_tag_domains(ad_accounts) -> Iterable[TDataItem]:
        for ad_account in ad_accounts:
            account_id = ad_account["id"]
            encoded_id = _account_urn(account_id)
            url = f"https://api.linkedin.com/rest/insightTagDomains?q=account&account={encoded_id}"
            yield from _stamp_account(
                linkedin_api.fetch_cursor_pagination(url), account_id
            )

    return [
        ad_accounts,
        ad_account_users,
        campaign_groups,
        campaigns,
        creatives,
        conversions,
        lead_forms,
        lead_form_responses,
        dmp_segments,
        insight_tags,
        insight_tag_domains,
    ]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Dimension(Enum):
    """Dimensions an analytics report can be broken down by.

    Each member's value is the lowercase name of that dimension.
    """

    # Ad entities
    campaign = "campaign"
    creative = "creative"
    account = "account"

    # Member demographics
    member_job_title = "member_job_title"
    member_seniority = "member_seniority"
    member_industry = "member_industry"
    member_company_size = "member_company_size"
    member_company = "member_company"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TimeGranularity(Enum):
    """Time bucket size of an analytics report.

    Each member's value is the uppercase spelling of that granularity.
    """

    daily = "DAILY"
    monthly = "MONTHLY"
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
from urllib.parse import quote
|
|
2
|
+
|
|
3
|
+
import pendulum
|
|
4
|
+
import requests
|
|
5
|
+
from dlt.sources.helpers.requests import Client
|
|
6
|
+
from pendulum import Date
|
|
7
|
+
|
|
8
|
+
from .dimension_time_enum import Dimension, TimeGranularity
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def retry_on_limit(
    response: requests.Response | None, exception: BaseException | None
) -> bool:
    """Retry predicate: true only when a response arrived with HTTP status 429.

    ``exception`` is accepted to satisfy the callback signature but is not
    consulted; a missing response never triggers a retry.
    """
    return response is not None and response.status_code == 429
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def create_client() -> requests.Session:
    """Return the session of a dlt ``Client`` configured for this API.

    The client does not raise on error statuses, retries according to
    ``retry_on_limit`` and makes at most 12 attempts per request.
    """
    client = Client(
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
    )
    return client.session
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _date_from_parts(parts):
    """Build a ``pendulum`` date from a ``{"year", "month", "day"}`` mapping."""
    return pendulum.date(
        year=parts["year"],
        month=parts["month"],
        day=parts["day"],
    )


def flat_structure(items, pivot: Dimension, time_granularity: TimeGranularity):
    """Flatten the ``pivotValues`` and ``dateRange`` members of report rows in place.

    For every row:
    - ``pivotValues`` is copied to a column named after the lowercased
      ``pivot.value``: the single element when there is exactly one, the whole
      list when there are several;
    - ``dateRange`` becomes a ``date`` column for daily granularity, otherwise
      ``start_date`` and ``end_date`` columns;
    - both original members are then removed.

    Args:
        items: Mutable row mappings; they are modified in place.
        pivot: Dimension whose value names the pivot column.
        time_granularity: Decides which date columns are produced.

    Returns:
        The same ``items`` object that was passed in.
    """
    pivot_column = pivot.value.lower()

    for item in items:
        if "pivotValues" in item:
            pivot_values = item["pivotValues"]
            if len(pivot_values) > 1:
                item[pivot_column] = pivot_values
            else:
                item[pivot_column] = pivot_values[0]

        if "dateRange" in item:
            date_range = item["dateRange"]
            start_dt = _date_from_parts(date_range["start"])
            if time_granularity == TimeGranularity.daily:
                item["date"] = start_dt
            else:
                end_dt = _date_from_parts(date_range["end"])
                item["start_date"] = start_dt
                item["end_date"] = end_dt

        # Presence of both members is optional above, so removal must tolerate
        # their absence too; a plain `del` raised KeyError for such rows.
        item.pop("dateRange", None)
        item.pop("pivotValues", None)

    return items
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def find_intervals(start_date: Date, end_date: Date, time_granularity: TimeGranularity):
    """Split ``[start_date, end_date]`` into consecutive inclusive intervals.

    Each interval is at most six months long for daily granularity and at most
    two years long otherwise. Every interval after the first starts the day
    after the previous one ends, and the last one is clipped to ``end_date``.

    Returns:
        A list of ``(start, end)`` tuples.

    Raises:
        ValueError: If ``start_date`` is after ``end_date``.
    """
    if start_date > end_date:
        raise ValueError("Start date must be less than end date")

    if time_granularity == TimeGranularity.daily:
        step = {"months": 6}
    else:
        step = {"years": 2}

    intervals = []
    cursor = start_date
    while cursor <= end_date:
        chunk_end = min(cursor.add(**step), end_date)
        intervals.append((cursor, chunk_end))
        cursor = chunk_end.add(days=1)

    return intervals
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Maps a Dimension value to the spelling used for the `pivot` query parameter.
DIMENSION_PIVOT_MAP = dict(
    campaign="CAMPAIGN",
    creative="CREATIVE",
    account="ACCOUNT",
    member_job_title="MEMBER_JOB_TITLE",
    member_seniority="MEMBER_SENIORITY",
    member_industry="MEMBER_INDUSTRY",
    member_company_size="MEMBER_COMPANY_SIZE",
    member_company="MEMBER_COMPANY",
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def construct_url(
    start: Date,
    end: Date,
    account_ids: list[str],
    metrics: list[str],
    dimension: Dimension,
    time_granularity: TimeGranularity,
) -> str:
    """Build the ``adAnalytics`` request URL for one date interval.

    Args:
        start: First day of the interval (``year``/``month``/``day`` are read).
        end: Last day of the interval.
        account_ids: Account ids; each is wrapped in a sponsored-account URN
            and URL-quoted.
        metrics: Field names, joined with commas into the ``fields`` parameter.
        dimension: Report dimension, translated through ``DIMENSION_PIVOT_MAP``.
        time_granularity: Its value is sent as ``timeGranularity``.

    Returns:
        The complete request URL.

    Raises:
        KeyError: If ``dimension.value`` has no entry in ``DIMENSION_PIVOT_MAP``.
    """
    date_range = (
        f"(start:(year:{start.year},month:{start.month},day:{start.day}),"
        f"end:(year:{end.year},month:{end.month},day:{end.day}))"
    )
    accounts = ",".join(
        quote(f"urn:li:sponsoredAccount:{account_id}") for account_id in account_ids
    )
    encoded_accounts = f"List({accounts})"
    dimension_str = DIMENSION_PIVOT_MAP[dimension.value]
    time_granularity_str = time_granularity.value
    metrics_str = ",".join(metrics)

    return (
        f"https://api.linkedin.com/rest/adAnalytics?"
        f"q=analytics&timeGranularity={time_granularity_str}&"
        f"dateRange={date_range}&accounts={encoded_accounts}&"
        f"pivot={dimension_str}&fields={metrics_str}"
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class LinkedInAdsAnalyticsAPI:
    """Client for the ``adAnalytics`` endpoint, bound to one report definition."""

    def __init__(
        self,
        access_token,
        time_granularity,
        account_ids,
        dimension,
        metrics,
    ):
        """Store the report definition and prepare request headers.

        Args:
            access_token: Token sent as a ``Bearer`` authorization header.
            time_granularity: Granularity of the report.
            account_ids: Ids of the accounts to report on.
            dimension: Dimension the report is pivoted by.
            metrics: Field names to request.
        """
        self.time_granularity: TimeGranularity = time_granularity
        self.account_ids: list[str] = account_ids
        self.dimension: Dimension = dimension
        self.metrics: list[str] = metrics
        self.headers = {
            "Authorization": f"Bearer {access_token}",
            "Linkedin-Version": "202601",
            "X-Restli-Protocol-Version": "2.0.0",
        }
        # One session for the lifetime of the object (as LinkedInAdsAPI does)
        # instead of building a new client for every interval fetched.
        self.client = create_client()

    def fetch_pages(self, start: Date, end: Date):
        """Fetch the report for ``[start, end]`` and yield its flattened rows.

        Yields:
            A single list: the response ``elements`` passed through
            ``flat_structure``.

        Raises:
            ValueError: If the response status is not 200. The message carries
                the API's ``message`` field, or the raw body when the body is
                not a JSON object.
        """
        url = construct_url(
            start=start,
            end=end,
            account_ids=self.account_ids,
            metrics=self.metrics,
            dimension=self.dimension,
            time_granularity=self.time_granularity,
        )
        response = self.client.get(url=url, headers=self.headers)

        if response.status_code != 200:
            # An error body is not guaranteed to be a JSON object; fall back
            # to the raw text so the real failure is not masked.
            try:
                error_data = response.json()
            except ValueError:
                error_data = None
            if isinstance(error_data, dict):
                message = error_data.get("message")
            else:
                message = response.text
            raise ValueError(f"LinkedIn API Error: {message}")

        result = response.json()
        items = result.get("elements", [])
        yield flat_structure(
            items=items,
            pivot=self.dimension,
            time_granularity=self.time_granularity,
        )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class LinkedInAdsAPI:
    """Client for the LinkedIn REST endpoints that return ``elements`` pages."""

    def __init__(
        self,
        access_token,
    ):
        """Prepare request headers and the HTTP session.

        Args:
            access_token: Token sent as a ``Bearer`` authorization header.
        """
        self.headers = {
            "Authorization": f"Bearer {access_token}",
            "Linkedin-Version": "202601",
            "X-Restli-Protocol-Version": "2.0.0",
        }
        self.client = create_client()

    def _get_json(self, url: str):
        """GET ``url`` and return the decoded JSON body.

        Raises:
            ValueError: If the response status is not 200. The message carries
                the decoded error body, or the status code and raw text when
                the body is not valid JSON.
        """
        response = self.client.get(url=url, headers=self.headers)

        if response.status_code != 200:
            try:
                error_data = response.json()
            except ValueError:
                # Keep the real failure visible when the body is not JSON.
                error_data = f"HTTP {response.status_code}: {response.text}"
            raise ValueError(f"LinkedIn API Error: {error_data}")

        return response.json()

    def fetch_full(self, url: str):
        """Yield the ``elements`` list of a single, unpaginated request.

        Nothing is yielded when the list is missing or empty.
        """
        elements = self._get_json(url).get("elements", [])

        if elements:
            yield elements

    def fetch_token_pagination(self, url: str, page_size: int = 1000):
        """Yield ``elements`` lists page by page using ``pageToken`` paging.

        Args:
            url: Endpoint URL; ``pageSize``/``pageToken`` are appended to it.
            page_size: Number of elements requested per page.

        Paging stops on an empty page, on a page shorter than ``page_size``,
        or when the response metadata carries no ``nextPageToken``.
        """
        separator = "&" if "?" in url else "?"
        first_page_url = f"{url}{separator}pageSize={page_size}"
        next_page_token = None

        while True:
            if next_page_token:
                paginated_url = f"{first_page_url}&pageToken={next_page_token}"
            else:
                paginated_url = first_page_url

            result = self._get_json(paginated_url)
            elements = result.get("elements", [])

            if not elements:
                break

            yield elements

            if len(elements) < page_size:
                break

            # `metadata` may be absent or null; treat both as "no more pages".
            metadata = result.get("metadata") or {}
            next_page_token = metadata.get("nextPageToken")

            if not next_page_token:
                break

    def fetch_cursor_pagination(self, url: str, count: int = 1000):
        """Yield ``elements`` lists page by page using ``start``/``count`` paging.

        Args:
            url: Endpoint URL; ``start``/``count`` are appended to it.
            count: Number of elements requested per page.

        Paging stops on an empty page or on a page shorter than ``count``.
        """
        separator = "&" if "?" in url else "?"
        start = 0

        while True:
            paginated_url = f"{url}{separator}start={start}&count={count}"
            elements = self._get_json(paginated_url).get("elements", [])

            if not elements:
                break

            yield elements

            if len(elements) < count:
                break

            start += count
|