omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
omniload/src/.gitignore
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from typing import Optional, Sequence
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.sources import DltResource
|
|
6
|
+
|
|
7
|
+
from .adjust_helpers import DEFAULT_DIMENSIONS, DEFAULT_METRICS, AdjustAPI
|
|
8
|
+
|
|
9
|
+
# Time-granularity dimensions, ordered from finest to coarsest. adjust_source
# uses the first of these found in a custom report's dimensions as merge key.
REQUIRED_CUSTOM_DIMENSIONS = ["hour", "day", "week", "month", "quarter", "year"]

# Column type hints for the dimensions and metrics whose data type is known
# up front. Every entry gets its own dict so hints are never shared by accident.
KNOWN_TYPE_HINTS = {
    column: {"data_type": data_type}
    for column, data_type in (
        # dimensions
        ("hour", "timestamp"),
        ("day", "date"),
        ("week", "text"),
        ("month", "text"),
        ("quarter", "text"),
        ("year", "text"),
        ("campaign", "text"),
        ("adgroup", "text"),
        ("creative", "text"),
        # metrics
        ("installs", "bigint"),
        ("clicks", "bigint"),
        ("cost", "decimal"),
        ("network_cost", "decimal"),
        ("impressions", "bigint"),
        ("ad_revenue", "decimal"),
        ("all_revenue", "decimal"),
    )
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dlt.source(max_table_nesting=0)
def adjust_source(
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
    api_key: str,
    dimensions: Optional[list[str]] = None,
    metrics: Optional[list[str]] = None,
    merge_key: Optional[str] = None,
    filters: Optional[dict] = None,
) -> Sequence[DltResource]:
    """Build the dlt resources that load Adjust report data.

    Three resources are always returned:

    * ``campaigns`` - report over ``DEFAULT_DIMENSIONS`` / ``DEFAULT_METRICS``,
      merged on ``day``.
    * ``creatives`` - the same report with the ``adgroup`` and ``creative``
      dimensions added, merged on ``day``.
    * ``events`` - the result of ``AdjustAPI.fetch_events``, replaced on
      every load.

    When ``dimensions`` is non-empty a fourth resource, ``custom``, is added.
    It requests exactly the given dimensions and metrics and is loaded with
    the delete-insert merge strategy, using the dimensions as primary key.

    Args:
        start_date: First day of the reporting period.
        end_date: Last day of the reporting period.
        api_key: Adjust API token, sent as a bearer token.
        dimensions: Dimensions of the custom report. Falsy means "no custom
            report".
        metrics: Metrics of the custom report. Required when ``dimensions``
            is given.
        merge_key: Merge key of the custom report. Only used when none of
            ``REQUIRED_CUSTOM_DIMENSIONS`` appears in ``dimensions``.
        filters: Extra query parameters forwarded to the report requests.

    Returns:
        ``(campaigns, creatives, events)`` without custom dimensions,
        otherwise ``(campaigns, creatives, custom, events)``.

    Raises:
        ValueError: If ``dimensions`` is given but ``metrics`` is ``None``.
    """

    @dlt.resource(write_disposition="merge", merge_key="day")
    def campaigns() -> DltResource:
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=DEFAULT_DIMENSIONS,
            metrics=DEFAULT_METRICS,
            filters=filters,
        )

    @dlt.resource(write_disposition="replace", primary_key="id")
    def events() -> DltResource:
        adjust_api = AdjustAPI(api_key=api_key)
        yield adjust_api.fetch_events()

    @dlt.resource(write_disposition="merge", merge_key="day")
    def creatives() -> DltResource:
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=DEFAULT_DIMENSIONS + ["adgroup", "creative"],
            metrics=DEFAULT_METRICS,
            filters=filters,
        )

    if not dimensions:
        return campaigns, creatives, events

    if metrics is None:
        # Fail early with a clear message instead of a TypeError from
        # iterating None further down.
        raise ValueError(
            "metrics must be provided when custom dimensions are specified"
        )

    # The first (finest) time dimension requested wins as merge key; the
    # caller-supplied merge_key is only the fallback.
    merge_key = next(
        (
            dimension
            for dimension in REQUIRED_CUSTOM_DIMENSIONS
            if dimension in dimensions
        ),
        merge_key,
    )

    # Type hints for the requested columns whose data type is known.
    type_hints = {
        column: KNOWN_TYPE_HINTS[column]
        for column in [*dimensions, *metrics]
        if column in KNOWN_TYPE_HINTS
    }

    @dlt.resource(
        write_disposition={"disposition": "merge", "strategy": "delete-insert"},
        merge_key=merge_key,
        primary_key=dimensions,
        columns=type_hints,
    )
    def custom() -> DltResource:
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=dimensions,
            metrics=metrics,
            filters=filters,
        )

    return campaigns, creatives, custom, events
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import pendulum
|
|
4
|
+
import requests
|
|
5
|
+
from dlt.sources.helpers.requests import Client
|
|
6
|
+
from requests.exceptions import HTTPError
|
|
7
|
+
|
|
8
|
+
# Dimensions requested for the built-in reports.
DEFAULT_DIMENSIONS = [
    "campaign",
    "day",
    "app",
    "store_type",
    "channel",
    "country",
]

# Metrics requested for the built-in reports: two plain counters, then the
# all/ad/plain revenue totals for d0, d1, d3, d7 and d14, and finally the
# all-revenue total for d21 (the only d21 metric in the list).
DEFAULT_METRICS = [
    "installs",
    "network_cost",
    *(
        f"{prefix}_total_d{day}"
        for day in (0, 1, 3, 7, 14)
        for prefix in ("all_revenue", "ad_revenue", "revenue")
    ),
    "all_revenue_total_d21",
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def retry_on_limit(
    response: "Optional[requests.Response]",
    exception: "Optional[BaseException]",
) -> bool:
    """Tell the HTTP client whether a request should be retried.

    Only rate-limited requests (HTTP 429) are retried.

    Args:
        response: The response received, or ``None`` when the request ended
            in an exception instead of a response.
        exception: The exception raised by the request, if any. Not used.

    Returns:
        ``True`` if a response was received and its status code is 429.
    """
    # The retry predicate may be invoked without a response (request raised),
    # so guard the attribute access instead of failing with AttributeError.
    return response is not None and response.status_code == 429
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AdjustAPI:
    """Small client for the Adjust reports-service HTTP endpoints.

    Requests go through a dlt ``Client`` session that retries according to
    ``retry_on_limit`` and does not raise on error status codes by itself;
    the fetch methods check the status code and raise ``HTTPError``.
    """

    def __init__(self, api_key):
        """Store the API key and create the retrying HTTP session."""
        self.api_key = api_key
        client = Client(
            request_timeout=1000,  # Adjust support recommends 1000 seconds of read timeout.
            raise_for_status=False,
            retry_condition=retry_on_limit,
            request_max_attempts=12,
            request_backoff_factor=2,
        )
        self.request_client = client.session

    def _auth_headers(self):
        """Return the bearer-token headers sent with every request."""
        return {"Authorization": f"Bearer {self.api_key}"}

    def fetch_report_data(
        self,
        start_date: pendulum.DateTime,
        end_date: pendulum.DateTime,
        dimensions=DEFAULT_DIMENSIONS,
        metrics=DEFAULT_METRICS,
        filters: Optional[dict] = None,
    ):
        """Request one report and yield its rows as a single list.

        Args:
            start_date: Start of the reporting period.
            end_date: End of the reporting period.
            dimensions: Dimension names, sent comma-joined.
            metrics: Metric names, sent comma-joined.
            filters: Extra query parameters. List values are comma-joined,
                other values are passed through unchanged.

        Yields:
            The ``rows`` list of the JSON response (empty list if absent).

        Raises:
            ValueError: If ``start_date`` is after ``end_date``.
            HTTPError: If the response status code is not 200.
        """
        headers = self._auth_headers()

        # Caller-supplied filters go in first; the report parameters set
        # below take precedence over filters with the same name.
        params = {
            name: ",".join(setting) if isinstance(setting, list) else setting
            for name, setting in (filters or {}).items()
        }

        period_start = start_date.format("YYYY-MM-DD")
        period_end = end_date.format("YYYY-MM-DD")
        params["date_period"] = f"{period_start}:{period_end}"
        params["dimensions"] = ",".join(dimensions)
        params["metrics"] = ",".join(metrics)

        if start_date > end_date:
            raise ValueError(
                f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
            )

        response = self.request_client.get(
            "https://automate.adjust.com/reports-service/report",
            headers=headers,
            params=params,
        )
        if response.status_code != 200:
            raise HTTPError(
                f"Request failed with status code: {response.status_code}, {response.text}."
            )

        yield response.json().get("rows", [])

    def fetch_events(self):
        """Request the events endpoint and yield its decoded JSON body once.

        Raises:
            HTTPError: If the response status code is not 200.
        """
        response = self.request_client.get(
            "https://automate.adjust.com/reports-service/events",
            headers=self._auth_headers(),
        )
        if response.status_code != 200:
            raise HTTPError(
                f"Request failed with status code: {response.status_code}, {response.text}."
            )

        yield response.json()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def parse_filters(filters_raw: str) -> dict:
    """Parse a filter string like ``"key1=value1,key2=value2,value3,value4"``.

    The string is split on commas. An item containing ``=`` starts a new
    key; items without ``=`` are additional values for the most recent key.
    Items that appear before any key are ignored.

    Args:
        filters_raw: The raw, comma-separated filter string.

    Returns:
        A dict mapping each key to a single string when it has one value,
        or to a list of strings when it has several.
    """
    filters: dict = {}
    current_key = None

    for item in filters_raw.split(","):
        if "=" in item:
            # Split on the first "=" only, so a value that itself contains
            # "=" stays intact instead of breaking the unpacking.
            key, value = item.split("=", 1)
            filters[key] = [value]  # Always start with a list
            current_key = key
        elif current_key is not None:
            # Additional value for the current key
            filters[current_key].append(item)

    # Convert single-item lists to simple values
    return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Source that loads tables from Airtable.
|
|
16
|
+
Supports whitelisting of tables or loading of all tables from a specified base.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from typing import Any, Dict, Iterable, Iterator, List, Optional
|
|
20
|
+
|
|
21
|
+
import dlt
|
|
22
|
+
import pyairtable
|
|
23
|
+
from dlt.sources import DltResource
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dlt.source(max_table_nesting=1)
def airtable_source(
    base_id: str = dlt.config.value,
    table_names: Optional[List[str]] = dlt.config.value,
    access_token: str = dlt.secrets.value,
) -> Iterable[DltResource]:
    """
    Yield one resource per selected table of a single Airtable base.

    The table list is read from the base's metadata endpoint; every table
    that passes the ``table_names`` selection is wrapped by
    ``airtable_resource``.

    Args:
        base_id (str): The id of the base. Obtain it e.g. from the URL in your web browser.
            It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
        table_names (Optional[List[str]]): A list of table IDs or table names to load.
            When empty or not given, all tables of the base are loaded.
            Names are freely user-defined. IDs start with "tbl". See https://support.airtable.com/docs/finding-airtable-ids
        access_token (str): The personal access token.
            See https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens#personal-access-tokens-basic-actions
    """
    api = pyairtable.Api(access_token)
    schema_url = api.build_url(f"meta/bases/{base_id}/tables")
    schema = api.request(method="GET", url=schema_url)

    for table_meta in schema.get("tables"):
        # No selection means every table; otherwise match on id or name.
        selected = (
            not table_names
            or table_meta.get("id") in table_names
            or table_meta.get("name") in table_names
        )
        if selected:
            yield airtable_resource(api, base_id, table_meta)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def airtable_resource(
    api: pyairtable.Api,
    base_id: str,
    table: Dict[str, Any],
) -> DltResource:
    """
    Build the dlt resource for a single Airtable table.

    The resource is named after the table, is loaded with the "replace"
    write disposition, and uses the table's primary field (as the lowercased
    column name ``fields__<field name>``) as primary key.

    Args:
        api (pyairtable.Api): The API connection object
        base_id (str): The id of the base. Obtain it e.g. from the URL in your web browser.
            It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
        table (Dict[str, Any]): Metadata about an airtable, does not contain the actual records
    """
    # Look up the metadata of the field marked as the table's primary field.
    primary_field_id = table["primaryFieldId"]
    matching_fields = [
        candidate
        for candidate in table["fields"]
        if candidate["id"] == primary_field_id
    ]
    primary_field = matching_fields[0]

    resource_name: str = table["name"]
    key_column = f"fields__{primary_field['name']}".lower()

    # Table.iterate() supports rich customization options, such as chunk size,
    # fields, cell format, timezone, locale, and view
    records: Iterator[List[Any]] = api.table(base_id, table["id"]).iterate()

    return dlt.resource(
        records,
        name=resource_name,
        primary_key=[key_column],
        write_disposition="replace",
    )
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Allium source for data extraction via REST API.
|
|
3
|
+
|
|
4
|
+
This source provides access to Allium blockchain data via asynchronous query execution.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Iterator
|
|
9
|
+
|
|
10
|
+
import dlt
|
|
11
|
+
|
|
12
|
+
from omniload.src.http_client import create_client
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dlt.source(max_table_nesting=0, name="allium_source")
def allium_source(
    api_key: str,
    query_id: str,
    parameters: dict[str, Any] | None = None,
    limit: int | None = None,
    compute_profile: str | None = None,
) -> Any:
    """
    Allium data source for blockchain data extraction.

    Runs a saved Allium query asynchronously, waits for it to finish, and
    loads its result rows.

    Args:
        api_key: Allium API key for authentication
        query_id: The query ID to execute (e.g., 'abc123')
        parameters: Optional parameters for the query (e.g., {'start_date': '2025-02-01', 'end_date': '2025-02-02'})
        limit: Limit the number of rows in the result (max 250,000)
        compute_profile: Compute profile identifier

    Returns:
        A one-element tuple holding the ``query_results`` resource.
    """
    base_url = "https://api.allium.so/api/v1/explorer"
    session = create_client()
    headers = {"X-API-Key": api_key}

    @dlt.resource(name="query_results", write_disposition="replace")
    def fetch_query_results() -> Iterator[dict[str, Any]]:
        """
        Run the query and yield its result rows.

        Starts an async run, polls the run status every 5 seconds until it
        reports success, then fetches the results as JSON and yields the
        ``data`` list.

        Raises:
            ValueError: If the run cannot be started, fails, or reports an
                unrecognized status.
            TimeoutError: If the run has not succeeded after all polls.
        """
        # Step 1: Start async query execution. Only options that were
        # actually provided are sent in the run configuration.
        optional_settings = {"limit": limit, "compute_profile": compute_profile}
        run_config: dict[str, Any] = {
            option: setting
            for option, setting in optional_settings.items()
            if setting is not None
        }

        run_response = session.post(
            f"{base_url}/queries/{query_id}/run-async",
            json={"parameters": parameters or {}, "run_config": run_config},
            headers=headers,
        )
        run_data = run_response.json()
        if "run_id" not in run_data:
            raise ValueError(f"Failed to start query execution: {run_data}")
        run_id = run_data["run_id"]

        # Step 2: Poll for completion
        max_retries = 8640  # Max 12 hours with 5-second intervals
        poll_interval = 5  # seconds
        status_url = f"{base_url}/query-runs/{run_id}/status"

        for _ in range(max_retries):
            status_response = session.get(status_url, headers=headers)
            status_response.raise_for_status()
            status_data = status_response.json()

            # The status endpoint may answer with a bare string or a dict.
            status = (
                status_data
                if isinstance(status_data, str)
                else status_data.get("status")
            )

            if status == "success":
                break
            if status == "failed":
                error_msg = "Unknown error"
                if isinstance(status_data, dict):
                    error_msg = status_data.get("error", "Unknown error")
                raise ValueError(f"Query execution failed: {error_msg}")
            if status not in ["pending", "running", "queued"]:
                raise ValueError(f"Unknown status: {status}")

            # Still in progress: wait before the next poll.
            time.sleep(poll_interval)
        else:
            # Every poll was used up without the run reporting success.
            raise TimeoutError(
                f"Query execution timed out after {max_retries * poll_interval} seconds"
            )

        # Step 3: Fetch results
        results_response = session.get(
            f"{base_url}/query-runs/{run_id}/results",
            headers=headers,
            params={"f": "json"},
        )
        results_response.raise_for_status()

        # Extract and yield all data
        yield results_response.json().get("data", [])

    return (fetch_query_results,)
|