omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Snapchat Ads source settings and constants"""
|
|
2
|
+
|
|
3
|
+
from typing import Literal, get_args
|
|
4
|
+
|
|
5
|
+
from dlt.common.schema.typing import TColumnSchema
|
|
6
|
+
|
|
7
|
+
# Valid granularities for stats (required)
TStatsGranularity = Literal["TOTAL", "DAY", "HOUR", "LIFETIME"]

# Valid breakdowns for stats (object-level breakdown)
TStatsBreakdown = Literal["ad", "adsquad", "campaign"]

# Valid dimensions for stats (insight-level breakdown)
TStatsDimension = Literal["GEO", "DEMO", "INTEREST", "DEVICE"]

# Valid pivots for stats (pivot for insights breakdown)
TStatsPivot = Literal[
    "country",
    "region",
    "dma",
    "gender",
    "age_bucket",
    "interest_category_id",
    "interest_category_name",
    "operating_system",
    "make",
    "model",
]

# Sets for efficient runtime lookup. They are derived from the Literal aliases
# above so the static types and the runtime validation share a single source
# of truth and cannot drift apart when a value is added or removed.
VALID_GRANULARITIES: set[str] = set(get_args(TStatsGranularity))
VALID_BREAKDOWNS: set[str] = set(get_args(TStatsBreakdown))
VALID_DIMENSIONS: set[str] = set(get_args(TStatsDimension))
VALID_PIVOTS: set[str] = set(get_args(TStatsPivot))

# Stats primary key - includes all possible identifying fields
# campaign_id is always present
# adsquad_id and ad_id will be NULL when no breakdown is specified
# Time fields identify the time period for the stats
STATS_PRIMARY_KEY = [
    "campaign_id",
    "adsquad_id",
    "ad_id",
    "start_time",
    "end_time",
]

# Default stats fields (comma-separated metric names)
DEFAULT_STATS_FIELDS = "impressions,spend"
|
|
61
|
+
|
|
62
|
+
# Descriptions of the core stats metrics (available for all Snap Ads), in the
# order the columns are declared.
# All monetary values are in micro-currency (1.00 = 1,000,000 micro-currency)
# Metrics are finalized 48 hours after the end of the day in the Ad Account's timezone
_STATS_METRIC_DESCRIPTIONS: dict[str, str] = {
    "impressions": "Impression Count",
    "swipes": "Swipe-Up Count",
    "view_time_millis": "Deprecated: Use screen_time_millis instead. Total Time Spent on top Snap Ad (milliseconds)",
    "screen_time_millis": "Total Time Spent on top Snap Ad (milliseconds)",
    "quartile_1": "Video Views to 25%",
    "quartile_2": "Video Views to 50%",
    "quartile_3": "Video Views to 75%",
    "view_completion": "Video Views to completion",
    "spend": "Amount Spent (micro-currency: 1.00 = 1,000,000)",
    "coupon_used_local": "Amount Spent via Coupon in the assigned currency of Ad Account (micro-currency)",
    "coupon_used_usd": "Amount Spent via Coupon in USD (micro-currency)",
    "video_views": "Total impressions with at least 2 seconds of consecutive watch time or a swipe up action",
}

# Column hints for stats resources: every core metric is a nullable bigint
# column, so only the description varies between entries.
STATS_METRICS_COLUMNS: dict[str, TColumnSchema] = {
    metric: {"data_type": "bigint", "nullable": True, "description": text}
    for metric, text in _STATS_METRIC_DESCRIPTIONS.items()
}

# Names of the metrics accepted during validation
VALID_METRICS = set(STATS_METRICS_COLUMNS)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""A source loading data from Socrata open data platform"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Iterator, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
|
|
7
|
+
from .helpers import fetch_data
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _to_soql_value(value: Any) -> str:
    """Render an incremental cursor value as text for the Socrata query.

    Values exposing ``isoformat`` (dates, datetimes) are rendered through it;
    anything else falls back to ``str``. Used for both ends of the range so
    the lower and upper bounds are always serialized the same way.
    """
    return value.isoformat() if hasattr(value, "isoformat") else str(value)


@dlt.source(name="socrata", max_table_nesting=0)
def source(
    domain: str,
    dataset_id: str,
    app_token: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    incremental: Optional[Any] = None,
    primary_key: Optional[str] = None,
    write_disposition: Optional[str] = dlt.config.value,
):
    """
    A dlt source for the Socrata open data platform.

    Loads the whole dataset by default; when an incremental with a cursor
    path is supplied, only the matching range is requested from the server.

    Args:
        domain: The Socrata domain (e.g., "evergreen.data.socrata.com")
        dataset_id: The dataset identifier (e.g., "6udu-fhnu")
        app_token: Socrata app token for higher rate limits (recommended)
        username: Username for authentication (if dataset is private)
        password: Password for authentication (if dataset is private)
        incremental: DLT incremental object for incremental loading
        primary_key: Primary key field forwarded to the resource. No primary
            key is set when this is None (the default).
        write_disposition: Write disposition ("replace", "append", "merge").
            When it resolves to a falsy value, "replace" is used; it is NOT
            derived from the incremental setting.

    Returns:
        A one-element tuple holding the "dataset" resource
    """

    @dlt.resource(
        write_disposition=write_disposition or "replace",
        primary_key=primary_key,  # type: ignore[call-overload]
    )
    def dataset(
        incremental: Optional[dlt.sources.incremental] = incremental,  # type: ignore[type-arg]
    ) -> Iterator[Dict[str, Any]]:
        """
        Yields the pages produced by ``fetch_data`` for the configured dataset.

        When an incremental with a cursor path is provided, its last value
        (and end value, if any) are forwarded so the filtering happens on the
        server side.
        """
        fetch_kwargs: Dict[str, Any] = {
            "domain": domain,
            "dataset_id": dataset_id,
            "app_token": app_token,
            "username": username,
            "password": password,
        }

        if incremental and incremental.cursor_path:
            fetch_kwargs["incremental_key"] = incremental.cursor_path

            last_value = incremental.last_value
            fetch_kwargs["start_value"] = (
                _to_soql_value(last_value) if last_value is not None else None
            )

            # end_value is read defensively: not every incremental object
            # passed in is guaranteed to expose it.
            end_value = getattr(incremental, "end_value", None)
            if end_value is not None:
                fetch_kwargs["end_value"] = _to_soql_value(end_value)

        # Fetch and yield records
        yield from fetch_data(**fetch_kwargs)

    return (dataset,)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Socrata API helpers"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Iterator, Optional
|
|
4
|
+
|
|
5
|
+
from dlt.sources.helpers import requests
|
|
6
|
+
|
|
7
|
+
from .settings import DEFAULT_PAGE_SIZE, REQUEST_TIMEOUT
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _soql_literal(value: Any) -> str:
    """Quote *value* as a SoQL string literal.

    A space separator (as produced by ``str(datetime)``) is normalized to
    ``T``, and embedded single quotes are doubled so the value cannot
    terminate the literal early.
    """
    text = str(value).replace(" ", "T")
    return "'" + text.replace("'", "''") + "'"


def fetch_data(
    domain: str,
    dataset_id: str,
    app_token: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    incremental_key: Optional[str] = None,
    start_value: Optional[str] = None,
    end_value: Optional[str] = None,
) -> Iterator[list[Dict[str, Any]]]:
    """
    Fetch records from Socrata dataset with pagination and optional filtering.

    Uses offset-based pagination (``$limit`` / ``$offset``) until a short or
    empty page is returned. Every request carries an explicit ``$order`` so
    the row order is stable across pages.
    Supports incremental loading via SoQL WHERE clause for server-side filtering.

    Args:
        domain: Socrata domain (e.g., "data.seattle.gov")
        dataset_id: Dataset identifier (e.g., "6udu-fhnu")
        app_token: Socrata app token for higher rate limits
        username: Username for authentication
        password: Password for authentication
        incremental_key: Column used for range filtering and ordering
        start_value: Minimum value for incremental_key (inclusive)
        end_value: Maximum value for incremental_key (exclusive)

    Yields:
        Lists of records (one list per page)

    Raises:
        requests.HTTPError: If API request fails
    """
    url = f"https://{domain}/resource/{dataset_id}.json"

    headers = {"Accept": "application/json"}
    if app_token:
        headers["X-App-Token"] = app_token

    # Basic auth is only sent when both halves of the credential are present.
    auth = (username, password) if username and password else None

    # The filter and ordering are identical for every page, so build them once.
    query: Dict[str, Any] = {}
    if incremental_key:
        where_conditions = []
        if start_value:
            where_conditions.append(
                f"{incremental_key} >= {_soql_literal(start_value)}"
            )
        if end_value:
            where_conditions.append(f"{incremental_key} < {_soql_literal(end_value)}")
        if where_conditions:
            query["$where"] = " AND ".join(where_conditions)
        # ":id" breaks ties between rows sharing the same cursor value;
        # without it such rows may move between pages.
        query["$order"] = f"{incremental_key} ASC, :id"
    else:
        # Socrata does not order results implicitly; paging without an
        # explicit order can skip or repeat rows.
        query["$order"] = ":id"

    limit = DEFAULT_PAGE_SIZE
    offset = 0

    while True:
        params: Dict[str, Any] = {**query, "$limit": limit, "$offset": offset}

        response = requests.get(
            url,
            headers=headers,
            auth=auth,
            params=params,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        data = response.json()

        if not data:
            break

        yield data

        # A short page means the dataset is exhausted; skip the extra request.
        if len(data) < limit:
            break

        offset += limit
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
from typing import Iterable, Iterator
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.sources import DltResource
|
|
6
|
+
|
|
7
|
+
from .helpers import SolidgateClient
|
|
8
|
+
|
|
9
|
+
# Column hints per resource, keyed by resource name. Each entry is passed as
# the `columns=` argument of the matching resource in `solidgate_source`.
# In every table the primary-key column is non-nullable text and `created_at`
# is flagged as the partition column.
COLUMN_HINTS = {
    # Primary key: id
    "subscriptions": {
        "id": {"data_type": "text", "nullable": False, "primary_key": True},
        "created_at": {"data_type": "timestamp", "partition": True},
        "status": {"data_type": "text"},
        "started_at": {"data_type": "timestamp"},
        "updated_at": {"data_type": "timestamp"},
        "expired_at": {"data_type": "timestamp"},
        "next_charge_at": {"data_type": "timestamp"},
        "payment_type": {"data_type": "text"},
        "trial": {"data_type": "bool"},
        "cancelled_at": {"data_type": "timestamp"},
        "cancellation_requested_at": {"data_type": "timestamp"},
        "cancel_code": {"data_type": "text"},
        "cancel_message": {"data_type": "text"},
        # Nested objects are kept as JSON columns
        "customer": {"data_type": "json"},
        "product": {"data_type": "json"},
        "invoices": {"data_type": "json"},
    },
    # Primary key: order_id
    "apm_orders": {
        "order_id": {"data_type": "text", "nullable": False, "primary_key": True},
        "created_at": {"data_type": "timestamp", "partition": True},
        "updated_at": {"data_type": "timestamp"},
        "order_description": {"data_type": "text"},
        "method": {"data_type": "text"},
        "amount": {"data_type": "bigint"},
        "currency": {"data_type": "text"},
        "processing_amount": {"data_type": "bigint"},
        "processing_currency": {"data_type": "text"},
        "status": {"data_type": "text"},
        "customer_account_id": {"data_type": "text"},
        "customer_email": {"data_type": "text"},
        "ip_address": {"data_type": "text"},
        "geo_country": {"data_type": "text"},
        "error_code": {"data_type": "text"},
        "transactions": {"data_type": "json"},
        "order_metadata": {"data_type": "json"},
    },
    # Primary key: order_id
    "card_orders": {
        "order_id": {"data_type": "text", "nullable": False, "primary_key": True},
        "created_at": {"data_type": "timestamp", "partition": True},
        "updated_at": {"data_type": "timestamp"},
        "order_description": {"data_type": "text"},
        "psp_order_id": {"data_type": "text"},
        "provider_payment_id": {"data_type": "text"},
        "amount": {"data_type": "bigint"},
        "currency": {"data_type": "text"},
        "processing_amount": {"data_type": "bigint"},
        "processing_currency": {"data_type": "text"},
        "status": {"data_type": "text"},
        "payment_type": {"data_type": "text"},
        "type": {"data_type": "text"},
        "is_secured": {"data_type": "bool"},
        "routing": {"data_type": "json"},
        "customer_account_id": {"data_type": "text"},
        "customer_email": {"data_type": "text"},
        "customer_first_name": {"data_type": "text"},
        "customer_last_name": {"data_type": "text"},
        "ip_address": {"data_type": "text"},
        "mid": {"data_type": "text"},
        "traffic_source": {"data_type": "text"},
        "platform": {"data_type": "text"},
        "geo_country": {"data_type": "text"},
        "error_code": {"data_type": "text"},
        "transactions": {"data_type": "json"},
        "order_metadata": {"data_type": "json"},
        "fraudulent": {"data_type": "bool"},
    },
    # Primary key: id. Unlike the order tables, amounts here are typed as
    # double rather than bigint.
    "financial_entries": {
        "id": {
            "data_type": "text",
            "nullable": False,
            "primary_key": True,
        },
        "order_id": {"data_type": "text"},
        "external_psp_order_id": {"data_type": "text"},
        "created_at": {"data_type": "timestamp", "partition": True},
        "transaction_datetime_provider": {"data_type": "timestamp"},
        "transaction_datetime_utc": {"data_type": "timestamp"},
        "accounting_date": {"data_type": "date"},
        "amount": {"data_type": "double"},
        "amount_in_major_units": {"data_type": "double"},
        "currency": {"data_type": "text"},
        "currency_minor_units": {"data_type": "bigint"},
        "payout_amount": {"data_type": "double"},
        "payout_amount_in_major_units": {"data_type": "double"},
        "payout_currency": {"data_type": "text"},
        "payout_currency_minor_units": {"data_type": "bigint"},
        "record_type_key": {"data_type": "text"},
        "provider": {"data_type": "text"},
        "payment_method": {"data_type": "text"},
        "card_brand": {"data_type": "text"},
        "geo_country": {"data_type": "text"},
        "issuing_country": {"data_type": "text"},
        "transaction_id": {"data_type": "text"},
        "chargeback_id": {"data_type": "text"},
        "legal_entity": {"data_type": "text"},
    },
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dlt.source(max_table_nesting=0)
def solidgate_source(
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime | None,
    public_key: str,
    secret_key: str,
) -> Iterable[DltResource]:
    """Build the Solidgate resources for the given date range.

    Four merge resources are returned: ``subscriptions``, ``apm_orders`` and
    ``card_orders`` (tracked incrementally on ``updated_at``) and
    ``financial_entries`` (tracked on ``created_at``). Each resource queries
    from its incremental's last value up to ``end_date``, or up to the
    current UTC time when no end value is set.

    Args:
        start_date: Initial value of every incremental cursor.
        end_date: Optional end value of every incremental cursor.
        public_key: Solidgate public (merchant) key.
        secret_key: Solidgate secret key used for signing requests.
    """
    client = SolidgateClient(public_key, secret_key)

    def _cursor(field: str):
        # One incremental per resource, closed on both ends of the range.
        return dlt.sources.incremental(
            field,
            initial_value=start_date,
            end_value=end_date,
            range_start="closed",
            range_end="closed",
        )

    def _window(cursor):
        # An open-ended incremental is capped at "now" (UTC).
        upper = pendulum.now(tz="UTC") if cursor.end_value is None else cursor.end_value
        return cursor.last_value, upper

    @dlt.resource(
        name="subscriptions",
        write_disposition="merge",
        primary_key="id",
        columns=COLUMN_HINTS["subscriptions"],  # type: ignore
    )
    def fetch_all_subscriptions(dateTime=_cursor("updated_at")) -> Iterator[dict]:
        since, until = _window(dateTime)
        yield client.fetch_data("subscriptions", date_from=since, date_to=until)

    @dlt.resource(
        name="apm_orders",
        write_disposition="merge",
        primary_key="order_id",
        columns=COLUMN_HINTS["apm_orders"],  # type: ignore
    )
    def fetch_apm_orders(dateTime=_cursor("updated_at")) -> Iterator[dict]:
        since, until = _window(dateTime)
        yield client.fetch_data("apm-orders", date_from=since, date_to=until)

    @dlt.resource(
        name="card_orders",
        write_disposition="merge",
        primary_key="order_id",
        columns=COLUMN_HINTS["card_orders"],  # type: ignore
    )
    def fetch_card_orders(dateTime=_cursor("updated_at")) -> Iterator[dict]:
        since, until = _window(dateTime)
        yield client.fetch_data("card-orders", date_from=since, date_to=until)

    @dlt.resource(
        name="financial_entries",
        write_disposition="merge",
        primary_key="id",
        columns=COLUMN_HINTS["financial_entries"],  # type: ignore
    )
    def fetch_financial_entries(dateTime=_cursor("created_at")):
        since, until = _window(dateTime)
        yield client.fetch_financial_entry_data(since, until)

    return (
        fetch_all_subscriptions,
        fetch_apm_orders,
        fetch_card_orders,
        fetch_financial_entries,
    )
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import csv
|
|
3
|
+
import hashlib
|
|
4
|
+
import hmac
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from io import StringIO
|
|
8
|
+
|
|
9
|
+
import pendulum
|
|
10
|
+
|
|
11
|
+
from omniload.src.http_client import create_client
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SolidgateClient:
    """Client for the Solidgate reporting API.

    Every request carries the merchant public key and a signature produced by
    ``generateSignature`` from the secret key.
    """

    def __init__(self, public_key, secret_key):
        self.base_url = "https://reports.solidgate.com/api/v1"
        self.public_key = public_key
        self.secret_key = secret_key
        # NOTE(review): 204 is registered as a retryable status; presumably the
        # API answers 204 while a result is not ready yet - confirm.
        self.client = create_client(retry_status_codes=[204])

    def _signed_headers(self, signed_text: str) -> dict:
        """Return the request headers, with the signature computed over *signed_text*."""
        return {
            "merchant": self.public_key,
            "Signature": self.generateSignature(signed_text),
            "Content-Type": "application/json",
        }

    def fetch_data(
        self,
        path: str,
        date_from: pendulum.DateTime,
        date_to: pendulum.DateTime,
    ):
        """Yield every record of a paginated report endpoint.

        Args:
            path: Endpoint path below the base URL. ``"subscriptions"`` reads
                the ``subscriptions`` mapping of the response; any other path
                reads the ``orders`` list.
            date_from: Start of the requested period.
            date_to: End of the requested period.

        Yields:
            One record (dict) at a time, with ``updated_at`` parsed into a
            pendulum value when the field is present.

        Raises:
            An HTTP error (via ``raise_for_status``) when a request fails.
        """
        request_payload = {
            "date_from": date_from.format("YYYY-MM-DD HH:mm:ss"),
            "date_to": date_to.format("YYYY-MM-DD HH:mm:ss"),
        }

        next_page_iterator = None
        url = f"{self.base_url}/{path}"

        while True:
            payload = request_payload.copy()
            if next_page_iterator:
                payload["page_iterator"] = next_page_iterator

            # The signature covers the request body, so it must be recomputed
            # for every page: follow-up pages carry a page_iterator that the
            # first-page signature does not account for.
            headers = self._signed_headers(json.dumps(payload))

            response = self.client.post(url, headers=headers, json=payload)
            response.raise_for_status()
            response_json = response.json()

            if path == "subscriptions":
                # Subscriptions arrive as a mapping; only the values are records.
                records = response_json["subscriptions"].values()
            else:
                records = response_json["orders"]

            for value in records:
                if "updated_at" in value:
                    value["updated_at"] = pendulum.parse(value["updated_at"])
                yield value

            next_page_iterator = response_json.get("metadata", {}).get(
                "next_page_iterator"
            )
            # The literal string "None" is also treated as "no further page".
            if not next_page_iterator or next_page_iterator == "None":
                break

    def fetch_financial_entry_data(
        self, date_from: pendulum.DateTime, date_to: pendulum.DateTime
    ):
        """Request a financial-entries report and return its rows.

        The report is requested with a POST, then its ``report_url`` is polled
        every 5 seconds (at most 120 attempts) until a CSV body is served.

        Args:
            date_from: Start of the requested period.
            date_to: End of the requested period.

        Returns:
            A list of row dicts. ``created_at`` is parsed into a pendulum value
            (or ``None`` when empty); other empty-string fields are dropped.

        Raises:
            RuntimeError: If the POST response carries no ``report_url``.
            Exception: If the API reports an error, the CSV cannot be read, or
                the report is still unavailable after the last attempt.
        """
        request_payload = {
            "date_from": date_from.format("YYYY-MM-DD HH:mm:ss"),
            "date_to": date_to.format("YYYY-MM-DD HH:mm:ss"),
        }
        headers = self._signed_headers(json.dumps(request_payload))

        url = f"{self.base_url}/finance/financial_entries"
        post_response = self.client.post(url, headers=headers, json=request_payload)
        post_response.raise_for_status()
        post_json = post_response.json()
        report_url = post_json.get("report_url")
        if not report_url:
            # Raise instead of returning an error tuple: the caller yields the
            # return value as data, so a tuple would be loaded as a record.
            raise RuntimeError(f"Report URL not found in the response: {post_json}")

        # The download request has no body, so the signature covers an empty string.
        headers_get = self._signed_headers("")

        # Retry getting the report for up to 10 minutes (600 seconds) with 5-second intervals
        max_retries = 120  # 10 minutes / 5 seconds = 120 attempts
        status_code = None

        for attempt in range(1, max_retries + 1):
            get_response = self.client.get(report_url, headers=headers_get)
            status_code = get_response.status_code

            if status_code == 200:
                try:
                    response_json = json.loads(get_response.content)
                except json.JSONDecodeError:
                    # A body that is not JSON is the CSV report itself.
                    return self._parse_financial_csv(get_response.content)

                if isinstance(response_json, dict) and "error" in response_json:
                    raise Exception(f"API Error: {response_json['error']['messages']}")
                # A JSON body without an error is not a report; treat it as
                # "not ready" so it counts as an attempt instead of spinning
                # forever without a pause.

            # Report might not be ready yet, wait and retry
            if attempt < max_retries:
                time.sleep(5)  # Wait 5 seconds before retrying

        raise Exception(
            f"Failed to get report after {max_retries} attempts. Status code: {status_code}"
        )

    @staticmethod
    def _parse_financial_csv(content: bytes) -> list:
        """Decode a UTF-8 CSV report into a list of row dicts."""
        try:
            reader = csv.DictReader(StringIO(content.decode("utf-8")))
            rows = []
            for row in reader:
                if row["created_at"]:
                    row["created_at"] = pendulum.parse(row["created_at"])
                else:
                    row["created_at"] = None

                # Drop empty-string cells; created_at stays, even when None.
                rows.append({k: v for k, v in row.items() if v != ""})
            return rows
        except Exception as e:
            raise Exception(f"Error reading CSV: {e}") from e

    def generateSignature(self, json_string):
        """Return the request signature for *json_string*.

        The signed text is ``public_key + json_string + public_key``. It is
        hashed with HMAC-SHA512 keyed by the secret key, and the hex digest is
        then base64-encoded.
        """
        data = self.public_key + json_string + self.public_key
        hex_digest = hmac.new(
            self.secret_key.encode("utf-8"), data.encode("utf-8"), hashlib.sha512
        ).hexdigest()
        return base64.b64encode(hex_digest.encode("utf-8")).decode("utf-8")
|