ingestr 0.12.9__py3-none-any.whl → 0.12.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/blob.py +49 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/factory.py +4 -0
- ingestr/src/google_ads/__init__.py +116 -0
- ingestr/src/google_ads/field.py +2 -0
- ingestr/src/google_ads/metrics.py +240 -0
- ingestr/src/google_ads/predicates.py +23 -0
- ingestr/src/google_ads/reports.py +380 -0
- ingestr/src/linkedin_ads/__init__.py +63 -0
- ingestr/src/linkedin_ads/dimension_time_enum.py +12 -0
- ingestr/src/linkedin_ads/helpers.py +148 -0
- ingestr/src/sources.py +195 -28
- ingestr/src/version.py +1 -1
- {ingestr-0.12.9.dist-info → ingestr-0.12.11.dist-info}/METADATA +3 -1
- {ingestr-0.12.9.dist-info → ingestr-0.12.11.dist-info}/RECORD +18 -9
- {ingestr-0.12.9.dist-info → ingestr-0.12.11.dist-info}/WHEEL +0 -0
- {ingestr-0.12.9.dist-info → ingestr-0.12.11.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.9.dist-info → ingestr-0.12.11.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/blob.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import Tuple, TypeAlias
|
|
3
|
+
from urllib.parse import ParseResult
|
|
4
|
+
|
|
5
|
+
BucketName: TypeAlias = str
|
|
6
|
+
FileGlob: TypeAlias = str
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
10
|
+
"""
|
|
11
|
+
parse the URI of a blob storage and
|
|
12
|
+
return the bucket name and the file glob.
|
|
13
|
+
|
|
14
|
+
Supports the following Forms:
|
|
15
|
+
- uri: "gs://"
|
|
16
|
+
table: "bucket-name/file-glob"
|
|
17
|
+
- uri: gs://bucket-name/file-glob
|
|
18
|
+
table: None
|
|
19
|
+
- uri: "gs://bucket-name"
|
|
20
|
+
table: "file-glob"
|
|
21
|
+
|
|
22
|
+
The first form is the prefered method. Other forms are supported
|
|
23
|
+
for backward compatibility, but discouraged.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
table = table.strip()
|
|
27
|
+
host = uri.netloc.strip()
|
|
28
|
+
|
|
29
|
+
if table == "":
|
|
30
|
+
warnings.warn(
|
|
31
|
+
f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
|
|
32
|
+
DeprecationWarning,
|
|
33
|
+
stacklevel=2,
|
|
34
|
+
)
|
|
35
|
+
return host, uri.path.lstrip("/")
|
|
36
|
+
|
|
37
|
+
if host != "":
|
|
38
|
+
warnings.warn(
|
|
39
|
+
f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
|
|
40
|
+
DeprecationWarning,
|
|
41
|
+
stacklevel=2,
|
|
42
|
+
)
|
|
43
|
+
return host, table.lstrip("/")
|
|
44
|
+
|
|
45
|
+
parts = table.lstrip("/").split("/", maxsplit=1)
|
|
46
|
+
if len(parts) != 2:
|
|
47
|
+
return "", parts[0]
|
|
48
|
+
|
|
49
|
+
return parts[0], parts[1]
|
ingestr/src/errors.py
CHANGED
|
@@ -8,3 +8,11 @@ class UnsupportedResourceError(Exception):
|
|
|
8
8
|
super().__init__(
|
|
9
9
|
f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
10
10
|
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InvalidBlobTableError(Exception):
    """Error describing a blob-storage source table that is not in the
    ``{bucket-name}/{file glob}`` format."""

    def __init__(self, source):
        """
        :param source: name of the source, interpolated into the message.
        """
        # Only the first fragment is an f-string; the braces in the second
        # fragment are literal text shown to the user.
        super().__init__(
            f"Invalid source table for {source}. "
            "Ensure that the table is in the format {bucket-name}/{file glob}"
        )
|
ingestr/src/factory.py
CHANGED
|
@@ -27,12 +27,14 @@ from ingestr.src.sources import (
|
|
|
27
27
|
FacebookAdsSource,
|
|
28
28
|
GCSSource,
|
|
29
29
|
GitHubSource,
|
|
30
|
+
GoogleAdsSource,
|
|
30
31
|
GoogleAnalyticsSource,
|
|
31
32
|
GoogleSheetsSource,
|
|
32
33
|
GorgiasSource,
|
|
33
34
|
HubspotSource,
|
|
34
35
|
KafkaSource,
|
|
35
36
|
KlaviyoSource,
|
|
37
|
+
LinkedInAdsSource,
|
|
36
38
|
LocalCsvSource,
|
|
37
39
|
MongoDbSource,
|
|
38
40
|
NotionSource,
|
|
@@ -124,8 +126,10 @@ class SourceDestinationFactory:
|
|
|
124
126
|
"asana": AsanaSource,
|
|
125
127
|
"tiktok": TikTokSource,
|
|
126
128
|
"googleanalytics": GoogleAnalyticsSource,
|
|
129
|
+
"googleads": GoogleAdsSource,
|
|
127
130
|
"appstore": AppleAppStoreSource,
|
|
128
131
|
"gs": GCSSource,
|
|
132
|
+
"linkedinads": LinkedInAdsSource,
|
|
129
133
|
}
|
|
130
134
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
131
135
|
"bigquery": BigQueryDestination,
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from typing import Any, Iterator, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
import proto # type: ignore
|
|
7
|
+
from dlt.common.exceptions import MissingDependencyException
|
|
8
|
+
from dlt.common.typing import TDataItem
|
|
9
|
+
from dlt.sources import DltResource
|
|
10
|
+
from flatten_json import flatten # type: ignore
|
|
11
|
+
from googleapiclient.discovery import Resource # type: ignore
|
|
12
|
+
|
|
13
|
+
from . import field
|
|
14
|
+
from .metrics import dlt_metrics_schema
|
|
15
|
+
from .predicates import date_predicate
|
|
16
|
+
from .reports import BUILTIN_REPORTS, Report
|
|
17
|
+
|
|
18
|
+
try:
    from google.ads.googleads.client import GoogleAdsClient  # type: ignore
except ImportError:
    # The first argument names the component that needs the dependency;
    # it is this Google Ads source that requires the `google-ads` package.
    raise MissingDependencyException("Google Ads Source", ["google-ads"])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
def google_ads(
    client: GoogleAdsClient,
    customer_id: str,
    report_spec: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
) -> Iterator[DltResource]:
    """
    Yield one merge-disposition resource per Google Ads report.

    When ``report_spec`` is given, a custom report is parsed from it and
    yielded first under the name ``daily_report``. Every entry of
    ``BUILTIN_REPORTS`` is then yielded under its own name.

    :param client: Google Ads client handed to each report resource.
    :param customer_id: customer id handed to each report resource.
    :param report_spec: optional custom report specification, parsed with
        ``Report().from_spec``.
    :param start_date: lower bound of the date range (required despite the
        ``None`` default in the signature).
    :param end_date: optional upper bound of the date range.
    :raises ValueError: if ``start_date`` is not provided.
    """
    # Fail with a clear message instead of an AttributeError on
    # `None.date()`; the message matches the one used by date_predicate.
    if start_date is None:
        raise ValueError("start_date must be provided")

    # Both bounds are inclusive and tracked on the `segments_date` column.
    date_range = dlt.sources.incremental(
        "segments_date",
        initial_value=start_date.date(),  # type: ignore
        end_value=end_date.date() if end_date is not None else None,  # type: ignore
        range_start="closed",
        range_end="closed",
    )
    if report_spec is not None:
        custom_report = Report().from_spec(report_spec)
        yield dlt.resource(
            daily_report,
            name="daily_report",
            write_disposition="merge",
            primary_key=custom_report.primary_keys(),
            columns=dlt_metrics_schema(custom_report.metrics),
        )(client, customer_id, custom_report, date_range)

    for report_name, report in BUILTIN_REPORTS.items():
        yield dlt.resource(
            daily_report,
            name=report_name,
            write_disposition="merge",
            primary_key=report.primary_keys(),
            columns=dlt_metrics_schema(report.metrics),
        )(client, customer_id, report, date_range)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def daily_report(
    client: Resource,
    customer_id: str,
    report: Report,
    date: dlt.sources.incremental[date],
) -> Iterator[TDataItem]:
    """
    Run a report query through ``GoogleAdsService.search_stream`` and yield
    one flat dictionary per result row.

    The query selects the report's dimensions, metrics and segments from
    ``report.resource``. Unless ``report.unfilterable`` is ``True``, rows are
    restricted on ``segments.date`` using the incremental's ``last_value``
    and ``end_value``.

    :param client: object exposing ``get_service("GoogleAdsService")``.
    :param customer_id: customer id passed to ``search_stream``.
    :param report: report definition (dimensions, metrics, segments,
        resource, unfilterable).
    :param date: incremental whose ``last_value`` / ``end_value`` bound the
        query's date range.
    :return: iterator of row dictionaries restricted to the columns derived
        from the selected fields.
    """
    ga_service = client.get_service("GoogleAdsService")
    fields = report.dimensions + report.metrics + report.segments
    # Evaluated unconditionally so that a missing start date is still
    # reported even for reports that end up without a WHERE clause.
    criteria = date_predicate("segments.date", date.last_value, date.end_value)  # type:ignore

    # Assemble the query from its parts rather than building it with a WHERE
    # clause and cutting the string at the first "WHERE" occurrence, which
    # would truncate in the wrong place if a field or resource name happened
    # to contain that substring.
    query = f"SELECT {', '.join(fields)} FROM {report.resource}"
    if report.unfilterable is not True:
        query = f"{query} WHERE {criteria}"

    allowed_keys = {field.to_column(k) for k in fields}
    stream = ga_service.search_stream(customer_id=customer_id, query=query)
    for batch in stream:
        for row in batch.results:
            data = flatten(merge_lists(to_dict(row)))
            if "segments_date" in data:
                # The date arrives as a "YYYY-MM-DD" string; convert it to a
                # real date object.
                data["segments_date"] = datetime.strptime(
                    data["segments_date"], "%Y-%m-%d"
                ).date()
            # Drop every key that was not explicitly selected.
            yield {k: v for k, v in data.items() if k in allowed_keys}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def to_dict(item: Any) -> TDataItem:
    """
    Convert a proto message into plain Python data by serializing it to
    JSON and parsing the JSON back.

    Field names keep their proto spelling, enums are rendered by name
    rather than by integer, and default-valued fields are left out.

    :param item: the message to convert
    :return: the parsed JSON representation of ``item``
    """
    serialized = proto.Message.to_json(
        item,
        preserving_proto_field_name=True,
        use_integers_for_enums=False,
        including_default_value_fields=False,
    )
    return json.loads(serialized)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def merge_lists(item: dict) -> dict:
    """
    Collapse list-valued entries of ``item["metrics"]`` into comma-separated
    strings.

    The ``metrics`` mapping is updated in place and the same ``item`` object
    is returned. Items without a ``metrics`` entry (or with an empty or
    ``None`` one) are returned untouched.

    :param item: a row dictionary, optionally holding a ``metrics`` mapping.
    :return: ``item`` itself, with list metrics replaced by joined strings.
    """
    metrics = item.get("metrics")
    if not metrics:
        return item

    # Elements are stringified first so that lists of non-string values
    # (e.g. numbers) are joined instead of raising a TypeError.
    joined = {
        name: ",".join(map(str, value))
        for name, value in metrics.items()
        if isinstance(value, list)
    }
    metrics.update(joined)
    return item
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from . import field
|
|
4
|
+
|
|
5
|
+
METRICS_SCHEMA = {
|
|
6
|
+
"metrics.absolute_top_impression_percentage": "DOUBLE",
|
|
7
|
+
"metrics.active_view_cpm": "DOUBLE",
|
|
8
|
+
"metrics.active_view_ctr": "DOUBLE",
|
|
9
|
+
"metrics.active_view_impressions": "INT64",
|
|
10
|
+
"metrics.active_view_measurability": "DOUBLE",
|
|
11
|
+
"metrics.active_view_measurable_cost_micros": "INT64",
|
|
12
|
+
"metrics.active_view_measurable_impressions": "INT64",
|
|
13
|
+
"metrics.active_view_viewability": "DOUBLE",
|
|
14
|
+
"metrics.all_conversions": "DOUBLE",
|
|
15
|
+
"metrics.all_conversions_by_conversion_date": "DOUBLE",
|
|
16
|
+
"metrics.all_conversions_from_click_to_call": "DOUBLE",
|
|
17
|
+
"metrics.all_conversions_from_directions": "DOUBLE",
|
|
18
|
+
"metrics.all_conversions_from_interactions_rate": "DOUBLE",
|
|
19
|
+
"metrics.all_conversions_from_interactions_value_per_interaction": "DOUBLE",
|
|
20
|
+
"metrics.all_conversions_from_location_asset_click_to_call": "DOUBLE",
|
|
21
|
+
"metrics.all_conversions_from_location_asset_directions": "DOUBLE",
|
|
22
|
+
"metrics.all_conversions_from_location_asset_menu": "DOUBLE",
|
|
23
|
+
"metrics.all_conversions_from_location_asset_order": "DOUBLE",
|
|
24
|
+
"metrics.all_conversions_from_location_asset_other_engagement": "DOUBLE",
|
|
25
|
+
"metrics.all_conversions_from_location_asset_store_visits": "DOUBLE",
|
|
26
|
+
"metrics.all_conversions_from_location_asset_website": "DOUBLE",
|
|
27
|
+
"metrics.all_conversions_from_menu": "DOUBLE",
|
|
28
|
+
"metrics.all_conversions_from_order": "DOUBLE",
|
|
29
|
+
"metrics.all_conversions_from_other_engagement": "DOUBLE",
|
|
30
|
+
"metrics.all_conversions_from_store_visit": "DOUBLE",
|
|
31
|
+
"metrics.all_conversions_from_store_website": "DOUBLE",
|
|
32
|
+
"metrics.all_conversions_value": "DOUBLE",
|
|
33
|
+
"metrics.all_conversions_value_by_conversion_date": "DOUBLE",
|
|
34
|
+
"metrics.all_conversions_value_per_cost": "DOUBLE",
|
|
35
|
+
"metrics.all_new_customer_lifetime_value": "DOUBLE",
|
|
36
|
+
"metrics.asset_best_performance_cost_percentage": "DOUBLE",
|
|
37
|
+
"metrics.asset_best_performance_impression_percentage": "DOUBLE",
|
|
38
|
+
"metrics.asset_good_performance_cost_percentage": "DOUBLE",
|
|
39
|
+
"metrics.asset_good_performance_impression_percentage": "DOUBLE",
|
|
40
|
+
"metrics.asset_learning_performance_cost_percentage": "DOUBLE",
|
|
41
|
+
"metrics.asset_learning_performance_impression_percentage": "DOUBLE",
|
|
42
|
+
"metrics.asset_low_performance_cost_percentage": "DOUBLE",
|
|
43
|
+
"metrics.asset_low_performance_impression_percentage": "DOUBLE",
|
|
44
|
+
"metrics.asset_pinned_as_description_position_one_count": "INT64",
|
|
45
|
+
"metrics.asset_pinned_as_description_position_two_count": "INT64",
|
|
46
|
+
"metrics.asset_pinned_as_headline_position_one_count": "INT64",
|
|
47
|
+
"metrics.asset_pinned_as_headline_position_three_count": "INT64",
|
|
48
|
+
"metrics.asset_pinned_as_headline_position_two_count": "INT64",
|
|
49
|
+
"metrics.asset_pinned_total_count": "INT64",
|
|
50
|
+
"metrics.asset_unrated_performance_cost_percentage": "DOUBLE",
|
|
51
|
+
"metrics.asset_unrated_performance_impression_percentage": "DOUBLE",
|
|
52
|
+
"metrics.auction_insight_search_absolute_top_impression_percentage": "DOUBLE",
|
|
53
|
+
"metrics.auction_insight_search_impression_share": "DOUBLE",
|
|
54
|
+
"metrics.auction_insight_search_outranking_share": "DOUBLE",
|
|
55
|
+
"metrics.auction_insight_search_overlap_rate": "DOUBLE",
|
|
56
|
+
"metrics.auction_insight_search_position_above_rate": "DOUBLE",
|
|
57
|
+
"metrics.auction_insight_search_top_impression_percentage": "DOUBLE",
|
|
58
|
+
"metrics.average_cart_size": "DOUBLE",
|
|
59
|
+
"metrics.average_cost": "DOUBLE",
|
|
60
|
+
"metrics.average_cpc": "DOUBLE",
|
|
61
|
+
"metrics.average_cpe": "DOUBLE",
|
|
62
|
+
"metrics.average_cpm": "DOUBLE",
|
|
63
|
+
"metrics.average_cpv": "DOUBLE",
|
|
64
|
+
"metrics.average_impression_frequency_per_user": "DOUBLE",
|
|
65
|
+
"metrics.average_order_value_micros": "INT64",
|
|
66
|
+
"metrics.average_page_views": "DOUBLE",
|
|
67
|
+
"metrics.average_target_cpa_micros": "INT64",
|
|
68
|
+
"metrics.average_target_roas": "DOUBLE",
|
|
69
|
+
"metrics.average_time_on_site": "DOUBLE",
|
|
70
|
+
"metrics.benchmark_average_max_cpc": "DOUBLE",
|
|
71
|
+
"metrics.benchmark_ctr": "DOUBLE",
|
|
72
|
+
"metrics.biddable_app_install_conversions": "DOUBLE",
|
|
73
|
+
"metrics.biddable_app_post_install_conversions": "DOUBLE",
|
|
74
|
+
"metrics.bounce_rate": "DOUBLE",
|
|
75
|
+
"metrics.clicks": "INT64",
|
|
76
|
+
"metrics.combined_clicks": "INT64",
|
|
77
|
+
"metrics.combined_clicks_per_query": "DOUBLE",
|
|
78
|
+
"metrics.combined_queries": "INT64",
|
|
79
|
+
"metrics.content_budget_lost_impression_share": "DOUBLE",
|
|
80
|
+
"metrics.content_impression_share": "DOUBLE",
|
|
81
|
+
"metrics.content_rank_lost_impression_share": "DOUBLE",
|
|
82
|
+
"metrics.conversion_last_conversion_date": "DATE",
|
|
83
|
+
"metrics.conversion_last_received_request_date_time": "DATE",
|
|
84
|
+
"metrics.conversions": "DOUBLE",
|
|
85
|
+
"metrics.conversions_by_conversion_date": "DOUBLE",
|
|
86
|
+
"metrics.conversions_from_interactions_rate": "DOUBLE",
|
|
87
|
+
"metrics.conversions_from_interactions_value_per_interaction": "DOUBLE",
|
|
88
|
+
"metrics.conversions_value": "DOUBLE",
|
|
89
|
+
"metrics.conversions_value_by_conversion_date": "DOUBLE",
|
|
90
|
+
"metrics.conversions_value_per_cost": "DOUBLE",
|
|
91
|
+
"metrics.cost_micros": "INT64",
|
|
92
|
+
"metrics.cost_of_goods_sold_micros": "INT64",
|
|
93
|
+
"metrics.cost_per_all_conversions": "DOUBLE",
|
|
94
|
+
"metrics.cost_per_conversion": "DOUBLE",
|
|
95
|
+
"metrics.cost_per_current_model_attributed_conversion": "DOUBLE",
|
|
96
|
+
"metrics.cross_device_conversions": "DOUBLE",
|
|
97
|
+
"metrics.cross_device_conversions_value_micros": "INT64",
|
|
98
|
+
"metrics.cross_sell_cost_of_goods_sold_micros": "INT64",
|
|
99
|
+
"metrics.cross_sell_gross_profit_micros": "INT64",
|
|
100
|
+
"metrics.cross_sell_revenue_micros": "INT64",
|
|
101
|
+
"metrics.cross_sell_units_sold": "DOUBLE",
|
|
102
|
+
"metrics.ctr": "DOUBLE",
|
|
103
|
+
"metrics.current_model_attributed_conversions": "DOUBLE",
|
|
104
|
+
"metrics.current_model_attributed_conversions_from_interactions_rate": "DOUBLE",
|
|
105
|
+
"metrics.current_model_attributed_conversions_from_interactions_value_per_interaction": "DOUBLE",
|
|
106
|
+
"metrics.current_model_attributed_conversions_value": "DOUBLE",
|
|
107
|
+
"metrics.current_model_attributed_conversions_value_per_cost": "DOUBLE",
|
|
108
|
+
"metrics.eligible_impressions_from_location_asset_store_reach": "INT64",
|
|
109
|
+
"metrics.engagement_rate": "DOUBLE",
|
|
110
|
+
"metrics.engagements": "INT64",
|
|
111
|
+
"metrics.general_invalid_click_rate": "DOUBLE",
|
|
112
|
+
"metrics.general_invalid_clicks": "INT64",
|
|
113
|
+
"metrics.gmail_forwards": "INT64",
|
|
114
|
+
"metrics.gmail_saves": "INT64",
|
|
115
|
+
"metrics.gmail_secondary_clicks": "INT64",
|
|
116
|
+
"metrics.gross_profit_margin": "DOUBLE",
|
|
117
|
+
"metrics.gross_profit_micros": "INT64",
|
|
118
|
+
"metrics.historical_creative_quality_score": "ENUM",
|
|
119
|
+
"metrics.historical_landing_page_quality_score": "ENUM",
|
|
120
|
+
"metrics.historical_quality_score": "INT64",
|
|
121
|
+
"metrics.historical_search_predicted_ctr": "ENUM",
|
|
122
|
+
"metrics.hotel_average_lead_value_micros": "DOUBLE",
|
|
123
|
+
"metrics.hotel_commission_rate_micros": "INT64",
|
|
124
|
+
"metrics.hotel_eligible_impressions": "INT64",
|
|
125
|
+
"metrics.hotel_expected_commission_cost": "DOUBLE",
|
|
126
|
+
"metrics.hotel_price_difference_percentage": "DOUBLE",
|
|
127
|
+
"metrics.impressions": "INT64",
|
|
128
|
+
"metrics.impressions_from_store_reach": "INT64",
|
|
129
|
+
"metrics.interaction_event_types": "ENUM",
|
|
130
|
+
"metrics.interaction_rate": "DOUBLE",
|
|
131
|
+
"metrics.interactions": "INT64",
|
|
132
|
+
"metrics.invalid_click_rate": "DOUBLE",
|
|
133
|
+
"metrics.invalid_clicks": "INT64",
|
|
134
|
+
"metrics.lead_cost_of_goods_sold_micros": "INT64",
|
|
135
|
+
"metrics.lead_gross_profit_micros": "INT64",
|
|
136
|
+
"metrics.lead_revenue_micros": "INT64",
|
|
137
|
+
"metrics.lead_units_sold": "DOUBLE",
|
|
138
|
+
"metrics.linked_entities_count": "INT64",
|
|
139
|
+
"metrics.linked_sample_entities": "STRING",
|
|
140
|
+
"metrics.message_chat_rate": "DOUBLE",
|
|
141
|
+
"metrics.message_chats": "INT64",
|
|
142
|
+
"metrics.message_impressions": "INT64",
|
|
143
|
+
"metrics.mobile_friendly_clicks_percentage": "DOUBLE",
|
|
144
|
+
"metrics.new_customer_lifetime_value": "DOUBLE",
|
|
145
|
+
"metrics.optimization_score_uplift": "DOUBLE",
|
|
146
|
+
"metrics.optimization_score_url": "STRING",
|
|
147
|
+
"metrics.orders": "DOUBLE",
|
|
148
|
+
"metrics.organic_clicks": "INT64",
|
|
149
|
+
"metrics.organic_clicks_per_query": "DOUBLE",
|
|
150
|
+
"metrics.organic_impressions": "INT64",
|
|
151
|
+
"metrics.organic_impressions_per_query": "DOUBLE",
|
|
152
|
+
"metrics.organic_queries": "INT64",
|
|
153
|
+
"metrics.percent_new_visitors": "DOUBLE",
|
|
154
|
+
"metrics.phone_calls": "INT64",
|
|
155
|
+
"metrics.phone_impressions": "INT64",
|
|
156
|
+
"metrics.phone_through_rate": "DOUBLE",
|
|
157
|
+
"metrics.publisher_organic_clicks": "INT64",
|
|
158
|
+
"metrics.publisher_purchased_clicks": "INT64",
|
|
159
|
+
"metrics.publisher_unknown_clicks": "INT64",
|
|
160
|
+
"metrics.relative_ctr": "DOUBLE",
|
|
161
|
+
"metrics.results_conversions_purchase": "DOUBLE",
|
|
162
|
+
"metrics.revenue_micros": "INT64",
|
|
163
|
+
"metrics.sample_best_performance_entities": "STRING",
|
|
164
|
+
"metrics.sample_good_performance_entities": "STRING",
|
|
165
|
+
"metrics.sample_learning_performance_entities": "STRING",
|
|
166
|
+
"metrics.sample_low_performance_entities": "STRING",
|
|
167
|
+
"metrics.sample_unrated_performance_entities": "STRING",
|
|
168
|
+
"metrics.search_absolute_top_impression_share": "DOUBLE",
|
|
169
|
+
"metrics.search_budget_lost_absolute_top_impression_share": "DOUBLE",
|
|
170
|
+
"metrics.search_budget_lost_impression_share": "DOUBLE",
|
|
171
|
+
"metrics.search_budget_lost_top_impression_share": "DOUBLE",
|
|
172
|
+
"metrics.search_click_share": "DOUBLE",
|
|
173
|
+
"metrics.search_exact_match_impression_share": "DOUBLE",
|
|
174
|
+
"metrics.search_impression_share": "DOUBLE",
|
|
175
|
+
"metrics.search_rank_lost_absolute_top_impression_share": "DOUBLE",
|
|
176
|
+
"metrics.search_rank_lost_impression_share": "DOUBLE",
|
|
177
|
+
"metrics.search_rank_lost_top_impression_share": "DOUBLE",
|
|
178
|
+
"metrics.search_top_impression_share": "DOUBLE",
|
|
179
|
+
"metrics.search_volume": "MESSAGE",
|
|
180
|
+
"metrics.sk_ad_network_installs": "INT64",
|
|
181
|
+
"metrics.sk_ad_network_total_conversions": "INT64",
|
|
182
|
+
"metrics.speed_score": "INT64",
|
|
183
|
+
"metrics.store_visits_last_click_model_attributed_conversions": "DOUBLE",
|
|
184
|
+
"metrics.top_impression_percentage": "DOUBLE",
|
|
185
|
+
"metrics.unique_users": "INT64",
|
|
186
|
+
"metrics.units_sold": "DOUBLE",
|
|
187
|
+
"metrics.valid_accelerated_mobile_pages_clicks_percentage": "DOUBLE",
|
|
188
|
+
"metrics.value_per_all_conversions": "DOUBLE",
|
|
189
|
+
"metrics.value_per_all_conversions_by_conversion_date": "DOUBLE",
|
|
190
|
+
"metrics.value_per_conversion": "DOUBLE",
|
|
191
|
+
"metrics.value_per_conversions_by_conversion_date": "DOUBLE",
|
|
192
|
+
"metrics.value_per_current_model_attributed_conversion": "DOUBLE",
|
|
193
|
+
"metrics.video_quartile_p100_rate": "DOUBLE",
|
|
194
|
+
"metrics.video_quartile_p25_rate": "DOUBLE",
|
|
195
|
+
"metrics.video_quartile_p50_rate": "DOUBLE",
|
|
196
|
+
"metrics.video_quartile_p75_rate": "DOUBLE",
|
|
197
|
+
"metrics.video_view_rate": "DOUBLE",
|
|
198
|
+
"metrics.video_view_rate_in_feed": "DOUBLE",
|
|
199
|
+
"metrics.video_view_rate_in_stream": "DOUBLE",
|
|
200
|
+
"metrics.video_view_rate_shorts": "DOUBLE",
|
|
201
|
+
"metrics.video_views": "INT64",
|
|
202
|
+
"metrics.view_through_conversions": "INT64",
|
|
203
|
+
"metrics.view_through_conversions_from_location_asset_click_to_call": "DOUBLE",
|
|
204
|
+
"metrics.view_through_conversions_from_location_asset_directions": "DOUBLE",
|
|
205
|
+
"metrics.view_through_conversions_from_location_asset_menu": "DOUBLE",
|
|
206
|
+
"metrics.view_through_conversions_from_location_asset_order": "DOUBLE",
|
|
207
|
+
"metrics.view_through_conversions_from_location_asset_other_engagement": "DOUBLE",
|
|
208
|
+
"metrics.view_through_conversions_from_location_asset_store_visits": "DOUBLE",
|
|
209
|
+
"metrics.view_through_conversions_from_location_asset_website": "DOUBLE",
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
METRIC_TO_DLT_TYPE = {
    "INT64": "bigint",
    "DOUBLE": "double",
    "STRING": "text",
    "ENUM": "text",
    # TODO: support message types
    # "MESSAGE": "string",
}


def dlt_metrics_schema(metrics: List[str]):
    """
    Build column hints for the given metric names.

    Each metric is looked up in METRICS_SCHEMA and its type is translated
    through METRIC_TO_DLT_TYPE. The result maps the metric's column name
    (``field.to_column``) to ``{"data_type": <dlt type>}``.

    Raises ValueError for a metric missing from METRICS_SCHEMA, and for a
    metric whose type has no entry in METRIC_TO_DLT_TYPE.
    """
    columns = {}
    for name in metrics:
        metric_type = METRICS_SCHEMA.get(name)
        if metric_type is None:
            raise ValueError(f"Unsupported metric {name}")

        dlt_type = METRIC_TO_DLT_TYPE.get(metric_type)
        if dlt_type is None:
            raise ValueError(f"Unsupported metric '{name}' of type '{metric_type}'")

        # ???: can we make these non-nullable?
        columns[field.to_column(name)] = {"data_type": dlt_type}
    return columns
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from datetime import date, datetime, timezone
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def date_predicate(column: str, start_date: date, end_date: Optional[date]) -> str:
    """
    Generates a date predicate for the WHERE clause of a
    GAQL query.

    The predicate is an inclusive range of the form
    ``<column> >= 'YYYY-MM-DD' AND <column> <= 'YYYY-MM-DD'``.

    :param column: name of the column to filter on.
    :param start_date: lower bound of the range; required.
    :param end_date: upper bound of the range; defaults to the current
        UTC date when None.
    :return: the predicate string.
    :raises ValueError: if ``start_date`` is None.
    """
    if start_date is None:
        raise ValueError("start_date must be provided")

    if end_date is None:
        end_date = datetime.now(tz=timezone.utc).date()

    # Both bounds are guaranteed to be set at this point, so the predicate
    # can be assembled directly.
    lower = start_date.strftime("%Y-%m-%d")
    upper = end_date.strftime("%Y-%m-%d")
    return f"{column} >= '{lower}' AND {column} <= '{upper}'"
|