ingestr 0.12.10__py3-none-any.whl → 0.12.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/blob.py +49 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/factory.py +2 -0
- ingestr/src/google_ads/__init__.py +116 -0
- ingestr/src/google_ads/field.py +2 -0
- ingestr/src/google_ads/metrics.py +240 -0
- ingestr/src/google_ads/predicates.py +23 -0
- ingestr/src/google_ads/reports.py +380 -0
- ingestr/src/sources.py +113 -28
- ingestr/src/version.py +1 -1
- {ingestr-0.12.10.dist-info → ingestr-0.12.11.dist-info}/METADATA +3 -1
- {ingestr-0.12.10.dist-info → ingestr-0.12.11.dist-info}/RECORD +15 -9
- {ingestr-0.12.10.dist-info → ingestr-0.12.11.dist-info}/WHEEL +0 -0
- {ingestr-0.12.10.dist-info → ingestr-0.12.11.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.10.dist-info → ingestr-0.12.11.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/blob.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import Tuple, TypeAlias
|
|
3
|
+
from urllib.parse import ParseResult
|
|
4
|
+
|
|
5
|
+
BucketName: TypeAlias = str
|
|
6
|
+
FileGlob: TypeAlias = str
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
|
|
10
|
+
"""
|
|
11
|
+
parse the URI of a blob storage and
|
|
12
|
+
return the bucket name and the file glob.
|
|
13
|
+
|
|
14
|
+
Supports the following Forms:
|
|
15
|
+
- uri: "gs://"
|
|
16
|
+
table: "bucket-name/file-glob"
|
|
17
|
+
- uri: gs://bucket-name/file-glob
|
|
18
|
+
table: None
|
|
19
|
+
- uri: "gs://bucket-name"
|
|
20
|
+
table: "file-glob"
|
|
21
|
+
|
|
22
|
+
The first form is the prefered method. Other forms are supported
|
|
23
|
+
for backward compatibility, but discouraged.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
table = table.strip()
|
|
27
|
+
host = uri.netloc.strip()
|
|
28
|
+
|
|
29
|
+
if table == "":
|
|
30
|
+
warnings.warn(
|
|
31
|
+
f"Using the form '{uri.scheme}://bucket-name/file-glob' is deprecated and will be removed in future versions.",
|
|
32
|
+
DeprecationWarning,
|
|
33
|
+
stacklevel=2,
|
|
34
|
+
)
|
|
35
|
+
return host, uri.path.lstrip("/")
|
|
36
|
+
|
|
37
|
+
if host != "":
|
|
38
|
+
warnings.warn(
|
|
39
|
+
f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
|
|
40
|
+
DeprecationWarning,
|
|
41
|
+
stacklevel=2,
|
|
42
|
+
)
|
|
43
|
+
return host, table.lstrip("/")
|
|
44
|
+
|
|
45
|
+
parts = table.lstrip("/").split("/", maxsplit=1)
|
|
46
|
+
if len(parts) != 2:
|
|
47
|
+
return "", parts[0]
|
|
48
|
+
|
|
49
|
+
return parts[0], parts[1]
|
ingestr/src/errors.py
CHANGED
|
@@ -8,3 +8,11 @@ class UnsupportedResourceError(Exception):
|
|
|
8
8
|
super().__init__(
|
|
9
9
|
f"Resource '{resource}' is not supported for {source} source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
10
10
|
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InvalidBlobTableError(Exception):
    """Error whose message reports an invalid source table for a blob source.

    The message names the offending source and states the expected table
    format, ``{bucket-name}/{file glob}``.
    """

    def __init__(self, source):
        # The two fragments form separate sentences, so the first one is
        # terminated explicitly; otherwise they run together in the output.
        super().__init__(
            f"Invalid source table for {source}. "
            "Ensure that the table is in the format {bucket-name}/{file glob}"
        )
|
ingestr/src/factory.py
CHANGED
|
@@ -27,6 +27,7 @@ from ingestr.src.sources import (
|
|
|
27
27
|
FacebookAdsSource,
|
|
28
28
|
GCSSource,
|
|
29
29
|
GitHubSource,
|
|
30
|
+
GoogleAdsSource,
|
|
30
31
|
GoogleAnalyticsSource,
|
|
31
32
|
GoogleSheetsSource,
|
|
32
33
|
GorgiasSource,
|
|
@@ -125,6 +126,7 @@ class SourceDestinationFactory:
|
|
|
125
126
|
"asana": AsanaSource,
|
|
126
127
|
"tiktok": TikTokSource,
|
|
127
128
|
"googleanalytics": GoogleAnalyticsSource,
|
|
129
|
+
"googleads": GoogleAdsSource,
|
|
128
130
|
"appstore": AppleAppStoreSource,
|
|
129
131
|
"gs": GCSSource,
|
|
130
132
|
"linkedinads": LinkedInAdsSource,
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from typing import Any, Iterator, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
import proto # type: ignore
|
|
7
|
+
from dlt.common.exceptions import MissingDependencyException
|
|
8
|
+
from dlt.common.typing import TDataItem
|
|
9
|
+
from dlt.sources import DltResource
|
|
10
|
+
from flatten_json import flatten # type: ignore
|
|
11
|
+
from googleapiclient.discovery import Resource # type: ignore
|
|
12
|
+
|
|
13
|
+
from . import field
|
|
14
|
+
from .metrics import dlt_metrics_schema
|
|
15
|
+
from .predicates import date_predicate
|
|
16
|
+
from .reports import BUILTIN_REPORTS, Report
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from google.ads.googleads.client import GoogleAdsClient # type: ignore
|
|
20
|
+
except ImportError:
|
|
21
|
+
raise MissingDependencyException("Requests-OAuthlib", ["google-ads"])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dlt.source
def google_ads(
    client: GoogleAdsClient,
    customer_id: str,
    report_spec: Optional[str] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
) -> Iterator[DltResource]:
    """
    Yield the dlt resources for Google Ads daily reports.

    When ``report_spec`` is given, a resource named ``daily_report`` is
    yielded for the report parsed from it. One resource is then yielded for
    every entry of ``BUILTIN_REPORTS``, named after its key. All resources
    use the ``merge`` write disposition, with the report's dimensions and
    segments as primary key, and share one incremental cursor on
    ``segments_date``.

    :param client: client handed through to ``daily_report``.
    :param customer_id: customer id handed through to ``daily_report``.
    :param report_spec: optional custom report specification, parsed with
        ``Report.from_spec``.
    :param start_date: required in practice; its date part is the initial
        value of the incremental cursor.
    :param end_date: optional; its date part is the end value of the cursor.
    :raises ValueError: if ``start_date`` is None.
    """
    # The signature keeps None as the default, but the cursor cannot be built
    # without a start; fail with a clear message instead of an AttributeError.
    if start_date is None:
        raise ValueError("start_date must be provided")

    date_range = dlt.sources.incremental(
        "segments_date",
        initial_value=start_date.date(),  # type: ignore
        end_value=end_date.date() if end_date is not None else None,  # type: ignore
        range_start="closed",
        range_end="closed",
    )
    if report_spec is not None:
        custom_report = Report.from_spec(report_spec)
        yield dlt.resource(
            daily_report,
            name="daily_report",
            write_disposition="merge",
            primary_key=custom_report.primary_keys(),
            columns=dlt_metrics_schema(custom_report.metrics),
        )(client, customer_id, custom_report, date_range)

    for report_name, report in BUILTIN_REPORTS.items():
        yield dlt.resource(
            daily_report,
            name=report_name,
            write_disposition="merge",
            primary_key=report.primary_keys(),
            columns=dlt_metrics_schema(report.metrics),
        )(client, customer_id, report, date_range)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def daily_report(
    client: Resource,
    customer_id: str,
    report: Report,
    date: dlt.sources.incremental[date],
) -> Iterator[TDataItem]:
    """
    Run the query for ``report`` and yield one flat dict per result row.

    The query selects the report's dimensions, metrics and segments from
    ``report.resource``. Unless the report is marked ``unfilterable``, rows
    are restricted on ``segments.date`` to the range described by the
    incremental cursor (``date.last_value`` to ``date.end_value``).

    Each row is converted with ``to_dict``, list-valued metrics are joined by
    ``merge_lists``, and the result is flattened. ``segments_date`` is parsed
    into a ``datetime.date``, and only keys that correspond to a requested
    field are kept.

    :param client: object exposing ``get_service``.
        NOTE(review): annotated as ``Resource`` but used like a Google Ads
        client; confirm the intended type.
    :param customer_id: passed to ``search_stream`` as ``customer_id``.
    :param report: the report definition to query.
    :param date: incremental cursor supplying the date bounds.
    """
    ga_service = client.get_service("GoogleAdsService")
    fields = report.dimensions + report.metrics + report.segments

    # Assemble the query clause by clause. The WHERE clause is only added when
    # it is wanted, instead of being generated and then cut back out of the
    # finished string by searching for the text "WHERE".
    clauses = [
        "SELECT",
        ", ".join(fields),
        "FROM",
        report.resource,
    ]
    if not report.unfilterable:
        criteria = date_predicate("segments.date", date.last_value, date.end_value)  # type:ignore
        clauses += ["WHERE", criteria]
    query = "\n".join(clauses)

    allowed_keys = {field.to_column(k) for k in fields}
    stream = ga_service.search_stream(customer_id=customer_id, query=query)
    for batch in stream:
        for row in batch.results:
            data = flatten(merge_lists(to_dict(row)))
            if "segments_date" in data:
                data["segments_date"] = datetime.strptime(
                    data["segments_date"], "%Y-%m-%d"
                ).date()
            # Drop everything the report did not ask for.
            yield {k: v for k, v in data.items() if k in allowed_keys}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def to_dict(item: Any) -> TDataItem:
    """
    Convert a proto message into plain Python data.

    The message is serialised to JSON with ``proto.Message.to_json`` and the
    JSON text is parsed back with ``json.loads``.

    :param item: the message to convert.
    :return: the parsed JSON representation of ``item``.
    """
    serialized = proto.Message.to_json(
        item,
        preserving_proto_field_name=True,
        use_integers_for_enums=False,
        including_default_value_fields=False,
    )
    return json.loads(serialized)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def merge_lists(item: dict) -> dict:
    """
    Collapse list-valued metrics into comma-separated strings.

    Every value under ``item["metrics"]`` that is a list is replaced by its
    elements joined with ``","``. The dict is modified in place and also
    returned; when there is no ``metrics`` key or no list value, it is
    returned untouched.

    :param item: a row dict, optionally holding a ``metrics`` sub-dict.
    :return: the same ``item`` object.
    """
    joined = {
        name: ",".join(value)
        for name, value in item.get("metrics", {}).items()
        if isinstance(value, list)
    }
    if joined:
        item["metrics"].update(joined)
    return item
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from . import field
|
|
4
|
+
|
|
5
|
+
# Maps each known metric field name ("metrics.<name>") to a type label.
# The labels used here are INT64, DOUBLE, STRING, ENUM, DATE and MESSAGE;
# dlt_metrics_schema translates them to dlt column types through
# METRIC_TO_DLT_TYPE below.
METRICS_SCHEMA = {
    "metrics.absolute_top_impression_percentage": "DOUBLE",
    "metrics.active_view_cpm": "DOUBLE",
    "metrics.active_view_ctr": "DOUBLE",
    "metrics.active_view_impressions": "INT64",
    "metrics.active_view_measurability": "DOUBLE",
    "metrics.active_view_measurable_cost_micros": "INT64",
    "metrics.active_view_measurable_impressions": "INT64",
    "metrics.active_view_viewability": "DOUBLE",
    "metrics.all_conversions": "DOUBLE",
    "metrics.all_conversions_by_conversion_date": "DOUBLE",
    "metrics.all_conversions_from_click_to_call": "DOUBLE",
    "metrics.all_conversions_from_directions": "DOUBLE",
    "metrics.all_conversions_from_interactions_rate": "DOUBLE",
    "metrics.all_conversions_from_interactions_value_per_interaction": "DOUBLE",
    "metrics.all_conversions_from_location_asset_click_to_call": "DOUBLE",
    "metrics.all_conversions_from_location_asset_directions": "DOUBLE",
    "metrics.all_conversions_from_location_asset_menu": "DOUBLE",
    "metrics.all_conversions_from_location_asset_order": "DOUBLE",
    "metrics.all_conversions_from_location_asset_other_engagement": "DOUBLE",
    "metrics.all_conversions_from_location_asset_store_visits": "DOUBLE",
    "metrics.all_conversions_from_location_asset_website": "DOUBLE",
    "metrics.all_conversions_from_menu": "DOUBLE",
    "metrics.all_conversions_from_order": "DOUBLE",
    "metrics.all_conversions_from_other_engagement": "DOUBLE",
    "metrics.all_conversions_from_store_visit": "DOUBLE",
    "metrics.all_conversions_from_store_website": "DOUBLE",
    "metrics.all_conversions_value": "DOUBLE",
    "metrics.all_conversions_value_by_conversion_date": "DOUBLE",
    "metrics.all_conversions_value_per_cost": "DOUBLE",
    "metrics.all_new_customer_lifetime_value": "DOUBLE",
    "metrics.asset_best_performance_cost_percentage": "DOUBLE",
    "metrics.asset_best_performance_impression_percentage": "DOUBLE",
    "metrics.asset_good_performance_cost_percentage": "DOUBLE",
    "metrics.asset_good_performance_impression_percentage": "DOUBLE",
    "metrics.asset_learning_performance_cost_percentage": "DOUBLE",
    "metrics.asset_learning_performance_impression_percentage": "DOUBLE",
    "metrics.asset_low_performance_cost_percentage": "DOUBLE",
    "metrics.asset_low_performance_impression_percentage": "DOUBLE",
    "metrics.asset_pinned_as_description_position_one_count": "INT64",
    "metrics.asset_pinned_as_description_position_two_count": "INT64",
    "metrics.asset_pinned_as_headline_position_one_count": "INT64",
    "metrics.asset_pinned_as_headline_position_three_count": "INT64",
    "metrics.asset_pinned_as_headline_position_two_count": "INT64",
    "metrics.asset_pinned_total_count": "INT64",
    "metrics.asset_unrated_performance_cost_percentage": "DOUBLE",
    "metrics.asset_unrated_performance_impression_percentage": "DOUBLE",
    "metrics.auction_insight_search_absolute_top_impression_percentage": "DOUBLE",
    "metrics.auction_insight_search_impression_share": "DOUBLE",
    "metrics.auction_insight_search_outranking_share": "DOUBLE",
    "metrics.auction_insight_search_overlap_rate": "DOUBLE",
    "metrics.auction_insight_search_position_above_rate": "DOUBLE",
    "metrics.auction_insight_search_top_impression_percentage": "DOUBLE",
    "metrics.average_cart_size": "DOUBLE",
    "metrics.average_cost": "DOUBLE",
    "metrics.average_cpc": "DOUBLE",
    "metrics.average_cpe": "DOUBLE",
    "metrics.average_cpm": "DOUBLE",
    "metrics.average_cpv": "DOUBLE",
    "metrics.average_impression_frequency_per_user": "DOUBLE",
    "metrics.average_order_value_micros": "INT64",
    "metrics.average_page_views": "DOUBLE",
    "metrics.average_target_cpa_micros": "INT64",
    "metrics.average_target_roas": "DOUBLE",
    "metrics.average_time_on_site": "DOUBLE",
    "metrics.benchmark_average_max_cpc": "DOUBLE",
    "metrics.benchmark_ctr": "DOUBLE",
    "metrics.biddable_app_install_conversions": "DOUBLE",
    "metrics.biddable_app_post_install_conversions": "DOUBLE",
    "metrics.bounce_rate": "DOUBLE",
    "metrics.clicks": "INT64",
    "metrics.combined_clicks": "INT64",
    "metrics.combined_clicks_per_query": "DOUBLE",
    "metrics.combined_queries": "INT64",
    "metrics.content_budget_lost_impression_share": "DOUBLE",
    "metrics.content_impression_share": "DOUBLE",
    "metrics.content_rank_lost_impression_share": "DOUBLE",
    # NOTE: "DATE" has no entry in METRIC_TO_DLT_TYPE, so dlt_metrics_schema
    # rejects the two DATE metrics below.
    "metrics.conversion_last_conversion_date": "DATE",
    "metrics.conversion_last_received_request_date_time": "DATE",
    "metrics.conversions": "DOUBLE",
    "metrics.conversions_by_conversion_date": "DOUBLE",
    "metrics.conversions_from_interactions_rate": "DOUBLE",
    "metrics.conversions_from_interactions_value_per_interaction": "DOUBLE",
    "metrics.conversions_value": "DOUBLE",
    "metrics.conversions_value_by_conversion_date": "DOUBLE",
    "metrics.conversions_value_per_cost": "DOUBLE",
    "metrics.cost_micros": "INT64",
    "metrics.cost_of_goods_sold_micros": "INT64",
    "metrics.cost_per_all_conversions": "DOUBLE",
    "metrics.cost_per_conversion": "DOUBLE",
    "metrics.cost_per_current_model_attributed_conversion": "DOUBLE",
    "metrics.cross_device_conversions": "DOUBLE",
    "metrics.cross_device_conversions_value_micros": "INT64",
    "metrics.cross_sell_cost_of_goods_sold_micros": "INT64",
    "metrics.cross_sell_gross_profit_micros": "INT64",
    "metrics.cross_sell_revenue_micros": "INT64",
    "metrics.cross_sell_units_sold": "DOUBLE",
    "metrics.ctr": "DOUBLE",
    "metrics.current_model_attributed_conversions": "DOUBLE",
    "metrics.current_model_attributed_conversions_from_interactions_rate": "DOUBLE",
    "metrics.current_model_attributed_conversions_from_interactions_value_per_interaction": "DOUBLE",
    "metrics.current_model_attributed_conversions_value": "DOUBLE",
    "metrics.current_model_attributed_conversions_value_per_cost": "DOUBLE",
    "metrics.eligible_impressions_from_location_asset_store_reach": "INT64",
    "metrics.engagement_rate": "DOUBLE",
    "metrics.engagements": "INT64",
    "metrics.general_invalid_click_rate": "DOUBLE",
    "metrics.general_invalid_clicks": "INT64",
    "metrics.gmail_forwards": "INT64",
    "metrics.gmail_saves": "INT64",
    "metrics.gmail_secondary_clicks": "INT64",
    "metrics.gross_profit_margin": "DOUBLE",
    "metrics.gross_profit_micros": "INT64",
    "metrics.historical_creative_quality_score": "ENUM",
    "metrics.historical_landing_page_quality_score": "ENUM",
    "metrics.historical_quality_score": "INT64",
    "metrics.historical_search_predicted_ctr": "ENUM",
    "metrics.hotel_average_lead_value_micros": "DOUBLE",
    "metrics.hotel_commission_rate_micros": "INT64",
    "metrics.hotel_eligible_impressions": "INT64",
    "metrics.hotel_expected_commission_cost": "DOUBLE",
    "metrics.hotel_price_difference_percentage": "DOUBLE",
    "metrics.impressions": "INT64",
    "metrics.impressions_from_store_reach": "INT64",
    "metrics.interaction_event_types": "ENUM",
    "metrics.interaction_rate": "DOUBLE",
    "metrics.interactions": "INT64",
    "metrics.invalid_click_rate": "DOUBLE",
    "metrics.invalid_clicks": "INT64",
    "metrics.lead_cost_of_goods_sold_micros": "INT64",
    "metrics.lead_gross_profit_micros": "INT64",
    "metrics.lead_revenue_micros": "INT64",
    "metrics.lead_units_sold": "DOUBLE",
    "metrics.linked_entities_count": "INT64",
    "metrics.linked_sample_entities": "STRING",
    "metrics.message_chat_rate": "DOUBLE",
    "metrics.message_chats": "INT64",
    "metrics.message_impressions": "INT64",
    "metrics.mobile_friendly_clicks_percentage": "DOUBLE",
    "metrics.new_customer_lifetime_value": "DOUBLE",
    "metrics.optimization_score_uplift": "DOUBLE",
    "metrics.optimization_score_url": "STRING",
    "metrics.orders": "DOUBLE",
    "metrics.organic_clicks": "INT64",
    "metrics.organic_clicks_per_query": "DOUBLE",
    "metrics.organic_impressions": "INT64",
    "metrics.organic_impressions_per_query": "DOUBLE",
    "metrics.organic_queries": "INT64",
    "metrics.percent_new_visitors": "DOUBLE",
    "metrics.phone_calls": "INT64",
    "metrics.phone_impressions": "INT64",
    "metrics.phone_through_rate": "DOUBLE",
    "metrics.publisher_organic_clicks": "INT64",
    "metrics.publisher_purchased_clicks": "INT64",
    "metrics.publisher_unknown_clicks": "INT64",
    "metrics.relative_ctr": "DOUBLE",
    "metrics.results_conversions_purchase": "DOUBLE",
    "metrics.revenue_micros": "INT64",
    "metrics.sample_best_performance_entities": "STRING",
    "metrics.sample_good_performance_entities": "STRING",
    "metrics.sample_learning_performance_entities": "STRING",
    "metrics.sample_low_performance_entities": "STRING",
    "metrics.sample_unrated_performance_entities": "STRING",
    "metrics.search_absolute_top_impression_share": "DOUBLE",
    "metrics.search_budget_lost_absolute_top_impression_share": "DOUBLE",
    "metrics.search_budget_lost_impression_share": "DOUBLE",
    "metrics.search_budget_lost_top_impression_share": "DOUBLE",
    "metrics.search_click_share": "DOUBLE",
    "metrics.search_exact_match_impression_share": "DOUBLE",
    "metrics.search_impression_share": "DOUBLE",
    "metrics.search_rank_lost_absolute_top_impression_share": "DOUBLE",
    "metrics.search_rank_lost_impression_share": "DOUBLE",
    "metrics.search_rank_lost_top_impression_share": "DOUBLE",
    "metrics.search_top_impression_share": "DOUBLE",
    # NOTE: "MESSAGE" is not mapped in METRIC_TO_DLT_TYPE either (see the
    # TODO there), so this metric is rejected by dlt_metrics_schema.
    "metrics.search_volume": "MESSAGE",
    "metrics.sk_ad_network_installs": "INT64",
    "metrics.sk_ad_network_total_conversions": "INT64",
    "metrics.speed_score": "INT64",
    "metrics.store_visits_last_click_model_attributed_conversions": "DOUBLE",
    "metrics.top_impression_percentage": "DOUBLE",
    "metrics.unique_users": "INT64",
    "metrics.units_sold": "DOUBLE",
    "metrics.valid_accelerated_mobile_pages_clicks_percentage": "DOUBLE",
    "metrics.value_per_all_conversions": "DOUBLE",
    "metrics.value_per_all_conversions_by_conversion_date": "DOUBLE",
    "metrics.value_per_conversion": "DOUBLE",
    "metrics.value_per_conversions_by_conversion_date": "DOUBLE",
    "metrics.value_per_current_model_attributed_conversion": "DOUBLE",
    "metrics.video_quartile_p100_rate": "DOUBLE",
    "metrics.video_quartile_p25_rate": "DOUBLE",
    "metrics.video_quartile_p50_rate": "DOUBLE",
    "metrics.video_quartile_p75_rate": "DOUBLE",
    "metrics.video_view_rate": "DOUBLE",
    "metrics.video_view_rate_in_feed": "DOUBLE",
    "metrics.video_view_rate_in_stream": "DOUBLE",
    "metrics.video_view_rate_shorts": "DOUBLE",
    "metrics.video_views": "INT64",
    "metrics.view_through_conversions": "INT64",
    "metrics.view_through_conversions_from_location_asset_click_to_call": "DOUBLE",
    "metrics.view_through_conversions_from_location_asset_directions": "DOUBLE",
    "metrics.view_through_conversions_from_location_asset_menu": "DOUBLE",
    "metrics.view_through_conversions_from_location_asset_order": "DOUBLE",
    "metrics.view_through_conversions_from_location_asset_other_engagement": "DOUBLE",
    "metrics.view_through_conversions_from_location_asset_store_visits": "DOUBLE",
    "metrics.view_through_conversions_from_location_asset_website": "DOUBLE",
}

# Translates the type labels used in METRICS_SCHEMA into the "data_type"
# values that dlt_metrics_schema puts into its column hints. A label missing
# from this table makes dlt_metrics_schema raise ValueError for that metric.
METRIC_TO_DLT_TYPE = {
    "INT64": "bigint",
    "DOUBLE": "double",
    "STRING": "text",
    "ENUM": "text",
    # TODO: support message types
    # "MESSAGE": "string",
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def dlt_metrics_schema(metrics: List[str]):
    """
    Build the column hints for the given metrics.

    Each metric is looked up in ``METRICS_SCHEMA`` and its type label is
    translated through ``METRIC_TO_DLT_TYPE``. The result maps the column
    name of every metric (``field.to_column``) to a dict holding its
    ``data_type``.

    :param metrics: metric field names to describe.
    :return: a dict of column name to ``{"data_type": ...}``.
    :raises ValueError: if a metric is unknown, or if its type label has no
        dlt equivalent.
    """
    columns = {}
    for name in metrics:
        api_type = METRICS_SCHEMA.get(name)
        if api_type is None:
            raise ValueError(f"Unsupported metric {name}")

        dlt_type = METRIC_TO_DLT_TYPE.get(api_type)
        if dlt_type is None:
            raise ValueError(f"Unsupported metric '{name}' of type '{api_type}'")

        # ???: can we make these non-nullable?
        columns[field.to_column(name)] = {"data_type": dlt_type}
    return columns
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from datetime import date, datetime, timezone
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def date_predicate(column: str, start_date: date, end_date: Optional[date]) -> str:
    """
    Generate a date predicate for the WHERE clause of a GAQL query.

    The predicate restricts ``column`` to the closed range
    ``[start_date, end_date]``, with both bounds rendered as ``YYYY-MM-DD``.

    :param column: name of the column to filter on.
    :param start_date: lower bound (inclusive); required.
    :param end_date: upper bound (inclusive); when None, the current UTC
        date is used.
    :return: ``"<column> >= '<start>' AND <column> <= '<end>'"``.
    :raises ValueError: if ``start_date`` is None.
    """
    if start_date is None:
        raise ValueError("start_date must be provided")

    if end_date is None:
        end_date = datetime.now(tz=timezone.utc).date()

    # Both bounds are guaranteed to be set at this point, so the predicate
    # always has exactly two clauses.
    lower = start_date.strftime("%Y-%m-%d")
    upper = end_date.strftime("%Y-%m-%d")
    return f"{column} >= '{lower}' AND {column} <= '{upper}'"
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
from typing import Dict, List
|
|
2
|
+
|
|
3
|
+
from . import field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Report:
    """
    Definition of a report: the resource to query and the dimension, metric
    and segment fields to select from it.
    """

    # Name of the resource used in the FROM clause of the query.
    resource: str
    # When True, the query is issued without a date filter.
    unfilterable: bool
    dimensions: List[str]
    metrics: List[str]
    segments: List[str]

    def __init__(
        self,
        resource: str = "",
        dimensions: List[str] = [],
        metrics: List[str] = [],
        segments: List[str] = [],
        unfilterable: bool = False,
    ):
        """
        :param resource: resource name; defaults to an empty string.
        :param dimensions: dimension field names.
        :param metrics: metric field names.
        :param segments: segment field names.
        :param unfilterable: whether the date filter must be omitted.
        """
        self.resource = resource
        # The default lists in the signature are created once and shared by
        # every call. Each instance therefore stores its own copy, so that
        # mutating one report's lists can never leak into another report.
        self.dimensions = list(dimensions)
        self.metrics = list(metrics)
        self.segments = list(segments)
        self.unfilterable = unfilterable

    def primary_keys(self) -> List[str]:
        """Return the column names of all dimensions followed by all segments."""
        return [field.to_column(k) for k in self.dimensions + self.segments]

    @classmethod
    def from_spec(cls, spec: str):
        """
        Parse a report specification string into a Report object.

        The expected format is:
            daily:{resource}:{dimensions}:{metrics}

        The first segment is not inspected by this parser. Dimensions and
        metrics are comma-separated and may be empty. Metrics may be given
        with or without the "metrics." prefix. The resulting report always
        has "segments.date" as its only segment.

        Example:
            daily:ad_group_ad_asset_view:ad_group.id,campaign.id:clicks,conversions

        :raises ValueError: if the spec does not contain exactly three
            colons, or if a dimension is malformed.
        """
        if spec.count(":") != 3:
            raise ValueError(
                "Invalid report specification format. Expected daily:{resource}:{dimensions}:{metrics}"
            )

        _, resource, dimensions, metrics = spec.split(":")

        report = cls()
        report.segments = ["segments.date"]
        report.resource = resource
        if dimensions.strip() != "":
            report.dimensions = [
                cls._parse_dimension(d) for d in dimensions.split(",")
            ]
        if metrics.strip() != "":
            report.metrics = [cls._parse_metric(m) for m in metrics.split(",")]
        return report

    @classmethod
    def _parse_dimension(cls, dim: str):
        """
        Validate a single dimension and return it stripped of whitespace.

        :raises ValueError: if the dimension has no "." or is a segment.
        """
        dim = dim.strip()
        if "." not in dim:
            raise ValueError("Invalid dimension format. Expected {resource}.{field}")
        if dim.startswith("segments."):
            raise ValueError(
                "Invalid dimension format. Segments are not allowed in dimensions."
            )
        return dim

    @classmethod
    def _parse_metric(cls, metric: str):
        """Return the stripped metric, adding the "metrics." prefix if absent."""
        metric = metric.strip()
        if not metric.startswith("metrics."):
            metric = f"metrics.{metric}"
        return metric
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
BUILTIN_REPORTS: Dict[str, Report] = {
|
|
78
|
+
"account_report_daily": Report(
|
|
79
|
+
resource="campaign",
|
|
80
|
+
dimensions=[
|
|
81
|
+
"customer.id",
|
|
82
|
+
],
|
|
83
|
+
metrics=[
|
|
84
|
+
"metrics.active_view_impressions",
|
|
85
|
+
"metrics.active_view_measurability",
|
|
86
|
+
"metrics.active_view_measurable_cost_micros",
|
|
87
|
+
"metrics.active_view_measurable_impressions",
|
|
88
|
+
"metrics.active_view_viewability",
|
|
89
|
+
"metrics.clicks",
|
|
90
|
+
"metrics.conversions",
|
|
91
|
+
"metrics.conversions_value",
|
|
92
|
+
"metrics.cost_micros",
|
|
93
|
+
"metrics.impressions",
|
|
94
|
+
"metrics.interactions",
|
|
95
|
+
"metrics.interaction_event_types",
|
|
96
|
+
"metrics.view_through_conversions",
|
|
97
|
+
],
|
|
98
|
+
segments=[
|
|
99
|
+
"segments.date",
|
|
100
|
+
"segments.ad_network_type",
|
|
101
|
+
"segments.device",
|
|
102
|
+
],
|
|
103
|
+
),
|
|
104
|
+
"campaign_report_daily": Report(
|
|
105
|
+
resource="campaign",
|
|
106
|
+
dimensions=[
|
|
107
|
+
"campaign.id",
|
|
108
|
+
"customer.id",
|
|
109
|
+
],
|
|
110
|
+
metrics=[
|
|
111
|
+
"metrics.active_view_impressions",
|
|
112
|
+
"metrics.active_view_measurability",
|
|
113
|
+
"metrics.active_view_measurable_cost_micros",
|
|
114
|
+
"metrics.active_view_measurable_impressions",
|
|
115
|
+
"metrics.active_view_viewability",
|
|
116
|
+
"metrics.clicks",
|
|
117
|
+
"metrics.conversions",
|
|
118
|
+
"metrics.conversions_value",
|
|
119
|
+
"metrics.cost_micros",
|
|
120
|
+
"metrics.impressions",
|
|
121
|
+
"metrics.interactions",
|
|
122
|
+
"metrics.interaction_event_types",
|
|
123
|
+
"metrics.view_through_conversions",
|
|
124
|
+
],
|
|
125
|
+
segments=[
|
|
126
|
+
"segments.date",
|
|
127
|
+
"segments.ad_network_type",
|
|
128
|
+
"segments.device",
|
|
129
|
+
],
|
|
130
|
+
),
|
|
131
|
+
"ad_group_report_daily": Report(
|
|
132
|
+
resource="ad_group",
|
|
133
|
+
dimensions=[
|
|
134
|
+
"ad_group.id",
|
|
135
|
+
"customer.id",
|
|
136
|
+
"campaign.id",
|
|
137
|
+
],
|
|
138
|
+
metrics=[
|
|
139
|
+
"metrics.active_view_impressions",
|
|
140
|
+
"metrics.active_view_measurability",
|
|
141
|
+
"metrics.active_view_measurable_cost_micros",
|
|
142
|
+
"metrics.active_view_measurable_impressions",
|
|
143
|
+
"metrics.active_view_viewability",
|
|
144
|
+
"metrics.clicks",
|
|
145
|
+
"metrics.conversions",
|
|
146
|
+
"metrics.conversions_value",
|
|
147
|
+
"metrics.cost_micros",
|
|
148
|
+
"metrics.impressions",
|
|
149
|
+
"metrics.interactions",
|
|
150
|
+
"metrics.interaction_event_types",
|
|
151
|
+
"metrics.view_through_conversions",
|
|
152
|
+
],
|
|
153
|
+
segments=[
|
|
154
|
+
"segments.date",
|
|
155
|
+
"segments.ad_network_type",
|
|
156
|
+
"segments.device",
|
|
157
|
+
],
|
|
158
|
+
),
|
|
159
|
+
"ad_report_daily": Report(
|
|
160
|
+
resource="ad_group_ad",
|
|
161
|
+
dimensions=[
|
|
162
|
+
"ad_group.id",
|
|
163
|
+
"ad_group_ad.ad.id",
|
|
164
|
+
"customer.id",
|
|
165
|
+
"campaign.id",
|
|
166
|
+
],
|
|
167
|
+
segments=[
|
|
168
|
+
"segments.date",
|
|
169
|
+
"segments.ad_network_type",
|
|
170
|
+
"segments.device",
|
|
171
|
+
],
|
|
172
|
+
metrics=[
|
|
173
|
+
"metrics.active_view_impressions",
|
|
174
|
+
"metrics.active_view_measurability",
|
|
175
|
+
"metrics.active_view_measurable_cost_micros",
|
|
176
|
+
"metrics.active_view_measurable_impressions",
|
|
177
|
+
"metrics.active_view_viewability",
|
|
178
|
+
"metrics.clicks",
|
|
179
|
+
"metrics.conversions",
|
|
180
|
+
"metrics.conversions_value",
|
|
181
|
+
"metrics.cost_micros",
|
|
182
|
+
"metrics.impressions",
|
|
183
|
+
"metrics.interactions",
|
|
184
|
+
"metrics.interaction_event_types",
|
|
185
|
+
"metrics.view_through_conversions",
|
|
186
|
+
],
|
|
187
|
+
),
|
|
188
|
+
"audience_report_daily": Report(
|
|
189
|
+
resource="ad_group_audience_view",
|
|
190
|
+
dimensions=[
|
|
191
|
+
"ad_group.id",
|
|
192
|
+
"customer.id",
|
|
193
|
+
"campaign.id",
|
|
194
|
+
"ad_group_criterion.criterion_id",
|
|
195
|
+
],
|
|
196
|
+
segments=[
|
|
197
|
+
"segments.date",
|
|
198
|
+
"segments.ad_network_type",
|
|
199
|
+
"segments.device",
|
|
200
|
+
],
|
|
201
|
+
metrics=[
|
|
202
|
+
"metrics.active_view_impressions",
|
|
203
|
+
"metrics.active_view_measurability",
|
|
204
|
+
"metrics.active_view_measurable_cost_micros",
|
|
205
|
+
"metrics.active_view_measurable_impressions",
|
|
206
|
+
"metrics.active_view_viewability",
|
|
207
|
+
"metrics.clicks",
|
|
208
|
+
"metrics.conversions",
|
|
209
|
+
"metrics.conversions_value",
|
|
210
|
+
"metrics.cost_micros",
|
|
211
|
+
"metrics.impressions",
|
|
212
|
+
"metrics.interactions",
|
|
213
|
+
"metrics.interaction_event_types",
|
|
214
|
+
"metrics.view_through_conversions",
|
|
215
|
+
],
|
|
216
|
+
),
|
|
217
|
+
"keyword_report_daily": Report(
|
|
218
|
+
resource="keyword_view",
|
|
219
|
+
dimensions=[
|
|
220
|
+
"ad_group.id",
|
|
221
|
+
"customer.id",
|
|
222
|
+
"campaign.id",
|
|
223
|
+
"ad_group_criterion.criterion_id",
|
|
224
|
+
],
|
|
225
|
+
segments=[
|
|
226
|
+
"segments.date",
|
|
227
|
+
"segments.ad_network_type",
|
|
228
|
+
"segments.device",
|
|
229
|
+
],
|
|
230
|
+
metrics=[
|
|
231
|
+
"metrics.active_view_impressions",
|
|
232
|
+
"metrics.active_view_measurability",
|
|
233
|
+
"metrics.active_view_measurable_cost_micros",
|
|
234
|
+
"metrics.active_view_measurable_impressions",
|
|
235
|
+
"metrics.active_view_viewability",
|
|
236
|
+
"metrics.clicks",
|
|
237
|
+
"metrics.conversions",
|
|
238
|
+
"metrics.conversions_value",
|
|
239
|
+
"metrics.cost_micros",
|
|
240
|
+
"metrics.impressions",
|
|
241
|
+
"metrics.interactions",
|
|
242
|
+
"metrics.interaction_event_types",
|
|
243
|
+
"metrics.view_through_conversions",
|
|
244
|
+
],
|
|
245
|
+
),
|
|
246
|
+
"click_report_daily": Report(
|
|
247
|
+
resource="click_view",
|
|
248
|
+
dimensions=[
|
|
249
|
+
"click_view.gclid",
|
|
250
|
+
"customer.id",
|
|
251
|
+
"ad_group.id",
|
|
252
|
+
"campaign.id",
|
|
253
|
+
"segments.date",
|
|
254
|
+
],
|
|
255
|
+
metrics=[
|
|
256
|
+
"metrics.clicks",
|
|
257
|
+
],
|
|
258
|
+
),
|
|
259
|
+
"landing_page_report_daily": Report(
|
|
260
|
+
resource="landing_page_view",
|
|
261
|
+
dimensions=[
|
|
262
|
+
"landing_page_view.unexpanded_final_url",
|
|
263
|
+
"landing_page_view.resource_name",
|
|
264
|
+
"customer.id",
|
|
265
|
+
"ad_group.id",
|
|
266
|
+
"campaign.id",
|
|
267
|
+
"segments.date",
|
|
268
|
+
],
|
|
269
|
+
metrics=[
|
|
270
|
+
"metrics.average_cpc",
|
|
271
|
+
"metrics.clicks",
|
|
272
|
+
"metrics.cost_micros",
|
|
273
|
+
"metrics.ctr",
|
|
274
|
+
"metrics.impressions",
|
|
275
|
+
"metrics.mobile_friendly_clicks_percentage",
|
|
276
|
+
"metrics.speed_score",
|
|
277
|
+
"metrics.valid_accelerated_mobile_pages_clicks_percentage",
|
|
278
|
+
],
|
|
279
|
+
),
|
|
280
|
+
"search_keyword_report_daily": Report(
|
|
281
|
+
resource="keyword_view",
|
|
282
|
+
dimensions=[
|
|
283
|
+
"customer.id",
|
|
284
|
+
"ad_group.id",
|
|
285
|
+
"campaign.id",
|
|
286
|
+
"keyword_view.resource_name",
|
|
287
|
+
"ad_group_criterion.criterion_id",
|
|
288
|
+
"segments.date",
|
|
289
|
+
],
|
|
290
|
+
metrics=[
|
|
291
|
+
"metrics.absolute_top_impression_percentage",
|
|
292
|
+
"metrics.average_cpc",
|
|
293
|
+
"metrics.average_cpm",
|
|
294
|
+
"metrics.clicks",
|
|
295
|
+
"metrics.conversions_from_interactions_rate",
|
|
296
|
+
"metrics.conversions_value",
|
|
297
|
+
"metrics.cost_micros",
|
|
298
|
+
"metrics.ctr",
|
|
299
|
+
"metrics.impressions",
|
|
300
|
+
"metrics.top_impression_percentage",
|
|
301
|
+
"metrics.view_through_conversions",
|
|
302
|
+
],
|
|
303
|
+
),
|
|
304
|
+
"search_term_report_daily": Report(
|
|
305
|
+
resource="search_term_view",
|
|
306
|
+
dimensions=[
|
|
307
|
+
"customer.id",
|
|
308
|
+
"ad_group.id",
|
|
309
|
+
"campaign.id",
|
|
310
|
+
"search_term_view.resource_name",
|
|
311
|
+
"search_term_view.search_term",
|
|
312
|
+
"search_term_view.status",
|
|
313
|
+
"segments.date",
|
|
314
|
+
],
|
|
315
|
+
segments=[
|
|
316
|
+
"segments.search_term_match_type",
|
|
317
|
+
],
|
|
318
|
+
metrics=[
|
|
319
|
+
"metrics.absolute_top_impression_percentage",
|
|
320
|
+
"metrics.average_cpc",
|
|
321
|
+
"metrics.clicks",
|
|
322
|
+
"metrics.conversions",
|
|
323
|
+
"metrics.conversions_from_interactions_rate",
|
|
324
|
+
"metrics.conversions_from_interactions_value_per_interaction",
|
|
325
|
+
"metrics.cost_micros",
|
|
326
|
+
"metrics.ctr",
|
|
327
|
+
"metrics.impressions",
|
|
328
|
+
"metrics.top_impression_percentage",
|
|
329
|
+
"metrics.view_through_conversions",
|
|
330
|
+
],
|
|
331
|
+
),
|
|
332
|
+
"lead_form_submission_data_report_daily": Report(
|
|
333
|
+
resource="lead_form_submission_data",
|
|
334
|
+
dimensions=[
|
|
335
|
+
"lead_form_submission_data.gclid",
|
|
336
|
+
"lead_form_submission_data.submission_date_time",
|
|
337
|
+
"lead_form_submission_data.lead_form_submission_fields",
|
|
338
|
+
"lead_form_submission_data.custom_lead_form_submission_fields",
|
|
339
|
+
"lead_form_submission_data.resource_name",
|
|
340
|
+
"customer.id",
|
|
341
|
+
"ad_group_ad.ad.id",
|
|
342
|
+
"ad_group.id",
|
|
343
|
+
"campaign.id",
|
|
344
|
+
],
|
|
345
|
+
unfilterable=True,
|
|
346
|
+
),
|
|
347
|
+
"local_services_lead_report_daily": Report(
|
|
348
|
+
resource="local_services_lead",
|
|
349
|
+
dimensions=[
|
|
350
|
+
"customer.id",
|
|
351
|
+
"local_services_lead.creation_date_time",
|
|
352
|
+
"local_services_lead.contact_details",
|
|
353
|
+
"local_services_lead.credit_details.credit_state",
|
|
354
|
+
"local_services_lead.credit_details.credit_state_last_update_date_time",
|
|
355
|
+
"local_services_lead.lead_charged",
|
|
356
|
+
"local_services_lead.lead_status",
|
|
357
|
+
"local_services_lead.lead_type",
|
|
358
|
+
"local_services_lead.locale",
|
|
359
|
+
"local_services_lead.note.description",
|
|
360
|
+
"local_services_lead.note.edit_date_time",
|
|
361
|
+
"local_services_lead.service_id",
|
|
362
|
+
],
|
|
363
|
+
unfilterable=True,
|
|
364
|
+
),
|
|
365
|
+
"local_services_lead_conversations_report_daily": Report(
|
|
366
|
+
resource="local_services_lead_conversation",
|
|
367
|
+
dimensions=[
|
|
368
|
+
"customer.id",
|
|
369
|
+
"local_services_lead_conversation.id",
|
|
370
|
+
"local_services_lead_conversation.event_date_time",
|
|
371
|
+
"local_services_lead_conversation.conversation_channel",
|
|
372
|
+
"local_services_lead_conversation.message_details.attachment_urls",
|
|
373
|
+
"local_services_lead_conversation.message_details.text",
|
|
374
|
+
"local_services_lead_conversation.participant_type",
|
|
375
|
+
"local_services_lead_conversation.phone_call_details.call_duration_millis",
|
|
376
|
+
"local_services_lead_conversation.phone_call_details.call_recording_url",
|
|
377
|
+
],
|
|
378
|
+
unfilterable=True,
|
|
379
|
+
),
|
|
380
|
+
}
|
ingestr/src/sources.py
CHANGED
|
@@ -3,7 +3,8 @@ import csv
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
|
-
|
|
6
|
+
import tempfile
|
|
7
|
+
from datetime import date, datetime, timedelta, timezone
|
|
7
8
|
from typing import (
|
|
8
9
|
Any,
|
|
9
10
|
Callable,
|
|
@@ -18,8 +19,8 @@ from urllib.parse import ParseResult, parse_qs, quote, urlparse
|
|
|
18
19
|
|
|
19
20
|
import dlt
|
|
20
21
|
import gcsfs # type: ignore
|
|
21
|
-
import s3fs # type: ignore
|
|
22
22
|
import pendulum
|
|
23
|
+
import s3fs # type: ignore
|
|
23
24
|
from dlt.common.configuration.specs import (
|
|
24
25
|
AwsCredentials,
|
|
25
26
|
)
|
|
@@ -41,9 +42,11 @@ from dlt.sources.sql_database.schema_types import (
|
|
|
41
42
|
Table,
|
|
42
43
|
TTypeAdapter,
|
|
43
44
|
)
|
|
45
|
+
from google.ads.googleads.client import GoogleAdsClient # type: ignore
|
|
44
46
|
from sqlalchemy import Column
|
|
45
47
|
from sqlalchemy import types as sa
|
|
46
48
|
|
|
49
|
+
from ingestr.src import blob
|
|
47
50
|
from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
|
|
48
51
|
from ingestr.src.adjust.adjust_helpers import parse_filters
|
|
49
52
|
from ingestr.src.airtable import airtable_source
|
|
@@ -55,6 +58,7 @@ from ingestr.src.asana_source import asana_source
|
|
|
55
58
|
from ingestr.src.chess import source
|
|
56
59
|
from ingestr.src.dynamodb import dynamodb
|
|
57
60
|
from ingestr.src.errors import (
|
|
61
|
+
InvalidBlobTableError,
|
|
58
62
|
MissingValueError,
|
|
59
63
|
UnsupportedResourceError,
|
|
60
64
|
)
|
|
@@ -62,6 +66,7 @@ from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_sour
|
|
|
62
66
|
from ingestr.src.filesystem import readers
|
|
63
67
|
from ingestr.src.filters import table_adapter_exclude_columns
|
|
64
68
|
from ingestr.src.github import github_reactions, github_repo_events, github_stargazers
|
|
69
|
+
from ingestr.src.google_ads import google_ads
|
|
65
70
|
from ingestr.src.google_analytics import google_analytics
|
|
66
71
|
from ingestr.src.google_sheets import google_spreadsheet
|
|
67
72
|
from ingestr.src.gorgias import gorgias_source
|
|
@@ -1095,16 +1100,11 @@ class S3Source:
|
|
|
1095
1100
|
if not secret_access_key:
|
|
1096
1101
|
raise ValueError("secret_access_key is required to connect to S3")
|
|
1097
1102
|
|
|
1098
|
-
bucket_name = parsed_uri
|
|
1099
|
-
if not bucket_name:
|
|
1100
|
-
raise
|
|
1101
|
-
"Invalid S3 URI: The bucket name is missing. Ensure your S3 URI follows the format 's3://bucket-name"
|
|
1102
|
-
)
|
|
1103
|
-
bucket_url = f"s3://{bucket_name}"
|
|
1103
|
+
bucket_name, path_to_file = blob.parse_uri(parsed_uri, table)
|
|
1104
|
+
if not bucket_name or not path_to_file:
|
|
1105
|
+
raise InvalidBlobTableError("S3")
|
|
1104
1106
|
|
|
1105
|
-
|
|
1106
|
-
if not path_to_file:
|
|
1107
|
-
raise ValueError("--source-table must be specified")
|
|
1107
|
+
bucket_url = f"s3://{bucket_name}/"
|
|
1108
1108
|
|
|
1109
1109
|
fs = s3fs.S3FileSystem(
|
|
1110
1110
|
key=access_key_id[0],
|
|
@@ -1123,9 +1123,7 @@ class S3Source:
|
|
|
1123
1123
|
"S3 Source only supports specific formats files: csv, jsonl, parquet"
|
|
1124
1124
|
)
|
|
1125
1125
|
|
|
1126
|
-
return readers(
|
|
1127
|
-
bucket_url, fs, path_to_file
|
|
1128
|
-
).with_resources(endpoint)
|
|
1126
|
+
return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
|
|
1129
1127
|
|
|
1130
1128
|
|
|
1131
1129
|
class TikTokSource:
|
|
@@ -1332,6 +1330,7 @@ class DynamoDBSource:
|
|
|
1332
1330
|
range_start="closed",
|
|
1333
1331
|
)
|
|
1334
1332
|
|
|
1333
|
+
# bug: we never validate table.
|
|
1335
1334
|
return dynamodb(table, creds, incremental)
|
|
1336
1335
|
|
|
1337
1336
|
|
|
@@ -1522,6 +1521,13 @@ class GCSSource:
|
|
|
1522
1521
|
|
|
1523
1522
|
parsed_uri = urlparse(uri)
|
|
1524
1523
|
params = parse_qs(parsed_uri.query)
|
|
1524
|
+
|
|
1525
|
+
bucket_name, path_to_file = blob.parse_uri(parsed_uri, table)
|
|
1526
|
+
if not bucket_name or not path_to_file:
|
|
1527
|
+
raise InvalidBlobTableError("GCS")
|
|
1528
|
+
|
|
1529
|
+
bucket_url = f"gs://{bucket_name}"
|
|
1530
|
+
|
|
1525
1531
|
credentials_path = params.get("credentials_path")
|
|
1526
1532
|
credentials_base64 = params.get("credentials_base64")
|
|
1527
1533
|
credentials_available = any(
|
|
@@ -1533,17 +1539,6 @@ class GCSSource:
|
|
|
1533
1539
|
if credentials_available is False:
|
|
1534
1540
|
raise MissingValueError("credentials_path or credentials_base64", "GCS")
|
|
1535
1541
|
|
|
1536
|
-
bucket_name = parsed_uri.hostname
|
|
1537
|
-
if not bucket_name:
|
|
1538
|
-
raise ValueError(
|
|
1539
|
-
"Invalid GCS URI: The bucket name is missing. Ensure your GCS URI follows the format 'gs://bucket-name/path/to/file"
|
|
1540
|
-
)
|
|
1541
|
-
bucket_url = f"gs://{bucket_name}/"
|
|
1542
|
-
|
|
1543
|
-
path_to_file = parsed_uri.path.lstrip("/") or table.lstrip("/")
|
|
1544
|
-
if not path_to_file:
|
|
1545
|
-
raise ValueError("--source-table must be specified")
|
|
1546
|
-
|
|
1547
1542
|
credentials = None
|
|
1548
1543
|
if credentials_path:
|
|
1549
1544
|
credentials = credentials_path[0]
|
|
@@ -1571,9 +1566,99 @@ class GCSSource:
|
|
|
1571
1566
|
"GCS Source only supports specific formats files: csv, jsonl, parquet"
|
|
1572
1567
|
)
|
|
1573
1568
|
|
|
1574
|
-
return readers(
|
|
1575
|
-
|
|
1576
|
-
|
|
1569
|
+
return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
|
|
1570
|
+
|
|
1571
|
+
class GoogleAdsSource:
    """Build a dlt source that reads Google Ads data.

    The customer id is taken from the host part of the source URI; the
    developer token and the service-account credentials are taken from the
    URI query string.
    """

    # Look-back window used when no explicit start of the interval is given.
    _DEFAULT_LOOKBACK = timedelta(days=30)

    def handles_incrementality(self) -> bool:
        """Report that this source manages incremental loading by itself."""
        return True

    def init_client(self, params: Dict[str, List[str]]) -> "GoogleAdsClient":
        """Create a ``GoogleAdsClient`` from parsed URI query parameters.

        Args:
            params: Query parameters as returned by ``parse_qs``. Must contain
                ``dev_token`` and one of ``credentials_path`` (path to a JSON
                key file) or ``credentials_base64`` (base64-encoded contents
                of such a file).

        Returns:
            The client built by ``GoogleAdsClient.load_from_dict``.

        Raises:
            MissingValueError: If ``dev_token`` is missing, or if neither
                ``credentials_path`` nor ``credentials_base64`` is present.
            binascii.Error: If ``credentials_base64`` is not valid base64.
        """
        dev_token = params.get("dev_token")
        if not dev_token:
            raise MissingValueError("dev_token", "Google Ads")

        credentials_path = params.get("credentials_path")
        credentials_base64 = params.get("credentials_base64")
        if credentials_path is None and credentials_base64 is None:
            raise MissingValueError(
                "credentials_path or credentials_base64", "Google Ads"
            )

        if credentials_path:
            return self._load_client(credentials_path[0], dev_token[0])

        # Decode before touching the filesystem, so that malformed input
        # fails without leaving a temporary file (or an open descriptor)
        # behind.
        secret = base64.b64decode(credentials_base64[0])  # type: ignore

        # The client is configured with a key file *path*, so the decoded
        # secret is written to a short-lived temporary file.
        fd, path = tempfile.mkstemp(prefix="secret-")
        try:
            with os.fdopen(fd, "wb") as key_file:
                key_file.write(secret)
            return self._load_client(path, dev_token[0])
        finally:
            # Remove the secret from disk whether or not the client could be
            # created.
            os.remove(path)

    @staticmethod
    def _load_client(key_file_path: str, dev_token: str) -> "GoogleAdsClient":
        """Load a client from a JSON key file path and a developer token."""
        conf = {
            "json_key_file_path": key_file_path,
            "use_proto_plus": True,
            "developer_token": dev_token,
        }
        return GoogleAdsClient.load_from_dict(conf)

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Return the dlt source restricted to the requested resource.

        Args:
            uri: Source URI. The host is used as the customer id and the
                query string is passed to ``init_client``.
            table: Resource name. A value starting with ``daily:`` is passed
                to ``google_ads`` as a report specification and the
                ``daily_report`` resource is selected.
            **kwargs: ``interval_start`` and ``interval_end`` bound the
                requested period; ``incremental_key`` must not be set.

        Raises:
            ValueError: If ``incremental_key`` is provided.
            MissingValueError: If the URI has no host (customer id), or if
                ``init_client`` rejects the query parameters.
            UnsupportedResourceError: If ``table`` is not a resource of the
                created source.
        """
        if kwargs.get("incremental_key") is not None:
            raise ValueError(
                "Google Ads takes care of incrementality on its own, you should not provide incremental_key"
            )

        parsed_uri = urlparse(uri)

        customer_id = parsed_uri.hostname
        if not customer_id:
            raise MissingValueError("customer_id", "Google Ads")

        client = self.init_client(parse_qs(parsed_uri.query))

        interval_start = kwargs.get("interval_start")
        end_date = kwargs.get("interval_end")

        # Most combinations of explicit start/end dates are handled
        # downstream. When only the end date is provided, the start date is
        # derived from it; otherwise a missing start date defaults to the
        # look-back window counted from now (UTC).
        if interval_start is None and end_date is not None:
            start_date = end_date - self._DEFAULT_LOOKBACK
        else:
            start_date = interval_start or (
                datetime.now(tz=timezone.utc) - self._DEFAULT_LOOKBACK
            )

        report_spec = None
        if table.startswith("daily:"):
            # Custom daily reports are all served by a single resource; the
            # original table string carries the report specification.
            report_spec = table
            table = "daily_report"

        src = google_ads(
            client,
            customer_id,
            report_spec,
            start_date=start_date,
            end_date=end_date,
        )

        if table not in src.resources:
            raise UnsupportedResourceError(table, "Google Ads")

        return src.with_resources(table)
|
|
1577
1662
|
|
|
1578
1663
|
|
|
1579
1664
|
class LinkedInAdsSource:
|
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.12.10"
|
|
1
|
+
__version__ = "0.12.11"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.10
|
|
3
|
+
Version: 0.12.11
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -22,7 +22,9 @@ Requires-Dist: dlt==1.5.0
|
|
|
22
22
|
Requires-Dist: duckdb-engine==0.13.5
|
|
23
23
|
Requires-Dist: duckdb==1.1.3
|
|
24
24
|
Requires-Dist: facebook-business==20.0.0
|
|
25
|
+
Requires-Dist: flatten-json==0.1.14
|
|
25
26
|
Requires-Dist: gcsfs==2024.10.0
|
|
27
|
+
Requires-Dist: google-ads==25.1.0
|
|
26
28
|
Requires-Dist: google-analytics-data==0.18.16
|
|
27
29
|
Requires-Dist: google-api-python-client==2.130.0
|
|
28
30
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
|
|
2
2
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
|
+
ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
|
|
3
4
|
ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
|
|
4
|
-
ingestr/src/errors.py,sha256=
|
|
5
|
-
ingestr/src/factory.py,sha256=
|
|
5
|
+
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
6
|
+
ingestr/src/factory.py,sha256=D__Oy029z6y2OsAUMGab5K5ZmYhRXxDbD_SDc21b9Eo,4746
|
|
6
7
|
ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
|
|
7
|
-
ingestr/src/sources.py,sha256=
|
|
8
|
+
ingestr/src/sources.py,sha256=jIq1qVj8_uOVbdrVuvs2uHkrLydd1i8XHMx5vhPVqAo,61682
|
|
8
9
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
9
10
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
10
|
-
ingestr/src/version.py,sha256=
|
|
11
|
+
ingestr/src/version.py,sha256=92OWM_xUUgc7wxFngCUAzVKFahsSWsF4UXOgDEn2uVI,24
|
|
11
12
|
ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
|
|
12
13
|
ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
|
|
13
14
|
ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
|
|
@@ -37,6 +38,11 @@ ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e
|
|
|
37
38
|
ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
|
|
38
39
|
ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
|
|
39
40
|
ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
|
|
41
|
+
ingestr/src/google_ads/__init__.py,sha256=bH0TtnRWcOUESezpvoA7VEUHAq_0ITGQeX4GGVBfl1I,3725
|
|
42
|
+
ingestr/src/google_ads/field.py,sha256=uc8KEaYQrwgQoQPUdxIQWZxpFeZHbiV98FM0ZSaelS0,69
|
|
43
|
+
ingestr/src/google_ads/metrics.py,sha256=tAqpBpm-8l95oPT9cBxMWaEoDTNHVXnqUphYDHWKDiE,12099
|
|
44
|
+
ingestr/src/google_ads/predicates.py,sha256=K4wTuqfmJ9ko1RKeHTBDfQO_mUADVyuRqtywBPP-72w,683
|
|
45
|
+
ingestr/src/google_ads/reports.py,sha256=AVY1pPt5yaIFskQe1k5VW2Dhlux3bzewsHlDrdGEems,12686
|
|
40
46
|
ingestr/src/google_analytics/__init__.py,sha256=8Evpmoy464YpNbCI_NmvFHIzWCu7J7SjJw-RrPZ6AL8,3674
|
|
41
47
|
ingestr/src/google_analytics/helpers.py,sha256=vLmFyQ_IEJEK5LlxBJQeJw0VHaE5gRRZdBa54U72CaQ,5965
|
|
42
48
|
ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
|
|
@@ -94,8 +100,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
94
100
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
95
101
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
96
102
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
97
|
-
ingestr-0.12.
|
|
98
|
-
ingestr-0.12.
|
|
99
|
-
ingestr-0.12.
|
|
100
|
-
ingestr-0.12.
|
|
101
|
-
ingestr-0.12.
|
|
103
|
+
ingestr-0.12.11.dist-info/METADATA,sha256=fxNa7pb3GLEvLuUjHSOviflBwIBJto0ck1PyQp893jU,8127
|
|
104
|
+
ingestr-0.12.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
105
|
+
ingestr-0.12.11.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
106
|
+
ingestr-0.12.11.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
107
|
+
ingestr-0.12.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|