airbyte-source-google-search-console 1.5.18__py3-none-any.whl → 1.6.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-search-console
3
- Version: 1.5.18
3
+ Version: 1.6.0rc1
4
4
  Summary: Source implementation for Google Search Console.
5
5
  License: Elv2
6
6
  Author: Airbyte
@@ -1,9 +1,10 @@
1
1
  source_google_search_console/__init__.py,sha256=HQCPu-CK7XmVDtP9rmTdB2XyraVCc6pv9pw38-O8y48,1191
2
+ source_google_search_console/components.py,sha256=5o8kH2xwYUvk3yjnSd6okJVF2KBUqUIF4V97xRpPAyI,2803
2
3
  source_google_search_console/config_migrations.py,sha256=Cl4SUdJpAf6wMM_vVhqjjU89NfUq9LIGJ9zNrWiBk-A,4235
3
4
  source_google_search_console/exceptions.py,sha256=iD3jYC4WxVCEKGsqQ7Vaj1tbjhJZ4S5mnSDnwFJdsIQ,1097
4
- source_google_search_console/run.py,sha256=q6O2iXoxtrdgtodCaanqTO2eKzUvXF2iDCfmCxaPE24,462
5
+ source_google_search_console/manifest.yaml,sha256=zoPWyJ7gwOOK6yKAk4H92cD-N7Xg3iHtp5hTT_e80QA,10974
6
+ source_google_search_console/run.py,sha256=TBkPlseTERarkj6wL8AMEKgm5Xsb2drnltPVH6257-M,2195
5
7
  source_google_search_console/schemas/search_analytics_all_fields.json,sha256=iQxRh_c_yz3uGofqpo1KX571TMmzYjKScb0PtI6SN_Q,1729
6
- source_google_search_console/schemas/search_analytics_by_country.json,sha256=xvUVjGRy63dsc7c0O-Kg7DUzybRpD-r_-VYhPmDBw_o,1491
7
8
  source_google_search_console/schemas/search_analytics_by_date.json,sha256=meCbWDayc1y0q-Lu-CAdjQVnsM8xZBX3BdF129UC1P8,1388
8
9
  source_google_search_console/schemas/search_analytics_by_device.json,sha256=VtoFjmmv9rx-uhSFaRn0wm4LeSxRIaexrxg2Spvbneo,1525
9
10
  source_google_search_console/schemas/search_analytics_by_page.json,sha256=KyUojZc4Lv3hPswxIJzUL5QDNsbvSugGjl_uHGF7Am4,1473
@@ -14,13 +15,11 @@ source_google_search_console/schemas/search_analytics_keyword_site_report_by_sit
14
15
  source_google_search_console/schemas/search_analytics_page_report.json,sha256=-b0Y0LenTchS0q9A2aQ4hIjUjXkYF8erOtyrTMhf6MM,1776
15
16
  source_google_search_console/schemas/search_analytics_site_report_by_page.json,sha256=hWKHkm1reqGGu1dNcWBe6_XkZ5tK-UaiymrYRVgxRxI,1515
16
17
  source_google_search_console/schemas/search_analytics_site_report_by_site.json,sha256=rAh6LuNy7nCrrNM9MTd0qxAVc886ecQaqWRgV63OfyA,1408
17
- source_google_search_console/schemas/sitemaps.json,sha256=coyPSZCAfzMheybfRp4WPAZCp5JF2KGRF2rWK8oC080,1775
18
- source_google_search_console/schemas/sites.json,sha256=WNiCRuStPL1YkJiFa8FEbNJmqaERAOf9Yow6ygIumvo,383
19
- source_google_search_console/service_account_authenticator.py,sha256=gjUxt0xFxj82uviCQNTsA1Jlee__UDhYNjE7bRO1G0U,1227
20
- source_google_search_console/source.py,sha256=2OpJDWhMNoRj9Q4p6Br3DMOrphqDb_6Ca0RWGAvtnrQ,10010
18
+ source_google_search_console/service_account_authenticator.py,sha256=pAWKAXfwfTY3xkXvQJH0EyFphFULdCIcC47YXYTO9X8,1307
19
+ source_google_search_console/source.py,sha256=7FD4ciRrsptU7ZIxAU2xLC37bgjKWzNkflE9ybmgpXM,9113
21
20
  source_google_search_console/spec.json,sha256=WYtFvaSqWYGm1Dt2yV9G92U78Q94rh9oarbxJe3H7xo,8470
22
- source_google_search_console/streams.py,sha256=WyClF4v6w5ZdYL9Ses_ITBzMNd6CQLaNKU64HtFMcfE,19920
23
- airbyte_source_google_search_console-1.5.18.dist-info/METADATA,sha256=bodtIwg1k3rP4507KfHBZoBQugAXclCMsKCuMAgpEjU,5622
24
- airbyte_source_google_search_console-1.5.18.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
25
- airbyte_source_google_search_console-1.5.18.dist-info/entry_points.txt,sha256=DMcgc9bCX-Vt6hm_68pa77qS3eGdeMhg-UdlFc-XKUM,85
26
- airbyte_source_google_search_console-1.5.18.dist-info/RECORD,,
21
+ source_google_search_console/streams.py,sha256=T0eqhmxGPDAfFNMXFfG_vM4aYHFaHAHHAoj6s5XnjnI,18760
22
+ airbyte_source_google_search_console-1.6.0rc1.dist-info/METADATA,sha256=smSxRuSxL5aXxK-OcjVGdpR_jLJN_-dMEtsKyMu5Oww,5624
23
+ airbyte_source_google_search_console-1.6.0rc1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ airbyte_source_google_search_console-1.6.0rc1.dist-info/entry_points.txt,sha256=DMcgc9bCX-Vt6hm_68pa77qS3eGdeMhg-UdlFc-XKUM,85
25
+ airbyte_source_google_search_console-1.6.0rc1.dist-info/RECORD,,
@@ -0,0 +1,81 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Mapping, Optional
7
+
8
+ from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
9
+
10
+
11
+ @dataclass
12
+ class NestedSubstreamStateMigration(StateMigration):
13
+ """
14
+ We require a custom state migration because SearchAnalytics streams contain two nested levels of
15
+ substreams. The existing LegacyToPerPartitionStateMigration only handles one level.
16
+
17
+ Legacy state format is as follows:
18
+ {
19
+ "date": "2025-05-28",
20
+ "https://www.example.com/": {
21
+ "web": {
22
+ "date": "2025-05-25"
23
+ },
24
+ "news": {
25
+ "date": "2023-05-22"
26
+ }
27
+ }
28
+ }
29
+
30
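+ The resulting migrated per-partition state is shown below. Note that `migrate` itself only returns the `use_global_cursor` and `states` entries; it does not emit the top-level `state` entry: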
31
+ {
32
+ "use_global_cursor": false,
33
+ "states": [
34
+ {
35
+ "partition": {
36
+ "search_type": "web",
37
+ "site_url": "https://www.example.com/"
38
+ },
39
+ "cursor": {
40
+ "date": "2025-05-25"
41
+ }
42
+ },
43
+ {
44
+ "partition": {
45
+ "search_type": "news",
46
+ "site_url": "https://www.example.com/"
47
+ },
48
+ "cursor": {
49
+ "date": "2023-05-22"
50
+ }
51
+ }],
52
+ "state": {
53
+ "date": "2025-05-25"
54
+ }
55
+ }
56
+ """
57
+
58
+ def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
59
+ return len(stream_state) > 0 and "states" not in stream_state
60
+
61
+ def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
62
+ global_state: Optional[Mapping[str, Any]] = None
63
+ per_partition_state = []
64
+ for site_url_key, search_type_state in stream_state.items():
65
+ if site_url_key == "date":
66
+ # The legacy state also contains a global cursor value under the `date` key which equates
67
+ # to global state.
68
+ #
69
+ # However, the legacy Python implementation does not appear to handle this
70
+ # correctly and simply saves the state of the last seen partition. Since I don't trust the
71
+ # legacy value and in the current implementation global state is applied to partitions
72
+ # without an existing value, I'm making a conscious choice to not migrate the global value.
73
+ continue
74
+ else:
75
+ site_url = site_url_key
76
+ for search_type_key, cursor in search_type_state.items():
77
+ per_partition_state.append({"partition": {"site_url": site_url, "search_type": search_type_key}, "cursor": cursor})
78
+ return {
79
+ "use_global_cursor": False,
80
+ "states": per_partition_state,
81
+ }
@@ -0,0 +1,341 @@
1
+ version: 6.44.0
2
+
3
+ type: DeclarativeSource
4
+
5
+ check:
6
+ type: CheckStream
7
+ stream_names:
8
+ - sites
9
+
10
+ definitions:
11
+ oauth_authenticator:
12
+ type: OAuthAuthenticator
13
+ client_id: "{{ config.get('authorization', {}).get('client_id') }}"
14
+ client_secret: "{{ config.get('authorization', {}).get('client_secret') }}"
15
+ refresh_token: "{{ config.get('authorization', {}).get('refresh_token') }}"
16
+ token_refresh_endpoint: "https://oauth2.googleapis.com/token"
17
+
18
+ jwt_profile_assertion_oauth_authenticator:
19
+ type: OAuthAuthenticator
20
+ token_refresh_endpoint: https://oauth2.googleapis.com/token
21
+ refresh_request_headers:
22
+ Content-Type: application/x-www-form-urlencoded
23
+ use_profile_assertion: true
24
+ profile_assertion:
25
+ type: JwtAuthenticator
26
+ secret_key: "{{ json_loads(config.get('authorization', {}).get('service_account_info', {})).get('private_key') }}"
27
+ algorithm: "RS256"
28
+ token_duration: 3600
29
+ jwt_payload:
30
+ aud: "{{ json_loads(config.get('authorization', {}).get('service_account_info', {})).get('token_uri') }}"
31
+ iss: "{{ json_loads(config.get('authorization', {}).get('service_account_info', {})).get('client_email') }}"
32
+ additional_jwt_payload:
33
+ scope: "https://www.googleapis.com/auth/webmasters.readonly"
34
+
35
+ selective_authenticator:
36
+ type: SelectiveAuthenticator
37
+ authenticator_selection_path: ["authorization", "auth_type"]
38
+ authenticators:
39
+ Client: "#/definitions/oauth_authenticator"
40
+ Service: "#/definitions/jwt_profile_assertion_oauth_authenticator"
41
+
42
+ search_analytics_by_country_stream:
43
+ type: DeclarativeStream
44
+ name: search_analytics_by_country
45
+ primary_key:
46
+ - site_url
47
+ - date
48
+ - country
49
+ - search_type
50
+ retriever:
51
+ type: SimpleRetriever
52
+ requester:
53
+ type: HttpRequester
54
+ url_base: https://www.googleapis.com/webmasters/v3
55
+ path: "/sites/{{ sanitize_url(stream_partition.get('site_url')) }}/searchAnalytics/query"
56
+ http_method: POST
57
+ authenticator: "#/definitions/selective_authenticator"
58
+ request_headers:
59
+ Content-Type: "application/json"
60
+ request_body_json:
61
+ startDate: "{{ stream_interval.get('start_time') }}"
62
+ endDate: "{{ stream_interval.get('end_time') }}"
63
+ dimensions: ["date", "country"]
64
+ type: "{{ stream_partition.get('search_type') }}"
65
+ aggregationType: auto
66
+ dataState: "{{ config.get('data_state', 'final') }}"
67
+ # Currently relying on the default error handler behavior. Two pieces of functionality not covered are
68
+ # - Silently skipping over 403 permissions errors and relying on partial success reporting
69
+ # - Retrying 400 errors with aggregation_type=auto instead of failing outright
70
+ paginator:
71
+ type: DefaultPaginator
72
+ page_token_option:
73
+ type: RequestOption
74
+ field_name: startRow
75
+ inject_into: body_json
76
+ page_size_option:
77
+ type: RequestOption
78
+ field_name: rowLimit
79
+ inject_into: body_json
80
+ pagination_strategy:
81
+ type: OffsetIncrement
82
+ page_size: 25000
83
+ inject_on_first_request: true
84
+ record_selector:
85
+ type: RecordSelector
86
+ extractor:
87
+ type: DpathExtractor
88
+ field_path:
89
+ - rows
90
+ partition_router:
91
+ - type: ListPartitionRouter
92
+ values: "{{ config['site_urls'] }}"
93
+ cursor_field: site_url
94
+ - type: ListPartitionRouter
95
+ values:
96
+ - web
97
+ - news
98
+ - image
99
+ - video
100
+ - discover
101
+ - googleNews
102
+ cursor_field: search_type
103
+ incremental_sync:
104
+ type: DatetimeBasedCursor
105
+ cursor_field: date
106
+ cursor_datetime_formats:
107
+ - "%Y-%m-%d"
108
+ datetime_format: "%Y-%m-%d"
109
+ start_datetime:
110
+ type: MinMaxDatetime
111
+ datetime: "{{ config.get('start_date', '2021-01-01') }}"
112
+ datetime_format: "%Y-%m-%d"
113
+ end_datetime:
114
+ type: MinMaxDatetime
115
+ datetime: "{{ config.get('end_date', today_utc()) }}"
116
+ datetime_format: "%Y-%m-%d"
117
+ step: P3D
118
+ cursor_granularity: P1D
119
+ transformations:
120
+ - type: AddFields
121
+ fields:
122
+ - path:
123
+ - site_url
124
+ value: "{{ stream_partition['site_url'] }}"
125
+ - path:
126
+ - search_type
127
+ value: "{{ stream_partition['search_type'] }}"
128
+ # The values in the 'keys' array in the record correspond to the same order that the dimensions
129
+ # are requested in the API request. For example, if the request body was `dimensions: ["date", "country"]`,
130
+ # then the first value of `keys` is placed under the `date` field. These arrays are always the same length.
131
+ # After extracting the keys, the `keys` array is removed from the record.
132
+ - type: AddFields
133
+ fields:
134
+ - path:
135
+ - date
136
+ value: "{{ record['keys'][0] }}"
137
+ - path:
138
+ - country
139
+ value: "{{ record['keys'][1] }}"
140
+ - type: RemoveFields
141
+ field_pointers:
142
+ - - keys
143
+ schema_loader:
144
+ type: InlineSchemaLoader
145
+ schema:
146
+ $ref: "#/schemas/search_analytics_by_country"
147
+ state_migrations:
148
+ - type: CustomStateMigration
149
+ class_name: source_google_search_console.components.NestedSubstreamStateMigration
150
+
151
+ sites_stream:
152
+ type: DeclarativeStream
153
+ name: sites
154
+ retriever:
155
+ type: SimpleRetriever
156
+ requester:
157
+ type: HttpRequester
158
+ url_base: https://www.googleapis.com/webmasters/v3
159
+ path: "/sites/{{ sanitize_url(stream_partition.get('site_url')) }}"
160
+ http_method: GET
161
+ authenticator: "#/definitions/selective_authenticator"
162
+ record_selector:
163
+ type: RecordSelector
164
+ extractor:
165
+ type: DpathExtractor
166
+ field_path: []
167
+ partition_router:
168
+ - type: ListPartitionRouter
169
+ values: "{{ config['site_urls'] }}"
170
+ cursor_field: site_url
171
+ schema_loader:
172
+ type: InlineSchemaLoader
173
+ schema:
174
+ $ref: "#/schemas/sites"
175
+
176
+ sitemaps_stream:
177
+ type: DeclarativeStream
178
+ name: sitemaps
179
+ retriever:
180
+ type: SimpleRetriever
181
+ requester:
182
+ type: HttpRequester
183
+ url_base: https://www.googleapis.com/webmasters/v3
184
+ path: "/sites/{{ sanitize_url(stream_partition.get('site_url')) }}/sitemaps"
185
+ http_method: GET
186
+ authenticator: "#/definitions/selective_authenticator"
187
+ record_selector:
188
+ type: RecordSelector
189
+ extractor:
190
+ type: DpathExtractor
191
+ field_path:
192
+ - "sitemap"
193
+ partition_router:
194
+ - type: ListPartitionRouter
195
+ values: "{{ config['site_urls'] }}"
196
+ cursor_field: site_url
197
+ schema_loader:
198
+ type: InlineSchemaLoader
199
+ schema:
200
+ $ref: "#/schemas/sitemaps"
201
+
202
+ streams:
203
+ - "#/definitions/search_analytics_by_country_stream"
204
+ - "#/definitions/sites_stream"
205
+ - "#/definitions/sitemaps_stream"
206
+
207
+ schemas:
208
+ search_analytics_by_country:
209
+ $schema: "http://json-schema.org/draft-07/schema#"
210
+ type: object
211
+ properties:
212
+ site_url:
213
+ description: The URL of the site for which the search analytics data is being reported.
214
+ type:
215
+ - "null"
216
+ - string
217
+ search_type:
218
+ description: >-
219
+ The type of search (web search, image search, video search, etc.) for
220
+ which the data is being reported.
221
+ type:
222
+ - "null"
223
+ - string
224
+ date:
225
+ description: The date for which the search analytics data is being reported.
226
+ type:
227
+ - "null"
228
+ - string
229
+ format: date
230
+ country:
231
+ description: The country for which the search analytics data is being reported.
232
+ type:
233
+ - "null"
234
+ - string
235
+ clicks:
236
+ description: >-
237
+ The number of times users clicked on the search result for a specific
238
+ country.
239
+ type:
240
+ - "null"
241
+ - integer
242
+ impressions:
243
+ description: >-
244
+ The total number of times a search result was shown in search results for
245
+ a specific country.
246
+ type:
247
+ - "null"
248
+ - integer
249
+ ctr:
250
+ description: >-
251
+ The click-through rate, i.e., the ratio of clicks to impressions for a
252
+ specific country.
253
+ type:
254
+ - "null"
255
+ - number
256
+ multipleOf: 1.e-25
257
+ position:
258
+ description: >-
259
+ The average position at which the site's search result appeared for a
260
+ specific country.
261
+ type:
262
+ - "null"
263
+ - number
264
+ multipleOf: 1.e-25
265
+ sites:
266
+ $schema: "http://json-schema.org/draft-07/schema#"
267
+ type: object
268
+ properties:
269
+ siteUrl:
270
+ description: "The URL of the site data being fetched"
271
+ type: ["null", "string"]
272
+ permissionLevel:
273
+ description: "The user's permission level for the site (owner, full, restricted, etc.)"
274
+ type: ["null", "string"]
275
+ sitemaps:
276
+ $schema: "http://json-schema.org/draft-07/schema#"
277
+ type: object
278
+ properties:
279
+ path:
280
+ description: "Path to the sitemap file"
281
+ type:
282
+ - "null"
283
+ - string
284
+ lastSubmitted:
285
+ description: "Timestamp when the sitemap was last submitted"
286
+ type:
287
+ - "null"
288
+ - string
289
+ format: date-time
290
+ isPending:
291
+ description: "Flag indicating if the sitemap is pending for processing"
292
+ type:
293
+ - "null"
294
+ - boolean
295
+ isSitemapsIndex:
296
+ description: "Flag indicating if the data represents a sitemap index"
297
+ type:
298
+ - "null"
299
+ - boolean
300
+ type:
301
+ description: "Type of the sitemap"
302
+ type:
303
+ - "null"
304
+ - string
305
+ lastDownloaded:
306
+ description: "Timestamp when the sitemap was last downloaded"
307
+ type:
308
+ - "null"
309
+ - string
310
+ format: date-time
311
+ warnings:
312
+ description: "Warnings encountered while processing the sitemaps"
313
+ type:
314
+ - "null"
315
+ - string
316
+ errors:
317
+ description: "Errors encountered while processing the sitemaps"
318
+ type:
319
+ - "null"
320
+ - string
321
+ contents:
322
+ description: "Data related to the sitemap contents"
323
+ type: array
324
+ items:
325
+ type: object
326
+ properties:
327
+ type:
328
+ description: "Type of the sitemap content"
329
+ type:
330
+ - "null"
331
+ - string
332
+ submitted:
333
+ description: "Number of submitted sitemap URLs"
334
+ type:
335
+ - "null"
336
+ - string
337
+ indexed:
338
+ description: "Number of indexed sitemap URLs"
339
+ type:
340
+ - "null"
341
+ - string
@@ -1,18 +1,56 @@
1
1
  #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
-
6
5
  import sys
6
+ import traceback
7
+ from datetime import datetime
8
+ from typing import List
9
+
10
+ from orjson import orjson
7
11
 
8
- from airbyte_cdk.entrypoint import launch
12
+ from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch, logger
13
+ from airbyte_cdk.exception_handler import init_uncaught_exception_handler
14
+ from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteMessageSerializer, AirbyteTraceMessage, TraceType, Type
9
15
  from source_google_search_console import SourceGoogleSearchConsole
10
16
  from source_google_search_console.config_migrations import MigrateCustomReports
11
17
 
12
18
 
13
- def run():
14
- source = SourceGoogleSearchConsole()
15
- # migrate config at runtime
16
- MigrateCustomReports.migrate(sys.argv[1:], source)
17
- # run the connector
18
- launch(source, sys.argv[1:])
19
+ def _get_source(args: List[str]):
20
+ catalog_path = AirbyteEntrypoint.extract_catalog(args)
21
+ config_path = AirbyteEntrypoint.extract_config(args)
22
+ state_path = AirbyteEntrypoint.extract_state(args)
23
+ try:
24
+ return SourceGoogleSearchConsole(
25
+ SourceGoogleSearchConsole.read_catalog(catalog_path) if catalog_path else None,
26
+ SourceGoogleSearchConsole.read_config(config_path) if config_path else None,
27
+ SourceGoogleSearchConsole.read_state(state_path) if state_path else None,
28
+ )
29
+ except Exception as error:
30
+ print(
31
+ orjson.dumps(
32
+ AirbyteMessageSerializer.dump(
33
+ AirbyteMessage(
34
+ type=Type.TRACE,
35
+ trace=AirbyteTraceMessage(
36
+ type=TraceType.ERROR,
37
+ emitted_at=int(datetime.now().timestamp() * 1000),
38
+ error=AirbyteErrorTraceMessage(
39
+ message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
40
+ stack_trace=traceback.format_exc(),
41
+ ),
42
+ ),
43
+ )
44
+ )
45
+ ).decode()
46
+ )
47
+ return None
48
+
49
+
50
+ def run() -> None:
51
+ init_uncaught_exception_handler(logger)
52
+ _args = sys.argv[1:]
53
+ source = _get_source(_args)
54
+ if source:
55
+ MigrateCustomReports.migrate(sys.argv[1:], source)
56
+ launch(source, _args)
@@ -1,6 +1,7 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
+ import json
4
5
 
5
6
  import requests
6
7
  from google.auth.transport.requests import Request
@@ -21,7 +22,8 @@ class ServiceAccountAuthenticator(AuthBase):
21
22
 
22
23
  def __call__(self, request: requests.PreparedRequest) -> requests.PreparedRequest:
23
24
  try:
24
- credentials: Credentials = Credentials.from_service_account_info(self.service_account_info, scopes=self.scopes).with_subject(
25
+ service_account_info = json.loads(self.service_account_info)
26
+ credentials: Credentials = Credentials.from_service_account_info(service_account_info, scopes=self.scopes).with_subject(
25
27
  self.email
26
28
  )
27
29
  if not credentials.valid:
@@ -11,8 +11,9 @@ import jsonschema
11
11
  import pendulum
12
12
  import requests
13
13
 
14
- from airbyte_cdk.models import FailureType, SyncMode
15
- from airbyte_cdk.sources import AbstractSource
14
+ from airbyte_cdk.models import ConfiguredAirbyteCatalog, FailureType, SyncMode
15
+ from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
16
+ from airbyte_cdk.sources.source import TState
16
17
  from airbyte_cdk.sources.streams import Stream
17
18
  from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
18
19
  from airbyte_cdk.utils import AirbyteTracedException
@@ -25,7 +26,6 @@ from source_google_search_console.exceptions import (
25
26
  from source_google_search_console.service_account_authenticator import ServiceAccountAuthenticator
26
27
  from source_google_search_console.streams import (
27
28
  SearchAnalyticsAllFields,
28
- SearchAnalyticsByCountry,
29
29
  SearchAnalyticsByCustomDimensions,
30
30
  SearchAnalyticsByDate,
31
31
  SearchAnalyticsByDevice,
@@ -37,8 +37,6 @@ from source_google_search_console.streams import (
37
37
  SearchAnalyticsPageReport,
38
38
  SearchAnalyticsSiteReportByPage,
39
39
  SearchAnalyticsSiteReportBySite,
40
- Sitemaps,
41
- Sites,
42
40
  )
43
41
 
44
42
 
@@ -55,7 +53,10 @@ custom_reports_schema = {
55
53
  }
56
54
 
57
55
 
58
- class SourceGoogleSearchConsole(AbstractSource):
56
+ class SourceGoogleSearchConsole(YamlDeclarativeSource):
57
+ def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: TState, **kwargs):
58
+ super().__init__(catalog=catalog, config=config, state=state, **{"path_to_yaml": "manifest.yaml"})
59
+
59
60
  @staticmethod
60
61
  def normalize_url(url):
61
62
  parse_result = urlparse(url)
@@ -68,7 +69,7 @@ class SourceGoogleSearchConsole(AbstractSource):
68
69
  authorization = config["authorization"]
69
70
  if authorization["auth_type"] == "Service":
70
71
  try:
71
- authorization["service_account_info"] = json.loads(authorization["service_account_info"])
72
+ json.loads(authorization["service_account_info"])
72
73
  except ValueError:
73
74
  message = "authorization.service_account_info is not valid JSON"
74
75
  raise AirbyteTracedException(message=message, internal_message=message, failure_type=FailureType.config_error)
@@ -110,29 +111,6 @@ class SourceGoogleSearchConsole(AbstractSource):
110
111
  raise AirbyteTracedException(message=message, internal_message=message, failure_type=FailureType.config_error)
111
112
  return config
112
113
 
113
- def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
114
- try:
115
- config = self._validate_and_transform(config)
116
- stream_kwargs = self.get_stream_kwargs(config)
117
- self.validate_site_urls(config["site_urls"], stream_kwargs["authenticator"])
118
- sites = Sites(**stream_kwargs)
119
- stream_slice = sites.stream_slices(SyncMode.full_refresh)
120
-
121
- # stream_slice returns all site_urls and we need to make sure that
122
- # the connection is successful for all of them
123
- for _slice in stream_slice:
124
- sites_gen = sites.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice)
125
- next(sites_gen)
126
- return True, None
127
-
128
- except (InvalidSiteURLValidationError, UnauthorizedOauthError, UnauthorizedServiceAccountError, jsonschema.ValidationError) as e:
129
- return False, repr(e)
130
- except (Exception, UnidentifiedError) as error:
131
- return (
132
- False,
133
- f"Unable to check connectivity to Google Search Console API - {repr(error)}",
134
- )
135
-
136
114
  def validate_site_urls(self, site_urls: List[str], auth: Union[ServiceAccountAuthenticator, Oauth2Authenticator]):
137
115
  if isinstance(auth, ServiceAccountAuthenticator):
138
116
  request = auth(requests.Request(method="GET", url="https://www.googleapis.com/webmasters/v3/sites"))
@@ -172,22 +150,23 @@ class SourceGoogleSearchConsole(AbstractSource):
172
150
  config = self._validate_and_transform(config)
173
151
  stream_config = self.get_stream_kwargs(config)
174
152
 
175
- streams = [
176
- Sites(**stream_config),
177
- Sitemaps(**stream_config),
178
- SearchAnalyticsByCountry(**stream_config),
179
- SearchAnalyticsByDevice(**stream_config),
180
- SearchAnalyticsByDate(**stream_config),
181
- SearchAnalyticsByQuery(**stream_config),
182
- SearchAnalyticsByPage(**stream_config),
183
- SearchAnalyticsAllFields(**stream_config),
184
- SearchAnalyticsKeywordPageReport(**stream_config),
185
- SearchAnalyticsPageReport(**stream_config),
186
- SearchAnalyticsSiteReportBySite(**stream_config),
187
- SearchAnalyticsSiteReportByPage(**stream_config),
188
- SearchAnalyticsKeywordSiteReportByPage(**stream_config),
189
- SearchAnalyticsKeywordSiteReportBySite(**stream_config),
190
- ]
153
+ streams = super().streams(config=config)
154
+
155
+ streams.extend(
156
+ [
157
+ SearchAnalyticsByDevice(**stream_config),
158
+ SearchAnalyticsByDate(**stream_config),
159
+ SearchAnalyticsByQuery(**stream_config),
160
+ SearchAnalyticsByPage(**stream_config),
161
+ SearchAnalyticsAllFields(**stream_config),
162
+ SearchAnalyticsKeywordPageReport(**stream_config),
163
+ SearchAnalyticsPageReport(**stream_config),
164
+ SearchAnalyticsSiteReportBySite(**stream_config),
165
+ SearchAnalyticsSiteReportByPage(**stream_config),
166
+ SearchAnalyticsKeywordSiteReportByPage(**stream_config),
167
+ SearchAnalyticsKeywordSiteReportBySite(**stream_config),
168
+ ]
169
+ )
191
170
 
192
171
  streams = streams + self.get_custom_reports(config=config, stream_config=stream_config)
193
172
 
@@ -84,41 +84,6 @@ class GoogleSearchConsole(HttpStream, ABC):
84
84
  return response.status_code == 429 or 500 <= response.status_code < 600
85
85
 
86
86
 
87
- class Sites(GoogleSearchConsole):
88
- """
89
- API docs: https://developers.google.com/webmaster-tools/search-console-api-original/v3/sites
90
- """
91
-
92
- primary_key = None
93
- is_resumable = False
94
-
95
- def path(
96
- self,
97
- stream_state: Mapping[str, Any] = None,
98
- stream_slice: Mapping[str, Any] = None,
99
- next_page_token: Mapping[str, Any] = None,
100
- ) -> str:
101
- return f"sites/{stream_slice.get('site_url')}"
102
-
103
-
104
- class Sitemaps(GoogleSearchConsole):
105
- """
106
- API docs: https://developers.google.com/webmaster-tools/search-console-api-original/v3/sitemaps
107
- """
108
-
109
- primary_key = None
110
- data_field = "sitemap"
111
- is_resumable = False
112
-
113
- def path(
114
- self,
115
- stream_state: Mapping[str, Any] = None,
116
- stream_slice: Mapping[str, Any] = None,
117
- next_page_token: Mapping[str, Any] = None,
118
- ) -> str:
119
- return f"sites/{stream_slice.get('site_url')}/sitemaps"
120
-
121
-
122
87
  class SearchAnalytics(GoogleSearchConsole, CheckpointMixin, ABC):
123
88
  """
124
89
  API docs: https://developers.google.com/webmaster-tools/search-console-api-original/v3/searchanalytics
@@ -335,12 +300,6 @@ class SearchAnalyticsByDate(SearchAnalytics):
335
300
  dimensions = ["date"]
336
301
 
337
302
 
338
- class SearchAnalyticsByCountry(SearchAnalytics):
339
- primary_key = ["site_url", "date", "country", "search_type"]
340
- search_types = ["web", "news", "image", "video", "discover", "googleNews"]
341
- dimensions = ["date", "country"]
342
-
343
-
344
303
  class SearchAnalyticsByDevice(SearchAnalytics):
345
304
  primary_key = ["site_url", "date", "device", "search_type"]
346
305
  search_types = ["web", "news", "image", "video", "googleNews"]
@@ -1,41 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "type": "object",
4
- "properties": {
5
- "site_url": {
6
- "description": "The URL of the site for which the search analytics data is being reported.",
7
- "type": ["null", "string"]
8
- },
9
- "search_type": {
10
- "description": "The type of search (web search, image search, video search, etc.) for which the data is being reported.",
11
- "type": ["null", "string"]
12
- },
13
- "date": {
14
- "description": "The date for which the search analytics data is being reported.",
15
- "type": ["null", "string"],
16
- "format": "date"
17
- },
18
- "country": {
19
- "description": "The country for which the search analytics data is being reported.",
20
- "type": ["null", "string"]
21
- },
22
- "clicks": {
23
- "description": "The number of times users clicked on the search result for a specific country.",
24
- "type": ["null", "integer"]
25
- },
26
- "impressions": {
27
- "description": "The total number of times a search result was shown in search results for a specific country.",
28
- "type": ["null", "integer"]
29
- },
30
- "ctr": {
31
- "description": "The click-through rate, i.e., the ratio of clicks to impressions for a specific country.",
32
- "type": ["null", "number"],
33
- "multipleOf": 1e-25
34
- },
35
- "position": {
36
- "description": "The average position at which the site's search result appeared for a specific country.",
37
- "type": ["null", "number"],
38
- "multipleOf": 1e-25
39
- }
40
- }
41
- }
@@ -1,61 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "type": "object",
4
- "properties": {
5
- "path": {
6
- "description": "Path to the sitemap file",
7
- "type": ["null", "string"]
8
- },
9
- "lastSubmitted": {
10
- "description": "Timestamp when the sitemap was last submitted",
11
- "type": ["null", "string"],
12
- "format": "date-time"
13
- },
14
- "isPending": {
15
- "description": "Flag indicating if the sitemap is pending for processing",
16
- "type": ["null", "boolean"]
17
- },
18
- "isSitemapsIndex": {
19
- "description": "Flag indicating if the data represents a sitemap index",
20
- "type": ["null", "boolean"]
21
- },
22
- "type": {
23
- "description": "Type of the sitemap",
24
- "type": ["null", "string"]
25
- },
26
- "lastDownloaded": {
27
- "description": "Timestamp when the sitemap was last downloaded",
28
- "type": ["null", "string"],
29
- "format": "date-time"
30
- },
31
- "warnings": {
32
- "description": "Warnings encountered while processing the sitemaps",
33
- "type": ["null", "string"]
34
- },
35
- "errors": {
36
- "description": "Errors encountered while processing the sitemaps",
37
- "type": ["null", "string"]
38
- },
39
- "contents": {
40
- "description": "Data related to the sitemap contents",
41
- "type": "array",
42
- "items": {
43
- "type": "object",
44
- "properties": {
45
- "type": {
46
- "description": "Type of the sitemap content",
47
- "type": ["null", "string"]
48
- },
49
- "submitted": {
50
- "description": "Number of submitted sitemap URLs",
51
- "type": ["null", "string"]
52
- },
53
- "indexed": {
54
- "description": "Number of indexed sitemap URLs",
55
- "type": ["null", "string"]
56
- }
57
- }
58
- }
59
- }
60
- }
61
- }
@@ -1,14 +0,0 @@
1
- {
2
- "$schema": "http://json-schema.org/draft-07/schema#",
3
- "type": "object",
4
- "properties": {
5
- "siteUrl": {
6
- "description": "The URL of the site data being fetched",
7
- "type": ["null", "string"]
8
- },
9
- "permissionLevel": {
10
- "description": "The user's permission level for the site (owner, full, restricted, etc.)",
11
- "type": ["null", "string"]
12
- }
13
- }
14
- }