airbyte-source-google-search-console 1.7.0__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_search_console-1.7.0.dist-info → airbyte_source_google_search_console-1.9.0.dist-info}/METADATA +1 -1
- airbyte_source_google_search_console-1.9.0.dist-info/RECORD +13 -0
- source_google_search_console/components.py +105 -3
- source_google_search_console/manifest.yaml +898 -30
- source_google_search_console/source.py +12 -37
- source_google_search_console/spec.json +9 -0
- airbyte_source_google_search_console-1.7.0.dist-info/RECORD +0 -22
- source_google_search_console/schemas/search_analytics_all_fields.json +0 -53
- source_google_search_console/schemas/search_analytics_by_date.json +0 -37
- source_google_search_console/schemas/search_analytics_by_device.json +0 -41
- source_google_search_console/schemas/search_analytics_by_page.json +0 -41
- source_google_search_console/schemas/search_analytics_by_query.json +0 -41
- source_google_search_console/schemas/search_analytics_page_report.json +0 -50
- source_google_search_console/schemas/search_analytics_site_report_by_page.json +0 -46
- source_google_search_console/schemas/search_analytics_site_report_by_site.json +0 -46
- source_google_search_console/streams.py +0 -394
- {airbyte_source_google_search_console-1.7.0.dist-info → airbyte_source_google_search_console-1.9.0.dist-info}/WHEEL +0 -0
- {airbyte_source_google_search_console-1.7.0.dist-info → airbyte_source_google_search_console-1.9.0.dist-info}/entry_points.txt +0 -0
@@ -39,14 +39,8 @@ definitions:
|
|
39
39
|
Client: "#/definitions/oauth_authenticator"
|
40
40
|
Service: "#/definitions/jwt_profile_assertion_oauth_authenticator"
|
41
41
|
|
42
|
-
|
42
|
+
base_search_analytics_stream:
|
43
43
|
type: DeclarativeStream
|
44
|
-
name: search_analytics_by_country
|
45
|
-
primary_key:
|
46
|
-
- site_url
|
47
|
-
- date
|
48
|
-
- country
|
49
|
-
- search_type
|
50
44
|
retriever:
|
51
45
|
type: SimpleRetriever
|
52
46
|
requester:
|
@@ -60,13 +54,10 @@ definitions:
|
|
60
54
|
request_body_json:
|
61
55
|
startDate: "{{ stream_interval.get('start_time') }}"
|
62
56
|
endDate: "{{ stream_interval.get('end_time') }}"
|
63
|
-
dimensions:
|
57
|
+
dimensions: "{{ parameters['dimensions'] }}"
|
64
58
|
type: "{{ stream_partition.get('search_type') }}"
|
65
|
-
aggregationType: auto
|
59
|
+
aggregationType: "{{ 'auto' if config.get('always_use_aggregation_type_auto') else parameters.get('aggregationType') }}"
|
66
60
|
dataState: "{{ config.get('data_state', 'final') }}"
|
67
|
-
# Currently relying on the default error handler behavior. Two pieces of functionality not covered are
|
68
|
-
# - Silently skipping over 403 permissions errors and relying on partial success reporting
|
69
|
-
# - Retrying 400 errors with aggregation_type=auto instead of failing outright
|
70
61
|
paginator:
|
71
62
|
type: DefaultPaginator
|
72
63
|
page_token_option:
|
@@ -92,13 +83,7 @@ definitions:
|
|
92
83
|
values: "{{ config['site_urls'] }}"
|
93
84
|
cursor_field: site_url
|
94
85
|
- type: ListPartitionRouter
|
95
|
-
values:
|
96
|
-
- web
|
97
|
-
- news
|
98
|
-
- image
|
99
|
-
- video
|
100
|
-
- discover
|
101
|
-
- googleNews
|
86
|
+
values: "{{ parameters['search_types'] }}"
|
102
87
|
cursor_field: search_type
|
103
88
|
incremental_sync:
|
104
89
|
type: DatetimeBasedCursor
|
@@ -116,19 +101,413 @@ definitions:
|
|
116
101
|
datetime_format: "%Y-%m-%d"
|
117
102
|
step: P3D
|
118
103
|
cursor_granularity: P1D
|
104
|
+
state_migrations:
|
105
|
+
- type: CustomStateMigration
|
106
|
+
class_name: source_google_search_console.components.NestedSubstreamStateMigration
|
107
|
+
|
108
|
+
search_analytics_all_fields_stream:
|
109
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
110
|
+
name: search_analytics_all_fields
|
111
|
+
primary_key:
|
112
|
+
- site_url
|
113
|
+
- date
|
114
|
+
- country
|
115
|
+
- device
|
116
|
+
- query
|
117
|
+
- page
|
118
|
+
- search_type
|
119
|
+
transformations:
|
120
|
+
- type: AddFields
|
121
|
+
fields:
|
122
|
+
- path:
|
123
|
+
- site_url
|
124
|
+
value: "{{ stream_partition['site_url'] }}"
|
125
|
+
- path:
|
126
|
+
- search_type
|
127
|
+
value: "{{ stream_partition['search_type'] }}"
|
128
|
+
- type: AddFields
|
129
|
+
fields:
|
130
|
+
- path:
|
131
|
+
- date
|
132
|
+
value: "{{ record['keys'][0] }}"
|
133
|
+
- path:
|
134
|
+
- country
|
135
|
+
value: "{{ record['keys'][1] }}"
|
136
|
+
- path:
|
137
|
+
- device
|
138
|
+
value: "{{ record['keys'][2] }}"
|
139
|
+
- path:
|
140
|
+
- page
|
141
|
+
value: "{{ record['keys'][3] }}"
|
142
|
+
- path:
|
143
|
+
- query
|
144
|
+
value: "{{ record['keys'][4] }}"
|
145
|
+
- type: RemoveFields
|
146
|
+
field_pointers:
|
147
|
+
- - keys
|
148
|
+
schema_loader:
|
149
|
+
type: InlineSchemaLoader
|
150
|
+
schema:
|
151
|
+
$ref: "#/schemas/search_analytics_all_fields"
|
152
|
+
$parameters:
|
153
|
+
dimensions:
|
154
|
+
- date
|
155
|
+
- country
|
156
|
+
- device
|
157
|
+
- page
|
158
|
+
- query
|
159
|
+
aggregationType: auto
|
160
|
+
search_types:
|
161
|
+
- web
|
162
|
+
- news
|
163
|
+
- image
|
164
|
+
- video
|
165
|
+
|
166
|
+
search_analytics_by_country_stream:
|
167
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
168
|
+
name: search_analytics_by_country
|
169
|
+
primary_key:
|
170
|
+
- site_url
|
171
|
+
- date
|
172
|
+
- country
|
173
|
+
- search_type
|
174
|
+
transformations:
|
175
|
+
- type: AddFields
|
176
|
+
fields:
|
177
|
+
- path:
|
178
|
+
- site_url
|
179
|
+
value: "{{ stream_partition['site_url'] }}"
|
180
|
+
- path:
|
181
|
+
- search_type
|
182
|
+
value: "{{ stream_partition['search_type'] }}"
|
183
|
+
# The values in the 'keys' array in the record correspond to the same order that the dimensions
|
184
|
+
# are requested in the API request. For example, if the request body was `dimensions: ["date", "country"]`,
|
185
|
+
# then the first value of `keys` is placed under the `date` field. These arrays are always the same length.
|
186
|
+
# After extracting the keys, the `keys` array is removed from the record.
|
187
|
+
- type: AddFields
|
188
|
+
fields:
|
189
|
+
- path:
|
190
|
+
- date
|
191
|
+
value: "{{ record['keys'][0] }}"
|
192
|
+
- path:
|
193
|
+
- country
|
194
|
+
value: "{{ record['keys'][1] }}"
|
195
|
+
- type: RemoveFields
|
196
|
+
field_pointers:
|
197
|
+
- - keys
|
198
|
+
schema_loader:
|
199
|
+
type: InlineSchemaLoader
|
200
|
+
schema:
|
201
|
+
$ref: "#/schemas/search_analytics_by_country"
|
202
|
+
$parameters:
|
203
|
+
dimensions:
|
204
|
+
- date
|
205
|
+
- country
|
206
|
+
aggregationType: auto
|
207
|
+
search_types:
|
208
|
+
- web
|
209
|
+
- news
|
210
|
+
- image
|
211
|
+
- video
|
212
|
+
- discover
|
213
|
+
- googleNews
|
214
|
+
|
215
|
+
search_analytics_by_date_stream:
|
216
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
217
|
+
name: search_analytics_by_date
|
218
|
+
primary_key:
|
219
|
+
- site_url
|
220
|
+
- date
|
221
|
+
- search_type
|
222
|
+
transformations:
|
223
|
+
- type: AddFields
|
224
|
+
fields:
|
225
|
+
- path:
|
226
|
+
- site_url
|
227
|
+
value: "{{ stream_partition['site_url'] }}"
|
228
|
+
- path:
|
229
|
+
- search_type
|
230
|
+
value: "{{ stream_partition['search_type'] }}"
|
231
|
+
- type: AddFields
|
232
|
+
fields:
|
233
|
+
- path:
|
234
|
+
- date
|
235
|
+
value: "{{ record['keys'][0] }}"
|
236
|
+
- type: RemoveFields
|
237
|
+
field_pointers:
|
238
|
+
- - keys
|
239
|
+
schema_loader:
|
240
|
+
type: InlineSchemaLoader
|
241
|
+
schema:
|
242
|
+
$ref: "#/schemas/search_analytics_by_date"
|
243
|
+
$parameters:
|
244
|
+
dimensions:
|
245
|
+
- date
|
246
|
+
aggregationType: auto
|
247
|
+
search_types:
|
248
|
+
- web
|
249
|
+
- news
|
250
|
+
- image
|
251
|
+
- video
|
252
|
+
- discover
|
253
|
+
- googleNews
|
254
|
+
|
255
|
+
search_analytics_by_device_stream:
|
256
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
257
|
+
name: search_analytics_by_device
|
258
|
+
primary_key:
|
259
|
+
- site_url
|
260
|
+
- date
|
261
|
+
- device
|
262
|
+
- search_type
|
263
|
+
transformations:
|
264
|
+
- type: AddFields
|
265
|
+
fields:
|
266
|
+
- path:
|
267
|
+
- site_url
|
268
|
+
value: "{{ stream_partition['site_url'] }}"
|
269
|
+
- path:
|
270
|
+
- search_type
|
271
|
+
value: "{{ stream_partition['search_type'] }}"
|
272
|
+
# The values in the 'keys' array in the record correspond to the same order that the dimensions
|
273
|
+
# are requested in the API request. For example, if the request body was `dimensions: ["date", "device"]`,
|
274
|
+
# then the first value of `keys` is placed under the `date` field. These arrays are always the same length.
|
275
|
+
# After extracting the keys, the `keys` array is removed from the record.
|
276
|
+
- type: AddFields
|
277
|
+
fields:
|
278
|
+
- path:
|
279
|
+
- date
|
280
|
+
value: "{{ record['keys'][0] }}"
|
281
|
+
- path:
|
282
|
+
- device
|
283
|
+
value: "{{ record['keys'][1] }}"
|
284
|
+
- type: RemoveFields
|
285
|
+
field_pointers:
|
286
|
+
- - keys
|
287
|
+
schema_loader:
|
288
|
+
type: InlineSchemaLoader
|
289
|
+
schema:
|
290
|
+
$ref: "#/schemas/search_analytics_by_device"
|
291
|
+
$parameters:
|
292
|
+
dimensions:
|
293
|
+
- date
|
294
|
+
- device
|
295
|
+
aggregationType: auto
|
296
|
+
search_types:
|
297
|
+
- web
|
298
|
+
- news
|
299
|
+
- image
|
300
|
+
- video
|
301
|
+
- googleNews
|
302
|
+
|
303
|
+
search_analytics_by_page_stream:
|
304
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
305
|
+
name: search_analytics_by_page
|
306
|
+
primary_key:
|
307
|
+
- site_url
|
308
|
+
- date
|
309
|
+
- page
|
310
|
+
- search_type
|
311
|
+
transformations:
|
312
|
+
- type: AddFields
|
313
|
+
fields:
|
314
|
+
- path:
|
315
|
+
- site_url
|
316
|
+
value: "{{ stream_partition['site_url'] }}"
|
317
|
+
- path:
|
318
|
+
- search_type
|
319
|
+
value: "{{ stream_partition['search_type'] }}"
|
320
|
+
- type: AddFields
|
321
|
+
fields:
|
322
|
+
- path:
|
323
|
+
- date
|
324
|
+
value: "{{ record['keys'][0] }}"
|
325
|
+
- path:
|
326
|
+
- page
|
327
|
+
value: "{{ record['keys'][1] }}"
|
328
|
+
- type: RemoveFields
|
329
|
+
field_pointers:
|
330
|
+
- - keys
|
331
|
+
schema_loader:
|
332
|
+
type: InlineSchemaLoader
|
333
|
+
schema:
|
334
|
+
$ref: "#/schemas/search_analytics_by_page"
|
335
|
+
$parameters:
|
336
|
+
dimensions:
|
337
|
+
- date
|
338
|
+
- page
|
339
|
+
aggregationType: auto
|
340
|
+
search_types:
|
341
|
+
- web
|
342
|
+
- news
|
343
|
+
- image
|
344
|
+
- video
|
345
|
+
- discover
|
346
|
+
- googleNews
|
347
|
+
|
348
|
+
search_analytics_by_query_stream:
|
349
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
350
|
+
name: search_analytics_by_query
|
351
|
+
primary_key:
|
352
|
+
- site_url
|
353
|
+
- date
|
354
|
+
- query
|
355
|
+
- search_type
|
356
|
+
transformations:
|
357
|
+
- type: AddFields
|
358
|
+
fields:
|
359
|
+
- path:
|
360
|
+
- site_url
|
361
|
+
value: "{{ stream_partition['site_url'] }}"
|
362
|
+
- path:
|
363
|
+
- search_type
|
364
|
+
value: "{{ stream_partition['search_type'] }}"
|
365
|
+
- type: AddFields
|
366
|
+
fields:
|
367
|
+
- path:
|
368
|
+
- date
|
369
|
+
value: "{{ record['keys'][0] }}"
|
370
|
+
- path:
|
371
|
+
- query
|
372
|
+
value: "{{ record['keys'][1] }}"
|
373
|
+
- type: RemoveFields
|
374
|
+
field_pointers:
|
375
|
+
- - keys
|
376
|
+
schema_loader:
|
377
|
+
type: InlineSchemaLoader
|
378
|
+
schema:
|
379
|
+
$ref: "#/schemas/search_analytics_by_query"
|
380
|
+
$parameters:
|
381
|
+
dimensions:
|
382
|
+
- date
|
383
|
+
- query
|
384
|
+
aggregationType: auto
|
385
|
+
search_types:
|
386
|
+
- web
|
387
|
+
- news
|
388
|
+
- image
|
389
|
+
- video
|
390
|
+
|
391
|
+
search_analytics_page_report_stream:
|
392
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
393
|
+
name: search_analytics_page_report
|
394
|
+
primary_key:
|
395
|
+
- site_url
|
396
|
+
- date
|
397
|
+
- country
|
398
|
+
- device
|
399
|
+
- search_type
|
400
|
+
- page
|
401
|
+
transformations:
|
402
|
+
- type: AddFields
|
403
|
+
fields:
|
404
|
+
- path:
|
405
|
+
- site_url
|
406
|
+
value: "{{ stream_partition.get('site_url') }}"
|
407
|
+
- path:
|
408
|
+
- search_type
|
409
|
+
value: "{{ stream_partition.get('search_type') }}"
|
410
|
+
- type: AddFields
|
411
|
+
fields:
|
412
|
+
- path:
|
413
|
+
- date
|
414
|
+
value: "{{ record['keys'][0] }}"
|
415
|
+
- path:
|
416
|
+
- country
|
417
|
+
value: "{{ record['keys'][1] }}"
|
418
|
+
- path:
|
419
|
+
- device
|
420
|
+
value: "{{ record['keys'][2] }}"
|
421
|
+
- path:
|
422
|
+
- page
|
423
|
+
value: "{{ record['keys'][3] }}"
|
424
|
+
- type: RemoveFields
|
425
|
+
field_pointers:
|
426
|
+
- - keys
|
427
|
+
schema_loader:
|
428
|
+
type: InlineSchemaLoader
|
429
|
+
schema:
|
430
|
+
$ref: "#/schemas/search_analytics_page_report"
|
431
|
+
$parameters:
|
432
|
+
dimensions:
|
433
|
+
- date
|
434
|
+
- country
|
435
|
+
- device
|
436
|
+
- page
|
437
|
+
search_types:
|
438
|
+
- web
|
439
|
+
- news
|
440
|
+
- image
|
441
|
+
- video
|
442
|
+
- googleNews
|
443
|
+
|
444
|
+
search_analytics_site_report_by_page_stream:
|
445
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
446
|
+
name: search_analytics_site_report_by_page
|
447
|
+
primary_key:
|
448
|
+
- site_url
|
449
|
+
- date
|
450
|
+
- country
|
451
|
+
- device
|
452
|
+
- search_type
|
453
|
+
transformations:
|
454
|
+
- type: AddFields
|
455
|
+
fields:
|
456
|
+
- path:
|
457
|
+
- site_url
|
458
|
+
value: "{{ stream_partition.get('site_url') }}"
|
459
|
+
- path:
|
460
|
+
- search_type
|
461
|
+
value: "{{ stream_partition.get('search_type') }}"
|
462
|
+
- type: AddFields
|
463
|
+
fields:
|
464
|
+
- path:
|
465
|
+
- date
|
466
|
+
value: "{{ record['keys'][0] }}"
|
467
|
+
- path:
|
468
|
+
- country
|
469
|
+
value: "{{ record['keys'][1] }}"
|
470
|
+
- path:
|
471
|
+
- device
|
472
|
+
value: "{{ record['keys'][2] }}"
|
473
|
+
- type: RemoveFields
|
474
|
+
field_pointers:
|
475
|
+
- - keys
|
476
|
+
schema_loader:
|
477
|
+
type: InlineSchemaLoader
|
478
|
+
schema:
|
479
|
+
$ref: "#/schemas/search_analytics_site_report_by_page"
|
480
|
+
$parameters:
|
481
|
+
aggregationType: byPage
|
482
|
+
dimensions:
|
483
|
+
- date
|
484
|
+
- country
|
485
|
+
- device
|
486
|
+
search_types:
|
487
|
+
- web
|
488
|
+
- news
|
489
|
+
- image
|
490
|
+
- video
|
491
|
+
- googleNews
|
492
|
+
|
493
|
+
search_analytics_site_report_by_site_stream:
|
494
|
+
$ref: "#/definitions/base_search_analytics_stream"
|
495
|
+
name: search_analytics_site_report_by_site
|
496
|
+
primary_key:
|
497
|
+
- site_url
|
498
|
+
- date
|
499
|
+
- country
|
500
|
+
- device
|
501
|
+
- search_type
|
119
502
|
transformations:
|
120
503
|
- type: AddFields
|
121
504
|
fields:
|
122
505
|
- path:
|
123
506
|
- site_url
|
124
|
-
value: "{{ stream_partition
|
507
|
+
value: "{{ stream_partition.get('site_url') }}"
|
125
508
|
- path:
|
126
509
|
- search_type
|
127
|
-
value: "{{ stream_partition
|
128
|
-
# The values in the 'keys' array in the record correspond to the same order that the dimensions
|
129
|
-
# are requested in the API request. For example, if the request body was `dimensions: ["date", "country"]`,
|
130
|
-
# then the first value of `keys` is placed under the `date` field. These arrays are always be the same length
|
131
|
-
# After extracting the keys, the `keys` array is removed from the record.
|
510
|
+
value: "{{ stream_partition.get('search_type') }}"
|
132
511
|
- type: AddFields
|
133
512
|
fields:
|
134
513
|
- path:
|
@@ -137,16 +516,28 @@ definitions:
|
|
137
516
|
- path:
|
138
517
|
- country
|
139
518
|
value: "{{ record['keys'][1] }}"
|
519
|
+
- path:
|
520
|
+
- device
|
521
|
+
value: "{{ record['keys'][2] }}"
|
140
522
|
- type: RemoveFields
|
141
523
|
field_pointers:
|
142
524
|
- - keys
|
143
525
|
schema_loader:
|
144
526
|
type: InlineSchemaLoader
|
145
527
|
schema:
|
146
|
-
$ref: "#/schemas/
|
147
|
-
|
148
|
-
|
149
|
-
|
528
|
+
$ref: "#/schemas/search_analytics_site_report_by_site"
|
529
|
+
$parameters:
|
530
|
+
aggregationType: byProperty
|
531
|
+
dimensions:
|
532
|
+
- date
|
533
|
+
- country
|
534
|
+
- device
|
535
|
+
search_types:
|
536
|
+
- web
|
537
|
+
- news
|
538
|
+
- image
|
539
|
+
- video
|
540
|
+
- googleNews
|
150
541
|
|
151
542
|
sites_stream:
|
152
543
|
type: DeclarativeStream
|
@@ -486,20 +877,212 @@ definitions:
|
|
486
877
|
- date
|
487
878
|
- country
|
488
879
|
- device
|
489
|
-
- query
|
490
880
|
|
491
881
|
streams:
|
492
882
|
# Regular streams
|
493
883
|
- "#/definitions/sites_stream"
|
494
884
|
- "#/definitions/sitemaps_stream"
|
495
885
|
# Search Analytics streams
|
886
|
+
- "#/definitions/search_analytics_all_fields_stream"
|
496
887
|
- "#/definitions/search_analytics_by_country_stream"
|
888
|
+
- "#/definitions/search_analytics_by_date_stream"
|
889
|
+
- "#/definitions/search_analytics_by_device_stream"
|
890
|
+
- "#/definitions/search_analytics_by_page_stream"
|
891
|
+
- "#/definitions/search_analytics_by_query_stream"
|
892
|
+
- "#/definitions/search_analytics_page_report_stream"
|
893
|
+
- "#/definitions/search_analytics_site_report_by_page_stream"
|
894
|
+
- "#/definitions/search_analytics_site_report_by_site_stream"
|
497
895
|
# Search Analytics Keyword streams
|
498
896
|
- "#/definitions/search_analytics_keyword_page_report_stream"
|
499
897
|
- "#/definitions/search_analytics_keyword_site_report_by_page_stream"
|
500
898
|
- "#/definitions/search_analytics_keyword_site_report_by_site_stream"
|
501
899
|
|
900
|
+
dynamic_streams:
|
901
|
+
- type: DynamicDeclarativeStream
|
902
|
+
stream_template:
|
903
|
+
type: DeclarativeStream
|
904
|
+
name: search_analytics_by_custom_dimensions # This will be replaced by the name of the custom report
|
905
|
+
primary_key: # This will be replaced by the dimensions of the custom report
|
906
|
+
- site_url
|
907
|
+
- search_type
|
908
|
+
retriever:
|
909
|
+
type: SimpleRetriever
|
910
|
+
requester:
|
911
|
+
type: HttpRequester
|
912
|
+
url_base: https://www.googleapis.com/webmasters/v3
|
913
|
+
path: "/sites/{{ sanitize_url(stream_partition.get('site_url')) }}/searchAnalytics/query"
|
914
|
+
http_method: POST
|
915
|
+
authenticator: "#/definitions/selective_authenticator"
|
916
|
+
request_headers:
|
917
|
+
Content-Type: "application/json"
|
918
|
+
request_body_json:
|
919
|
+
startDate: "{{ stream_interval.get('start_time') }}"
|
920
|
+
endDate: "{{ stream_interval.get('end_time') }}"
|
921
|
+
dimensions: ["date", "country"] # This will be replaced by the dimensions of the custom report
|
922
|
+
type: "{{ stream_partition.get('search_type') }}"
|
923
|
+
aggregationType: auto
|
924
|
+
dataState: "{{ config.get('data_state', 'final') }}"
|
925
|
+
paginator:
|
926
|
+
type: DefaultPaginator
|
927
|
+
page_token_option:
|
928
|
+
type: RequestOption
|
929
|
+
field_name: startRow
|
930
|
+
inject_into: body_json
|
931
|
+
page_size_option:
|
932
|
+
type: RequestOption
|
933
|
+
field_name: rowLimit
|
934
|
+
inject_into: body_json
|
935
|
+
pagination_strategy:
|
936
|
+
type: OffsetIncrement
|
937
|
+
page_size: 25000
|
938
|
+
inject_on_first_request: true
|
939
|
+
record_selector:
|
940
|
+
type: RecordSelector
|
941
|
+
extractor:
|
942
|
+
type: DpathExtractor
|
943
|
+
field_path:
|
944
|
+
- rows
|
945
|
+
partition_router:
|
946
|
+
- type: ListPartitionRouter
|
947
|
+
values: "{{ config['site_urls'] }}"
|
948
|
+
cursor_field: site_url
|
949
|
+
- type: ListPartitionRouter
|
950
|
+
values:
|
951
|
+
- web
|
952
|
+
- news
|
953
|
+
- image
|
954
|
+
- video
|
955
|
+
cursor_field: search_type
|
956
|
+
incremental_sync:
|
957
|
+
type: DatetimeBasedCursor
|
958
|
+
cursor_field: date
|
959
|
+
cursor_datetime_formats:
|
960
|
+
- "%Y-%m-%d"
|
961
|
+
datetime_format: "%Y-%m-%d"
|
962
|
+
start_datetime:
|
963
|
+
type: MinMaxDatetime
|
964
|
+
datetime: "{{ config.get('start_date', '2021-01-01') }}"
|
965
|
+
datetime_format: "%Y-%m-%d"
|
966
|
+
end_datetime:
|
967
|
+
type: MinMaxDatetime
|
968
|
+
datetime: "{{ config.get('end_date', today_utc()) }}"
|
969
|
+
datetime_format: "%Y-%m-%d"
|
970
|
+
step: P3D
|
971
|
+
cursor_granularity: P1D
|
972
|
+
transformations:
|
973
|
+
- type: AddFields
|
974
|
+
fields:
|
975
|
+
- path:
|
976
|
+
- site_url
|
977
|
+
value: "{{ stream_partition['site_url'] }}"
|
978
|
+
- path:
|
979
|
+
- search_type
|
980
|
+
value: "{{ stream_partition['search_type'] }}"
|
981
|
+
- type: CustomTransformation
|
982
|
+
class_name: source_google_search_console.components.CustomReportExtractDimensionsFromKeys
|
983
|
+
dimensions: # This will be replaced by the dimensions of the custom report
|
984
|
+
- date
|
985
|
+
- country
|
986
|
+
schema_loader:
|
987
|
+
type: CustomSchemaLoader
|
988
|
+
class_name: source_google_search_console.components.CustomReportSchemaLoader
|
989
|
+
dimensions: [] # This will be replaced by the dimensions of the custom report
|
990
|
+
state_migrations:
|
991
|
+
- type: CustomStateMigration
|
992
|
+
class_name: source_google_search_console.components.NestedSubstreamStateMigration
|
993
|
+
components_resolver:
|
994
|
+
type: ConfigComponentsResolver
|
995
|
+
stream_config:
|
996
|
+
type: StreamConfig
|
997
|
+
configs_pointer:
|
998
|
+
- custom_reports_array
|
999
|
+
components_mapping:
|
1000
|
+
- type: ComponentMappingDefinition
|
1001
|
+
field_path:
|
1002
|
+
# - "**" # is this needed
|
1003
|
+
- name
|
1004
|
+
value: "{{components_values['name']}}"
|
1005
|
+
- type: ComponentMappingDefinition
|
1006
|
+
field_path:
|
1007
|
+
- primary_key
|
1008
|
+
value: "{{ components_values['dimensions'] + (['date'] if 'date' not in components_values['dimensions'] else []) + ['site_url', 'search_type'] }}"
|
1009
|
+
- type: ComponentMappingDefinition
|
1010
|
+
field_path:
|
1011
|
+
- retriever
|
1012
|
+
- requester
|
1013
|
+
- request_body_json
|
1014
|
+
- dimensions
|
1015
|
+
# `date` is a cursor field therefore should be a mandatory dimension if not already present
|
1016
|
+
value: "{{ components_values['dimensions'] + (['date'] if 'date' not in components_values['dimensions'] else []) }}"
|
1017
|
+
- type: ComponentMappingDefinition
|
1018
|
+
field_path:
|
1019
|
+
- transformations
|
1020
|
+
- "1"
|
1021
|
+
- dimensions
|
1022
|
+
value: "{{ components_values['dimensions'] + (['date'] if 'date' not in components_values['dimensions'] else []) }}"
|
1023
|
+
- type: ComponentMappingDefinition
|
1024
|
+
field_path:
|
1025
|
+
- schema_loader
|
1026
|
+
- dimensions
|
1027
|
+
value: "{{ components_values['dimensions'] + (['date'] if 'date' not in components_values['dimensions'] else []) }}"
|
1028
|
+
|
1029
|
+
# Google Search Console has three layers of quotas that dictate rate limiting at the
|
1030
|
+
# user making requests, site being requested, and developer console key used.
|
1031
|
+
# https://developers.google.com/webmaster-tools/limits#qps-quota
|
1032
|
+
# - Per Site Quota: 1,200 req/min (20 req/sec)
|
1033
|
+
# - Per User Quota: 1,200 req/min (20 req/sec)
|
1034
|
+
# - Per Project Quota: 30,000,000 req/day (350 req/sec) / 40,000 req/min (~667 req/sec)
|
1035
|
+
#
|
1036
|
+
# The most likely upper bound is based on the user quota since it is the lowest and the
|
1037
|
+
# same authenticated user account may hit multiple site urls. The default is set to 40
|
1038
|
+
# which equates to roughly one request every 2 seconds per worker (about 20 req/sec in total), which seems like a fair baseline.
|
1039
|
+
#
|
1040
|
+
concurrency_level:
|
1041
|
+
type: ConcurrencyLevel
|
1042
|
+
default_concurrency: "{{ config.get('num_workers', 40) }}"
|
1043
|
+
max_concurrency: 100
|
1044
|
+
|
502
1045
|
schemas:
|
1046
|
+
search_analytics_all_fields:
|
1047
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1048
|
+
type: object
|
1049
|
+
properties:
|
1050
|
+
site_url:
|
1051
|
+
description: "The URL of the site from which the data originates."
|
1052
|
+
type: ["null", "string"]
|
1053
|
+
search_type:
|
1054
|
+
description: "The type of search (e.g., web, image, video) that triggered the search result."
|
1055
|
+
type: ["null", "string"]
|
1056
|
+
date:
|
1057
|
+
description: "The date when the search query occurred."
|
1058
|
+
type: ["null", "string"]
|
1059
|
+
format: "date"
|
1060
|
+
country:
|
1061
|
+
description: "The country from which the search query originated."
|
1062
|
+
type: ["null", "string"]
|
1063
|
+
device:
|
1064
|
+
description: "The type of device used by the user (e.g., desktop, mobile)."
|
1065
|
+
type: ["null", "string"]
|
1066
|
+
page:
|
1067
|
+
description: "The page URL that appeared in the search results."
|
1068
|
+
type: ["null", "string"]
|
1069
|
+
query:
|
1070
|
+
description: "The search query entered by the user."
|
1071
|
+
type: ["null", "string"]
|
1072
|
+
clicks:
|
1073
|
+
description: "The number of times users clicked on the search result for a specific query."
|
1074
|
+
type: ["null", "integer"]
|
1075
|
+
impressions:
|
1076
|
+
description: "The number of times a search result appeared in response to a query."
|
1077
|
+
type: ["null", "integer"]
|
1078
|
+
ctr:
|
1079
|
+
description: "Click-through rate, calculated as clicks divided by impressions."
|
1080
|
+
type: ["null", "number"]
|
1081
|
+
multipleOf: 1.e-25
|
1082
|
+
position:
|
1083
|
+
description: "The average position of the search result on the search engine results page."
|
1084
|
+
type: ["null", "number"]
|
1085
|
+
multipleOf: 1.e-25
|
503
1086
|
search_analytics_by_country:
|
504
1087
|
$schema: "http://json-schema.org/draft-07/schema#"
|
505
1088
|
type: object
|
@@ -557,6 +1140,291 @@ schemas:
|
|
557
1140
|
- "null"
|
558
1141
|
- number
|
559
1142
|
multipleOf: 1.e-25
|
1143
|
+
search_analytics_by_date:
|
1144
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1145
|
+
type: object
|
1146
|
+
properties:
|
1147
|
+
site_url:
|
1148
|
+
description: "The URL of the site for which the search analytics data is being reported."
|
1149
|
+
type: ["null", "string"]
|
1150
|
+
search_type:
|
1151
|
+
description: "The type of search query (e.g., web, image, video) that generated the search analytics data."
|
1152
|
+
type: ["null", "string"]
|
1153
|
+
date:
|
1154
|
+
description: "The date for which the search analytics data is being reported."
|
1155
|
+
type: ["null", "string"]
|
1156
|
+
format: "date"
|
1157
|
+
clicks:
|
1158
|
+
description: "The total number of times users clicked on the search result for the site URL on the specific date."
|
1159
|
+
type: ["null", "integer"]
|
1160
|
+
impressions:
|
1161
|
+
description: "The number of times the site URL was displayed in the search results to users on the specific date."
|
1162
|
+
type: ["null", "integer"]
|
1163
|
+
ctr:
|
1164
|
+
description: "The click-through rate (CTR) represents the fraction of total impressions that resulted in a click to the site URL, expressed as a value between 0 and 1."
|
1165
|
+
type: ["null", "number"]
|
1166
|
+
multipleOf: 1.e-25
|
1167
|
+
position:
|
1168
|
+
description: "The average position of the site URL in the search results pages for the specific date."
|
1169
|
+
type: ["null", "number"]
|
1170
|
+
multipleOf: 1.e-25
|
1171
|
+
search_analytics_by_device:
|
1172
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1173
|
+
type: "object"
|
1174
|
+
properties:
|
1175
|
+
site_url:
|
1176
|
+
description: "The URL of the site for which search analytics data is being provided."
|
1177
|
+
type: ["null", "string"]
|
1178
|
+
search_type:
|
1179
|
+
description: "The type of search performed (e.g., web search, image search, video search)."
|
1180
|
+
type: ["null", "string"]
|
1181
|
+
date:
|
1182
|
+
description: "The date for which the search analytics data is provided."
|
1183
|
+
type: ["null", "string"]
|
1184
|
+
format: "date"
|
1185
|
+
device:
|
1186
|
+
description: "The type of device used by the user for the search query (e.g., desktop, mobile)."
|
1187
|
+
type: ["null", "string"]
|
1188
|
+
clicks:
|
1189
|
+
description: "The total number of times a user clicked on a search result linking to the target site."
|
1190
|
+
type: ["null", "integer"]
|
1191
|
+
impressions:
|
1192
|
+
description: "The total number of times a user saw a link to the target site in search results."
|
1193
|
+
type: ["null", "integer"]
|
1194
|
+
ctr:
|
1195
|
+
description: "Click-through rate represents the ratio of clicks to impressions, showing the effectiveness of your site in attracting clicks from search results."
|
1196
|
+
type: ["null", "number"]
|
1197
|
+
multipleOf: 1.e-25
|
1198
|
+
position:
|
1199
|
+
description: "The average position of the site's URLs in search results for the given query or queries."
|
1200
|
+
type: ["null", "number"]
|
1201
|
+
multipleOf: 1.e-25
|
1202
|
+
search_analytics_by_page:
|
1203
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1204
|
+
type: object
|
1205
|
+
properties:
|
1206
|
+
site_url:
|
1207
|
+
description: "The URL of the site for which the search analytics data is being reported."
|
1208
|
+
type: ["null", "string"]
|
1209
|
+
search_type:
|
1210
|
+
description: "The type of search query that led to the page being displayed in search results."
|
1211
|
+
type: ["null", "string"]
|
1212
|
+
date:
|
1213
|
+
description: "The date for which the search analytics data is reported."
|
1214
|
+
type: ["null", "string"]
|
1215
|
+
format: "date"
|
1216
|
+
page:
|
1217
|
+
description: "The URL of the specific page being analyzed for search analytics data."
|
1218
|
+
type: ["null", "string"]
|
1219
|
+
clicks:
|
1220
|
+
description: "The number of times a user clicked on the search result linking to the page."
|
1221
|
+
type: ["null", "integer"]
|
1222
|
+
impressions:
|
1223
|
+
description: "The number of times a page from the site appeared in the search results viewed by users."
|
1224
|
+
type: ["null", "integer"]
|
1225
|
+
ctr:
|
1226
|
+
description: "Click-through rate (CTR) is the ratio of clicks to impressions, indicating the effectiveness of the page in generating clicks."
|
1227
|
+
type: ["null", "number"]
|
1228
|
+
multipleOf: 1.e-25
|
1229
|
+
position:
|
1230
|
+
description: "The average position at which the page appeared in search results."
|
1231
|
+
type: ["null", "number"]
|
1232
|
+
multipleOf: 1.e-25
|
1233
|
+
search_analytics_by_query:
|
1234
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1235
|
+
type: object
|
1236
|
+
properties:
|
1237
|
+
site_url:
|
1238
|
+
description: "The URL of the site for which the search analytics data is captured."
|
1239
|
+
type: ["null", "string"]
|
1240
|
+
search_type:
|
1241
|
+
description: "The type of search result (e.g., web, image, video) for the specific query."
|
1242
|
+
type: ["null", "string"]
|
1243
|
+
date:
|
1244
|
+
description: "The date for which the search analytics data is recorded."
|
1245
|
+
type: ["null", "string"]
|
1246
|
+
format: "date"
|
1247
|
+
query:
|
1248
|
+
description: "The search query for which the search analytics data is recorded."
|
1249
|
+
type: ["null", "string"]
|
1250
|
+
clicks:
|
1251
|
+
description: "The number of times users clicked on the search result for the specific query."
|
1252
|
+
type: ["null", "integer"]
|
1253
|
+
impressions:
|
1254
|
+
description: "The number of times the search result was displayed for the specific query."
|
1255
|
+
type: ["null", "integer"]
|
1256
|
+
ctr:
|
1257
|
+
description: "The click-through rate for the specific query, calculated as clicks divided by impressions (a value between 0 and 1)."
|
1258
|
+
type: ["null", "number"]
|
1259
|
+
multipleOf: 1.e-25
|
1260
|
+
position:
|
1261
|
+
description: "The average position at which the search result appeared for the specific query."
|
1262
|
+
type: ["null", "number"]
|
1263
|
+
multipleOf: 1.e-25
|
1264
|
+
search_analytics_page_report:
|
1265
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1266
|
+
type: object
|
1267
|
+
additionalProperties: true
|
1268
|
+
properties:
|
1269
|
+
site_url:
|
1270
|
+
description: The URL of the website for which the search analytics data is being reported.
|
1271
|
+
type:
|
1272
|
+
- "null"
|
1273
|
+
- string
|
1274
|
+
search_type:
|
1275
|
+
description: The type of search (e.g., web, image, video) that led users to the website.
|
1276
|
+
type:
|
1277
|
+
- "null"
|
1278
|
+
- string
|
1279
|
+
date:
|
1280
|
+
description: The date when the search data was recorded.
|
1281
|
+
type:
|
1282
|
+
- "null"
|
1283
|
+
- string
|
1284
|
+
format: date
|
1285
|
+
country:
|
1286
|
+
description: The country from which the search originated.
|
1287
|
+
type:
|
1288
|
+
- "null"
|
1289
|
+
- string
|
1290
|
+
page:
|
1291
|
+
description: The specific page URL within the website that appeared in search results.
|
1292
|
+
type:
|
1293
|
+
- "null"
|
1294
|
+
- string
|
1295
|
+
device:
|
1296
|
+
description: The type of device used by the user for the search query (e.g., desktop, mobile).
|
1297
|
+
type:
|
1298
|
+
- "null"
|
1299
|
+
- string
|
1300
|
+
clicks:
|
1301
|
+
description: The total number of times users clicked on search results that led to the linked website.
|
1302
|
+
type:
|
1303
|
+
- "null"
|
1304
|
+
- integer
|
1305
|
+
impressions:
|
1306
|
+
description: The total number of times a search result from the linked website was shown to users.
|
1307
|
+
type:
|
1308
|
+
- "null"
|
1309
|
+
- integer
|
1310
|
+
ctr:
|
1311
|
+
description: "Click-through rate: the ratio of clicks to total impressions, expressed as a value between 0 and 1."
|
1312
|
+
type:
|
1313
|
+
- "null"
|
1314
|
+
- number
|
1315
|
+
multipleOf: 1.e-25
|
1316
|
+
position:
|
1317
|
+
description: The average position at which the website's search results appeared to users.
|
1318
|
+
type:
|
1319
|
+
- "null"
|
1320
|
+
- number
|
1321
|
+
multipleOf: 1.e-25
|
1322
|
+
search_analytics_site_report_by_page:
|
1323
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1324
|
+
type: object
|
1325
|
+
additionalProperties: true
|
1326
|
+
properties:
|
1327
|
+
site_url:
|
1328
|
+
description: The URL of the site for which the data is being reported.
|
1329
|
+
type:
|
1330
|
+
- "null"
|
1331
|
+
- string
|
1332
|
+
search_type:
|
1333
|
+
description: The type of search query that led to the page being shown.
|
1334
|
+
type:
|
1335
|
+
- "null"
|
1336
|
+
- string
|
1337
|
+
date:
|
1338
|
+
description: The date for which the data is being reported.
|
1339
|
+
type:
|
1340
|
+
- "null"
|
1341
|
+
- string
|
1342
|
+
format: date
|
1343
|
+
country:
|
1344
|
+
description: The country from which the search traffic originated.
|
1345
|
+
type:
|
1346
|
+
- "null"
|
1347
|
+
- string
|
1348
|
+
device:
|
1349
|
+
description: "The type of device used by the searcher (e.g., desktop, mobile)."
|
1350
|
+
type:
|
1351
|
+
- "null"
|
1352
|
+
- string
|
1353
|
+
clicks:
|
1354
|
+
description: The total number of clicks received by the page from search results.
|
1355
|
+
type:
|
1356
|
+
- "null"
|
1357
|
+
- integer
|
1358
|
+
impressions:
|
1359
|
+
description: The total number of times the page appeared in search results.
|
1360
|
+
type:
|
1361
|
+
- "null"
|
1362
|
+
- integer
|
1363
|
+
ctr:
|
1364
|
+
description: The click-through rate, i.e., the fraction of total impressions that resulted in clicks (a value between 0 and 1).
|
1365
|
+
type:
|
1366
|
+
- "null"
|
1367
|
+
- number
|
1368
|
+
multipleOf: 1.e-25
|
1369
|
+
position:
|
1370
|
+
description: The average position at which the page appeared in search results.
|
1371
|
+
type:
|
1372
|
+
- "null"
|
1373
|
+
- number
|
1374
|
+
multipleOf: 1.e-25
|
1375
|
+
search_analytics_site_report_by_site:
|
1376
|
+
$schema: "http://json-schema.org/draft-07/schema#"
|
1377
|
+
type: object
|
1378
|
+
additionalProperties: true
|
1379
|
+
properties:
|
1380
|
+
site_url:
|
1381
|
+
description: The URL of the site being analyzed
|
1382
|
+
type:
|
1383
|
+
- "null"
|
1384
|
+
- string
|
1385
|
+
search_type:
|
1386
|
+
description: "The type of search (e.g., web, image, video)"
|
1387
|
+
type:
|
1388
|
+
- "null"
|
1389
|
+
- string
|
1390
|
+
date:
|
1391
|
+
description: The date of the search analytics data
|
1392
|
+
type:
|
1393
|
+
- "null"
|
1394
|
+
- string
|
1395
|
+
format: date
|
1396
|
+
country:
|
1397
|
+
description: The country where the search took place
|
1398
|
+
type:
|
1399
|
+
- "null"
|
1400
|
+
- string
|
1401
|
+
device:
|
1402
|
+
description: "The type of device used for the search (e.g., mobile, desktop)"
|
1403
|
+
type:
|
1404
|
+
- "null"
|
1405
|
+
- string
|
1406
|
+
clicks:
|
1407
|
+
description: The number of times users clicked on a search result linking to the site
|
1408
|
+
type:
|
1409
|
+
- "null"
|
1410
|
+
- integer
|
1411
|
+
impressions:
|
1412
|
+
description: The number of times the site appeared in search results
|
1413
|
+
type:
|
1414
|
+
- "null"
|
1415
|
+
- integer
|
1416
|
+
ctr:
|
1417
|
+
description: Click-through rate calculated as clicks divided by impressions
|
1418
|
+
type:
|
1419
|
+
- "null"
|
1420
|
+
- number
|
1421
|
+
multipleOf: 1.e-25
|
1422
|
+
position:
|
1423
|
+
description: The average position of the site in search results
|
1424
|
+
type:
|
1425
|
+
- "null"
|
1426
|
+
- number
|
1427
|
+
multipleOf: 1.e-25
|
560
1428
|
sites:
|
561
1429
|
$schema: "http://json-schema.org/draft-07/schema#"
|
562
1430
|
type: object
|