airbyte-source-google-search-console 1.6.0rc1__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {airbyte_source_google_search_console-1.6.0rc1.dist-info → airbyte_source_google_search_console-1.8.0.dist-info}/METADATA +1 -1
  2. airbyte_source_google_search_console-1.8.0.dist-info/RECORD +14 -0
  3. source_google_search_console/manifest.yaml +1269 -43
  4. source_google_search_console/source.py +2 -31
  5. source_google_search_console/spec.json +16 -0
  6. source_google_search_console/streams.py +0 -112
  7. airbyte_source_google_search_console-1.6.0rc1.dist-info/RECORD +0 -25
  8. source_google_search_console/schemas/search_analytics_all_fields.json +0 -53
  9. source_google_search_console/schemas/search_analytics_by_date.json +0 -37
  10. source_google_search_console/schemas/search_analytics_by_device.json +0 -41
  11. source_google_search_console/schemas/search_analytics_by_page.json +0 -41
  12. source_google_search_console/schemas/search_analytics_by_query.json +0 -41
  13. source_google_search_console/schemas/search_analytics_keyword_page_report.json +0 -54
  14. source_google_search_console/schemas/search_analytics_keyword_site_report_by_page.json +0 -50
  15. source_google_search_console/schemas/search_analytics_keyword_site_report_by_site.json +0 -50
  16. source_google_search_console/schemas/search_analytics_page_report.json +0 -50
  17. source_google_search_console/schemas/search_analytics_site_report_by_page.json +0 -46
  18. source_google_search_console/schemas/search_analytics_site_report_by_site.json +0 -46
  19. {airbyte_source_google_search_console-1.6.0rc1.dist-info → airbyte_source_google_search_console-1.8.0.dist-info}/WHEEL +0 -0
  20. {airbyte_source_google_search_console-1.6.0rc1.dist-info → airbyte_source_google_search_console-1.8.0.dist-info}/entry_points.txt +0 -0
@@ -39,14 +39,8 @@ definitions:
39
39
  Client: "#/definitions/oauth_authenticator"
40
40
  Service: "#/definitions/jwt_profile_assertion_oauth_authenticator"
41
41
 
42
- search_analytics_by_country_stream:
42
+ base_search_analytics_stream:
43
43
  type: DeclarativeStream
44
- name: search_analytics_by_country
45
- primary_key:
46
- - site_url
47
- - date
48
- - country
49
- - search_type
50
44
  retriever:
51
45
  type: SimpleRetriever
52
46
  requester:
@@ -60,13 +54,10 @@ definitions:
60
54
  request_body_json:
61
55
  startDate: "{{ stream_interval.get('start_time') }}"
62
56
  endDate: "{{ stream_interval.get('end_time') }}"
63
- dimensions: ["date", "country"]
57
+ dimensions: "{{ parameters['dimensions'] }}"
64
58
  type: "{{ stream_partition.get('search_type') }}"
65
- aggregationType: auto
59
+ aggregationType: "{{ 'auto' if config.get('always_use_aggregation_type_auto') else parameters.get('aggregationType') }}"
66
60
  dataState: "{{ config.get('data_state', 'final') }}"
67
- # Currently relying on the default error handler behavior. Two pieces of functionality not covered are
68
- # - Silently skipping over 403 permissions errors and relying on partial success reporting
69
- # - Retrying 400 errors with aggregation_type=auto instead of failing outright
70
61
  paginator:
71
62
  type: DefaultPaginator
72
63
  page_token_option:
@@ -92,13 +83,7 @@ definitions:
92
83
  values: "{{ config['site_urls'] }}"
93
84
  cursor_field: site_url
94
85
  - type: ListPartitionRouter
95
- values:
96
- - web
97
- - news
98
- - image
99
- - video
100
- - discover
101
- - googleNews
86
+ values: "{{ parameters['search_types'] }}"
102
87
  cursor_field: search_type
103
88
  incremental_sync:
104
89
  type: DatetimeBasedCursor
@@ -116,6 +101,76 @@ definitions:
116
101
  datetime_format: "%Y-%m-%d"
117
102
  step: P3D
118
103
  cursor_granularity: P1D
104
+ state_migrations:
105
+ - type: CustomStateMigration
106
+ class_name: source_google_search_console.components.NestedSubstreamStateMigration
107
+
108
+ search_analytics_all_fields_stream:
109
+ $ref: "#/definitions/base_search_analytics_stream"
110
+ name: search_analytics_all_fields
111
+ primary_key:
112
+ - site_url
113
+ - date
114
+ - country
115
+ - device
116
+ - query
117
+ - page
118
+ - search_type
119
+ transformations:
120
+ - type: AddFields
121
+ fields:
122
+ - path:
123
+ - site_url
124
+ value: "{{ stream_partition['site_url'] }}"
125
+ - path:
126
+ - search_type
127
+ value: "{{ stream_partition['search_type'] }}"
128
+ - type: AddFields
129
+ fields:
130
+ - path:
131
+ - date
132
+ value: "{{ record['keys'][0] }}"
133
+ - path:
134
+ - country
135
+ value: "{{ record['keys'][1] }}"
136
+ - path:
137
+ - device
138
+ value: "{{ record['keys'][2] }}"
139
+ - path:
140
+ - page
141
+ value: "{{ record['keys'][3] }}"
142
+ - path:
143
+ - query
144
+ value: "{{ record['keys'][4] }}"
145
+ - type: RemoveFields
146
+ field_pointers:
147
+ - - keys
148
+ schema_loader:
149
+ type: InlineSchemaLoader
150
+ schema:
151
+ $ref: "#/schemas/search_analytics_all_fields"
152
+ $parameters:
153
+ dimensions:
154
+ - date
155
+ - country
156
+ - device
157
+ - page
158
+ - query
159
+ aggregationType: auto
160
+ search_types:
161
+ - web
162
+ - news
163
+ - image
164
+ - video
165
+
166
+ search_analytics_by_country_stream:
167
+ $ref: "#/definitions/base_search_analytics_stream"
168
+ name: search_analytics_by_country
169
+ primary_key:
170
+ - site_url
171
+ - date
172
+ - country
173
+ - search_type
119
174
  transformations:
120
175
  - type: AddFields
121
176
  fields:
@@ -144,9 +199,345 @@ definitions:
144
199
  type: InlineSchemaLoader
145
200
  schema:
146
201
  $ref: "#/schemas/search_analytics_by_country"
147
- state_migrations:
148
- - type: CustomStateMigration
149
- class_name: source_google_search_console.components.NestedSubstreamStateMigration
202
+ $parameters:
203
+ dimensions:
204
+ - date
205
+ - country
206
+ aggregationType: auto
207
+ search_types:
208
+ - web
209
+ - news
210
+ - image
211
+ - video
212
+ - discover
213
+ - googleNews
214
+
215
+ search_analytics_by_date_stream:
216
+ $ref: "#/definitions/base_search_analytics_stream"
217
+ name: search_analytics_by_date
218
+ primary_key:
219
+ - site_url
220
+ - date
221
+ - search_type
222
+ transformations:
223
+ - type: AddFields
224
+ fields:
225
+ - path:
226
+ - site_url
227
+ value: "{{ stream_partition['site_url'] }}"
228
+ - path:
229
+ - search_type
230
+ value: "{{ stream_partition['search_type'] }}"
231
+ - type: AddFields
232
+ fields:
233
+ - path:
234
+ - date
235
+ value: "{{ record['keys'][0] }}"
236
+ - type: RemoveFields
237
+ field_pointers:
238
+ - - keys
239
+ schema_loader:
240
+ type: InlineSchemaLoader
241
+ schema:
242
+ $ref: "#/schemas/search_analytics_by_date"
243
+ $parameters:
244
+ dimensions:
245
+ - date
246
+ aggregationType: auto
247
+ search_types:
248
+ - web
249
+ - news
250
+ - image
251
+ - video
252
+ - discover
253
+ - googleNews
254
+
255
+ search_analytics_by_device_stream:
256
+ $ref: "#/definitions/base_search_analytics_stream"
257
+ name: search_analytics_by_device
258
+ primary_key:
259
+ - site_url
260
+ - date
261
+ - device
262
+ - search_type
263
+ transformations:
264
+ - type: AddFields
265
+ fields:
266
+ - path:
267
+ - site_url
268
+ value: "{{ stream_partition['site_url'] }}"
269
+ - path:
270
+ - search_type
271
+ value: "{{ stream_partition['search_type'] }}"
272
+ # The values in the 'keys' array in the record correspond to the same order that the dimensions
273
+ # are requested in the API request. For example, if the request body was `dimensions: ["date", "device"]`,
274
+ # then the first value of `keys` is placed under the `date` field. These arrays are always the same length.
275
+ # After extracting the keys, the `keys` array is removed from the record.
276
+ - type: AddFields
277
+ fields:
278
+ - path:
279
+ - date
280
+ value: "{{ record['keys'][0] }}"
281
+ - path:
282
+ - device
283
+ value: "{{ record['keys'][1] }}"
284
+ - type: RemoveFields
285
+ field_pointers:
286
+ - - keys
287
+ schema_loader:
288
+ type: InlineSchemaLoader
289
+ schema:
290
+ $ref: "#/schemas/search_analytics_by_device"
291
+ $parameters:
292
+ dimensions:
293
+ - date
294
+ - device
295
+ aggregationType: auto
296
+ search_types:
297
+ - web
298
+ - news
299
+ - image
300
+ - video
301
+ - googleNews
302
+
303
+ search_analytics_by_page_stream:
304
+ $ref: "#/definitions/base_search_analytics_stream"
305
+ name: search_analytics_by_page
306
+ primary_key:
307
+ - site_url
308
+ - date
309
+ - page
310
+ - search_type
311
+ transformations:
312
+ - type: AddFields
313
+ fields:
314
+ - path:
315
+ - site_url
316
+ value: "{{ stream_partition['site_url'] }}"
317
+ - path:
318
+ - search_type
319
+ value: "{{ stream_partition['search_type'] }}"
320
+ - type: AddFields
321
+ fields:
322
+ - path:
323
+ - date
324
+ value: "{{ record['keys'][0] }}"
325
+ - path:
326
+ - page
327
+ value: "{{ record['keys'][1] }}"
328
+ - type: RemoveFields
329
+ field_pointers:
330
+ - - keys
331
+ schema_loader:
332
+ type: InlineSchemaLoader
333
+ schema:
334
+ $ref: "#/schemas/search_analytics_by_page"
335
+ $parameters:
336
+ dimensions:
337
+ - date
338
+ - page
339
+ aggregationType: auto
340
+ search_types:
341
+ - web
342
+ - news
343
+ - image
344
+ - video
345
+ - discover
346
+ - googleNews
347
+
348
+ search_analytics_by_query_stream:
349
+ $ref: "#/definitions/base_search_analytics_stream"
350
+ name: search_analytics_by_query
351
+ primary_key:
352
+ - site_url
353
+ - date
354
+ - query
355
+ - search_type
356
+ transformations:
357
+ - type: AddFields
358
+ fields:
359
+ - path:
360
+ - site_url
361
+ value: "{{ stream_partition['site_url'] }}"
362
+ - path:
363
+ - search_type
364
+ value: "{{ stream_partition['search_type'] }}"
365
+ - type: AddFields
366
+ fields:
367
+ - path:
368
+ - date
369
+ value: "{{ record['keys'][0] }}"
370
+ - path:
371
+ - query
372
+ value: "{{ record['keys'][1] }}"
373
+ - type: RemoveFields
374
+ field_pointers:
375
+ - - keys
376
+ schema_loader:
377
+ type: InlineSchemaLoader
378
+ schema:
379
+ $ref: "#/schemas/search_analytics_by_query"
380
+ $parameters:
381
+ dimensions:
382
+ - date
383
+ - query
384
+ aggregationType: auto
385
+ search_types:
386
+ - web
387
+ - news
388
+ - image
389
+ - video
390
+
391
+ search_analytics_page_report_stream:
392
+ $ref: "#/definitions/base_search_analytics_stream"
393
+ name: search_analytics_page_report
394
+ primary_key:
395
+ - site_url
396
+ - date
397
+ - country
398
+ - device
399
+ - search_type
400
+ - page
401
+ transformations:
402
+ - type: AddFields
403
+ fields:
404
+ - path:
405
+ - site_url
406
+ value: "{{ stream_partition.get('site_url') }}"
407
+ - path:
408
+ - search_type
409
+ value: "{{ stream_partition.get('search_type') }}"
410
+ - type: AddFields
411
+ fields:
412
+ - path:
413
+ - date
414
+ value: "{{ record['keys'][0] }}"
415
+ - path:
416
+ - country
417
+ value: "{{ record['keys'][1] }}"
418
+ - path:
419
+ - device
420
+ value: "{{ record['keys'][2] }}"
421
+ - path:
422
+ - page
423
+ value: "{{ record['keys'][3] }}"
424
+ - type: RemoveFields
425
+ field_pointers:
426
+ - - keys
427
+ schema_loader:
428
+ type: InlineSchemaLoader
429
+ schema:
430
+ $ref: "#/schemas/search_analytics_page_report"
431
+ $parameters:
432
+ dimensions:
433
+ - date
434
+ - country
435
+ - device
436
+ - page
437
+ search_types:
438
+ - web
439
+ - news
440
+ - image
441
+ - video
442
+ - googleNews
443
+
444
+ search_analytics_site_report_by_page_stream:
445
+ $ref: "#/definitions/base_search_analytics_stream"
446
+ name: search_analytics_site_report_by_page
447
+ primary_key:
448
+ - site_url
449
+ - date
450
+ - country
451
+ - device
452
+ - search_type
453
+ transformations:
454
+ - type: AddFields
455
+ fields:
456
+ - path:
457
+ - site_url
458
+ value: "{{ stream_partition.get('site_url') }}"
459
+ - path:
460
+ - search_type
461
+ value: "{{ stream_partition.get('search_type') }}"
462
+ - type: AddFields
463
+ fields:
464
+ - path:
465
+ - date
466
+ value: "{{ record['keys'][0] }}"
467
+ - path:
468
+ - country
469
+ value: "{{ record['keys'][1] }}"
470
+ - path:
471
+ - device
472
+ value: "{{ record['keys'][2] }}"
473
+ - type: RemoveFields
474
+ field_pointers:
475
+ - - keys
476
+ schema_loader:
477
+ type: InlineSchemaLoader
478
+ schema:
479
+ $ref: "#/schemas/search_analytics_site_report_by_page"
480
+ $parameters:
481
+ aggregationType: byPage
482
+ dimensions:
483
+ - date
484
+ - country
485
+ - device
486
+ search_types:
487
+ - web
488
+ - news
489
+ - image
490
+ - video
491
+ - googleNews
492
+
493
+ search_analytics_site_report_by_site_stream:
494
+ $ref: "#/definitions/base_search_analytics_stream"
495
+ name: search_analytics_site_report_by_site
496
+ primary_key:
497
+ - site_url
498
+ - date
499
+ - country
500
+ - device
501
+ - search_type
502
+ transformations:
503
+ - type: AddFields
504
+ fields:
505
+ - path:
506
+ - site_url
507
+ value: "{{ stream_partition.get('site_url') }}"
508
+ - path:
509
+ - search_type
510
+ value: "{{ stream_partition.get('search_type') }}"
511
+ - type: AddFields
512
+ fields:
513
+ - path:
514
+ - date
515
+ value: "{{ record['keys'][0] }}"
516
+ - path:
517
+ - country
518
+ value: "{{ record['keys'][1] }}"
519
+ - path:
520
+ - device
521
+ value: "{{ record['keys'][2] }}"
522
+ - type: RemoveFields
523
+ field_pointers:
524
+ - - keys
525
+ schema_loader:
526
+ type: InlineSchemaLoader
527
+ schema:
528
+ $ref: "#/schemas/search_analytics_site_report_by_site"
529
+ $parameters:
530
+ aggregationType: byProperty
531
+ dimensions:
532
+ - date
533
+ - country
534
+ - device
535
+ search_types:
536
+ - web
537
+ - news
538
+ - image
539
+ - video
540
+ - googleNews
150
541
 
151
542
  sites_stream:
152
543
  type: DeclarativeStream
@@ -199,65 +590,708 @@ definitions:
199
590
  schema:
200
591
  $ref: "#/schemas/sitemaps"
201
592
 
202
- streams:
203
- - "#/definitions/search_analytics_by_country_stream"
204
- - "#/definitions/sites_stream"
593
+ # This stream is only used as a parent stream for the search_analytics_keyword_* substreams
594
+ search_appearances_stream:
595
+ type: DeclarativeStream
596
+ name: search_appearances
597
+ retriever:
598
+ type: SimpleRetriever
599
+ requester:
600
+ type: HttpRequester
601
+ url_base: https://www.googleapis.com/webmasters/v3
602
+ path: "/sites/{{ sanitize_url(stream_partition.get('site_url')) }}/searchAnalytics/query"
603
+ http_method: POST
604
+ authenticator: "#/definitions/selective_authenticator"
605
+ request_headers:
606
+ Content-Type: "application/json"
607
+ request_body_json:
608
+ startDate: "{{ config.get('start_date') }}"
609
+ endDate: "{{ config.get('end_date') }}"
610
+ dimensions: ["searchAppearance"]
611
+ type: "{{ stream_partition.get('search_type') }}"
612
+ aggregationType: auto
613
+ dataState: "{{ config.get('data_state', 'final') }}"
614
+ paginator:
615
+ type: DefaultPaginator
616
+ page_token_option:
617
+ type: RequestOption
618
+ field_name: startRow
619
+ inject_into: body_json
620
+ page_size_option:
621
+ type: RequestOption
622
+ field_name: rowLimit
623
+ inject_into: body_json
624
+ pagination_strategy:
625
+ type: OffsetIncrement
626
+ page_size: 25000
627
+ inject_on_first_request: true
628
+ record_selector:
629
+ type: RecordSelector
630
+ extractor:
631
+ type: DpathExtractor
632
+ field_path:
633
+ - rows
634
+ partition_router:
635
+ - type: ListPartitionRouter
636
+ values: "{{ config['site_urls'] }}"
637
+ cursor_field: site_url
638
+ - type: ListPartitionRouter
639
+ values:
640
+ - web
641
+ - news
642
+ - image
643
+ - video
644
+ cursor_field: search_type
645
+ transformations:
646
+ - type: AddFields
647
+ fields:
648
+ - path:
649
+ - searchAppearance
650
+ value: "{{ record['keys'][0] }}"
651
+ - type: RemoveFields
652
+ field_pointers:
653
+ - - keys
654
+
655
+ base_search_analytics_keyword_stream:
656
+ type: DeclarativeStream
657
+ retriever:
658
+ type: SimpleRetriever
659
+ requester:
660
+ type: HttpRequester
661
+ url_base: https://www.googleapis.com/webmasters/v3
662
+ path: "/sites/{{ sanitize_url(stream_partition.get('site_url')) }}/searchAnalytics/query"
663
+ http_method: POST
664
+ authenticator: "#/definitions/selective_authenticator"
665
+ request_headers:
666
+ Content-Type: "application/json"
667
+ request_body_json:
668
+ startDate: "{{ stream_interval.get('start_time') }}"
669
+ endDate: "{{ stream_interval.get('end_time') }}"
670
+ dimensions: "{{ parameters.get('dimensions') }}"
671
+ type: "{{ stream_partition.get('parent_slice', {}).get('search_type') }}"
672
+ aggregationType: "{{ 'auto' if config.get('always_use_aggregation_type_auto') else parameters.get('aggregationType') }}"
673
+ dataState: "{{ config.get('data_state', 'final') }}"
674
+ dimensionFilterGroups: "{{ [{'groupType': 'and', 'filters': {'dimension': 'searchAppearance', 'operator': 'equals', 'expression': stream_partition.get('search_appearance')}}] }}"
675
+ error_handler:
676
+ type: DefaultErrorHandler
677
+ response_filters:
678
+ - type: HttpResponseFilter
679
+ action: FAIL
680
+ http_codes:
681
+ - 400
682
+ error_message: >-
683
+ Invalid aggregationType '{{ parameters.get('aggregationType') }}' used in the body of the API request. If you see this error, enable the
684
+ 'always_use_aggregation_type_auto' config setting which will automatically use aggregationType=auto
685
+ paginator:
686
+ type: DefaultPaginator
687
+ page_token_option:
688
+ type: RequestOption
689
+ field_name: startRow
690
+ inject_into: body_json
691
+ page_size_option:
692
+ type: RequestOption
693
+ field_name: rowLimit
694
+ inject_into: body_json
695
+ pagination_strategy:
696
+ type: OffsetIncrement
697
+ page_size: 25000
698
+ inject_on_first_request: true
699
+ record_selector:
700
+ type: RecordSelector
701
+ extractor:
702
+ type: DpathExtractor
703
+ field_path:
704
+ - rows
705
+ partition_router:
706
+ - type: ListPartitionRouter
707
+ values: "{{ config['site_urls'] }}"
708
+ cursor_field: site_url
709
+ - type: SubstreamPartitionRouter
710
+ parent_stream_configs:
711
+ - type: ParentStreamConfig
712
+ parent_key: searchAppearance
713
+ partition_field: search_appearance
714
+ stream:
715
+ $ref: "#/definitions/search_appearances_stream"
716
+ incremental_sync:
717
+ type: DatetimeBasedCursor
718
+ cursor_field: date
719
+ cursor_datetime_formats:
720
+ - "%Y-%m-%d"
721
+ datetime_format: "%Y-%m-%d"
722
+ start_datetime:
723
+ type: MinMaxDatetime
724
+ datetime: "{{ config.get('start_date', '2021-01-01') }}"
725
+ datetime_format: "%Y-%m-%d"
726
+ end_datetime:
727
+ type: MinMaxDatetime
728
+ datetime: "{{ config.get('end_date', today_utc()) }}"
729
+ datetime_format: "%Y-%m-%d"
730
+ step: P3D
731
+ cursor_granularity: P1D
732
+
733
+ search_analytics_keyword_page_report_stream:
734
+ $ref: "#/definitions/base_search_analytics_keyword_stream"
735
+ name: search_analytics_keyword_page_report
736
+ primary_key:
737
+ - site_url
738
+ - date
739
+ - country
740
+ - device
741
+ - query
742
+ - page
743
+ - search_type
744
+ transformations:
745
+ - type: AddFields
746
+ fields:
747
+ - path:
748
+ - site_url
749
+ value: "{{ stream_partition['site_url'] }}"
750
+ - path:
751
+ - search_type
752
+ value: "{{ stream_partition.get('parent_slice', {}).get('search_type') }}"
753
+ - type: AddFields
754
+ fields:
755
+ - path:
756
+ - date
757
+ value: "{{ record['keys'][0] }}"
758
+ - path:
759
+ - country
760
+ value: "{{ record['keys'][1] }}"
761
+ - path:
762
+ - device
763
+ value: "{{ record['keys'][2] }}"
764
+ - path:
765
+ - query
766
+ value: "{{ record['keys'][3] }}"
767
+ - path:
768
+ - page
769
+ value: "{{ record['keys'][4] }}"
770
+ - type: RemoveFields
771
+ field_pointers:
772
+ - - keys
773
+ schema_loader:
774
+ type: InlineSchemaLoader
775
+ schema:
776
+ $ref: "#/schemas/search_analytics_keyword_page_report"
777
+ $parameters:
778
+ aggregationType: auto
779
+ dimensions:
780
+ - date
781
+ - country
782
+ - device
783
+ - query
784
+ - page
785
+
786
+ search_analytics_keyword_site_report_by_page_stream:
787
+ $ref: "#/definitions/base_search_analytics_keyword_stream"
788
+ name: search_analytics_keyword_site_report_by_page
789
+ primary_key:
790
+ - site_url
791
+ - date
792
+ - country
793
+ - device
794
+ - query
795
+ - search_type
796
+ transformations:
797
+ - type: AddFields
798
+ fields:
799
+ - path:
800
+ - site_url
801
+ value: "{{ stream_partition['site_url'] }}"
802
+ - path:
803
+ - search_type
804
+ value: "{{ stream_partition.get('parent_slice', {}).get('search_type') }}"
805
+ - type: AddFields
806
+ fields:
807
+ - path:
808
+ - date
809
+ value: "{{ record['keys'][0] }}"
810
+ - path:
811
+ - country
812
+ value: "{{ record['keys'][1] }}"
813
+ - path:
814
+ - device
815
+ value: "{{ record['keys'][2] }}"
816
+ - path:
817
+ - query
818
+ value: "{{ record['keys'][3] }}"
819
+ - type: RemoveFields
820
+ field_pointers:
821
+ - - keys
822
+ schema_loader:
823
+ type: InlineSchemaLoader
824
+ schema:
825
+ $ref: "#/schemas/search_analytics_keyword_site_report_by_page"
826
+ $parameters:
827
+ aggregationType: byPage
828
+ dimensions:
829
+ - date
830
+ - country
831
+ - device
832
+ - query
833
+
834
+ search_analytics_keyword_site_report_by_site_stream:
835
+ $ref: "#/definitions/base_search_analytics_keyword_stream"
836
+ name: search_analytics_keyword_site_report_by_site
837
+ primary_key:
838
+ - site_url
839
+ - date
840
+ - country
841
+ - device
842
+ - query
843
+ - search_type
844
+ transformations:
845
+ - type: AddFields
846
+ fields:
847
+ - path:
848
+ - site_url
849
+ value: "{{ stream_partition['site_url'] }}"
850
+ - path:
851
+ - search_type
852
+ value: "{{ stream_partition.get('parent_slice', {}).get('search_type') }}"
853
+ - type: AddFields
854
+ fields:
855
+ - path:
856
+ - date
857
+ value: "{{ record['keys'][0] }}"
858
+ - path:
859
+ - country
860
+ value: "{{ record['keys'][1] }}"
861
+ - path:
862
+ - device
863
+ value: "{{ record['keys'][2] }}"
864
+ - path:
865
+ - query
866
+ value: "{{ record['keys'][3] }}"
867
+ - type: RemoveFields
868
+ field_pointers:
869
+ - - keys
870
+ schema_loader:
871
+ type: InlineSchemaLoader
872
+ schema:
873
+ $ref: "#/schemas/search_analytics_keyword_site_report_by_site"
874
+ $parameters:
875
+ aggregationType: byProperty
876
+ dimensions:
877
+ - date
878
+ - country
879
+ - device
880
+
881
+ streams:
882
+ # Regular streams
883
+ - "#/definitions/sites_stream"
205
884
  - "#/definitions/sitemaps_stream"
885
+ # Search Analytics streams
886
+ - "#/definitions/search_analytics_all_fields_stream"
887
+ - "#/definitions/search_analytics_by_country_stream"
888
+ - "#/definitions/search_analytics_by_date_stream"
889
+ - "#/definitions/search_analytics_by_device_stream"
890
+ - "#/definitions/search_analytics_by_page_stream"
891
+ - "#/definitions/search_analytics_by_query_stream"
892
+ - "#/definitions/search_analytics_page_report_stream"
893
+ - "#/definitions/search_analytics_site_report_by_page_stream"
894
+ - "#/definitions/search_analytics_site_report_by_site_stream"
895
+ # Search Analytics Keyword streams
896
+ - "#/definitions/search_analytics_keyword_page_report_stream"
897
+ - "#/definitions/search_analytics_keyword_site_report_by_page_stream"
898
+ - "#/definitions/search_analytics_keyword_site_report_by_site_stream"
899
+
900
+ # Google Search Console has three layers of quotas that dictate rate limiting at the
901
+ # user making requests, site being requested, and developer console key used.
902
+ # https://developers.google.com/webmaster-tools/limits#qps-quota
903
+ # - Per Site Quota: 1,200 req/min (20 req/sec)
904
+ # - Per User Quota: 1,200 req/min (20 req/sec)
905
+ # - Per Project Quota: 30,000,000 req/day (~350 req/sec) / 40,000 req/min (~667 req/sec)
906
+ #
907
+ # The most likely upper bound is based on the user quota since it is the lowest and the
908
+ # same authenticated user account may hit multiple site urls. The default is set to 40
909
+ # workers, which at the 20 req/sec quota equates to one request every 2 seconds per worker; this seems like a fair baseline.
910
+ #
911
+ concurrency_level:
912
+ type: ConcurrencyLevel
913
+ default_concurrency: "{{ config.get('num_workers', 40) }}"
914
+ max_concurrency: 100
206
915
 
207
916
  schemas:
917
+ search_analytics_all_fields:
918
+ $schema: "http://json-schema.org/draft-07/schema#"
919
+ type: object
920
+ properties:
921
+ site_url:
922
+ description: "The URL of the site from which the data originates."
923
+ type: ["null", "string"]
924
+ search_type:
925
+ description: "The type of search (e.g., web, image, video) that triggered the search result."
926
+ type: ["null", "string"]
927
+ date:
928
+ description: "The date when the search query occurred."
929
+ type: ["null", "string"]
930
+ format: "date"
931
+ country:
932
+ description: "The country from which the search query originated."
933
+ type: ["null", "string"]
934
+ device:
935
+ description: "The type of device used by the user (e.g., desktop, mobile)."
936
+ type: ["null", "string"]
937
+ page:
938
+ description: "The page URL that appeared in the search results."
939
+ type: ["null", "string"]
940
+ query:
941
+ description: "The search query entered by the user."
942
+ type: ["null", "string"]
943
+ clicks:
944
+ description: "The number of times users clicked on the search result for a specific query."
945
+ type: ["null", "integer"]
946
+ impressions:
947
+ description: "The number of times a search result appeared in response to a query."
948
+ type: ["null", "integer"]
949
+ ctr:
950
+ description: "Click-through rate, calculated as clicks divided by impressions."
951
+ type: ["null", "number"]
952
+ multipleOf: 1.e-25
953
+ position:
954
+ description: "The average position of the search result on the search engine results page."
955
+ type: ["null", "number"]
956
+ multipleOf: 1.e-25
208
957
  search_analytics_by_country:
209
958
  $schema: "http://json-schema.org/draft-07/schema#"
210
959
  type: object
211
960
  properties:
212
961
  site_url:
213
- description: The URL of the site for which the search analytics data is being reported.
962
+ description: The URL of the site for which the search analytics data is being reported.
963
+ type:
964
+ - "null"
965
+ - string
966
+ search_type:
967
+ description: >-
968
+ The type of search (web search, image search, video search, etc.) for
969
+ which the data is being reported.
970
+ type:
971
+ - "null"
972
+ - string
973
+ date:
974
+ description: The date for which the search analytics data is being reported.
975
+ type:
976
+ - "null"
977
+ - string
978
+ format: date
979
+ country:
980
+ description: The country for which the search analytics data is being reported.
981
+ type:
982
+ - "null"
983
+ - string
984
+ clicks:
985
+ description: >-
986
+ The number of times users clicked on the search result for a specific
987
+ country.
988
+ type:
989
+ - "null"
990
+ - integer
991
+ impressions:
992
+ description: >-
993
+ The total number of times a search result was shown in search results for
994
+ a specific country.
995
+ type:
996
+ - "null"
997
+ - integer
998
+ ctr:
999
+ description: >-
1000
+ The click-through rate, i.e., the ratio of clicks to impressions for a
1001
+ specific country.
1002
+ type:
1003
+ - "null"
1004
+ - number
1005
+ multipleOf: 1.e-25
1006
+ position:
1007
+ description: >-
1008
+ The average position at which the site's search result appeared for a
1009
+ specific country.
1010
+ type:
1011
+ - "null"
1012
+ - number
1013
+ multipleOf: 1.e-25
1014
+ search_analytics_by_date:
1015
+ $schema: "http://json-schema.org/draft-07/schema#"
1016
+ type: object
1017
+ properties:
1018
+ site_url:
1019
+ description: "The URL of the site for which the search analytics data is being reported."
1020
+ type: ["null", "string"]
1021
+ search_type:
1022
+ description: "The type of search query (e.g., web, image, video) that generated the search analytics data."
1023
+ type: ["null", "string"]
1024
+ date:
1025
+ description: "The date for which the search analytics data is being reported."
1026
+ type: ["null", "string"]
1027
+ format: "date"
1028
+ clicks:
1029
+ description: "The total number of times users clicked on the search result for the site URL on the specific date."
1030
+ type: ["null", "integer"]
1031
+ impressions:
1032
+ description: "The number of times the site URL was displayed in the search results to users on the specific date."
1033
+ type: ["null", "integer"]
1034
+ ctr:
1035
+ description: "The click-through rate (CTR) represents the percentage of total impressions that resulted in a click to the site URL."
1036
+ type: ["null", "number"]
1037
+ multipleOf: 1.e-25
1038
+ position:
1039
+ description: "The average position of the site URL in the search results pages for the specific date."
1040
+ type: ["null", "number"]
1041
+ multipleOf: 1.e-25
1042
+ search_analytics_by_device:
1043
+ $schema: "http://json-schema.org/draft-07/schema#"
1044
+ type: "object"
1045
+ properties:
1046
+ site_url:
1047
+ description: "The URL of the site for which search analytics data is being provided."
1048
+ type: ["null", "string"]
1049
+ search_type:
1050
+ description: "The type of search performed (e.g., web search, image search, video search)."
1051
+ type: ["null", "string"]
1052
+ date:
1053
+ description: "The date for which the search analytics data is provided."
1054
+ type: ["null", "string"]
1055
+ format: "date"
1056
+ device:
1057
+ description: "The type of device used by the user for the search query (e.g., desktop, mobile)."
1058
+ type: ["null", "string"]
1059
+ clicks:
1060
+ description: "The total number of times a user clicked on a search result linking to the target site."
1061
+ type: ["null", "integer"]
1062
+ impressions:
1063
+ description: "The total number of times a user saw a link to the target site in search results."
1064
+ type: ["null", "integer"]
1065
+ ctr:
1066
+ description: "Click-through rate represents the ratio of clicks to impressions, showing the effectiveness of your site in attracting clicks from search results."
1067
+ type: ["null", "number"]
1068
+ multipleOf: 1.e-25
1069
+ position:
1070
+ description: "The average position of the site's URLs in search results for the given query or queries."
1071
+ type: ["null", "number"]
1072
+ multipleOf: 1.e-25
1073
+ search_analytics_by_page:
1074
+ $schema: "http://json-schema.org/draft-07/schema#"
1075
+ type: object
1076
+ properties:
1077
+ site_url:
1078
+ description: "The URL of the site for which the search analytics data is being reported."
1079
+ type: ["null", "string"]
1080
+ search_type:
1081
+ description: "The type of search query that led to the page being displayed in search results."
1082
+ type: ["null", "string"]
1083
+ date:
1084
+ description: "The date for which the search analytics data is reported."
1085
+ type: ["null", "string"]
1086
+ format: "date"
1087
+ page:
1088
+ description: "The URL of the specific page being analyzed for search analytics data."
1089
+ type: ["null", "string"]
1090
+ clicks:
1091
+ description: "The number of times a user clicked on the search result linking to the page."
1092
+ type: ["null", "integer"]
1093
+ impressions:
1094
+ description: "The number of times a page from the site appeared in the search results viewed by users."
1095
+ type: ["null", "integer"]
1096
+ ctr:
1097
+ description: "Click-through rate (CTR) is the ratio of clicks to impressions, indicating the effectiveness of the page in generating clicks."
1098
+ type: ["null", "number"]
1099
+ multipleOf: 1.e-25
1100
+ position:
1101
+ description: "The average position at which the page appeared in search results."
1102
+ type: ["null", "number"]
1103
+ multipleOf: 1.e-25
1104
+ search_analytics_by_query:
1105
+ $schema: "http://json-schema.org/draft-07/schema#"
1106
+ type: object
1107
+ properties:
1108
+ site_url:
1109
+ description: "The URL of the site for which the search analytics data is captured."
1110
+ type: ["null", "string"]
1111
+ search_type:
1112
+ description: "The type of search result (e.g., web, image, video) for the specific query."
1113
+ type: ["null", "string"]
1114
+ date:
1115
+ description: "The date for which the search analytics data is recorded."
1116
+ type: ["null", "string"]
1117
+ format: "date"
1118
+ query:
1119
+ description: "The search query for which the search analytics data is recorded."
1120
+ type: ["null", "string"]
1121
+ clicks:
1122
+ description: "The number of times users clicked on the search result for the specific query."
1123
+ type: ["null", "integer"]
1124
+ impressions:
1125
+ description: "The number of times the search result was displayed for the specific query."
1126
+ type: ["null", "integer"]
1127
+ ctr:
1128
+ description: "The click-through rate (percentage) for the specific query, calculated as clicks divided by impressions."
1129
+ type: ["null", "number"]
1130
+ multipleOf: 1.e-25
1131
+ position:
1132
+ description: "The average position at which the search result appeared for the specific query."
1133
+ type: ["null", "number"]
1134
+ multipleOf: 1.e-25
1135
+ search_analytics_page_report:
1136
+ $schema: "https://json-schema.org/draft-07/schema#"
1137
+ type: object
1138
+ additionalProperties: true
1139
+ properties:
1140
+ site_url:
1141
+ description: The URL of the website for which the search analytics data is being reported.
214
1142
  type:
215
1143
  - "null"
216
1144
  - string
217
1145
  search_type:
218
- description: >-
219
- The type of search (web search, image search, video search, etc.) for
220
- which the data is being reported.
1146
+ description: The type of search (e.g., web, image, video) that led users to the website.
221
1147
  type:
222
1148
  - "null"
223
1149
  - string
224
1150
  date:
225
- description: The date for which the search analytics data is being reported.
1151
+ description: The date when the search data was recorded.
226
1152
  type:
227
1153
  - "null"
228
1154
  - string
229
1155
  format: date
230
1156
  country:
231
- description: The country for which the search analytics data is being reported.
1157
+ description: The country from which the search originated.
1158
+ type:
1159
+ - "null"
1160
+ - string
1161
+ page:
1162
+ description: The specific page URL within the website that appeared in search results.
1163
+ type:
1164
+ - "null"
1165
+ - string
1166
+ device:
1167
+ description: The type of device used by the user for the search query (e.g., desktop, mobile).
232
1168
  type:
233
1169
  - "null"
234
1170
  - string
235
1171
  clicks:
236
- description: >-
237
- The number of times users clicked on the search result for a specific
238
- country.
1172
+ description: The total number of times users clicked on search results that led to the linked website.
239
1173
  type:
240
1174
  - "null"
241
1175
  - integer
242
1176
  impressions:
243
- description: >-
244
- The total number of times a search result was shown in search results for
245
- a specific country.
1177
+ description: The total number of times a search result from the linked website was shown to users.
246
1178
  type:
247
1179
  - "null"
248
1180
  - integer
249
1181
  ctr:
250
- description: >-
251
- The click-through rate, i.e., the ratio of clicks to impressions for a
252
- specific country.
1182
+ description: "Click-through rate: The percentage of clicks out of the total impressions for a given search query."
253
1183
  type:
254
1184
  - "null"
255
1185
  - number
256
1186
  multipleOf: 1.e-25
257
1187
  position:
258
- description: >-
259
- The average position at which the site's search result appeared for a
260
- specific country.
1188
+ description: The average position at which the website's search results appeared to users.
1189
+ type:
1190
+ - "null"
1191
+ - number
1192
+ multipleOf: 1.e-25
1193
+ search_analytics_site_report_by_page:
1194
+ $schema: "https://json-schema.org/draft-07/schema#"
1195
+ type: object
1196
+ additionalProperties: true
1197
+ properties:
1198
+ site_url:
1199
+ description: The URL of the page on the site that is being reported.
1200
+ type:
1201
+ - "null"
1202
+ - string
1203
+ search_type:
1204
+ description: The type of search query that led to the page being shown.
1205
+ type:
1206
+ - "null"
1207
+ - string
1208
+ date:
1209
+ description: The date for which the data is being reported.
1210
+ type:
1211
+ - "null"
1212
+ - string
1213
+ format: date
1214
+ country:
1215
+ description: The country from which the search traffic originated.
1216
+ type:
1217
+ - "null"
1218
+ - string
1219
+ device:
1220
+ description: "The type of device used by the searcher (e.g., desktop, mobile)."
1221
+ type:
1222
+ - "null"
1223
+ - string
1224
+ clicks:
1225
+ description: The total number of clicks received by the page from search results.
1226
+ type:
1227
+ - "null"
1228
+ - integer
1229
+ impressions:
1230
+ description: The total number of times the page appeared in search results.
1231
+ type:
1232
+ - "null"
1233
+ - integer
1234
+ ctr:
1235
+ description: The click-through rate, i.e., the percentage of total impressions that resulted in clicks.
1236
+ type:
1237
+ - "null"
1238
+ - number
1239
+ multipleOf: 1.e-25
1240
+ position:
1241
+ description: The average position at which the page appeared in search results.
1242
+ type:
1243
+ - "null"
1244
+ - number
1245
+ multipleOf: 1.e-25
1246
+ search_analytics_site_report_by_site:
1247
+ $schema: "https://json-schema.org/draft-07/schema#"
1248
+ type: object
1249
+ additionalProperties: true
1250
+ properties:
1251
+ site_url:
1252
+ description: The URL of the site being analyzed
1253
+ type:
1254
+ - "null"
1255
+ - string
1256
+ search_type:
1257
+ description: "The type of search (e.g., web, image, video)"
1258
+ type:
1259
+ - "null"
1260
+ - string
1261
+ date:
1262
+ description: The date of the search analytics data
1263
+ type:
1264
+ - "null"
1265
+ - string
1266
+ format: date
1267
+ country:
1268
+ description: The country where the search took place
1269
+ type:
1270
+ - "null"
1271
+ - string
1272
+ device:
1273
+ description: "The type of device used for the search (e.g., mobile, desktop)"
1274
+ type:
1275
+ - "null"
1276
+ - string
1277
+ clicks:
1278
+ description: The number of times users clicked on a search result linking to the site
1279
+ type:
1280
+ - "null"
1281
+ - integer
1282
+ impressions:
1283
+ description: The number of times the site appeared in search results
1284
+ type:
1285
+ - "null"
1286
+ - integer
1287
+ ctr:
1288
+ description: Click-through rate calculated as clicks divided by impressions
1289
+ type:
1290
+ - "null"
1291
+ - number
1292
+ multipleOf: 1.e-25
1293
+ position:
1294
+ description: The average position of the site in search results
261
1295
  type:
262
1296
  - "null"
263
1297
  - number
@@ -339,3 +1373,195 @@ schemas:
339
1373
  type:
340
1374
  - "null"
341
1375
  - string
1376
+
1377
+ search_analytics_keyword_page_report:
1378
+ $schema: "https://json-schema.org/draft-07/schema#"
1379
+ type: object
1380
+ additionalProperties: true
1381
+ properties:
1382
+ site_url:
1383
+ description: The URL of the website being monitored.
1384
+ type:
1385
+ - "null"
1386
+ - string
1387
+ search_type:
1388
+ description: "The type of search (e.g., web, image, video)."
1389
+ type:
1390
+ - "null"
1391
+ - string
1392
+ date:
1393
+ description: The date of the search data collected.
1394
+ type:
1395
+ - "null"
1396
+ - string
1397
+ format: date
1398
+ country:
1399
+ description: The country where the search is made.
1400
+ type:
1401
+ - "null"
1402
+ - string
1403
+ device:
1404
+ description: "The device type used for the search (e.g., desktop, mobile)."
1405
+ type:
1406
+ - "null"
1407
+ - string
1408
+ page:
1409
+ description: The page URL on which the keyword appears in search results.
1410
+ type:
1411
+ - "null"
1412
+ - string
1413
+ query:
1414
+ description: The search query used to find the site.
1415
+ type:
1416
+ - "null"
1417
+ - string
1418
+ clicks:
1419
+ description: The number of clicks for the keyword on a specific page.
1420
+ type:
1421
+ - "null"
1422
+ - integer
1423
+ impressions:
1424
+ description: The number of times the keyword appeared in search results.
1425
+ type:
1426
+ - "null"
1427
+ - integer
1428
+ ctr:
1429
+ description: >-
1430
+ Click-through rate which is the percentage of clicks divided by
1431
+ impressions.
1432
+ type:
1433
+ - "null"
1434
+ - number
1435
+ multipleOf: 1.e-25
1436
+ position:
1437
+ description: The average position of the keyword on search results pages.
1438
+ type:
1439
+ - "null"
1440
+ - number
1441
+ multipleOf: 1.e-25
1442
+
1443
+ search_analytics_keyword_site_report_by_page:
1444
+ $schema: "https://json-schema.org/draft-07/schema#"
1445
+ type: object
1446
+ additionalProperties: true
1447
+ properties:
1448
+ site_url:
1449
+ description: The URL of the website for which the search analytics data is retrieved.
1450
+ type:
1451
+ - "null"
1452
+ - string
1453
+ search_type:
1454
+ description: "The type of search conducted (e.g., web, image, video)."
1455
+ type:
1456
+ - "null"
1457
+ - string
1458
+ date:
1459
+ description: The date when the search data was recorded.
1460
+ type:
1461
+ - "null"
1462
+ - string
1463
+ format: date
1464
+ country:
1465
+ description: The country from which the search query originated.
1466
+ type:
1467
+ - "null"
1468
+ - string
1469
+ device:
1470
+ description: "The device type used for the search query (e.g., desktop, mobile)."
1471
+ type:
1472
+ - "null"
1473
+ - string
1474
+ query:
1475
+ description: The search query used by the user.
1476
+ type:
1477
+ - "null"
1478
+ - string
1479
+ clicks:
1480
+ description: The number of times users clicked on your website link in search results.
1481
+ type:
1482
+ - "null"
1483
+ - integer
1484
+ impressions:
1485
+ description: The number of times your website link appeared in search results.
1486
+ type:
1487
+ - "null"
1488
+ - integer
1489
+ ctr:
1490
+ description: "Click-through rate: Number of clicks divided by the number of impressions."
1491
+ type:
1492
+ - "null"
1493
+ - number
1494
+ multipleOf: 1.e-25
1495
+ position:
1496
+ description: The average position of your website link in search results.
1497
+ type:
1498
+ - "null"
1499
+ - number
1500
+ multipleOf: 1.e-25
1501
+
1502
+ search_analytics_keyword_site_report_by_site:
1503
+ $schema: "https://json-schema.org/draft-07/schema#"
1504
+ type: object
1505
+ additionalProperties: true
1506
+ properties:
1507
+ site_url:
1508
+ description: The URL of the site for which the search analytics data is recorded.
1509
+ type:
1510
+ - "null"
1511
+ - string
1512
+ search_type:
1513
+ description: >-
1514
+ The type of search (e.g., web search, image search) that generated the
1515
+ analytics data.
1516
+ type:
1517
+ - "null"
1518
+ - string
1519
+ date:
1520
+ description: The date for which the search analytics data is recorded.
1521
+ type:
1522
+ - "null"
1523
+ - string
1524
+ format: date
1525
+ country:
1526
+ description: The country from which the search originated.
1527
+ type:
1528
+ - "null"
1529
+ - string
1530
+ device:
1531
+ description: >-
1532
+ The type of device used by the user during the search (e.g., desktop,
1533
+ mobile).
1534
+ type:
1535
+ - "null"
1536
+ - string
1537
+ query:
1538
+ description: The search query used by the user to find the site in search results.
1539
+ type:
1540
+ - "null"
1541
+ - string
1542
+ clicks:
1543
+ description: >-
1544
+ The number of times users clicked on the search result linking to the
1545
+ site.
1546
+ type:
1547
+ - "null"
1548
+ - integer
1549
+ impressions:
1550
+ description: The number of times the site was shown in search results to users.
1551
+ type:
1552
+ - "null"
1553
+ - integer
1554
+ ctr:
1555
+ description: >-
1556
+ Click-through rate represents the percentage of users who clicked on the
1557
+ site's link after seeing it in search results.
1558
+ type:
1559
+ - "null"
1560
+ - number
1561
+ multipleOf: 1.e-25
1562
+ position:
1563
+ description: The average ranking position of the site in search results.
1564
+ type:
1565
+ - "null"
1566
+ - number
1567
+ multipleOf: 1.e-25