airbyte-source-google-search-console 1.7.0__tar.gz → 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/PKG-INFO +1 -1
  2. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/pyproject.toml +1 -1
  3. airbyte_source_google_search_console-1.9.0/source_google_search_console/components.py +183 -0
  4. airbyte_source_google_search_console-1.9.0/source_google_search_console/manifest.yaml +1696 -0
  5. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/source.py +12 -37
  6. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/spec.json +9 -0
  7. airbyte_source_google_search_console-1.7.0/source_google_search_console/components.py +0 -81
  8. airbyte_source_google_search_console-1.7.0/source_google_search_console/manifest.yaml +0 -828
  9. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_all_fields.json +0 -53
  10. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_by_date.json +0 -37
  11. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_by_device.json +0 -41
  12. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_by_page.json +0 -41
  13. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_by_query.json +0 -41
  14. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_page_report.json +0 -50
  15. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_site_report_by_page.json +0 -46
  16. airbyte_source_google_search_console-1.7.0/source_google_search_console/schemas/search_analytics_site_report_by_site.json +0 -46
  17. airbyte_source_google_search_console-1.7.0/source_google_search_console/streams.py +0 -394
  18. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/README.md +0 -0
  19. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/__init__.py +0 -0
  20. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/config_migrations.py +0 -0
  21. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/exceptions.py +0 -0
  22. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/run.py +0 -0
  23. {airbyte_source_google_search_console-1.7.0 → airbyte_source_google_search_console-1.9.0}/source_google_search_console/service_account_authenticator.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: airbyte-source-google-search-console
- Version: 1.7.0
+ Version: 1.9.0
  Summary: Source implementation for Google Search Console.
  License: Elv2
  Author: Airbyte
@@ -5,7 +5,7 @@ requires = [
  build-backend = "poetry.core.masonry.api"

  [tool.poetry]
- version = "1.7.0"
+ version = "1.9.0"
  name = "airbyte-source-google-search-console"
  description = "Source implementation for Google Search Console."
  authors = [
@@ -0,0 +1,183 @@
+ #
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+ #
+
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Mapping, Optional
+
+ from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
+ from airbyte_cdk.sources.declarative.schema import SchemaLoader
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+ @dataclass
15
+ class NestedSubstreamStateMigration(StateMigration):
16
+ """
17
+ We require a custom state migration because SearchAnalytics streams contain two nested levels of
18
+ substreams. The existing LegacyToPerPartitionStateMigration only handles one level.
19
+
20
+ Legacy state format is as follows:
21
+ {
22
+ "date": "2025-05-28",
23
+ "https://www.example.com/": {
24
+ "web": {
25
+ "date": "2025-05-25"
26
+ },
27
+ "news": {
28
+ "date": "2023-05-22"
29
+ }
30
+ }
31
+ }
32
+
33
+ The resulting migrated per-partition state is:
34
+ {
35
+ "use_global_cursor": false,
36
+ "states": [
37
+ {
38
+ "partition": {
39
+ "search_type": "web",
40
+ "site_url": "https://www.example.com/"
41
+ },
42
+ "cursor": {
43
+ "date": "2025-05-25"
44
+ }
45
+ },
46
+ {
47
+ "partition": {
48
+ "search_type": "news",
49
+ "site_url": "https://www.example.com/"
50
+ },
51
+ "cursor": {
52
+ "date": "2023-05-22"
53
+ }
54
+ }],
55
+ "state": {
56
+ "date": "2025-05-25"
57
+ }
58
+ }
59
+ """
60
+
61
+ def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
62
+ return len(stream_state) > 0 and "states" not in stream_state
63
+
64
+ def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
65
+ per_partition_state = []
66
+ for site_url_key, search_type_state in stream_state.items():
67
+ if site_url_key == "date":
68
+ # The legacy state also contains a global cursor value under the `date` key which equates
69
+ # to global state.
70
+ #
71
+ # However, the Python implementation does not appear to be implemented
72
+ # correctly and simply saves the state of the last seen partition. Since I don't trust the
73
+ # legacy value and in the current implementation global state is applied to partitions
74
+ # without an existing value, I'm making a conscious choice to not migrate the global value.
75
+ continue
76
+ else:
77
+ site_url = site_url_key
78
+ for search_type_key, cursor in search_type_state.items():
79
+ per_partition_state.append({"partition": {"site_url": site_url, "search_type": search_type_key}, "cursor": cursor})
80
+ return {
81
+ "use_global_cursor": False,
82
+ "states": per_partition_state,
83
+ }
84
+
85
+
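A minimal sketch of the migration in action, using the legacy state from the docstring above:

    migration = NestedSubstreamStateMigration()

    legacy_state = {
        "date": "2025-05-28",
        "https://www.example.com/": {
            "web": {"date": "2025-05-25"},
            "news": {"date": "2023-05-22"},
        },
    }

    assert migration.should_migrate(legacy_state)  # non-empty and no "states" key yet
    migrated = migration.migrate(legacy_state)
    # migrated == {
    #     "use_global_cursor": False,
    #     "states": [
    #         {"partition": {"site_url": "https://www.example.com/", "search_type": "web"},
    #          "cursor": {"date": "2025-05-25"}},
    #         {"partition": {"site_url": "https://www.example.com/", "search_type": "news"},
    #          "cursor": {"date": "2023-05-22"}},
    #     ],
    # }
    # Note that the top-level global "date" cursor is intentionally dropped by migrate(), so the
    # result has no "state" key, per the conscious choice documented in the code.
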
+ @dataclass
+ class CustomReportExtractDimensionsFromKeys(RecordTransformation):
+     """
+     A record transformation that remaps each value in the keys array back to its associated
+     dimension. The reason this is a custom component is that we're unable to use list
+     comprehensions, and enumerate() is not a valid function in our Jinja context, so we can't
+     iterate over the dimensions defined in the config to create each field transformation on the
+     stream_template for each custom report.
+
+     If we were able to, the actual ComponentMappingDefinition would look like this:
+
+     type: ComponentMappingDefinition
+     field_path:
+       - transformations
+       - "1"
+       - fields
+     value: "{{ [{'path': [dimension], 'value': record['keys'][index]} for index, dimension in enumerate(components_values['dimensions'])] }}"
+
+     or
+
+     type: ComponentMappingDefinition
+     field_path:
+       - transformations
+       - "1"
+       - fields
+     value: >
+       {% for index, dimension in enumerate(components_values["dimensions"]) %}
+       - type: AddFields
+         fields:
+           - path: [ {{ dimension }} ]
+             value: "{{ record['keys'][index] }}"
+       {% endfor %}
+     """
+
+     dimensions: List[str] = field(default_factory=lambda: [])
+
+     def transform(
+         self,
+         record: Dict[str, Any],
+         config: Optional[Config] = None,
+         stream_state: Optional[StreamState] = None,
+         stream_slice: Optional[StreamSlice] = None,
+     ) -> None:
+         # Pop each value off the keys array in order and assign it to its corresponding dimension.
+         for dimension in self.dimensions:
+             record[dimension] = record["keys"].pop(0)
+
+         # The keys array is no longer needed once every value has been remapped.
+         record.pop("keys")
+
+
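A minimal sketch of the transformation in action; the dimensions and record values here are
hypothetical:

    transformation = CustomReportExtractDimensionsFromKeys(dimensions=["country", "device"])
    record = {"keys": ["usa", "MOBILE"], "clicks": 5, "impressions": 100}
    transformation.transform(record)
    # Each entry in "keys" is remapped positionally onto its dimension, and "keys" is removed:
    # record == {"country": "usa", "device": "MOBILE", "clicks": 5, "impressions": 100}
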
+ @dataclass
+ class CustomReportSchemaLoader(SchemaLoader):
+     """
+     A custom schema loader is needed for Google Search Console's custom report streams because
+     the schema depends on which dimensions are selected in the config. Right now, only
+     DynamicSchemaLoader, which is based on the response from an API endpoint, supports remapping
+     of types to Airbyte schema types. This CustomReportSchemaLoader functions more like a static
+     schema loader, so we must perform the remapping in this custom component.
+     """
+
+     DIMENSION_TO_PROPERTY_SCHEMA_MAP = {
+         "country": [{"country": {"type": ["null", "string"]}}],
+         "date": [{"date": {"type": ["null", "string"], "format": "date"}}],
+         "device": [{"device": {"type": ["null", "string"]}}],
+         "page": [{"page": {"type": ["null", "string"]}}],
+         "query": [{"query": {"type": ["null", "string"]}}],
+     }
+
+     dimensions: List[str]
+
+     def get_json_schema(self) -> Mapping[str, Any]:
+         schema: Mapping[str, Any] = {
+             "$schema": "https://json-schema.org/draft-07/schema#",
+             "type": ["null", "object"],
+             "additionalProperties": True,
+             "properties": {
+                 # metrics
+                 "clicks": {"type": ["null", "integer"]},
+                 "ctr": {"type": ["null", "number"], "multipleOf": 1e-25},
+                 "impressions": {"type": ["null", "integer"]},
+                 "position": {"type": ["null", "number"], "multipleOf": 1e-25},
+                 # default fields
+                 "search_type": {"type": ["null", "string"]},
+                 "site_url": {"type": ["null", "string"]},
+             },
+         }
+
+         # dimensions
+         dimension_properties = self._dimension_to_property_schema()
+         schema["properties"].update(dimension_properties)
+         return schema
+
+     def _dimension_to_property_schema(self) -> dict:
+         properties = {}
+         for dimension in sorted(self.dimensions):
+             fields = self.DIMENSION_TO_PROPERTY_SCHEMA_MAP[dimension]
+             for field in fields:
+                 properties = {**properties, **field}
+         return properties
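
A minimal sketch of the schema loader in action; the dimension selection is hypothetical:

    loader = CustomReportSchemaLoader(dimensions=["date", "country"])
    schema = loader.get_json_schema()
    # schema["properties"] contains the four metrics and two default fields above, plus the
    # selected dimensions (merged in sorted order):
    #   "country": {"type": ["null", "string"]}
    #   "date": {"type": ["null", "string"], "format": "date"}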