airbyte-source-google-sheets 0.9.5.dev202505141418__tar.gz → 0.10.0.dev202505211933__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/PKG-INFO +1 -1
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/pyproject.toml +1 -1
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/extractors.py +16 -7
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/spec.yaml +8 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/utils.py +46 -1
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/README.md +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/__init__.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/__init__.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/partition_routers.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/manifest.yaml +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/models/__init__.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/models/spreadsheet.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/models/spreadsheet_values.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/run.py +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505141418 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/source.py +0 -0
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
|
|
12
12
|
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
|
13
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
14
|
from airbyte_cdk.sources.types import Config
|
15
|
-
from source_google_sheets.utils import name_conversion, safe_name_conversion
|
15
|
+
from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
|
16
16
|
|
17
17
|
|
18
18
|
class RawSchemaParser:
|
@@ -54,6 +54,7 @@ class RawSchemaParser:
|
|
54
54
|
schema_pointer: List[Union[InterpolatedString, str]],
|
55
55
|
key_pointer: List[Union[InterpolatedString, str]],
|
56
56
|
names_conversion: bool,
|
57
|
+
experimental_names_conversion: bool,
|
57
58
|
):
|
58
59
|
"""
|
59
60
|
1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous
|
@@ -69,9 +70,12 @@ class RawSchemaParser:
|
|
69
70
|
seen_values = set()
|
70
71
|
for property_index, raw_schema_property in enumerate(raw_schema_properties):
|
71
72
|
raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
|
72
|
-
if not raw_schema_property_value:
|
73
|
+
if not raw_schema_property_value or raw_schema_property_value.isspace():
|
73
74
|
break
|
74
|
-
if names_conversion:
|
75
|
+
# Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
|
76
|
+
if experimental_names_conversion:
|
77
|
+
raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
|
78
|
+
elif names_conversion:
|
75
79
|
raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
|
76
80
|
|
77
81
|
if raw_schema_property_value in seen_values:
|
@@ -89,12 +93,13 @@ class RawSchemaParser:
|
|
89
93
|
def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
|
90
94
|
"""Removes duplicated fields and makes names conversion"""
|
91
95
|
names_conversion = self.config.get("names_conversion", False)
|
96
|
+
experimental_names_conversion = self.config.get("experimental_names_conversion", False)
|
92
97
|
schema_pointer = schema_type_identifier.get("schema_pointer")
|
93
98
|
key_pointer = schema_type_identifier["key_pointer"]
|
94
99
|
parsed_properties = []
|
95
100
|
for raw_schema_data in records:
|
96
101
|
for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
|
97
|
-
raw_schema_data, schema_pointer, key_pointer, names_conversion
|
102
|
+
raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
|
98
103
|
):
|
99
104
|
self._set_data(parsed_value, raw_schema_property, key_pointer)
|
100
105
|
parsed_properties.append(raw_schema_property)
|
@@ -140,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
|
|
140
145
|
self._values_to_match_key = parameters["values_to_match_key"]
|
141
146
|
schema_type_identifier = parameters["schema_type_identifier"]
|
142
147
|
names_conversion = self.config.get("names_conversion", False)
|
148
|
+
experimental_names_conversion = self.config.get("experimental_names_conversion", False)
|
143
149
|
self._indexed_properties_to_match = self.extract_properties_to_match(
|
144
|
-
parameters["properties_to_match"],
|
150
|
+
parameters["properties_to_match"],
|
151
|
+
schema_type_identifier,
|
152
|
+
names_conversion=names_conversion,
|
153
|
+
experimental_names_conversion=experimental_names_conversion,
|
145
154
|
)
|
146
155
|
|
147
|
-
def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
|
156
|
+
def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
|
148
157
|
schema_pointer = schema_type_identifier.get("schema_pointer")
|
149
158
|
key_pointer = schema_type_identifier["key_pointer"]
|
150
159
|
indexed_properties = {}
|
151
160
|
for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
|
152
|
-
properties_to_match, schema_pointer, key_pointer, names_conversion
|
161
|
+
properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
|
153
162
|
):
|
154
163
|
indexed_properties[property_index] = property_parsed_value
|
155
164
|
return indexed_properties
|
@@ -33,6 +33,14 @@ connectionSpecification:
|
|
33
33
|
title: Convert Column Names to SQL-Compliant Format
|
34
34
|
description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
|
35
35
|
default: false
|
36
|
+
experimental_names_conversion:
|
37
|
+
type: boolean
|
38
|
+
title: Experimental Convert Column Names to SQL-Compliant Format
|
39
|
+
description: >-
|
40
|
+
Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
|
41
|
+
This option may change behavior in the future, which may cause column names to update in your destination on future updates.
|
42
|
+
Due to this, it is recommended that you also change the "Detect and propagate schema changes" to "Approve all changes myself" in the connection's advanced settings.
|
43
|
+
If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
|
36
44
|
credentials:
|
37
45
|
type: object
|
38
46
|
title: Authentication
|
@@ -37,8 +37,44 @@ def name_conversion(text: str) -> str:
|
|
37
37
|
return text
|
38
38
|
|
39
39
|
|
40
|
+
def experimental_name_conversion(text: str) -> str:
|
41
|
+
"""
|
42
|
+
Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
|
43
|
+
Removes leading/trailing spaces and combines number-word pairs (e.g., '50th' -> '50th').
|
44
|
+
"""
|
45
|
+
text = unidecode.unidecode(text.strip()) # Strip leading/trailing spaces
|
46
|
+
|
47
|
+
tokens = []
|
48
|
+
for m in TOKEN_PATTERN.finditer(text):
|
49
|
+
if m.group("NoToken") is None:
|
50
|
+
tokens.append(m.group(0))
|
51
|
+
else:
|
52
|
+
tokens.append("")
|
53
|
+
|
54
|
+
# Combine number followed by word (e.g., "50" and "th" -> "50th")
|
55
|
+
combined_tokens = []
|
56
|
+
i = 0
|
57
|
+
while i < len(tokens):
|
58
|
+
if i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
|
59
|
+
combined_tokens.append(tokens[i] + tokens[i + 1])
|
60
|
+
i += 2
|
61
|
+
else:
|
62
|
+
combined_tokens.append(tokens[i])
|
63
|
+
i += 1
|
64
|
+
|
65
|
+
if len(combined_tokens) >= 3:
|
66
|
+
combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
|
67
|
+
|
68
|
+
if combined_tokens and combined_tokens[0].isdigit():
|
69
|
+
combined_tokens.insert(0, "")
|
70
|
+
|
71
|
+
text = DEFAULT_SEPARATOR.join(combined_tokens)
|
72
|
+
text = text.lower()
|
73
|
+
return text
|
74
|
+
|
75
|
+
|
40
76
|
def safe_name_conversion(text: str) -> str:
|
41
|
-
if not text:
|
77
|
+
if not text:
|
42
78
|
return text
|
43
79
|
new = name_conversion(text)
|
44
80
|
if not new:
|
@@ -46,6 +82,15 @@ def safe_name_conversion(text: str) -> str:
|
|
46
82
|
return new
|
47
83
|
|
48
84
|
|
85
|
+
def experimental_safe_name_conversion(text: str) -> str:
|
86
|
+
if not text:
|
87
|
+
return text
|
88
|
+
new = experimental_name_conversion(text)
|
89
|
+
if not new:
|
90
|
+
raise Exception(f"initial string '{text}' converted to empty")
|
91
|
+
return new
|
92
|
+
|
93
|
+
|
49
94
|
def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
|
50
95
|
if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
|
51
96
|
return (
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|