airbyte-source-google-sheets 0.9.5.dev202505142036__py3-none-any.whl → 0.10.0.dev202505231635__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/METADATA +1 -1
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/RECORD +7 -7
- source_google_sheets/components/extractors.py +19 -8
- source_google_sheets/spec.yaml +8 -0
- source_google_sheets/utils.py +73 -0
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/WHEEL +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/entry_points.txt +0 -0
{airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
 source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
-source_google_sheets/components/extractors.py,sha256
+source_google_sheets/components/extractors.py,sha256=-d5hPW40ZBjU3gTUZNudayvqf1eHlveW1amwKJywDkU,9668
 source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
 source_google_sheets/manifest.yaml,sha256=HG8vpxv4lqwe5TCneq_zxuERaGNdtno7yAs61Wsk30E,15821
 source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
@@ -8,9 +8,9 @@ source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNz
 source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
 source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
 source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
-source_google_sheets/spec.yaml,sha256=
-source_google_sheets/utils.py,sha256=
-airbyte_source_google_sheets-0.
-airbyte_source_google_sheets-0.
-airbyte_source_google_sheets-0.
-airbyte_source_google_sheets-0.
+source_google_sheets/spec.yaml,sha256=mFOiMN1IsrjyLRcjTRw7xa0b74LwkUXdVvUqau7iTXc,5709
+source_google_sheets/utils.py,sha256=NUw8pJSCORGKx8mrhd11x4xS1dGUraDZdh6WxVn7aQw,5150
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/METADATA,sha256=MW6vZRfbucb1JqCsXRg2O93pL5s1hRalyUfVV-xKR6I,5385
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/RECORD,,
source_google_sheets/components/extractors.py
CHANGED
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
 from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.types import Config
-from source_google_sheets.utils import name_conversion, safe_name_conversion
+from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
 
 
 class RawSchemaParser:
@@ -54,9 +54,12 @@ class RawSchemaParser:
         schema_pointer: List[Union[InterpolatedString, str]],
         key_pointer: List[Union[InterpolatedString, str]],
         names_conversion: bool,
+        experimental_names_conversion: bool,
     ):
         """
-        1. Parses sheet headers from the provided raw schema
+        1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous,
+           i.e., every cell contains a value and the first cell which does not contain a value denotes the end
+           of the headers.
         2. Makes name conversion if required.
         3. Removes duplicated fields from the schema.
         Return a list of tuples with correct property index (by found in array), value and raw_schema
@@ -68,8 +71,11 @@ class RawSchemaParser:
         for property_index, raw_schema_property in enumerate(raw_schema_properties):
             raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
             if not raw_schema_property_value or raw_schema_property_value.isspace():
-                break
-            if names_conversion:
+                break
+            # Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
+            if experimental_names_conversion:
+                raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
+            elif names_conversion:
                 raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
 
             if raw_schema_property_value in seen_values:
@@ -87,12 +93,13 @@ class RawSchemaParser:
     def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
         """Removes duplicated fields and makes names conversion"""
         names_conversion = self.config.get("names_conversion", False)
+        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         parsed_properties = []
         for raw_schema_data in records:
             for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
-                raw_schema_data, schema_pointer, key_pointer, names_conversion
+                raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
             ):
                 self._set_data(parsed_value, raw_schema_property, key_pointer)
                 parsed_properties.append(raw_schema_property)
@@ -138,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
         self._values_to_match_key = parameters["values_to_match_key"]
         schema_type_identifier = parameters["schema_type_identifier"]
         names_conversion = self.config.get("names_conversion", False)
+        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         self._indexed_properties_to_match = self.extract_properties_to_match(
-            parameters["properties_to_match"], schema_type_identifier, names_conversion
+            parameters["properties_to_match"],
+            schema_type_identifier,
+            names_conversion=names_conversion,
+            experimental_names_conversion=experimental_names_conversion,
         )
 
-    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
+    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         indexed_properties = {}
         for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
-            properties_to_match, schema_pointer, key_pointer, names_conversion
+            properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
         ):
             indexed_properties[property_index] = property_parsed_value
         return indexed_properties
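Taken together, these hunks thread one new flag through the header-parsing path. Below is a minimal sketch of the resulting control flow, assuming stub converters in place of the real safe_name_conversion / experimental_safe_name_conversion helpers, and simplifying duplicate handling to the "removes duplicated fields" behavior named in the docstring; only the branch ordering mirrors the diff.

# Sketch only: stubs stand in for the helpers in source_google_sheets/utils.py.
from typing import List, Optional, Tuple

def stub_safe_name_conversion(text: str) -> str:
    # Stand-in for the package's SQL-compliant conversion rules
    return text.strip().lower().replace(" ", "_")

def stub_experimental_safe_name_conversion(text: str) -> str:
    # Stand-in: the real version adds extra sanitization (e.g., stripping spaces)
    return stub_safe_name_conversion(text)

def parse_headers(
    headers: List[Optional[str]],
    names_conversion: bool,
    experimental_names_conversion: bool,
) -> List[Tuple[int, str]]:
    parsed: List[Tuple[int, str]] = []
    seen = set()
    for index, value in enumerate(headers):
        # Headers are assumed contiguous: the first empty cell ends them.
        if not value or value.isspace():
            break
        # The experimental conversion, when enabled, supersedes the standard one.
        if experimental_names_conversion:
            value = stub_experimental_safe_name_conversion(value)
        elif names_conversion:
            value = stub_safe_name_conversion(value)
        # Duplicated fields are removed (simplified here to keep-first).
        if value in seen:
            continue
        seen.add(value)
        parsed.append((index, value))
    return parsed

print(parse_headers(["My Name", " Q3 ", "My Name", None, "ignored"], True, True))
# [(0, 'my_name'), (1, 'q3')]; everything after the empty cell is skipped

Because the experimental branch is checked first, enabling experimental_names_conversion makes names_conversion irrelevant, which matches the spec.yaml description below.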
source_google_sheets/spec.yaml
CHANGED
@@ -33,6 +33,14 @@ connectionSpecification:
       title: Convert Column Names to SQL-Compliant Format
       description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
       default: false
+    experimental_names_conversion:
+      type: boolean
+      title: Experimental Convert Column Names to SQL-Compliant Format
+      description: >-
+        Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
+        This option may change behavior in the future, which may cause column names to update in your destination on future updates.
+        Due to this, it is recommended that you also change the "Detect and propagate schema changes" setting to "Approve all changes myself" in the connection's advanced settings.
+        If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
     credentials:
       type: object
       title: Authentication
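For illustration, a source config exercising the new option might look like the fragment below. This is a sketch only: apart from the two conversion flags added in this diff, the field names are abbreviated placeholders rather than the full spec.

# Hypothetical config fragment; only the two conversion flags come from this diff.
config = {
    "spreadsheet_id": "<spreadsheet-url>",    # placeholder
    "credentials": {"auth_type": "Service"},  # abbreviated placeholder
    "names_conversion": True,
    "experimental_names_conversion": True,    # supersedes names_conversion when true
}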
source_google_sheets/utils.py
CHANGED
@@ -37,6 +37,61 @@ def name_conversion(text: str) -> str:
     return text
 
 
+def experimental_name_conversion(text: str) -> str:
+    """
+    Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'.
+    Removes leading/trailing spaces, combines number-word pairs (e.g., '50th' -> '50th'),
+    letter-number pairs (e.g., 'Q3' -> 'Q3'), and removes special characters without adding underscores.
+    Spaces are converted to underscores for snake_case.
+    """
+    text = unidecode.unidecode(text.strip())  # Strip leading/trailing spaces
+
+    tokens = []
+    for m in TOKEN_PATTERN.finditer(text):
+        if m.group("NoToken") is None:
+            tokens.append(m.group(0))
+        else:
+            # Only add an empty token for spaces to preserve snake_case; skip other special characters
+            if m.group(0).isspace():
+                tokens.append("")
+            # Otherwise, skip the special character entirely
+
+    # Combine single uppercase letter followed by number (e.g., "Q" and "3" -> "Q3"), then number-word pairs
+    combined_tokens = []
+    i = 0
+    while i < len(tokens):
+        # Check for letter-number pair (e.g., "Q3")
+        if i + 1 < len(tokens) and len(tokens[i]) == 1 and tokens[i].isupper() and tokens[i + 1].isdigit():
+            combined_tokens.append(tokens[i] + tokens[i + 1])
+            i += 2
+        # Check for number-word pair (e.g., "50th")
+        elif i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
+            combined_tokens.append(tokens[i] + tokens[i + 1])
+            i += 2
+        else:
+            # Only add the token if it's non-empty to avoid underscores from spaces near special characters
+            if tokens[i]:
+                combined_tokens.append(tokens[i])
+            i += 1
+
+    # Remove trailing empty tokens to avoid trailing underscores
+    while combined_tokens and combined_tokens[-1] == "":
+        combined_tokens.pop()
+    # Remove leading empty tokens to avoid leading underscores
+    while combined_tokens and combined_tokens[0] == "":
+        combined_tokens.pop(0)
+
+    if len(combined_tokens) >= 3:
+        combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
+
+    if combined_tokens and combined_tokens[0].isdigit():
+        combined_tokens.insert(0, "")
+
+    text = DEFAULT_SEPARATOR.join(combined_tokens)
+    text = text.lower()
+    return text
+
+
 def safe_name_conversion(text: str) -> str:
     if not text:
         return text
@@ -46,6 +101,24 @@ def safe_name_conversion(text: str) -> str:
     return new
 
 
+import csv
+
+
+def experimental_safe_name_conversion(text: str, output_file: str = "conversion_results2.csv") -> str:
+    if not text:
+        return text
+    new = experimental_name_conversion(text)
+    if not new:
+        raise Exception(f"initial string '{text}' converted to empty")
+
+    # Write to CSV
+    with open(output_file, mode="a", newline="", encoding="utf-8") as file:
+        writer = csv.writer(file)
+        writer.writerow([text, new])  # Write the original and converted text
+
+    return new
+
+
 def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
     if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
         return (
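A usage sketch for the new converter, with expected outputs inferred from the docstring and token rules above; TOKEN_PATTERN and DEFAULT_SEPARATOR are pre-existing module-level helpers that this diff does not show, so exact behavior depends on them.

# Sketch only: outputs are inferred, not verified against the package.
from source_google_sheets.utils import experimental_name_conversion

print(experimental_name_conversion("Q3"))         # 'q3': letter-number pair kept together
print(experimental_name_conversion(" My Name "))  # 'my_name': stripped and snake_cased
print(experimental_name_conversion("50th"))       # '50th': number-word pair combined

Note that the experimental_safe_name_conversion wrapper shipped here also appends every (original, converted) pair to conversion_results2.csv in the working directory on each call, a side effect worth knowing about before enabling the option in production.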