airbyte-source-google-sheets 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/METADATA +1 -1
- {airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/RECORD +7 -7
- source_google_sheets/components/extractors.py +21 -2
- source_google_sheets/spec.yaml +43 -1
- source_google_sheets/utils.py +115 -0
- {airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/WHEEL +0 -0
- {airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
|
2
2
|
source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
|
3
|
-
source_google_sheets/components/extractors.py,sha256=
|
3
|
+
source_google_sheets/components/extractors.py,sha256=Yrl5ge_gXJ6jqVYYR2bk2f6Rg-xak0wlHnPyK9f1lhc,9854
|
4
4
|
source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
|
5
5
|
source_google_sheets/manifest.yaml,sha256=CuSnA8dnRMeXWfyUA6aXBvGU3mz1dJIi0HqMiks9Fd0,15938
|
6
6
|
source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
|
@@ -8,9 +8,9 @@ source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNz
|
|
8
8
|
source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
|
9
9
|
source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
|
10
10
|
source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
|
11
|
-
source_google_sheets/spec.yaml,sha256=
|
12
|
-
source_google_sheets/utils.py,sha256=
|
13
|
-
airbyte_source_google_sheets-0.
|
14
|
-
airbyte_source_google_sheets-0.
|
15
|
-
airbyte_source_google_sheets-0.
|
16
|
-
airbyte_source_google_sheets-0.
|
11
|
+
source_google_sheets/spec.yaml,sha256=HGVGay4VAxzi9TUj-MSpeQLyE9GAoOjD2-xIhQDiIGY,6901
|
12
|
+
source_google_sheets/utils.py,sha256=jiVPqsRDjVgdwIiBJMvFJEwwuUBQ7BQAebRqfpS9pZw,6943
|
13
|
+
airbyte_source_google_sheets-0.10.0.dist-info/METADATA,sha256=Mzmw5ZoFCeMjyDI0pxxoGtHaAGQV4BfQq3Wp108SdRA,5369
|
14
|
+
airbyte_source_google_sheets-0.10.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
15
|
+
airbyte_source_google_sheets-0.10.0.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
|
16
|
+
airbyte_source_google_sheets-0.10.0.dist-info/RECORD,,
|
@@ -12,7 +12,11 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
|
|
12
12
|
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
|
13
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
14
|
from airbyte_cdk.sources.types import Config
|
15
|
-
from source_google_sheets.utils import
|
15
|
+
from source_google_sheets.utils import (
|
16
|
+
name_conversion,
|
17
|
+
safe_name_conversion,
|
18
|
+
safe_sanitzation_conversion,
|
19
|
+
)
|
16
20
|
|
17
21
|
|
18
22
|
class RawSchemaParser:
|
@@ -67,11 +71,25 @@ class RawSchemaParser:
|
|
67
71
|
duplicate_fields = set()
|
68
72
|
parsed_schema_values = []
|
69
73
|
seen_values = set()
|
74
|
+
# Gather all sanitization flags from config
|
75
|
+
config = getattr(self, "config", {})
|
76
|
+
flags = {
|
77
|
+
"remove_leading_trailing_underscores": config.get("remove_leading_trailing_underscores", False),
|
78
|
+
"combine_number_word_pairs": config.get("combine_number_word_pairs", False),
|
79
|
+
"remove_special_characters": config.get("remove_special_characters", False),
|
80
|
+
"combine_letter_number_pairs": config.get("combine_letter_number_pairs", False),
|
81
|
+
"allow_leading_numbers": config.get("allow_leading_numbers", False),
|
82
|
+
}
|
83
|
+
use_granular = any(flags.values())
|
84
|
+
|
70
85
|
for property_index, raw_schema_property in enumerate(raw_schema_properties):
|
71
86
|
raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
|
72
87
|
if not raw_schema_property_value or raw_schema_property_value.isspace():
|
73
88
|
break
|
74
|
-
if
|
89
|
+
# Use granular if any flag is set, else legacy
|
90
|
+
if names_conversion and use_granular:
|
91
|
+
raw_schema_property_value = safe_sanitzation_conversion(raw_schema_property_value, **flags)
|
92
|
+
elif names_conversion:
|
75
93
|
raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
|
76
94
|
|
77
95
|
if raw_schema_property_value in seen_values:
|
@@ -193,6 +211,7 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
|
|
193
211
|
)
|
194
212
|
|
195
213
|
|
214
|
+
@dataclass
|
196
215
|
class DpathSchemaExtractor(DpathExtractor, RawSchemaParser):
|
197
216
|
"""
|
198
217
|
Makes names conversion and parses sheet headers from the provided row.
|
source_google_sheets/spec.yaml
CHANGED
@@ -31,7 +31,49 @@ connectionSpecification:
|
|
31
31
|
names_conversion:
|
32
32
|
type: boolean
|
33
33
|
title: Convert Column Names to SQL-Compliant Format
|
34
|
-
description:
|
34
|
+
description: >-
|
35
|
+
Converts column names to a SQL-compliant format (snake_case, lowercase, etc.).
|
36
|
+
If enabled, you can further customize the sanitization using the options below.
|
37
|
+
default: false
|
38
|
+
remove_leading_trailing_underscores:
|
39
|
+
type: boolean
|
40
|
+
title: Remove Leading and Trailing Underscores
|
41
|
+
description: >-
|
42
|
+
Removes leading and trailing underscores from column names. Does not remove leading underscores from column names that start with a number.
|
43
|
+
Example: "50th Percentile?" → "_50_th_percentile"
|
44
|
+
This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
|
45
|
+
default: false
|
46
|
+
combine_number_word_pairs:
|
47
|
+
type: boolean
|
48
|
+
title: Combine Number-Word Pairs
|
49
|
+
description: >-
|
50
|
+
Combines adjacent numbers and words.
|
51
|
+
Example: "50th Percentile?" → "_50th_percentile_"
|
52
|
+
This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
|
53
|
+
default: false
|
54
|
+
remove_special_characters:
|
55
|
+
type: boolean
|
56
|
+
title: Remove All Special Characters
|
57
|
+
description: >-
|
58
|
+
Removes all special characters from column names.
|
59
|
+
Example: "Example ID*" → "example_id"
|
60
|
+
This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
|
61
|
+
default: false
|
62
|
+
combine_letter_number_pairs:
|
63
|
+
type: boolean
|
64
|
+
title: Combine Letter-Number Pairs
|
65
|
+
description: >-
|
66
|
+
Combines adjacent letters and numbers.
|
67
|
+
Example: "Q3 2023" → "q3_2023"
|
68
|
+
This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
|
69
|
+
default: false
|
70
|
+
allow_leading_numbers:
|
71
|
+
type: boolean
|
72
|
+
title: Allow Leading Numbers
|
73
|
+
description: >-
|
74
|
+
Allows column names to start with numbers.
|
75
|
+
Example: "50th Percentile" → "50_th_percentile"
|
76
|
+
This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
|
35
77
|
default: false
|
36
78
|
credentials:
|
37
79
|
type: object
|
source_google_sheets/utils.py
CHANGED
@@ -46,6 +46,121 @@ def safe_name_conversion(text: str) -> str:
|
|
46
46
|
return new
|
47
47
|
|
48
48
|
|
49
|
+
def _sanitization(
|
50
|
+
text: str,
|
51
|
+
remove_leading_trailing_underscores: bool = False,
|
52
|
+
combine_number_word_pairs: bool = False,
|
53
|
+
remove_special_characters: bool = False,
|
54
|
+
combine_letter_number_pairs: bool = False,
|
55
|
+
allow_leading_numbers: bool = False,
|
56
|
+
) -> str:
|
57
|
+
"""
|
58
|
+
Converts a string into a normalized, SQL-compliant name using a set of configurable options.
|
59
|
+
|
60
|
+
Args:
|
61
|
+
text: The input string to convert.
|
62
|
+
remove_leading_trailing_underscores: If True, removes underscores at the start/end of the result.
|
63
|
+
combine_number_word_pairs: If True, combines adjacent number and word tokens (e.g., "50 th" -> "50th").
|
64
|
+
remove_special_characters: If True, removes all special characters from the input.
|
65
|
+
combine_letter_number_pairs: If True, combines adjacent letter and number tokens (e.g., "Q 3" -> "Q3").
|
66
|
+
allow_leading_numbers: If False, prepends an underscore if the result starts with a number.
|
67
|
+
|
68
|
+
Returns:
|
69
|
+
The normalized, SQL-compliant string.
|
70
|
+
|
71
|
+
Steps:
|
72
|
+
1. Transliterates the input text to ASCII using unidecode.
|
73
|
+
2. Optionally removes special characters if remove_special_characters is True.
|
74
|
+
3. Splits the text into tokens using a regex pattern that separates words, numbers, and non-alphanumeric characters.
|
75
|
+
4. Optionally combines adjacent letter+number or number+word tokens based on flags.
|
76
|
+
5. Removes empty tokens in the middle, but keeps leading/trailing empty tokens for underscore placement.
|
77
|
+
6. Optionally strips leading/trailing underscores if remove_leading_trailing_underscores is True.
|
78
|
+
7. Optionally prepends an underscore if the result starts with a number and allow_leading_numbers is False.
|
79
|
+
8. Returns the final string in lowercase.
|
80
|
+
"""
|
81
|
+
text = unidecode.unidecode(text)
|
82
|
+
|
83
|
+
if remove_special_characters:
|
84
|
+
text = re.sub(r"[^\w\s]", "", text)
|
85
|
+
|
86
|
+
tokens = []
|
87
|
+
for m in TOKEN_PATTERN.finditer(text):
|
88
|
+
if m.group("NoToken") is None:
|
89
|
+
tokens.append(m.group(0))
|
90
|
+
else:
|
91
|
+
tokens.append("")
|
92
|
+
|
93
|
+
# Combine tokens as per flags
|
94
|
+
combined_tokens = []
|
95
|
+
i = 0
|
96
|
+
while i < len(tokens):
|
97
|
+
if (
|
98
|
+
combine_letter_number_pairs
|
99
|
+
and i + 1 < len(tokens)
|
100
|
+
and tokens[i]
|
101
|
+
and tokens[i].isalpha()
|
102
|
+
and tokens[i + 1]
|
103
|
+
and tokens[i + 1].isdigit()
|
104
|
+
):
|
105
|
+
combined = tokens[i] + tokens[i + 1]
|
106
|
+
combined_tokens.append(combined)
|
107
|
+
i += 2
|
108
|
+
elif (
|
109
|
+
combine_number_word_pairs
|
110
|
+
and i + 1 < len(tokens)
|
111
|
+
and tokens[i]
|
112
|
+
and tokens[i].isdigit()
|
113
|
+
and tokens[i + 1]
|
114
|
+
and tokens[i + 1].isalpha()
|
115
|
+
):
|
116
|
+
combined = tokens[i] + tokens[i + 1]
|
117
|
+
combined_tokens.append(combined)
|
118
|
+
i += 2
|
119
|
+
else:
|
120
|
+
combined_tokens.append(tokens[i])
|
121
|
+
i += 1
|
122
|
+
|
123
|
+
# Find indices of first and last non-empty tokens
|
124
|
+
first_non_empty = next((i for i, t in enumerate(combined_tokens) if t), len(combined_tokens))
|
125
|
+
last_non_empty = next((i for i, t in reversed(list(enumerate(combined_tokens))) if t), -1)
|
126
|
+
|
127
|
+
# Process tokens: keep leading/trailing empty tokens, remove empty tokens in middle
|
128
|
+
if first_non_empty < len(combined_tokens):
|
129
|
+
leading = combined_tokens[:first_non_empty]
|
130
|
+
middle = [t for t in combined_tokens[first_non_empty : last_non_empty + 1] if t]
|
131
|
+
trailing = combined_tokens[last_non_empty + 1 :]
|
132
|
+
processed_tokens = leading + middle + trailing
|
133
|
+
else:
|
134
|
+
processed_tokens = combined_tokens # All tokens are empty
|
135
|
+
|
136
|
+
# Join tokens with underscores
|
137
|
+
result = DEFAULT_SEPARATOR.join(processed_tokens)
|
138
|
+
|
139
|
+
# Apply remove_leading_trailing_underscores on the final string
|
140
|
+
if remove_leading_trailing_underscores:
|
141
|
+
result = result.strip(DEFAULT_SEPARATOR)
|
142
|
+
|
143
|
+
# Handle leading numbers after underscore removal
|
144
|
+
if not allow_leading_numbers and result and result[0].isdigit():
|
145
|
+
result = DEFAULT_SEPARATOR + result
|
146
|
+
|
147
|
+
final_result = result.lower()
|
148
|
+
return final_result
|
149
|
+
|
150
|
+
|
151
|
+
def safe_sanitzation_conversion(text: str, **kwargs) -> str:
|
152
|
+
"""
|
153
|
+
Converts text to a safe name using _sanitization with the provided keyword arguments.
|
154
|
+
Raises an exception if the result is empty or "_". Unlike safe_name_conversion,
|
155
|
+
this function also rejects "_" as a valid result, since _sanitization
|
156
|
+
may return "_" for certain inputs (e.g., "*").
|
157
|
+
"""
|
158
|
+
new = _sanitization(text, **kwargs)
|
159
|
+
if not new or new == "_":
|
160
|
+
raise Exception(f"initial string '{text}' converted to empty")
|
161
|
+
return new
|
162
|
+
|
163
|
+
|
49
164
|
def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
|
50
165
|
if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
|
51
166
|
return (
|
{airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|