airbyte-source-google-sheets 0.10.0.dev202506022219__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info → airbyte_source_google_sheets-0.11.0.dist-info}/METADATA +1 -1
- airbyte_source_google_sheets-0.11.0.dist-info/RECORD +16 -0
- source_google_sheets/components/extractors.py +23 -14
- source_google_sheets/manifest.yaml +9 -2
- source_google_sheets/spec.yaml +89 -7
- source_google_sheets/utils.py +97 -63
- airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/RECORD +0 -16
- {airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info → airbyte_source_google_sheets-0.11.0.dist-info}/WHEEL +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info → airbyte_source_google_sheets-0.11.0.dist-info}/entry_points.txt +0 -0
airbyte_source_google_sheets-0.11.0.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
+source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
+source_google_sheets/components/extractors.py,sha256=7CLiYFYTq6DfeYCxchvQI_sQLpaVd_86Zfon2hKZe9Q,9842
+source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
+source_google_sheets/manifest.yaml,sha256=GHQb_x_2pZQR4Oh5gWMnAm-KEGvuZUENYoX4PoUUR7Y,16489
+source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
+source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNzuAtHlGRmMAKQA,1112
+source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
+source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
+source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
+source_google_sheets/spec.yaml,sha256=RN3XrLj6PYzqTNtpDxYo6PffkyPPizEBOxSzCrPL15Q,9058
+source_google_sheets/utils.py,sha256=jiVPqsRDjVgdwIiBJMvFJEwwuUBQ7BQAebRqfpS9pZw,6943
+airbyte_source_google_sheets-0.11.0.dist-info/METADATA,sha256=VBrcsENgMSKx9zxYAfcmDh03ezpuH1dCRbA2zezrxG0,5369
+airbyte_source_google_sheets-0.11.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+airbyte_source_google_sheets-0.11.0.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
+airbyte_source_google_sheets-0.11.0.dist-info/RECORD,,
source_google_sheets/components/extractors.py
CHANGED
@@ -12,7 +12,10 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
 from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.types import Config
-from source_google_sheets.utils import
+from source_google_sheets.utils import (
+    safe_name_conversion,
+    safe_sanitzation_conversion,
+)


 class RawSchemaParser:
@@ -54,7 +57,6 @@ class RawSchemaParser:
         schema_pointer: List[Union[InterpolatedString, str]],
         key_pointer: List[Union[InterpolatedString, str]],
         names_conversion: bool,
-        experimental_names_conversion: bool,
     ):
         """
         1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous
@@ -68,13 +70,24 @@ class RawSchemaParser:
         duplicate_fields = set()
         parsed_schema_values = []
         seen_values = set()
+        # Gather all sanitisation flags from config
+        config = getattr(self, "config", {})
+        flags = {
+            "remove_leading_trailing_underscores": config.get("remove_leading_trailing_underscores", False),
+            "combine_number_word_pairs": config.get("combine_number_word_pairs", False),
+            "remove_special_characters": config.get("remove_special_characters", False),
+            "combine_letter_number_pairs": config.get("combine_letter_number_pairs", False),
+            "allow_leading_numbers": config.get("allow_leading_numbers", False),
+        }
+        use_sanitzation = any(flags.values())
+
         for property_index, raw_schema_property in enumerate(raw_schema_properties):
             raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
             if not raw_schema_property_value or raw_schema_property_value.isspace():
                 break
-            #
-            if
-                raw_schema_property_value =
+            # Use sanitzation if any flag is set, else legacy
+            if names_conversion and use_sanitzation:
+                raw_schema_property_value = safe_sanitzation_conversion(raw_schema_property_value, **flags)
             elif names_conversion:
                 raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
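The dispatch above is strictly opt-in: the legacy converter keeps running unless at least one of the new flags is set, and the sanitizer is only reached when names_conversion is also true. A small standalone sketch of the same decision, using a hypothetical connector config (not taken from the package), makes the precedence explicit:

# Hypothetical config: names_conversion plus one of the new flags.
config = {"names_conversion": True, "combine_letter_number_pairs": True}

flag_names = (
    "remove_leading_trailing_underscores",
    "combine_number_word_pairs",
    "remove_special_characters",
    "combine_letter_number_pairs",
    "allow_leading_numbers",
)
flags = {name: config.get(name, False) for name in flag_names}

if config.get("names_conversion") and any(flags.values()):
    chosen = "safe_sanitzation_conversion"  # flag-driven sanitizer (new path)
elif config.get("names_conversion"):
    chosen = "safe_name_conversion"  # legacy converter (unchanged behavior)
else:
    chosen = "no conversion"

print(chosen)  # -> safe_sanitzation_conversion

Because none of the new flags default to true, existing connections that only set names_conversion keep their current column names.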
@@ -93,13 +106,12 @@ class RawSchemaParser:
     def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
         """Removes duplicated fields and makes names conversion"""
         names_conversion = self.config.get("names_conversion", False)
-        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         parsed_properties = []
         for raw_schema_data in records:
             for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
-                raw_schema_data, schema_pointer, key_pointer, names_conversion
+                raw_schema_data, schema_pointer, key_pointer, names_conversion
             ):
                 self._set_data(parsed_value, raw_schema_property, key_pointer)
                 parsed_properties.append(raw_schema_property)
@@ -145,20 +157,16 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
         self._values_to_match_key = parameters["values_to_match_key"]
         schema_type_identifier = parameters["schema_type_identifier"]
         names_conversion = self.config.get("names_conversion", False)
-        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         self._indexed_properties_to_match = self.extract_properties_to_match(
-            parameters["properties_to_match"],
-            schema_type_identifier,
-            names_conversion=names_conversion,
-            experimental_names_conversion=experimental_names_conversion,
+            parameters["properties_to_match"], schema_type_identifier, names_conversion=names_conversion
         )

-    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion
+    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         indexed_properties = {}
         for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
-            properties_to_match, schema_pointer, key_pointer, names_conversion
+            properties_to_match, schema_pointer, key_pointer, names_conversion
         ):
             indexed_properties[property_index] = property_parsed_value
         return indexed_properties
@@ -202,6 +210,7 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
         )


+@dataclass
 class DpathSchemaExtractor(DpathExtractor, RawSchemaParser):
     """
     Makes names conversion and parses sheet headers from the provided row.
source_google_sheets/manifest.yaml
CHANGED
@@ -105,9 +105,16 @@ dynamic_streams:
       - field_path:
           - name
         type: ComponentMappingDefinition
-
+        # Expression is structured as: <value_if_true> if <condition> else <value_if_false>
+        # This sets the stream name to the custom name from stream_name_overrides if a match is found,
+        # otherwise uses the original sheet/tab name.
+        value: >-
+          {%- set overrides = config.get('stream_name_overrides', [])
+            | selectattr('source_stream_name', 'equalto', components_values['properties']['title'])
+            | list -%}
+          {{ overrides[0]['custom_stream_name'] if overrides else components_values['properties']['title'] }}
         value_type: string
-        description: name for dynamic stream.
+        description: name for dynamic stream (with optional override).
       - field_path:
           - schema_loader
           - retriever
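For readers less familiar with Jinja filters, the selectattr/list pipeline above can be read as the following Python sketch (hypothetical inputs; at render time the real values come from config and components_values):

# Hypothetical inputs mirroring the Jinja expression in the manifest.
config = {
    "stream_name_overrides": [
        {"source_stream_name": "Sheet1", "custom_stream_name": "sales_data"},
    ]
}
title = "Sheet1"  # plays the role of components_values['properties']['title']

# selectattr('source_stream_name', 'equalto', title) | list
overrides = [
    o for o in config.get("stream_name_overrides", [])
    if o["source_stream_name"] == title
]

# {{ overrides[0]['custom_stream_name'] if overrides else title }}
stream_name = overrides[0]["custom_stream_name"] if overrides else title
print(stream_name)  # -> sales_data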
source_google_sheets/spec.yaml
CHANGED
@@ -21,6 +21,7 @@ connectionSpecification:
       otherwise the request returns a timeout error. In regards to this information, consider network speed and
       number of columns of the google sheet when deciding a batch_size value.
     default: 1000000
+    order: 1
   spreadsheet_id:
     type: string
     title: Spreadsheet Link
@@ -28,19 +29,60 @@ connectionSpecification:
       Enter the link to the Google spreadsheet you want to sync. To copy the link, click the 'Share' button in the top-right corner of the spreadsheet, then click 'Copy link'.
     examples:
       - https://docs.google.com/spreadsheets/d/1hLd9Qqti3UyLXZB2aFfUWDT7BG-arw2xy4HR3D-dwUb/edit
+    order: 0
   names_conversion:
     type: boolean
     title: Convert Column Names to SQL-Compliant Format
-    description:
+    description: >-
+      Converts column names to a SQL-compliant format (snake_case, lowercase, etc).
+      If enabled, you can further customize the sanitization using the options below.
+    default: false
+    order: 2
+  remove_leading_trailing_underscores:
+    type: boolean
+    title: Remove Leading and Trailing Underscores
+    description: >-
+      Removes leading and trailing underscores from column names. Does not remove leading underscores from column names that start with a number.
+      Example: "50th Percentile? " → "_50_th_percentile"
+      This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
     default: false
-
+    order: 3
+  combine_number_word_pairs:
     type: boolean
-    title:
+    title: Combine Number-Word Pairs
     description: >-
-
-
-
-
+      Combines adjacent numbers and words.
+      Example: "50th Percentile?" → "_50th_percentile_"
+      This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+    default: false
+    order: 4
+  remove_special_characters:
+    type: boolean
+    title: Remove All Special Characters
+    description: >-
+      Removes all special characters from column names.
+      Example: "Example ID*" → "example_id"
+      This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+    default: false
+    order: 5
+  combine_letter_number_pairs:
+    type: boolean
+    title: Combine Letter-Number Pairs
+    description: >-
+      Combines adjacent letters and numbers.
+      Example: "Q3 2023" → "q3_2023"
+      This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+    default: false
+    order: 6
+  allow_leading_numbers:
+    type: boolean
+    title: Allow Leading Numbers
+    description: >-
+      Allows column names to start with numbers.
+      Example: "50th Percentile" → "50_th_percentile"
+      This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+    default: false
+    order: 7
   credentials:
     type: object
     title: Authentication
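Taken together, this block adds five opt-in refinements to names_conversion, each inert unless names_conversion itself is enabled. A hypothetical source config exercising a few of them might look like this, with the expected conversions taken from the Example lines above:

# Hypothetical connector config; every option below only takes effect
# because names_conversion itself is enabled.
config = {
    "spreadsheet_id": "https://docs.google.com/spreadsheets/d/1hLd9Qqti3UyLXZB2aFfUWDT7BG-arw2xy4HR3D-dwUb/edit",
    "names_conversion": True,
    "remove_special_characters": True,    # "Example ID*" -> "example_id"
    "combine_letter_number_pairs": True,  # "Q3 2023"     -> "q3_2023"
    "allow_leading_numbers": False,       # names starting with a digit keep a "_" prefix
}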
@@ -89,6 +131,46 @@ connectionSpecification:
       airbyte_secret: true
       examples:
         - '{ "type": "service_account", "project_id": YOUR_PROJECT_ID, "private_key_id": YOUR_PRIVATE_KEY, ... }'
+  stream_name_overrides:
+    type: array
+    title: Stream Name Overrides
+    description: >-
+      **Overridden streams will default to Sync Mode: Full Refresh (Append), which does not support primary keys. If you want to use primary keys and deduplication, update the sync mode to "Full Refresh | Overwrite + Deduped" in your connection settings.**
+
+      Allows you to rename streams (Google Sheet tab names) as they appear in Airbyte.
+
+      Each item should be an object with a `source_stream_name` (the exact name of the sheet/tab in your spreadsheet)
+      and a `custom_stream_name` (the name you want it to appear as in Airbyte and the destination).
+
+      If a `source_stream_name` is not found in your spreadsheet, it will be ignored and the default name will be used.
+      This feature only affects stream (sheet/tab) names, not field/column names.
+
+      If you want to rename fields or column names, you can do so using the Airbyte Mappings feature after your connection is created. See the Airbyte documentation for more details on how to use Mappings.
+
+      Examples:
+        - To rename a sheet called "Sheet1" to "sales_data", and "2024 Q1" to "q1_2024":
+          [
+            { "source_stream_name": "Sheet1", "custom_stream_name": "sales_data" },
+            { "source_stream_name": "2024 Q1", "custom_stream_name": "q1_2024" }
+          ]
+        - If you do not wish to rename any streams, leave this blank.
+    items:
+      type: object
+      required:
+        - source_stream_name
+        - custom_stream_name
+      properties:
+        source_stream_name:
+          type: string
+          title: Source Stream Name
+          description: The exact name of the sheet/tab in your Google Spreadsheet.
+          order: 0
+        custom_stream_name:
+          type: string
+          title: Custom Stream Name
+          description: The name you want this stream to appear as in Airbyte and your destination.
+          order: 1
+    order: 8
   advanced_auth:
     auth_flow_type: oauth2.0
     predicate_key:
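A matching hypothetical config entry for the renames in the example above; per the description, an override whose source_stream_name matches no sheet is simply ignored:

# Hypothetical config entry; "Sheet3" matches no tab in the spreadsheet,
# so it is ignored and that sheet would keep its original stream name.
config = {
    "stream_name_overrides": [
        {"source_stream_name": "Sheet1", "custom_stream_name": "sales_data"},
        {"source_stream_name": "2024 Q1", "custom_stream_name": "q1_2024"},
        {"source_stream_name": "Sheet3", "custom_stream_name": "unused"},
    ]
}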
source_google_sheets/utils.py
CHANGED
@@ -37,92 +37,126 @@ def name_conversion(text: str) -> str:
     return text


-def
+def safe_name_conversion(text: str) -> str:
+    if not text:
+        return text
+    new = name_conversion(text)
+    if not new:
+        raise Exception(f"initial string '{text}' converted to empty")
+    return new
+
+
+def _sanitization(
+    text: str,
+    remove_leading_trailing_underscores: bool = False,
+    combine_number_word_pairs: bool = False,
+    remove_special_characters: bool = False,
+    combine_letter_number_pairs: bool = False,
+    allow_leading_numbers: bool = False,
+) -> str:
     """
-    Converts a string
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    "Q3 2023" -> "q3_2023"
-    "EXAMPLE Domain " -> "example_domain"
-    "50th Percentile" -> "50th_percentile"
-    "Example ID*" -> "example_id"
-    "App Loading Milestone 1 (All)" -> "app_loading_milestone_1_all"
-    "Example (ID)" -> "example_id"
+    Converts a string into a normalized, SQL-compliant name using a set of configurable options.
+
+    Args:
+        text: The input string to convert.
+        remove_leading_trailing_underscores: If True, removes underscores at the start/end of the result.
+        combine_number_word_pairs: If True, combines adjacent number and word tokens (e.g., "50 th" -> "50th").
+        remove_special_characters: If True, removes all special characters from the input.
+        combine_letter_number_pairs: If True, combines adjacent letter and number tokens (e.g., "Q 3" -> "Q3").
+        allow_leading_numbers: If False, prepends an underscore if the result starts with a number.
+
+    Returns:
+        The normalized, SQL-compliant string.
+
+    Steps:
+        1. Transliterates the input text to ASCII using unidecode.
+        2. Optionally removes special characters if remove_special_characters is True.
+        3. Splits the text into tokens using a regex pattern that separates words, numbers, and non-alphanumeric characters.
+        4. Optionally combines adjacent letter+number or number+word tokens based on flags.
+        5. Removes empty tokens in the middle, but keeps leading/trailing empty tokens for underscore placement.
+        6. Optionally strips leading/trailing underscores if remove_leading_trailing_underscores is True.
+        7. Optionally prepends an underscore if the result starts with a number and allow_leading_numbers is False.
+        8. Returns the final string in lowercase.
     """
-
-
+    text = unidecode.unidecode(text)
+
+    if remove_special_characters:
+        text = re.sub(r"[^\w\s]", "", text)

-    # Step 1: Tokenization
     tokens = []
     for m in TOKEN_PATTERN.finditer(text):
         if m.group("NoToken") is None:
             tokens.append(m.group(0))
         else:
-
-            for char in m.group(0):
-                if char.isspace():
-                    tokens.append("")
+            tokens.append("")

-    #
+    # Combine tokens as per flags
     combined_tokens = []
     i = 0
     while i < len(tokens):
-        if
-
+        if (
+            combine_letter_number_pairs
+            and i + 1 < len(tokens)
+            and tokens[i]
+            and tokens[i].isalpha()
+            and tokens[i + 1]
+            and tokens[i + 1].isdigit()
+        ):
+            combined = tokens[i] + tokens[i + 1]
+            combined_tokens.append(combined)
             i += 2
-        elif
-
+        elif (
+            combine_number_word_pairs
+            and i + 1 < len(tokens)
+            and tokens[i]
+            and tokens[i].isdigit()
+            and tokens[i + 1]
+            and tokens[i + 1].isalpha()
+        ):
+            combined = tokens[i] + tokens[i + 1]
+            combined_tokens.append(combined)
             i += 2
         else:
             combined_tokens.append(tokens[i])
             i += 1

-    #
-
-
-    while combined_tokens and combined_tokens[-1] == "":
-        combined_tokens.pop()
-    if len(combined_tokens) >= 3:
-        combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
+    # Find indices of first and last non-empty tokens
+    first_non_empty = next((i for i, t in enumerate(combined_tokens) if t), len(combined_tokens))
+    last_non_empty = next((i for i, t in reversed(list(enumerate(combined_tokens))) if t), -1)

-    #
-
-
+    # Process tokens: keep leading/trailing empty tokens, remove empty tokens in middle
+    if first_non_empty < len(combined_tokens):
+        leading = combined_tokens[:first_non_empty]
+        middle = [t for t in combined_tokens[first_non_empty : last_non_empty + 1] if t]
+        trailing = combined_tokens[last_non_empty + 1 :]
+        processed_tokens = leading + middle + trailing
+    else:
+        processed_tokens = combined_tokens  # All tokens are empty

+    # Join tokens with underscores
+    result = DEFAULT_SEPARATOR.join(processed_tokens)

-
-    if
-
-    new = name_conversion(text)
-    if not new:
-        raise Exception(f"initial string '{text}' converted to empty")
-    return new
+    # Apply remove_leading_trailing_underscores on the final string
+    if remove_leading_trailing_underscores:
+        result = result.strip(DEFAULT_SEPARATOR)

+    # Handle leading numbers after underscore removal
+    if not allow_leading_numbers and result and result[0].isdigit():
+        result = DEFAULT_SEPARATOR + result

-
-
-
-
-
+    final_result = result.lower()
+    return final_result
+
+
+def safe_sanitzation_conversion(text: str, **kwargs) -> str:
+    """
+    Converts text to a safe name using _sanitization with the provided keyword arguments.
+    Raises an exception if the result is empty or "_". Unlike safe_name_conversion,
+    this function also rejects "_" as a valid result, since _sanitization
+    may return "_" for certain inputs (e.g., "*").
+    """
+    new = _sanitization(text, **kwargs)
+    if not new or new == "_":
         raise Exception(f"initial string '{text}' converted to empty")
     return new

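To see the pipeline end to end, here is a self-contained sketch that re-implements the tokenize/combine/join rules above and reproduces the Example conversions from spec.yaml. The TOKEN_PATTERN and DEFAULT_SEPARATOR stand-ins are assumptions for illustration (the package's exact definitions may differ), and the unidecode transliteration step is omitted:

import re

# Assumed stand-ins for the module-level constants referenced above; the
# package's actual TOKEN_PATTERN may differ in detail.
TOKEN_PATTERN = re.compile(r"[A-Za-z]+|\d+|(?P<NoToken>[^A-Za-z\d]+)")
DEFAULT_SEPARATOR = "_"


def sanitize(
    text: str,
    remove_leading_trailing_underscores: bool = False,
    combine_number_word_pairs: bool = False,
    remove_special_characters: bool = False,
    combine_letter_number_pairs: bool = False,
    allow_leading_numbers: bool = False,
) -> str:
    if remove_special_characters:
        text = re.sub(r"[^\w\s]", "", text)
    # Tokenize: letter runs and digit runs become tokens, everything else becomes "".
    tokens = ["" if m.group("NoToken") else m.group(0) for m in TOKEN_PATTERN.finditer(text)]
    out, i = [], 0
    while i < len(tokens):
        nxt = tokens[i + 1] if i + 1 < len(tokens) else ""
        if combine_letter_number_pairs and tokens[i].isalpha() and nxt.isdigit():
            out.append(tokens[i] + nxt)
            i += 2
        elif combine_number_word_pairs and tokens[i].isdigit() and nxt.isalpha():
            out.append(tokens[i] + nxt)
            i += 2
        else:
            out.append(tokens[i])
            i += 1
    # Drop empty tokens in the middle; keep leading/trailing ones so they
    # become the leading/trailing underscores seen in the spec examples.
    first = next((j for j, t in enumerate(out) if t), len(out))
    last = next((j for j, t in reversed(list(enumerate(out))) if t), -1)
    if first <= last:
        out = out[:first] + [t for t in out[first : last + 1] if t] + out[last + 1 :]
    result = DEFAULT_SEPARATOR.join(out)
    if remove_leading_trailing_underscores:
        result = result.strip(DEFAULT_SEPARATOR)
    if not allow_leading_numbers and result and result[0].isdigit():
        result = DEFAULT_SEPARATOR + result
    return result.lower()


print(sanitize("Q3 2023", combine_letter_number_pairs=True))         # q3_2023
print(sanitize("50th Percentile?", combine_number_word_pairs=True))  # _50th_percentile_
print(sanitize("Example ID*", remove_special_characters=True))       # example_id

Note how the trailing "?" in the second example survives as an empty token and becomes the trailing underscore, which is why safe_sanitzation_conversion must also reject a bare "_" result.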
airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/RECORD
DELETED
@@ -1,16 +0,0 @@
-source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
-source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
-source_google_sheets/components/extractors.py,sha256=-d5hPW40ZBjU3gTUZNudayvqf1eHlveW1amwKJywDkU,9668
-source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
-source_google_sheets/manifest.yaml,sha256=CuSnA8dnRMeXWfyUA6aXBvGU3mz1dJIi0HqMiks9Fd0,15938
-source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
-source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNzuAtHlGRmMAKQA,1112
-source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
-source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
-source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
-source_google_sheets/spec.yaml,sha256=mFOiMN1IsrjyLRcjTRw7xa0b74LwkUXdVvUqau7iTXc,5709
-source_google_sheets/utils.py,sha256=4nr4MjdZa875qun2i8dyN2Y-B_QGjqShUPrvp4mJHec,5627
-airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/METADATA,sha256=NOT83ySAyrnrX7wr2Nm3FfH622KNxamaacHMb5WW64I,5385
-airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
-airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/RECORD,,

Files without changes:
- {airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info → airbyte_source_google_sheets-0.11.0.dist-info}/WHEEL
- {airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info → airbyte_source_google_sheets-0.11.0.dist-info}/entry_points.txt