airbyte-source-google-sheets 0.10.0.dev202505231635__py3-none-any.whl → 0.10.0.dev202506022219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info → airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info}/METADATA +1 -1
- {airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info → airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info}/RECORD +6 -6
- source_google_sheets/manifest.yaml +7 -5
- source_google_sheets/utils.py +48 -40
- {airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info → airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info}/WHEEL +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info → airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info}/entry_points.txt +0 -0
@@ -2,15 +2,15 @@ source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLr
|
|
2
2
|
source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
|
3
3
|
source_google_sheets/components/extractors.py,sha256=-d5hPW40ZBjU3gTUZNudayvqf1eHlveW1amwKJywDkU,9668
|
4
4
|
source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
|
5
|
-
source_google_sheets/manifest.yaml,sha256=
|
5
|
+
source_google_sheets/manifest.yaml,sha256=CuSnA8dnRMeXWfyUA6aXBvGU3mz1dJIi0HqMiks9Fd0,15938
|
6
6
|
source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
|
7
7
|
source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNzuAtHlGRmMAKQA,1112
|
8
8
|
source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
|
9
9
|
source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
|
10
10
|
source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
|
11
11
|
source_google_sheets/spec.yaml,sha256=mFOiMN1IsrjyLRcjTRw7xa0b74LwkUXdVvUqau7iTXc,5709
|
12
|
-
source_google_sheets/utils.py,sha256=
|
13
|
-
airbyte_source_google_sheets-0.10.0.
|
14
|
-
airbyte_source_google_sheets-0.10.0.
|
15
|
-
airbyte_source_google_sheets-0.10.0.
|
16
|
-
airbyte_source_google_sheets-0.10.0.
|
12
|
+
source_google_sheets/utils.py,sha256=4nr4MjdZa875qun2i8dyN2Y-B_QGjqShUPrvp4mJHec,5627
|
13
|
+
airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/METADATA,sha256=NOT83ySAyrnrX7wr2Nm3FfH622KNxamaacHMb5WW64I,5385
|
14
|
+
airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
15
|
+
airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
|
16
|
+
airbyte_source_google_sheets-0.10.0.dev202506022219.dist-info/RECORD,,
|
@@ -224,11 +224,11 @@ definitions:
|
|
224
224
|
path: >-
|
225
225
|
{% if config["spreadsheet_id"] | regex_search("^(https://.*)") %}{{ config["spreadsheet_id"] | regex_search("/([-\\w]{20,})([/]?)") }}{% else %}{{ config["spreadsheet_id"] }}{% endif %}?includeGridData=true&ranges={{stream_partition.sheet_id | urlencode}}!1:1&alt=json
|
226
226
|
error_handler:
|
227
|
-
type:
|
228
|
-
|
229
|
-
- type:
|
230
|
-
|
231
|
-
|
227
|
+
type: DefaultErrorHandler
|
228
|
+
backoff_strategies:
|
229
|
+
- type: ExponentialBackoffStrategy
|
230
|
+
response_filters:
|
231
|
+
$ref: "#/definitions/response_filters/single_sheet_response_error_filters"
|
232
232
|
response_filters:
|
233
233
|
expected_one_sheet:
|
234
234
|
type: HttpResponseFilter
|
@@ -320,9 +320,11 @@ definitions:
|
|
320
320
|
single_sheet_response_error_filters:
|
321
321
|
- $ref: "#/definitions/response_filters/expected_one_sheet"
|
322
322
|
- $ref: "#/definitions/response_filters/ignore_duplicate_headers"
|
323
|
+
- $ref: "#/definitions/response_filters/rate_limit"
|
323
324
|
check_operation_single_sheet_response_error_filters:
|
324
325
|
- $ref: "#/definitions/response_filters/expected_one_sheet"
|
325
326
|
- $ref: "#/definitions/response_filters/fail_duplicate_headers"
|
327
|
+
- $ref: "#/definitions/response_filters/rate_limit"
|
326
328
|
response_error_filters:
|
327
329
|
- $ref: "#/definitions/response_filters/server_error"
|
328
330
|
- $ref: "#/definitions/response_filters/forbidden"
|
source_google_sheets/utils.py
CHANGED
@@ -39,57 +39,74 @@ def name_conversion(text: str) -> str:
|
|
39
39
|
|
40
40
|
def experimental_name_conversion(text: str) -> str:
|
41
41
|
"""
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
Converts a string to a normalized, snake_case identifier for destination compatibility.
|
43
|
+
|
44
|
+
Checks Performed:
|
45
|
+
1. Removes leading and trailing spaces.
|
46
|
+
- Example: "EXAMPLE Domain " -> "example_domain"
|
47
|
+
2. Combines number-word pairs (e.g., "50th Percentile" -> "50th_percentile").
|
48
|
+
3. Removes all special characters (e.g., "Example ID*" -> "example_id").
|
49
|
+
4. Combines letter-number pairs (e.g., "Q3 2023" -> "q3_2023").
|
50
|
+
5. Preserves spaces between numbers and words as underscores.
|
51
|
+
- Example: "App Loading Milestone 1 (All)" -> "app_loading_milestone_1_all"
|
52
|
+
6. Handles sequences of special characters and spaces correctly, ensuring no extra or trailing underscores.
|
53
|
+
- Example: "Example (ID)" -> "example_id"
|
54
|
+
|
55
|
+
Additional Details:
|
56
|
+
- All output is lowercased.
|
57
|
+
- Digits are allowed at the start of the result in this version (e.g., "1MyName" -> "1my_name").
|
58
|
+
- Multiple spaces or special characters are collapsed/removed, not replaced with underscores.
|
59
|
+
- Only single underscores are used to separate tokens.
|
60
|
+
|
61
|
+
Examples:
|
62
|
+
"X9 D(a)ta" -> "x9_data"
|
63
|
+
"1MyName" -> "1my_name"
|
64
|
+
"Q3 2023" -> "q3_2023"
|
65
|
+
"EXAMPLE Domain " -> "example_domain"
|
66
|
+
"50th Percentile" -> "50th_percentile"
|
67
|
+
"Example ID*" -> "example_id"
|
68
|
+
"App Loading Milestone 1 (All)" -> "app_loading_milestone_1_all"
|
69
|
+
"Example (ID)" -> "example_id"
|
46
70
|
"""
|
47
|
-
|
71
|
+
# Remove symbols: drop every character that is neither a word character (letter, digit, underscore) nor whitespace
|
72
|
+
text = re.sub(r"[^\w\s]", "", text)
|
48
73
|
|
74
|
+
# Step 1: Tokenization
|
49
75
|
tokens = []
|
50
76
|
for m in TOKEN_PATTERN.finditer(text):
|
51
77
|
if m.group("NoToken") is None:
|
52
78
|
tokens.append(m.group(0))
|
53
79
|
else:
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
80
|
+
# Process each character in NoToken match
|
81
|
+
for char in m.group(0):
|
82
|
+
if char.isspace():
|
83
|
+
tokens.append("")
|
58
84
|
|
59
|
-
#
|
85
|
+
# Step 2: Combine adjacent tokens where appropriate
|
60
86
|
combined_tokens = []
|
61
87
|
i = 0
|
62
88
|
while i < len(tokens):
|
63
|
-
|
64
|
-
|
65
|
-
combined_tokens.append(tokens[i] + tokens[i + 1])
|
89
|
+
if i + 1 < len(tokens) and tokens[i] and len(tokens[i]) == 1 and tokens[i].isupper() and tokens[i + 1] and tokens[i + 1].isdigit():
|
90
|
+
combined_tokens.append(tokens[i] + tokens[i + 1]) # e.g., "Q3"
|
66
91
|
i += 2
|
67
|
-
|
68
|
-
|
69
|
-
combined_tokens.append(tokens[i] + tokens[i + 1])
|
92
|
+
elif i + 1 < len(tokens) and tokens[i] and tokens[i].isdigit() and tokens[i + 1] and tokens[i + 1].isalpha():
|
93
|
+
combined_tokens.append(tokens[i] + tokens[i + 1]) # e.g., "80th"
|
70
94
|
i += 2
|
71
95
|
else:
|
72
|
-
|
73
|
-
if tokens[i]:
|
74
|
-
combined_tokens.append(tokens[i])
|
96
|
+
combined_tokens.append(tokens[i])
|
75
97
|
i += 1
|
76
98
|
|
77
|
-
#
|
78
|
-
while combined_tokens and combined_tokens[-1] == "":
|
79
|
-
combined_tokens.pop()
|
80
|
-
# Remove leading empty tokens to avoid leading underscores
|
99
|
+
# Step 3: Clean up empty tokens
|
81
100
|
while combined_tokens and combined_tokens[0] == "":
|
82
101
|
combined_tokens.pop(0)
|
83
|
-
|
102
|
+
while combined_tokens and combined_tokens[-1] == "":
|
103
|
+
combined_tokens.pop()
|
84
104
|
if len(combined_tokens) >= 3:
|
85
105
|
combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
|
86
106
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
text = DEFAULT_SEPARATOR.join(combined_tokens)
|
91
|
-
text = text.lower()
|
92
|
-
return text
|
107
|
+
# Step 4: Join and convert to lowercase
|
108
|
+
result = DEFAULT_SEPARATOR.join(combined_tokens)
|
109
|
+
return result.lower()
|
93
110
|
|
94
111
|
|
95
112
|
def safe_name_conversion(text: str) -> str:
|
@@ -101,21 +118,12 @@ def safe_name_conversion(text: str) -> str:
|
|
101
118
|
return new
|
102
119
|
|
103
120
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
def experimental_safe_name_conversion(text: str, output_file: str = "conversion_results2.csv") -> str:
|
121
|
+
def experimental_safe_name_conversion(text: str) -> str:
|
108
122
|
if not text:
|
109
123
|
return text
|
110
124
|
new = experimental_name_conversion(text)
|
111
125
|
if not new:
|
112
126
|
raise Exception(f"initial string '{text}' converted to empty")
|
113
|
-
|
114
|
-
# Write to CSV
|
115
|
-
with open(output_file, mode="a", newline="", encoding="utf-8") as file:
|
116
|
-
writer = csv.writer(file)
|
117
|
-
writer.writerow([text, new]) # Write the original and converted text
|
118
|
-
|
119
127
|
return new
|
120
128
|
|
121
129
|
|
File without changes
|