airbyte-source-google-sheets 0.10.0.dev202505211933__tar.gz → 0.10.0.dev202505231635__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/PKG-INFO +1 -1
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/pyproject.toml +1 -1
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/utils.py +34 -6
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/README.md +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/__init__.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/__init__.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/extractors.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/partition_routers.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/manifest.yaml +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/__init__.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/spreadsheet.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/spreadsheet_values.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/run.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/source.py +0 -0
- {airbyte_source_google_sheets-0.10.0.dev202505211933 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/spec.yaml +0 -0
@@ -40,7 +40,9 @@ def name_conversion(text: str) -> str:
|
|
40
40
|
def experimental_name_conversion(text: str) -> str:
|
41
41
|
"""
|
42
42
|
Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
|
43
|
-
Removes leading/trailing spaces
|
43
|
+
Removes leading/trailing spaces, combines number-word pairs (e.g., '50th' -> '50th'),
|
44
|
+
letter-number pairs (e.g., 'Q3' -> 'Q3'), and removes special characters without adding underscores.
|
45
|
+
Spaces are converted to underscores for snake_case.
|
44
46
|
"""
|
45
47
|
text = unidecode.unidecode(text.strip()) # Strip leading/trailing spaces
|
46
48
|
|
@@ -49,19 +51,36 @@ def experimental_name_conversion(text: str) -> str:
|
|
49
51
|
if m.group("NoToken") is None:
|
50
52
|
tokens.append(m.group(0))
|
51
53
|
else:
|
52
|
-
|
54
|
+
# Only add an empty token for spaces to preserve snake_case; skip other special characters
|
55
|
+
if m.group(0).isspace():
|
56
|
+
tokens.append("")
|
57
|
+
# Otherwise, skip the special character entirely
|
53
58
|
|
54
|
-
# Combine
|
59
|
+
# Combine single uppercase letter followed by number (e.g., "Q" and "3" -> "Q3"), then number-word pairs
|
55
60
|
combined_tokens = []
|
56
61
|
i = 0
|
57
62
|
while i < len(tokens):
|
58
|
-
|
63
|
+
# Check for letter-number pair (e.g., "Q3")
|
64
|
+
if i + 1 < len(tokens) and len(tokens[i]) == 1 and tokens[i].isupper() and tokens[i + 1].isdigit():
|
65
|
+
combined_tokens.append(tokens[i] + tokens[i + 1])
|
66
|
+
i += 2
|
67
|
+
# Check for number-word pair (e.g., "50th")
|
68
|
+
elif i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
|
59
69
|
combined_tokens.append(tokens[i] + tokens[i + 1])
|
60
70
|
i += 2
|
61
71
|
else:
|
62
|
-
|
72
|
+
# Only add the token if it's non-empty to avoid underscores from spaces near special characters
|
73
|
+
if tokens[i]:
|
74
|
+
combined_tokens.append(tokens[i])
|
63
75
|
i += 1
|
64
76
|
|
77
|
+
# Remove trailing empty tokens to avoid trailing underscores
|
78
|
+
while combined_tokens and combined_tokens[-1] == "":
|
79
|
+
combined_tokens.pop()
|
80
|
+
# Remove leading empty tokens to avoid leading underscores
|
81
|
+
while combined_tokens and combined_tokens[0] == "":
|
82
|
+
combined_tokens.pop(0)
|
83
|
+
|
65
84
|
if len(combined_tokens) >= 3:
|
66
85
|
combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
|
67
86
|
|
@@ -82,12 +101,21 @@ def safe_name_conversion(text: str) -> str:
|
|
82
101
|
return new
|
83
102
|
|
84
103
|
|
85
|
-
|
104
|
+
import csv
|
105
|
+
|
106
|
+
|
107
|
+
def experimental_safe_name_conversion(text: str, output_file: str = "conversion_results2.csv") -> str:
|
86
108
|
if not text:
|
87
109
|
return text
|
88
110
|
new = experimental_name_conversion(text)
|
89
111
|
if not new:
|
90
112
|
raise Exception(f"initial string '{text}' converted to empty")
|
113
|
+
|
114
|
+
# Write to CSV
|
115
|
+
with open(output_file, mode="a", newline="", encoding="utf-8") as file:
|
116
|
+
writer = csv.writer(file)
|
117
|
+
writer.writerow([text, new]) # Write the original and converted text
|
118
|
+
|
91
119
|
return new
|
92
120
|
|
93
121
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|