airbyte-source-google-sheets 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-sheets
3
- Version: 0.9.6
3
+ Version: 0.10.0
4
4
  Summary: Source implementation for Google Sheets.
5
5
  License: Elv2
6
6
  Author: Airbyte
@@ -1,6 +1,6 @@
1
1
  source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
2
2
  source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
3
- source_google_sheets/components/extractors.py,sha256=Re0zt3_pUtRJMTcephi9GvvM6kYiZqWWtKefel0v0ZI,8948
3
+ source_google_sheets/components/extractors.py,sha256=Yrl5ge_gXJ6jqVYYR2bk2f6Rg-xak0wlHnPyK9f1lhc,9854
4
4
  source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
5
5
  source_google_sheets/manifest.yaml,sha256=CuSnA8dnRMeXWfyUA6aXBvGU3mz1dJIi0HqMiks9Fd0,15938
6
6
  source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
@@ -8,9 +8,9 @@ source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNz
8
8
  source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
9
9
  source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
10
10
  source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
11
- source_google_sheets/spec.yaml,sha256=RIUILMhfS0is2r_mCkmIVrQfvND1D3eobDK1YElmzhU,5009
12
- source_google_sheets/utils.py,sha256=JEQIVLSFEAff-7zF3gPzsvFc9xLfCj9hVuFFYrSWiOo,2290
13
- airbyte_source_google_sheets-0.9.6.dist-info/METADATA,sha256=OJAGlWTUGzUNaLTBl_pf6PavyGMVI6vF9aOJH2qbL2E,5368
14
- airbyte_source_google_sheets-0.9.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
15
- airbyte_source_google_sheets-0.9.6.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
16
- airbyte_source_google_sheets-0.9.6.dist-info/RECORD,,
11
+ source_google_sheets/spec.yaml,sha256=HGVGay4VAxzi9TUj-MSpeQLyE9GAoOjD2-xIhQDiIGY,6901
12
+ source_google_sheets/utils.py,sha256=jiVPqsRDjVgdwIiBJMvFJEwwuUBQ7BQAebRqfpS9pZw,6943
13
+ airbyte_source_google_sheets-0.10.0.dist-info/METADATA,sha256=Mzmw5ZoFCeMjyDI0pxxoGtHaAGQV4BfQq3Wp108SdRA,5369
14
+ airbyte_source_google_sheets-0.10.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
15
+ airbyte_source_google_sheets-0.10.0.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
16
+ airbyte_source_google_sheets-0.10.0.dist-info/RECORD,,
@@ -12,7 +12,11 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
12
12
  from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
13
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14
14
  from airbyte_cdk.sources.types import Config
15
- from source_google_sheets.utils import name_conversion, safe_name_conversion
15
+ from source_google_sheets.utils import (
16
+ name_conversion,
17
+ safe_name_conversion,
18
+ safe_sanitzation_conversion,
19
+ )
16
20
 
17
21
 
18
22
  class RawSchemaParser:
@@ -67,11 +71,25 @@ class RawSchemaParser:
67
71
  duplicate_fields = set()
68
72
  parsed_schema_values = []
69
73
  seen_values = set()
74
+ # Gather all sanitisation flags from config
75
+ config = getattr(self, "config", {})
76
+ flags = {
77
+ "remove_leading_trailing_underscores": config.get("remove_leading_trailing_underscores", False),
78
+ "combine_number_word_pairs": config.get("combine_number_word_pairs", False),
79
+ "remove_special_characters": config.get("remove_special_characters", False),
80
+ "combine_letter_number_pairs": config.get("combine_letter_number_pairs", False),
81
+ "allow_leading_numbers": config.get("allow_leading_numbers", False),
82
+ }
83
+ use_granular = any(flags.values())
84
+
70
85
  for property_index, raw_schema_property in enumerate(raw_schema_properties):
71
86
  raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
72
87
  if not raw_schema_property_value or raw_schema_property_value.isspace():
73
88
  break
74
- if names_conversion:
89
+ # Use granular if any flag is set, else legacy
90
+ if names_conversion and use_granular:
91
+ raw_schema_property_value = safe_sanitzation_conversion(raw_schema_property_value, **flags)
92
+ elif names_conversion:
75
93
  raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
76
94
 
77
95
  if raw_schema_property_value in seen_values:
@@ -193,6 +211,7 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
193
211
  )
194
212
 
195
213
 
214
+ @dataclass
196
215
  class DpathSchemaExtractor(DpathExtractor, RawSchemaParser):
197
216
  """
198
217
  Makes names conversion and parses sheet headers from the provided row.
@@ -31,7 +31,49 @@ connectionSpecification:
31
31
  names_conversion:
32
32
  type: boolean
33
33
  title: Convert Column Names to SQL-Compliant Format
34
- description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
34
+ description: >-
35
+ Converts column names to a SQL-compliant format (snake_case, lowercase, etc).
36
+ If enabled, you can further customize the sanitization using the options below.
37
+ default: false
38
+ remove_leading_trailing_underscores:
39
+ type: boolean
40
+ title: Remove Leading and Trailing Underscores
41
+ description: >-
42
+ Removes leading and trailing underscores from column names. Does not remove leading underscores from column names that start with a number.
43
+ Example: "50th Percentile? "→ "_50_th_percentile"
44
+ This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
45
+ default: false
46
+ combine_number_word_pairs:
47
+ type: boolean
48
+ title: Combine Number-Word Pairs
49
+ description: >-
50
+ Combines adjacent numbers and words.
51
+ Example: "50th Percentile?" → "_50th_percentile_"
52
+ This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
53
+ default: false
54
+ remove_special_characters:
55
+ type: boolean
56
+ title: Remove All Special Characters
57
+ description: >-
58
+ Removes all special characters from column names.
59
+ Example: "Example ID*" → "example_id"
60
+ This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
61
+ default: false
62
+ combine_letter_number_pairs:
63
+ type: boolean
64
+ title: Combine Letter-Number Pairs
65
+ description: >-
66
+ Combines adjacent letters and numbers.
67
+ Example: "Q3 2023" → "q3_2023"
68
+ This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
69
+ default: false
70
+ allow_leading_numbers:
71
+ type: boolean
72
+ title: Allow Leading Numbers
73
+ description: >-
74
+ Allows column names to start with numbers.
75
+ Example: "50th Percentile" → "50_th_percentile"
76
+ This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
35
77
  default: false
36
78
  credentials:
37
79
  type: object
@@ -46,6 +46,121 @@ def safe_name_conversion(text: str) -> str:
46
46
  return new
47
47
 
48
48
 
49
+ def _sanitization(
50
+ text: str,
51
+ remove_leading_trailing_underscores: bool = False,
52
+ combine_number_word_pairs: bool = False,
53
+ remove_special_characters: bool = False,
54
+ combine_letter_number_pairs: bool = False,
55
+ allow_leading_numbers: bool = False,
56
+ ) -> str:
57
+ """
58
+ Converts a string into a normalized, SQL-compliant name using a set of configurable options.
59
+
60
+ Args:
61
+ text: The input string to convert.
62
+ remove_leading_trailing_underscores: If True, removes underscores at the start/end of the result.
63
+ combine_number_word_pairs: If True, combines adjacent number and word tokens (e.g., "50 th" -> "50th").
64
+ remove_special_characters: If True, removes all special characters from the input.
65
+ combine_letter_number_pairs: If True, combines adjacent letter and number tokens (e.g., "Q 3" -> "Q3").
66
+ allow_leading_numbers: If False, prepends an underscore if the result starts with a number.
67
+
68
+ Returns:
69
+ The normalized, SQL-compliant string.
70
+
71
+ Steps:
72
+ 1. Transliterates the input text to ASCII using unidecode.
73
+ 2. Optionally removes special characters if remove_special_characters is True.
74
+ 3. Splits the text into tokens using a regex pattern that separates words, numbers, and non-alphanumeric characters.
75
+ 4. Optionally combines adjacent letter+number or number+word tokens based on flags.
76
+ 5. Removes empty tokens in the middle, but keeps leading/trailing empty tokens for underscore placement.
77
+ 6. Optionally strips leading/trailing underscores if remove_leading_trailing_underscores is True.
78
+ 7. Optionally prepends an underscore if the result starts with a number and allow_leading_numbers is False.
79
+ 8. Returns the final string in lowercase.
80
+ """
81
+ text = unidecode.unidecode(text)
82
+
83
+ if remove_special_characters:
84
+ text = re.sub(r"[^\w\s]", "", text)
85
+
86
+ tokens = []
87
+ for m in TOKEN_PATTERN.finditer(text):
88
+ if m.group("NoToken") is None:
89
+ tokens.append(m.group(0))
90
+ else:
91
+ tokens.append("")
92
+
93
+ # Combine tokens as per flags
94
+ combined_tokens = []
95
+ i = 0
96
+ while i < len(tokens):
97
+ if (
98
+ combine_letter_number_pairs
99
+ and i + 1 < len(tokens)
100
+ and tokens[i]
101
+ and tokens[i].isalpha()
102
+ and tokens[i + 1]
103
+ and tokens[i + 1].isdigit()
104
+ ):
105
+ combined = tokens[i] + tokens[i + 1]
106
+ combined_tokens.append(combined)
107
+ i += 2
108
+ elif (
109
+ combine_number_word_pairs
110
+ and i + 1 < len(tokens)
111
+ and tokens[i]
112
+ and tokens[i].isdigit()
113
+ and tokens[i + 1]
114
+ and tokens[i + 1].isalpha()
115
+ ):
116
+ combined = tokens[i] + tokens[i + 1]
117
+ combined_tokens.append(combined)
118
+ i += 2
119
+ else:
120
+ combined_tokens.append(tokens[i])
121
+ i += 1
122
+
123
+ # Find indices of first and last non-empty tokens
124
+ first_non_empty = next((i for i, t in enumerate(combined_tokens) if t), len(combined_tokens))
125
+ last_non_empty = next((i for i, t in reversed(list(enumerate(combined_tokens))) if t), -1)
126
+
127
+ # Process tokens: keep leading/trailing empty tokens, remove empty tokens in middle
128
+ if first_non_empty < len(combined_tokens):
129
+ leading = combined_tokens[:first_non_empty]
130
+ middle = [t for t in combined_tokens[first_non_empty : last_non_empty + 1] if t]
131
+ trailing = combined_tokens[last_non_empty + 1 :]
132
+ processed_tokens = leading + middle + trailing
133
+ else:
134
+ processed_tokens = combined_tokens # All tokens are empty
135
+
136
+ # Join tokens with underscores
137
+ result = DEFAULT_SEPARATOR.join(processed_tokens)
138
+
139
+ # Apply remove_leading_trailing_underscores on the final string
140
+ if remove_leading_trailing_underscores:
141
+ result = result.strip(DEFAULT_SEPARATOR)
142
+
143
+ # Handle leading numbers after underscore removal
144
+ if not allow_leading_numbers and result and result[0].isdigit():
145
+ result = DEFAULT_SEPARATOR + result
146
+
147
+ final_result = result.lower()
148
+ return final_result
149
+
150
+
151
+ def safe_sanitzation_conversion(text: str, **kwargs) -> str:
152
+ """
153
+ Converts text to a safe name using _sanitization with the provided keyword arguments.
154
+ Raises an exception if the result is empty or "_". Unlike safe_name_conversion,
155
+ this function also rejects "_" as a valid result, since _sanitization
156
+ may return "_" for certain inputs (e.g., "*").
157
+ """
158
+ new = _sanitization(text, **kwargs)
159
+ if not new or new == "_":
160
+ raise Exception(f"initial string '{text}' converted to empty")
161
+ return new
162
+
163
+
49
164
  def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
50
165
  if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
51
166
  return (