airbyte-source-google-sheets 0.9.5.dev202505142036__py3-none-any.whl → 0.10.0.dev202505231635__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/METADATA +1 -1
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/RECORD +7 -7
- source_google_sheets/components/extractors.py +19 -8
- source_google_sheets/spec.yaml +8 -0
- source_google_sheets/utils.py +73 -0
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/WHEEL +0 -0
- {airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/entry_points.txt +0 -0
{airbyte_source_google_sheets-0.9.5.dev202505142036.dist-info → airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
 source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
-source_google_sheets/components/extractors.py,sha256
+source_google_sheets/components/extractors.py,sha256=-d5hPW40ZBjU3gTUZNudayvqf1eHlveW1amwKJywDkU,9668
 source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
 source_google_sheets/manifest.yaml,sha256=HG8vpxv4lqwe5TCneq_zxuERaGNdtno7yAs61Wsk30E,15821
 source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
@@ -8,9 +8,9 @@ source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNz
 source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
 source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
 source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
-source_google_sheets/spec.yaml,sha256=
-source_google_sheets/utils.py,sha256=
-airbyte_source_google_sheets-0.
-airbyte_source_google_sheets-0.
-airbyte_source_google_sheets-0.
-airbyte_source_google_sheets-0.
+source_google_sheets/spec.yaml,sha256=mFOiMN1IsrjyLRcjTRw7xa0b74LwkUXdVvUqau7iTXc,5709
+source_google_sheets/utils.py,sha256=NUw8pJSCORGKx8mrhd11x4xS1dGUraDZdh6WxVn7aQw,5150
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/METADATA,sha256=MW6vZRfbucb1JqCsXRg2O93pL5s1hRalyUfVV-xKR6I,5385
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
+airbyte_source_google_sheets-0.10.0.dev202505231635.dist-info/RECORD,,
source_google_sheets/components/extractors.py
CHANGED
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
 from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.types import Config
-from source_google_sheets.utils import name_conversion, safe_name_conversion
+from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
 
 
 class RawSchemaParser:
@@ -54,9 +54,12 @@ class RawSchemaParser:
         schema_pointer: List[Union[InterpolatedString, str]],
         key_pointer: List[Union[InterpolatedString, str]],
         names_conversion: bool,
+        experimental_names_conversion: bool,
     ):
         """
-        1. Parses sheet headers from the provided raw schema
+        1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous,
+           i.e., every cell contains a value and the first cell which does not contain a value denotes the end
+           of the headers.
         2. Makes name conversion if required.
         3. Removes duplicated fields from the schema.
         Return a list of tuples with correct property index (by found in array), value and raw_schema
@@ -68,8 +71,11 @@ class RawSchemaParser:
         for property_index, raw_schema_property in enumerate(raw_schema_properties):
             raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
             if not raw_schema_property_value or raw_schema_property_value.isspace():
-                break
-            if names_conversion:
+                break
+            # Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
+            if experimental_names_conversion:
+                raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
+            elif names_conversion:
                 raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
 
             if raw_schema_property_value in seen_values:
@@ -87,12 +93,13 @@ class RawSchemaParser:
     def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
         """Removes duplicated fields and makes names conversion"""
         names_conversion = self.config.get("names_conversion", False)
+        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         parsed_properties = []
         for raw_schema_data in records:
             for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
-                raw_schema_data, schema_pointer, key_pointer, names_conversion
+                raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
             ):
                 self._set_data(parsed_value, raw_schema_property, key_pointer)
                 parsed_properties.append(raw_schema_property)
@@ -138,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
         self._values_to_match_key = parameters["values_to_match_key"]
         schema_type_identifier = parameters["schema_type_identifier"]
         names_conversion = self.config.get("names_conversion", False)
+        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         self._indexed_properties_to_match = self.extract_properties_to_match(
-            parameters["properties_to_match"], schema_type_identifier, names_conversion
+            parameters["properties_to_match"],
+            schema_type_identifier,
+            names_conversion=names_conversion,
+            experimental_names_conversion=experimental_names_conversion,
         )
 
-    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
+    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         indexed_properties = {}
         for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
-            properties_to_match, schema_pointer, key_pointer, names_conversion
+            properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
         ):
             indexed_properties[property_index] = property_parsed_value
         return indexed_properties
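Taken together, these hunks thread one new flag through the header-parsing path. Below is a minimal sketch of the resulting control flow, assuming stub converters in place of the real safe_name_conversion / experimental_safe_name_conversion helpers, and simplifying duplicate handling to the "removes duplicated fields" behavior named in the docstring; only the branch ordering mirrors the diff.

# Sketch only: stubs stand in for the helpers in source_google_sheets/utils.py.
from typing import List, Optional, Tuple

def stub_safe_name_conversion(text: str) -> str:
    # Stand-in for the package's SQL-compliant conversion rules
    return text.strip().lower().replace(" ", "_")

def stub_experimental_safe_name_conversion(text: str) -> str:
    # Stand-in: the real version adds extra sanitization (e.g., stripping spaces)
    return stub_safe_name_conversion(text)

def parse_headers(
    headers: List[Optional[str]],
    names_conversion: bool,
    experimental_names_conversion: bool,
) -> List[Tuple[int, str]]:
    parsed: List[Tuple[int, str]] = []
    seen = set()
    for index, value in enumerate(headers):
        # Headers are assumed contiguous: the first empty cell ends them.
        if not value or value.isspace():
            break
        # The experimental conversion, when enabled, supersedes the standard one.
        if experimental_names_conversion:
            value = stub_experimental_safe_name_conversion(value)
        elif names_conversion:
            value = stub_safe_name_conversion(value)
        # Duplicated fields are removed (simplified here to keep-first).
        if value in seen:
            continue
        seen.add(value)
        parsed.append((index, value))
    return parsed

print(parse_headers(["My Name", " Q3 ", "My Name", None, "ignored"], True, True))
# [(0, 'my_name'), (1, 'q3')]; everything after the empty cell is skipped

Because the experimental branch is checked first, enabling experimental_names_conversion makes names_conversion irrelevant, which matches the spec.yaml description below.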
source_google_sheets/spec.yaml
CHANGED
@@ -33,6 +33,14 @@ connectionSpecification:
       title: Convert Column Names to SQL-Compliant Format
       description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
       default: false
+    experimental_names_conversion:
+      type: boolean
+      title: Experimental Convert Column Names to SQL-Compliant Format
+      description: >-
+        Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
+        This option may change behavior in the future, which may cause column names to update in your destination on future updates.
+        Due to this, it is recommended that you also change the "Detect and propagate schema changes" setting to "Approve all changes myself" in the connection's advanced settings.
+        If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
     credentials:
       type: object
       title: Authentication
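For illustration, a source config exercising the new option might look like the fragment below. This is a sketch only: apart from the two conversion flags added in this diff, the field names are abbreviated placeholders rather than the full spec.

# Hypothetical config fragment; only the two conversion flags come from this diff.
config = {
    "spreadsheet_id": "<spreadsheet-url>",    # placeholder
    "credentials": {"auth_type": "Service"},  # abbreviated placeholder
    "names_conversion": True,
    "experimental_names_conversion": True,    # supersedes names_conversion when true
}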
source_google_sheets/utils.py
CHANGED
@@ -37,6 +37,61 @@ def name_conversion(text: str) -> str:
     return text
 
 
+def experimental_name_conversion(text: str) -> str:
+    """
+    Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'.
+    Removes leading/trailing spaces, combines number-word pairs (e.g., '50th' -> '50th'),
+    letter-number pairs (e.g., 'Q3' -> 'Q3'), and removes special characters without adding underscores.
+    Spaces are converted to underscores for snake_case.
+    """
+    text = unidecode.unidecode(text.strip())  # Strip leading/trailing spaces
+
+    tokens = []
+    for m in TOKEN_PATTERN.finditer(text):
+        if m.group("NoToken") is None:
+            tokens.append(m.group(0))
+        else:
+            # Only add an empty token for spaces to preserve snake_case; skip other special characters
+            if m.group(0).isspace():
+                tokens.append("")
+            # Otherwise, skip the special character entirely
+
+    # Combine single uppercase letter followed by number (e.g., "Q" and "3" -> "Q3"), then number-word pairs
+    combined_tokens = []
+    i = 0
+    while i < len(tokens):
+        # Check for letter-number pair (e.g., "Q3")
+        if i + 1 < len(tokens) and len(tokens[i]) == 1 and tokens[i].isupper() and tokens[i + 1].isdigit():
+            combined_tokens.append(tokens[i] + tokens[i + 1])
+            i += 2
+        # Check for number-word pair (e.g., "50th")
+        elif i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
+            combined_tokens.append(tokens[i] + tokens[i + 1])
+            i += 2
+        else:
+            # Only add the token if it's non-empty to avoid underscores from spaces near special characters
+            if tokens[i]:
+                combined_tokens.append(tokens[i])
+            i += 1
+
+    # Remove trailing empty tokens to avoid trailing underscores
+    while combined_tokens and combined_tokens[-1] == "":
+        combined_tokens.pop()
+    # Remove leading empty tokens to avoid leading underscores
+    while combined_tokens and combined_tokens[0] == "":
+        combined_tokens.pop(0)
+
+    if len(combined_tokens) >= 3:
+        combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
+
+    if combined_tokens and combined_tokens[0].isdigit():
+        combined_tokens.insert(0, "")
+
+    text = DEFAULT_SEPARATOR.join(combined_tokens)
+    text = text.lower()
+    return text
+
+
 def safe_name_conversion(text: str) -> str:
     if not text:
         return text
@@ -46,6 +101,24 @@ def safe_name_conversion(text: str) -> str:
     return new
 
 
+import csv
+
+
+def experimental_safe_name_conversion(text: str, output_file: str = "conversion_results2.csv") -> str:
+    if not text:
+        return text
+    new = experimental_name_conversion(text)
+    if not new:
+        raise Exception(f"initial string '{text}' converted to empty")
+
+    # Write to CSV
+    with open(output_file, mode="a", newline="", encoding="utf-8") as file:
+        writer = csv.writer(file)
+        writer.writerow([text, new])  # Write the original and converted text
+
+    return new
+
+
 def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
     if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
         return (
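A usage sketch for the new converter, with expected outputs inferred from the docstring and token rules above; TOKEN_PATTERN and DEFAULT_SEPARATOR are pre-existing module-level helpers that this diff does not show, so exact behavior depends on them.

# Sketch only: outputs are inferred, not verified against the package.
from source_google_sheets.utils import experimental_name_conversion

print(experimental_name_conversion("Q3"))         # 'q3': letter-number pair kept together
print(experimental_name_conversion(" My Name "))  # 'my_name': stripped and snake_cased
print(experimental_name_conversion("50th"))       # '50th': number-word pair combined

Note that the experimental_safe_name_conversion wrapper shipped here also appends every (original, converted) pair to conversion_results2.csv in the working directory on each call, a side effect worth knowing about before enabling the option in production.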