airbyte-source-google-sheets 0.9.5.dev202505142036__tar.gz → 0.10.0.dev202505231635__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16) hide show
  1. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/PKG-INFO +1 -1
  2. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/pyproject.toml +1 -1
  3. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/extractors.py +19 -8
  4. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/spec.yaml +8 -0
  5. airbyte_source_google_sheets-0.10.0.dev202505231635/source_google_sheets/utils.py +142 -0
  6. airbyte_source_google_sheets-0.9.5.dev202505142036/source_google_sheets/utils.py +0 -69
  7. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/README.md +0 -0
  8. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/__init__.py +0 -0
  9. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/__init__.py +0 -0
  10. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/partition_routers.py +0 -0
  11. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/manifest.yaml +0 -0
  12. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/__init__.py +0 -0
  13. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/spreadsheet.py +0 -0
  14. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/spreadsheet_values.py +0 -0
  15. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/run.py +0 -0
  16. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/source.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-sheets
3
- Version: 0.9.5.dev202505142036
3
+ Version: 0.10.0.dev202505231635
4
4
  Summary: Source implementation for Google Sheets.
5
5
  License: Elv2
6
6
  Author: Airbyte
@@ -5,7 +5,7 @@ requires = [
5
5
  build-backend = "poetry.core.masonry.api"
6
6
 
7
7
  [tool.poetry]
8
- version = "0.9.5.dev202505142036"
8
+ version = "0.10.0.dev202505231635"
9
9
  name = "airbyte-source-google-sheets"
10
10
  description = "Source implementation for Google Sheets."
11
11
  authors = [
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
12
12
  from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
13
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14
14
  from airbyte_cdk.sources.types import Config
15
- from source_google_sheets.utils import name_conversion, safe_name_conversion
15
+ from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
16
16
 
17
17
 
18
18
  class RawSchemaParser:
@@ -54,9 +54,12 @@ class RawSchemaParser:
54
54
  schema_pointer: List[Union[InterpolatedString, str]],
55
55
  key_pointer: List[Union[InterpolatedString, str]],
56
56
  names_conversion: bool,
57
+ experimental_names_conversion: bool,
57
58
  ):
58
59
  """
59
- 1. Parses sheet headers from the provided raw schema, skipping any headers that are empty or contain only whitespace.
60
+ 1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous
61
+ i.e.: every cell contains a value and the first cell which does not contain a value denotes the end
62
+ of the headers.
60
63
  2. Makes name conversion if required.
61
64
  3. Removes duplicated fields from the schema.
62
65
  Return a list of tuples with correct property index (by found in array), value and raw_schema
@@ -68,8 +71,11 @@ class RawSchemaParser:
68
71
  for property_index, raw_schema_property in enumerate(raw_schema_properties):
69
72
  raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
70
73
  if not raw_schema_property_value or raw_schema_property_value.isspace():
71
- continue
72
- if names_conversion:
74
+ break
75
+ # Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
76
+ if experimental_names_conversion:
77
+ raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
78
+ elif names_conversion:
73
79
  raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
74
80
 
75
81
  if raw_schema_property_value in seen_values:
@@ -87,12 +93,13 @@ class RawSchemaParser:
87
93
  def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
88
94
  """Removes duplicated fields and makes names conversion"""
89
95
  names_conversion = self.config.get("names_conversion", False)
96
+ experimental_names_conversion = self.config.get("experimental_names_conversion", False)
90
97
  schema_pointer = schema_type_identifier.get("schema_pointer")
91
98
  key_pointer = schema_type_identifier["key_pointer"]
92
99
  parsed_properties = []
93
100
  for raw_schema_data in records:
94
101
  for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
95
- raw_schema_data, schema_pointer, key_pointer, names_conversion
102
+ raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
96
103
  ):
97
104
  self._set_data(parsed_value, raw_schema_property, key_pointer)
98
105
  parsed_properties.append(raw_schema_property)
@@ -138,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
138
145
  self._values_to_match_key = parameters["values_to_match_key"]
139
146
  schema_type_identifier = parameters["schema_type_identifier"]
140
147
  names_conversion = self.config.get("names_conversion", False)
148
+ experimental_names_conversion = self.config.get("experimental_names_conversion", False)
141
149
  self._indexed_properties_to_match = self.extract_properties_to_match(
142
- parameters["properties_to_match"], schema_type_identifier, names_conversion=names_conversion
150
+ parameters["properties_to_match"],
151
+ schema_type_identifier,
152
+ names_conversion=names_conversion,
153
+ experimental_names_conversion=experimental_names_conversion,
143
154
  )
144
155
 
145
- def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
156
+ def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
146
157
  schema_pointer = schema_type_identifier.get("schema_pointer")
147
158
  key_pointer = schema_type_identifier["key_pointer"]
148
159
  indexed_properties = {}
149
160
  for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
150
- properties_to_match, schema_pointer, key_pointer, names_conversion
161
+ properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
151
162
  ):
152
163
  indexed_properties[property_index] = property_parsed_value
153
164
  return indexed_properties
@@ -33,6 +33,14 @@ connectionSpecification:
33
33
  title: Convert Column Names to SQL-Compliant Format
34
34
  description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
35
35
  default: false
36
+ experimental_names_conversion:
37
+ type: boolean
38
+ title: Experimental Convert Column Names to SQL-Compliant Format
39
+ description: >-
40
+ Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
41
+ This option may change behavior in the future, which may cause column names to update in your destination on future updates.
42
+ Due to this, it is recommended that you also change the "Detect and propagate schema changes" to "Approve all changes myself" in the connection advanced settings.
43
+ If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
36
44
  credentials:
37
45
  type: object
38
46
  title: Authentication
@@ -0,0 +1,142 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ import re
7
+
8
+ import unidecode
9
+ from requests.status_codes import codes as status_codes
10
+
11
+
12
+ TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
13
+ DEFAULT_SEPARATOR = "_"
14
+
15
+
16
def name_conversion(text: str) -> str:
    """
    convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
    """
    ascii_text = unidecode.unidecode(text)

    # Alphanumeric runs keep their text; runs of any other characters become
    # empty markers that later turn into separators in the joined result.
    tokens = ["" if m.group("NoToken") is not None else m.group(0) for m in TOKEN_PATTERN.finditer(ascii_text)]

    # Collapse empty markers in the interior, keeping at most one leading and
    # one trailing empty token (so edge separators are preserved).
    if len(tokens) >= 3:
        tokens = [tokens[0], *(t for t in tokens[1:-1] if t), tokens[-1]]

    # A leading digit gets an extra empty token so the result starts with '_'.
    if tokens and tokens[0].isdigit():
        tokens = [""] + tokens

    return DEFAULT_SEPARATOR.join(tokens).lower()
38
+
39
+
40
def experimental_name_conversion(text: str) -> str:
    """
    Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'.

    Removes leading/trailing spaces, combines number-word pairs (e.g. '50th' -> '50th')
    and letter-number pairs (e.g. 'Q3' -> 'Q3'), and removes special characters
    without adding underscores. Spaces are converted to underscores for snake_case.
    """
    text = unidecode.unidecode(text.strip())  # strip leading/trailing spaces first

    tokens = []
    for m in TOKEN_PATTERN.finditer(text):
        if m.group("NoToken") is None:
            tokens.append(m.group(0))
        elif m.group(0).isspace():
            # Keep an empty marker for spaces: it is dropped from the output
            # below, but it prevents the pairing rules from merging tokens that
            # were separated by a space (e.g. "Q 3" must stay "q_3", not "q3").
            tokens.append("")
        # Any other run of special characters is dropped entirely.

    # Combine letter-number pairs ("Q" + "3" -> "Q3") and number-word pairs
    # ("50" + "th" -> "50th"); everything else passes through unchanged.
    combined_tokens = []
    i = 0
    while i < len(tokens):
        if i + 1 < len(tokens) and len(tokens[i]) == 1 and tokens[i].isupper() and tokens[i + 1].isdigit():
            combined_tokens.append(tokens[i] + tokens[i + 1])
            i += 2
        elif i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
            combined_tokens.append(tokens[i] + tokens[i + 1])
            i += 2
        else:
            # Space markers are only pairing blockers — never emit them, so the
            # joined result gets exactly one underscore between real tokens.
            if tokens[i]:
                combined_tokens.append(tokens[i])
            i += 1
    # NOTE: combined_tokens can never contain an empty string here (empties are
    # skipped above and pair branches concatenate non-empty tokens), so the
    # previous leading/trailing-empty cleanup loops were dead code and removed.

    # Prefix with an empty token so names starting with a digit get a leading '_'.
    if combined_tokens and combined_tokens[0].isdigit():
        combined_tokens.insert(0, "")

    text = DEFAULT_SEPARATOR.join(combined_tokens)
    return text.lower()
93
+
94
+
95
def safe_name_conversion(text: str) -> str:
    """
    Guarded wrapper around name_conversion.

    Falsy input is returned unchanged; if the conversion collapses a non-empty
    name into an empty string, an exception is raised instead of silently
    producing an unusable field name.
    """
    if not text:
        return text
    converted = name_conversion(text)
    if not converted:
        raise Exception(f"initial string '{text}' converted to empty")
    return converted
102
+
103
+
104
def experimental_safe_name_conversion(text: str, output_file: str = "conversion_results2.csv") -> str:
    """
    Guarded wrapper around experimental_name_conversion.

    Falsy input is returned unchanged; if the conversion collapses a non-empty
    name into an empty string, an exception is raised so a column name is never
    silently lost.

    :param text: original column/header name.
    :param output_file: deprecated and ignored. Earlier versions appended every
        (original, converted) pair to this CSV file as debug instrumentation;
        writing to the local filesystem on every header conversion is an
        unwanted side effect in a connector, so the logging (and the stray
        mid-module ``import csv``) was removed. The parameter is kept only for
        backward compatibility of the signature.
    """
    if not text:
        return text
    new = experimental_name_conversion(text)
    if not new:
        raise Exception(f"initial string '{text}' converted to empty")
    return new
120
+
121
+
122
def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
    """
    Build a human-readable description for a failed Google Sheets API call.

    :param code: HTTP status code returned by the API.
    :param spreadsheet_id: id of the spreadsheet being accessed; interpolated
        into the permission / not-found messages.
    :return: a descriptive message, or an empty string for unhandled codes.
    """
    # 5xx responses are treated as transient problems on Google's side.
    server_side_codes = (status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE)
    if code in server_side_codes:
        return (
            "There was an issue with the Google Sheets API. This is usually a temporary issue from Google's side."
            " Please try again. If this issue persists, contact support"
        )

    descriptions = {
        status_codes.FORBIDDEN: (
            f"The authenticated Google Sheets user does not have permissions to view the spreadsheet with id {spreadsheet_id}. "
            "Please ensure the authenticated user has access to the Spreadsheet and reauthenticate. If the issue persists, contact support"
        ),
        status_codes.NOT_FOUND: (
            f"The requested Google Sheets spreadsheet with id {spreadsheet_id} does not exist. "
            f"Please ensure the Spreadsheet Link you have set is valid and the spreadsheet exists. If the issue persists, contact support"
        ),
        status_codes.TOO_MANY_REQUESTS: "Rate limit has been reached. Please try later or request a higher quota for your account.",
    }
    return descriptions.get(code, "")
@@ -1,69 +0,0 @@
1
- #
2
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
-
6
- import re
7
-
8
- import unidecode
9
- from requests.status_codes import codes as status_codes
10
-
11
-
12
- TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
13
- DEFAULT_SEPARATOR = "_"
14
-
15
-
16
- def name_conversion(text: str) -> str:
17
- """
18
- convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
19
- """
20
- text = unidecode.unidecode(text)
21
-
22
- tokens = []
23
- for m in TOKEN_PATTERN.finditer(text):
24
- if m.group("NoToken") is None:
25
- tokens.append(m.group(0))
26
- else:
27
- tokens.append("")
28
-
29
- if len(tokens) >= 3:
30
- tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
31
-
32
- if tokens and tokens[0].isdigit():
33
- tokens.insert(0, "")
34
-
35
- text = DEFAULT_SEPARATOR.join(tokens)
36
- text = text.lower()
37
- return text
38
-
39
-
40
- def safe_name_conversion(text: str) -> str:
41
- if not text:
42
- return text
43
- new = name_conversion(text)
44
- if not new:
45
- raise Exception(f"initial string '{text}' converted to empty")
46
- return new
47
-
48
-
49
- def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
50
- if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
51
- return (
52
- "There was an issue with the Google Sheets API. This is usually a temporary issue from Google's side."
53
- " Please try again. If this issue persists, contact support"
54
- )
55
- if code == status_codes.FORBIDDEN:
56
- return (
57
- f"The authenticated Google Sheets user does not have permissions to view the spreadsheet with id {spreadsheet_id}. "
58
- "Please ensure the authenticated user has access to the Spreadsheet and reauthenticate. If the issue persists, contact support"
59
- )
60
- if code == status_codes.NOT_FOUND:
61
- return (
62
- f"The requested Google Sheets spreadsheet with id {spreadsheet_id} does not exist. "
63
- f"Please ensure the Spreadsheet Link you have set is valid and the spreadsheet exists. If the issue persists, contact support"
64
- )
65
-
66
- if code == status_codes.TOO_MANY_REQUESTS:
67
- return "Rate limit has been reached. Please try later or request a higher quota for your account."
68
-
69
- return ""