PyPI - airbyte-source-google-sheets - Versions diffs - 0.9.5.dev202505142036__tar.gz → 0.10.0.dev202505211933__tar.gz - Mend

airbyte-source-google-sheets 0.9.5.dev202505142036tar.gz → 0.10.0.dev202505211933tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: airbyte-source-google-sheets
-Version: 0.9.5.dev202505142036
+Version: 0.10.0.dev202505211933
 Summary: Source implementation for Google Sheets.
 License: Elv2
 Author: Airbyte

{airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ requires = [
 build-backend = "poetry.core.masonry.api"
 [tool.poetry]
-version = "0.9.5.dev202505142036"
+version = "0.10.0.dev202505211933"
 name = "airbyte-source-google-sheets"
 description = "Source implementation for Google Sheets."
 authors = [

{airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/extractors.py RENAMED Viewed

@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
 from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.types import Config
-from source_google_sheets.utils import name_conversion, safe_name_conversion
+from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
 class RawSchemaParser:
@@ -54,9 +54,12 @@ class RawSchemaParser:
         schema_pointer: List[Union[InterpolatedString, str]],
         key_pointer: List[Union[InterpolatedString, str]],
         names_conversion: bool,
+        experimental_names_conversion: bool,
     ):
         """
-        1. Parses sheet headers from the provided raw schema, skipping any headers that are empty or contain only whitespace.
+        1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous
+            i.e., every cell contains a value, and the first cell that does not contain a value denotes the end
+            of the headers.
         2. Makes name conversion if required.
         3. Removes duplicated fields from the schema.
         Return a list of tuples containing the property index (its position in the array), the parsed value, and the raw_schema
@@ -68,8 +71,11 @@ class RawSchemaParser:
         for property_index, raw_schema_property in enumerate(raw_schema_properties):
             raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
             if not raw_schema_property_value or raw_schema_property_value.isspace():
-                continue
-            if names_conversion:
+                break
+            # Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
+            if experimental_names_conversion:
+                raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
+            elif names_conversion:
                 raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
             if raw_schema_property_value in seen_values:
@@ -87,12 +93,13 @@ class RawSchemaParser:
     def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
         """Removes duplicated fields and makes names conversion"""
         names_conversion = self.config.get("names_conversion", False)
+        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         parsed_properties = []
         for raw_schema_data in records:
             for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
-                raw_schema_data, schema_pointer, key_pointer, names_conversion
+                raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
             ):
                 self._set_data(parsed_value, raw_schema_property, key_pointer)
                 parsed_properties.append(raw_schema_property)
@@ -138,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
         self._values_to_match_key = parameters["values_to_match_key"]
         schema_type_identifier = parameters["schema_type_identifier"]
         names_conversion = self.config.get("names_conversion", False)
+        experimental_names_conversion = self.config.get("experimental_names_conversion", False)
         self._indexed_properties_to_match = self.extract_properties_to_match(
-            parameters["properties_to_match"], schema_type_identifier, names_conversion=names_conversion
+            parameters["properties_to_match"],
+            schema_type_identifier,
+            names_conversion=names_conversion,
+            experimental_names_conversion=experimental_names_conversion,
         )
-    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
+    def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
         schema_pointer = schema_type_identifier.get("schema_pointer")
         key_pointer = schema_type_identifier["key_pointer"]
         indexed_properties = {}
         for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
-            properties_to_match, schema_pointer, key_pointer, names_conversion
+            properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
         ):
             indexed_properties[property_index] = property_parsed_value
         return indexed_properties

{airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/spec.yaml RENAMED Viewed

@@ -33,6 +33,14 @@ connectionSpecification:
       title: Convert Column Names to SQL-Compliant Format
       description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
       default: false
+    experimental_names_conversion:
+      type: boolean
+      title: Experimental Convert Column Names to SQL-Compliant Format
+      description: >-
+        Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
+        This option may change behavior in the future, which may cause column names to update in your destination on future updates.
+        Due to this, it is recommended that you also change the "Detect and propagate schema changes" setting to "Approve all changes myself" in the connection's advanced settings.
+        If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
     credentials:
       type: object
       title: Authentication

{airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/utils.py RENAMED Viewed

@@ -37,6 +37,42 @@ def name_conversion(text: str) -> str:
     return text
+def experimental_name_conversion(text: str) -> str:
+    """
+    Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
+    Removes leading/trailing spaces and combines a numeric token with the alphabetic token that follows it (e.g., '50' and 'th' -> '50th').
+    """
+    text = unidecode.unidecode(text.strip())  # Strip leading/trailing spaces
+    tokens = []
+    for m in TOKEN_PATTERN.finditer(text):
+        if m.group("NoToken") is None:
+            tokens.append(m.group(0))
+        else:
+            tokens.append("")
+    # Combine number followed by word (e.g., "50" and "th" -> "50th")
+    combined_tokens = []
+    i = 0
+    while i < len(tokens):
+        if i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
+            combined_tokens.append(tokens[i] + tokens[i + 1])
+            i += 2
+        else:
+            combined_tokens.append(tokens[i])
+            i += 1
+    if len(combined_tokens) >= 3:
+        combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
+    if combined_tokens and combined_tokens[0].isdigit():
+        combined_tokens.insert(0, "")
+    text = DEFAULT_SEPARATOR.join(combined_tokens)
+    text = text.lower()
+    return text
 def safe_name_conversion(text: str) -> str:
     if not text:
         return text
@@ -46,6 +82,15 @@ def safe_name_conversion(text: str) -> str:
     return new
+def experimental_safe_name_conversion(text: str) -> str:
+    if not text:
+        return text
+    new = experimental_name_conversion(text)
+    if not new:
+        raise Exception(f"initial string '{text}' converted to empty")
+    return new
 def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
     if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
         return (