PyPI - airbyte-source-google-sheets - Versions diffs - 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

airbyte-source-google-sheets 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

{airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: airbyte-source-google-sheets
-Version: 0.9.6
+Version: 0.10.0
 Summary: Source implementation for Google Sheets.
 License: Elv2
 Author: Airbyte

{airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 source_google_sheets/__init__.py,sha256=dYaZS0KrTjksk_yeSuXmNDXDsNKWctFnlZJSMgLryXE,135
 source_google_sheets/components/__init__.py,sha256=v7odPaLdz2S_NRzExsxkk6694Vmjbplz4Z5mA4lxdkA,343
-source_google_sheets/components/extractors.py,sha256=Re0zt3_pUtRJMTcephi9GvvM6kYiZqWWtKefel0v0ZI,8948
+source_google_sheets/components/extractors.py,sha256=Yrl5ge_gXJ6jqVYYR2bk2f6Rg-xak0wlHnPyK9f1lhc,9854
 source_google_sheets/components/partition_routers.py,sha256=SWo1V0K10ZdWE2TQ0KuQUfue04RTyHJe1f6BOj6c96s,1265
 source_google_sheets/manifest.yaml,sha256=CuSnA8dnRMeXWfyUA6aXBvGU3mz1dJIi0HqMiks9Fd0,15938
 source_google_sheets/models/__init__.py,sha256=Z-4MTpxG5t2jGhXzs4PPoIOa83zw3jRnUDx0N9Puv3s,61
@@ -8,9 +8,9 @@ source_google_sheets/models/spreadsheet.py,sha256=DEef7bWQEpY1Uqyk7RN6qLF8oxLSNz
 source_google_sheets/models/spreadsheet_values.py,sha256=-XRMuuILn9JN8svHNTj6-oG8mLTZOZ5Hejy2pJ5bILk,440
 source_google_sheets/run.py,sha256=eaPRcarWqkB2b2DokvI83w7rz1blmWPQCFahvCyCdSY,1887
 source_google_sheets/source.py,sha256=qO1KoGdphieu7F5VgDYtrbqs56AUvMWFGNvFHP2b9Z4,778
-source_google_sheets/spec.yaml,sha256=RIUILMhfS0is2r_mCkmIVrQfvND1D3eobDK1YElmzhU,5009
-source_google_sheets/utils.py,sha256=JEQIVLSFEAff-7zF3gPzsvFc9xLfCj9hVuFFYrSWiOo,2290
-airbyte_source_google_sheets-0.9.6.dist-info/METADATA,sha256=OJAGlWTUGzUNaLTBl_pf6PavyGMVI6vF9aOJH2qbL2E,5368
-airbyte_source_google_sheets-0.9.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-airbyte_source_google_sheets-0.9.6.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
-airbyte_source_google_sheets-0.9.6.dist-info/RECORD,,
+source_google_sheets/spec.yaml,sha256=HGVGay4VAxzi9TUj-MSpeQLyE9GAoOjD2-xIhQDiIGY,6901
+source_google_sheets/utils.py,sha256=jiVPqsRDjVgdwIiBJMvFJEwwuUBQ7BQAebRqfpS9pZw,6943
+airbyte_source_google_sheets-0.10.0.dist-info/METADATA,sha256=Mzmw5ZoFCeMjyDI0pxxoGtHaAGQV4BfQq3Wp108SdRA,5369
+airbyte_source_google_sheets-0.10.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+airbyte_source_google_sheets-0.10.0.dist-info/entry_points.txt,sha256=Dtsfjohe5IPUFyqojk49SIoP7CifCTlNLG_pgivzppo,69
+airbyte_source_google_sheets-0.10.0.dist-info/RECORD,,

source_google_sheets/components/extractors.py CHANGED Viewed

@@ -12,7 +12,11 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
 from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.types import Config
-from source_google_sheets.utils import name_conversion, safe_name_conversion
+from source_google_sheets.utils import (
+    name_conversion,
+    safe_name_conversion,
+    safe_sanitzation_conversion,
+)
 class RawSchemaParser:
@@ -67,11 +71,25 @@ class RawSchemaParser:
         duplicate_fields = set()
         parsed_schema_values = []
         seen_values = set()
+        # Gather all sanitisation flags from config
+        config = getattr(self, "config", {})
+        flags = {
+            "remove_leading_trailing_underscores": config.get("remove_leading_trailing_underscores", False),
+            "combine_number_word_pairs": config.get("combine_number_word_pairs", False),
+            "remove_special_characters": config.get("remove_special_characters", False),
+            "combine_letter_number_pairs": config.get("combine_letter_number_pairs", False),
+            "allow_leading_numbers": config.get("allow_leading_numbers", False),
+        }
+        use_granular = any(flags.values())
         for property_index, raw_schema_property in enumerate(raw_schema_properties):
             raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
             if not raw_schema_property_value or raw_schema_property_value.isspace():
                 break
-            if names_conversion:
+            # Use granular if any flag is set, else legacy
+            if names_conversion and use_granular:
+                raw_schema_property_value = safe_sanitzation_conversion(raw_schema_property_value, **flags)
+            elif names_conversion:
                 raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
             if raw_schema_property_value in seen_values:
@@ -193,6 +211,7 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
                     )
+@dataclass
 class DpathSchemaExtractor(DpathExtractor, RawSchemaParser):
     """
     Makes names conversion and parses sheet headers from the provided row.

source_google_sheets/spec.yaml CHANGED Viewed

@@ -31,7 +31,49 @@ connectionSpecification:
     names_conversion:
       type: boolean
       title: Convert Column Names to SQL-Compliant Format
-      description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
+      description: >-
+        Converts column names to a SQL-compliant format (snake_case, lowercase, etc).
+        If enabled, you can further customize the sanitization using the options below.
+      default: false
+    remove_leading_trailing_underscores:
+      type: boolean
+      title: Remove Leading and Trailing Underscores
+      description: >-
+        Removes leading and trailing underscores from column names. Does not remove leading underscores from column names that start with a number.
+        Example: "50th Percentile? "→ "_50_th_percentile"
+        This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+      default: false
+    combine_number_word_pairs:
+      type: boolean
+      title: Combine Number-Word Pairs
+      description: >-
+        Combines adjacent numbers and words.
+        Example: "50th Percentile?" → "_50th_percentile_"
+        This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+      default: false
+    remove_special_characters:
+      type: boolean
+      title: Remove All Special Characters
+      description: >-
+        Removes all special characters from column names.
+        Example: "Example ID*" → "example_id"
+        This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+      default: false
+    combine_letter_number_pairs:
+      type: boolean
+      title: Combine Letter-Number Pairs
+      description: >-
+        Combines adjacent letters and numbers.
+        Example: "Q3 2023" → "q3_2023"
+        This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
+      default: false
+    allow_leading_numbers:
+      type: boolean
+      title: Allow Leading Numbers
+      description: >-
+        Allows column names to start with numbers.
+        Example: "50th Percentile" → "50_th_percentile"
+        This option will only work if "Convert Column Names to SQL-Compliant Format (names_conversion)" is enabled.
       default: false
     credentials:
       type: object

source_google_sheets/utils.py CHANGED Viewed

@@ -46,6 +46,121 @@ def safe_name_conversion(text: str) -> str:
     return new
+def _sanitization(
+    text: str,
+    remove_leading_trailing_underscores: bool = False,
+    combine_number_word_pairs: bool = False,
+    remove_special_characters: bool = False,
+    combine_letter_number_pairs: bool = False,
+    allow_leading_numbers: bool = False,
+) -> str:
+    """
+    Converts a string into a normalized, SQL-compliant name using a set of configurable options.
+    Args:
+        text: The input string to convert.
+        remove_leading_trailing_underscores: If True, removes underscores at the start/end of the result.
+        combine_number_word_pairs: If True, combines adjacent number and word tokens (e.g., "50 th" -> "50th").
+        remove_special_characters: If True, removes all special characters from the input.
+        combine_letter_number_pairs: If True, combines adjacent letter and number tokens (e.g., "Q 3" -> "Q3").
+        allow_leading_numbers: If False, prepends an underscore if the result starts with a number.
+    Returns:
+        The normalized, SQL-compliant string.
+    Steps:
+    1. Transliterates the input text to ASCII using unidecode.
+    2. Optionally removes special characters if remove_special_characters is True.
+    3. Splits the text into tokens using a regex pattern that separates words, numbers, and non-alphanumeric characters.
+    4. Optionally combines adjacent letter+number or number+word tokens based on flags.
+    5. Removes empty tokens in the middle, but keeps leading/trailing empty tokens for underscore placement.
+    6. Optionally strips leading/trailing underscores if remove_leading_trailing_underscores is True.
+    7. Optionally prepends an underscore if the result starts with a number and allow_leading_numbers is False.
+    8. Returns the final string in lowercase.
+    """
+    text = unidecode.unidecode(text)
+    if remove_special_characters:
+        # [^\w\s] matches anything that is neither a word character nor whitespace.
+        # "_" counts as a word character, so underscores and whitespace survive this step.
+        text = re.sub(r"[^\w\s]", "", text)
+    # TOKEN_PATTERN is defined elsewhere in utils.py (not visible in this hunk).
+    # A match whose "NoToken" group participated is recorded as an empty-string
+    # placeholder; every other match is kept verbatim as a token.
+    tokens = []
+    for m in TOKEN_PATTERN.finditer(text):
+        if m.group("NoToken") is None:
+            tokens.append(m.group(0))
+        else:
+            tokens.append("")
+    # Combine tokens as per flags
+    # Single left-to-right pass. A merged pair consumes both tokens (i += 2), so a
+    # merged result is never merged again with the token that follows it. The
+    # letter+number rule is tested before the number+word rule.
+    combined_tokens = []
+    i = 0
+    while i < len(tokens):
+        if (
+            combine_letter_number_pairs
+            and i + 1 < len(tokens)
+            and tokens[i]
+            and tokens[i].isalpha()
+            and tokens[i + 1]
+            and tokens[i + 1].isdigit()
+        ):
+            combined = tokens[i] + tokens[i + 1]
+            combined_tokens.append(combined)
+            i += 2
+        elif (
+            combine_number_word_pairs
+            and i + 1 < len(tokens)
+            and tokens[i]
+            and tokens[i].isdigit()
+            and tokens[i + 1]
+            and tokens[i + 1].isalpha()
+        ):
+            combined = tokens[i] + tokens[i + 1]
+            combined_tokens.append(combined)
+            i += 2
+        else:
+            combined_tokens.append(tokens[i])
+            i += 1
+    # Find indices of first and last non-empty tokens
+    # The fallbacks (len(combined_tokens) and -1) mean "no non-empty token was found".
+    first_non_empty = next((i for i, t in enumerate(combined_tokens) if t), len(combined_tokens))
+    last_non_empty = next((i for i, t in reversed(list(enumerate(combined_tokens))) if t), -1)
+    # Process tokens: keep leading/trailing empty tokens, remove empty tokens in middle
+    if first_non_empty < len(combined_tokens):
+        leading = combined_tokens[:first_non_empty]
+        middle = [t for t in combined_tokens[first_non_empty : last_non_empty + 1] if t]
+        trailing = combined_tokens[last_non_empty + 1 :]
+        processed_tokens = leading + middle + trailing
+    else:
+        processed_tokens = combined_tokens  # All tokens are empty
+    # Join tokens with underscores
+    # DEFAULT_SEPARATOR is defined elsewhere in utils.py (presumably "_" -- confirm).
+    # Each empty placeholder kept at either end contributes one separator to the joined string.
+    result = DEFAULT_SEPARATOR.join(processed_tokens)
+    # Apply remove_leading_trailing_underscores on the final string
+    # str.strip treats its argument as a set of characters to trim from both ends.
+    if remove_leading_trailing_underscores:
+        result = result.strip(DEFAULT_SEPARATOR)
+    # Handle leading numbers after underscore removal
+    # This runs after the strip above, so a digit-leading name still gets one separator
+    # prepended even when remove_leading_trailing_underscores is True.
+    if not allow_leading_numbers and result and result[0].isdigit():
+        result = DEFAULT_SEPARATOR + result
+    final_result = result.lower()
+    return final_result
+# NOTE: the name is spelled "sanitzation" (sic). extractors.py imports the function
+# under this exact spelling, so it cannot be renamed here in isolation.
+def safe_sanitzation_conversion(text: str, **kwargs) -> str:
+    """
+    Converts text to a safe name using _sanitization with the provided keyword arguments.
+    Raises an exception if the result is empty or "_". Unlike safe_name_conversion,
+    this function also rejects "_" as a valid result, since _sanitization
+    may return "_" for certain inputs (e.g., "*").
+    """
+    # kwargs are forwarded unchanged, so only _sanitization's keyword parameters are accepted.
+    new = _sanitization(text, **kwargs)
+    # Only the exact results "" and "_" are rejected; any other string is returned as-is.
+    if not new or new == "_":
+        raise Exception(f"initial string '{text}' converted to empty")
+    return new
 def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
     if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
         return (

{airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{airbyte_source_google_sheets-0.9.6.dist-info → airbyte_source_google_sheets-0.10.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

airbyte-source-google-sheets 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl

airbyte-source-google-sheets 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl