airbyte-source-google-sheets 0.9.5.dev202505142036__tar.gz → 0.10.0.dev202505211933__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (15)
  1. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/PKG-INFO +1 -1
  2. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/pyproject.toml +1 -1
  3. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/extractors.py +19 -8
  4. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/spec.yaml +8 -0
  5. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/utils.py +45 -0
  6. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/README.md +0 -0
  7. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/__init__.py +0 -0
  8. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/__init__.py +0 -0
  9. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/components/partition_routers.py +0 -0
  10. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/manifest.yaml +0 -0
  11. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/models/__init__.py +0 -0
  12. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/models/spreadsheet.py +0 -0
  13. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/models/spreadsheet_values.py +0 -0
  14. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/run.py +0 -0
  15. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505211933}/source_google_sheets/source.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-sheets
3
- Version: 0.9.5.dev202505142036
3
+ Version: 0.10.0.dev202505211933
4
4
  Summary: Source implementation for Google Sheets.
5
5
  License: Elv2
6
6
  Author: Airbyte
@@ -5,7 +5,7 @@ requires = [
5
5
  build-backend = "poetry.core.masonry.api"
6
6
 
7
7
  [tool.poetry]
8
- version = "0.9.5.dev202505142036"
8
+ version = "0.10.0.dev202505211933"
9
9
  name = "airbyte-source-google-sheets"
10
10
  description = "Source implementation for Google Sheets."
11
11
  authors = [
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
12
12
  from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
13
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14
14
  from airbyte_cdk.sources.types import Config
15
- from source_google_sheets.utils import name_conversion, safe_name_conversion
15
+ from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
16
16
 
17
17
 
18
18
  class RawSchemaParser:
@@ -54,9 +54,12 @@ class RawSchemaParser:
54
54
  schema_pointer: List[Union[InterpolatedString, str]],
55
55
  key_pointer: List[Union[InterpolatedString, str]],
56
56
  names_conversion: bool,
57
+ experimental_names_conversion: bool,
57
58
  ):
58
59
  """
59
- 1. Parses sheet headers from the provided raw schema, skipping any headers that are empty or contain only whitespace.
60
+ 1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous
61
+ i.e., every cell contains a value, and the first cell that does not contain a value denotes the end
62
+ of the headers.
60
63
  2. Makes name conversion if required.
61
64
  3. Removes duplicated fields from the schema.
62
65
  Returns a list of tuples with the correct property index (as found in the array), value and raw_schema
@@ -68,8 +71,11 @@ class RawSchemaParser:
68
71
  for property_index, raw_schema_property in enumerate(raw_schema_properties):
69
72
  raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
70
73
  if not raw_schema_property_value or raw_schema_property_value.isspace():
71
- continue
72
- if names_conversion:
74
+ break
75
+ # Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
76
+ if experimental_names_conversion:
77
+ raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
78
+ elif names_conversion:
73
79
  raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
74
80
 
75
81
  if raw_schema_property_value in seen_values:
@@ -87,12 +93,13 @@ class RawSchemaParser:
87
93
  def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
88
94
  """Removes duplicated fields and makes names conversion"""
89
95
  names_conversion = self.config.get("names_conversion", False)
96
+ experimental_names_conversion = self.config.get("experimental_names_conversion", False)
90
97
  schema_pointer = schema_type_identifier.get("schema_pointer")
91
98
  key_pointer = schema_type_identifier["key_pointer"]
92
99
  parsed_properties = []
93
100
  for raw_schema_data in records:
94
101
  for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
95
- raw_schema_data, schema_pointer, key_pointer, names_conversion
102
+ raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
96
103
  ):
97
104
  self._set_data(parsed_value, raw_schema_property, key_pointer)
98
105
  parsed_properties.append(raw_schema_property)
@@ -138,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
138
145
  self._values_to_match_key = parameters["values_to_match_key"]
139
146
  schema_type_identifier = parameters["schema_type_identifier"]
140
147
  names_conversion = self.config.get("names_conversion", False)
148
+ experimental_names_conversion = self.config.get("experimental_names_conversion", False)
141
149
  self._indexed_properties_to_match = self.extract_properties_to_match(
142
- parameters["properties_to_match"], schema_type_identifier, names_conversion=names_conversion
150
+ parameters["properties_to_match"],
151
+ schema_type_identifier,
152
+ names_conversion=names_conversion,
153
+ experimental_names_conversion=experimental_names_conversion,
143
154
  )
144
155
 
145
- def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
156
+ def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
146
157
  schema_pointer = schema_type_identifier.get("schema_pointer")
147
158
  key_pointer = schema_type_identifier["key_pointer"]
148
159
  indexed_properties = {}
149
160
  for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
150
- properties_to_match, schema_pointer, key_pointer, names_conversion
161
+ properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
151
162
  ):
152
163
  indexed_properties[property_index] = property_parsed_value
153
164
  return indexed_properties
@@ -33,6 +33,14 @@ connectionSpecification:
33
33
  title: Convert Column Names to SQL-Compliant Format
34
34
  description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
35
35
  default: false
36
+ experimental_names_conversion:
37
+ type: boolean
38
+ title: Experimental Convert Column Names to SQL-Compliant Format
39
+ description: >-
40
+ Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
41
+ This option may change behavior in the future, which may cause column names to update in your destination on future updates.
42
+ Due to this, it is recommended that you also change the "Detect and propagate schema changes" setting to "Approve all changes myself" in the connection's advanced settings.
43
+ If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
36
44
  credentials:
37
45
  type: object
38
46
  title: Authentication
@@ -37,6 +37,42 @@ def name_conversion(text: str) -> str:
37
37
  return text
38
38
 
39
39
 
40
+ def experimental_name_conversion(text: str) -> str:
41
+ """
42
+ Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
43
+ Removes leading/trailing spaces and combines number-word pairs (e.g., tokens '50' and 'th' -> '50th').
44
+ """
45
+ text = unidecode.unidecode(text.strip()) # Strip leading/trailing spaces
46
+
47
+ tokens = []
48
+ for m in TOKEN_PATTERN.finditer(text):
49
+ if m.group("NoToken") is None:
50
+ tokens.append(m.group(0))
51
+ else:
52
+ tokens.append("")
53
+
54
+ # Combine number followed by word (e.g., "50" and "th" -> "50th")
55
+ combined_tokens = []
56
+ i = 0
57
+ while i < len(tokens):
58
+ if i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
59
+ combined_tokens.append(tokens[i] + tokens[i + 1])
60
+ i += 2
61
+ else:
62
+ combined_tokens.append(tokens[i])
63
+ i += 1
64
+
65
+ if len(combined_tokens) >= 3:
66
+ combined_tokens = combined_tokens[:1] + [t for t in combined_tokens[1:-1] if t] + combined_tokens[-1:]
67
+
68
+ if combined_tokens and combined_tokens[0].isdigit():
69
+ combined_tokens.insert(0, "")
70
+
71
+ text = DEFAULT_SEPARATOR.join(combined_tokens)
72
+ text = text.lower()
73
+ return text
74
+
75
+
40
76
  def safe_name_conversion(text: str) -> str:
41
77
  if not text:
42
78
  return text
@@ -46,6 +82,15 @@ def safe_name_conversion(text: str) -> str:
46
82
  return new
47
83
 
48
84
 
85
+ def experimental_safe_name_conversion(text: str) -> str:
86
+ if not text:
87
+ return text
88
+ new = experimental_name_conversion(text)
89
+ if not new:
90
+ raise Exception(f"initial string '{text}' converted to empty")
91
+ return new
92
+
93
+
49
94
  def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
50
95
  if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
51
96
  return (