airbyte-source-google-sheets 0.9.5.dev202505142036__tar.gz → 0.10.0.dev202505231635__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16) hide show
  1. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/PKG-INFO +1 -1
  2. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/pyproject.toml +1 -1
  3. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/extractors.py +19 -8
  4. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/spec.yaml +8 -0
  5. airbyte_source_google_sheets-0.10.0.dev202505231635/source_google_sheets/utils.py +142 -0
  6. airbyte_source_google_sheets-0.9.5.dev202505142036/source_google_sheets/utils.py +0 -69
  7. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/README.md +0 -0
  8. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/__init__.py +0 -0
  9. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/__init__.py +0 -0
  10. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/components/partition_routers.py +0 -0
  11. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/manifest.yaml +0 -0
  12. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/__init__.py +0 -0
  13. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/spreadsheet.py +0 -0
  14. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/models/spreadsheet_values.py +0 -0
  15. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/run.py +0 -0
  16. {airbyte_source_google_sheets-0.9.5.dev202505142036 → airbyte_source_google_sheets-0.10.0.dev202505231635}/source_google_sheets/source.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-source-google-sheets
3
- Version: 0.9.5.dev202505142036
3
+ Version: 0.10.0.dev202505231635
4
4
  Summary: Source implementation for Google Sheets.
5
5
  License: Elv2
6
6
  Author: Airbyte
@@ -5,7 +5,7 @@ requires = [
5
5
  build-backend = "poetry.core.masonry.api"
6
6
 
7
7
  [tool.poetry]
8
- version = "0.9.5.dev202505142036"
8
+ version = "0.10.0.dev202505231635"
9
9
  name = "airbyte-source-google-sheets"
10
10
  description = "Source implementation for Google Sheets."
11
11
  authors = [
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
12
12
  from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
13
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14
14
  from airbyte_cdk.sources.types import Config
15
- from source_google_sheets.utils import name_conversion, safe_name_conversion
15
+ from source_google_sheets.utils import experimental_safe_name_conversion, name_conversion, safe_name_conversion
16
16
 
17
17
 
18
18
  class RawSchemaParser:
@@ -54,9 +54,12 @@ class RawSchemaParser:
54
54
  schema_pointer: List[Union[InterpolatedString, str]],
55
55
  key_pointer: List[Union[InterpolatedString, str]],
56
56
  names_conversion: bool,
57
+ experimental_names_conversion: bool,
57
58
  ):
58
59
  """
59
- 1. Parses sheet headers from the provided raw schema, skipping any headers that are empty or contain only whitespace.
60
+ 1. Parses sheet headers from the provided raw schema. This method assumes that data is contiguous
61
+ i.e.: every cell contains a value and the first cell which does not contain a value denotes the end
62
+ of the headers.
60
63
  2. Makes name conversion if required.
61
64
  3. Removes duplicated fields from the schema.
62
65
  Return a list of tuples with correct property index (by found in array), value and raw_schema
@@ -68,8 +71,11 @@ class RawSchemaParser:
68
71
  for property_index, raw_schema_property in enumerate(raw_schema_properties):
69
72
  raw_schema_property_value = self._extract_data(raw_schema_property, key_pointer)
70
73
  if not raw_schema_property_value or raw_schema_property_value.isspace():
71
- continue
72
- if names_conversion:
74
+ break
75
+ # Apply experimental conversion if enabled; otherwise, apply standard conversion if enabled
76
+ if experimental_names_conversion:
77
+ raw_schema_property_value = experimental_safe_name_conversion(raw_schema_property_value)
78
+ elif names_conversion:
73
79
  raw_schema_property_value = safe_name_conversion(raw_schema_property_value)
74
80
 
75
81
  if raw_schema_property_value in seen_values:
@@ -87,12 +93,13 @@ class RawSchemaParser:
87
93
  def parse(self, schema_type_identifier, records: Iterable[MutableMapping[Any, Any]]):
88
94
  """Removes duplicated fields and makes names conversion"""
89
95
  names_conversion = self.config.get("names_conversion", False)
96
+ experimental_names_conversion = self.config.get("experimental_names_conversion", False)
90
97
  schema_pointer = schema_type_identifier.get("schema_pointer")
91
98
  key_pointer = schema_type_identifier["key_pointer"]
92
99
  parsed_properties = []
93
100
  for raw_schema_data in records:
94
101
  for _, parsed_value, raw_schema_property in self.parse_raw_schema_values(
95
- raw_schema_data, schema_pointer, key_pointer, names_conversion
102
+ raw_schema_data, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
96
103
  ):
97
104
  self._set_data(parsed_value, raw_schema_property, key_pointer)
98
105
  parsed_properties.append(raw_schema_property)
@@ -138,16 +145,20 @@ class DpathSchemaMatchingExtractor(DpathExtractor, RawSchemaParser):
138
145
  self._values_to_match_key = parameters["values_to_match_key"]
139
146
  schema_type_identifier = parameters["schema_type_identifier"]
140
147
  names_conversion = self.config.get("names_conversion", False)
148
+ experimental_names_conversion = self.config.get("experimental_names_conversion", False)
141
149
  self._indexed_properties_to_match = self.extract_properties_to_match(
142
- parameters["properties_to_match"], schema_type_identifier, names_conversion=names_conversion
150
+ parameters["properties_to_match"],
151
+ schema_type_identifier,
152
+ names_conversion=names_conversion,
153
+ experimental_names_conversion=experimental_names_conversion,
143
154
  )
144
155
 
145
- def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion):
156
+ def extract_properties_to_match(self, properties_to_match, schema_type_identifier, names_conversion, experimental_names_conversion):
146
157
  schema_pointer = schema_type_identifier.get("schema_pointer")
147
158
  key_pointer = schema_type_identifier["key_pointer"]
148
159
  indexed_properties = {}
149
160
  for property_index, property_parsed_value, _ in self.parse_raw_schema_values(
150
- properties_to_match, schema_pointer, key_pointer, names_conversion
161
+ properties_to_match, schema_pointer, key_pointer, names_conversion, experimental_names_conversion
151
162
  ):
152
163
  indexed_properties[property_index] = property_parsed_value
153
164
  return indexed_properties
@@ -33,6 +33,14 @@ connectionSpecification:
33
33
  title: Convert Column Names to SQL-Compliant Format
34
34
  description: Enables the conversion of column names to a standardized, SQL-compliant format. For example, 'My Name' -> 'my_name'. Enable this option if your destination is SQL-based.
35
35
  default: false
36
+ experimental_names_conversion:
37
+ type: boolean
38
+ title: Experimental Convert Column Names to SQL-Compliant Format
39
+ description: >-
40
+ Adds additional sanitization to column names before converting to SQL-compliant format, such as removing leading and trailing spaces.
41
+ This option may change behavior in the future, which may cause column names to update in your destination on future updates.
42
+ Due to this, it is recommended that you also change the "Detect and propagate schema changes" to "Approve all changes myself" in the connection advanced settings.
43
+ If enabled, this option will supersede the `Convert Column Names to SQL-Compliant Format` option.
36
44
  credentials:
37
45
  type: object
38
46
  title: Authentication
@@ -0,0 +1,142 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ import re
7
+
8
+ import unidecode
9
+ from requests.status_codes import codes as status_codes
10
+
11
+
12
+ TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
13
+ DEFAULT_SEPARATOR = "_"
14
+
15
+
16
def name_conversion(text: str) -> str:
    """
    convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
    """
    ascii_text = unidecode.unidecode(text)

    # Alphanumeric runs keep their text; runs of any other characters become
    # empty markers that later turn into separators in the joined result.
    tokens = ["" if m.group("NoToken") is not None else m.group(0) for m in TOKEN_PATTERN.finditer(ascii_text)]

    # Collapse empty markers in the interior, keeping at most one leading and
    # one trailing empty token (so edge separators are preserved).
    if len(tokens) >= 3:
        tokens = [tokens[0], *(t for t in tokens[1:-1] if t), tokens[-1]]

    # A leading digit gets an extra empty token so the result starts with '_'.
    if tokens and tokens[0].isdigit():
        tokens = [""] + tokens

    return DEFAULT_SEPARATOR.join(tokens).lower()
38
+
39
+
40
def experimental_name_conversion(text: str) -> str:
    """
    Convert name using a set of rules, for example: '1MyName' -> '_1_my_name'.

    Removes leading/trailing spaces, combines number-word pairs (e.g. '50th' -> '50th')
    and letter-number pairs (e.g. 'Q3' -> 'Q3'), and removes special characters
    without adding underscores. Spaces are converted to underscores for snake_case.
    """
    text = unidecode.unidecode(text.strip())  # strip leading/trailing spaces first

    tokens = []
    for m in TOKEN_PATTERN.finditer(text):
        if m.group("NoToken") is None:
            tokens.append(m.group(0))
        elif m.group(0).isspace():
            # Keep an empty marker for spaces: it is dropped from the output
            # below, but it prevents the pairing rules from merging tokens that
            # were separated by a space (e.g. "Q 3" must stay "q_3", not "q3").
            tokens.append("")
        # Any other run of special characters is dropped entirely.

    # Combine letter-number pairs ("Q" + "3" -> "Q3") and number-word pairs
    # ("50" + "th" -> "50th"); everything else passes through unchanged.
    combined_tokens = []
    i = 0
    while i < len(tokens):
        if i + 1 < len(tokens) and len(tokens[i]) == 1 and tokens[i].isupper() and tokens[i + 1].isdigit():
            combined_tokens.append(tokens[i] + tokens[i + 1])
            i += 2
        elif i + 1 < len(tokens) and tokens[i].isdigit() and tokens[i + 1].isalpha():
            combined_tokens.append(tokens[i] + tokens[i + 1])
            i += 2
        else:
            # Space markers are only pairing blockers — never emit them, so the
            # joined result gets exactly one underscore between real tokens.
            if tokens[i]:
                combined_tokens.append(tokens[i])
            i += 1
    # NOTE: combined_tokens can never contain an empty string here (empties are
    # skipped above and pair branches concatenate non-empty tokens), so the
    # previous leading/trailing-empty cleanup loops were dead code and removed.

    # Prefix with an empty token so names starting with a digit get a leading '_'.
    if combined_tokens and combined_tokens[0].isdigit():
        combined_tokens.insert(0, "")

    text = DEFAULT_SEPARATOR.join(combined_tokens)
    return text.lower()
93
+
94
+
95
def safe_name_conversion(text: str) -> str:
    """
    Guarded wrapper around name_conversion.

    Falsy input is returned unchanged; if the conversion collapses a non-empty
    name into an empty string, an exception is raised instead of silently
    producing an unusable field name.
    """
    if not text:
        return text
    converted = name_conversion(text)
    if not converted:
        raise Exception(f"initial string '{text}' converted to empty")
    return converted
102
+
103
+
104
def experimental_safe_name_conversion(text: str, output_file: str = "conversion_results2.csv") -> str:
    """
    Guarded wrapper around experimental_name_conversion.

    Falsy input is returned unchanged; if the conversion collapses a non-empty
    name into an empty string, an exception is raised so a column name is never
    silently lost.

    :param text: original column/header name.
    :param output_file: deprecated and ignored. Earlier versions appended every
        (original, converted) pair to this CSV file as debug instrumentation;
        writing to the local filesystem on every header conversion is an
        unwanted side effect in a connector, so the logging (and the stray
        mid-module ``import csv``) was removed. The parameter is kept only for
        backward compatibility of the signature.
    """
    if not text:
        return text
    new = experimental_name_conversion(text)
    if not new:
        raise Exception(f"initial string '{text}' converted to empty")
    return new
120
+
121
+
122
def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
    """
    Build a human-readable description for a failed Google Sheets API call.

    :param code: HTTP status code returned by the API.
    :param spreadsheet_id: id of the spreadsheet being accessed; interpolated
        into the permission / not-found messages.
    :return: a descriptive message, or an empty string for unhandled codes.
    """
    # 5xx responses are treated as transient problems on Google's side.
    server_side_codes = (status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE)
    if code in server_side_codes:
        return (
            "There was an issue with the Google Sheets API. This is usually a temporary issue from Google's side."
            " Please try again. If this issue persists, contact support"
        )

    descriptions = {
        status_codes.FORBIDDEN: (
            f"The authenticated Google Sheets user does not have permissions to view the spreadsheet with id {spreadsheet_id}. "
            "Please ensure the authenticated user has access to the Spreadsheet and reauthenticate. If the issue persists, contact support"
        ),
        status_codes.NOT_FOUND: (
            f"The requested Google Sheets spreadsheet with id {spreadsheet_id} does not exist. "
            f"Please ensure the Spreadsheet Link you have set is valid and the spreadsheet exists. If the issue persists, contact support"
        ),
        status_codes.TOO_MANY_REQUESTS: "Rate limit has been reached. Please try later or request a higher quota for your account.",
    }
    return descriptions.get(code, "")
@@ -1,69 +0,0 @@
1
- #
2
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
-
6
- import re
7
-
8
- import unidecode
9
- from requests.status_codes import codes as status_codes
10
-
11
-
12
- TOKEN_PATTERN = re.compile(r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)")
13
- DEFAULT_SEPARATOR = "_"
14
-
15
-
16
- def name_conversion(text: str) -> str:
17
- """
18
- convert name using a set of rules, for example: '1MyName' -> '_1_my_name'
19
- """
20
- text = unidecode.unidecode(text)
21
-
22
- tokens = []
23
- for m in TOKEN_PATTERN.finditer(text):
24
- if m.group("NoToken") is None:
25
- tokens.append(m.group(0))
26
- else:
27
- tokens.append("")
28
-
29
- if len(tokens) >= 3:
30
- tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
31
-
32
- if tokens and tokens[0].isdigit():
33
- tokens.insert(0, "")
34
-
35
- text = DEFAULT_SEPARATOR.join(tokens)
36
- text = text.lower()
37
- return text
38
-
39
-
40
- def safe_name_conversion(text: str) -> str:
41
- if not text:
42
- return text
43
- new = name_conversion(text)
44
- if not new:
45
- raise Exception(f"initial string '{text}' converted to empty")
46
- return new
47
-
48
-
49
- def exception_description_by_status_code(code: int, spreadsheet_id) -> str:
50
- if code in [status_codes.INTERNAL_SERVER_ERROR, status_codes.BAD_GATEWAY, status_codes.SERVICE_UNAVAILABLE]:
51
- return (
52
- "There was an issue with the Google Sheets API. This is usually a temporary issue from Google's side."
53
- " Please try again. If this issue persists, contact support"
54
- )
55
- if code == status_codes.FORBIDDEN:
56
- return (
57
- f"The authenticated Google Sheets user does not have permissions to view the spreadsheet with id {spreadsheet_id}. "
58
- "Please ensure the authenticated user has access to the Spreadsheet and reauthenticate. If the issue persists, contact support"
59
- )
60
- if code == status_codes.NOT_FOUND:
61
- return (
62
- f"The requested Google Sheets spreadsheet with id {spreadsheet_id} does not exist. "
63
- f"Please ensure the Spreadsheet Link you have set is valid and the spreadsheet exists. If the issue persists, contact support"
64
- )
65
-
66
- if code == status_codes.TOO_MANY_REQUESTS:
67
- return "Rate limit has been reached. Please try later or request a higher quota for your account."
68
-
69
- return ""