airbyte-cdk 6.34.0.dev2__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +38 -122
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/test/mock_http/mocker.py +1 -9
  37. airbyte_cdk/test/mock_http/response.py +3 -6
  38. airbyte_cdk/utils/datetime_helpers.py +66 -48
  39. airbyte_cdk/utils/mapping_helpers.py +26 -126
  40. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  41. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +45 -54
  42. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  43. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  44. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  45. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  46. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  47. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  48. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  49. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  50. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  51. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  52. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  53. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  54. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  55. {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,22 +1,19 @@
1
1
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
2
 
3
3
  from types import MappingProxyType
4
- from typing import Mapping, Union
4
+ from typing import Mapping
5
5
 
6
6
 
7
7
  class HttpResponse:
8
8
  def __init__(
9
- self,
10
- body: Union[str, bytes],
11
- status_code: int = 200,
12
- headers: Mapping[str, str] = MappingProxyType({}),
9
+ self, body: str, status_code: int = 200, headers: Mapping[str, str] = MappingProxyType({})
13
10
  ):
14
11
  self._body = body
15
12
  self._status_code = status_code
16
13
  self._headers = headers
17
14
 
18
15
  @property
19
- def body(self) -> Union[str, bytes]:
16
+ def body(self) -> str:
20
17
  return self._body
21
18
 
22
19
  @property
@@ -76,8 +76,8 @@ from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse
76
76
  assert ab_datetime_try_parse("2023-03-14T15:09:26Z") # Basic UTC format
77
77
  assert ab_datetime_try_parse("2023-03-14T15:09:26-04:00") # With timezone offset
78
78
  assert ab_datetime_try_parse("2023-03-14T15:09:26+00:00") # With explicit UTC offset
79
- assert ab_datetime_try_parse("2023-03-14 15:09:26Z") # Missing T delimiter but still parsable
80
- assert not ab_datetime_try_parse("foo") # Invalid: not parsable, returns `None`
79
+ assert not ab_datetime_try_parse("2023-03-14 15:09:26Z") # Invalid: missing T delimiter
80
+ assert not ab_datetime_try_parse("foo") # Invalid: not a datetime
81
81
  ```
82
82
  """
83
83
 
@@ -138,14 +138,6 @@ class AirbyteDateTime(datetime):
138
138
  dt.tzinfo or timezone.utc,
139
139
  )
140
140
 
141
- def to_datetime(self) -> datetime:
142
- """Converts this AirbyteDateTime to a standard datetime object.
143
-
144
- Today, this just returns `self` because AirbyteDateTime is a subclass of `datetime`.
145
- In the future, we may modify our internal representation to use a different base class.
146
- """
147
- return self
148
-
149
141
  def __str__(self) -> str:
150
142
  """Returns the datetime in ISO8601/RFC3339 format with 'T' delimiter.
151
143
 
@@ -156,7 +148,12 @@ class AirbyteDateTime(datetime):
156
148
  str: ISO8601/RFC3339 formatted string.
157
149
  """
158
150
  aware_self = self if self.tzinfo else self.replace(tzinfo=timezone.utc)
159
- return aware_self.isoformat(sep="T", timespec="auto")
151
+ base = self.strftime("%Y-%m-%dT%H:%M:%S")
152
+ if self.microsecond:
153
+ base = f"{base}.{self.microsecond:06d}"
154
+ # Format timezone as ±HH:MM
155
+ offset = aware_self.strftime("%z")
156
+ return f"{base}{offset[:3]}:{offset[3:]}"
160
157
 
161
158
  def __repr__(self) -> str:
162
159
  """Returns the same string representation as __str__ for consistency.
@@ -361,15 +358,15 @@ def ab_datetime_now() -> AirbyteDateTime:
361
358
  def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
362
359
  """Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.
363
360
 
364
- This implementation is as flexible as possible to handle various datetime formats.
365
- Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
361
+ Previously named: parse()
366
362
 
367
363
  Handles:
368
- - ISO8601/RFC3339 format strings (with ' ' or 'T' delimiter)
364
+ - ISO8601/RFC3339 format strings (with 'T' delimiter)
369
365
  - Unix timestamps (as integers or strings)
370
366
  - Date-only strings (YYYY-MM-DD)
371
367
  - Timezone-aware formats (+00:00 for UTC, or ±HH:MM offset)
372
- - Anything that can be parsed by `dateutil.parser.parse()`
368
+
369
+ Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).
373
370
 
374
371
  Args:
375
372
  dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
@@ -419,16 +416,15 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
419
416
  except (ValueError, TypeError):
420
417
  raise ValueError(f"Invalid date format: {dt_str}")
421
418
 
422
- # Reject time-only strings without date
423
- if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2:
424
- raise ValueError(f"Missing date part in datetime string: {dt_str}")
419
+ # Validate datetime format
420
+ if "/" in dt_str or " " in dt_str or "GMT" in dt_str:
421
+ raise ValueError(f"Could not parse datetime string: {dt_str}")
425
422
 
426
423
  # Try parsing with dateutil for timezone handling
427
424
  try:
428
425
  parsed = parser.parse(dt_str)
429
426
  if parsed.tzinfo is None:
430
427
  parsed = parsed.replace(tzinfo=timezone.utc)
431
-
432
428
  return AirbyteDateTime.from_datetime(parsed)
433
429
  except (ValueError, TypeError):
434
430
  raise ValueError(f"Could not parse datetime string: {dt_str}")
@@ -442,29 +438,7 @@ def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
442
438
  raise ValueError(f"Could not parse datetime string: {dt_str}")
443
439
 
444
440
 
445
- def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
446
- """Try to parse the input as a datetime, failing gracefully instead of raising an exception.
447
-
448
- This is a thin wrapper around `ab_datetime_parse()` that catches parsing errors and
449
- returns `None` instead of raising an exception.
450
- The implementation is as flexible as possible to handle various datetime formats.
451
- Always returns a timezone-aware datetime (defaults to `UTC` if no timezone specified).
452
-
453
- Example:
454
- >>> ab_datetime_try_parse("2023-03-14T15:09:26Z") # Returns AirbyteDateTime
455
- >>> ab_datetime_try_parse("2023-03-14 15:09:26Z") # Missing 'T' delimiter still parsable
456
- >>> ab_datetime_try_parse("2023-03-14") # Returns midnight UTC time
457
- """
458
- try:
459
- return ab_datetime_parse(dt_str)
460
- except (ValueError, TypeError):
461
- return None
462
-
463
-
464
- def ab_datetime_format(
465
- dt: Union[datetime, AirbyteDateTime],
466
- format: str | None = None,
467
- ) -> str:
441
+ def ab_datetime_format(dt: Union[datetime, AirbyteDateTime]) -> str:
468
442
  """Formats a datetime object as an ISO8601/RFC3339 string with 'T' delimiter and timezone.
469
443
 
470
444
  Previously named: format()
@@ -475,8 +449,6 @@ def ab_datetime_format(
475
449
 
476
450
  Args:
477
451
  dt: Any datetime object to format.
478
- format: Optional format string. If provided, calls `strftime()` with this format.
479
- Otherwise, uses the default ISO8601/RFC3339 format, adapted for available precision.
480
452
 
481
453
  Returns:
482
454
  str: ISO8601/RFC3339 formatted datetime string.
@@ -492,8 +464,54 @@ def ab_datetime_format(
492
464
  if dt.tzinfo is None:
493
465
  dt = dt.replace(tzinfo=timezone.utc)
494
466
 
495
- if format:
496
- return dt.strftime(format)
467
+ # Format with consistent timezone representation
468
+ base = dt.strftime("%Y-%m-%dT%H:%M:%S")
469
+ if dt.microsecond:
470
+ base = f"{base}.{dt.microsecond:06d}"
471
+ offset = dt.strftime("%z")
472
+ return f"{base}{offset[:3]}:{offset[3:]}"
473
+
474
+
475
+ def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
476
+ """Try to parse the input string as an ISO8601/RFC3339 datetime, failing gracefully instead of raising an exception.
477
+
478
+ Requires strict ISO8601/RFC3339 format with:
479
+ - 'T' delimiter between date and time components
480
+ - Valid timezone (Z for UTC or ±HH:MM offset)
481
+ - Complete datetime representation (date and time)
497
482
 
498
- # Format with consistent timezone representation and "T" delimiter
499
- return dt.isoformat(sep="T", timespec="auto")
483
+ Returns None for any non-compliant formats including:
484
+ - Space-delimited datetimes
485
+ - Date-only strings
486
+ - Missing timezone
487
+ - Invalid timezone format
488
+ - Wrong date/time separators
489
+
490
+ Example:
491
+ >>> ab_datetime_try_parse("2023-03-14T15:09:26Z") # Returns AirbyteDateTime
492
+ >>> ab_datetime_try_parse("2023-03-14 15:09:26Z") # Returns None (invalid format)
493
+ >>> ab_datetime_try_parse("2023-03-14") # Returns None (missing time and timezone)
494
+ """
495
+ if not isinstance(dt_str, str):
496
+ return None
497
+ try:
498
+ # Validate format before parsing
499
+ if "T" not in dt_str:
500
+ return None
501
+ if not any(x in dt_str for x in ["Z", "+", "-"]):
502
+ return None
503
+ if "/" in dt_str or " " in dt_str or "GMT" in dt_str:
504
+ return None
505
+
506
+ # Try parsing with dateutil
507
+ parsed = parser.parse(dt_str)
508
+ if parsed.tzinfo is None:
509
+ return None
510
+
511
+ # Validate time components
512
+ if not (0 <= parsed.hour <= 23 and 0 <= parsed.minute <= 59 and 0 <= parsed.second <= 59):
513
+ return None
514
+
515
+ return AirbyteDateTime.from_datetime(parsed)
516
+ except (ValueError, TypeError):
517
+ return None
@@ -3,143 +3,43 @@
3
3
  #
4
4
 
5
5
 
6
- import copy
7
- from typing import Any, Dict, List, Mapping, Optional, Union
8
-
9
- from airbyte_cdk.sources.declarative.requesters.request_option import (
10
- RequestOption,
11
- RequestOptionType,
12
- )
13
- from airbyte_cdk.sources.types import Config
14
-
15
-
16
- def _merge_mappings(
17
- target: Dict[str, Any],
18
- source: Mapping[str, Any],
19
- path: Optional[List[str]] = None,
20
- allow_same_value_merge: bool = False,
21
- ) -> None:
22
- """
23
- Recursively merge two dictionaries, raising an error if there are any conflicts.
24
- For body_json requests (allow_same_value_merge=True), a conflict occurs only when the same path has different values.
25
- For other request types (allow_same_value_merge=False), any duplicate key is a conflict, regardless of value.
26
-
27
- Args:
28
- target: The dictionary to merge into
29
- source: The dictionary to merge from
30
- path: The current path in the nested structure (for error messages)
31
- allow_same_value_merge: Whether to allow merging the same value into the same key. Set to false by default, should only be true for body_json injections
32
- """
33
- path = path or []
34
- for key, source_value in source.items():
35
- current_path = path + [str(key)]
36
-
37
- if key in target:
38
- target_value = target[key]
39
- if isinstance(target_value, dict) and isinstance(source_value, dict):
40
- # Only body_json supports nested_structures
41
- if not allow_same_value_merge:
42
- raise ValueError(
43
- f"Request body collision, duplicate keys detected at key path: {'.'.join(current_path)}. Please ensure that all keys in the request are unique."
44
- )
45
- # If both are dictionaries, recursively merge them
46
- _merge_mappings(target_value, source_value, current_path, allow_same_value_merge)
47
-
48
- elif not allow_same_value_merge or target_value != source_value:
49
- # If same key has different values, that's a conflict
50
- raise ValueError(
51
- f"Request body collision, duplicate keys detected at key path: {'.'.join(current_path)}. Please ensure that all keys in the request are unique."
52
- )
53
- else:
54
- # No conflict, just copy the value (using deepcopy for nested structures)
55
- target[key] = copy.deepcopy(source_value)
6
+ from typing import Any, List, Mapping, Optional, Set, Union
56
7
 
57
8
 
58
9
  def combine_mappings(
59
10
  mappings: List[Optional[Union[Mapping[str, Any], str]]],
60
- allow_same_value_merge: bool = False,
61
11
  ) -> Union[Mapping[str, Any], str]:
62
12
  """
63
- Combine multiple mappings into a single mapping.
64
-
65
- For body_json requests (allow_same_value_merge=True):
66
- - Supports nested structures (e.g., {"data": {"user": {"id": 1}}})
67
- - Allows duplicate keys if their values match
68
- - Raises error if same path has different values
69
-
70
- For other request types (allow_same_value_merge=False):
71
- - Only supports flat structures
72
- - Any duplicate key raises an error, regardless of value
73
-
74
- Args:
75
- mappings: List of mappings to combine
76
- allow_same_value_merge: Whether to allow duplicate keys with matching values.
77
- Should only be True for body_json requests.
78
-
79
- Returns:
80
- A single mapping combining all inputs, or a string if there is exactly one
81
- string mapping and no other non-empty mappings.
82
-
83
- Raises:
84
- ValueError: If there are:
85
- - Multiple string mappings
86
- - Both a string mapping and non-empty dictionary mappings
87
- - Conflicting keys/paths based on allow_same_value_merge setting
13
+ Combine multiple mappings into a single mapping. If any of the mappings are a string, return
14
+ that string. Raise errors in the following cases:
15
+ * If there are duplicate keys across mappings
16
+ * If there are multiple string mappings
17
+ * If there are multiple mappings containing keys and one of them is a string
88
18
  """
89
- if not mappings:
90
- return {}
19
+ all_keys: List[Set[str]] = []
20
+ for part in mappings:
21
+ if part is None:
22
+ continue
23
+ keys = set(part.keys()) if not isinstance(part, str) else set()
24
+ all_keys.append(keys)
91
25
 
92
- # Count how many string options we have, ignoring None values
93
- string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None)
26
+ string_options = sum(isinstance(mapping, str) for mapping in mappings)
27
+ # If more than one mapping is a string, raise a ValueError
94
28
  if string_options > 1:
95
29
  raise ValueError("Cannot combine multiple string options")
96
30
 
97
- # Filter out None values and empty mappings
98
- non_empty_mappings = [
99
- m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m)
100
- ]
101
-
102
- # If there is only one string option and no other non-empty mappings, return it
103
- if string_options == 1:
104
- if len(non_empty_mappings) > 1:
105
- raise ValueError("Cannot combine multiple options if one is a string")
106
- return next(m for m in non_empty_mappings if isinstance(m, str))
107
-
108
- # Start with an empty result and merge each mapping into it
109
- result: Dict[str, Any] = {}
110
- for mapping in non_empty_mappings:
111
- if mapping and isinstance(mapping, Mapping):
112
- _merge_mappings(result, mapping, allow_same_value_merge=allow_same_value_merge)
113
-
114
- return result
31
+ if string_options == 1 and sum(len(keys) for keys in all_keys) > 0:
32
+ raise ValueError("Cannot combine multiple options if one is a string")
115
33
 
34
+ # If any mapping is a string, return it
35
+ for mapping in mappings:
36
+ if isinstance(mapping, str):
37
+ return mapping
116
38
 
117
- def _validate_component_request_option_paths(
118
- config: Config, *request_options: Optional[RequestOption]
119
- ) -> None:
120
- """
121
- Validates that a component with multiple request options does not have conflicting paths.
122
- Uses dummy values for validation since actual values might not be available at init time.
123
- """
124
- grouped_options: Dict[RequestOptionType, List[RequestOption]] = {}
125
- for option in request_options:
126
- if option:
127
- grouped_options.setdefault(option.inject_into, []).append(option)
128
-
129
- for inject_type, options in grouped_options.items():
130
- if len(options) <= 1:
131
- continue
132
-
133
- option_dicts: List[Optional[Union[Mapping[str, Any], str]]] = []
134
- for i, option in enumerate(options):
135
- option_dict: Dict[str, Any] = {}
136
- # Use indexed dummy values to ensure we catch conflicts
137
- option.inject_into_request(option_dict, f"dummy_value_{i}", config)
138
- option_dicts.append(option_dict)
39
+ # If there are duplicate keys across mappings, raise a ValueError
40
+ intersection = set().union(*all_keys)
41
+ if len(intersection) < sum(len(keys) for keys in all_keys):
42
+ raise ValueError(f"Duplicate keys found: {intersection}")
139
43
 
140
- try:
141
- combine_mappings(
142
- option_dicts, allow_same_value_merge=(inject_type == RequestOptionType.body_json)
143
- )
144
- except ValueError as error:
145
- raise ValueError(error)
44
+ # Return the combined mappings
45
+ return {key: value for mapping in mappings if mapping for key, value in mapping.items()} # type: ignore # mapping can't be string here
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.34.0.dev2
3
+ Version: 6.34.1.dev0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT