airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
  16. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  17. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  18. airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
  19. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
  20. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  21. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
  22. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  23. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  24. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
  25. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  26. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  27. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
  28. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
  29. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  30. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  31. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  32. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
  33. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  34. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  35. airbyte_cdk/sources/http_logger.py +1 -1
  36. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  37. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  38. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  39. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  40. airbyte_cdk/sources/streams/core.py +6 -6
  41. airbyte_cdk/sources/streams/http/http.py +1 -2
  42. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  43. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
  44. airbyte_cdk/sources/types.py +4 -2
  45. airbyte_cdk/sources/utils/transform.py +23 -2
  46. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  47. airbyte_cdk/utils/datetime_helpers.py +499 -0
  48. airbyte_cdk/utils/mapping_helpers.py +27 -86
  49. airbyte_cdk/utils/slice_hasher.py +8 -1
  50. airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
  51. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
  52. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
  53. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
  54. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
  55. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -59,6 +59,11 @@ class CheckDynamicStream(BaseModel):
59
59
  description="Numbers of the streams to try reading from when running a check operation.",
60
60
  title="Stream Count",
61
61
  )
62
+ use_check_availability: Optional[bool] = Field(
63
+ True,
64
+ description="Enables stream check availability. This field is automatically set by the CDK.",
65
+ title="Use Check Availability",
66
+ )
62
67
 
63
68
 
64
69
  class ConcurrencyLevel(BaseModel):
@@ -328,6 +333,16 @@ class LegacyToPerPartitionStateMigration(BaseModel):
328
333
  type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None
329
334
 
330
335
 
336
# Configuration model for clamping datetime windows to a target period.
# Referenced by the optional `clamping` field of `DatetimeBasedCursor`.
class Clamping(BaseModel):
    # Required. Name of the period that windows are clamped to; may also be an
    # interpolated expression such as "{{ config['target'] }}" (see `examples`).
    target: str = Field(
        ...,
        description="The period of time that datetime windows will be clamped by",
        examples=["DAY", "WEEK", "MONTH", "{{ config['target'] }}"],
        title="Target",
    )
    # Optional free-form mapping. NOTE(review): presumably holds extra settings
    # specific to the chosen target — the schema gives no description; confirm.
    target_details: Optional[Dict[str, Any]] = None
344
+
345
+
331
346
  class Algorithm(Enum):
332
347
  HS256 = "HS256"
333
348
  HS384 = "HS384"
@@ -496,8 +511,8 @@ class OAuthAuthenticator(BaseModel):
496
511
  examples=["custom_app_id"],
497
512
  title="Client ID Property Name",
498
513
  )
499
- client_id: str = Field(
500
- ...,
514
+ client_id: Optional[str] = Field(
515
+ None,
501
516
  description="The OAuth client ID. Fill it in the user inputs.",
502
517
  examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
503
518
  title="Client ID",
@@ -508,8 +523,8 @@ class OAuthAuthenticator(BaseModel):
508
523
  examples=["custom_app_secret"],
509
524
  title="Client Secret Property Name",
510
525
  )
511
- client_secret: str = Field(
512
- ...,
526
+ client_secret: Optional[str] = Field(
527
+ None,
513
528
  description="The OAuth client secret. Fill it in the user inputs.",
514
529
  examples=[
515
530
  "{{ config['client_secret }}",
@@ -614,6 +629,16 @@ class OAuthAuthenticator(BaseModel):
614
629
  description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.",
615
630
  title="Token Updater",
616
631
  )
632
+ profile_assertion: Optional[JwtAuthenticator] = Field(
633
+ None,
634
+ description="The authenticator being used to authenticate the client authenticator.",
635
+ title="Profile Assertion",
636
+ )
637
+ use_profile_assertion: Optional[bool] = Field(
638
+ False,
639
+ description="Enable using profile assertion as a flow for OAuth authorization.",
640
+ title="Use Profile Assertion",
641
+ )
617
642
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
618
643
 
619
644
 
@@ -716,8 +741,13 @@ class HttpResponseFilter(BaseModel):
716
741
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
717
742
 
718
743
 
744
# Structured (non-string) type description; accepted as one of the possible
# shapes of `TypesMap.target_type`.
class ComplexFieldType(BaseModel):
    # Required name of the type.
    field_type: str
    # Optional nested description: either a plain type name or another
    # ComplexFieldType (self-reference, resolved by the module-level
    # `ComplexFieldType.update_forward_refs()` call).
    # NOTE(review): presumably the element type when `field_type` is a
    # container such as an array — confirm against the YAML schema.
    items: Optional[Union[str, ComplexFieldType]] = None
747
+
748
+
719
749
  class TypesMap(BaseModel):
720
- target_type: Union[str, List[str]]
750
+ target_type: Union[str, List[str], ComplexFieldType]
721
751
  current_type: Union[str, List[str]]
722
752
  condition: Optional[str] = None
723
753
 
@@ -1170,17 +1200,11 @@ class InjectInto(Enum):
1170
1200
 
1171
1201
  class RequestOption(BaseModel):
1172
1202
  type: Literal["RequestOption"]
1173
- field_name: Optional[str] = Field(
1174
- None,
1175
- description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.",
1203
+ field_name: str = Field(
1204
+ ...,
1205
+ description="Configures which key should be used in the location that the descriptor is being injected into",
1176
1206
  examples=["segment_id"],
1177
- title="Field Name",
1178
- )
1179
- field_path: Optional[List[str]] = Field(
1180
- None,
1181
- description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
1182
- examples=[["data", "viewer", "id"]],
1183
- title="Field Path",
1207
+ title="Request Option",
1184
1208
  )
1185
1209
  inject_into: InjectInto = Field(
1186
1210
  ...,
@@ -1457,6 +1481,11 @@ class AuthFlow(BaseModel):
1457
1481
 
1458
1482
  class DatetimeBasedCursor(BaseModel):
1459
1483
  type: Literal["DatetimeBasedCursor"]
1484
+ clamping: Optional[Clamping] = Field(
1485
+ None,
1486
+ description="This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)",
1487
+ title="Date Range Clamping",
1488
+ )
1460
1489
  cursor_field: str = Field(
1461
1490
  ...,
1462
1491
  description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
@@ -2241,6 +2270,7 @@ class DynamicDeclarativeStream(BaseModel):
2241
2270
  )
2242
2271
 
2243
2272
 
2273
+ ComplexFieldType.update_forward_refs()
2244
2274
  CompositeErrorHandler.update_forward_refs()
2245
2275
  DeclarativeSource1.update_forward_refs()
2246
2276
  DeclarativeSource2.update_forward_refs()
@@ -0,0 +1,143 @@
1
+ """Contains functions to compile custom code from text."""
2
+
3
+ import hashlib
4
+ import os
5
+ import sys
6
+ from collections.abc import Mapping
7
+ from types import ModuleType
8
+ from typing import Any, cast
9
+
10
+ from typing_extensions import Literal
11
+
12
# Names of the checksum algorithms accepted for validating injected code.
ChecksumType = Literal["md5", "sha256"]

# Maps a checksum type name to the `hashlib` constructor used to compute it.
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}

# Keys under which the components module is registered in `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"

# Config key names for injected content.
# NOTE(review): INJECTED_MANIFEST is not read in this module — presumably the
# config key for an injected manifest; confirm against its consumers.
INJECTED_MANIFEST = "__injected_declarative_manifest"
# Config key holding the text of a custom `components.py`.
INJECTED_COMPONENTS_PY = "__injected_components_py"
# Config key holding the checksums used to validate that text.
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"

# Environment variable that must equal "true" (case-insensitive) for custom
# code execution to be permitted.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
23
+
24
+
25
class AirbyteCodeTamperedError(Exception):
    """Signal that the components module text does not match its checksum.

    Treated as fatal: a mismatch may indicate that the code was tampered with.
    """
30
+
31
+
32
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal that custom code was about to run where it is not allowed."""

    def __init__(self) -> None:
        # The message is fixed; callers raise this error without arguments.
        message = (
            "Custom connector code is not permitted in this environment. "
            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` "
            "environment variable to 'true' in your Airbyte environment. "
            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
            "custom connector code."
        )
        super().__init__(message)
43
+
44
+
45
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Return the hex digest of the input text using the specified hash type.

    Args:
        input_text: Text to hash; it is encoded with the default (UTF-8) codec.
        hash_type: A key of `CHECKSUM_FUNCTIONS` ("md5" or "sha256").

    Returns:
        The hexadecimal digest string.

    Raises:
        ValueError: If `input_text` is empty or `hash_type` is not supported.
    """
    if not input_text:
        raise ValueError("Input text cannot be empty.")

    # Raise the same kind of error `validate_python_code` uses for an unknown
    # checksum type instead of leaking an opaque KeyError from the lookup.
    hash_factory = CHECKSUM_FUNCTIONS.get(hash_type)
    if hash_factory is None:
        raise ValueError(
            f"Unsupported checksum type: {hash_type}. "
            f"Supported checksum types are: {', '.join(CHECKSUM_FUNCTIONS)}"
        )

    hash_object = hash_factory()
    hash_object.update(input_text.encode())
    return hash_object.hexdigest()
53
+
54
+
55
def custom_code_execution_permitted() -> bool:
    """Tell whether the environment allows running custom connector code.

    Permission is granted only when the `AIRBYTE_ALLOW_CUSTOM_CODE` environment
    variable is set to 'true' (compared case-insensitively). An unset variable
    is treated as an empty string and therefore denies execution.
    """
    flag_value = os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return flag_value.lower() == "true"
61
+
62
+
63
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Validate the provided Python code text against the provided checksums.

    Currently we fail if no checksums are provided, although this may change in the future.

    Args:
        code_text: The Python source text to validate.
        checksums: Mapping of checksum type (a key of `CHECKSUM_FUNCTIONS`) to the
            expected hex digest. Every entry must match.

    Raises:
        ValueError: If `checksums` is empty or `None`, or names an unsupported
            checksum type (also raised by `_hash_text` when `code_text` is empty).
        AirbyteCodeTamperedError: If any computed digest differs from the expected one.
    """
    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, checksum in checksums.items():
        if checksum_type not in CHECKSUM_FUNCTIONS:
            # Join the names explicitly: interpolating `.keys()` would render
            # as "dict_keys([...])" in the user-facing message.
            supported_types = ", ".join(CHECKSUM_FUNCTIONS)
            raise ValueError(
                f"Unsupported checksum type: {checksum_type}. "
                f"Supported checksum types are: {supported_types}"
            )

        if _hash_text(code_text, checksum_type) != checksum:
            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
82
+
83
+
84
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module for the given config.

    When the config carries custom `components.py` text, that text is validated,
    executed and registered in `sys.modules` so manifest declarations can later
    import the classes it defines. Otherwise an already-registered module is
    looked up, preferring `source_declarative_manifest.components` over
    `components`.

    Returns `None` when no custom components text is provided and neither module
    name is present in `sys.modules`.

    Raises:
        AirbyteCustomCodeNotPermittedError: If custom components text is provided
            but custom code execution is not permitted.
    """
    has_injected_code = bool(config) and INJECTED_COMPONENTS_PY in config

    if has_injected_code:
        if not custom_code_execution_permitted():
            raise AirbyteCustomCodeNotPermittedError

        # Build a fresh module from the injected Python text.
        python_text: str = config[INJECTED_COMPONENTS_PY]
        injected_checksums = config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None)
        return register_components_module_from_string(
            components_py_text=python_text,
            checksums=injected_checksums,
        )

    # Fall back to a module that is already imported/registered.
    for candidate_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        if candidate_name in sys.modules:
            return cast(ModuleType, sys.modules.get(candidate_name))

    # Nothing injected and nothing registered.
    return None
118
+
119
+
120
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Load and return the components module from a provided string containing the python code.

    The text is validated against `checksums`, executed in a new module named
    `components`, and that module is registered in `sys.modules` under both
    `components` and `source_declarative_manifest.components`.

    Args:
        components_py_text: Source text of the custom `components.py`.
        checksums: Mapping of checksum type to expected digest, forwarded to
            `validate_python_code`.

    Returns:
        The newly created and registered module.

    Raises:
        ValueError / AirbyteCodeTamperedError: Propagated from `validate_python_code`.
        Exception: Anything raised while executing the provided code; in that case
            the previous `sys.modules` entries are restored before re-raising.
    """
    # First validate the code
    validate_python_code(
        code_text=components_py_text,
        checksums=checksums,
    )

    # Create a new module object
    components_module = ModuleType(name=COMPONENTS_MODULE_NAME)

    # Register the module in `sys.modules` *before* executing it, as the import
    # system does. Code that resolves `sys.modules[cls.__module__]` while a class
    # is being created (e.g. `@dataclass` with string annotations) would
    # otherwise fail because the module is not yet registered.
    module_names = (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME)
    previous_modules = {name: sys.modules[name] for name in module_names if name in sys.modules}
    for name in module_names:
        sys.modules[name] = components_module

    try:
        # NOTE: this executes externally supplied code. It is reached only after
        # the checksum validation above; the permission check lives in the caller.
        exec(components_py_text, components_module.__dict__)
    except BaseException:
        # Do not leave a half-initialised module registered: put back whatever
        # was there before (or remove our entry) and let the error propagate.
        for name in module_names:
            if name in previous_modules:
                sys.modules[name] = previous_modules[name]
            else:
                sys.modules.pop(name, None)
        raise

    # The module can now be imported as `source_declarative_manifest.components`
    # and/or `components`.
    return components_module