airbyte-cdk 6.9.1__py3-none-any.whl → 6.9.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +4 -5
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/streams/http/http_client.py +5 -15
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev0.dist-info}/RECORD +9 -9
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev0.dist-info}/entry_points.txt +0 -0

airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -56,9 +56,8 @@ from airbyte_cdk.sources.types import Config, StreamState
 
 
 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
-    # By default, we defer to a value of
-
-    _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
+    # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
+    SINGLE_THREADED_CONCURRENCY_LEVEL = 1
 
     def __init__(
         self,
@@ -108,8 +107,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 concurrency_level // 2, 1
             )  # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
         else:
-            concurrency_level = self.
-            initial_number_of_partitions_to_generate = self.
+            concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
+            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
 
         self._concurrent_source = ConcurrentSource.create(
             num_workers=concurrency_level,
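
The two hunks above swap the previous `_LOWEST_SAFE_CONCURRENCY_LEVEL` floor for an explicit `SINGLE_THREADED_CONCURRENCY_LEVEL = 1`: when a manifest does not declare a `concurrency_level`, the dev build now falls back to a single worker thread. A minimal sketch of that fallback logic, assuming the helper name `resolve_concurrency` and its return shape are illustrative rather than part of the package:

```python
from typing import Optional, Tuple

# Illustrative constant mirroring the diff above; not the package's actual module.
SINGLE_THREADED_CONCURRENCY_LEVEL = 1

def resolve_concurrency(manifest_concurrency: Optional[int]) -> Tuple[int, int]:
    """Return (num_workers, initial_partitions_to_generate) for the concurrent source."""
    if manifest_concurrency is not None:
        # Keep partition generation at >= 1 so the range-based partition generator
        # never iterates an empty range and deadlocks during start-up.
        return manifest_concurrency, max(manifest_concurrency // 2, 1)
    # No concurrency_level in the manifest: run the Concurrent CDK engine on one thread.
    return SINGLE_THREADED_CONCURRENCY_LEVEL, SINGLE_THREADED_CONCURRENCY_LEVEL

print(resolve_concurrency(10))    # (10, 5)
print(resolve_concurrency(None))  # (1, 1)
```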

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -327,7 +327,7 @@ definitions:
     additionalProperties: true
   ConcurrencyLevel:
     title: Concurrency Level
-    description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
+    description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
     type: object
     required:
       - default_concurrency

airbyte_cdk/sources/declarative/interpolation/jinja.py

@@ -4,7 +4,7 @@
 
 import ast
 from functools import cache
-from typing import Any, Mapping, Optional,
+from typing import Any, Mapping, Optional, Tuple, Type
 
 from jinja2 import meta
 from jinja2.environment import Template
@@ -27,35 +27,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
     def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
         if attr in ["_partition"]:
             return True
-        return super().is_safe_attribute(obj, attr, value)
-
-
-# These aliases are used to deprecate existing keywords without breaking all existing connectors.
-_ALIASES = {
-    "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
-    "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
-}
-
-# These extensions are not installed so they're not currently a problem,
-# but we're still explicitly removing them from the jinja context.
-# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
-_RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
-
-# By default, these Python builtin functions are available in the Jinja context.
-# We explicitly remove them because of the potential security risk.
-# Please add a unit test to test_jinja.py when adding a restriction.
-_RESTRICTED_BUILTIN_FUNCTIONS = [
-    "range"
-]  # The range function can cause very expensive computations
-
-_ENVIRONMENT = StreamPartitionAccessEnvironment()
-_ENVIRONMENT.filters.update(**filters)
-_ENVIRONMENT.globals.update(**macros)
-
-for extension in _RESTRICTED_EXTENSIONS:
-    _ENVIRONMENT.extensions.pop(extension, None)
-for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
-    _ENVIRONMENT.globals.pop(builtin, None)
+        return super().is_safe_attribute(obj, attr, value)
 
 
 class JinjaInterpolation(Interpolation):
@@ -76,6 +48,34 @@ class JinjaInterpolation(Interpolation):
     Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
     """
 
+    # These aliases are used to deprecate existing keywords without breaking all existing connectors.
+    ALIASES = {
+        "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
+        "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
+    }
+
+    # These extensions are not installed so they're not currently a problem,
+    # but we're still explicitely removing them from the jinja context.
+    # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
+    RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
+
+    # By default, these Python builtin functions are available in the Jinja context.
+    # We explicitely remove them because of the potential security risk.
+    # Please add a unit test to test_jinja.py when adding a restriction.
+    RESTRICTED_BUILTIN_FUNCTIONS = [
+        "range"
+    ]  # The range function can cause very expensive computations
+
+    def __init__(self) -> None:
+        self._environment = StreamPartitionAccessEnvironment()
+        self._environment.filters.update(**filters)
+        self._environment.globals.update(**macros)
+
+        for extension in self.RESTRICTED_EXTENSIONS:
+            self._environment.extensions.pop(extension, None)
+        for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
+            self._environment.globals.pop(builtin, None)
+
     def eval(
         self,
         input_str: str,
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
     ) -> Any:
         context = {"config": config, **additional_parameters}
 
-        for alias, equivalent in
+        for alias, equivalent in self.ALIASES.items():
            if alias in context:
                 # This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
                 raise ValueError(
@@ -105,7 +105,6 @@ class JinjaInterpolation(Interpolation):
                 raise Exception(f"Expected a string, got {input_str}")
         except UndefinedError:
             pass
-
         # If result is empty or resulted in an undefined error, evaluate and return the default string
         return self._literal_eval(self._eval(default, context), valid_types)
 
@@ -133,16 +132,16 @@ class JinjaInterpolation(Interpolation):
         return s
 
     @cache
-    def _find_undeclared_variables(self, s: Optional[str]) ->
+    def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
         """
         Find undeclared variables and cache them
         """
-        ast =
+        ast = self._environment.parse(s)  # type: ignore # parse is able to handle None
         return meta.find_undeclared_variables(ast)
 
     @cache
-    def _compile(self, s: str) -> Template:
+    def _compile(self, s: Optional[str]) -> Template:
         """
         We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
         """
-        return
+        return self._environment.from_string(s)  # type: ignore [arg-type] # Expected `str | Template` but passed `str | None`
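
Taken together, the jinja.py hunks move the `ALIASES` map, the restricted extension/builtin lists, and the sandboxed environment itself from module scope onto the `JinjaInterpolation` class, so each instance builds its own environment in `__init__`. A rough, self-contained sketch of that per-instance pattern using stock Jinja2 (the class name, method names, and the omission of the CDK's custom filters and macros are simplifications, not the CDK's API):

```python
from jinja2 import meta
from jinja2.sandbox import SandboxedEnvironment

class SandboxedInterpolation:
    # Mirrors the RESTRICTED_* class attributes added in the diff above.
    RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]
    RESTRICTED_BUILTIN_FUNCTIONS = ["range"]  # range can trigger very expensive computations

    def __init__(self) -> None:
        # Each instance owns its environment instead of sharing one module-level object.
        self._environment = SandboxedEnvironment()
        for extension in self.RESTRICTED_EXTENSIONS:
            self._environment.extensions.pop(extension, None)  # no-op if never loaded
        for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
            self._environment.globals.pop(builtin, None)

    def undeclared_variables(self, template_str: str) -> set:
        # Same jinja2.meta call the CDK uses to find the variables a template references.
        return meta.find_undeclared_variables(self._environment.parse(template_str))

    def render(self, template_str: str, **context) -> str:
        return self._environment.from_string(template_str).render(**context)

interp = SandboxedInterpolation()
print(interp.undeclared_variables("{{ config['api_key'] }}"))  # {'config'}
print(interp.render("{{ value * 2 }}", value=21))              # 42
```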

airbyte_cdk/sources/streams/http/http_client.py

@@ -138,22 +138,12 @@ class HttpClient:
             cache_dir = os.getenv(ENV_REQUEST_CACHE_PATH)
             # Use in-memory cache if cache_dir is not set
             # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
-
-
-
-
-                if cache_dir
-                else "file::memory:?cache=shared"
-            )
-            # By using `PRAGMA synchronous=OFF` and `PRAGMA journal_mode=WAL`, we reduce the possible occurrences of `database table is locked` errors.
-            # Note that those were blindly added at the same time and one or the other might be sufficient to prevent the issues but we have seen good results with both. Feel free to revisit given more information.
-            # There are strong signals that `fast_save` might create problems but if the sync crashes, we start back from the beginning in terms of sqlite anyway so the impact should be minimal. Signals are:
-            # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-f43db4a5edf931647c32dec28ea7557aae4cae8444af4b26c8ecbe88d8c925aaR238
-            # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-2e7f95b7d7be270ff1a8118f817ea3e6663cdad273592e536a116c24e6d23c18R164-R168
-            # * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
-            backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
+            if cache_dir:
+                sqlite_path = str(Path(cache_dir) / self.cache_filename)
+            else:
+                sqlite_path = "file::memory:?cache=shared"
             return CachedLimiterSession(
-                sqlite_path, backend=
+                sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True
             )
         else:
             return LimiterSession(api_budget=self._api_budget)
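
The http_client.py hunk drops the hand-tuned `requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)` backend, along with the long comment justifying it, and goes back to passing a plain path with `backend="sqlite"`, falling back to a shared in-memory SQLite URI when no cache directory is configured. A hedged sketch of that selection logic with stock `requests_cache` (the `REQUEST_CACHE_PATH` variable name and the use of `CachedSession` instead of the CDK's `CachedLimiterSession` are assumptions for illustration):

```python
import os
from pathlib import Path

import requests_cache

def build_cached_session(cache_filename: str = "example_cache.sqlite") -> requests_cache.CachedSession:
    # Assumed to be the value behind the CDK's ENV_REQUEST_CACHE_PATH constant.
    cache_dir = os.getenv("REQUEST_CACHE_PATH")
    if cache_dir:
        # Persist the HTTP cache as a SQLite file under the configured directory.
        sqlite_path = str(Path(cache_dir) / cache_filename)
    else:
        # Shared in-memory SQLite database: nothing is written to disk (useful in unit tests).
        sqlite_path = "file::memory:?cache=shared"
    return requests_cache.CachedSession(sqlite_path, backend="sqlite", match_headers=True)

session = build_cached_session()
response = session.get("https://example.com")  # a second identical request is served from the cache
```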

{airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev0.dist-info}/RECORD

@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=kZkGoasVzufDE2BkFo_7DVO6xHL9kueNBjddtl-7kaU,124134
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -97,7 +97,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py,sha256=UrF
 airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha256=i2L0gREX8nHA-pKokdVqwBf4aJgWP71KOxIABj_DHcY,1857
 airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZnZ_hB7rvBSZxG9s0RSrzsOkDWbBY0_P6qu5lEfc,3212
 airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
-airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=
+airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=gcihTEnfD_6sUivxOomoY5r7VMAGqVVnK_HEsid9Y5k,6605
 airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
 airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
 airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -276,7 +276,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
 airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
 airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
 airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
-airbyte_cdk/sources/streams/http/http_client.py,sha256=
+airbyte_cdk/sources/streams/http/http_client.py,sha256=Jqmbd3jL8jjnOfA1325-cpG3nE80YDMDwyxPZ08D7wo,21341
 airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
 airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
 airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
@@ -333,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.9.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.9.1.dist-info/METADATA,sha256=
-airbyte_cdk-6.9.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-6.9.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.9.1.dist-info/RECORD,,
+airbyte_cdk-6.9.1.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.9.1.dev0.dist-info/METADATA,sha256=pA5YAkFWlFKzRsXDIw20NDtNEH7Sk9lHgR5bMgR8DVM,5954
+airbyte_cdk-6.9.1.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.9.1.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.9.1.dev0.dist-info/RECORD,,

LICENSE.txt, WHEEL, and entry_points.txt: file contents unchanged; only the dist-info directory was renamed.