airbyte-cdk 6.9.1__py3-none-any.whl → 6.9.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,9 +56,8 @@ from airbyte_cdk.sources.types import Config, StreamState
56
56
 
57
57
 
58
58
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
59
- # By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
60
- # because it has hit the limit of futures but no partition reader is consuming them.
61
- _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
59
+ # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
60
+ SINGLE_THREADED_CONCURRENCY_LEVEL = 1
62
61
 
63
62
  def __init__(
64
63
  self,
@@ -108,8 +107,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
108
107
  concurrency_level // 2, 1
109
108
  ) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
110
109
  else:
111
- concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
112
- initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
110
+ concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
111
+ initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL
113
112
 
114
113
  self._concurrent_source = ConcurrentSource.create(
115
114
  num_workers=concurrency_level,
@@ -327,7 +327,7 @@ definitions:
327
327
  additionalProperties: true
328
328
  ConcurrencyLevel:
329
329
  title: Concurrency Level
330
- description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will be processed at the same time. Note that a value of 1 could create deadlock if a stream has a very high number of partitions.
330
+ description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will be processed at the same time.
331
331
  type: object
332
332
  required:
333
333
  - default_concurrency
@@ -4,7 +4,7 @@
4
4
 
5
5
  import ast
6
6
  from functools import cache
7
- from typing import Any, Mapping, Optional, Set, Tuple, Type
7
+ from typing import Any, Mapping, Optional, Tuple, Type
8
8
 
9
9
  from jinja2 import meta
10
10
  from jinja2.environment import Template
@@ -27,35 +27,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
27
27
  def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
28
28
  if attr in ["_partition"]:
29
29
  return True
30
- return super().is_safe_attribute(obj, attr, value) # type: ignore # for some reason, mypy says 'Returning Any from function declared to return "bool"'
31
-
32
-
33
- # These aliases are used to deprecate existing keywords without breaking all existing connectors.
34
- _ALIASES = {
35
- "stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
36
- "stream_partition": "stream_slice", # Use stream_partition to access partition router's values
37
- }
38
-
39
- # These extensions are not installed so they're not currently a problem,
40
- # but we're still explicitly removing them from the jinja context.
41
- # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
42
- _RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
43
-
44
- # By default, these Python builtin functions are available in the Jinja context.
45
- # We explicitly remove them because of the potential security risk.
46
- # Please add a unit test to test_jinja.py when adding a restriction.
47
- _RESTRICTED_BUILTIN_FUNCTIONS = [
48
- "range"
49
- ] # The range function can cause very expensive computations
50
-
51
- _ENVIRONMENT = StreamPartitionAccessEnvironment()
52
- _ENVIRONMENT.filters.update(**filters)
53
- _ENVIRONMENT.globals.update(**macros)
54
-
55
- for extension in _RESTRICTED_EXTENSIONS:
56
- _ENVIRONMENT.extensions.pop(extension, None)
57
- for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
58
- _ENVIRONMENT.globals.pop(builtin, None)
30
+ return super().is_safe_attribute(obj, attr, value)
59
31
 
60
32
 
61
33
  class JinjaInterpolation(Interpolation):
@@ -76,6 +48,34 @@ class JinjaInterpolation(Interpolation):
76
48
  Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
77
49
  """
78
50
 
51
+ # These aliases are used to deprecate existing keywords without breaking all existing connectors.
52
+ ALIASES = {
53
+ "stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
54
+ "stream_partition": "stream_slice", # Use stream_partition to access partition router's values
55
+ }
56
+
57
+ # These extensions are not installed so they're not currently a problem,
58
+ # but we're still explicitly removing them from the jinja context.
59
+ # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
60
+ RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
61
+
62
+ # By default, these Python builtin functions are available in the Jinja context.
63
+ # We explicitly remove them because of the potential security risk.
64
+ # Please add a unit test to test_jinja.py when adding a restriction.
65
+ RESTRICTED_BUILTIN_FUNCTIONS = [
66
+ "range"
67
+ ] # The range function can cause very expensive computations
68
+
69
+ def __init__(self) -> None:
70
+ self._environment = StreamPartitionAccessEnvironment()
71
+ self._environment.filters.update(**filters)
72
+ self._environment.globals.update(**macros)
73
+
74
+ for extension in self.RESTRICTED_EXTENSIONS:
75
+ self._environment.extensions.pop(extension, None)
76
+ for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
77
+ self._environment.globals.pop(builtin, None)
78
+
79
79
  def eval(
80
80
  self,
81
81
  input_str: str,
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
86
86
  ) -> Any:
87
87
  context = {"config": config, **additional_parameters}
88
88
 
89
- for alias, equivalent in _ALIASES.items():
89
+ for alias, equivalent in self.ALIASES.items():
90
90
  if alias in context:
91
91
  # This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
92
92
  raise ValueError(
@@ -105,7 +105,6 @@ class JinjaInterpolation(Interpolation):
105
105
  raise Exception(f"Expected a string, got {input_str}")
106
106
  except UndefinedError:
107
107
  pass
108
-
109
108
  # If result is empty or resulted in an undefined error, evaluate and return the default string
110
109
  return self._literal_eval(self._eval(default, context), valid_types)
111
110
 
@@ -133,16 +132,16 @@ class JinjaInterpolation(Interpolation):
133
132
  return s
134
133
 
135
134
  @cache
136
- def _find_undeclared_variables(self, s: Optional[str]) -> Set[str]:
135
+ def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
137
136
  """
138
137
  Find undeclared variables and cache them
139
138
  """
140
- ast = _ENVIRONMENT.parse(s) # type: ignore # parse is able to handle None
139
+ ast = self._environment.parse(s) # type: ignore # parse is able to handle None
141
140
  return meta.find_undeclared_variables(ast)
142
141
 
143
142
  @cache
144
- def _compile(self, s: str) -> Template:
143
+ def _compile(self, s: Optional[str]) -> Template:
145
144
  """
146
145
  We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
147
146
  """
148
- return _ENVIRONMENT.from_string(s)
147
+ return self._environment.from_string(s) # type: ignore [arg-type] # Expected `str | Template` but passed `str | None`
@@ -138,22 +138,12 @@ class HttpClient:
138
138
  cache_dir = os.getenv(ENV_REQUEST_CACHE_PATH)
139
139
  # Use in-memory cache if cache_dir is not set
140
140
  # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
141
- # Use in-memory cache if cache_dir is not set
142
- # This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
143
- sqlite_path = (
144
- str(Path(cache_dir) / self.cache_filename)
145
- if cache_dir
146
- else "file::memory:?cache=shared"
147
- )
148
- # By using `PRAGMA synchronous=OFF` and `PRAGMA journal_mode=WAL`, we reduce the possible occurrences of `database table is locked` errors.
149
- # Note that those were blindly added at the same time and one or the other might be sufficient to prevent the issues but we have seen good results with both. Feel free to revisit given more information.
150
- # There are strong signals that `fast_save` might create problems but if the sync crashes, we start back from the beginning in terms of sqlite anyway so the impact should be minimal. Signals are:
151
- # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-f43db4a5edf931647c32dec28ea7557aae4cae8444af4b26c8ecbe88d8c925aaR238
152
- # * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-2e7f95b7d7be270ff1a8118f817ea3e6663cdad273592e536a116c24e6d23c18R164-R168
153
- # * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
154
- backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
141
+ if cache_dir:
142
+ sqlite_path = str(Path(cache_dir) / self.cache_filename)
143
+ else:
144
+ sqlite_path = "file::memory:?cache=shared"
155
145
  return CachedLimiterSession(
156
- sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
146
+ sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True
157
147
  )
158
148
  else:
159
149
  return LimiterSession(api_budget=self._api_budget)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.9.1
3
+ Version: 6.9.1.dev0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Z1v19wOXYpuffvcmZ5TZyU4kSCFyt3Hba7qfY-2o46U,124229
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=kZkGoasVzufDE2BkFo_7DVO6xHL9kueNBjddtl-7kaU,124134
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
@@ -97,7 +97,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py,sha256=UrF
97
97
  airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha256=i2L0gREX8nHA-pKokdVqwBf4aJgWP71KOxIABj_DHcY,1857
98
98
  airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZnZ_hB7rvBSZxG9s0RSrzsOkDWbBY0_P6qu5lEfc,3212
99
99
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
100
- airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
100
+ airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=gcihTEnfD_6sUivxOomoY5r7VMAGqVVnK_HEsid9Y5k,6605
101
101
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
102
102
  airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
103
103
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -276,7 +276,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
276
276
  airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
277
277
  airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
278
278
  airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
279
- airbyte_cdk/sources/streams/http/http_client.py,sha256=dyNrbcahEnDfGTrhqkr1XdfPiuVVRlKOdj-TJ5WRVrk,22923
279
+ airbyte_cdk/sources/streams/http/http_client.py,sha256=Jqmbd3jL8jjnOfA1325-cpG3nE80YDMDwyxPZ08D7wo,21341
280
280
  airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
281
281
  airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
282
282
  airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
@@ -333,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
333
333
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
334
334
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
335
335
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
336
- airbyte_cdk-6.9.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
337
- airbyte_cdk-6.9.1.dist-info/METADATA,sha256=vmQrD-o8vQwVRNF1PSFviNR1x8VcdqsvNr42p8_8u18,5949
338
- airbyte_cdk-6.9.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
339
- airbyte_cdk-6.9.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
340
- airbyte_cdk-6.9.1.dist-info/RECORD,,
336
+ airbyte_cdk-6.9.1.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
337
+ airbyte_cdk-6.9.1.dev0.dist-info/METADATA,sha256=pA5YAkFWlFKzRsXDIw20NDtNEH7Sk9lHgR5bMgR8DVM,5954
338
+ airbyte_cdk-6.9.1.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
339
+ airbyte_cdk-6.9.1.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
340
+ airbyte_cdk-6.9.1.dev0.dist-info/RECORD,,