PyPI - airbyte-cdk - Versions diffs - 6.8.2.dev1__py3-none-any.whl → 6.8.3rc1__py3-none-any.whl - Mend

airbyte-cdk 6.8.2.dev1py3-none-any.whl → 6.8.3rc1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of airbyte-cdk might be problematic. Click here for more details.

Files changed (16) hide show

airbyte_cdk/sources/declarative/concurrent_declarative_source.py CHANGED Viewed

@@ -20,9 +20,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
-from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
-    PerPartitionWithGlobalCursor,
-)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -309,59 +306,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                             cursor=final_state_cursor,
                         )
                     )
-                elif (
-                    incremental_sync_component_definition
-                    and incremental_sync_component_definition.get("type", "")
-                    == DatetimeBasedCursorModel.__name__
-                    and self._stream_supports_concurrent_partition_processing(
-                        declarative_stream=declarative_stream
-                    )
-                    and hasattr(declarative_stream.retriever, "stream_slicer")
-                    and isinstance(declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor)
-                ):
-                    stream_state = state_manager.get_stream_state(
-                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                    )
-                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
-                    cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                            state_manager=state_manager,
-                            model_type=DatetimeBasedCursorModel,
-                            component_definition=incremental_sync_component_definition,
-                            stream_name=declarative_stream.name,
-                            stream_namespace=declarative_stream.namespace,
-                            config=config or {},
-                            stream_state=stream_state,
-                            partition_router=partition_router,
-                        )
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            declarative_stream.name,
-                            declarative_stream.get_json_schema(),
-                            self._retriever_factory(
-                                name_to_stream_mapping[declarative_stream.name],
-                                config,
-                                stream_state,
-                            ),
-                            self.message_repository,
-                        ),
-                        cursor,
-                    )
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            availability_strategy=AlwaysAvailableAvailabilityStrategy(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=cursor.cursor_field.cursor_field_key,
-                            logger=self.logger,
-                            cursor=cursor,
-                        )
-                    )
                 else:
                     synchronous_streams.append(declarative_stream)
             else:

airbyte_cdk/sources/declarative/extractors/record_filter.py CHANGED Viewed

@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
     def __init__(
         self,
-        cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
+        date_time_based_cursor: DatetimeBasedCursor,
+        substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
         **kwargs: Any,
     ):
         super().__init__(**kwargs)
-        self._cursor = cursor
+        self._date_time_based_cursor = date_time_based_cursor
+        self._substream_cursor = substream_cursor
     def filter_records(
         self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
         records = (
             record
             for record in records
-            if self._cursor.should_be_synced(
+            if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
                 # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                 # Record stream name is empty because it is not used during the filtering
                 Record(data=record, associated_slice=stream_slice, stream_name="")

airbyte_cdk/sources/declarative/incremental/__init__.py CHANGED Viewed

@@ -2,7 +2,6 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
-from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import ConcurrentCursorFactory, ConcurrentPerPartitionCursor
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor
@@ -15,8 +14,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
 __all__ = [
     "CursorFactory",
-    "ConcurrentCursorFactory"
-    "ConcurrentPerPartitionCursor",
     "DatetimeBasedCursor",
     "DeclarativeCursor",
     "GlobalSubstreamCursor",

airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py CHANGED Viewed

@@ -303,15 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
             raise ValueError("A partition needs to be provided in order to get request body json")
     def should_be_synced(self, record: Record) -> bool:
-        if self._to_partition_key(record.associated_slice.partition) not in self._cursor_per_partition:
-            partition_state = (
-                self._state_to_migrate_from
-                if self._state_to_migrate_from
-                else self._NO_CURSOR_STATE
-            )
-            cursor = self._create_cursor(partition_state)
-            self._cursor_per_partition[self._to_partition_key(record.associated_slice.partition)] = cursor
         return self._get_cursor(record).should_be_synced(
             self._convert_record_to_cursor_record(record)
         )

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED Viewed

@@ -81,8 +81,6 @@ from airbyte_cdk.sources.declarative.extractors.record_selector import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
-    ConcurrentCursorFactory,
-    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -907,62 +905,6 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
-    def create_concurrent_cursor_from_perpartition_cursor(
-        self,
-        state_manager: ConnectorStateManager,
-        model_type: Type[BaseModel],
-        component_definition: ComponentDefinition,
-        stream_name: str,
-        stream_namespace: Optional[str],
-        config: Config,
-        stream_state: MutableMapping[str, Any],
-        partition_router,
-            **kwargs: Any,
-    ) -> ConcurrentPerPartitionCursor:
-        component_type = component_definition.get("type")
-        if component_definition.get("type") != model_type.__name__:
-            raise ValueError(
-                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
-            )
-        datetime_based_cursor_model = model_type.parse_obj(component_definition)
-        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
-            raise ValueError(
-                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
-            )
-        interpolated_cursor_field = InterpolatedString.create(
-            datetime_based_cursor_model.cursor_field,
-            parameters=datetime_based_cursor_model.parameters or {},
-        )
-        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
-        # Create the cursor factory
-        cursor_factory = ConcurrentCursorFactory(
-            partial(
-                self.create_concurrent_cursor_from_datetime_based_cursor,
-                state_manager=state_manager,
-                model_type=model_type,
-                component_definition=component_definition,
-                stream_name=stream_name,
-                stream_namespace=stream_namespace,
-                config=config,
-            )
-        )
-        # Return the concurrent cursor and state converter
-        return ConcurrentPerPartitionCursor(
-                cursor_factory=cursor_factory,
-                partition_router=partition_router,
-                stream_name=stream_name,
-                stream_namespace=stream_namespace,
-                stream_state=stream_state,
-                message_repository=self._message_repository,  # type: ignore
-                connector_state_manager=state_manager,
-                cursor_field=cursor_field,
-            )
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1245,14 +1187,17 @@ class ModelToComponentFactory:
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-            cursor = combined_slicers if isinstance(
-                combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-            ) else self._create_component_from_model(
-                model=model.incremental_sync, config=config
-            )
             client_side_incremental_sync = {
-                "cursor": cursor
+                "date_time_based_cursor": self._create_component_from_model(
+                    model=model.incremental_sync, config=config
+                ),
+                "substream_cursor": (
+                    combined_slicers
+                    if isinstance(
+                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                    )
+                    else None
+                ),
             }
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
@@ -1966,7 +1911,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
+        ):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's

airbyte_cdk/sources/declarative/retrievers/simple_retriever.py CHANGED Viewed

@@ -178,7 +178,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.request_option_provider.get_request_headers,
+            self.stream_slicer.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")

airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py CHANGED Viewed

@@ -38,6 +38,7 @@ class DeclarativePartitionFactory:
             stream_slice,
         )
 class DeclarativePartition(Partition):
     def __init__(
         self,

airbyte_cdk/sources/streams/concurrent/cursor.py CHANGED Viewed

@@ -240,15 +240,6 @@ class ConcurrentCursor(Cursor):
     def _extract_cursor_value(self, record: Record) -> Any:
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
-    def close_partition_without_emit(self, partition: Partition) -> None:
-        slice_count_before = len(self.state.get("slices", []))
-        self._add_slice_to_state(partition)
-        if slice_count_before < len(
-            self.state["slices"]
-        ):  # only emit if at least one slice has been processed
-            self._merge_partitions()
-        self._has_closed_at_least_one_slice = True
     def close_partition(self, partition: Partition) -> None:
         slice_count_before = len(self.state.get("slices", []))
         self._add_slice_to_state(partition)

airbyte_cdk/test/utils/manifest_only_fixtures.py ADDED Viewed

@@ -0,0 +1,55 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+import importlib.util
+from pathlib import Path
+from types import ModuleType
+from typing import Optional
+import pytest
+# The following fixtures are used to load a manifest-only connector's components module and manifest file.
+# They can be accessed from any test file in the connector's unit_tests directory by importing them as follows:
+# from airbyte_cdk.test.utils.manifest_only_fixtures import components_module, connector_dir, manifest_path
+# individual components can then be referenced as: components_module.<CustomComponentClass>
+@pytest.fixture(scope="session")
+def connector_dir(request: pytest.FixtureRequest) -> Path:
+    """Return the connector's root directory.
+    This assumes tests are being run from the unit_tests directory,
+    and that it is a direct child of the connector directory.
+    """
+    test_dir = Path(request.config.invocation_params.dir)
+    return test_dir.parent
+@pytest.fixture(scope="session")
+def components_module(connector_dir: Path) -> Optional[ModuleType]:
+    """Load and return the components module from the connector directory.
+    This assumes the components module is located at <connector_dir>/components.py.
+    """
+    components_path = connector_dir / "components.py"
+    if not components_path.exists():
+        return None
+    components_spec = importlib.util.spec_from_file_location("components", components_path)
+    if components_spec is None:
+        return None
+    components_module = importlib.util.module_from_spec(components_spec)
+    if components_spec.loader is None:
+        return None
+    components_spec.loader.exec_module(components_module)
+    return components_module
+@pytest.fixture(scope="session")
+def manifest_path(connector_dir: Path) -> Path:
+    """Return the path to the connector's manifest file."""
+    return connector_dir / "manifest.yaml"

airbyte_cdk-6.8.3rc1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,306 @@
+Metadata-Version: 2.1
+Name: airbyte-cdk
+Version: 6.8.3rc1
+Summary: A framework for writing Airbyte Connectors.
+Home-page: https://airbyte.com
+License: MIT
+Keywords: airbyte,connector-development-kit,cdk
+Author: Airbyte
+Author-email: contact@airbyte.io
+Requires-Python: >=3.10,<3.13
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Provides-Extra: file-based
+Provides-Extra: sphinx-docs
+Provides-Extra: sql
+Provides-Extra: vector-db-based
+Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
+Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
+Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
+Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
+Requires-Dist: backoff
+Requires-Dist: cachetools
+Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
+Requires-Dist: cryptography (>=42.0.5,<44.0.0)
+Requires-Dist: dpath (>=2.1.6,<3.0.0)
+Requires-Dist: dunamai (>=1.22.0,<2.0.0)
+Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
+Requires-Dist: genson (==1.3.0)
+Requires-Dist: isodate (>=0.6.1,<0.7.0)
+Requires-Dist: jsonref (>=0.2,<0.3)
+Requires-Dist: jsonschema (>=4.17.3,<4.18.0)
+Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
+Requires-Dist: langchain_core (==0.1.42)
+Requires-Dist: markdown ; extra == "file-based"
+Requires-Dist: nltk (==3.9.1)
+Requires-Dist: numpy (<2)
+Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
+Requires-Dist: orjson (>=3.10.7,<4.0.0)
+Requires-Dist: pandas (==2.2.2)
+Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
+Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
+Requires-Dist: pendulum (<3.0.0)
+Requires-Dist: psutil (==6.1.0)
+Requires-Dist: pyarrow (>=15.0.0,<15.1.0) ; extra == "file-based"
+Requires-Dist: pydantic (>=2.7,<3.0)
+Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
+Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
+Requires-Dist: pytesseract (==0.3.10) ; extra == "file-based"
+Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
+Requires-Dist: python-dateutil
+Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
+Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
+Requires-Dist: pytz (==2024.1)
+Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
+Requires-Dist: requests
+Requires-Dist: requests_cache
+Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
+Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
+Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
+Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
+Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
+Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
+Requires-Dist: wcmatch (==10.0)
+Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
+Project-URL: Documentation, https://docs.airbyte.io/
+Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
+Description-Content-Type: text/markdown
+# Airbyte Python CDK and Low-Code CDK
+Airbyte Python CDK is a framework for building Airbyte API Source Connectors. It provides a set of
+classes and helpers that make it easy to build a connector against an HTTP API (REST, GraphQL, etc),
+or a generic Python source connector.
+## Usage
+If you're looking to build a connector, we highly recommend that you
+[start with the Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview).
+It should be enough for 90% of connectors out there. For more flexible and complex connectors, use the
+[low-code CDK and `SourceDeclarativeManifest`](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).
+If that doesn't work, then consider building on top of the
+[lower-level Python CDK itself](https://docs.airbyte.com/connector-development/cdk-python/).
+### Quick Start
+To get started on a Python CDK based connector or a low-code connector, you can generate a connector
+project from a template:
+```bash
+# from the repo root
+cd airbyte-integrations/connector-templates/generator
+./generate.sh
+```
+### Example Connectors
+**HTTP Connectors**:
+- [Stripe](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/)
+- [Salesforce](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/)
+**Python connectors using the bare-bones `Source` abstraction**:
+- [Google Sheets](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py)
+This will generate a project with a type and a name of your choice and put it in
+`airbyte-integrations/connectors`. Open the directory with your connector in an editor and follow
+the `TODO` items.
+## Python CDK Overview
+Airbyte CDK code is within `airbyte_cdk` directory. Here's a high level overview of what's inside:
+- `connector_builder`. Internal wrapper that helps the Connector Builder platform run a declarative
+  manifest (low-code connector). You should not use this code directly. If you need to run a
+  `SourceDeclarativeManifest`, take a look at
+  [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest)
+  connector implementation instead.
+- `destinations`. Basic Destination connector support! If you're building a Destination connector in
+  Python, try that. Some of our vector DB destinations like `destination-pinecone` are using that
+  code.
+- `models` expose `airbyte_protocol.models` as a part of `airbyte_cdk` package.
+- `sources/concurrent_source` is the Concurrent CDK implementation. It supports reading data from
+  streams concurrently per slice / partition, useful for connectors with high throughput and high
+  number of records.
+- `sources/declarative` is the low-code CDK. It works on top of Airbyte Python CDK, but provides a
+  declarative manifest language to define streams, operations, etc. This makes it easier to build
+  connectors without writing Python code.
+- `sources/file_based` is the CDK for file-based sources. Examples include S3, Azure, GCS, etc.
+## Contributing
+Thank you for being interested in contributing to Airbyte Python CDK! Here are some guidelines to
+get you started:
+- We adhere to the [code of conduct](/CODE_OF_CONDUCT.md).
+- You can contribute by reporting bugs, posting github discussions, opening issues, improving
+  [documentation](/docs/), and submitting pull requests with bugfixes and new features alike.
+- If you're changing the code, please add unit tests for your change.
+- When submitting issues or PRs, please add a small reproduction project. Using the changes in your
+  connector and providing that connector code as an example (or a satellite PR) helps!
+### First time setup
+Install the project dependencies and development tools:
+```bash
+poetry install --all-extras
+```
+Installing all extras is required to run the full suite of unit tests.
+#### Running tests locally
+- Iterate on the CDK code locally
+- Run tests via `poetry run poe unit-test-with-cov`, or `python -m pytest -s unit_tests` if you want
+  to pass pytest options.
+- Run `poetry run poe check-local` to lint all code, type-check modified code, and run unit tests
+  with coverage in one command.
+To see all available scripts, run `poetry run poe`.
+#### Formatting the code
+- Iterate on the CDK code locally
+- Run `poetry run ruff format` to format your changes.
+To see all available `ruff` options, run `poetry run ruff`.
+##### Autogenerated files
+Low-code CDK models are generated from `sources/declarative/declarative_component_schema.yaml`. If
+the iteration you are working on includes changes to the models or the connector generator, you
+might want to regenerate them. In order to do that, you can run:
+```bash
+poetry run poe build
+```
+This will generate the code generator docker image and the component manifest files based on the
+schemas and templates.
+#### Testing
+All tests are located in the `unit_tests` directory. Run `poetry run poe unit-test-with-cov` to run
+them. This also presents a test coverage report. For faster iteration with no coverage report and
+more options, `python -m pytest -s unit_tests` is a good place to start.
+#### Building and testing a connector with your local CDK
+When developing a new feature in the CDK, you may find it helpful to run a connector that uses that
+new feature. You can test this in one of two ways:
+- Running a connector locally
+- Building and running a source via Docker
+##### Installing your local CDK into a local Python connector
+Open the connector's `pyproject.toml` file and replace the line with `airbyte_cdk` with the
+following:
+```toml
+airbyte_cdk = { path = "../../../airbyte-cdk/python/airbyte_cdk", develop = true }
+```
+Then, running `poetry update` should reinstall `airbyte_cdk` from your local working directory.
+##### Building a Python connector in Docker with your local CDK installed
+_Pre-requisite: Install the
+[`airbyte-ci` CLI](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md)_
+You can build your connector image with the local CDK using
+```bash
+# from the airbytehq/airbyte base directory
+airbyte-ci connectors --use-local-cdk --name=<CONNECTOR> build
+```
+Note that the local CDK is injected at build time, so if you make changes, you will have to run the
+build command again to see them reflected.
+##### Running Connector Acceptance Tests for a single connector in Docker with your local CDK installed
+_Pre-requisite: Install the
+[`airbyte-ci` CLI](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md)_
+To run acceptance tests for a single connector using the local CDK, from the connector directory,
+run
+```bash
+airbyte-ci connectors --use-local-cdk --name=<CONNECTOR> test
+```
+#### When you don't have access to the API
+There may be a time when you do not have access to the API (either because you don't have the
+credentials, network access, etc.). You will probably still want to do end-to-end testing at least
+once. In order to do so, you can emulate the server you would be reaching using a server stubbing
+tool.
+For example, using [mockserver](https://www.mock-server.com/), you can set up an expectation file
+like this:
+```json
+{
+  "httpRequest": {
+    "method": "GET",
+    "path": "/data"
+  },
+  "httpResponse": {
+    "body": "{\"data\": [{\"record_key\": 1}, {\"record_key\": 2}]}"
+  }
+}
+```
+Assuming this file has been created at `secrets/mock_server_config/expectations.json`, running the
+following command starts a mock server that answers any request on path `/data` with the response
+defined in the expectation file:
+```bash
+docker run -d --rm -v $(pwd)/secrets/mock_server_config:/config -p 8113:8113 --env MOCKSERVER_LOG_LEVEL=TRACE --env MOCKSERVER_SERVER_PORT=8113 --env MOCKSERVER_WATCH_INITIALIZATION_JSON=true --env MOCKSERVER_PERSISTED_EXPECTATIONS_PATH=/config/expectations.json --env MOCKSERVER_INITIALIZATION_JSON_PATH=/config/expectations.json mockserver/mockserver:5.15.0
+```
+HTTP requests to `localhost:8113/data` should now return the body defined in the expectations file.
+To test this, the implementer either has to change the code which defines the base URL for Python
+source or update the `url_base` from low-code. With the Connector Builder running in docker, you
+will have to use domain `host.docker.internal` instead of `localhost` as the requests are executed
+within docker.
+#### Publishing a new version to PyPi
+Python CDK has a
+[GitHub workflow](https://github.com/airbytehq/airbyte/actions/workflows/publish-cdk-command-manually.yml)
+that manages the CDK changelog, making a new release for `airbyte_cdk`, publishing it to PyPI, and
+then making a commit to update (and subsequently auto-release)
+[`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest)
+and Connector Builder (in the platform repository).
+> [!Note]: The workflow will handle the `CHANGELOG.md` entry for you. You should not add changelog
+> lines in your PRs to the CDK itself.
+> [!Warning]: The workflow bumps version on its own, please don't change the CDK version in
+> `pyproject.toml` manually.
+1. You only trigger the release workflow once all the PRs that you want to be included are already
+   merged into the `master` branch.
+2. Run the
+   [`Publish CDK Manually`](https://github.com/airbytehq/airbyte/actions/workflows/publish-cdk-command-manually.yml)
+   workflow from master using `release-type=major|minor|patch` and set the changelog message.
+3. When the workflow runs, it will commit a new version directly to master branch.
+4. The workflow will bump the version of `source-declarative-manifest` according to the
+   `release-type` of the CDK, then commit these changes back to master. The commit to master will
+   kick off a publish of the new version of `source-declarative-manifest`.
+5. The workflow will also add a pull request to `airbyte-platform-internal` repo to bump the
+   dependency in Connector Builder.

{airbyte_cdk-6.8.2.dev1.dist-info → airbyte_cdk-6.8.3rc1.dist-info}/RECORD RENAMED Viewed

@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PWqtQ6xzRZiM0XrMO_zCJjl9tvbFMJMwSec0nN2ZekA,26434
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=F2X2ZS9eDfrohNbxG2TgPW-f4YP8IAkMjO1XHtD6NIg,23464
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -80,15 +80,14 @@ airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=YFuL4D4RuuB8E1DNSb
 airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
 airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
 airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
-airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
+airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzxYOeIrDy1GINb1zH9MBy6suC5tm2LSk,3545
 airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=AkXPOWyp741cpYLBl9AbmVmOQmQ2BzZ2XjgsMEB6gGc,6583
 airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
-airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=zEERPIXz1WxCJypqlSXZCFIpT4-mIsjzRdmFlX2-nMg,1210
-airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=-ECXZbDh3nw7G4mBncsTT_68LWQvS8TySIgckBTZZQQ,11899
+airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=CmZl9ddwMZFo8L7mEl_OFHN3ahIFRSYrJjMbR_cJaFA,1006
 airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
 airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
 airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
-airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=cdk4gSuYQmqcxxIOclhms6cnI1qm-FrSu7lmZULxOPM,16199
+airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
 airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
 airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
 airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -110,7 +109,7 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=jVZ3ZV5YZrmDNIX5cM2mugXmnbH27zHRcD22_3oatpo,8454
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=mOO0HahnHP0yv5LHFCayIx98R-yYHw6qkY9T5BxSlBg,98683
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=tO7xkv4y5iH6wGkj5As1T5ItUQxlw6cLflHAH48PKwc,96355
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=8uGos2u7TFTx_EJBdcjdUGn3Eyx6jUuEa1_VB8UP_dI,631
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
 airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
@@ -156,7 +155,7 @@ airbyte_cdk/sources/declarative/requesters/requester.py,sha256=iVVpXQ4KEd9OyZNwm
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp07VmLKX5AafUlsZWFSrDpUDuJM,443
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=6IP6e9cjGEU2y77lcOKj1bqn3bYGBAsP8vJU4Skzp30,24182
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
@@ -165,7 +164,7 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
 airbyte_cdk/sources/declarative/spec/__init__.py,sha256=H0UwoRhgucbKBIzg85AXrifybVmfpwWpPdy22vZKVuo,141
 airbyte_cdk/sources/declarative/spec/spec.py,sha256=ODSNUgkDOhnLQnwLjgSaME6R3kNeywjROvbNrWEnsgU,1876
 airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=sI9vhc95RwJYOnA0VKjcbtKgFcmAbWjhdWBXFbAijOs,176
-airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=7KE_qBBP3QYA7qQdOE42u3fwUM5S1FD5rowf7gtu3qk,3462
+airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=E7feZ5xkHwFHODq8FSjwdGe291RZoCMCRHT1rWnQ1lI,3463
 airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
 airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
 airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=r4YdAuAk2bQtNWJMztIIy2CC-NglD9NeK1s1TeO9wkw,5027
@@ -246,7 +245,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
 airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
 airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
 airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
-airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=SbkWn2t5uxVhT6W657zrENWnxC74oyp_WU9ol-_w5so,21215
+airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Hke6CpD8Sq1FS4g1Xuht39UN7hKkGy1mvOxvQrm1lLM,20810
 airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
 airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
 airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -314,6 +313,7 @@ airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMo
 airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
 airbyte_cdk/test/utils/data.py,sha256=CkCR1_-rujWNmPXFR1IXTMwx1rAl06wAyIKWpDcN02w,820
 airbyte_cdk/test/utils/http_mocking.py,sha256=F2hpm2q4ijojQN5u2XtgTAp8aNgHgJ64eZNkZ9BW0ig,550
+airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=kGg8kSmEouHPDCJf8GKkKqEAQaCLYfgdPEvRTb64dCI,1898
 airbyte_cdk/test/utils/reading.py,sha256=SOTDYlps6Te9KumfTJ3vVDSm9EUXhvKtE8aD7gvdPlg,965
 airbyte_cdk/utils/__init__.py,sha256=gHjOCoUkolS_nKtgFSudXUY-ObK2vUo6aNQLvW7o8q8,347
 airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=wEtRnl5KRhN6eLJwrDrC4FJjyqt_4vkA1F65mdl8c24,3142
@@ -331,8 +331,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.8.2.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
-airbyte_cdk-6.8.2.dev1.dist-info/METADATA,sha256=yATvM83Zo6tZfb5wnnP-1YGmBjL1ZR2zWzZFr09J1R8,6112
-airbyte_cdk-6.8.2.dev1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-airbyte_cdk-6.8.2.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
-airbyte_cdk-6.8.2.dev1.dist-info/RECORD,,
+airbyte_cdk-6.8.3rc1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.8.3rc1.dist-info/METADATA,sha256=BJ498EOIPCD0I5hhKWn_RkGxSLEu_ewOVqDo75QpFAs,13483
+airbyte_cdk-6.8.3rc1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+airbyte_cdk-6.8.3rc1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.8.3rc1.dist-info/RECORD,,

airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py DELETED Viewed

@@ -1,270 +0,0 @@
-import copy
-#
-# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
-#
-import logging
-from collections import OrderedDict
-from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
-from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
-from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
-from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
-from airbyte_cdk.sources.message import MessageRepository
-from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
-    PerPartitionKeySerializer,
-)
-from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, CursorField
-from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
-from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
-logger = logging.getLogger("airbyte")
-class ConcurrentCursorFactory:
-    def __init__(self, create_function: Callable[..., Cursor]):
-        self._create_function = create_function
-    def create(self, stream_state: Mapping[str, Any]) -> Cursor:
-        return self._create_function(stream_state=stream_state)
-class ConcurrentPerPartitionCursor(Cursor):
-    """
-    Manages state per partition when a stream has many partitions, to prevent data loss or duplication.
-    **Partition Limitation and Limit Reached Logic**
-    - **DEFAULT_MAX_PARTITIONS_NUMBER**: The maximum number of partitions to keep in memory (default is 10,000).
-    - **_cursor_per_partition**: An ordered dictionary that stores cursors for each partition.
-    - **_over_limit**: A counter that increments each time an oldest partition is removed when the limit is exceeded.
-    The class ensures that the number of partitions tracked does not exceed the `DEFAULT_MAX_PARTITIONS_NUMBER` to prevent excessive memory usage.
-    - When the number of partitions exceeds the limit, the oldest partitions are removed from `_cursor_per_partition`, and `_over_limit` is incremented accordingly.
-    - The `limit_reached` method returns `True` when `_over_limit` exceeds `DEFAULT_MAX_PARTITIONS_NUMBER`, indicating that the global cursor should be used instead of per-partition cursors.
-    This approach avoids unnecessary switching to a global cursor due to temporary spikes in partition counts, ensuring that switching is only done when a sustained high number of partitions is observed.
-    """
-    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
-    _NO_STATE: Mapping[str, Any] = {}
-    _NO_CURSOR_STATE: Mapping[str, Any] = {}
-    _KEY = 0
-    _VALUE = 1
-    _state_to_migrate_from: Mapping[str, Any] = {}
-    def __init__(
-        self,
-        cursor_factory: ConcurrentCursorFactory,
-        partition_router: PartitionRouter,
-        stream_name: str,
-        stream_namespace: Optional[str],
-        stream_state: Any,
-        message_repository: MessageRepository,
-        connector_state_manager: ConnectorStateManager,
-        cursor_field: CursorField,
-    ) -> None:
-        self._stream_name = stream_name
-        self._stream_namespace = stream_namespace
-        self._message_repository = message_repository
-        self._connector_state_manager = connector_state_manager
-        self._cursor_field = cursor_field
-        self._cursor_factory = cursor_factory
-        self._partition_router = partition_router
-        # The dict is ordered to ensure that once the maximum number of partitions is reached,
-        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
-        self._cursor_per_partition: OrderedDict[str, Cursor] = OrderedDict()
-        self._over_limit = 0
-        self._partition_serializer = PerPartitionKeySerializer()
-        self._set_initial_state(stream_state)
-    @property
-    def cursor_field(self) -> CursorField:
-        return self._cursor_field
-    @property
-    def state(self) -> MutableMapping[str, Any]:
-        states = []
-        for partition_tuple, cursor in self._cursor_per_partition.items():
-            cursor_state = cursor._connector_state_converter.convert_to_state_message(
-                cursor._cursor_field, cursor.state
-            )
-            if cursor_state:
-                states.append(
-                    {
-                        "partition": self._to_dict(partition_tuple),
-                        "cursor": copy.deepcopy(cursor_state),
-                    }
-                )
-        state: dict[str, Any] = {"states": states}
-        return state
-    def close_partition(self, partition: Partition) -> None:
-        self._cursor_per_partition[self._to_partition_key(partition._stream_slice.partition)].close_partition_without_emit(partition=partition)
-    def ensure_at_least_one_state_emitted(self) -> None:
-        """
-        The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
-        called.
-        """
-        self._emit_state_message()
-    def _emit_state_message(self) -> None:
-        self._connector_state_manager.update_state_for_stream(
-            self._stream_name,
-            self._stream_namespace,
-            self.state,
-        )
-        state_message = self._connector_state_manager.create_state_message(
-            self._stream_name, self._stream_namespace
-        )
-        self._message_repository.emit_message(state_message)
-    def stream_slices(self) -> Iterable[StreamSlice]:
-        slices = self._partition_router.stream_slices()
-        for partition in slices:
-            yield from self.generate_slices_from_partition(partition)
-    def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
-        # Ensure the maximum number of partitions is not exceeded
-        self._ensure_partition_limit()
-        cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
-        if not cursor:
-            partition_state = (
-                self._state_to_migrate_from
-                if self._state_to_migrate_from
-                else self._NO_CURSOR_STATE
-            )
-            cursor = self._create_cursor(partition_state)
-            self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
-        for cursor_slice in cursor.stream_slices():
-            yield StreamSlice(
-                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
-            )
-    def _ensure_partition_limit(self) -> None:
-        """
-        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
-        """
-        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
-            self._over_limit += 1
-            oldest_partition = self._cursor_per_partition.popitem(last=False)[
-                0
-            ]  # Remove the oldest partition
-            logger.warning(
-                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
-            )
-    def limit_reached(self) -> bool:
-        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
-    def _set_initial_state(self, stream_state: StreamState) -> None:
-        """
-        Set the initial state for the cursors.
-        This method initializes the state for each partition cursor using the provided stream state.
-        If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
-        Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
-        does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
-        Args:
-            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
-                {
-                    "states": [
-                        {
-                            "partition": {
-                                "partition_key": "value"
-                            },
-                            "cursor": {
-                                "last_updated": "2023-05-27T00:00:00Z"
-                            }
-                        }
-                    ],
-                    "parent_state": {
-                        "parent_stream_name": {
-                            "last_updated": "2023-05-27T00:00:00Z"
-                        }
-                    }
-                }
-        """
-        if not stream_state:
-            return
-        if "states" not in stream_state:
-            # We assume that `stream_state` is in a global format that can be applied to all partitions.
-            # Example: {"global_state_format_key": "global_state_format_value"}
-            self._state_to_migrate_from = stream_state
-        else:
-            for state in stream_state["states"]:
-                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
-                    self._create_cursor(state["cursor"])
-                )
-            # set default state for missing partitions if it is per partition with fallback to global
-            if "state" in stream_state:
-                self._state_to_migrate_from = stream_state["state"]
-        # Set parent state for partition routers based on parent streams
-        self._partition_router.set_initial_state(stream_state)
-    def observe(self, record: Record) -> None:
-        self._cursor_per_partition[self._to_partition_key(record.associated_slice.partition)].observe(record)
-    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
-        return self._partition_serializer.to_partition_key(partition)
-    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
-        return self._partition_serializer.to_partition(partition_key)
-    def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor:
-        cursor = self._cursor_factory.create(stream_state=cursor_state)
-        return cursor
-    def should_be_synced(self, record: Record) -> bool:
-        return self._get_cursor(record).should_be_synced(record)
-    def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
-        if not first.associated_slice or not second.associated_slice:
-            raise ValueError(
-                f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
-            )
-        if first.associated_slice.partition != second.associated_slice.partition:
-            raise ValueError(
-                f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
-            )
-        return self._get_cursor(first).is_greater_than_or_equal(
-            self._convert_record_to_cursor_record(first),
-            self._convert_record_to_cursor_record(second),
-        )
-    @staticmethod
-    def _convert_record_to_cursor_record(record: Record) -> Record:
-        return Record(
-            record.data,
-            StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
-            if record.associated_slice
-            else None,
-        )
-    def _get_cursor(self, record: Record) -> Cursor:
-        if not record.associated_slice:
-            raise ValueError(
-                "Invalid state as stream slices that are emitted should refer to an existing cursor"
-            )
-        partition_key = self._to_partition_key(record.associated_slice.partition)
-        if partition_key not in self._cursor_per_partition:
-            raise ValueError(
-                "Invalid state as stream slices that are emitted should refer to an existing cursor"
-            )
-        cursor = self._cursor_per_partition[partition_key]
-        return cursor

airbyte_cdk-6.8.2.dev1.dist-info/METADATA DELETED Viewed

@@ -1,111 +0,0 @@
-Metadata-Version: 2.1
-Name: airbyte-cdk
-Version: 6.8.2.dev1
-Summary: A framework for writing Airbyte Connectors.
-Home-page: https://airbyte.com
-License: MIT
-Keywords: airbyte,connector-development-kit,cdk
-Author: Airbyte
-Author-email: contact@airbyte.io
-Requires-Python: >=3.10,<3.13
-Classifier: Development Status :: 3 - Alpha
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Scientific/Engineering
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Provides-Extra: file-based
-Provides-Extra: sphinx-docs
-Provides-Extra: sql
-Provides-Extra: vector-db-based
-Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
-Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
-Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
-Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
-Requires-Dist: backoff
-Requires-Dist: cachetools
-Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
-Requires-Dist: cryptography (>=42.0.5,<44.0.0)
-Requires-Dist: dpath (>=2.1.6,<3.0.0)
-Requires-Dist: dunamai (>=1.22.0,<2.0.0)
-Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
-Requires-Dist: genson (==1.3.0)
-Requires-Dist: isodate (>=0.6.1,<0.7.0)
-Requires-Dist: jsonref (>=0.2,<0.3)
-Requires-Dist: jsonschema (>=4.17.3,<4.18.0)
-Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
-Requires-Dist: langchain_core (==0.1.42)
-Requires-Dist: markdown ; extra == "file-based"
-Requires-Dist: nltk (==3.9.1)
-Requires-Dist: numpy (<2)
-Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
-Requires-Dist: orjson (>=3.10.7,<4.0.0)
-Requires-Dist: pandas (==2.2.2)
-Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
-Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
-Requires-Dist: pendulum (<3.0.0)
-Requires-Dist: psutil (==6.1.0)
-Requires-Dist: pyarrow (>=15.0.0,<15.1.0) ; extra == "file-based"
-Requires-Dist: pydantic (>=2.7,<3.0)
-Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
-Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
-Requires-Dist: pytesseract (==0.3.10) ; extra == "file-based"
-Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
-Requires-Dist: python-dateutil
-Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
-Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
-Requires-Dist: pytz (==2024.1)
-Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
-Requires-Dist: requests
-Requires-Dist: requests_cache
-Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
-Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
-Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
-Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
-Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
-Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
-Requires-Dist: wcmatch (==10.0)
-Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
-Project-URL: Documentation, https://docs.airbyte.io/
-Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
-Description-Content-Type: text/markdown
-# Airbyte Python CDK and Low-Code CDK
-Airbyte Python CDK is a framework for building Airbyte API Source Connectors. It provides a set of
-classes and helpers that make it easy to build a connector against an HTTP API (REST, GraphQL, etc),
-or a generic Python source connector.
-## Building Connectors with the CDK
-If you're looking to build a connector, we highly recommend that you first
-[start with the Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview).
-It should be enough for 90% connectors out there. For more flexible and complex connectors, use the
-[low-code CDK and `SourceDeclarativeManifest`](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).
-For more information on building connectors, please see the [Connector Development](https://docs.airbyte.com/connector-development/) guide on [docs.airbyte.com](https://docs.airbyte.com).
-## Python CDK Overview
-Airbyte CDK code is within `airbyte_cdk` directory. Here's a high level overview of what's inside:
-- `airbyte_cdk/connector_builder`. Internal wrapper that helps the Connector Builder platform run a declarative manifest (low-code connector). You should not use this code directly. If you need to run a `SourceDeclarativeManifest`, take a look at [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest) connector implementation instead.
-- `airbyte_cdk/cli/source_declarative_manifest`. This module defines the `source-declarative-manifest` (aka "SDM") connector execution logic and associated CLI.
-- `airbyte_cdk/destinations`. Basic Destination connector support! If you're building a Destination connector in Python, try that. Some of our vector DB destinations like `destination-pinecone` are using that code.
-- `airbyte_cdk/models` expose `airbyte_protocol.models` as a part of `airbyte_cdk` package.
-- `airbyte_cdk/sources/concurrent_source` is the Concurrent CDK implementation. It supports reading data from streams concurrently per slice / partition, useful for connectors with high throughput and high number of records.
-- `airbyte_cdk/sources/declarative` is the low-code CDK. It works on top of Airbyte Python CDK, but provides a declarative manifest language to define streams, operations, etc. This makes it easier to build connectors without writing Python code.
-- `airbyte_cdk/sources/file_based` is the CDK for file-based sources. Examples include S3, Azure, GCS, etc.
-## Contributing
-For instructions on how to contribute, please see our [Contributing Guide](docs/CONTRIBUTING.md).
-## Release Management
-Please see the [Release Management](docs/RELEASES.md) guide for information on how to perform releases and pre-releases.

{airbyte_cdk-6.8.2.dev1.dist-info → airbyte_cdk-6.8.3rc1.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{airbyte_cdk-6.8.2.dev1.dist-info → airbyte_cdk-6.8.3rc1.dist-info}/WHEEL RENAMED Viewed

File without changes

{airbyte_cdk-6.8.2.dev1.dist-info → airbyte_cdk-6.8.3rc1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

airbyte-cdk 6.8.2.dev1__py3-none-any.whl → 6.8.3rc1__py3-none-any.whl

Potentially problematic release.

airbyte-cdk 6.8.2.dev1py3-none-any.whl → 6.8.3rc1py3-none-any.whl