airbyte-cdk 6.8.2rc1__py3-none-any.whl → 6.8.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,10 +86,23 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
86
86
  component_factory=component_factory,
87
87
  )
88
88
 
89
- # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
90
- # no longer needs to store the original incoming state. But maybe there's an edge case?
91
89
  self._state = state
92
90
 
91
+ self._concurrent_streams: Optional[List[AbstractStream]]
92
+ self._synchronous_streams: Optional[List[Stream]]
93
+
94
+ # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
95
+ # they might depend on it. Ideally we want to have a static method on this class to get the spec without
96
+ # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
97
+ # for our future improvements to the CDK.
98
+ if config:
99
+ self._concurrent_streams, self._synchronous_streams = self._group_streams(
100
+ config=config or {}
101
+ )
102
+ else:
103
+ self._concurrent_streams = None
104
+ self._synchronous_streams = None
105
+
93
106
  concurrency_level_from_manifest = self._source_config.get("concurrency_level")
94
107
  if concurrency_level_from_manifest:
95
108
  concurrency_level_component = self._constructor.create_component(
@@ -123,20 +136,17 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
123
136
  logger: logging.Logger,
124
137
  config: Mapping[str, Any],
125
138
  catalog: ConfiguredAirbyteCatalog,
126
- state: Optional[List[AirbyteStateMessage]] = None,
139
+ state: Optional[Union[List[AirbyteStateMessage]]] = None,
127
140
  ) -> Iterator[AirbyteMessage]:
128
- concurrent_streams, _ = self._group_streams(config=config)
129
-
130
- # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of
131
- # the concurrent streams must be saved so that they can be removed from the catalog before starting
132
- # synchronous streams
133
- if len(concurrent_streams) > 0:
141
+ # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
142
+ # streams must be saved so that they can be removed from the catalog before starting synchronous streams
143
+ if self._concurrent_streams:
134
144
  concurrent_stream_names = set(
135
- [concurrent_stream.name for concurrent_stream in concurrent_streams]
145
+ [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
136
146
  )
137
147
 
138
148
  selected_concurrent_streams = self._select_streams(
139
- streams=concurrent_streams, configured_catalog=catalog
149
+ streams=self._concurrent_streams, configured_catalog=catalog
140
150
  )
141
151
  # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
142
152
  # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
@@ -155,7 +165,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
155
165
  yield from super().read(logger, config, filtered_catalog, state)
156
166
 
157
167
  def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
158
- concurrent_streams, synchronous_streams = self._group_streams(config=config)
168
+ concurrent_streams = self._concurrent_streams or []
169
+ synchronous_streams = self._synchronous_streams or []
159
170
  return AirbyteCatalog(
160
171
  streams=[
161
172
  stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
@@ -0,0 +1,55 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+
4
+ import importlib.util
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+ from typing import Optional
8
+
9
+ import pytest
10
+
11
+ # The following fixtures are used to load a manifest-only connector's components module and manifest file.
12
+ # They can be accessed from any test file in the connector's unit_tests directory by importing them as follows:
13
+
14
+ # from airbyte_cdk.test.utils.manifest_only_fixtures import components_module, connector_dir, manifest_path
15
+
16
+ # individual components can then be referenced as: components_module.<CustomComponentClass>
17
+
18
+
19
+ @pytest.fixture(scope="session")
20
+ def connector_dir(request: pytest.FixtureRequest) -> Path:
21
+ """Return the connector's root directory.
22
+
23
+ This assumes tests are being run from the unit_tests directory,
24
+ and that it is a direct child of the connector directory.
25
+ """
26
+ test_dir = Path(request.config.invocation_params.dir)
27
+ return test_dir.parent
28
+
29
+
30
+ @pytest.fixture(scope="session")
31
+ def components_module(connector_dir: Path) -> Optional[ModuleType]:
32
+ """Load and return the components module from the connector directory.
33
+
34
+ This assumes the components module is located at <connector_dir>/components.py.
35
+ """
36
+ components_path = connector_dir / "components.py"
37
+ if not components_path.exists():
38
+ return None
39
+
40
+ components_spec = importlib.util.spec_from_file_location("components", components_path)
41
+ if components_spec is None:
42
+ return None
43
+
44
+ components_module = importlib.util.module_from_spec(components_spec)
45
+ if components_spec.loader is None:
46
+ return None
47
+
48
+ components_spec.loader.exec_module(components_module)
49
+ return components_module
50
+
51
+
52
+ @pytest.fixture(scope="session")
53
+ def manifest_path(connector_dir: Path) -> Path:
54
+ """Return the path to the connector's manifest file."""
55
+ return connector_dir / "manifest.yaml"
@@ -0,0 +1,306 @@
1
+ Metadata-Version: 2.1
2
+ Name: airbyte-cdk
3
+ Version: 6.8.3rc1
4
+ Summary: A framework for writing Airbyte Connectors.
5
+ Home-page: https://airbyte.com
6
+ License: MIT
7
+ Keywords: airbyte,connector-development-kit,cdk
8
+ Author: Airbyte
9
+ Author-email: contact@airbyte.io
10
+ Requires-Python: >=3.10,<3.13
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Provides-Extra: file-based
21
+ Provides-Extra: sphinx-docs
22
+ Provides-Extra: sql
23
+ Provides-Extra: vector-db-based
24
+ Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
25
+ Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
26
+ Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
27
+ Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
28
+ Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
29
+ Requires-Dist: backoff
30
+ Requires-Dist: cachetools
31
+ Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
32
+ Requires-Dist: cryptography (>=42.0.5,<44.0.0)
33
+ Requires-Dist: dpath (>=2.1.6,<3.0.0)
34
+ Requires-Dist: dunamai (>=1.22.0,<2.0.0)
35
+ Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
36
+ Requires-Dist: genson (==1.3.0)
37
+ Requires-Dist: isodate (>=0.6.1,<0.7.0)
38
+ Requires-Dist: jsonref (>=0.2,<0.3)
39
+ Requires-Dist: jsonschema (>=4.17.3,<4.18.0)
40
+ Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
41
+ Requires-Dist: langchain_core (==0.1.42)
42
+ Requires-Dist: markdown ; extra == "file-based"
43
+ Requires-Dist: nltk (==3.9.1)
44
+ Requires-Dist: numpy (<2)
45
+ Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
46
+ Requires-Dist: orjson (>=3.10.7,<4.0.0)
47
+ Requires-Dist: pandas (==2.2.2)
48
+ Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
49
+ Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
50
+ Requires-Dist: pendulum (<3.0.0)
51
+ Requires-Dist: psutil (==6.1.0)
52
+ Requires-Dist: pyarrow (>=15.0.0,<15.1.0) ; extra == "file-based"
53
+ Requires-Dist: pydantic (>=2.7,<3.0)
54
+ Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
55
+ Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
56
+ Requires-Dist: pytesseract (==0.3.10) ; extra == "file-based"
57
+ Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
58
+ Requires-Dist: python-dateutil
59
+ Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
60
+ Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
61
+ Requires-Dist: pytz (==2024.1)
62
+ Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
63
+ Requires-Dist: requests
64
+ Requires-Dist: requests_cache
65
+ Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
66
+ Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
67
+ Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
68
+ Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
69
+ Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
70
+ Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
71
+ Requires-Dist: wcmatch (==10.0)
72
+ Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
73
+ Project-URL: Documentation, https://docs.airbyte.io/
74
+ Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
75
+ Description-Content-Type: text/markdown
76
+
77
+ # Airbyte Python CDK and Low-Code CDK
78
+
79
+ Airbyte Python CDK is a framework for building Airbyte API Source Connectors. It provides a set of
80
+ classes and helpers that make it easy to build a connector against an HTTP API (REST, GraphQL, etc),
81
+ or a generic Python source connector.
82
+
83
+ ## Usage
84
+
85
+ If you're looking to build a connector, we highly recommend that you
86
+ [start with the Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview).
87
+ It should be enough for 90% of connectors out there. For more flexible and complex connectors, use the
88
+ [low-code CDK and `SourceDeclarativeManifest`](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).
89
+
90
+ If that doesn't work, then consider building on top of the
91
+ [lower-level Python CDK itself](https://docs.airbyte.com/connector-development/cdk-python/).
92
+
93
+ ### Quick Start
94
+
95
+ To get started on a Python CDK based connector or a low-code connector, you can generate a connector
96
+ project from a template:
97
+
98
+ ```bash
99
+ # from the repo root
100
+ cd airbyte-integrations/connector-templates/generator
101
+ ./generate.sh
102
+ ```
103
+
104
+ ### Example Connectors
105
+
106
+ **HTTP Connectors**:
107
+
108
+ - [Stripe](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/)
109
+ - [Salesforce](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-salesforce/)
110
+
111
+ **Python connectors using the bare-bones `Source` abstraction**:
112
+
113
+ - [Google Sheets](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py)
114
+
115
+ This will generate a project with a type and a name of your choice and put it in
116
+ `airbyte-integrations/connectors`. Open the directory with your connector in an editor and follow
117
+ the `TODO` items.
118
+
119
+ ## Python CDK Overview
120
+
121
+ Airbyte CDK code is within `airbyte_cdk` directory. Here's a high level overview of what's inside:
122
+
123
+ - `connector_builder`. Internal wrapper that helps the Connector Builder platform run a declarative
124
+ manifest (low-code connector). You should not use this code directly. If you need to run a
125
+ `SourceDeclarativeManifest`, take a look at
126
+ [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest)
127
+ connector implementation instead.
128
+ - `destinations`. Basic Destination connector support! If you're building a Destination connector in
129
+ Python, try that. Some of our vector DB destinations like `destination-pinecone` are using that
130
+ code.
131
+ - `models` expose `airbyte_protocol.models` as a part of `airbyte_cdk` package.
132
+ - `sources/concurrent_source` is the Concurrent CDK implementation. It supports reading data from
133
+ streams concurrently per slice / partition, useful for connectors with high throughput and high
134
+ number of records.
135
+ - `sources/declarative` is the low-code CDK. It works on top of Airbyte Python CDK, but provides a
136
+ declarative manifest language to define streams, operations, etc. This makes it easier to build
137
+ connectors without writing Python code.
138
+ - `sources/file_based` is the CDK for file-based sources. Examples include S3, Azure, GCS, etc.
139
+
140
+ ## Contributing
141
+
142
+ Thank you for being interested in contributing to Airbyte Python CDK! Here are some guidelines to
143
+ get you started:
144
+
145
+ - We adhere to the [code of conduct](/CODE_OF_CONDUCT.md).
146
+ - You can contribute by reporting bugs, posting github discussions, opening issues, improving
147
+ [documentation](/docs/), and submitting pull requests with bugfixes and new features alike.
148
+ - If you're changing the code, please add unit tests for your change.
149
+ - When submitting issues or PRs, please add a small reproduction project. Using the changes in your
150
+ connector and providing that connector code as an example (or a satellite PR) helps!
151
+
152
+ ### First time setup
153
+
154
+ Install the project dependencies and development tools:
155
+
156
+ ```bash
157
+ poetry install --all-extras
158
+ ```
159
+
160
+ Installing all extras is required to run the full suite of unit tests.
161
+
162
+ #### Running tests locally
163
+
164
+ - Iterate on the CDK code locally
165
+ - Run tests via `poetry run poe unit-test-with-cov`, or `python -m pytest -s unit_tests` if you want
166
+ to pass pytest options.
167
+ - Run `poetry run poe check-local` to lint all code, type-check modified code, and run unit tests
168
+ with coverage in one command.
169
+
170
+ To see all available scripts, run `poetry run poe`.
171
+
172
+ #### Formatting the code
173
+
174
+ - Iterate on the CDK code locally
175
+ - Run `poetry run ruff format` to format your changes.
176
+
177
+ To see all available `ruff` options, run `poetry run ruff`.
178
+
179
+ ##### Autogenerated files
180
+
181
+ Low-code CDK models are generated from `sources/declarative/declarative_component_schema.yaml`. If
182
+ the iteration you are working on includes changes to the models or the connector generator, you
183
+ might want to regenerate them. In order to do that, you can run:
184
+
185
+ ```bash
186
+ poetry run poe build
187
+ ```
188
+
189
+ This will generate the code generator docker image and the component manifest files based on the
190
+ schemas and templates.
191
+
192
+ #### Testing
193
+
194
+ All tests are located in the `unit_tests` directory. Run `poetry run poe unit-test-with-cov` to run
195
+ them. This also presents a test coverage report. For faster iteration with no coverage report and
196
+ more options, `python -m pytest -s unit_tests` is a good place to start.
197
+
198
+ #### Building and testing a connector with your local CDK
199
+
200
+ When developing a new feature in the CDK, you may find it helpful to run a connector that uses that
201
+ new feature. You can test this in one of two ways:
202
+
203
+ - Running a connector locally
204
+ - Building and running a source via Docker
205
+
206
+ ##### Installing your local CDK into a local Python connector
207
+
208
+ Open the connector's `pyproject.toml` file and replace the line with `airbyte_cdk` with the
209
+ following:
210
+
211
+ ```toml
212
+ airbyte_cdk = { path = "../../../airbyte-cdk/python/airbyte_cdk", develop = true }
213
+ ```
214
+
215
+ Then, running `poetry update` should reinstall `airbyte_cdk` from your local working directory.
216
+
217
+ ##### Building a Python connector in Docker with your local CDK installed
218
+
219
+ _Pre-requisite: Install the
220
+ [`airbyte-ci` CLI](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md)_
221
+
222
+ You can build your connector image with the local CDK using
223
+
224
+ ```bash
225
+ # from the airbytehq/airbyte base directory
226
+ airbyte-ci connectors --use-local-cdk --name=<CONNECTOR> build
227
+ ```
228
+
229
+ Note that the local CDK is injected at build time, so if you make changes, you will have to run the
230
+ build command again to see them reflected.
231
+
232
+ ##### Running Connector Acceptance Tests for a single connector in Docker with your local CDK installed
233
+
234
+ _Pre-requisite: Install the
235
+ [`airbyte-ci` CLI](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md)_
236
+
237
+ To run acceptance tests for a single connector using the local CDK, from the connector directory,
238
+ run
239
+
240
+ ```bash
241
+ airbyte-ci connectors --use-local-cdk --name=<CONNECTOR> test
242
+ ```
243
+
244
+ #### When you don't have access to the API
245
+
246
+ There may be a time when you do not have access to the API (either because you don't have the
247
+ credentials, network access, etc.). You will probably still want to do end-to-end testing at least
248
+ once. In order to do so, you can emulate the server you would be reaching using a server stubbing
249
+ tool.
250
+
251
+ For example, using [mockserver](https://www.mock-server.com/), you can set up an expectation file
252
+ like this:
253
+
254
+ ```json
255
+ {
256
+ "httpRequest": {
257
+ "method": "GET",
258
+ "path": "/data"
259
+ },
260
+ "httpResponse": {
261
+ "body": "{\"data\": [{\"record_key\": 1}, {\"record_key\": 2}]}"
262
+ }
263
+ }
264
+ ```
265
+
266
+ Assuming this file has been created at `secrets/mock_server_config/expectations.json`, running the
267
+ following command will make any request on path `/data` return the response defined in
268
+ the expectation file:
269
+
270
+ ```bash
271
+ docker run -d --rm -v $(pwd)/secrets/mock_server_config:/config -p 8113:8113 --env MOCKSERVER_LOG_LEVEL=TRACE --env MOCKSERVER_SERVER_PORT=8113 --env MOCKSERVER_WATCH_INITIALIZATION_JSON=true --env MOCKSERVER_PERSISTED_EXPECTATIONS_PATH=/config/expectations.json --env MOCKSERVER_INITIALIZATION_JSON_PATH=/config/expectations.json mockserver/mockserver:5.15.0
272
+ ```
273
+
274
+ HTTP requests to `localhost:8113/data` should now return the body defined in the expectations file.
275
+ To test this, the implementer either has to change the code which defines the base URL for Python
276
+ source or update the `url_base` from low-code. With the Connector Builder running in docker, you
277
+ will have to use domain `host.docker.internal` instead of `localhost` as the requests are executed
278
+ within docker.
279
+
280
+ #### Publishing a new version to PyPi
281
+
282
+ Python CDK has a
283
+ [GitHub workflow](https://github.com/airbytehq/airbyte/actions/workflows/publish-cdk-command-manually.yml)
284
+ that manages the CDK changelog, making a new release for `airbyte_cdk`, publishing it to PyPI, and
285
+ then making a commit to update (and subsequently auto-release)
286
+ [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest)
287
+ and Connector Builder (in the platform repository).
288
+
289
+ > [!Note]: The workflow will handle the `CHANGELOG.md` entry for you. You should not add changelog
290
+ > lines in your PRs to the CDK itself.
291
+
292
+ > [!Warning]: The workflow bumps the version on its own; please don't change the CDK version in
293
+ > `pyproject.toml` manually.
294
+
295
+ 1. You only trigger the release workflow once all the PRs that you want to be included are already
296
+ merged into the `master` branch.
297
+ 2. Run the
298
+ [`Publish CDK Manually`](https://github.com/airbytehq/airbyte/actions/workflows/publish-cdk-command-manually.yml)
299
+ workflow from master using `release-type=major|minor|patch` and setting the changelog message.
300
+ 3. When the workflow runs, it will commit a new version directly to master branch.
301
+ 4. The workflow will bump the version of `source-declarative-manifest` according to the
302
+ `release-type` of the CDK, then commit these changes back to master. The commit to master will
303
+ kick off a publish of the new version of `source-declarative-manifest`.
304
+ 5. The workflow will also add a pull request to `airbyte-platform-internal` repo to bump the
305
+ dependency in Connector Builder.
306
+
@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=UAkFzFJ62tq7qWfudLLt-Sj-EhOJquYPd-FrwMBSA9I,22928
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=F2X2ZS9eDfrohNbxG2TgPW-f4YP8IAkMjO1XHtD6NIg,23464
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -313,6 +313,7 @@ airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMo
313
313
  airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
314
314
  airbyte_cdk/test/utils/data.py,sha256=CkCR1_-rujWNmPXFR1IXTMwx1rAl06wAyIKWpDcN02w,820
315
315
  airbyte_cdk/test/utils/http_mocking.py,sha256=F2hpm2q4ijojQN5u2XtgTAp8aNgHgJ64eZNkZ9BW0ig,550
316
+ airbyte_cdk/test/utils/manifest_only_fixtures.py,sha256=kGg8kSmEouHPDCJf8GKkKqEAQaCLYfgdPEvRTb64dCI,1898
316
317
  airbyte_cdk/test/utils/reading.py,sha256=SOTDYlps6Te9KumfTJ3vVDSm9EUXhvKtE8aD7gvdPlg,965
317
318
  airbyte_cdk/utils/__init__.py,sha256=gHjOCoUkolS_nKtgFSudXUY-ObK2vUo6aNQLvW7o8q8,347
318
319
  airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=wEtRnl5KRhN6eLJwrDrC4FJjyqt_4vkA1F65mdl8c24,3142
@@ -330,8 +331,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
330
331
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
331
332
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
332
333
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
333
- airbyte_cdk-6.8.2rc1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
334
- airbyte_cdk-6.8.2rc1.dist-info/METADATA,sha256=u5k6gz5XLQCQmR3CLeP-StHB8_iV6QhEqLuuBvh1G1w,6110
335
- airbyte_cdk-6.8.2rc1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
336
- airbyte_cdk-6.8.2rc1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
337
- airbyte_cdk-6.8.2rc1.dist-info/RECORD,,
334
+ airbyte_cdk-6.8.3rc1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
335
+ airbyte_cdk-6.8.3rc1.dist-info/METADATA,sha256=BJ498EOIPCD0I5hhKWn_RkGxSLEu_ewOVqDo75QpFAs,13483
336
+ airbyte_cdk-6.8.3rc1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
337
+ airbyte_cdk-6.8.3rc1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
338
+ airbyte_cdk-6.8.3rc1.dist-info/RECORD,,
@@ -1,111 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: airbyte-cdk
3
- Version: 6.8.2rc1
4
- Summary: A framework for writing Airbyte Connectors.
5
- Home-page: https://airbyte.com
6
- License: MIT
7
- Keywords: airbyte,connector-development-kit,cdk
8
- Author: Airbyte
9
- Author-email: contact@airbyte.io
10
- Requires-Python: >=3.10,<3.13
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Classifier: Programming Language :: Python :: 3.12
18
- Classifier: Topic :: Scientific/Engineering
19
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Provides-Extra: file-based
21
- Provides-Extra: sphinx-docs
22
- Provides-Extra: sql
23
- Provides-Extra: vector-db-based
24
- Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
25
- Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
26
- Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
27
- Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
28
- Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
29
- Requires-Dist: backoff
30
- Requires-Dist: cachetools
31
- Requires-Dist: cohere (==4.21) ; extra == "vector-db-based"
32
- Requires-Dist: cryptography (>=42.0.5,<44.0.0)
33
- Requires-Dist: dpath (>=2.1.6,<3.0.0)
34
- Requires-Dist: dunamai (>=1.22.0,<2.0.0)
35
- Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
36
- Requires-Dist: genson (==1.3.0)
37
- Requires-Dist: isodate (>=0.6.1,<0.7.0)
38
- Requires-Dist: jsonref (>=0.2,<0.3)
39
- Requires-Dist: jsonschema (>=4.17.3,<4.18.0)
40
- Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
41
- Requires-Dist: langchain_core (==0.1.42)
42
- Requires-Dist: markdown ; extra == "file-based"
43
- Requires-Dist: nltk (==3.9.1)
44
- Requires-Dist: numpy (<2)
45
- Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
46
- Requires-Dist: orjson (>=3.10.7,<4.0.0)
47
- Requires-Dist: pandas (==2.2.2)
48
- Requires-Dist: pdf2image (==1.16.3) ; extra == "file-based"
49
- Requires-Dist: pdfminer.six (==20221105) ; extra == "file-based"
50
- Requires-Dist: pendulum (<3.0.0)
51
- Requires-Dist: psutil (==6.1.0)
52
- Requires-Dist: pyarrow (>=15.0.0,<15.1.0) ; extra == "file-based"
53
- Requires-Dist: pydantic (>=2.7,<3.0)
54
- Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
55
- Requires-Dist: pyrate-limiter (>=3.1.0,<3.2.0)
56
- Requires-Dist: pytesseract (==0.3.10) ; extra == "file-based"
57
- Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
58
- Requires-Dist: python-dateutil
59
- Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
60
- Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
61
- Requires-Dist: pytz (==2024.1)
62
- Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
63
- Requires-Dist: requests
64
- Requires-Dist: requests_cache
65
- Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
66
- Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
67
- Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
68
- Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
69
- Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
70
- Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
71
- Requires-Dist: wcmatch (==10.0)
72
- Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
73
- Project-URL: Documentation, https://docs.airbyte.io/
74
- Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
75
- Description-Content-Type: text/markdown
76
-
77
- # Airbyte Python CDK and Low-Code CDK
78
-
79
- Airbyte Python CDK is a framework for building Airbyte API Source Connectors. It provides a set of
80
- classes and helpers that make it easy to build a connector against an HTTP API (REST, GraphQL, etc),
81
- or a generic Python source connector.
82
-
83
- ## Building Connectors with the CDK
84
-
85
- If you're looking to build a connector, we highly recommend that you first
86
- [start with the Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview).
87
- It should be enough for 90% connectors out there. For more flexible and complex connectors, use the
88
- [low-code CDK and `SourceDeclarativeManifest`](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview).
89
-
90
- For more information on building connectors, please see the [Connector Development](https://docs.airbyte.com/connector-development/) guide on [docs.airbyte.com](https://docs.airbyte.com).
91
-
92
- ## Python CDK Overview
93
-
94
- Airbyte CDK code is within `airbyte_cdk` directory. Here's a high level overview of what's inside:
95
-
96
- - `airbyte_cdk/connector_builder`. Internal wrapper that helps the Connector Builder platform run a declarative manifest (low-code connector). You should not use this code directly. If you need to run a `SourceDeclarativeManifest`, take a look at [`source-declarative-manifest`](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-declarative-manifest) connector implementation instead.
97
- - `airbyte_cdk/cli/source_declarative_manifest`. This module defines the `source-declarative-manifest` (aka "SDM") connector execution logic and associated CLI.
98
- - `airbyte_cdk/destinations`. Basic Destination connector support! If you're building a Destination connector in Python, try that. Some of our vector DB destinations like `destination-pinecone` are using that code.
99
- - `airbyte_cdk/models` expose `airbyte_protocol.models` as a part of `airbyte_cdk` package.
100
- - `airbyte_cdk/sources/concurrent_source` is the Concurrent CDK implementation. It supports reading data from streams concurrently per slice / partition, useful for connectors with high throughput and high number of records.
101
- - `airbyte_cdk/sources/declarative` is the low-code CDK. It works on top of Airbyte Python CDK, but provides a declarative manifest language to define streams, operations, etc. This makes it easier to build connectors without writing Python code.
102
- - `airbyte_cdk/sources/file_based` is the CDK for file-based sources. Examples include S3, Azure, GCS, etc.
103
-
104
- ## Contributing
105
-
106
- For instructions on how to contribute, please see our [Contributing Guide](docs/CONTRIBUTING.md).
107
-
108
- ## Release Management
109
-
110
- Please see the [Release Management](docs/RELEASES.md) guide for information on how to perform releases and pre-releases.
111
-