airbyte-source-azure-blob-storage 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-azure-blob-storage might be problematic. Click here for more details.
- airbyte_source_azure_blob_storage-0.3.5.dist-info/METADATA +112 -0
- airbyte_source_azure_blob_storage-0.3.5.dist-info/RECORD +10 -0
- {airbyte_source_azure_blob_storage-0.3.4.dist-info → airbyte_source_azure_blob_storage-0.3.5.dist-info}/WHEEL +1 -2
- airbyte_source_azure_blob_storage-0.3.5.dist-info/entry_points.txt +3 -0
- source_azure_blob_storage/run.py +2 -0
- airbyte_source_azure_blob_storage-0.3.4.dist-info/METADATA +0 -142
- airbyte_source_azure_blob_storage-0.3.4.dist-info/RECORD +0 -24
- airbyte_source_azure_blob_storage-0.3.4.dist-info/entry_points.txt +0 -2
- airbyte_source_azure_blob_storage-0.3.4.dist-info/top_level.txt +0 -2
- integration_tests/__init__.py +0 -3
- integration_tests/abnormal_states/avro.json +0 -12
- integration_tests/abnormal_states/csv.json +0 -12
- integration_tests/abnormal_states/jsonl.json +0 -12
- integration_tests/abnormal_states/jsonl_newlines.json +0 -14
- integration_tests/abnormal_states/parquet.json +0 -18
- integration_tests/acceptance.py +0 -16
- integration_tests/configured_catalog.json +0 -14
- integration_tests/configured_catalogs/avro.json +0 -38
- integration_tests/configured_catalogs/csv.json +0 -35
- integration_tests/configured_catalogs/jsonl.json +0 -41
- integration_tests/configured_catalogs/parquet.json +0 -74
- integration_tests/spec.json +0 -387
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: airbyte-source-azure-blob-storage
|
|
3
|
+
Version: 0.3.5
|
|
4
|
+
Summary: Source implementation for Azure Blob Storage.
|
|
5
|
+
Home-page: https://airbyte.com
|
|
6
|
+
License: MIT
|
|
7
|
+
Author: Airbyte
|
|
8
|
+
Author-email: contact@airbyte.io
|
|
9
|
+
Requires-Python: >=3.9,<3.12
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Requires-Dist: airbyte-cdk[file-based] (==0.75)
|
|
16
|
+
Requires-Dist: pytz (==2024.1)
|
|
17
|
+
Requires-Dist: smart-open[azure] (==6.4.0)
|
|
18
|
+
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/azure-blob-storage
|
|
19
|
+
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# Azure-Blob-Storage source connector
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
This is the repository for the Azure-Blob-Storage source connector, written in Python.
|
|
26
|
+
For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/azure-blob-storage).
|
|
27
|
+
|
|
28
|
+
## Local development
|
|
29
|
+
|
|
30
|
+
### Prerequisites
|
|
31
|
+
* Python (~=3.9)
|
|
32
|
+
* Poetry (~=1.7) - installation instructions [here](https://python-poetry.org/docs/#installation)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
### Installing the connector
|
|
36
|
+
From this connector directory, run:
|
|
37
|
+
```bash
|
|
38
|
+
poetry install --with dev
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
### Create credentials
|
|
43
|
+
**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/azure-blob-storage)
|
|
44
|
+
to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_azure_blob_storage/spec.yaml` file.
|
|
45
|
+
Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information.
|
|
46
|
+
See `sample_files/sample_config.json` for a sample config file.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
### Locally running the connector
|
|
50
|
+
```
|
|
51
|
+
poetry run source-azure-blob-storage spec
|
|
52
|
+
poetry run source-azure-blob-storage check --config secrets/config.json
|
|
53
|
+
poetry run source-azure-blob-storage discover --config secrets/config.json
|
|
54
|
+
poetry run source-azure-blob-storage read --config secrets/config.json --catalog sample_files/configured_catalog.json
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Running unit tests
|
|
58
|
+
To run unit tests locally, from the connector directory run:
|
|
59
|
+
```
|
|
60
|
+
poetry run pytest unit_tests
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Building the docker image
|
|
64
|
+
1. Install [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md)
|
|
65
|
+
2. Run the following command to build the docker image:
|
|
66
|
+
```bash
|
|
67
|
+
airbyte-ci connectors --name=source-azure-blob-storage build
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
An image will be available on your host with the tag `airbyte/source-azure-blob-storage:dev`.
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
### Running as a docker container
|
|
74
|
+
Then run any of the connector commands as follows:
|
|
75
|
+
```
|
|
76
|
+
docker run --rm airbyte/source-azure-blob-storage:dev spec
|
|
77
|
+
docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-azure-blob-storage:dev check --config /secrets/config.json
|
|
78
|
+
docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-azure-blob-storage:dev discover --config /secrets/config.json
|
|
79
|
+
docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-azure-blob-storage:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Running our CI test suite
|
|
83
|
+
You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md):
|
|
84
|
+
```bash
|
|
85
|
+
airbyte-ci connectors --name=source-azure-blob-storage test
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Customizing acceptance Tests
|
|
89
|
+
Customize `acceptance-test-config.yml` file to configure acceptance tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information.
|
|
90
|
+
If your connector needs to create or destroy resources for use during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py.
|
|
91
|
+
|
|
92
|
+
### Dependency Management
|
|
93
|
+
All of your dependencies should be managed via Poetry.
|
|
94
|
+
To add a new dependency, run:
|
|
95
|
+
```bash
|
|
96
|
+
poetry add <package-name>
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Please commit the changes to `pyproject.toml` and `poetry.lock` files.
|
|
100
|
+
|
|
101
|
+
## Publishing a new version of the connector
|
|
102
|
+
You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
|
|
103
|
+
1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=source-azure-blob-storage test`
|
|
104
|
+
2. Bump the connector version (please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors)):
|
|
105
|
+
- bump the `dockerImageTag` value in `metadata.yaml`
|
|
106
|
+
- bump the `version` value in `pyproject.toml`
|
|
107
|
+
3. Make sure the `metadata.yaml` content is up to date.
|
|
108
|
+
4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/sources/azure-blob-storage.md`).
|
|
109
|
+
5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
|
|
110
|
+
6. Pat yourself on the back for being an awesome contributor.
|
|
111
|
+
7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
|
|
112
|
+
8. Once your PR is merged, the new version of the connector will be automatically published to Docker Hub and our connector registry.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
source_azure_blob_storage/__init__.py,sha256=dUrGWNqeIfK3e4yI6dgpIzSi8d03kItsUWCHQcmxXlg,281
|
|
2
|
+
source_azure_blob_storage/config.py,sha256=fg1fitDbsX-LXtPGBmG3bfu-jb0XoFYqHxGWsYY6Uy8,2499
|
|
3
|
+
source_azure_blob_storage/legacy_config_transformer.py,sha256=yiS4GgjaVvJtedmweAkq-eEdPG0RWWAAr0T9UV4RmOQ,1306
|
|
4
|
+
source_azure_blob_storage/run.py,sha256=xam3N4LxPECFb0HTpJKIrmr8R_bVJmKiqEN4sluK4yk,1758
|
|
5
|
+
source_azure_blob_storage/source.py,sha256=dt6BmjpDsxjeSlm6BccfFD3NJblEmDk4gUiJnvjcU40,1186
|
|
6
|
+
source_azure_blob_storage/stream_reader.py,sha256=HA9jNcsMKK-5TVlVelpV2JmjxSf0OfqxC0yBEUtdEe0,2940
|
|
7
|
+
airbyte_source_azure_blob_storage-0.3.5.dist-info/METADATA,sha256=aobYZFqV7OVJs68wTuObNEYRPEd7zJ_NDElXom1-aY0,5536
|
|
8
|
+
airbyte_source_azure_blob_storage-0.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
9
|
+
airbyte_source_azure_blob_storage-0.3.5.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
|
|
10
|
+
airbyte_source_azure_blob_storage-0.3.5.dist-info/RECORD,,
|
source_azure_blob_storage/run.py
CHANGED
|
@@ -8,6 +8,7 @@ from datetime import datetime
|
|
|
8
8
|
|
|
9
9
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
|
|
10
10
|
from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteTraceMessage, TraceType, Type
|
|
11
|
+
from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
|
|
11
12
|
from source_azure_blob_storage import Config, SourceAzureBlobStorage, SourceAzureBlobStorageStreamReader
|
|
12
13
|
|
|
13
14
|
|
|
@@ -23,6 +24,7 @@ def run():
|
|
|
23
24
|
SourceAzureBlobStorage.read_catalog(catalog_path) if catalog_path else None,
|
|
24
25
|
SourceAzureBlobStorage.read_config(config_path) if catalog_path else None,
|
|
25
26
|
SourceAzureBlobStorage.read_state(state_path) if catalog_path else None,
|
|
27
|
+
cursor_cls=DefaultFileBasedCursor,
|
|
26
28
|
)
|
|
27
29
|
except Exception:
|
|
28
30
|
print(
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: airbyte-source-azure-blob-storage
|
|
3
|
-
Version: 0.3.4
|
|
4
|
-
Summary: Source implementation for Azure Blob Storage.
|
|
5
|
-
Author: Airbyte
|
|
6
|
-
Author-email: contact@airbyte.io
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
Requires-Dist: airbyte-cdk[file-based] >=0.61.0
|
|
9
|
-
Requires-Dist: smart-open[azure]
|
|
10
|
-
Requires-Dist: pytz
|
|
11
|
-
Provides-Extra: tests
|
|
12
|
-
Requires-Dist: requests-mock ~=1.9.3 ; extra == 'tests'
|
|
13
|
-
Requires-Dist: pytest-mock ~=3.6.1 ; extra == 'tests'
|
|
14
|
-
Requires-Dist: pytest ~=6.2 ; extra == 'tests'
|
|
15
|
-
|
|
16
|
-
# Azure Blob Storage Source
|
|
17
|
-
|
|
18
|
-
This is the repository for the Azure Blob Storage source connector, written in Python.
|
|
19
|
-
For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/azure-blob-storage).
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
**To iterate on this connector, make sure to complete this prerequisites section.**
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
From this connector directory, create a virtual environment:
|
|
26
|
-
```
|
|
27
|
-
python -m venv .venv
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your
|
|
31
|
-
development environment of choice. To activate it from the terminal, run:
|
|
32
|
-
```
|
|
33
|
-
source .venv/bin/activate
|
|
34
|
-
pip install -r requirements.txt
|
|
35
|
-
```
|
|
36
|
-
If you are in an IDE, follow your IDE's instructions to activate the virtualenv.
|
|
37
|
-
|
|
38
|
-
Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is
|
|
39
|
-
used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`.
|
|
40
|
-
If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything
|
|
41
|
-
should work as you expect.
|
|
42
|
-
|
|
43
|
-
**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/azure-blob-storage)
|
|
44
|
-
to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_azure_blob_storage/spec.yaml` file.
|
|
45
|
-
Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information.
|
|
46
|
-
See `integration_tests/sample_config.json` for a sample config file.
|
|
47
|
-
|
|
48
|
-
**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source azure-blob-storage test creds`
|
|
49
|
-
and place them into `secrets/config.json`.
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
python main.py spec
|
|
53
|
-
python main.py check --config secrets/config.json
|
|
54
|
-
python main.py discover --config secrets/config.json
|
|
55
|
-
python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
The Airbyte way of building this connector is to use our `airbyte-ci` tool.
|
|
62
|
-
You can follow install instructions [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md#L1).
|
|
63
|
-
Then running the following command will build your connector:
|
|
64
|
-
|
|
65
|
-
```bash
|
|
66
|
-
airbyte-ci connectors --name=source-azure-blob-storage build
|
|
67
|
-
```
|
|
68
|
-
Once the command is done, you will find your connector image in your local docker registry: `airbyte/source-azure-blob-storage:dev`.
|
|
69
|
-
|
|
70
|
-
When contributing on our connector you might need to customize the build process to add a system dependency or set an env var.
|
|
71
|
-
You can customize our build process by adding a `build_customization.py` module to your connector.
|
|
72
|
-
This module should contain a `pre_connector_install` and `post_connector_install` async function that will mutate the base image and the connector container respectively.
|
|
73
|
-
It will be imported at runtime by our build process and the functions will be called if they exist.
|
|
74
|
-
|
|
75
|
-
Here is an example of a `build_customization.py` module:
|
|
76
|
-
```python
|
|
77
|
-
from __future__ import annotations
|
|
78
|
-
|
|
79
|
-
from typing import TYPE_CHECKING
|
|
80
|
-
|
|
81
|
-
if TYPE_CHECKING:
|
|
82
|
-
from dagger import Container
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
async def pre_connector_install(base_image_container: Container) -> Container:
|
|
86
|
-
return await base_image_container.with_env_variable("MY_PRE_BUILD_ENV_VAR", "my_pre_build_env_var_value")
|
|
87
|
-
|
|
88
|
-
async def post_connector_install(connector_container: Container) -> Container:
|
|
89
|
-
return await connector_container.with_env_variable("MY_POST_BUILD_ENV_VAR", "my_post_build_env_var_value")
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
This connector is built using our dynamic build process in `airbyte-ci`.
|
|
93
|
-
The base image used to build it is defined within the metadata.yaml file under the `connectorBuildOptions`.
|
|
94
|
-
The build logic is defined using [Dagger](https://dagger.io/) [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/pipelines/builds/python_connectors.py).
|
|
95
|
-
It does not rely on a Dockerfile.
|
|
96
|
-
|
|
97
|
-
If you would like to patch our connector and build your own a simple approach would be to:
|
|
98
|
-
|
|
99
|
-
1. Create your own Dockerfile based on the latest version of the connector image.
|
|
100
|
-
```Dockerfile
|
|
101
|
-
FROM airbyte/source-azure-blob-storage:latest
|
|
102
|
-
|
|
103
|
-
COPY . ./airbyte/integration_code
|
|
104
|
-
RUN pip install ./airbyte/integration_code
|
|
105
|
-
|
|
106
|
-
```
|
|
107
|
-
Please use this as an example. This is not optimized.
|
|
108
|
-
|
|
109
|
-
2. Build your image:
|
|
110
|
-
```bash
|
|
111
|
-
docker build -t airbyte/source-azure-blob-storage:dev .
|
|
112
|
-
docker run airbyte/source-azure-blob-storage:dev spec
|
|
113
|
-
```
|
|
114
|
-
Then run any of the connector commands as follows:
|
|
115
|
-
```
|
|
116
|
-
docker run --rm airbyte/source-azure-blob-storage:dev spec
|
|
117
|
-
docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-azure-blob-storage:dev check --config /secrets/config.json
|
|
118
|
-
docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-azure-blob-storage:dev discover --config /secrets/config.json
|
|
119
|
-
docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-azure-blob-storage:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md):
|
|
123
|
-
```bash
|
|
124
|
-
airbyte-ci connectors --name=source-azure-blob-storage test
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information.
|
|
128
|
-
If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py.
|
|
129
|
-
|
|
130
|
-
All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development.
|
|
131
|
-
We split dependencies between two groups, dependencies that are:
|
|
132
|
-
* required for your connector to work need to go to `MAIN_REQUIREMENTS` list.
|
|
133
|
-
* required for the testing need to go to `TEST_REQUIREMENTS` list
|
|
134
|
-
|
|
135
|
-
You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
|
|
136
|
-
1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=source-azure-blob-storage test`
|
|
137
|
-
2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
|
|
138
|
-
3. Make sure the `metadata.yaml` content is up to date.
|
|
139
|
-
4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/sources/azure-blob-storage.md`).
|
|
140
|
-
5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
|
|
141
|
-
6. Pat yourself on the back for being an awesome contributor.
|
|
142
|
-
7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
integration_tests/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
|
2
|
-
integration_tests/acceptance.py,sha256=BihZD7mJlxEjTIji9nRzAF3ACiMNMlPrYiSrhMi8Zow,389
|
|
3
|
-
integration_tests/configured_catalog.json,sha256=ddwBuSPtlK8_0ck2bToVnFbuwUq3qecm2bWibo3vV1s,314
|
|
4
|
-
integration_tests/spec.json,sha256=rksO-JaztL548tO3K_26wt0nmCafAeTdT-XCNfjL0Go,19099
|
|
5
|
-
integration_tests/abnormal_states/avro.json,sha256=9rnZTdDhqvzKPN9l5ZVVimgc_w8x_YWz8BFVb_CcZdw,331
|
|
6
|
-
integration_tests/abnormal_states/csv.json,sha256=1WeyKShPYUsJvmCsxKn8Jl4ixTd711p6E8EGmwh4D4U,329
|
|
7
|
-
integration_tests/abnormal_states/jsonl.json,sha256=ikU5jTHlVKpnaOMPlG6o6CSR6BMOfFgg7W0mGkSan74,333
|
|
8
|
-
integration_tests/abnormal_states/jsonl_newlines.json,sha256=lbLXEyGaLs0JfGIlkP8wqSdujS6hBnx-94hoVSrslKU,369
|
|
9
|
-
integration_tests/abnormal_states/parquet.json,sha256=0pK8U8owP5uzbOnprTSui7RaZ_G-eiVjxGGlhCvfRHE,1159
|
|
10
|
-
integration_tests/configured_catalogs/avro.json,sha256=eu1WMwTLxyZLhGzNG4-2v1SEFATeVgs5SO5AcgAMmQw,1021
|
|
11
|
-
integration_tests/configured_catalogs/csv.json,sha256=TxQ7Seb50F-E3vUpzvPGdy7MtupsbX6BjtUe0FOiGDE,913
|
|
12
|
-
integration_tests/configured_catalogs/jsonl.json,sha256=-8Hsm0RGthF-PocgaL4vBBILv8AqHa2ubL8f0q4PTDk,1076
|
|
13
|
-
integration_tests/configured_catalogs/parquet.json,sha256=7yR6hGUibb9s7mJS2_xm3aCfjr8k-Bt9nMmGdw0O8Nc,2082
|
|
14
|
-
source_azure_blob_storage/__init__.py,sha256=dUrGWNqeIfK3e4yI6dgpIzSi8d03kItsUWCHQcmxXlg,281
|
|
15
|
-
source_azure_blob_storage/config.py,sha256=fg1fitDbsX-LXtPGBmG3bfu-jb0XoFYqHxGWsYY6Uy8,2499
|
|
16
|
-
source_azure_blob_storage/legacy_config_transformer.py,sha256=yiS4GgjaVvJtedmweAkq-eEdPG0RWWAAr0T9UV4RmOQ,1306
|
|
17
|
-
source_azure_blob_storage/run.py,sha256=ttnVIs2ey_Y35ATXP2NeGIZrrV95pgxNSO-R4R34WmA,1631
|
|
18
|
-
source_azure_blob_storage/source.py,sha256=dt6BmjpDsxjeSlm6BccfFD3NJblEmDk4gUiJnvjcU40,1186
|
|
19
|
-
source_azure_blob_storage/stream_reader.py,sha256=HA9jNcsMKK-5TVlVelpV2JmjxSf0OfqxC0yBEUtdEe0,2940
|
|
20
|
-
airbyte_source_azure_blob_storage-0.3.4.dist-info/METADATA,sha256=0xMwiW6yNRYpteSmvZLC5zDp5Az343LHgRZ7Vmhn_Xc,7709
|
|
21
|
-
airbyte_source_azure_blob_storage-0.3.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
22
|
-
airbyte_source_azure_blob_storage-0.3.4.dist-info/entry_points.txt,sha256=nqkqWZ26VaT5Wi0k2smHU0cFBCkM_MMd_BIRmpoS5BI,80
|
|
23
|
-
airbyte_source_azure_blob_storage-0.3.4.dist-info/top_level.txt,sha256=gt74MnBvYMxSGNGDxP6PAD5J0-obkzEipLpN2GUQ_G8,44
|
|
24
|
-
airbyte_source_azure_blob_storage-0.3.4.dist-info/RECORD,,
|
integration_tests/__init__.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
[
|
|
2
|
-
{
|
|
3
|
-
"type": "STREAM",
|
|
4
|
-
"stream": {
|
|
5
|
-
"stream_state": {
|
|
6
|
-
"_ab_source_file_last_modified": "2999-01-01T00:00:00.000000Z_test_sample.avro",
|
|
7
|
-
"history": { "test_sample.avro": "2999-01-01T00:00:00.000000Z" }
|
|
8
|
-
},
|
|
9
|
-
"stream_descriptor": { "name": "airbyte-source-azure-blob-storage-test" }
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
]
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
[
|
|
2
|
-
{
|
|
3
|
-
"type": "STREAM",
|
|
4
|
-
"stream": {
|
|
5
|
-
"stream_state": {
|
|
6
|
-
"_ab_source_file_last_modified": "2999-01-01T00:00:00.000000Z_simple_test.csv",
|
|
7
|
-
"history": { "simple_test.csv": "2999-01-01T00:00:00.000000Z" }
|
|
8
|
-
},
|
|
9
|
-
"stream_descriptor": { "name": "airbyte-source-azure-blob-storage-test" }
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
]
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
[
|
|
2
|
-
{
|
|
3
|
-
"type": "STREAM",
|
|
4
|
-
"stream": {
|
|
5
|
-
"stream_state": {
|
|
6
|
-
"_ab_source_file_last_modified": "2999-01-01T00:00:00.000000Z_simple_test.jsonl",
|
|
7
|
-
"history": { "simple_test.jsonl": "2999-01-01T00:00:00.000000Z" }
|
|
8
|
-
},
|
|
9
|
-
"stream_descriptor": { "name": "airbyte-source-azure-blob-storage-test" }
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
]
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
[
|
|
2
|
-
{
|
|
3
|
-
"type": "STREAM",
|
|
4
|
-
"stream": {
|
|
5
|
-
"stream_state": {
|
|
6
|
-
"_ab_source_file_last_modified": "2999-01-01T00:00:00.000000Z_simple_test_newlines.jsonl",
|
|
7
|
-
"history": {
|
|
8
|
-
"simple_test_newlines.jsonl": "2999-01-01T00:00:00.000000Z"
|
|
9
|
-
}
|
|
10
|
-
},
|
|
11
|
-
"stream_descriptor": { "name": "airbyte-source-azure-blob-storage-test" }
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
]
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
[
|
|
2
|
-
{
|
|
3
|
-
"type": "STREAM",
|
|
4
|
-
"stream": {
|
|
5
|
-
"stream_state": {
|
|
6
|
-
"_ab_source_file_last_modified": "2999-01-01T00:00:00.000000Z_simple_test.csv",
|
|
7
|
-
"history": {
|
|
8
|
-
"test_payroll/Fiscal_Year=2021/Leave_Status_as_of_June_30=ACTIVE/Pay_Basis=per%20Annum/4e0ea65c5a074c0592e43f7b950f3ce8-0.parquet": "2999-01-01T00:00:00.000000Z",
|
|
9
|
-
"test_payroll/Fiscal_Year=2021/Leave_Status_as_of_June_30=ACTIVE/Pay_Basis=per%20Hour/4e0ea65c5a074c0592e43f7b950f3ce8-0.parquet": "2999-01-01T00:00:00.000000Z",
|
|
10
|
-
"test_payroll/Fiscal_Year=2021/Leave_Status_as_of_June_30=ON%20LEAVE/Pay_Basis=per%20Annum/4e0ea65c5a074c0592e43f7b950f3ce8-0.parquet": "2999-01-01T00:00:00.000000Z",
|
|
11
|
-
"test_payroll/Fiscal_Year=2022/Leave_Status_as_of_June_30=ACTIVE/Pay_Basis=per%20Annum/4e0ea65c5a074c0592e43f7b950f3ce8-0.parquet": "2999-01-01T00:00:00.000000Z",
|
|
12
|
-
"test_payroll/Fiscal_Year=2022/Leave_Status_as_of_June_30=ON%20LEAVE/Pay_Basis=per%20Annum/4e0ea65c5a074c0592e43f7b950f3ce8-0.parquet": "2999-01-01T00:00:00.000000Z"
|
|
13
|
-
}
|
|
14
|
-
},
|
|
15
|
-
"stream_descriptor": { "name": "airbyte-source-azure-blob-storage-test" }
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
]
|
integration_tests/acceptance.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
|
|
8
|
-
pytest_plugins = ("connector_acceptance_test.plugin",)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
@pytest.fixture(scope="session", autouse=True)
|
|
12
|
-
def connector_setup():
|
|
13
|
-
"""This fixture is a placeholder for external resources that acceptance test might require."""
|
|
14
|
-
# TODO: setup test dependencies
|
|
15
|
-
yield
|
|
16
|
-
# TODO: clean up test dependencies
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"streams": [
|
|
3
|
-
{
|
|
4
|
-
"stream": {
|
|
5
|
-
"name": "airbyte-source-azure-blob-storage-test",
|
|
6
|
-
"json_schema": {},
|
|
7
|
-
"supported_sync_modes": ["full_refresh"],
|
|
8
|
-
"source_defined_cursor": false
|
|
9
|
-
},
|
|
10
|
-
"sync_mode": "full_refresh",
|
|
11
|
-
"destination_sync_mode": "overwrite"
|
|
12
|
-
}
|
|
13
|
-
]
|
|
14
|
-
}
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"streams": [
|
|
3
|
-
{
|
|
4
|
-
"stream": {
|
|
5
|
-
"name": "airbyte-source-azure-blob-storage-test",
|
|
6
|
-
"json_schema": {
|
|
7
|
-
"type": "object",
|
|
8
|
-
"properties": {
|
|
9
|
-
"id": {
|
|
10
|
-
"type": ["integer", "null"]
|
|
11
|
-
},
|
|
12
|
-
"fullname_and_valid": {
|
|
13
|
-
"type": ["object", "null"],
|
|
14
|
-
"fullname": {
|
|
15
|
-
"type": ["string", "null"]
|
|
16
|
-
},
|
|
17
|
-
"valid": {
|
|
18
|
-
"type": ["boolean", "null"]
|
|
19
|
-
}
|
|
20
|
-
},
|
|
21
|
-
"_ab_source_file_last_modified": {
|
|
22
|
-
"type": "string",
|
|
23
|
-
"format": "date-time"
|
|
24
|
-
},
|
|
25
|
-
"_ab_source_file_url": {
|
|
26
|
-
"type": "string"
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
},
|
|
30
|
-
"supported_sync_modes": ["full_refresh", "incremental"],
|
|
31
|
-
"source_defined_cursor": true,
|
|
32
|
-
"default_cursor_field": ["_ab_source_file_last_modified"]
|
|
33
|
-
},
|
|
34
|
-
"sync_mode": "incremental",
|
|
35
|
-
"destination_sync_mode": "append"
|
|
36
|
-
}
|
|
37
|
-
]
|
|
38
|
-
}
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"streams": [
|
|
3
|
-
{
|
|
4
|
-
"stream": {
|
|
5
|
-
"name": "airbyte-source-azure-blob-storage-test",
|
|
6
|
-
"json_schema": {
|
|
7
|
-
"type": "object",
|
|
8
|
-
"properties": {
|
|
9
|
-
"id": {
|
|
10
|
-
"type": ["null", "integer"]
|
|
11
|
-
},
|
|
12
|
-
"name": {
|
|
13
|
-
"type": ["null", "string"]
|
|
14
|
-
},
|
|
15
|
-
"valid": {
|
|
16
|
-
"type": ["null", "boolean"]
|
|
17
|
-
},
|
|
18
|
-
"_ab_source_file_last_modified": {
|
|
19
|
-
"type": "string",
|
|
20
|
-
"format": "date-time"
|
|
21
|
-
},
|
|
22
|
-
"_ab_source_file_url": {
|
|
23
|
-
"type": "string"
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
},
|
|
27
|
-
"supported_sync_modes": ["full_refresh", "incremental"],
|
|
28
|
-
"source_defined_cursor": true,
|
|
29
|
-
"default_cursor_field": ["_ab_source_file_last_modified"]
|
|
30
|
-
},
|
|
31
|
-
"sync_mode": "incremental",
|
|
32
|
-
"destination_sync_mode": "append"
|
|
33
|
-
}
|
|
34
|
-
]
|
|
35
|
-
}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"streams": [
|
|
3
|
-
{
|
|
4
|
-
"stream": {
|
|
5
|
-
"name": "airbyte-source-azure-blob-storage-test",
|
|
6
|
-
"json_schema": {
|
|
7
|
-
"type": "object",
|
|
8
|
-
"properties": {
|
|
9
|
-
"id": {
|
|
10
|
-
"type": ["null", "integer"]
|
|
11
|
-
},
|
|
12
|
-
"name": {
|
|
13
|
-
"type": ["null", "string"]
|
|
14
|
-
},
|
|
15
|
-
"valid": {
|
|
16
|
-
"type": ["null", "boolean"]
|
|
17
|
-
},
|
|
18
|
-
"value": {
|
|
19
|
-
"type": ["null", "number"]
|
|
20
|
-
},
|
|
21
|
-
"event_date": {
|
|
22
|
-
"type": ["null", "string"]
|
|
23
|
-
},
|
|
24
|
-
"_ab_source_file_last_modified": {
|
|
25
|
-
"type": "string",
|
|
26
|
-
"format": "date-time"
|
|
27
|
-
},
|
|
28
|
-
"_ab_source_file_url": {
|
|
29
|
-
"type": "string"
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
},
|
|
33
|
-
"supported_sync_modes": ["full_refresh", "incremental"],
|
|
34
|
-
"source_defined_cursor": true,
|
|
35
|
-
"default_cursor_field": ["_ab_source_file_last_modified"]
|
|
36
|
-
},
|
|
37
|
-
"sync_mode": "incremental",
|
|
38
|
-
"destination_sync_mode": "append"
|
|
39
|
-
}
|
|
40
|
-
]
|
|
41
|
-
}
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"streams": [
|
|
3
|
-
{
|
|
4
|
-
"stream": {
|
|
5
|
-
"name": "airbyte-source-azure-blob-storage-test",
|
|
6
|
-
"json_schema": {
|
|
7
|
-
"type": "object",
|
|
8
|
-
"properties": {
|
|
9
|
-
"Payroll_Number": {
|
|
10
|
-
"type": ["null", "number"]
|
|
11
|
-
},
|
|
12
|
-
"Last_Name": {
|
|
13
|
-
"type": ["null", "string"]
|
|
14
|
-
},
|
|
15
|
-
"First_Name": {
|
|
16
|
-
"type": ["null", "string"]
|
|
17
|
-
},
|
|
18
|
-
"Mid_Init": {
|
|
19
|
-
"type": ["null", "string"]
|
|
20
|
-
},
|
|
21
|
-
"Agency_Start_Date": {
|
|
22
|
-
"type": ["null", "string"]
|
|
23
|
-
},
|
|
24
|
-
"Work_Location_Borough": {
|
|
25
|
-
"type": ["null", "number"]
|
|
26
|
-
},
|
|
27
|
-
"Title_Description": {
|
|
28
|
-
"type": ["null", "string"]
|
|
29
|
-
},
|
|
30
|
-
"Base_Salary": {
|
|
31
|
-
"type": ["null", "number"]
|
|
32
|
-
},
|
|
33
|
-
"Regular_Hours": {
|
|
34
|
-
"type": ["null", "number"]
|
|
35
|
-
},
|
|
36
|
-
"Regular_Gross_Paid": {
|
|
37
|
-
"type": ["null", "number"]
|
|
38
|
-
},
|
|
39
|
-
"OT_Hours": {
|
|
40
|
-
"type": ["null", "number"]
|
|
41
|
-
},
|
|
42
|
-
"Total_OT_Paid": {
|
|
43
|
-
"type": ["null", "number"]
|
|
44
|
-
},
|
|
45
|
-
"Total_Other_Pay": {
|
|
46
|
-
"type": ["null", "number"]
|
|
47
|
-
},
|
|
48
|
-
"Fiscal_Year": {
|
|
49
|
-
"type": ["null", "string"]
|
|
50
|
-
},
|
|
51
|
-
"Leave_Status_as_of_June_30": {
|
|
52
|
-
"type": ["null", "string"]
|
|
53
|
-
},
|
|
54
|
-
"Pay_Basis": {
|
|
55
|
-
"type": ["null", "string"]
|
|
56
|
-
},
|
|
57
|
-
"_ab_source_file_last_modified": {
|
|
58
|
-
"type": "string",
|
|
59
|
-
"format": "date-time"
|
|
60
|
-
},
|
|
61
|
-
"_ab_source_file_url": {
|
|
62
|
-
"type": "string"
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
},
|
|
66
|
-
"supported_sync_modes": ["full_refresh", "incremental"],
|
|
67
|
-
"source_defined_cursor": true,
|
|
68
|
-
"default_cursor_field": ["_ab_source_file_last_modified"]
|
|
69
|
-
},
|
|
70
|
-
"sync_mode": "incremental",
|
|
71
|
-
"destination_sync_mode": "append"
|
|
72
|
-
}
|
|
73
|
-
]
|
|
74
|
-
}
|
integration_tests/spec.json
DELETED
|
@@ -1,387 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"documentationUrl": "https://docs.airbyte.com/integrations/sources/azure-blob-storage",
|
|
3
|
-
"connectionSpecification": {
|
|
4
|
-
"title": "Config",
|
|
5
|
-
"description": "NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes\nbecause it is responsible for converting legacy Azure Blob Storage v0 configs into v1 configs using the File-Based CDK.",
|
|
6
|
-
"type": "object",
|
|
7
|
-
"properties": {
|
|
8
|
-
"start_date": {
|
|
9
|
-
"title": "Start Date",
|
|
10
|
-
"description": "UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.",
|
|
11
|
-
"examples": ["2021-01-01T00:00:00.000000Z"],
|
|
12
|
-
"format": "date-time",
|
|
13
|
-
"pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{6}Z$",
|
|
14
|
-
"pattern_descriptor": "YYYY-MM-DDTHH:mm:ss.SSSSSSZ",
|
|
15
|
-
"order": 1,
|
|
16
|
-
"type": "string"
|
|
17
|
-
},
|
|
18
|
-
"streams": {
|
|
19
|
-
"title": "The list of streams to sync",
|
|
20
|
-
"description": "Each instance of this configuration defines a <a href=\"https://docs.airbyte.com/cloud/core-concepts#stream\">stream</a>. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to warehouse destination such as Snowflake or BigQuery, each stream is a separate table.",
|
|
21
|
-
"order": 10,
|
|
22
|
-
"type": "array",
|
|
23
|
-
"items": {
|
|
24
|
-
"title": "FileBasedStreamConfig",
|
|
25
|
-
"type": "object",
|
|
26
|
-
"properties": {
|
|
27
|
-
"name": {
|
|
28
|
-
"title": "Name",
|
|
29
|
-
"description": "The name of the stream.",
|
|
30
|
-
"type": "string"
|
|
31
|
-
},
|
|
32
|
-
"globs": {
|
|
33
|
-
"title": "Globs",
|
|
34
|
-
"default": ["**"],
|
|
35
|
-
"order": 1,
|
|
36
|
-
"description": "The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look <a href=\"https://en.wikipedia.org/wiki/Glob_(programming)\">here</a>.",
|
|
37
|
-
"type": "array",
|
|
38
|
-
"items": {
|
|
39
|
-
"type": "string"
|
|
40
|
-
}
|
|
41
|
-
},
|
|
42
|
-
"legacy_prefix": {
|
|
43
|
-
"title": "Legacy Prefix",
|
|
44
|
-
"description": "The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.",
|
|
45
|
-
"airbyte_hidden": true,
|
|
46
|
-
"type": "string"
|
|
47
|
-
},
|
|
48
|
-
"validation_policy": {
|
|
49
|
-
"title": "Validation Policy",
|
|
50
|
-
"description": "The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema.",
|
|
51
|
-
"default": "Emit Record",
|
|
52
|
-
"enum": ["Emit Record", "Skip Record", "Wait for Discover"]
|
|
53
|
-
},
|
|
54
|
-
"input_schema": {
|
|
55
|
-
"title": "Input Schema",
|
|
56
|
-
"description": "The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.",
|
|
57
|
-
"type": "string"
|
|
58
|
-
},
|
|
59
|
-
"primary_key": {
|
|
60
|
-
"title": "Primary Key",
|
|
61
|
-
"description": "The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.",
|
|
62
|
-
"type": "string",
|
|
63
|
-
"airbyte_hidden": true
|
|
64
|
-
},
|
|
65
|
-
"days_to_sync_if_history_is_full": {
|
|
66
|
-
"title": "Days To Sync If History Is Full",
|
|
67
|
-
"description": "When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.",
|
|
68
|
-
"default": 3,
|
|
69
|
-
"type": "integer"
|
|
70
|
-
},
|
|
71
|
-
"format": {
|
|
72
|
-
"title": "Format",
|
|
73
|
-
"description": "The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.",
|
|
74
|
-
"type": "object",
|
|
75
|
-
"oneOf": [
|
|
76
|
-
{
|
|
77
|
-
"title": "Avro Format",
|
|
78
|
-
"type": "object",
|
|
79
|
-
"properties": {
|
|
80
|
-
"filetype": {
|
|
81
|
-
"title": "Filetype",
|
|
82
|
-
"default": "avro",
|
|
83
|
-
"const": "avro",
|
|
84
|
-
"type": "string"
|
|
85
|
-
},
|
|
86
|
-
"double_as_string": {
|
|
87
|
-
"title": "Convert Double Fields to Strings",
|
|
88
|
-
"description": "Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss precision when handling floating point numbers.",
|
|
89
|
-
"default": false,
|
|
90
|
-
"type": "boolean"
|
|
91
|
-
}
|
|
92
|
-
},
|
|
93
|
-
"required": ["filetype"]
|
|
94
|
-
},
|
|
95
|
-
{
|
|
96
|
-
"title": "CSV Format",
|
|
97
|
-
"type": "object",
|
|
98
|
-
"properties": {
|
|
99
|
-
"filetype": {
|
|
100
|
-
"title": "Filetype",
|
|
101
|
-
"default": "csv",
|
|
102
|
-
"const": "csv",
|
|
103
|
-
"type": "string"
|
|
104
|
-
},
|
|
105
|
-
"delimiter": {
|
|
106
|
-
"title": "Delimiter",
|
|
107
|
-
"description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.",
|
|
108
|
-
"default": ",",
|
|
109
|
-
"type": "string"
|
|
110
|
-
},
|
|
111
|
-
"quote_char": {
|
|
112
|
-
"title": "Quote Character",
|
|
113
|
-
"description": "The character used for quoting CSV values. To disallow quoting, make this field blank.",
|
|
114
|
-
"default": "\"",
|
|
115
|
-
"type": "string"
|
|
116
|
-
},
|
|
117
|
-
"escape_char": {
|
|
118
|
-
"title": "Escape Character",
|
|
119
|
-
"description": "The character used for escaping special characters. To disallow escaping, leave this field blank.",
|
|
120
|
-
"type": "string"
|
|
121
|
-
},
|
|
122
|
-
"encoding": {
|
|
123
|
-
"title": "Encoding",
|
|
124
|
-
"description": "The character encoding of the CSV data. Leave blank to default to <strong>UTF8</strong>. See <a href=\"https://docs.python.org/3/library/codecs.html#standard-encodings\" target=\"_blank\">list of python encodings</a> for allowable options.",
|
|
125
|
-
"default": "utf8",
|
|
126
|
-
"type": "string"
|
|
127
|
-
},
|
|
128
|
-
"double_quote": {
|
|
129
|
-
"title": "Double Quote",
|
|
130
|
-
"description": "Whether two quotes in a quoted CSV value denote a single quote in the data.",
|
|
131
|
-
"default": true,
|
|
132
|
-
"type": "boolean"
|
|
133
|
-
},
|
|
134
|
-
"null_values": {
|
|
135
|
-
"title": "Null Values",
|
|
136
|
-
"description": "A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.",
|
|
137
|
-
"default": [],
|
|
138
|
-
"type": "array",
|
|
139
|
-
"items": {
|
|
140
|
-
"type": "string"
|
|
141
|
-
},
|
|
142
|
-
"uniqueItems": true
|
|
143
|
-
},
|
|
144
|
-
"strings_can_be_null": {
|
|
145
|
-
"title": "Strings Can Be Null",
|
|
146
|
-
"description": "Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself.",
|
|
147
|
-
"default": true,
|
|
148
|
-
"type": "boolean"
|
|
149
|
-
},
|
|
150
|
-
"skip_rows_before_header": {
|
|
151
|
-
"title": "Skip Rows Before Header",
|
|
152
|
-
"description": "The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field.",
|
|
153
|
-
"default": 0,
|
|
154
|
-
"type": "integer"
|
|
155
|
-
},
|
|
156
|
-
"skip_rows_after_header": {
|
|
157
|
-
"title": "Skip Rows After Header",
|
|
158
|
-
"description": "The number of rows to skip after the header row.",
|
|
159
|
-
"default": 0,
|
|
160
|
-
"type": "integer"
|
|
161
|
-
},
|
|
162
|
-
"header_definition": {
|
|
163
|
-
"title": "CSV Header Definition",
|
|
164
|
-
"description": "How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.",
|
|
165
|
-
"default": {
|
|
166
|
-
"header_definition_type": "From CSV"
|
|
167
|
-
},
|
|
168
|
-
"oneOf": [
|
|
169
|
-
{
|
|
170
|
-
"title": "From CSV",
|
|
171
|
-
"type": "object",
|
|
172
|
-
"properties": {
|
|
173
|
-
"header_definition_type": {
|
|
174
|
-
"title": "Header Definition Type",
|
|
175
|
-
"default": "From CSV",
|
|
176
|
-
"const": "From CSV",
|
|
177
|
-
"type": "string"
|
|
178
|
-
}
|
|
179
|
-
},
|
|
180
|
-
"required": ["header_definition_type"]
|
|
181
|
-
},
|
|
182
|
-
{
|
|
183
|
-
"title": "Autogenerated",
|
|
184
|
-
"type": "object",
|
|
185
|
-
"properties": {
|
|
186
|
-
"header_definition_type": {
|
|
187
|
-
"title": "Header Definition Type",
|
|
188
|
-
"default": "Autogenerated",
|
|
189
|
-
"const": "Autogenerated",
|
|
190
|
-
"type": "string"
|
|
191
|
-
}
|
|
192
|
-
},
|
|
193
|
-
"required": ["header_definition_type"]
|
|
194
|
-
},
|
|
195
|
-
{
|
|
196
|
-
"title": "User Provided",
|
|
197
|
-
"type": "object",
|
|
198
|
-
"properties": {
|
|
199
|
-
"header_definition_type": {
|
|
200
|
-
"title": "Header Definition Type",
|
|
201
|
-
"default": "User Provided",
|
|
202
|
-
"const": "User Provided",
|
|
203
|
-
"type": "string"
|
|
204
|
-
},
|
|
205
|
-
"column_names": {
|
|
206
|
-
"title": "Column Names",
|
|
207
|
-
"description": "The column names that will be used while emitting the CSV records",
|
|
208
|
-
"type": "array",
|
|
209
|
-
"items": {
|
|
210
|
-
"type": "string"
|
|
211
|
-
}
|
|
212
|
-
}
|
|
213
|
-
},
|
|
214
|
-
"required": ["column_names", "header_definition_type"]
|
|
215
|
-
}
|
|
216
|
-
],
|
|
217
|
-
"type": "object"
|
|
218
|
-
},
|
|
219
|
-
"true_values": {
|
|
220
|
-
"title": "True Values",
|
|
221
|
-
"description": "A set of case-sensitive strings that should be interpreted as true values.",
|
|
222
|
-
"default": ["y", "yes", "t", "true", "on", "1"],
|
|
223
|
-
"type": "array",
|
|
224
|
-
"items": {
|
|
225
|
-
"type": "string"
|
|
226
|
-
},
|
|
227
|
-
"uniqueItems": true
|
|
228
|
-
},
|
|
229
|
-
"false_values": {
|
|
230
|
-
"title": "False Values",
|
|
231
|
-
"description": "A set of case-sensitive strings that should be interpreted as false values.",
|
|
232
|
-
"default": ["n", "no", "f", "false", "off", "0"],
|
|
233
|
-
"type": "array",
|
|
234
|
-
"items": {
|
|
235
|
-
"type": "string"
|
|
236
|
-
},
|
|
237
|
-
"uniqueItems": true
|
|
238
|
-
},
|
|
239
|
-
"inference_type": {
|
|
240
|
-
"title": "Inference Type",
|
|
241
|
-
"description": "How to infer the types of the columns. If none, inference default to strings.",
|
|
242
|
-
"default": "None",
|
|
243
|
-
"airbyte_hidden": true,
|
|
244
|
-
"enum": ["None", "Primitive Types Only"]
|
|
245
|
-
}
|
|
246
|
-
},
|
|
247
|
-
"required": ["filetype"]
|
|
248
|
-
},
|
|
249
|
-
{
|
|
250
|
-
"title": "Jsonl Format",
|
|
251
|
-
"type": "object",
|
|
252
|
-
"properties": {
|
|
253
|
-
"filetype": {
|
|
254
|
-
"title": "Filetype",
|
|
255
|
-
"default": "jsonl",
|
|
256
|
-
"const": "jsonl",
|
|
257
|
-
"type": "string"
|
|
258
|
-
}
|
|
259
|
-
},
|
|
260
|
-
"required": ["filetype"]
|
|
261
|
-
},
|
|
262
|
-
{
|
|
263
|
-
"title": "Parquet Format",
|
|
264
|
-
"type": "object",
|
|
265
|
-
"properties": {
|
|
266
|
-
"filetype": {
|
|
267
|
-
"title": "Filetype",
|
|
268
|
-
"default": "parquet",
|
|
269
|
-
"const": "parquet",
|
|
270
|
-
"type": "string"
|
|
271
|
-
},
|
|
272
|
-
"decimal_as_float": {
|
|
273
|
-
"title": "Convert Decimal Fields to Floats",
|
|
274
|
-
"description": "Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended.",
|
|
275
|
-
"default": false,
|
|
276
|
-
"type": "boolean"
|
|
277
|
-
}
|
|
278
|
-
},
|
|
279
|
-
"required": ["filetype"]
|
|
280
|
-
},
|
|
281
|
-
{
|
|
282
|
-
"title": "Document File Type Format (Experimental)",
|
|
283
|
-
"type": "object",
|
|
284
|
-
"properties": {
|
|
285
|
-
"filetype": {
|
|
286
|
-
"title": "Filetype",
|
|
287
|
-
"default": "unstructured",
|
|
288
|
-
"const": "unstructured",
|
|
289
|
-
"type": "string"
|
|
290
|
-
},
|
|
291
|
-
"skip_unprocessable_files": {
|
|
292
|
-
"type": "boolean",
|
|
293
|
-
"default": true,
|
|
294
|
-
"title": "Skip Unprocessable Files",
|
|
295
|
-
"description": "If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync.",
|
|
296
|
-
"always_show": true
|
|
297
|
-
},
|
|
298
|
-
"strategy": {
|
|
299
|
-
"type": "string",
|
|
300
|
-
"always_show": true,
|
|
301
|
-
"order": 0,
|
|
302
|
-
"default": "auto",
|
|
303
|
-
"title": "Parsing Strategy",
|
|
304
|
-
"enum": ["auto", "fast", "ocr_only", "hi_res"],
|
|
305
|
-
"description": "The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf"
|
|
306
|
-
},
|
|
307
|
-
"processing": {
|
|
308
|
-
"title": "Processing",
|
|
309
|
-
"description": "Processing configuration",
|
|
310
|
-
"default": {
|
|
311
|
-
"mode": "local"
|
|
312
|
-
},
|
|
313
|
-
"type": "object",
|
|
314
|
-
"oneOf": [
|
|
315
|
-
{
|
|
316
|
-
"title": "Local",
|
|
317
|
-
"type": "object",
|
|
318
|
-
"properties": {
|
|
319
|
-
"mode": {
|
|
320
|
-
"title": "Mode",
|
|
321
|
-
"default": "local",
|
|
322
|
-
"const": "local",
|
|
323
|
-
"enum": ["local"],
|
|
324
|
-
"type": "string"
|
|
325
|
-
}
|
|
326
|
-
},
|
|
327
|
-
"description": "Process files locally, supporting `fast` and `ocr` modes. This is the default option.",
|
|
328
|
-
"required": ["mode"]
|
|
329
|
-
}
|
|
330
|
-
]
|
|
331
|
-
}
|
|
332
|
-
},
|
|
333
|
-
"description": "Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.",
|
|
334
|
-
"required": ["filetype"]
|
|
335
|
-
}
|
|
336
|
-
]
|
|
337
|
-
},
|
|
338
|
-
"schemaless": {
|
|
339
|
-
"title": "Schemaless",
|
|
340
|
-
"description": "When enabled, syncs will not validate or structure records against the stream's schema.",
|
|
341
|
-
"default": false,
|
|
342
|
-
"type": "boolean"
|
|
343
|
-
}
|
|
344
|
-
},
|
|
345
|
-
"required": ["name", "format"]
|
|
346
|
-
}
|
|
347
|
-
},
|
|
348
|
-
"azure_blob_storage_account_name": {
|
|
349
|
-
"title": "Azure Blob Storage account name",
|
|
350
|
-
"description": "The account's name of the Azure Blob Storage.",
|
|
351
|
-
"examples": ["airbyte5storage"],
|
|
352
|
-
"order": 2,
|
|
353
|
-
"type": "string"
|
|
354
|
-
},
|
|
355
|
-
"azure_blob_storage_account_key": {
|
|
356
|
-
"title": "Azure Blob Storage account key",
|
|
357
|
-
"description": "The Azure blob storage account key.",
|
|
358
|
-
"airbyte_secret": true,
|
|
359
|
-
"examples": [
|
|
360
|
-
"Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd=="
|
|
361
|
-
],
|
|
362
|
-
"order": 3,
|
|
363
|
-
"type": "string"
|
|
364
|
-
},
|
|
365
|
-
"azure_blob_storage_container_name": {
|
|
366
|
-
"title": "Azure blob storage container (Bucket) Name",
|
|
367
|
-
"description": "The name of the Azure blob storage container.",
|
|
368
|
-
"examples": ["airbytetescontainername"],
|
|
369
|
-
"order": 4,
|
|
370
|
-
"type": "string"
|
|
371
|
-
},
|
|
372
|
-
"azure_blob_storage_endpoint": {
|
|
373
|
-
"title": "Endpoint Domain Name",
|
|
374
|
-
"description": "This is Azure Blob Storage endpoint domain name. Leave default value (or leave it empty if run container from command line) to use Microsoft native from example.",
|
|
375
|
-
"examples": ["blob.core.windows.net"],
|
|
376
|
-
"order": 11,
|
|
377
|
-
"type": "string"
|
|
378
|
-
}
|
|
379
|
-
},
|
|
380
|
-
"required": [
|
|
381
|
-
"streams",
|
|
382
|
-
"azure_blob_storage_account_name",
|
|
383
|
-
"azure_blob_storage_account_key",
|
|
384
|
-
"azure_blob_storage_container_name"
|
|
385
|
-
]
|
|
386
|
-
}
|
|
387
|
-
}
|