airbyte-cdk 6.7.0.dev10__py3-none-any.whl → 6.7.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +50 -18
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +13 -13
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +24 -33
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +15 -19
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +19 -69
- {airbyte_cdk-6.7.0.dev10.dist-info → airbyte_cdk-6.7.0rc1.dist-info}/METADATA +5 -5
- {airbyte_cdk-6.7.0.dev10.dist-info → airbyte_cdk-6.7.0rc1.dist-info}/RECORD +9 -14
- airbyte_cdk/cli/__init__.py +0 -1
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -6
- airbyte_cdk/cli/source_declarative_manifest/_run.py +0 -223
- airbyte_cdk/cli/source_declarative_manifest/spec.json +0 -17
- airbyte_cdk-6.7.0.dev10.dist-info/entry_points.txt +0 -3
- {airbyte_cdk-6.7.0.dev10.dist-info → airbyte_cdk-6.7.0rc1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.0.dev10.dist-info → airbyte_cdk-6.7.0rc1.dist-info}/WHEEL +0 -0
@@ -48,6 +48,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
|
|
48
48
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
49
49
|
AlwaysAvailableAvailabilityStrategy,
|
50
50
|
)
|
51
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
|
51
52
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
52
53
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
53
54
|
|
@@ -193,31 +194,44 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
193
194
|
declarative_stream.name
|
194
195
|
].get("incremental_sync")
|
195
196
|
|
196
|
-
|
197
|
+
is_without_partition_router_nor_cursor = not bool(
|
197
198
|
datetime_based_cursor_component_definition
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
199
|
+
) and not (
|
200
|
+
name_to_stream_mapping[declarative_stream.name]
|
201
|
+
.get("retriever", {})
|
202
|
+
.get("partition_router")
|
203
|
+
)
|
204
|
+
is_datetime_incremental_without_partition_routing = (
|
205
|
+
self._is_datetime_incremental_without_partition_routing(
|
206
|
+
datetime_based_cursor_component_definition, declarative_stream
|
202
207
|
)
|
203
|
-
|
204
|
-
|
208
|
+
)
|
209
|
+
if (
|
210
|
+
is_without_partition_router_nor_cursor
|
211
|
+
or is_datetime_incremental_without_partition_routing
|
205
212
|
):
|
206
213
|
stream_state = state_manager.get_stream_state(
|
207
214
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
208
215
|
)
|
209
216
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
217
|
+
if is_datetime_incremental_without_partition_routing:
|
218
|
+
cursor: Cursor = (
|
219
|
+
self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
220
|
+
state_manager=state_manager,
|
221
|
+
model_type=DatetimeBasedCursorModel,
|
222
|
+
component_definition=datetime_based_cursor_component_definition,
|
223
|
+
stream_name=declarative_stream.name,
|
224
|
+
stream_namespace=declarative_stream.namespace,
|
225
|
+
config=config or {},
|
226
|
+
stream_state=stream_state,
|
227
|
+
)
|
228
|
+
)
|
229
|
+
else:
|
230
|
+
cursor = FinalStateCursor(
|
231
|
+
declarative_stream.name,
|
232
|
+
declarative_stream.namespace,
|
233
|
+
self.message_repository,
|
219
234
|
)
|
220
|
-
)
|
221
235
|
|
222
236
|
partition_generator = StreamSlicerPartitionGenerator(
|
223
237
|
DeclarativePartitionFactory(
|
@@ -240,7 +254,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
240
254
|
json_schema=declarative_stream.get_json_schema(),
|
241
255
|
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
242
256
|
primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
|
243
|
-
cursor_field=cursor.cursor_field.cursor_field_key
|
257
|
+
cursor_field=cursor.cursor_field.cursor_field_key
|
258
|
+
if hasattr(cursor, "cursor_field")
|
259
|
+
else None,
|
244
260
|
logger=self.logger,
|
245
261
|
cursor=cursor,
|
246
262
|
)
|
@@ -252,6 +268,22 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
252
268
|
|
253
269
|
return concurrent_streams, synchronous_streams
|
254
270
|
|
271
|
+
def _is_datetime_incremental_without_partition_routing(
|
272
|
+
self,
|
273
|
+
datetime_based_cursor_component_definition: Mapping[str, Any],
|
274
|
+
declarative_stream: DeclarativeStream,
|
275
|
+
) -> bool:
|
276
|
+
return (
|
277
|
+
bool(datetime_based_cursor_component_definition)
|
278
|
+
and datetime_based_cursor_component_definition.get("type", "")
|
279
|
+
== DatetimeBasedCursorModel.__name__
|
280
|
+
and self._stream_supports_concurrent_partition_processing(
|
281
|
+
declarative_stream=declarative_stream
|
282
|
+
)
|
283
|
+
and hasattr(declarative_stream.retriever, "stream_slicer")
|
284
|
+
and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
285
|
+
)
|
286
|
+
|
255
287
|
def _stream_supports_concurrent_partition_processing(
|
256
288
|
self, declarative_stream: DeclarativeStream
|
257
289
|
) -> bool:
|
@@ -2790,21 +2790,21 @@ interpolation:
|
|
2790
2790
|
- created_at: "2020-01-01 00:00:00.000+00:00"
|
2791
2791
|
- updated_at: "2020-01-02 00:00:00.000+00:00"
|
2792
2792
|
macros:
|
2793
|
-
- title:
|
2793
|
+
- title: Now (UTC)
|
2794
2794
|
description: Returns the current date and time in the UTC timezone.
|
2795
2795
|
arguments: {}
|
2796
2796
|
return_type: Datetime
|
2797
2797
|
examples:
|
2798
2798
|
- "'{{ now_utc() }}' -> '2021-09-01 00:00:00+00:00'"
|
2799
2799
|
- "'{{ now_utc().strftime('%Y-%m-%d') }}' -> '2021-09-01'"
|
2800
|
-
- title:
|
2800
|
+
- title: Today (UTC)
|
2801
2801
|
description: Returns the current date in UTC timezone. The output is a date object.
|
2802
2802
|
arguments: {}
|
2803
2803
|
return_type: Date
|
2804
2804
|
examples:
|
2805
2805
|
- "'{{ today_utc() }}' -> '2021-09-01'"
|
2806
2806
|
- "'{{ today_utc().strftime('%Y/%m/%d')}}' -> '2021/09/01'"
|
2807
|
-
- title:
|
2807
|
+
- title: Timestamp
|
2808
2808
|
description: Converts a number or a string representing a datetime (formatted as ISO8601) to a timestamp. If the input is a number, it is converted to an int. If no timezone is specified, the string is interpreted as UTC.
|
2809
2809
|
arguments:
|
2810
2810
|
datetime: A string formatted as ISO8601 or an integer representing a unix timestamp
|
@@ -2815,7 +2815,7 @@ interpolation:
|
|
2815
2815
|
- "'{{ timestamp('2022-02-28T00:00:00Z') }}' -> 1646006400"
|
2816
2816
|
- "'{{ timestamp('2022-02-28 00:00:00Z') }}' -> 1646006400"
|
2817
2817
|
- "'{{ timestamp('2022-02-28T00:00:00-08:00') }}' -> 1646035200"
|
2818
|
-
- title:
|
2818
|
+
- title: Max
|
2819
2819
|
description: Returns the largest object of an iterable, or of two or more arguments.
|
2820
2820
|
arguments:
|
2821
2821
|
args: iterable or a sequence of two or more arguments
|
@@ -2823,7 +2823,7 @@ interpolation:
|
|
2823
2823
|
examples:
|
2824
2824
|
- "'{{ max(2, 3) }}' -> 3"
|
2825
2825
|
- "'{{ max([2, 3]) }}' -> 3"
|
2826
|
-
- title:
|
2826
|
+
- title: Day Delta
|
2827
2827
|
description: Returns the datetime of now() + num_days.
|
2828
2828
|
arguments:
|
2829
2829
|
num_days: The number of days to add to now
|
@@ -2833,8 +2833,8 @@ interpolation:
|
|
2833
2833
|
- "'{{ day_delta(1) }}' -> '2021-09-02T00:00:00.000000+0000'"
|
2834
2834
|
- "'{{ day_delta(-1) }}' -> '2021-08-31T00:00:00.000000+0000'"
|
2835
2835
|
- "'{{ day_delta(25, format='%Y-%m-%d') }}' -> '2021-09-26'"
|
2836
|
-
- title:
|
2837
|
-
description: Converts an ISO8601
|
2836
|
+
- title: Duration
|
2837
|
+
description: Converts an ISO8601 duration to datetime.timedelta.
|
2838
2838
|
arguments:
|
2839
2839
|
duration_string: "A string representing an ISO8601 duration. See https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm for more details."
|
2840
2840
|
return_type: datetime.timedelta
|
@@ -2842,7 +2842,7 @@ interpolation:
|
|
2842
2842
|
- "'{{ duration('P1D') }}' -> '1 day, 0:00:00'"
|
2843
2843
|
- "'{{ duration('P6DT23H') }}' -> '6 days, 23:00:00'"
|
2844
2844
|
- "'{{ (now_utc() - duration('P1D')).strftime('%Y-%m-%dT%H:%M:%SZ') }}' -> '2021-08-31T00:00:00Z'"
|
2845
|
-
- title:
|
2845
|
+
- title: Format Datetime
|
2846
2846
|
description: Converts a datetime or a datetime-string to the specified format.
|
2847
2847
|
arguments:
|
2848
2848
|
datetime: The datetime object or a string to convert. If datetime is a string, it must be formatted as ISO8601.
|
@@ -2854,7 +2854,7 @@ interpolation:
|
|
2854
2854
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
|
2855
2855
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
|
2856
2856
|
filters:
|
2857
|
-
- title:
|
2857
|
+
- title: Hash
|
2858
2858
|
description: Convert the specified value to a hashed string.
|
2859
2859
|
arguments:
|
2860
2860
|
hash_type: Hash type to use for the conversion ('md5' is the default value).
|
@@ -2864,26 +2864,26 @@ interpolation:
|
|
2864
2864
|
- "{{ 'Test client_secret' | hash() }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
|
2865
2865
|
- "{{ 'Test client_secret' | hash('md5') }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
|
2866
2866
|
- "{{ 'Test client_secret' | hash('md5', salt='salt') }} -> '5011a0168579c2d94cbbe1c6ad14327c'"
|
2867
|
-
- title:
|
2867
|
+
- title: Base64 encoder
|
2868
2868
|
description: Convert the specified value to a string in the base64 format.
|
2869
2869
|
arguments: {}
|
2870
2870
|
return_type: str
|
2871
2871
|
examples:
|
2872
2872
|
- "{{ 'Test client_secret' | base64encode }} -> 'VGVzdCBjbGllbnRfc2VjcmV0'"
|
2873
|
-
- title:
|
2873
|
+
- title: Base64 decoder
|
2874
2874
|
description: Decodes the specified base64 format value into a common string.
|
2875
2875
|
arguments: {}
|
2876
2876
|
return_type: str
|
2877
2877
|
examples:
|
2878
2878
|
- "{{ 'ZmFrZSByZWZyZXNoX3Rva2VuIHZhbHVl' | base64decode }} -> 'fake refresh_token value'"
|
2879
|
-
- title:
|
2879
|
+
- title: String
|
2880
2880
|
description: Converts the specified value to a string.
|
2881
2881
|
arguments: {}
|
2882
2882
|
return_type: str
|
2883
2883
|
examples:
|
2884
2884
|
- '{{ 1 | string }} -> "1"'
|
2885
2885
|
- '{{ ["hello", "world"] | string }} -> "["hello", "world"]"'
|
2886
|
-
- title:
|
2886
|
+
- title: Regex Search
|
2887
2887
|
description: Match the input string against a regular expression and return the first match.
|
2888
2888
|
arguments:
|
2889
2889
|
regex: The regular expression to search for. It must include a capture group.
|
@@ -5,10 +5,10 @@
|
|
5
5
|
import json
|
6
6
|
import logging
|
7
7
|
import pkgutil
|
8
|
+
import re
|
8
9
|
from copy import deepcopy
|
9
10
|
from importlib import metadata
|
10
|
-
from typing import Any, Dict, Iterator, List, Mapping, Optional
|
11
|
-
from packaging.version import Version, InvalidVersion
|
11
|
+
from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple
|
12
12
|
|
13
13
|
import yaml
|
14
14
|
from airbyte_cdk.models import (
|
@@ -245,54 +245,45 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
245
245
|
"Validation against json schema defined in declarative_component_schema.yaml schema failed"
|
246
246
|
) from e
|
247
247
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
if
|
248
|
+
cdk_version = metadata.version("airbyte_cdk")
|
249
|
+
cdk_major, cdk_minor, cdk_patch = self._get_version_parts(cdk_version, "airbyte-cdk")
|
250
|
+
manifest_version = self._source_config.get("version")
|
251
|
+
if manifest_version is None:
|
252
252
|
raise RuntimeError(
|
253
253
|
"Manifest version is not defined in the manifest. This is unexpected since it should be a required field. Please contact support."
|
254
254
|
)
|
255
|
-
|
255
|
+
manifest_major, manifest_minor, manifest_patch = self._get_version_parts(
|
256
|
+
manifest_version, "manifest"
|
257
|
+
)
|
256
258
|
|
257
|
-
if
|
259
|
+
if cdk_version.startswith("0.0.0"):
|
258
260
|
# Skipping version compatibility check on unreleased dev branch
|
259
261
|
pass
|
260
|
-
elif
|
261
|
-
|
262
|
-
manifest_version.minor,
|
262
|
+
elif cdk_major < manifest_major or (
|
263
|
+
cdk_major == manifest_major and cdk_minor < manifest_minor
|
263
264
|
):
|
264
265
|
raise ValidationError(
|
265
|
-
f"The manifest version {manifest_version
|
266
|
+
f"The manifest version {manifest_version} is greater than the airbyte-cdk package version ({cdk_version}). Your "
|
266
267
|
f"manifest may contain features that are not in the current CDK version."
|
267
268
|
)
|
268
|
-
elif
|
269
|
+
elif manifest_major == 0 and manifest_minor < 29:
|
269
270
|
raise ValidationError(
|
270
271
|
f"The low-code framework was promoted to Beta in airbyte-cdk version 0.29.0 and contains many breaking changes to the "
|
271
|
-
f"language. The manifest version {manifest_version
|
272
|
-
f"{cdk_version
|
272
|
+
f"language. The manifest version {manifest_version} is incompatible with the airbyte-cdk package version "
|
273
|
+
f"{cdk_version} which contains these breaking changes."
|
273
274
|
)
|
274
275
|
|
275
276
|
@staticmethod
|
276
|
-
def
|
277
|
-
version: str,
|
278
|
-
version_type: str,
|
279
|
-
) -> Version:
|
280
|
-
"""Takes a semantic version represented as a string and splits it into a tuple.
|
281
|
-
|
282
|
-
The fourth part (prerelease) is not returned in the tuple.
|
283
|
-
|
284
|
-
Returns:
|
285
|
-
Version: the parsed version object
|
277
|
+
def _get_version_parts(version: str, version_type: str) -> Tuple[int, int, int]:
|
286
278
|
"""
|
287
|
-
|
288
|
-
|
289
|
-
|
279
|
+
Takes a semantic version represented as a string and splits it into a tuple of its major, minor, and patch versions.
|
280
|
+
"""
|
281
|
+
version_parts = re.split(r"\.", version)
|
282
|
+
if len(version_parts) != 3 or not all([part.isdigit() for part in version_parts]):
|
290
283
|
raise ValidationError(
|
291
|
-
f"The {version_type} version
|
292
|
-
)
|
293
|
-
|
294
|
-
# No exception
|
295
|
-
return parsed_version
|
284
|
+
f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)"
|
285
|
+
)
|
286
|
+
return tuple(int(part) for part in version_parts) # type: ignore # We already verified there were 3 parts and they are all digits
|
296
287
|
|
297
288
|
def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]:
|
298
289
|
# This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config
|
@@ -17,7 +17,6 @@ from typing import (
|
|
17
17
|
Mapping,
|
18
18
|
MutableMapping,
|
19
19
|
Optional,
|
20
|
-
Tuple,
|
21
20
|
Type,
|
22
21
|
Union,
|
23
22
|
get_args,
|
@@ -753,7 +752,7 @@ class ModelToComponentFactory:
|
|
753
752
|
config: Config,
|
754
753
|
stream_state: MutableMapping[str, Any],
|
755
754
|
**kwargs: Any,
|
756
|
-
) ->
|
755
|
+
) -> ConcurrentCursor:
|
757
756
|
component_type = component_definition.get("type")
|
758
757
|
if component_definition.get("type") != model_type.__name__:
|
759
758
|
raise ValueError(
|
@@ -884,23 +883,20 @@ class ModelToComponentFactory:
|
|
884
883
|
if evaluated_step:
|
885
884
|
step_length = parse_duration(evaluated_step)
|
886
885
|
|
887
|
-
return (
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
cursor_granularity=cursor_granularity,
|
902
|
-
),
|
903
|
-
connector_state_converter,
|
886
|
+
return ConcurrentCursor(
|
887
|
+
stream_name=stream_name,
|
888
|
+
stream_namespace=stream_namespace,
|
889
|
+
stream_state=stream_state,
|
890
|
+
message_repository=self._message_repository, # type: ignore # message_repository is always instantiated with a value by factory
|
891
|
+
connector_state_manager=state_manager,
|
892
|
+
connector_state_converter=connector_state_converter,
|
893
|
+
cursor_field=cursor_field,
|
894
|
+
slice_boundary_fields=slice_boundary_fields,
|
895
|
+
start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
896
|
+
end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
|
897
|
+
lookback_window=lookback_window,
|
898
|
+
slice_range=step_length,
|
899
|
+
cursor_granularity=cursor_granularity,
|
904
900
|
)
|
905
901
|
|
906
902
|
@staticmethod
|
@@ -29,25 +29,16 @@ from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
|
29
29
|
from airbyte_cdk.utils import is_cloud_environment
|
30
30
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
31
31
|
from unstructured.file_utils.filetype import (
|
32
|
-
EXT_TO_FILETYPE,
|
33
32
|
FILETYPE_TO_MIMETYPE,
|
34
33
|
STR_TO_FILETYPE,
|
35
34
|
FileType,
|
36
35
|
detect_filetype,
|
37
36
|
)
|
38
|
-
import nltk
|
39
37
|
|
40
38
|
unstructured_partition_pdf = None
|
41
39
|
unstructured_partition_docx = None
|
42
40
|
unstructured_partition_pptx = None
|
43
41
|
|
44
|
-
try:
|
45
|
-
nltk.data.find("tokenizers/punkt.zip")
|
46
|
-
nltk.data.find("tokenizers/punkt_tab.zip")
|
47
|
-
except LookupError:
|
48
|
-
nltk.download("punkt")
|
49
|
-
nltk.download("punkt_tab")
|
50
|
-
|
51
42
|
|
52
43
|
def optional_decode(contents: Union[str, bytes]) -> str:
|
53
44
|
if isinstance(contents, bytes):
|
@@ -117,11 +108,9 @@ class UnstructuredParser(FileTypeParser):
|
|
117
108
|
format = _extract_format(config)
|
118
109
|
with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle:
|
119
110
|
filetype = self._get_filetype(file_handle, file)
|
111
|
+
|
120
112
|
if filetype not in self._supported_file_types() and not format.skip_unprocessable_files:
|
121
|
-
raise self._create_parse_error(
|
122
|
-
file,
|
123
|
-
self._get_file_type_error_message(filetype),
|
124
|
-
)
|
113
|
+
raise self._create_parse_error(file, self._get_file_type_error_message(filetype))
|
125
114
|
|
126
115
|
return {
|
127
116
|
"content": {
|
@@ -170,10 +159,6 @@ class UnstructuredParser(FileTypeParser):
|
|
170
159
|
logger.warn(f"File {file.uri} cannot be parsed. Skipping it.")
|
171
160
|
else:
|
172
161
|
raise e
|
173
|
-
except Exception as e:
|
174
|
-
exception_str = str(e)
|
175
|
-
logger.error(f"File {file.uri} caused an error during parsing: {exception_str}.")
|
176
|
-
raise e
|
177
162
|
|
178
163
|
def _read_file(
|
179
164
|
self,
|
@@ -191,32 +176,20 @@ class UnstructuredParser(FileTypeParser):
|
|
191
176
|
# check whether unstructured library is actually available for better error message and to ensure proper typing (can't be None after this point)
|
192
177
|
raise Exception("unstructured library is not available")
|
193
178
|
|
194
|
-
filetype
|
179
|
+
filetype = self._get_filetype(file_handle, remote_file)
|
195
180
|
|
196
|
-
if filetype
|
197
|
-
raise self._create_parse_error(
|
198
|
-
remote_file,
|
199
|
-
self._get_file_type_error_message(filetype),
|
200
|
-
)
|
201
|
-
if filetype in {FileType.MD, FileType.TXT}:
|
181
|
+
if filetype == FileType.MD or filetype == FileType.TXT:
|
202
182
|
file_content: bytes = file_handle.read()
|
203
183
|
decoded_content: str = optional_decode(file_content)
|
204
184
|
return decoded_content
|
185
|
+
if filetype not in self._supported_file_types():
|
186
|
+
raise self._create_parse_error(remote_file, self._get_file_type_error_message(filetype))
|
205
187
|
if format.processing.mode == "local":
|
206
|
-
return self._read_file_locally(
|
207
|
-
file_handle,
|
208
|
-
filetype,
|
209
|
-
format.strategy,
|
210
|
-
remote_file,
|
211
|
-
)
|
188
|
+
return self._read_file_locally(file_handle, filetype, format.strategy, remote_file)
|
212
189
|
elif format.processing.mode == "api":
|
213
190
|
try:
|
214
191
|
result: str = self._read_file_remotely_with_retries(
|
215
|
-
file_handle,
|
216
|
-
format.processing,
|
217
|
-
filetype,
|
218
|
-
format.strategy,
|
219
|
-
remote_file,
|
192
|
+
file_handle, format.processing, filetype, format.strategy, remote_file
|
220
193
|
)
|
221
194
|
except Exception as e:
|
222
195
|
# If a parser error happens while processing the file remotely, this means the file is corrupted. This case is handled by the parse_records method, so just rethrow.
|
@@ -363,11 +336,7 @@ class UnstructuredParser(FileTypeParser):
|
|
363
336
|
|
364
337
|
return self._render_markdown([element.to_dict() for element in elements])
|
365
338
|
|
366
|
-
def _create_parse_error(
|
367
|
-
self,
|
368
|
-
remote_file: RemoteFile,
|
369
|
-
message: str,
|
370
|
-
) -> RecordParseError:
|
339
|
+
def _create_parse_error(self, remote_file: RemoteFile, message: str) -> RecordParseError:
|
371
340
|
return RecordParseError(
|
372
341
|
FileBasedSourceError.ERROR_PARSING_RECORD, filename=remote_file.uri, message=message
|
373
342
|
)
|
@@ -391,51 +360,32 @@ class UnstructuredParser(FileTypeParser):
|
|
391
360
|
# detect_filetype is either using the file name or file content
|
392
361
|
# if possible, try to leverage the file name to detect the file type
|
393
362
|
# if the file name is not available, use the file content
|
394
|
-
file_type
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
)
|
399
|
-
except Exception:
|
400
|
-
# Path doesn't exist locally. Try something else...
|
401
|
-
pass
|
402
|
-
|
403
|
-
if file_type and file_type != FileType.UNK:
|
363
|
+
file_type = detect_filetype(
|
364
|
+
filename=remote_file.uri,
|
365
|
+
)
|
366
|
+
if file_type is not None and not file_type == FileType.UNK:
|
404
367
|
return file_type
|
405
368
|
|
406
369
|
type_based_on_content = detect_filetype(file=file)
|
407
|
-
file.seek(0) # detect_filetype is reading to read the file content, so we need to reset
|
408
370
|
|
409
|
-
|
410
|
-
|
371
|
+
# detect_filetype reads the file content, so the read position must be reset afterwards
|
372
|
+
file.seek(0)
|
411
373
|
|
412
|
-
|
413
|
-
if extension in EXT_TO_FILETYPE:
|
414
|
-
return EXT_TO_FILETYPE[extension]
|
415
|
-
|
416
|
-
return None
|
374
|
+
return type_based_on_content
|
417
375
|
|
418
376
|
def _supported_file_types(self) -> List[Any]:
|
419
377
|
return [FileType.MD, FileType.PDF, FileType.DOCX, FileType.PPTX, FileType.TXT]
|
420
378
|
|
421
|
-
def _get_file_type_error_message(
|
422
|
-
self,
|
423
|
-
file_type: FileType | None,
|
424
|
-
) -> str:
|
379
|
+
def _get_file_type_error_message(self, file_type: FileType) -> str:
|
425
380
|
supported_file_types = ", ".join([str(type) for type in self._supported_file_types()])
|
426
|
-
return f"File type {file_type
|
381
|
+
return f"File type {file_type} is not supported. Supported file types are {supported_file_types}"
|
427
382
|
|
428
383
|
def _render_markdown(self, elements: List[Any]) -> str:
|
429
384
|
return "\n\n".join((self._convert_to_markdown(el) for el in elements))
|
430
385
|
|
431
386
|
def _convert_to_markdown(self, el: Dict[str, Any]) -> str:
|
432
387
|
if dpath.get(el, "type") == "Title":
|
433
|
-
|
434
|
-
if not isinstance(category_depth, int):
|
435
|
-
category_depth = (
|
436
|
-
int(category_depth) if isinstance(category_depth, (str, float)) else 1
|
437
|
-
)
|
438
|
-
heading_str = "#" * category_depth
|
388
|
+
heading_str = "#" * (dpath.get(el, "metadata/category_depth", default=1) or 1)
|
439
389
|
return f"{heading_str} {dpath.get(el, 'text')}"
|
440
390
|
elif dpath.get(el, "type") == "ListItem":
|
441
391
|
return f"- {dpath.get(el, 'text')}"
|
@@ -1,13 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.7.
|
3
|
+
Version: 6.7.0rc1
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
7
7
|
Keywords: airbyte,connector-development-kit,cdk
|
8
8
|
Author: Airbyte
|
9
9
|
Author-email: contact@airbyte.io
|
10
|
-
Requires-Python: >=3.10,<
|
10
|
+
Requires-Python: >=3.10,<4.0
|
11
11
|
Classifier: Development Status :: 3 - Alpha
|
12
12
|
Classifier: Intended Audience :: Developers
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
18
19
|
Classifier: Topic :: Scientific/Engineering
|
19
20
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
21
|
Provides-Extra: file-based
|
@@ -37,11 +38,11 @@ Requires-Dist: fastavro (>=1.8.0,<1.9.0) ; extra == "file-based"
|
|
37
38
|
Requires-Dist: genson (==1.3.0)
|
38
39
|
Requires-Dist: isodate (>=0.6.1,<0.7.0)
|
39
40
|
Requires-Dist: jsonref (>=0.2,<0.3)
|
40
|
-
Requires-Dist: jsonschema (>=
|
41
|
+
Requires-Dist: jsonschema (>=3.2.0,<3.3.0)
|
41
42
|
Requires-Dist: langchain (==0.1.16) ; extra == "vector-db-based"
|
42
43
|
Requires-Dist: langchain_core (==0.1.42)
|
43
44
|
Requires-Dist: markdown ; extra == "file-based"
|
44
|
-
Requires-Dist: nltk (==3.
|
45
|
+
Requires-Dist: nltk (==3.8.1)
|
45
46
|
Requires-Dist: numpy (<2)
|
46
47
|
Requires-Dist: openai[embeddings] (==0.27.9) ; extra == "vector-db-based"
|
47
48
|
Requires-Dist: orjson (>=3.10.7,<4.0.0)
|
@@ -60,7 +61,6 @@ Requires-Dist: python-dateutil
|
|
60
61
|
Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
|
61
62
|
Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
|
62
63
|
Requires-Dist: pytz (==2024.1)
|
63
|
-
Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
|
64
64
|
Requires-Dist: requests
|
65
65
|
Requires-Dist: requests_cache
|
66
66
|
Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
|
@@ -1,8 +1,4 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=3BlW1O37s_grUaioVZvGj3hRsofR0tY4sMceu5ygylk,11550
|
2
|
-
airbyte_cdk/cli/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
|
3
|
-
airbyte_cdk/cli/source_declarative_manifest/__init__.py,sha256=_zFyFFl4leAvtnkHmBFbtLYT6Bh44qxmLbU0wnK2TZQ,92
|
4
|
-
airbyte_cdk/cli/source_declarative_manifest/_run.py,sha256=3rIz-W65J6c2g3eMvvh2jk00cBBTiSgxx-MqA9WPUkw,7769
|
5
|
-
airbyte_cdk/cli/source_declarative_manifest/spec.json,sha256=Earc1L6ngcdIr514oFQlUoOxdF4RHqtUyStSIAquXdY,554
|
6
2
|
airbyte_cdk/config_observation.py,sha256=A2P475pS9JndFzBggtkkAmcN1aMeq_thRbXRzmWjI3E,3997
|
7
3
|
airbyte_cdk/connector.py,sha256=srfjRNgkt1nsPw-Mm0d1qXoVmM90zHPYHIfxu8p6JXI,4223
|
8
4
|
airbyte_cdk/connector_builder/README.md,sha256=Hw3wvVewuHG9-QgsAq1jDiKuLlStDxKBz52ftyNRnBw,1665
|
@@ -62,11 +58,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
|
|
62
58
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
63
59
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
64
60
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
65
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
61
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=hbHylluHcEexCFonA0fYuTQl7gDhU3uwNkn-CgvuYl8,21198
|
66
62
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
63
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
64
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=8VZJP18eJLabSPP1XBSPDaagUBG6q1ynIiPJy3rE2mc,5344
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
65
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=QGpwBEd-KZIeUwtWiZNvRW9SbG4SLGveZHRjAgUk7mg,110383
|
70
66
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
67
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
68
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
|
@@ -99,7 +95,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
|
|
99
95
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
|
100
96
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=KwTd0oagnZI4tARxnJZlQiDHn1IXqS7dbnRT0rKRAj8,6626
|
101
97
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=QgIfSVPHx_MMUCgbQdm-NMpUlp_cpk0OQhoRDFtkrxE,4040
|
102
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
|
98
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=LjOyya1Eh3x3NOO_0MIbiev57OniJUyKkQ3dheT16kI,12896
|
103
99
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
104
100
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iNsF3jWCaZAmJYArmDQg0MJgZikk6frh3IfhcMBR_Qc,3924
|
105
101
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
@@ -109,7 +105,7 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
|
|
109
105
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
110
106
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=jVZ3ZV5YZrmDNIX5cM2mugXmnbH27zHRcD22_3oatpo,8454
|
111
107
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
112
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
108
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=ffPS6F7qPOoFNJDgC1wbvRjM4wC7UshLJ1Trde3Xjyc,95235
|
113
109
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=8uGos2u7TFTx_EJBdcjdUGn3Eyx6jUuEa1_VB8UP_dI,631
|
114
110
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
115
111
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
@@ -206,7 +202,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs
|
|
206
202
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
|
207
203
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=k1ri7TtwrN8oYZpCl1bNNeAQmwBbwLjmOmIz8-tKflY,5897
|
208
204
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=0B4RYehU4z4dys3Tu-O98B0Uw7JO_LzStRwmNxKh6Xk,10486
|
209
|
-
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=
|
205
|
+
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=oR0XdsMLpOh9kXzkVuqZbIfxzsREeWYCWpWY2vlyVHk,17171
|
210
206
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
|
211
207
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
|
212
208
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=sEVnRhZ8x9f7PNjo6lewxid9z0PI8eSj7gSoFC3MH1Y,527
|
@@ -331,8 +327,7 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EemcgcQlI8-LPYOPlYv4Qkdjyho79XVLWaUHF5X
|
|
331
327
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=LVc9KbtMeV_z99jWo0Ou8u4l6eBJ0BWNhxj4zrrGKRs,763
|
332
328
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
333
329
|
airbyte_cdk/utils/traced_exception.py,sha256=89TQdFuYZ1NJgmFpqLzY_T_T_64TpJYmVqs119Bp43g,6164
|
334
|
-
airbyte_cdk-6.7.
|
335
|
-
airbyte_cdk-6.7.
|
336
|
-
airbyte_cdk-6.7.
|
337
|
-
airbyte_cdk-6.7.
|
338
|
-
airbyte_cdk-6.7.0.dev10.dist-info/RECORD,,
|
330
|
+
airbyte_cdk-6.7.0rc1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
331
|
+
airbyte_cdk-6.7.0rc1.dist-info/METADATA,sha256=CCsHOmSWEHYUT7Yd4UNppLGoDi6EcBvFr9fw5o0A9Dc,13350
|
332
|
+
airbyte_cdk-6.7.0rc1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
333
|
+
airbyte_cdk-6.7.0rc1.dist-info/RECORD,,
|
airbyte_cdk/cli/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
@@ -1,223 +0,0 @@
|
|
1
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
-
"""Defines the `source-declarative-manifest` connector, which installs alongside CDK.
|
3
|
-
|
4
|
-
This file was originally imported from the dedicated connector directory, under the
|
5
|
-
`airbyte` monorepo.
|
6
|
-
|
7
|
-
Usage:
|
8
|
-
|
9
|
-
```
|
10
|
-
pipx install airbyte-cdk
|
11
|
-
source-declarative-manifest --help
|
12
|
-
source-declarative-manifest spec
|
13
|
-
...
|
14
|
-
```
|
15
|
-
"""
|
16
|
-
|
17
|
-
from __future__ import annotations
|
18
|
-
|
19
|
-
import json
|
20
|
-
import pkgutil
|
21
|
-
import sys
|
22
|
-
import traceback
|
23
|
-
from collections.abc import Mapping
|
24
|
-
from datetime import datetime
|
25
|
-
from pathlib import Path
|
26
|
-
from typing import Any, cast
|
27
|
-
|
28
|
-
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
|
29
|
-
from airbyte_cdk.models import (
|
30
|
-
AirbyteErrorTraceMessage,
|
31
|
-
AirbyteMessage,
|
32
|
-
AirbyteMessageSerializer,
|
33
|
-
AirbyteStateMessage,
|
34
|
-
AirbyteTraceMessage,
|
35
|
-
ConfiguredAirbyteCatalog,
|
36
|
-
ConnectorSpecificationSerializer,
|
37
|
-
TraceType,
|
38
|
-
Type,
|
39
|
-
)
|
40
|
-
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
41
|
-
ConcurrentDeclarativeSource,
|
42
|
-
)
|
43
|
-
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
44
|
-
from airbyte_cdk.sources.source import TState
|
45
|
-
from orjson import orjson
|
46
|
-
|
47
|
-
|
48
|
-
class SourceLocalYaml(YamlDeclarativeSource):
|
49
|
-
"""
|
50
|
-
Declarative source defined by a yaml file in the local filesystem
|
51
|
-
"""
|
52
|
-
|
53
|
-
def __init__(
|
54
|
-
self,
|
55
|
-
catalog: ConfiguredAirbyteCatalog | None,
|
56
|
-
config: Mapping[str, Any] | None,
|
57
|
-
state: TState,
|
58
|
-
**kwargs: Any,
|
59
|
-
) -> None:
|
60
|
-
"""
|
61
|
-
HACK!
|
62
|
-
Problem: YamlDeclarativeSource relies on the calling module name/path to find the yaml file.
|
63
|
-
Implication: If you call YamlDeclarativeSource directly it will look for the yaml file in the wrong place. (e.g. the airbyte-cdk package)
|
64
|
-
Solution: Subclass YamlDeclarativeSource from the same location as the manifest to load.
|
65
|
-
|
66
|
-
When can we remove this?
|
67
|
-
When the airbyte-cdk is updated to not rely on the calling module name/path to find the yaml file.
|
68
|
-
When all manifest connectors are updated to use the new airbyte-cdk.
|
69
|
-
When all manifest connectors are updated to use the source-declarative-manifest as the base image.
|
70
|
-
"""
|
71
|
-
super().__init__(
|
72
|
-
catalog=catalog,
|
73
|
-
config=config,
|
74
|
-
state=state,
|
75
|
-
path_to_yaml="manifest.yaml",
|
76
|
-
)
|
77
|
-
|
78
|
-
|
79
|
-
def _is_local_manifest_command(args: list[str]) -> bool:
|
80
|
-
# Check for a local manifest.yaml file
|
81
|
-
return Path("/airbyte/integration_code/source_declarative_manifest/manifest.yaml").exists()
|
82
|
-
|
83
|
-
|
84
|
-
def handle_command(args: list[str]) -> None:
|
85
|
-
if _is_local_manifest_command(args):
|
86
|
-
handle_local_manifest_command(args)
|
87
|
-
else:
|
88
|
-
handle_remote_manifest_command(args)
|
89
|
-
|
90
|
-
|
91
|
-
def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
|
92
|
-
try:
|
93
|
-
config, catalog, state = _parse_inputs_into_config_catalog_state(args)
|
94
|
-
return SourceLocalYaml(config=config, catalog=catalog, state=state)
|
95
|
-
except Exception as error:
|
96
|
-
print(
|
97
|
-
orjson.dumps(
|
98
|
-
AirbyteMessageSerializer.dump(
|
99
|
-
AirbyteMessage(
|
100
|
-
type=Type.TRACE,
|
101
|
-
trace=AirbyteTraceMessage(
|
102
|
-
type=TraceType.ERROR,
|
103
|
-
emitted_at=int(datetime.now().timestamp() * 1000),
|
104
|
-
error=AirbyteErrorTraceMessage(
|
105
|
-
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
|
106
|
-
stack_trace=traceback.format_exc(),
|
107
|
-
),
|
108
|
-
),
|
109
|
-
)
|
110
|
-
)
|
111
|
-
).decode()
|
112
|
-
)
|
113
|
-
raise error
|
114
|
-
|
115
|
-
|
116
|
-
def handle_local_manifest_command(args: list[str]) -> None:
|
117
|
-
source = _get_local_yaml_source(args)
|
118
|
-
launch(
|
119
|
-
source=source,
|
120
|
-
args=args,
|
121
|
-
)
|
122
|
-
|
123
|
-
|
124
|
-
def handle_remote_manifest_command(args: list[str]) -> None:
|
125
|
-
"""Overrides the spec command to return the generalized spec for the declarative manifest source.
|
126
|
-
|
127
|
-
This is different from a typical low-code, but built and published separately source built as a ManifestDeclarativeSource,
|
128
|
-
because that will have a spec method that returns the spec for that specific source. Other than spec,
|
129
|
-
the generalized connector behaves the same as any other, since the manifest is provided in the config.
|
130
|
-
"""
|
131
|
-
if args[0] == "spec":
|
132
|
-
json_spec = pkgutil.get_data(
|
133
|
-
"airbyte_cdk.cli.source_declarative_manifest",
|
134
|
-
"spec.json",
|
135
|
-
)
|
136
|
-
if json_spec is None:
|
137
|
-
raise FileNotFoundError(
|
138
|
-
"Could not find `spec.json` file for source-declarative-manifest"
|
139
|
-
)
|
140
|
-
|
141
|
-
spec_obj = json.loads(json_spec)
|
142
|
-
spec = ConnectorSpecificationSerializer.load(spec_obj)
|
143
|
-
|
144
|
-
message = AirbyteMessage(type=Type.SPEC, spec=spec)
|
145
|
-
print(AirbyteEntrypoint.airbyte_message_to_string(message))
|
146
|
-
else:
|
147
|
-
source = create_declarative_source(args)
|
148
|
-
launch(
|
149
|
-
source=source,
|
150
|
-
args=args,
|
151
|
-
)
|
152
|
-
|
153
|
-
|
154
|
-
def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
|
155
|
-
"""Creates the source with the injected config.
|
156
|
-
|
157
|
-
This essentially does what other low-code sources do at build time, but at runtime,
|
158
|
-
with a user-provided manifest in the config. This better reflects what happens in the
|
159
|
-
connector builder.
|
160
|
-
"""
|
161
|
-
try:
|
162
|
-
config, catalog, state = _parse_inputs_into_config_catalog_state(args)
|
163
|
-
if "__injected_declarative_manifest" not in config:
|
164
|
-
raise ValueError(
|
165
|
-
f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}"
|
166
|
-
)
|
167
|
-
return ConcurrentDeclarativeSource(
|
168
|
-
config=config,
|
169
|
-
catalog=catalog,
|
170
|
-
state=state,
|
171
|
-
source_config=cast(dict[str, Any], config["__injected_declarative_manifest"]),
|
172
|
-
)
|
173
|
-
except Exception as error:
|
174
|
-
print(
|
175
|
-
orjson.dumps(
|
176
|
-
AirbyteMessageSerializer.dump(
|
177
|
-
AirbyteMessage(
|
178
|
-
type=Type.TRACE,
|
179
|
-
trace=AirbyteTraceMessage(
|
180
|
-
type=TraceType.ERROR,
|
181
|
-
emitted_at=int(datetime.now().timestamp() * 1000),
|
182
|
-
error=AirbyteErrorTraceMessage(
|
183
|
-
message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
|
184
|
-
stack_trace=traceback.format_exc(),
|
185
|
-
),
|
186
|
-
),
|
187
|
-
)
|
188
|
-
)
|
189
|
-
).decode()
|
190
|
-
)
|
191
|
-
raise error
|
192
|
-
|
193
|
-
|
194
|
-
def _parse_inputs_into_config_catalog_state(
|
195
|
-
args: list[str],
|
196
|
-
) -> tuple[
|
197
|
-
Mapping[str, Any] | None,
|
198
|
-
ConfiguredAirbyteCatalog | None,
|
199
|
-
list[AirbyteStateMessage],
|
200
|
-
]:
|
201
|
-
parsed_args = AirbyteEntrypoint.parse_args(args)
|
202
|
-
config = (
|
203
|
-
ConcurrentDeclarativeSource.read_config(parsed_args.config)
|
204
|
-
if hasattr(parsed_args, "config")
|
205
|
-
else None
|
206
|
-
)
|
207
|
-
catalog = (
|
208
|
-
ConcurrentDeclarativeSource.read_catalog(parsed_args.catalog)
|
209
|
-
if hasattr(parsed_args, "catalog")
|
210
|
-
else None
|
211
|
-
)
|
212
|
-
state = (
|
213
|
-
ConcurrentDeclarativeSource.read_state(parsed_args.state)
|
214
|
-
if hasattr(parsed_args, "state")
|
215
|
-
else []
|
216
|
-
)
|
217
|
-
|
218
|
-
return config, catalog, state
|
219
|
-
|
220
|
-
|
221
|
-
def run() -> None:
|
222
|
-
args: list[str] = sys.argv[1:]
|
223
|
-
handle_command(args)
|
@@ -1,17 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"documentationUrl": "https://docs.airbyte.com/integrations/sources/low-code",
|
3
|
-
"connectionSpecification": {
|
4
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
5
|
-
"title": "Low-code source spec",
|
6
|
-
"type": "object",
|
7
|
-
"required": ["__injected_declarative_manifest"],
|
8
|
-
"additionalProperties": true,
|
9
|
-
"properties": {
|
10
|
-
"__injected_declarative_manifest": {
|
11
|
-
"title": "Low-code manifest",
|
12
|
-
"type": "object",
|
13
|
-
"description": "The low-code manifest that defines the components of the source."
|
14
|
-
}
|
15
|
-
}
|
16
|
-
}
|
17
|
-
}
|
File without changes
|
File without changes
|