airbyte-cdk 6.12.4.dev0__py3-none-any.whl → 6.13.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. airbyte_cdk/__init__.py +93 -34
  2. airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
  3. airbyte_cdk/models/__init__.py +10 -11
  4. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
  5. airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
  6. airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +25 -65
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +78 -1
  9. airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
  10. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  11. airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +10 -6
  14. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -14
  15. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +49 -2
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +96 -80
  17. airbyte_cdk/sources/declarative/partition_routers/__init__.py +23 -5
  18. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  19. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  20. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  21. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  22. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
  23. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
  24. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
  25. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  26. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
  27. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +16 -5
  28. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
  29. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +7 -8
  30. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
  31. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
  32. airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
  33. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
  34. airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
  35. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +9 -32
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +72 -65
  37. airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
  38. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
  39. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  40. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  41. airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
  42. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  43. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  44. airbyte_cdk/sources/message/__init__.py +7 -1
  45. airbyte_cdk/sources/streams/__init__.py +1 -1
  46. airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
  47. airbyte_cdk/sources/streams/concurrent/cursor.py +0 -1
  48. airbyte_cdk/sources/streams/http/__init__.py +2 -2
  49. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
  50. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +8 -3
  51. airbyte_cdk/test/mock_http/__init__.py +1 -1
  52. airbyte_cdk/test/mock_http/mocker.py +3 -1
  53. airbyte_cdk/test/mock_http/response_builder.py +1 -1
  54. airbyte_cdk/utils/__init__.py +1 -1
  55. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/METADATA +2 -2
  56. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/RECORD +59 -58
  57. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -344
  58. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/LICENSE.txt +0 -0
  59. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/WHEEL +0 -0
  60. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,97 @@
1
+ import csv
2
+ import gzip
3
+ import json
4
+ import logging
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from io import BufferedIOBase, TextIOWrapper
8
+ from typing import Any, Generator, MutableMapping, Optional
9
+
10
+ import requests
11
+
12
+ from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
13
+
14
+ logger = logging.getLogger("airbyte")
15
+
16
+
17
+ @dataclass
18
+ class Parser(ABC):
19
+ @abstractmethod
20
+ def parse(
21
+ self,
22
+ data: BufferedIOBase,
23
+ ) -> Generator[MutableMapping[str, Any], None, None]:
24
+ """
25
+ Parse data and yield dictionaries.
26
+ """
27
+ pass
28
+
29
+
30
+ @dataclass
31
+ class GzipParser(Parser):
32
+ inner_parser: Parser
33
+
34
+ def parse(
35
+ self,
36
+ data: BufferedIOBase,
37
+ ) -> Generator[MutableMapping[str, Any], None, None]:
38
+ """
39
+ Decompress gzipped bytes and pass decompressed data to the inner parser.
40
+ """
41
+ with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
42
+ yield from self.inner_parser.parse(gzipobj)
43
+
44
+
45
+ @dataclass
46
+ class JsonLineParser(Parser):
47
+ encoding: Optional[str] = "utf-8"
48
+
49
+ def parse(
50
+ self,
51
+ data: BufferedIOBase,
52
+ ) -> Generator[MutableMapping[str, Any], None, None]:
53
+ for line in data:
54
+ try:
55
+ yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
56
+ except json.JSONDecodeError as e:
57
+ logger.warning(f"Cannot decode/parse line {line!r} as JSON, error: {e}")
58
+
59
+
60
+ @dataclass
61
+ class CsvParser(Parser):
62
+ # TODO: migrate implementation to re-use file-base classes
63
+ encoding: Optional[str] = "utf-8"
64
+ delimiter: Optional[str] = ","
65
+
66
+ def parse(
67
+ self,
68
+ data: BufferedIOBase,
69
+ ) -> Generator[MutableMapping[str, Any], None, None]:
70
+ """
71
+ Parse CSV data from decompressed bytes.
72
+ """
73
+ text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore
74
+ reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
75
+ yield from reader
76
+
77
+
78
+ @dataclass
79
+ class CompositeRawDecoder(Decoder):
80
+ """
81
+ Decoder strategy to transform a requests.Response into a Generator[MutableMapping[str, Any], None, None]
82
+ passed response.raw to parser(s).
83
+ Note: response.raw is not decoded/decompressed by default.
84
+ parsers should be instantiated recursively.
85
+ Example:
86
+ composite_raw_decoder = CompositeRawDecoder(parser=GzipParser(inner_parser=JsonLineParser(encoding="iso-8859-1")))
87
+ """
88
+
89
+ parser: Parser
90
+
91
+ def is_stream_response(self) -> bool:
92
+ return True
93
+
94
+ def decode(
95
+ self, response: requests.Response
96
+ ) -> Generator[MutableMapping[str, Any], None, None]:
97
+ yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
@@ -6,6 +6,14 @@ from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtr
6
6
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
7
7
  from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
8
8
  from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
9
- from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import ResponseToFileExtractor
9
+ from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
10
+ ResponseToFileExtractor,
11
+ )
10
12
 
11
- __all__ = ["HttpSelector", "DpathExtractor", "RecordFilter", "RecordSelector", "ResponseToFileExtractor"]
13
+ __all__ = [
14
+ "HttpSelector",
15
+ "DpathExtractor",
16
+ "RecordFilter",
17
+ "RecordSelector",
18
+ "ResponseToFileExtractor",
19
+ ]
@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
59
59
 
60
60
  def __init__(
61
61
  self,
62
- cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
62
+ date_time_based_cursor: DatetimeBasedCursor,
63
+ substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
63
64
  **kwargs: Any,
64
65
  ):
65
66
  super().__init__(**kwargs)
66
- self._cursor = cursor
67
+ self._date_time_based_cursor = date_time_based_cursor
68
+ self._substream_cursor = substream_cursor
67
69
 
68
70
  def filter_records(
69
71
  self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
75
77
  records = (
76
78
  record
77
79
  for record in records
78
- if self._cursor.should_be_synced(
80
+ if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
79
81
  # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
80
82
  # Record stream name is empty because it is not used during the filtering
81
83
  Record(data=record, associated_slice=stream_slice, stream_name="")
@@ -2,12 +2,18 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import ConcurrentCursorFactory, ConcurrentPerPartitionCursor
6
5
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
7
6
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
8
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor
9
- from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import CursorFactory, PerPartitionCursor
10
- from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import PerPartitionWithGlobalCursor
7
+ from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
8
+ GlobalSubstreamCursor,
9
+ )
10
+ from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import (
11
+ CursorFactory,
12
+ PerPartitionCursor,
13
+ )
14
+ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
15
+ PerPartitionWithGlobalCursor,
16
+ )
11
17
  from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor import (
12
18
  ChildPartitionResumableFullRefreshCursor,
13
19
  ResumableFullRefreshCursor,
@@ -15,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
15
21
 
16
22
  __all__ = [
17
23
  "CursorFactory",
18
- "ConcurrentCursorFactory"
19
- "ConcurrentPerPartitionCursor",
20
24
  "DatetimeBasedCursor",
21
25
  "DeclarativeCursor",
22
26
  "GlobalSubstreamCursor",
@@ -303,20 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
303
303
  raise ValueError("A partition needs to be provided in order to get request body json")
304
304
 
305
305
  def should_be_synced(self, record: Record) -> bool:
306
- if (
307
- self._to_partition_key(record.associated_slice.partition)
308
- not in self._cursor_per_partition
309
- ):
310
- partition_state = (
311
- self._state_to_migrate_from
312
- if self._state_to_migrate_from
313
- else self._NO_CURSOR_STATE
314
- )
315
- cursor = self._create_cursor(partition_state)
316
-
317
- self._cursor_per_partition[
318
- self._to_partition_key(record.associated_slice.partition)
319
- ] = cursor
320
306
  return self._get_cursor(record).should_be_synced(
321
307
  self._convert_record_to_cursor_record(record)
322
308
  )
@@ -489,8 +489,8 @@ class OAuthAuthenticator(BaseModel):
489
489
  ],
490
490
  title="Refresh Token",
491
491
  )
492
- token_refresh_endpoint: str = Field(
493
- ...,
492
+ token_refresh_endpoint: Optional[str] = Field(
493
+ None,
494
494
  description="The full URL to call to obtain a new access token.",
495
495
  examples=["https://connect.squareup.com/oauth2/token"],
496
496
  title="Token Refresh Endpoint",
@@ -501,6 +501,12 @@ class OAuthAuthenticator(BaseModel):
501
501
  examples=["access_token"],
502
502
  title="Access Token Property Name",
503
503
  )
504
+ access_token_value: Optional[str] = Field(
505
+ None,
506
+ description="The value of the access_token to bypass the token refreshing using `refresh_token`.",
507
+ examples=["secret_access_token_value"],
508
+ title="Access Token Value",
509
+ )
504
510
  expires_in_name: Optional[str] = Field(
505
511
  "expires_in",
506
512
  description="The name of the property which contains the expiry date in the response from the token refresh endpoint.",
@@ -1119,6 +1125,17 @@ class LegacySessionTokenAuthenticator(BaseModel):
1119
1125
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1120
1126
 
1121
1127
 
1128
+ class JsonLineParser(BaseModel):
1129
+ type: Literal["JsonLineParser"]
1130
+ encoding: Optional[str] = "utf-8"
1131
+
1132
+
1133
+ class CsvParser(BaseModel):
1134
+ type: Literal["CsvParser"]
1135
+ encoding: Optional[str] = "utf-8"
1136
+ delimiter: Optional[str] = ","
1137
+
1138
+
1122
1139
  class AsyncJobStatusMap(BaseModel):
1123
1140
  type: Optional[Literal["AsyncJobStatusMap"]] = None
1124
1141
  running: List[str]
@@ -1202,6 +1219,8 @@ class ComponentMappingDefinition(BaseModel):
1202
1219
  "{{ components_values['updates'] }}",
1203
1220
  "{{ components_values['MetaData']['LastUpdatedTime'] }}",
1204
1221
  "{{ config['segment_id'] }}",
1222
+ "{{ stream_slice['parent_id'] }}",
1223
+ "{{ stream_slice['extra_fields']['name'] }}",
1205
1224
  ],
1206
1225
  title="Value",
1207
1226
  )
@@ -1498,6 +1517,11 @@ class RecordSelector(BaseModel):
1498
1517
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1499
1518
 
1500
1519
 
1520
+ class GzipParser(BaseModel):
1521
+ type: Literal["GzipParser"]
1522
+ inner_parser: Union[JsonLineParser, CsvParser]
1523
+
1524
+
1501
1525
  class Spec(BaseModel):
1502
1526
  type: Literal["Spec"]
1503
1527
  connection_specification: Dict[str, Any] = Field(
@@ -1528,6 +1552,11 @@ class CompositeErrorHandler(BaseModel):
1528
1552
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1529
1553
 
1530
1554
 
1555
+ class CompositeRawDecoder(BaseModel):
1556
+ type: Literal["CompositeRawDecoder"]
1557
+ parser: Union[GzipParser, JsonLineParser, CsvParser]
1558
+
1559
+
1531
1560
  class DeclarativeSource1(BaseModel):
1532
1561
  class Config:
1533
1562
  extra = Extra.forbid
@@ -1671,6 +1700,7 @@ class DeclarativeStream(BaseModel):
1671
1700
  RemoveFields,
1672
1701
  KeysToLower,
1673
1702
  KeysToSnakeCase,
1703
+ FlattenFields,
1674
1704
  ]
1675
1705
  ]
1676
1706
  ] = Field(
@@ -1836,6 +1866,22 @@ class DynamicSchemaLoader(BaseModel):
1836
1866
  description="Component used to coordinate how records are extracted across stream slices and request pages.",
1837
1867
  title="Retriever",
1838
1868
  )
1869
+ schema_transformations: Optional[
1870
+ List[
1871
+ Union[
1872
+ AddFields,
1873
+ CustomTransformation,
1874
+ RemoveFields,
1875
+ KeysToLower,
1876
+ KeysToSnakeCase,
1877
+ FlattenFields,
1878
+ ]
1879
+ ]
1880
+ ] = Field(
1881
+ None,
1882
+ description="A list of transformations to be applied to the schema.",
1883
+ title="Schema Transformations",
1884
+ )
1839
1885
  schema_type_identifier: SchemaTypeIdentifier
1840
1886
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1841
1887
 
@@ -1913,6 +1959,7 @@ class SimpleRetriever(BaseModel):
1913
1959
  IterableDecoder,
1914
1960
  XmlDecoder,
1915
1961
  GzipJsonDecoder,
1962
+ CompositeRawDecoder,
1916
1963
  ]
1917
1964
  ] = Field(
1918
1965
  None,
@@ -67,6 +67,12 @@ from airbyte_cdk.sources.declarative.decoders import (
67
67
  PaginationDecoderDecorator,
68
68
  XmlDecoder,
69
69
  )
70
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
71
+ CompositeRawDecoder,
72
+ CsvParser,
73
+ GzipParser,
74
+ JsonLineParser,
75
+ )
70
76
  from airbyte_cdk.sources.declarative.extractors import (
71
77
  DpathExtractor,
72
78
  RecordFilter,
@@ -81,8 +87,6 @@ from airbyte_cdk.sources.declarative.extractors.record_selector import (
81
87
  )
82
88
  from airbyte_cdk.sources.declarative.incremental import (
83
89
  ChildPartitionResumableFullRefreshCursor,
84
- ConcurrentCursorFactory,
85
- ConcurrentPerPartitionCursor,
86
90
  CursorFactory,
87
91
  DatetimeBasedCursor,
88
92
  DeclarativeCursor,
@@ -127,6 +131,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
127
131
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
128
132
  CompositeErrorHandler as CompositeErrorHandlerModel,
129
133
  )
134
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
135
+ CompositeRawDecoder as CompositeRawDecoderModel,
136
+ )
130
137
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
131
138
  ConcurrencyLevel as ConcurrencyLevelModel,
132
139
  )
@@ -136,6 +143,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
136
143
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
137
144
  ConstantBackoffStrategy as ConstantBackoffStrategyModel,
138
145
  )
146
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
147
+ CsvParser as CsvParserModel,
148
+ )
139
149
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
140
150
  CursorPagination as CursorPaginationModel,
141
151
  )
@@ -205,6 +215,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
205
215
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
206
216
  GzipJsonDecoder as GzipJsonDecoderModel,
207
217
  )
218
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
219
+ GzipParser as GzipParserModel,
220
+ )
208
221
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
209
222
  HttpComponentsResolver as HttpComponentsResolverModel,
210
223
  )
@@ -229,6 +242,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
229
242
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
230
243
  JsonlDecoder as JsonlDecoderModel,
231
244
  )
245
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
246
+ JsonLineParser as JsonLineParserModel,
247
+ )
232
248
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
233
249
  JwtAuthenticator as JwtAuthenticatorModel,
234
250
  )
@@ -241,6 +257,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
241
257
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
242
258
  KeysToLower as KeysToLowerModel,
243
259
  )
260
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
261
+ KeysToSnakeCase as KeysToSnakeCaseModel,
262
+ )
244
263
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
245
264
  LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
246
265
  )
@@ -328,6 +347,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
328
347
  SinglePartitionRouter,
329
348
  SubstreamPartitionRouter,
330
349
  )
350
+ from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
351
+ AsyncJobPartitionRouter,
352
+ )
331
353
  from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
332
354
  ParentStreamConfig,
333
355
  )
@@ -398,11 +420,13 @@ from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
398
420
  from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
399
421
  KeysToLowerTransformation,
400
422
  )
423
+ from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
424
+ KeysToSnakeCaseTransformation,
425
+ )
401
426
  from airbyte_cdk.sources.message import (
402
427
  InMemoryMessageRepository,
403
428
  LogAppenderMessageRepositoryDecorator,
404
429
  MessageRepository,
405
- NoopMessageRepository,
406
430
  )
407
431
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
408
432
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -449,6 +473,7 @@ class ModelToComponentFactory:
449
473
  BearerAuthenticatorModel: self.create_bearer_authenticator,
450
474
  CheckStreamModel: self.create_check_stream,
451
475
  CompositeErrorHandlerModel: self.create_composite_error_handler,
476
+ CompositeRawDecoderModel: self.create_composite_raw_decoder,
452
477
  ConcurrencyLevelModel: self.create_concurrency_level,
453
478
  ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
454
479
  CursorPaginationModel: self.create_cursor_pagination,
@@ -479,8 +504,11 @@ class ModelToComponentFactory:
479
504
  InlineSchemaLoaderModel: self.create_inline_schema_loader,
480
505
  JsonDecoderModel: self.create_json_decoder,
481
506
  JsonlDecoderModel: self.create_jsonl_decoder,
507
+ JsonLineParserModel: self.create_json_line_parser,
482
508
  GzipJsonDecoderModel: self.create_gzipjson_decoder,
509
+ GzipParserModel: self.create_gzip_parser,
483
510
  KeysToLowerModel: self.create_keys_to_lower_transformation,
511
+ KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
484
512
  FlattenFieldsModel: self.create_flatten_fields,
485
513
  IterableDecoderModel: self.create_iterable_decoder,
486
514
  XmlDecoderModel: self.create_xml_decoder,
@@ -597,6 +625,11 @@ class ModelToComponentFactory:
597
625
  ) -> KeysToLowerTransformation:
598
626
  return KeysToLowerTransformation()
599
627
 
628
+ def create_keys_to_snake_transformation(
629
+ self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
630
+ ) -> KeysToSnakeCaseTransformation:
631
+ return KeysToSnakeCaseTransformation()
632
+
600
633
  def create_flatten_fields(
601
634
  self, model: FlattenFieldsModel, config: Config, **kwargs: Any
602
635
  ) -> FlattenFields:
@@ -811,7 +844,6 @@ class ModelToComponentFactory:
811
844
  stream_namespace: Optional[str],
812
845
  config: Config,
813
846
  stream_state: MutableMapping[str, Any],
814
- message_repository: Optional[MessageRepository] = None,
815
847
  **kwargs: Any,
816
848
  ) -> ConcurrentCursor:
817
849
  component_type = component_definition.get("type")
@@ -947,7 +979,7 @@ class ModelToComponentFactory:
947
979
  stream_name=stream_name,
948
980
  stream_namespace=stream_namespace,
949
981
  stream_state=stream_state,
950
- message_repository=message_repository or self._message_repository,
982
+ message_repository=self._message_repository,
951
983
  connector_state_manager=state_manager,
952
984
  connector_state_converter=connector_state_converter,
953
985
  cursor_field=cursor_field,
@@ -959,63 +991,6 @@ class ModelToComponentFactory:
959
991
  cursor_granularity=cursor_granularity,
960
992
  )
961
993
 
962
- def create_concurrent_cursor_from_perpartition_cursor(
963
- self,
964
- state_manager: ConnectorStateManager,
965
- model_type: Type[BaseModel],
966
- component_definition: ComponentDefinition,
967
- stream_name: str,
968
- stream_namespace: Optional[str],
969
- config: Config,
970
- stream_state: MutableMapping[str, Any],
971
- partition_router,
972
- **kwargs: Any,
973
- ) -> ConcurrentPerPartitionCursor:
974
- component_type = component_definition.get("type")
975
- if component_definition.get("type") != model_type.__name__:
976
- raise ValueError(
977
- f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
978
- )
979
-
980
- datetime_based_cursor_model = model_type.parse_obj(component_definition)
981
-
982
- if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
983
- raise ValueError(
984
- f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
985
- )
986
-
987
- interpolated_cursor_field = InterpolatedString.create(
988
- datetime_based_cursor_model.cursor_field,
989
- parameters=datetime_based_cursor_model.parameters or {},
990
- )
991
- cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
992
-
993
- # Create the cursor factory
994
- cursor_factory = ConcurrentCursorFactory(
995
- partial(
996
- self.create_concurrent_cursor_from_datetime_based_cursor,
997
- state_manager=state_manager,
998
- model_type=model_type,
999
- component_definition=component_definition,
1000
- stream_name=stream_name,
1001
- stream_namespace=stream_namespace,
1002
- config=config,
1003
- message_repository=NoopMessageRepository(),
1004
- )
1005
- )
1006
-
1007
- # Return the concurrent cursor and state converter
1008
- return ConcurrentPerPartitionCursor(
1009
- cursor_factory=cursor_factory,
1010
- partition_router=partition_router,
1011
- stream_name=stream_name,
1012
- stream_namespace=stream_namespace,
1013
- stream_state=stream_state,
1014
- message_repository=self._message_repository, # type: ignore
1015
- connector_state_manager=state_manager,
1016
- cursor_field=cursor_field,
1017
- )
1018
-
1019
994
  @staticmethod
1020
995
  def create_constant_backoff_strategy(
1021
996
  model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1298,15 +1273,18 @@ class ModelToComponentFactory:
1298
1273
  raise ValueError(
1299
1274
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1300
1275
  )
1301
- cursor = (
1302
- combined_slicers
1303
- if isinstance(
1304
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1305
- )
1306
- else self._create_component_from_model(model=model.incremental_sync, config=config)
1307
- )
1308
-
1309
- client_side_incremental_sync = {"cursor": cursor}
1276
+ client_side_incremental_sync = {
1277
+ "date_time_based_cursor": self._create_component_from_model(
1278
+ model=model.incremental_sync, config=config
1279
+ ),
1280
+ "substream_cursor": (
1281
+ combined_slicers
1282
+ if isinstance(
1283
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1284
+ )
1285
+ else None
1286
+ ),
1287
+ }
1310
1288
 
1311
1289
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1312
1290
  cursor_model = model.incremental_sync
@@ -1708,6 +1686,13 @@ class ModelToComponentFactory:
1708
1686
  model.retriever, stream_slicer
1709
1687
  )
1710
1688
 
1689
+ schema_transformations = []
1690
+ if model.schema_transformations:
1691
+ for transformation_model in model.schema_transformations:
1692
+ schema_transformations.append(
1693
+ self._create_component_from_model(model=transformation_model, config=config)
1694
+ )
1695
+
1711
1696
  retriever = self._create_component_from_model(
1712
1697
  model=model.retriever,
1713
1698
  config=config,
@@ -1722,6 +1707,7 @@ class ModelToComponentFactory:
1722
1707
  return DynamicSchemaLoader(
1723
1708
  retriever=retriever,
1724
1709
  config=config,
1710
+ schema_transformations=schema_transformations,
1725
1711
  schema_type_identifier=schema_type_identifier,
1726
1712
  parameters=model.parameters or {},
1727
1713
  )
@@ -1736,6 +1722,12 @@ class ModelToComponentFactory:
1736
1722
  ) -> JsonlDecoder:
1737
1723
  return JsonlDecoder(parameters={})
1738
1724
 
1725
+ @staticmethod
1726
+ def create_json_line_parser(
1727
+ model: JsonLineParserModel, config: Config, **kwargs: Any
1728
+ ) -> JsonLineParser:
1729
+ return JsonLineParser(encoding=model.encoding)
1730
+
1739
1731
  @staticmethod
1740
1732
  def create_iterable_decoder(
1741
1733
  model: IterableDecoderModel, config: Config, **kwargs: Any
@@ -1752,6 +1744,22 @@ class ModelToComponentFactory:
1752
1744
  ) -> GzipJsonDecoder:
1753
1745
  return GzipJsonDecoder(parameters={}, encoding=model.encoding)
1754
1746
 
1747
+ def create_gzip_parser(
1748
+ self, model: GzipParserModel, config: Config, **kwargs: Any
1749
+ ) -> GzipParser:
1750
+ inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
1751
+ return GzipParser(inner_parser=inner_parser)
1752
+
1753
+ @staticmethod
1754
+ def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
1755
+ return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
1756
+
1757
+ def create_composite_raw_decoder(
1758
+ self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
1759
+ ) -> CompositeRawDecoder:
1760
+ parser = self._create_component_from_model(model=model.parser, config=config)
1761
+ return CompositeRawDecoder(parser=parser)
1762
+
1755
1763
  @staticmethod
1756
1764
  def create_json_file_schema_loader(
1757
1765
  model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
@@ -1835,7 +1843,8 @@ class ModelToComponentFactory:
1835
1843
  return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
1836
1844
  config,
1837
1845
  InterpolatedString.create(
1838
- model.token_refresh_endpoint, parameters=model.parameters or {}
1846
+ model.token_refresh_endpoint, # type: ignore
1847
+ parameters=model.parameters or {},
1839
1848
  ).eval(config),
1840
1849
  access_token_name=InterpolatedString.create(
1841
1850
  model.access_token_name or "access_token", parameters=model.parameters or {}
@@ -1869,6 +1878,7 @@ class ModelToComponentFactory:
1869
1878
  # ignore type error because fixing it would have a lot of dependencies, revisit later
1870
1879
  return DeclarativeOauth2Authenticator( # type: ignore
1871
1880
  access_token_name=model.access_token_name or "access_token",
1881
+ access_token_value=model.access_token_value,
1872
1882
  client_id=model.client_id,
1873
1883
  client_secret=model.client_secret,
1874
1884
  expires_in_name=model.expires_in_name or "expires_in",
@@ -2084,7 +2094,7 @@ class ModelToComponentFactory:
2084
2094
  if (
2085
2095
  not isinstance(stream_slicer, DatetimeBasedCursor)
2086
2096
  or type(stream_slicer) is not DatetimeBasedCursor
2087
- ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
2097
+ ):
2088
2098
  # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
2089
2099
  # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
2090
2100
  # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2298,22 +2308,28 @@ class ModelToComponentFactory:
2298
2308
  urls_extractor=urls_extractor,
2299
2309
  )
2300
2310
 
2301
- return AsyncRetriever(
2311
+ async_job_partition_router = AsyncJobPartitionRouter(
2302
2312
  job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
2303
2313
  job_repository,
2304
2314
  stream_slices,
2305
- JobTracker(
2306
- 1
2307
- ), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2315
+ JobTracker(1),
2316
+ # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
2308
2317
  self._message_repository,
2309
- has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
2318
+ has_bulk_parent=False,
2319
+ # FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
2310
2320
  ),
2311
- record_selector=record_selector,
2312
2321
  stream_slicer=stream_slicer,
2313
2322
  config=config,
2314
2323
  parameters=model.parameters or {},
2315
2324
  )
2316
2325
 
2326
+ return AsyncRetriever(
2327
+ record_selector=record_selector,
2328
+ stream_slicer=async_job_partition_router,
2329
+ config=config,
2330
+ parameters=model.parameters or {},
2331
+ )
2332
+
2317
2333
  @staticmethod
2318
2334
  def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
2319
2335
  return Spec(
@@ -2423,7 +2439,7 @@ class ModelToComponentFactory:
2423
2439
  config=config,
2424
2440
  name="",
2425
2441
  primary_key=None,
2426
- stream_slicer=combined_slicers,
2442
+ stream_slicer=stream_slicer if stream_slicer else combined_slicers,
2427
2443
  transformations=[],
2428
2444
  )
2429
2445