airbyte-cdk 7.2.3__py3-none-any.whl → 7.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import base64
6
6
  import json
7
7
  from dataclasses import InitVar, dataclass
8
8
  from datetime import datetime
9
- from typing import Any, Mapping, Optional, Union, cast
9
+ from typing import Any, Mapping, MutableMapping, Optional, Union, cast
10
10
 
11
11
  import jwt
12
12
  from cryptography.hazmat.primitives import serialization
@@ -14,12 +14,15 @@ from cryptography.hazmat.primitives.asymmetric.ec import EllipticCurvePrivateKey
14
14
  from cryptography.hazmat.primitives.asymmetric.ed448 import Ed448PrivateKey
15
15
  from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
16
16
  from cryptography.hazmat.primitives.asymmetric.rsa import RSAPrivateKey
17
- from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
18
17
 
19
18
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
20
19
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
21
20
  from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
22
21
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
22
+ from airbyte_cdk.sources.declarative.requesters.request_option import (
23
+ RequestOption,
24
+ RequestOptionType,
25
+ )
23
26
 
24
27
  # Type alias for keys that JWT library accepts
25
28
  JwtKeyTypes = Union[
@@ -86,6 +89,7 @@ class JwtAuthenticator(DeclarativeAuthenticator):
86
89
  additional_jwt_headers: Optional[Mapping[str, Any]] = None
87
90
  additional_jwt_payload: Optional[Mapping[str, Any]] = None
88
91
  passphrase: Optional[Union[InterpolatedString, str]] = None
92
+ request_option: Optional[RequestOption] = None
89
93
 
90
94
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
91
95
  self._secret_key = InterpolatedString.create(self.secret_key, parameters=parameters)
@@ -121,6 +125,13 @@ class JwtAuthenticator(DeclarativeAuthenticator):
121
125
  else None
122
126
  )
123
127
 
128
+ # When we first implemented the JWT authenticator, we assumed that the signed token was always supposed
129
+ # to be loaded into the request headers under the `Authorization` key. This is not always the case, but
130
+ # this default option allows for backwards compatibility to be retained for existing connectors
131
+ self._request_option = self.request_option or RequestOption(
132
+ inject_into=RequestOptionType.header, field_name="Authorization", parameters=parameters
133
+ )
134
+
124
135
  def _get_jwt_headers(self) -> dict[str, Any]:
125
136
  """
126
137
  Builds and returns the headers used when signing the JWT.
@@ -213,7 +224,8 @@ class JwtAuthenticator(DeclarativeAuthenticator):
213
224
 
214
225
  @property
215
226
  def auth_header(self) -> str:
216
- return "Authorization"
227
+ options = self._get_request_options(RequestOptionType.header)
228
+ return next(iter(options.keys()), "")
217
229
 
218
230
  @property
219
231
  def token(self) -> str:
@@ -222,3 +234,18 @@ class JwtAuthenticator(DeclarativeAuthenticator):
222
234
  if self._get_header_prefix()
223
235
  else self._get_signed_token()
224
236
  )
237
+
238
+ def get_request_params(self) -> Mapping[str, Any]:
239
+ return self._get_request_options(RequestOptionType.request_parameter)
240
+
241
+ def get_request_body_data(self) -> Union[Mapping[str, Any], str]:
242
+ return self._get_request_options(RequestOptionType.body_data)
243
+
244
+ def get_request_body_json(self) -> Mapping[str, Any]:
245
+ return self._get_request_options(RequestOptionType.body_json)
246
+
247
+ def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]:
248
+ options: MutableMapping[str, Any] = {}
249
+ if self._request_option.inject_into == option_type:
250
+ self._request_option.inject_into_request(options, self.token, self.config)
251
+ return options
@@ -1276,6 +1276,10 @@ definitions:
1276
1276
  type: string
1277
1277
  examples:
1278
1278
  - "{{ config['passphrase'] }}"
1279
+ request_option:
1280
+ title: Request Option
1281
+ description: A request option describing where the signed JWT token that is generated should be injected into the outbound API request.
1282
+ "$ref": "#/definitions/RequestOption"
1279
1283
  $parameters:
1280
1284
  type: object
1281
1285
  additionalProperties: true
@@ -1,5 +1,3 @@
1
- # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
-
3
1
  # generated by datamodel-codegen:
4
2
  # filename: declarative_component_schema.yaml
5
3
 
@@ -397,66 +395,6 @@ class JwtPayload(BaseModel):
397
395
  )
398
396
 
399
397
 
400
- class JwtAuthenticator(BaseModel):
401
- type: Literal["JwtAuthenticator"]
402
- secret_key: str = Field(
403
- ...,
404
- description="Secret used to sign the JSON web token.",
405
- examples=["{{ config['secret_key'] }}"],
406
- title="Secret Key",
407
- )
408
- base64_encode_secret_key: Optional[bool] = Field(
409
- False,
410
- description='When set to true, the secret key will be base64 encoded prior to being encoded as part of the JWT. Only set to "true" when required by the API.',
411
- title="Base64-encode Secret Key",
412
- )
413
- algorithm: Algorithm = Field(
414
- ...,
415
- description="Algorithm used to sign the JSON web token.",
416
- examples=["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"],
417
- title="Algorithm",
418
- )
419
- token_duration: Optional[int] = Field(
420
- 1200,
421
- description="The amount of time in seconds a JWT token can be valid after being issued.",
422
- examples=[1200, 3600],
423
- title="Token Duration",
424
- )
425
- header_prefix: Optional[str] = Field(
426
- None,
427
- description="The prefix to be used within the Authentication header.",
428
- examples=["Bearer", "Basic"],
429
- title="Header Prefix",
430
- )
431
- jwt_headers: Optional[JwtHeaders] = Field(
432
- None,
433
- description="JWT headers used when signing JSON web token.",
434
- title="JWT Headers",
435
- )
436
- additional_jwt_headers: Optional[Dict[str, Any]] = Field(
437
- None,
438
- description="Additional headers to be included with the JWT headers object.",
439
- title="Additional JWT Headers",
440
- )
441
- jwt_payload: Optional[JwtPayload] = Field(
442
- None,
443
- description="JWT Payload used when signing JSON web token.",
444
- title="JWT Payload",
445
- )
446
- additional_jwt_payload: Optional[Dict[str, Any]] = Field(
447
- None,
448
- description="Additional properties to be added to the JWT payload.",
449
- title="Additional JWT Payload Properties",
450
- )
451
- passphrase: Optional[str] = Field(
452
- None,
453
- description="A passphrase/password used to encrypt the private key. Only provide a passphrase if required by the API for JWT authentication. The API will typically provide the passphrase when generating the public/private key pair.",
454
- examples=["{{ config['passphrase'] }}"],
455
- title="Passphrase",
456
- )
457
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
458
-
459
-
460
398
  class RefreshTokenUpdater(BaseModel):
461
399
  refresh_token_name: Optional[str] = Field(
462
400
  "refresh_token",
@@ -502,148 +440,6 @@ class RefreshTokenUpdater(BaseModel):
502
440
  )
503
441
 
504
442
 
505
- class OAuthAuthenticator(BaseModel):
506
- type: Literal["OAuthAuthenticator"]
507
- client_id_name: Optional[str] = Field(
508
- "client_id",
509
- description="The name of the property to use to refresh the `access_token`.",
510
- examples=["custom_app_id"],
511
- title="Client ID Property Name",
512
- )
513
- client_id: Optional[str] = Field(
514
- None,
515
- description="The OAuth client ID. Fill it in the user inputs.",
516
- examples=[
517
- "{{ config['client_id'] }}",
518
- "{{ config['credentials']['client_id }}",
519
- ],
520
- title="Client ID",
521
- )
522
- client_secret_name: Optional[str] = Field(
523
- "client_secret",
524
- description="The name of the property to use to refresh the `access_token`.",
525
- examples=["custom_app_secret"],
526
- title="Client Secret Property Name",
527
- )
528
- client_secret: Optional[str] = Field(
529
- None,
530
- description="The OAuth client secret. Fill it in the user inputs.",
531
- examples=[
532
- "{{ config['client_secret'] }}",
533
- "{{ config['credentials']['client_secret }}",
534
- ],
535
- title="Client Secret",
536
- )
537
- refresh_token_name: Optional[str] = Field(
538
- "refresh_token",
539
- description="The name of the property to use to refresh the `access_token`.",
540
- examples=["custom_app_refresh_value"],
541
- title="Refresh Token Property Name",
542
- )
543
- refresh_token: Optional[str] = Field(
544
- None,
545
- description="Credential artifact used to get a new access token.",
546
- examples=[
547
- "{{ config['refresh_token'] }}",
548
- "{{ config['credentials]['refresh_token'] }}",
549
- ],
550
- title="Refresh Token",
551
- )
552
- token_refresh_endpoint: Optional[str] = Field(
553
- None,
554
- description="The full URL to call to obtain a new access token.",
555
- examples=["https://connect.squareup.com/oauth2/token"],
556
- title="Token Refresh Endpoint",
557
- )
558
- access_token_name: Optional[str] = Field(
559
- "access_token",
560
- description="The name of the property which contains the access token in the response from the token refresh endpoint.",
561
- examples=["access_token"],
562
- title="Access Token Property Name",
563
- )
564
- access_token_value: Optional[str] = Field(
565
- None,
566
- description="The value of the access_token to bypass the token refreshing using `refresh_token`.",
567
- examples=["secret_access_token_value"],
568
- title="Access Token Value",
569
- )
570
- expires_in_name: Optional[str] = Field(
571
- "expires_in",
572
- description="The name of the property which contains the expiry date in the response from the token refresh endpoint.",
573
- examples=["expires_in"],
574
- title="Token Expiry Property Name",
575
- )
576
- grant_type_name: Optional[str] = Field(
577
- "grant_type",
578
- description="The name of the property to use to refresh the `access_token`.",
579
- examples=["custom_grant_type"],
580
- title="Grant Type Property Name",
581
- )
582
- grant_type: Optional[str] = Field(
583
- "refresh_token",
584
- description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
585
- examples=["refresh_token", "client_credentials"],
586
- title="Grant Type",
587
- )
588
- refresh_request_body: Optional[Dict[str, Any]] = Field(
589
- None,
590
- description="Body of the request sent to get a new access token.",
591
- examples=[
592
- {
593
- "applicationId": "{{ config['application_id'] }}",
594
- "applicationSecret": "{{ config['application_secret'] }}",
595
- "token": "{{ config['token'] }}",
596
- }
597
- ],
598
- title="Refresh Request Body",
599
- )
600
- refresh_request_headers: Optional[Dict[str, Any]] = Field(
601
- None,
602
- description="Headers of the request sent to get a new access token.",
603
- examples=[
604
- {
605
- "Authorization": "<AUTH_TOKEN>",
606
- "Content-Type": "application/x-www-form-urlencoded",
607
- }
608
- ],
609
- title="Refresh Request Headers",
610
- )
611
- scopes: Optional[List[str]] = Field(
612
- None,
613
- description="List of scopes that should be granted to the access token.",
614
- examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]],
615
- title="Scopes",
616
- )
617
- token_expiry_date: Optional[str] = Field(
618
- None,
619
- description="The access token expiry date.",
620
- examples=["2023-04-06T07:12:10.421833+00:00", 1680842386],
621
- title="Token Expiry Date",
622
- )
623
- token_expiry_date_format: Optional[str] = Field(
624
- None,
625
- description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.",
626
- examples=["%Y-%m-%d %H:%M:%S.%f+00:00"],
627
- title="Token Expiry Date Format",
628
- )
629
- refresh_token_updater: Optional[RefreshTokenUpdater] = Field(
630
- None,
631
- description="When the refresh token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.",
632
- title="Refresh Token Updater",
633
- )
634
- profile_assertion: Optional[JwtAuthenticator] = Field(
635
- None,
636
- description="The authenticator being used to authenticate the client authenticator.",
637
- title="Profile Assertion",
638
- )
639
- use_profile_assertion: Optional[bool] = Field(
640
- False,
641
- description="Enable using profile assertion as a flow for OAuth authorization.",
642
- title="Use Profile Assertion",
643
- )
644
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
645
-
646
-
647
443
  class Rate(BaseModel):
648
444
  class Config:
649
445
  extra = Extra.allow
@@ -1895,6 +1691,213 @@ class DatetimeBasedCursor(BaseModel):
1895
1691
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1896
1692
 
1897
1693
 
1694
+ class JwtAuthenticator(BaseModel):
1695
+ type: Literal["JwtAuthenticator"]
1696
+ secret_key: str = Field(
1697
+ ...,
1698
+ description="Secret used to sign the JSON web token.",
1699
+ examples=["{{ config['secret_key'] }}"],
1700
+ title="Secret Key",
1701
+ )
1702
+ base64_encode_secret_key: Optional[bool] = Field(
1703
+ False,
1704
+ description='When set to true, the secret key will be base64 encoded prior to being encoded as part of the JWT. Only set to "true" when required by the API.',
1705
+ title="Base64-encode Secret Key",
1706
+ )
1707
+ algorithm: Algorithm = Field(
1708
+ ...,
1709
+ description="Algorithm used to sign the JSON web token.",
1710
+ examples=["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"],
1711
+ title="Algorithm",
1712
+ )
1713
+ token_duration: Optional[int] = Field(
1714
+ 1200,
1715
+ description="The amount of time in seconds a JWT token can be valid after being issued.",
1716
+ examples=[1200, 3600],
1717
+ title="Token Duration",
1718
+ )
1719
+ header_prefix: Optional[str] = Field(
1720
+ None,
1721
+ description="The prefix to be used within the Authentication header.",
1722
+ examples=["Bearer", "Basic"],
1723
+ title="Header Prefix",
1724
+ )
1725
+ jwt_headers: Optional[JwtHeaders] = Field(
1726
+ None,
1727
+ description="JWT headers used when signing JSON web token.",
1728
+ title="JWT Headers",
1729
+ )
1730
+ additional_jwt_headers: Optional[Dict[str, Any]] = Field(
1731
+ None,
1732
+ description="Additional headers to be included with the JWT headers object.",
1733
+ title="Additional JWT Headers",
1734
+ )
1735
+ jwt_payload: Optional[JwtPayload] = Field(
1736
+ None,
1737
+ description="JWT Payload used when signing JSON web token.",
1738
+ title="JWT Payload",
1739
+ )
1740
+ additional_jwt_payload: Optional[Dict[str, Any]] = Field(
1741
+ None,
1742
+ description="Additional properties to be added to the JWT payload.",
1743
+ title="Additional JWT Payload Properties",
1744
+ )
1745
+ passphrase: Optional[str] = Field(
1746
+ None,
1747
+ description="A passphrase/password used to encrypt the private key. Only provide a passphrase if required by the API for JWT authentication. The API will typically provide the passphrase when generating the public/private key pair.",
1748
+ examples=["{{ config['passphrase'] }}"],
1749
+ title="Passphrase",
1750
+ )
1751
+ request_option: Optional[RequestOption] = Field(
1752
+ None,
1753
+ description="A request option describing where the signed JWT token that is generated should be injected into the outbound API request.",
1754
+ title="Request Option",
1755
+ )
1756
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1757
+
1758
+
1759
+ class OAuthAuthenticator(BaseModel):
1760
+ type: Literal["OAuthAuthenticator"]
1761
+ client_id_name: Optional[str] = Field(
1762
+ "client_id",
1763
+ description="The name of the property to use to refresh the `access_token`.",
1764
+ examples=["custom_app_id"],
1765
+ title="Client ID Property Name",
1766
+ )
1767
+ client_id: Optional[str] = Field(
1768
+ None,
1769
+ description="The OAuth client ID. Fill it in the user inputs.",
1770
+ examples=[
1771
+ "{{ config['client_id'] }}",
1772
+ "{{ config['credentials']['client_id }}",
1773
+ ],
1774
+ title="Client ID",
1775
+ )
1776
+ client_secret_name: Optional[str] = Field(
1777
+ "client_secret",
1778
+ description="The name of the property to use to refresh the `access_token`.",
1779
+ examples=["custom_app_secret"],
1780
+ title="Client Secret Property Name",
1781
+ )
1782
+ client_secret: Optional[str] = Field(
1783
+ None,
1784
+ description="The OAuth client secret. Fill it in the user inputs.",
1785
+ examples=[
1786
+ "{{ config['client_secret'] }}",
1787
+ "{{ config['credentials']['client_secret }}",
1788
+ ],
1789
+ title="Client Secret",
1790
+ )
1791
+ refresh_token_name: Optional[str] = Field(
1792
+ "refresh_token",
1793
+ description="The name of the property to use to refresh the `access_token`.",
1794
+ examples=["custom_app_refresh_value"],
1795
+ title="Refresh Token Property Name",
1796
+ )
1797
+ refresh_token: Optional[str] = Field(
1798
+ None,
1799
+ description="Credential artifact used to get a new access token.",
1800
+ examples=[
1801
+ "{{ config['refresh_token'] }}",
1802
+ "{{ config['credentials]['refresh_token'] }}",
1803
+ ],
1804
+ title="Refresh Token",
1805
+ )
1806
+ token_refresh_endpoint: Optional[str] = Field(
1807
+ None,
1808
+ description="The full URL to call to obtain a new access token.",
1809
+ examples=["https://connect.squareup.com/oauth2/token"],
1810
+ title="Token Refresh Endpoint",
1811
+ )
1812
+ access_token_name: Optional[str] = Field(
1813
+ "access_token",
1814
+ description="The name of the property which contains the access token in the response from the token refresh endpoint.",
1815
+ examples=["access_token"],
1816
+ title="Access Token Property Name",
1817
+ )
1818
+ access_token_value: Optional[str] = Field(
1819
+ None,
1820
+ description="The value of the access_token to bypass the token refreshing using `refresh_token`.",
1821
+ examples=["secret_access_token_value"],
1822
+ title="Access Token Value",
1823
+ )
1824
+ expires_in_name: Optional[str] = Field(
1825
+ "expires_in",
1826
+ description="The name of the property which contains the expiry date in the response from the token refresh endpoint.",
1827
+ examples=["expires_in"],
1828
+ title="Token Expiry Property Name",
1829
+ )
1830
+ grant_type_name: Optional[str] = Field(
1831
+ "grant_type",
1832
+ description="The name of the property to use to refresh the `access_token`.",
1833
+ examples=["custom_grant_type"],
1834
+ title="Grant Type Property Name",
1835
+ )
1836
+ grant_type: Optional[str] = Field(
1837
+ "refresh_token",
1838
+ description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
1839
+ examples=["refresh_token", "client_credentials"],
1840
+ title="Grant Type",
1841
+ )
1842
+ refresh_request_body: Optional[Dict[str, Any]] = Field(
1843
+ None,
1844
+ description="Body of the request sent to get a new access token.",
1845
+ examples=[
1846
+ {
1847
+ "applicationId": "{{ config['application_id'] }}",
1848
+ "applicationSecret": "{{ config['application_secret'] }}",
1849
+ "token": "{{ config['token'] }}",
1850
+ }
1851
+ ],
1852
+ title="Refresh Request Body",
1853
+ )
1854
+ refresh_request_headers: Optional[Dict[str, Any]] = Field(
1855
+ None,
1856
+ description="Headers of the request sent to get a new access token.",
1857
+ examples=[
1858
+ {
1859
+ "Authorization": "<AUTH_TOKEN>",
1860
+ "Content-Type": "application/x-www-form-urlencoded",
1861
+ }
1862
+ ],
1863
+ title="Refresh Request Headers",
1864
+ )
1865
+ scopes: Optional[List[str]] = Field(
1866
+ None,
1867
+ description="List of scopes that should be granted to the access token.",
1868
+ examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]],
1869
+ title="Scopes",
1870
+ )
1871
+ token_expiry_date: Optional[str] = Field(
1872
+ None,
1873
+ description="The access token expiry date.",
1874
+ examples=["2023-04-06T07:12:10.421833+00:00", 1680842386],
1875
+ title="Token Expiry Date",
1876
+ )
1877
+ token_expiry_date_format: Optional[str] = Field(
1878
+ None,
1879
+ description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.",
1880
+ examples=["%Y-%m-%d %H:%M:%S.%f+00:00"],
1881
+ title="Token Expiry Date Format",
1882
+ )
1883
+ refresh_token_updater: Optional[RefreshTokenUpdater] = Field(
1884
+ None,
1885
+ description="When the refresh token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.",
1886
+ title="Refresh Token Updater",
1887
+ )
1888
+ profile_assertion: Optional[JwtAuthenticator] = Field(
1889
+ None,
1890
+ description="The authenticator being used to authenticate the client authenticator.",
1891
+ title="Profile Assertion",
1892
+ )
1893
+ use_profile_assertion: Optional[bool] = Field(
1894
+ False,
1895
+ description="Enable using profile assertion as a flow for OAuth authorization.",
1896
+ title="Use Profile Assertion",
1897
+ )
1898
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1899
+
1900
+
1898
1901
  class FixedWindowCallRatePolicy(BaseModel):
1899
1902
  class Config:
1900
1903
  extra = Extra.allow
@@ -2683,12 +2683,16 @@ class ModelToComponentFactory:
2683
2683
  file_path=model.file_path or "", config=config, parameters=model.parameters or {}
2684
2684
  )
2685
2685
 
2686
- @staticmethod
2687
2686
  def create_jwt_authenticator(
2688
- model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2687
+ self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any
2689
2688
  ) -> JwtAuthenticator:
2690
2689
  jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None)
2691
2690
  jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None)
2691
+ request_option = (
2692
+ self._create_component_from_model(model.request_option, config)
2693
+ if model.request_option
2694
+ else None
2695
+ )
2692
2696
  return JwtAuthenticator(
2693
2697
  config=config,
2694
2698
  parameters=model.parameters or {},
@@ -2706,6 +2710,7 @@ class ModelToComponentFactory:
2706
2710
  additional_jwt_headers=model.additional_jwt_headers,
2707
2711
  additional_jwt_payload=model.additional_jwt_payload,
2708
2712
  passphrase=model.passphrase,
2713
+ request_option=request_option,
2709
2714
  )
2710
2715
 
2711
2716
  def create_list_partition_router(
@@ -3,13 +3,15 @@
3
3
  #
4
4
 
5
5
  import logging
6
+ import time
6
7
  from abc import ABC, abstractmethod
7
8
  from datetime import datetime
8
9
  from enum import Enum
9
10
  from io import IOBase
10
11
  from os import makedirs, path
11
- from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple
12
+ from typing import Any, Iterable, List, MutableMapping, Optional, Set, Tuple
12
13
 
14
+ from airbyte_protocol_dataclasses.models import FailureType
13
15
  from wcmatch.glob import GLOBSTAR, globmatch
14
16
 
15
17
  from airbyte_cdk.models import AirbyteRecordMessageFileReference
@@ -19,8 +21,9 @@ from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import
19
21
  preserve_directory_structure,
20
22
  use_file_transfer,
21
23
  )
24
+ from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
22
25
  from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
23
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
26
+ from airbyte_cdk.sources.file_based.remote_file import RemoteFile, UploadableRemoteFile
24
27
 
25
28
 
26
29
  class FileReadMode(Enum):
@@ -34,6 +37,7 @@ class AbstractFileBasedStreamReader(ABC):
34
37
  FILE_NAME = "file_name"
35
38
  LOCAL_FILE_PATH = "local_file_path"
36
39
  FILE_FOLDER = "file_folder"
40
+ FILE_SIZE_LIMIT = 1_500_000_000
37
41
 
38
42
  def __init__(self) -> None:
39
43
  self._config = None
@@ -113,16 +117,6 @@ class AbstractFileBasedStreamReader(ABC):
113
117
  seen.add(file.uri)
114
118
  yield file
115
119
 
116
- @abstractmethod
117
- def file_size(self, file: RemoteFile) -> int:
118
- """Utility method to get size of the remote file.
119
-
120
- This is required for connectors that will support writing to
121
- files. If the connector does not support writing files, then the
122
- subclass can simply `return 0`.
123
- """
124
- ...
125
-
126
120
  @staticmethod
127
121
  def file_matches_globs(file: RemoteFile, globs: List[str]) -> bool:
128
122
  # Use the GLOBSTAR flag to enable recursive ** matching
@@ -153,9 +147,8 @@ class AbstractFileBasedStreamReader(ABC):
153
147
  return include_identities_stream(self.config)
154
148
  return False
155
149
 
156
- @abstractmethod
157
150
  def upload(
158
- self, file: RemoteFile, local_directory: str, logger: logging.Logger
151
+ self, file: UploadableRemoteFile, local_directory: str, logger: logging.Logger
159
152
  ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
160
153
  """
161
154
  This is required for connectors that will support writing to
@@ -173,7 +166,53 @@ class AbstractFileBasedStreamReader(ABC):
173
166
  - file_size_bytes (int): The size of the referenced file in bytes.
174
167
  - source_file_relative_path (str): The relative path to the referenced file in source.
175
168
  """
176
- ...
169
+ if not isinstance(file, UploadableRemoteFile):
170
+ raise TypeError(f"Expected UploadableRemoteFile, got {type(file)}")
171
+
172
+ file_size = file.size
173
+
174
+ if file_size > self.FILE_SIZE_LIMIT:
175
+ message = f"File size exceeds the {self.FILE_SIZE_LIMIT / 1e9} GB limit."
176
+ raise FileSizeLimitError(
177
+ message=message, internal_message=message, failure_type=FailureType.config_error
178
+ )
179
+
180
+ file_paths = self._get_file_transfer_paths(
181
+ source_file_relative_path=file.source_file_relative_path,
182
+ staging_directory=local_directory,
183
+ )
184
+ local_file_path = file_paths[self.LOCAL_FILE_PATH]
185
+ file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
186
+ file_name = file_paths[self.FILE_NAME]
187
+
188
+ logger.info(
189
+ f"Starting to download the file {file.file_uri_for_logging} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
190
+ )
191
+ start_download_time = time.time()
192
+
193
+ file.download_to_local_directory(local_file_path)
194
+
195
+ write_duration = time.time() - start_download_time
196
+ logger.info(
197
+ f"Finished downloading the file {file.file_uri_for_logging} and saved to {local_file_path} in {write_duration:,.2f} seconds."
198
+ )
199
+
200
+ file_record_data = FileRecordData(
201
+ folder=file_paths[self.FILE_FOLDER],
202
+ file_name=file_name,
203
+ bytes=file_size,
204
+ id=file.id,
205
+ mime_type=file.mime_type,
206
+ created_at=file.created_at,
207
+ updated_at=file.updated_at,
208
+ source_uri=file.uri,
209
+ )
210
+ file_reference = AirbyteRecordMessageFileReference(
211
+ staging_file_url=local_file_path,
212
+ source_file_relative_path=file_relative_path,
213
+ file_size_bytes=file_size,
214
+ )
215
+ return file_record_data, file_reference
177
216
 
178
217
  def _get_file_transfer_paths(
179
218
  self, source_file_relative_path: str, staging_directory: str
@@ -7,7 +7,7 @@ from typing import Iterable, Tuple
7
7
  from airbyte_cdk.models import AirbyteRecordMessageFileReference
8
8
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
9
9
  from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
10
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
10
+ from airbyte_cdk.sources.file_based.remote_file import UploadableRemoteFile
11
11
  from airbyte_cdk.sources.utils.files_directory import get_files_directory
12
12
 
13
13
 
@@ -17,7 +17,7 @@ class FileTransfer:
17
17
 
18
18
  def upload(
19
19
  self,
20
- file: RemoteFile,
20
+ file: UploadableRemoteFile,
21
21
  stream_reader: AbstractFileBasedStreamReader,
22
22
  logger: logging.Logger,
23
23
  ) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
-
4
+ from abc import ABC, abstractmethod
5
5
  from datetime import datetime
6
6
  from typing import Optional
7
7
 
@@ -16,3 +16,42 @@ class RemoteFile(BaseModel):
16
16
  uri: str
17
17
  last_modified: datetime
18
18
  mime_type: Optional[str] = None
19
+
20
+
21
+ class UploadableRemoteFile(RemoteFile, ABC):
22
+ """
23
+ A file in a file-based stream that supports uploading(file transferring).
24
+ """
25
+
26
+ id: Optional[str] = None
27
+ created_at: Optional[str] = None
28
+ updated_at: Optional[str] = None
29
+
30
+ @property
31
+ @abstractmethod
32
+ def size(self) -> int:
33
+ """
34
+ Returns the file size in bytes.
35
+ """
36
+ ...
37
+
38
+ @abstractmethod
39
+ def download_to_local_directory(self, local_file_path: str) -> None:
40
+ """
41
+ Download the file from remote source to local storage.
42
+ """
43
+ ...
44
+
45
+ @property
46
+ def source_file_relative_path(self) -> str:
47
+ """
48
+ Returns the relative path of the source file.
49
+ """
50
+ return self.uri
51
+
52
+ @property
53
+ def file_uri_for_logging(self) -> str:
54
+ """
55
+ Returns the URI for the file being logged.
56
+ """
57
+ return self.uri
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 7.2.3
3
+ Version: 7.3.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -115,7 +115,7 @@ airbyte_cdk/sources/declarative/async_job/status.py,sha256=mkExR-uOAO1ckUnclaUOa
115
115
  airbyte_cdk/sources/declarative/async_job/timer.py,sha256=Fb8P72CQ7jIzJyzMSSNuBf2vt8bmrg9SrfmNxKwph2A,1242
116
116
  airbyte_cdk/sources/declarative/auth/__init__.py,sha256=e2CRrcBWGhz3sQu3Oh34d1riEIwXipGS8hrSB1pu0Oo,284
117
117
  airbyte_cdk/sources/declarative/auth/declarative_authenticator.py,sha256=nf-OmRUHYG4ORBwyb5CANzuHEssE-oNmL-Lccn41Td8,1099
118
- airbyte_cdk/sources/declarative/auth/jwt.py,sha256=zZANSwaq-LkO22VbcdZloRrv5u7zTaC770xvWRtSKrE,9779
118
+ airbyte_cdk/sources/declarative/auth/jwt.py,sha256=Zf2U17HZAzzXGAnneAuq4zynrE7OalasVeMYSnvPdZc,11190
119
119
  airbyte_cdk/sources/declarative/auth/oauth.py,sha256=bCwf3f3Td_CA8DZ6CXMVPNiImM9QEGDxkcLKzSo3-f0,14339
120
120
  airbyte_cdk/sources/declarative/auth/selective_authenticator.py,sha256=qGwC6YsCldr1bIeKG6Qo-A9a5cTdHw-vcOn3OtQrS4c,1540
121
121
  airbyte_cdk/sources/declarative/auth/token.py,sha256=2EnE78EhBOY9hbeZnQJ9AuFaM-G7dccU-oKo_LThRQk,11070
@@ -130,7 +130,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=z0AgJ6AZ
130
130
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
131
131
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
132
132
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
133
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=dWfbQUSNdW1HuY1NZslMNL8sTF-I3mKFXa9Fo3747-0,187551
133
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=D5WeK1Iw_T0ZxLKCmKLowyO7GwxnwfhYom-sd1W14uQ,187793
134
134
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
135
135
  airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=qB4lRUrCXLTE-a3VlpOLaazHiC7RIF_FIVJesuz7ebw,8078
136
136
  airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
@@ -165,14 +165,14 @@ airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migrati
165
165
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
166
166
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
167
167
  airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py,sha256=Imnj3yef0aqRdLfaUxkIYISUb8YkiPrRH_wBd-x8HjM,5999
168
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=THYAUpV9dDO3n7nTshHrq9iMulGDy60QKM1_jy5t8Ak,131930
168
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=t2td-7swgXY3RJc1VDBFUYI2Blc55j5TDFdg90aHwlU,132123
169
169
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
170
170
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
171
171
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
172
172
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=la9Ulpc0lQewiBLKJ0FpsWxyU5XISv-ulmFRHJLJ1Pc,11292
173
173
  airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
174
174
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
175
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=1U2IMabGxODG9-sh2zNNHUBk3bkcWFssPisFL76vIVk,183420
175
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lMjcNJKqU3HcOjNZyR4_Ln1v32HK-7iNcFq4nmFjxSE,183622
176
176
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
177
177
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
178
178
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=ocm4hZ4k-tEGs5HLrtI8ecWSK0hGqNH0Rvz2byx_HZk,6927
@@ -298,18 +298,18 @@ airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha2
298
298
  airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
299
299
  airbyte_cdk/sources/file_based/file_based_source.py,sha256=Xg8OYWnGc-OcVBglvS08uwAWGWHBhEqsBnyODIkOK-4,20051
300
300
  airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py,sha256=4e7FXqQ9hueacexC0SyrZyjF8oREYHza8pKF9CgKbD8,5050
301
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=rwz8AhEIqYB9gBF7uW9eR--eUiHOntzuwLH8jFHNacE,7854
301
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=Yg9KRXpyAtElBrUOO8oX4WHQH6k6Lk7keklrZmB5Klg,9614
302
302
  airbyte_cdk/sources/file_based/file_record_data.py,sha256=Vkr5AyZzlsOezjVCLhFrm_WpymlQdolWCnFAwqLJ9Iw,453
303
303
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
304
304
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=USEYqiICXBWpDV443VtNOCmUA-GINzY_Zah74_5w3qQ,10860
305
305
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
306
306
  airbyte_cdk/sources/file_based/file_types/excel_parser.py,sha256=BeplCq0hmojELU6bZCvvpRLpQ9us81TqbGYwrhd3INo,7188
307
- airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=5l2Jo6bp6neDmgM427PrZMZeqU0hCIZVWnzUZ_7BT10,1100
307
+ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=rFxWaqItBux9tPf4xU03LT6b-wDZf1QolM92mP8Diuk,1120
308
308
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
309
309
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
310
310
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
311
311
  airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
312
- airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
312
+ airbyte_cdk/sources/file_based/remote_file.py,sha256=1Afzr2WFWwjiUz8R2vNFepeI192UNeHOZAXIGTWOzOM,1248
313
313
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=dKXAOTmMI3YmC5u7PeHC9AaZmlL6ft7CYSFQKCg0sXw,9911
314
314
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
315
315
  airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=kjvX7nOmUALYd7HuZHilUzgJPZ-MnZ08mtvuBnt2tQ0,618
@@ -457,9 +457,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
457
457
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=9YDJmnIGFsT51CVQf2tSSvTapGimITjEFGbUTSZAGTI,963
458
458
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
459
459
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
460
- airbyte_cdk-7.2.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
461
- airbyte_cdk-7.2.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
462
- airbyte_cdk-7.2.3.dist-info/METADATA,sha256=gZXCUx6Kq31vc4g4TcmcOfaNbnKlW4af5Ka_WqVjvSg,6798
463
- airbyte_cdk-7.2.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
464
- airbyte_cdk-7.2.3.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
465
- airbyte_cdk-7.2.3.dist-info/RECORD,,
460
+ airbyte_cdk-7.3.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
461
+ airbyte_cdk-7.3.1.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
462
+ airbyte_cdk-7.3.1.dist-info/METADATA,sha256=_n29oKSyO6A6mUMN1c6YqHvrJRFQrXOIhhAh1E0PuXo,6798
463
+ airbyte_cdk-7.3.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
464
+ airbyte_cdk-7.3.1.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
465
+ airbyte_cdk-7.3.1.dist-info/RECORD,,