airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
- airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
- airbyte_cdk/sources/file_based/exceptions.py +34 -0
- airbyte_cdk/sources/file_based/file_based_source.py +28 -5
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
- airbyte_cdk/sources/types.py +3 -0
- airbyte_cdk/sources/utils/transform.py +29 -3
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
@@ -53,44 +53,45 @@ airbyte_cdk/sources/declarative/async_job/timer.py,sha256=Fb8P72CQ7jIzJyzMSSNuBf
|
|
53
53
|
airbyte_cdk/sources/declarative/auth/__init__.py,sha256=e2CRrcBWGhz3sQu3Oh34d1riEIwXipGS8hrSB1pu0Oo,284
|
54
54
|
airbyte_cdk/sources/declarative/auth/declarative_authenticator.py,sha256=nf-OmRUHYG4ORBwyb5CANzuHEssE-oNmL-Lccn41Td8,1099
|
55
55
|
airbyte_cdk/sources/declarative/auth/jwt.py,sha256=7r5q1zOekjw8kEmEk1oUyovzVt3cbD6BuFnRILeLZi8,8250
|
56
|
-
airbyte_cdk/sources/declarative/auth/oauth.py,sha256=
|
56
|
+
airbyte_cdk/sources/declarative/auth/oauth.py,sha256=GhXWheC5GkKV7req3jBCY0aTbFwCuQ5RRSfZi3jFphM,11002
|
57
57
|
airbyte_cdk/sources/declarative/auth/selective_authenticator.py,sha256=qGwC6YsCldr1bIeKG6Qo-A9a5cTdHw-vcOn3OtQrS4c,1540
|
58
58
|
airbyte_cdk/sources/declarative/auth/token.py,sha256=r4u3WXyVa7WmiSZ9-eZXlrUI-pS0D4YWJnwjLzwV-Fk,11210
|
59
59
|
airbyte_cdk/sources/declarative/auth/token_provider.py,sha256=9oq3dcBPAPwXSfkISjhA05dMhIzxaDQTmwOydBrnsMk,3028
|
60
|
-
airbyte_cdk/sources/declarative/checks/__init__.py,sha256=
|
60
|
+
airbyte_cdk/sources/declarative/checks/__init__.py,sha256=nsVV5Bo0E_tBNd8A4Xdsdb-75PpcLo5RQu2RQ_Gv-ME,806
|
61
|
+
airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py,sha256=aXKL1YSAB-0T_eZiavb7e5rprf-DdXG77Fy81FtlcWk,1843
|
61
62
|
airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQrilWCfJmncBzXCZ18ptRNip3XA,2139
|
62
63
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
63
64
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
64
65
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
65
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
66
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyMCu1qoGsne1Ooz3c1da-8EDZk6Suiy2gIq9Q,22475
|
66
67
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
68
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
69
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
70
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=PxY_V8vGyNdUMw3vjhqFbqjRNgYs_-0-0xeSTGkLSBw,137031
|
70
71
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
72
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
|
-
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=
|
73
|
-
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256
|
73
|
+
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=KSpQetKGqPCv-38QgcVJ5kzM5nzbFldTSsYDCS3Xf0Y,1035
|
74
|
+
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=kQfUVMVhChKe5OngwIQrs0F9KGnRUN-CKVFakCU23DQ,4354
|
74
75
|
airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=sl-Gt8lXi7yD2Q-sD8je5QS2PbgrgsYjxRLWsay7DMc,826
|
75
76
|
airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=qdbjeR6RffKaah_iWvMsOcDolYuxJY5DaI3b9AMTZXg,3327
|
76
77
|
airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
|
77
78
|
airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py,sha256=ZVBZhAOl0I0MymXN5CKTC-kIXG4GuUQAEyn0XpUDuSE,1081
|
78
79
|
airbyte_cdk/sources/declarative/decoders/xml_decoder.py,sha256=EU-7t-5vIGRHZ14h-f0GUE4V5-eTM9Flux-A8xgI1Rc,3117
|
80
|
+
airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py,sha256=OTGeNh-Zkab9JwCTgiHtLH1IS6PiVO9jnr82c0vrHbw,2269
|
79
81
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
80
82
|
airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrqC9AV1gO8-90t8UTDVfJGshN9E,754
|
81
83
|
airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
|
82
84
|
airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
|
83
85
|
airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
|
84
|
-
airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=
|
86
|
+
airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzxYOeIrDy1GINb1zH9MBy6suC5tm2LSk,3545
|
85
87
|
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
|
86
88
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
87
89
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
88
|
-
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=
|
89
|
-
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=V_KXjVQFB_55u_ZncM52fxFr4XEcoUje9Tu5pxI9-Fo,14398
|
90
|
+
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=huRz3KQJSUFmJCg5GPE9TckEBsB5TMsCa_THhJAhPVI,1037
|
90
91
|
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
|
91
92
|
airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
|
92
|
-
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=
|
93
|
-
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=
|
93
|
+
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
|
94
|
+
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
|
94
95
|
airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
|
95
96
|
airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
|
96
97
|
airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
|
@@ -102,17 +103,17 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
|
|
102
103
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
|
103
104
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
|
104
105
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=Y5AWYxbJTUtJ_Jm7DV9qrZDiymFR9LST7fBt4piT2-U,4585
|
105
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
|
106
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=_xCg7CfWQCDXVQx8ZRzcS6yuocfWzqLvOMLkgwEK5vw,16352
|
106
107
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
108
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
108
109
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
109
110
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
110
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
111
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=57IP4WKtwsoVvWpJKFTTsWMR58nzPIwVvzAehYJ0BrA,96250
|
111
112
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
112
113
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
113
114
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
114
115
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
115
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
116
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=ZIl9MKlzOPzo-iMWwcJorGboWuCi8ZMy65YW04TS6UM,112776
|
116
117
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
117
118
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
|
118
119
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -120,6 +121,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
|
|
120
121
|
airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
|
121
122
|
airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
|
122
123
|
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=5bgXoJfBg_6i53krQMptAGb50XB5XoVfqQxKQhlLtBA,15383
|
124
|
+
airbyte_cdk/sources/declarative/requesters/README.md,sha256=eL1I4iLkxaw7hJi9S9d18_XcRl-R8lUSjqBVJJzvXmg,2656
|
123
125
|
airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
|
124
126
|
airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
|
125
127
|
airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
|
@@ -134,7 +136,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
|
|
134
136
|
airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
|
135
137
|
airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
|
136
138
|
airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=vhWsEKNTYEzZ4gerhHqnDNKu4wGIP485NAzpSQ5DRZg,7941
|
137
|
-
airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=
|
139
|
+
airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=3GtOefPH08evlSUxaILkiKLTHbIspFY4qd5B3ZqNE60,10063
|
138
140
|
airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
|
139
141
|
airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
|
140
142
|
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
|
@@ -163,10 +165,10 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
|
|
163
165
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
|
164
166
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
|
165
167
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
166
|
-
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
|
168
|
+
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
|
167
169
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
|
168
170
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
169
|
-
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=
|
171
|
+
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=sa99VqU1U45fgZL2qEdw8ueX1tPTPfGxibQ-ZFePjSM,9361
|
170
172
|
airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
|
171
173
|
airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
|
172
174
|
airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
|
@@ -196,7 +198,7 @@ airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk
|
|
196
198
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=01Nd4b7ERAbp-OZo_8rrAzFXWPTMwr02SnWiN17nx8Q,2363
|
197
199
|
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=j9T5TimfWFUz7nqsaj-83G3xWmDpsmeSbDnaUNmz0UM,5849
|
198
200
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
199
|
-
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=
|
201
|
+
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=gXlZwnEKLWknnK_n7j14lANgR6vkqhlLJ-G3rRu-ox4,6897
|
200
202
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=NxTF96ewzn6HuhgodsY7Rpb-ybr1ZEWW5d4Vid64g5A,716
|
201
203
|
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=NWekkyT8dTwiVK0mwa_krQD4FJPHSDfILo8kPAg3-Vs,8006
|
202
204
|
airbyte_cdk/sources/file_based/config/excel_format.py,sha256=9qAmTsT6SoVzNfNv0oBVkVCmiyqQuVAbfRKajjoa7Js,378
|
@@ -207,9 +209,9 @@ airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=tIbB9Pn1HqU6
|
|
207
209
|
airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfraB9P3pFhf9UJp2JeTZ1SUFAopy2iBvY,301
|
208
210
|
airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
|
209
211
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
|
210
|
-
airbyte_cdk/sources/file_based/exceptions.py,sha256=
|
211
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
212
|
-
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=
|
212
|
+
airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
|
213
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=Biv2QufYQtHZQCBZs4iCUpqTd82rk7xo8SDYkEeau3k,17616
|
214
|
+
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=e1KhgTh7mzvkBOz9DjLwzOsDwevrTmbxSYIcvhgWgGM,6856
|
213
215
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
|
214
216
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
|
215
217
|
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
|
@@ -218,7 +220,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs
|
|
218
220
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
|
219
221
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
|
220
222
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
|
221
|
-
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=
|
223
|
+
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
|
222
224
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
|
223
225
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
|
224
226
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
|
@@ -235,7 +237,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
|
|
235
237
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
236
238
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
|
237
239
|
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
|
238
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
240
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
|
239
241
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
240
242
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
241
243
|
airbyte_cdk/sources/http_logger.py,sha256=TyBmtRA6D9g0XDkKGvdM415b36RXDjgfkwRewDsH8-0,1576
|
@@ -257,7 +259,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
|
|
257
259
|
airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
|
258
260
|
airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
|
259
261
|
airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
|
260
|
-
airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=
|
262
|
+
airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Hke6CpD8Sq1FS4g1Xuht39UN7hKkGy1mvOxvQrm1lLM,20810
|
261
263
|
airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
|
262
264
|
airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
|
263
265
|
airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
|
@@ -288,18 +290,18 @@ airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9w
|
|
288
290
|
airbyte_cdk/sources/streams/http/http_client.py,sha256=tDE0ROtxjGMVphvsw8INvGMtZ97hIF-v47pZ3jIyiwc,23011
|
289
291
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
290
292
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
291
|
-
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256
|
293
|
+
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=-GDNyqccdutOftFpqCvvk81NwkskHhDZ8QcsUKzNjRQ,11660
|
292
294
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=Y3n7J-sk5yGjv_OxtY6Z6k0PEsFZmtIRi-x0KCbaHdA,1010
|
293
|
-
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=
|
295
|
+
airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=OCNokL0GypjN8PUVJk1UFJVE5THkHAbNDmB984_F7W0,16718
|
294
296
|
airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
|
295
297
|
airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
296
|
-
airbyte_cdk/sources/types.py,sha256=
|
298
|
+
airbyte_cdk/sources/types.py,sha256=nLPkTpyfGV4E6e99qcBWX4r8C3fE4I8Fvgx2EjvT9ic,5005
|
297
299
|
airbyte_cdk/sources/utils/__init__.py,sha256=TTN6VUxVy6Is8BhYQZR5pxJGQh8yH4duXh4O1TiMiEY,118
|
298
300
|
airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABLYYRSXA,256
|
299
301
|
airbyte_cdk/sources/utils/record_helper.py,sha256=jeB0mucudzna7Zvj-pCBbwFrbLJ36SlAWZTh5O4Fb9Y,2168
|
300
302
|
airbyte_cdk/sources/utils/schema_helpers.py,sha256=bR3I70-e11S6B8r6VK-pthQXtcYrXojgXFvuK7lRrpg,8545
|
301
303
|
airbyte_cdk/sources/utils/slice_logger.py,sha256=qWWeFLAvigFz0b4O1_O3QDM1cy8PqZAMMgVPR2hEeb8,1778
|
302
|
-
airbyte_cdk/sources/utils/transform.py,sha256=
|
304
|
+
airbyte_cdk/sources/utils/transform.py,sha256=Sks6kiRbef1W-5I6PRqnFxksJe2NOPKCRXQLudaltf8,11015
|
303
305
|
airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
|
304
306
|
airbyte_cdk/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
305
307
|
airbyte_cdk/sql/_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -343,8 +345,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
343
345
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
344
346
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
345
347
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
346
|
-
airbyte_cdk-6.
|
347
|
-
airbyte_cdk-6.
|
348
|
-
airbyte_cdk-6.
|
349
|
-
airbyte_cdk-6.
|
350
|
-
airbyte_cdk-6.
|
348
|
+
airbyte_cdk-6.21.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
349
|
+
airbyte_cdk-6.21.0.dist-info/METADATA,sha256=6djJTSQ0PJieSZE0V6_FAaDKlwVCELJ0_YyMsez9oLE,6000
|
350
|
+
airbyte_cdk-6.21.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
351
|
+
airbyte_cdk-6.21.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
352
|
+
airbyte_cdk-6.21.0.dist-info/RECORD,,
|
@@ -1,331 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import copy
|
6
|
-
import logging
|
7
|
-
import threading
|
8
|
-
from collections import OrderedDict
|
9
|
-
from copy import deepcopy
|
10
|
-
from datetime import timedelta
|
11
|
-
from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
|
12
|
-
|
13
|
-
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
14
|
-
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
15
|
-
Timer,
|
16
|
-
iterate_with_last_flag_and_state,
|
17
|
-
)
|
18
|
-
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
19
|
-
from airbyte_cdk.sources.message import MessageRepository
|
20
|
-
from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
|
21
|
-
PerPartitionKeySerializer,
|
22
|
-
)
|
23
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
|
24
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
25
|
-
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
26
|
-
|
27
|
-
logger = logging.getLogger("airbyte")
|
28
|
-
|
29
|
-
|
30
|
-
class ConcurrentCursorFactory:
|
31
|
-
def __init__(self, create_function: Callable[..., ConcurrentCursor]):
|
32
|
-
self._create_function = create_function
|
33
|
-
|
34
|
-
def create(
|
35
|
-
self, stream_state: Mapping[str, Any], runtime_lookback_window: Any
|
36
|
-
) -> ConcurrentCursor:
|
37
|
-
return self._create_function(
|
38
|
-
stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
|
39
|
-
)
|
40
|
-
|
41
|
-
|
42
|
-
class ConcurrentPerPartitionCursor(Cursor):
|
43
|
-
"""
|
44
|
-
Manages state per partition when a stream has many partitions, preventing data loss or duplication.
|
45
|
-
|
46
|
-
Attributes:
|
47
|
-
DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).
|
48
|
-
|
49
|
-
- **Partition Limitation Logic**
|
50
|
-
Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. Oldest partitions are removed when the limit is reached.
|
51
|
-
|
52
|
-
- **Global Cursor Fallback**
|
53
|
-
New partitions use global state as the initial state to progress the state for deleted or new partitions. The history data added after the initial sync will be missing.
|
54
|
-
"""
|
55
|
-
|
56
|
-
DEFAULT_MAX_PARTITIONS_NUMBER = 10000
|
57
|
-
_NO_STATE: Mapping[str, Any] = {}
|
58
|
-
_NO_CURSOR_STATE: Mapping[str, Any] = {}
|
59
|
-
_GLOBAL_STATE_KEY = "state"
|
60
|
-
_PERPARTITION_STATE_KEY = "states"
|
61
|
-
_KEY = 0
|
62
|
-
_VALUE = 1
|
63
|
-
|
64
|
-
def __init__(
|
65
|
-
self,
|
66
|
-
cursor_factory: ConcurrentCursorFactory,
|
67
|
-
partition_router: PartitionRouter,
|
68
|
-
stream_name: str,
|
69
|
-
stream_namespace: Optional[str],
|
70
|
-
stream_state: Any,
|
71
|
-
message_repository: MessageRepository,
|
72
|
-
connector_state_manager: ConnectorStateManager,
|
73
|
-
cursor_field: CursorField,
|
74
|
-
) -> None:
|
75
|
-
self._global_cursor: Optional[StreamState] = {}
|
76
|
-
self._stream_name = stream_name
|
77
|
-
self._stream_namespace = stream_namespace
|
78
|
-
self._message_repository = message_repository
|
79
|
-
self._connector_state_manager = connector_state_manager
|
80
|
-
self._cursor_field = cursor_field
|
81
|
-
|
82
|
-
self._cursor_factory = cursor_factory
|
83
|
-
self._partition_router = partition_router
|
84
|
-
|
85
|
-
# The dict is ordered to ensure that once the maximum number of partitions is reached,
|
86
|
-
# the oldest partitions can be efficiently removed, maintaining the most recent partitions.
|
87
|
-
self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
|
88
|
-
self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
|
89
|
-
self._finished_partitions: set[str] = set()
|
90
|
-
self._lock = threading.Lock()
|
91
|
-
self._timer = Timer()
|
92
|
-
self._new_global_cursor: Optional[StreamState] = None
|
93
|
-
self._lookback_window: int = 0
|
94
|
-
self._parent_state: Optional[StreamState] = None
|
95
|
-
self._over_limit: int = 0
|
96
|
-
self._partition_serializer = PerPartitionKeySerializer()
|
97
|
-
|
98
|
-
self._set_initial_state(stream_state)
|
99
|
-
|
100
|
-
@property
|
101
|
-
def cursor_field(self) -> CursorField:
|
102
|
-
return self._cursor_field
|
103
|
-
|
104
|
-
@property
|
105
|
-
def state(self) -> MutableMapping[str, Any]:
|
106
|
-
states = []
|
107
|
-
for partition_tuple, cursor in self._cursor_per_partition.items():
|
108
|
-
if cursor.state:
|
109
|
-
states.append(
|
110
|
-
{
|
111
|
-
"partition": self._to_dict(partition_tuple),
|
112
|
-
"cursor": copy.deepcopy(cursor.state),
|
113
|
-
}
|
114
|
-
)
|
115
|
-
state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}
|
116
|
-
|
117
|
-
if self._global_cursor:
|
118
|
-
state[self._GLOBAL_STATE_KEY] = self._global_cursor
|
119
|
-
if self._lookback_window is not None:
|
120
|
-
state["lookback_window"] = self._lookback_window
|
121
|
-
if self._parent_state is not None:
|
122
|
-
state["parent_state"] = self._parent_state
|
123
|
-
return state
|
124
|
-
|
125
|
-
def close_partition(self, partition: Partition) -> None:
|
126
|
-
# Attempt to retrieve the stream slice
|
127
|
-
stream_slice: Optional[StreamSlice] = partition.to_slice() # type: ignore[assignment]
|
128
|
-
|
129
|
-
# Ensure stream_slice is not None
|
130
|
-
if stream_slice is None:
|
131
|
-
raise ValueError("stream_slice cannot be None")
|
132
|
-
|
133
|
-
partition_key = self._to_partition_key(stream_slice.partition)
|
134
|
-
self._cursor_per_partition[partition_key].close_partition(partition=partition)
|
135
|
-
with self._lock:
|
136
|
-
self._semaphore_per_partition[partition_key].acquire()
|
137
|
-
cursor = self._cursor_per_partition[partition_key]
|
138
|
-
if (
|
139
|
-
partition_key in self._finished_partitions
|
140
|
-
and self._semaphore_per_partition[partition_key]._value == 0
|
141
|
-
):
|
142
|
-
if (
|
143
|
-
self._new_global_cursor is None
|
144
|
-
or self._new_global_cursor[self.cursor_field.cursor_field_key]
|
145
|
-
< cursor.state[self.cursor_field.cursor_field_key]
|
146
|
-
):
|
147
|
-
self._new_global_cursor = copy.deepcopy(cursor.state)
|
148
|
-
|
149
|
-
def ensure_at_least_one_state_emitted(self) -> None:
|
150
|
-
"""
|
151
|
-
The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
|
152
|
-
called.
|
153
|
-
"""
|
154
|
-
if not any(
|
155
|
-
semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
|
156
|
-
):
|
157
|
-
self._global_cursor = self._new_global_cursor
|
158
|
-
self._lookback_window = self._timer.finish()
|
159
|
-
self._parent_state = self._partition_router.get_stream_state()
|
160
|
-
self._emit_state_message()
|
161
|
-
|
162
|
-
def _emit_state_message(self) -> None:
|
163
|
-
self._connector_state_manager.update_state_for_stream(
|
164
|
-
self._stream_name,
|
165
|
-
self._stream_namespace,
|
166
|
-
self.state,
|
167
|
-
)
|
168
|
-
state_message = self._connector_state_manager.create_state_message(
|
169
|
-
self._stream_name, self._stream_namespace
|
170
|
-
)
|
171
|
-
self._message_repository.emit_message(state_message)
|
172
|
-
|
173
|
-
def stream_slices(self) -> Iterable[StreamSlice]:
|
174
|
-
if self._timer.is_running():
|
175
|
-
raise RuntimeError("stream_slices has been executed more than once.")
|
176
|
-
|
177
|
-
slices = self._partition_router.stream_slices()
|
178
|
-
self._timer.start()
|
179
|
-
for partition in slices:
|
180
|
-
yield from self._generate_slices_from_partition(partition)
|
181
|
-
|
182
|
-
def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
|
183
|
-
# Ensure the maximum number of partitions is not exceeded
|
184
|
-
self._ensure_partition_limit()
|
185
|
-
|
186
|
-
cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
|
187
|
-
if not cursor:
|
188
|
-
cursor = self._create_cursor(
|
189
|
-
self._global_cursor,
|
190
|
-
self._lookback_window if self._global_cursor else self._NO_CURSOR_STATE,
|
191
|
-
)
|
192
|
-
self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
|
193
|
-
self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
|
194
|
-
threading.Semaphore(0)
|
195
|
-
)
|
196
|
-
|
197
|
-
for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
|
198
|
-
cursor.stream_slices(),
|
199
|
-
lambda: None,
|
200
|
-
):
|
201
|
-
self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
|
202
|
-
if is_last_slice:
|
203
|
-
self._finished_partitions.add(self._to_partition_key(partition.partition))
|
204
|
-
yield StreamSlice(
|
205
|
-
partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
|
206
|
-
)
|
207
|
-
|
208
|
-
def _ensure_partition_limit(self) -> None:
|
209
|
-
"""
|
210
|
-
Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
|
211
|
-
"""
|
212
|
-
while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
|
213
|
-
self._over_limit += 1
|
214
|
-
oldest_partition = self._cursor_per_partition.popitem(last=False)[
|
215
|
-
0
|
216
|
-
] # Remove the oldest partition
|
217
|
-
logger.warning(
|
218
|
-
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
|
219
|
-
)
|
220
|
-
|
221
|
-
def limit_reached(self) -> bool:
|
222
|
-
return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
|
223
|
-
|
224
|
-
def _set_initial_state(self, stream_state: StreamState) -> None:
|
225
|
-
"""
|
226
|
-
Initialize the cursor's state using the provided `stream_state`.
|
227
|
-
|
228
|
-
This method supports global and per-partition state initialization.
|
229
|
-
|
230
|
-
- **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
|
231
|
-
The `global state` holds a single cursor position representing the latest processed record across all partitions.
|
232
|
-
|
233
|
-
- **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
|
234
|
-
This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.
|
235
|
-
|
236
|
-
- **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.
|
237
|
-
|
238
|
-
- **Parent State**: (if available) Used to initialize partition routers based on parent streams.
|
239
|
-
|
240
|
-
Args:
|
241
|
-
stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
|
242
|
-
{
|
243
|
-
"states": [
|
244
|
-
{
|
245
|
-
"partition": {
|
246
|
-
"partition_key": "value"
|
247
|
-
},
|
248
|
-
"cursor": {
|
249
|
-
"last_updated": "2023-05-27T00:00:00Z"
|
250
|
-
}
|
251
|
-
}
|
252
|
-
],
|
253
|
-
"state": {
|
254
|
-
"last_updated": "2023-05-27T00:00:00Z"
|
255
|
-
},
|
256
|
-
lookback_window: 10,
|
257
|
-
"parent_state": {
|
258
|
-
"parent_stream_name": {
|
259
|
-
"last_updated": "2023-05-27T00:00:00Z"
|
260
|
-
}
|
261
|
-
}
|
262
|
-
}
|
263
|
-
"""
|
264
|
-
if not stream_state:
|
265
|
-
return
|
266
|
-
|
267
|
-
if self._PERPARTITION_STATE_KEY not in stream_state:
|
268
|
-
# We assume that `stream_state` is in a global format that can be applied to all partitions.
|
269
|
-
# Example: {"global_state_format_key": "global_state_format_value"}
|
270
|
-
self._global_cursor = deepcopy(stream_state)
|
271
|
-
self._new_global_cursor = deepcopy(stream_state)
|
272
|
-
|
273
|
-
else:
|
274
|
-
self._lookback_window = int(stream_state.get("lookback_window", 0))
|
275
|
-
|
276
|
-
for state in stream_state[self._PERPARTITION_STATE_KEY]:
|
277
|
-
self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
|
278
|
-
self._create_cursor(state["cursor"])
|
279
|
-
)
|
280
|
-
self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
|
281
|
-
threading.Semaphore(0)
|
282
|
-
)
|
283
|
-
|
284
|
-
# set default state for missing partitions if it is per partition with fallback to global
|
285
|
-
if self._GLOBAL_STATE_KEY in stream_state:
|
286
|
-
self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
|
287
|
-
self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
|
288
|
-
|
289
|
-
# Set parent state for partition routers based on parent streams
|
290
|
-
self._partition_router.set_initial_state(stream_state)
|
291
|
-
|
292
|
-
def observe(self, record: Record) -> None:
|
293
|
-
if not record.associated_slice:
|
294
|
-
raise ValueError(
|
295
|
-
"Invalid state as stream slices that are emitted should refer to an existing cursor"
|
296
|
-
)
|
297
|
-
self._cursor_per_partition[
|
298
|
-
self._to_partition_key(record.associated_slice.partition)
|
299
|
-
].observe(record)
|
300
|
-
|
301
|
-
def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
|
302
|
-
return self._partition_serializer.to_partition_key(partition)
|
303
|
-
|
304
|
-
def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
|
305
|
-
return self._partition_serializer.to_partition(partition_key)
|
306
|
-
|
307
|
-
def _create_cursor(
|
308
|
-
self, cursor_state: Any, runtime_lookback_window: Any = None
|
309
|
-
) -> ConcurrentCursor:
|
310
|
-
if runtime_lookback_window:
|
311
|
-
runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
|
312
|
-
cursor = self._cursor_factory.create(
|
313
|
-
stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
|
314
|
-
)
|
315
|
-
return cursor
|
316
|
-
|
317
|
-
def should_be_synced(self, record: Record) -> bool:
|
318
|
-
return self._get_cursor(record).should_be_synced(record)
|
319
|
-
|
320
|
-
def _get_cursor(self, record: Record) -> ConcurrentCursor:
|
321
|
-
if not record.associated_slice:
|
322
|
-
raise ValueError(
|
323
|
-
"Invalid state as stream slices that are emitted should refer to an existing cursor"
|
324
|
-
)
|
325
|
-
partition_key = self._to_partition_key(record.associated_slice.partition)
|
326
|
-
if partition_key not in self._cursor_per_partition:
|
327
|
-
raise ValueError(
|
328
|
-
"Invalid state as stream slices that are emitted should refer to an existing cursor"
|
329
|
-
)
|
330
|
-
cursor = self._cursor_per_partition[partition_key]
|
331
|
-
return cursor
|
File without changes
|
File without changes
|
File without changes
|