airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
  2. airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
  3. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
  4. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
  5. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
  6. airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
  7. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
  8. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
  9. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  10. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  11. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  12. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
  13. airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
  14. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
  16. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  17. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
  18. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  19. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
  20. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
  21. airbyte_cdk/sources/file_based/exceptions.py +34 -0
  22. airbyte_cdk/sources/file_based/file_based_source.py +28 -5
  23. airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
  24. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
  25. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
  26. airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
  27. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
  28. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
  29. airbyte_cdk/sources/types.py +3 -0
  30. airbyte_cdk/sources/utils/transform.py +29 -3
  31. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
  32. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
  33. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
  34. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.20.2.dev0
3
+ Version: 6.21.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -53,44 +53,45 @@ airbyte_cdk/sources/declarative/async_job/timer.py,sha256=Fb8P72CQ7jIzJyzMSSNuBf
53
53
  airbyte_cdk/sources/declarative/auth/__init__.py,sha256=e2CRrcBWGhz3sQu3Oh34d1riEIwXipGS8hrSB1pu0Oo,284
54
54
  airbyte_cdk/sources/declarative/auth/declarative_authenticator.py,sha256=nf-OmRUHYG4ORBwyb5CANzuHEssE-oNmL-Lccn41Td8,1099
55
55
  airbyte_cdk/sources/declarative/auth/jwt.py,sha256=7r5q1zOekjw8kEmEk1oUyovzVt3cbD6BuFnRILeLZi8,8250
56
- airbyte_cdk/sources/declarative/auth/oauth.py,sha256=Yr0ljFjln9FIWudQohXARyKEo6-4ACG840pZoi6JVrE,9165
56
+ airbyte_cdk/sources/declarative/auth/oauth.py,sha256=GhXWheC5GkKV7req3jBCY0aTbFwCuQ5RRSfZi3jFphM,11002
57
57
  airbyte_cdk/sources/declarative/auth/selective_authenticator.py,sha256=qGwC6YsCldr1bIeKG6Qo-A9a5cTdHw-vcOn3OtQrS4c,1540
58
58
  airbyte_cdk/sources/declarative/auth/token.py,sha256=r4u3WXyVa7WmiSZ9-eZXlrUI-pS0D4YWJnwjLzwV-Fk,11210
59
59
  airbyte_cdk/sources/declarative/auth/token_provider.py,sha256=9oq3dcBPAPwXSfkISjhA05dMhIzxaDQTmwOydBrnsMk,3028
60
- airbyte_cdk/sources/declarative/checks/__init__.py,sha256=WWXMfvKkndqwAUZdgSr7xVHVXDFTKCUQ9EubqT7H4QE,274
60
+ airbyte_cdk/sources/declarative/checks/__init__.py,sha256=nsVV5Bo0E_tBNd8A4Xdsdb-75PpcLo5RQu2RQ_Gv-ME,806
61
+ airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py,sha256=aXKL1YSAB-0T_eZiavb7e5rprf-DdXG77Fy81FtlcWk,1843
61
62
  airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQrilWCfJmncBzXCZ18ptRNip3XA,2139
62
63
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
64
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
65
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=Ta2hizfoZ5kloXDKhgco5nyo7rn8mdklajLD1DSDNP8,25550
66
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyMCu1qoGsne1Ooz3c1da-8EDZk6Suiy2gIq9Q,22475
66
67
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
68
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
69
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=GfZlk9EvYQiWDx3AipNLf1us1e986q2mgqcbHbeZU0k,133172
70
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=PxY_V8vGyNdUMw3vjhqFbqjRNgYs_-0-0xeSTGkLSBw,137031
70
71
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
72
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
- airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
73
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=-aO3ujXX9YTP2ZDvI2BP-x0VOKdAq2TlHo4zG8DCTlY,2748
73
+ airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=KSpQetKGqPCv-38QgcVJ5kzM5nzbFldTSsYDCS3Xf0Y,1035
74
+ airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=kQfUVMVhChKe5OngwIQrs0F9KGnRUN-CKVFakCU23DQ,4354
74
75
  airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=sl-Gt8lXi7yD2Q-sD8je5QS2PbgrgsYjxRLWsay7DMc,826
75
76
  airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=qdbjeR6RffKaah_iWvMsOcDolYuxJY5DaI3b9AMTZXg,3327
76
77
  airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
77
78
  airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py,sha256=ZVBZhAOl0I0MymXN5CKTC-kIXG4GuUQAEyn0XpUDuSE,1081
78
79
  airbyte_cdk/sources/declarative/decoders/xml_decoder.py,sha256=EU-7t-5vIGRHZ14h-f0GUE4V5-eTM9Flux-A8xgI1Rc,3117
80
+ airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py,sha256=OTGeNh-Zkab9JwCTgiHtLH1IS6PiVO9jnr82c0vrHbw,2269
79
81
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
80
82
  airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrqC9AV1gO8-90t8UTDVfJGshN9E,754
81
83
  airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
82
84
  airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
83
85
  airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
84
- airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
86
+ airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzxYOeIrDy1GINb1zH9MBy6suC5tm2LSk,3545
85
87
  airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
86
88
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
87
89
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
88
- airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
89
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=V_KXjVQFB_55u_ZncM52fxFr4XEcoUje9Tu5pxI9-Fo,14398
90
+ airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=huRz3KQJSUFmJCg5GPE9TckEBsB5TMsCa_THhJAhPVI,1037
90
91
  airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
91
92
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
92
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
93
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=_FSJjAwL4Zu-i2CngnhTtx8j-NPVSBKj5LwDSPta3Cg,16305
93
+ airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
94
+ airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
94
95
  airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
95
96
  airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
96
97
  airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -102,17 +103,17 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZ
102
103
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
103
104
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
104
105
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=Y5AWYxbJTUtJ_Jm7DV9qrZDiymFR9LST7fBt4piT2-U,4585
105
- airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=wX_dQ401siuwh3zHgSHRnSN1vIojI4Nufg3BwzZAzk0,16239
106
+ airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=_xCg7CfWQCDXVQx8ZRzcS6yuocfWzqLvOMLkgwEK5vw,16352
106
107
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
108
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
108
109
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
109
110
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
110
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=IZFT1m4d-zp5hQ0ayU06Vdxm6r3MEq-X2sOCo9SuG-k,93270
111
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=57IP4WKtwsoVvWpJKFTTsWMR58nzPIwVvzAehYJ0BrA,96250
111
112
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
112
113
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
113
114
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
114
115
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
115
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=dSMPrVh51LE9FMhBJQZM7S1jlvhpM8FlEqYfPteS2XU,112114
116
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=ZIl9MKlzOPzo-iMWwcJorGboWuCi8ZMy65YW04TS6UM,112776
116
117
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
117
118
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
118
119
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -120,6 +121,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
120
121
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
121
122
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
122
123
  airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=5bgXoJfBg_6i53krQMptAGb50XB5XoVfqQxKQhlLtBA,15383
124
+ airbyte_cdk/sources/declarative/requesters/README.md,sha256=eL1I4iLkxaw7hJi9S9d18_XcRl-R8lUSjqBVJJzvXmg,2656
123
125
  airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
124
126
  airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
125
127
  airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
@@ -134,7 +136,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
134
136
  airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
135
137
  airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
136
138
  airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=vhWsEKNTYEzZ4gerhHqnDNKu4wGIP485NAzpSQ5DRZg,7941
137
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
139
+ airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=3GtOefPH08evlSUxaILkiKLTHbIspFY4qd5B3ZqNE60,10063
138
140
  airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
139
141
  airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
140
142
  airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
@@ -163,10 +165,10 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
163
165
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
164
166
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
165
167
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
166
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
168
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
167
169
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
168
170
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
169
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
171
+ airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=sa99VqU1U45fgZL2qEdw8ueX1tPTPfGxibQ-ZFePjSM,9361
170
172
  airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
171
173
  airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
172
174
  airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
@@ -196,7 +198,7 @@ airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk
196
198
  airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=01Nd4b7ERAbp-OZo_8rrAzFXWPTMwr02SnWiN17nx8Q,2363
197
199
  airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=j9T5TimfWFUz7nqsaj-83G3xWmDpsmeSbDnaUNmz0UM,5849
198
200
  airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=tj-M1L5BTa5yIQ3jHo09CtCTSq_eR-68zgyOPqwsurw,6455
201
+ airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=gXlZwnEKLWknnK_n7j14lANgR6vkqhlLJ-G3rRu-ox4,6897
200
202
  airbyte_cdk/sources/file_based/config/avro_format.py,sha256=NxTF96ewzn6HuhgodsY7Rpb-ybr1ZEWW5d4Vid64g5A,716
201
203
  airbyte_cdk/sources/file_based/config/csv_format.py,sha256=NWekkyT8dTwiVK0mwa_krQD4FJPHSDfILo8kPAg3-Vs,8006
202
204
  airbyte_cdk/sources/file_based/config/excel_format.py,sha256=9qAmTsT6SoVzNfNv0oBVkVCmiyqQuVAbfRKajjoa7Js,378
@@ -207,9 +209,9 @@ airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=tIbB9Pn1HqU6
207
209
  airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfraB9P3pFhf9UJp2JeTZ1SUFAopy2iBvY,301
208
210
  airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
209
211
  airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
210
- airbyte_cdk/sources/file_based/exceptions.py,sha256=AEELNIRzKPX6eopKd_2jhE7WiNeR0Aw7nQWVOL8fvkc,5760
211
- airbyte_cdk/sources/file_based/file_based_source.py,sha256=RfpctRNLJ_EHKKEc2E1EZGYRfhG0Z9o6TgsKS4XrSNY,16652
212
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=ohxKlqPuV7TGwjyRy_gaWUol8QN5lBSoCYoaqBtRh1c,6179
212
+ airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
213
+ airbyte_cdk/sources/file_based/file_based_source.py,sha256=Biv2QufYQtHZQCBZs4iCUpqTd82rk7xo8SDYkEeau3k,17616
214
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=e1KhgTh7mzvkBOz9DjLwzOsDwevrTmbxSYIcvhgWgGM,6856
213
215
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
214
216
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
215
217
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
@@ -218,7 +220,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs
218
220
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
219
221
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
220
222
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
221
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=r5FNcJadiI5PTyl1-doIodPCwW7xZWOTHl4Epd-w0-8,18602
223
+ airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
222
224
  airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
223
225
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
224
226
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
@@ -235,7 +237,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
235
237
  airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
236
238
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
237
239
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
238
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=rpwU6AOyhFLuXtcFKkcOHFWbRQ4kLCOKzAjcID_M87k,16770
240
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
239
241
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
240
242
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
241
243
  airbyte_cdk/sources/http_logger.py,sha256=TyBmtRA6D9g0XDkKGvdM415b36RXDjgfkwRewDsH8-0,1576
@@ -257,7 +259,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
257
259
  airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
258
260
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
259
261
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
260
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=suObbNi24so8Wcj0Wm32OkJAcuvODAOwp373YBmUPp0,21213
262
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Hke6CpD8Sq1FS4g1Xuht39UN7hKkGy1mvOxvQrm1lLM,20810
261
263
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
262
264
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
263
265
  airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -288,18 +290,18 @@ airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9w
288
290
  airbyte_cdk/sources/streams/http/http_client.py,sha256=tDE0ROtxjGMVphvsw8INvGMtZ97hIF-v47pZ3jIyiwc,23011
289
291
  airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
290
292
  airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
291
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=sxmrQKAvN8piamssL3xh8KXevTwdaXuLs2O0hNEA5aQ,10635
293
+ airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=-GDNyqccdutOftFpqCvvk81NwkskHhDZ8QcsUKzNjRQ,11660
292
294
  airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=Y3n7J-sk5yGjv_OxtY6Z6k0PEsFZmtIRi-x0KCbaHdA,1010
293
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=ka-bBRWvIv09LmZNYl49p2lK9nd_Tvi2g0lIp3OkU40,14872
295
+ airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=OCNokL0GypjN8PUVJk1UFJVE5THkHAbNDmB984_F7W0,16718
294
296
  airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
295
297
  airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
296
- airbyte_cdk/sources/types.py,sha256=WWVigI7ZSoQU2TBCzDsHJtoX4Ima9v--lcLyYwUG_cE,4904
298
+ airbyte_cdk/sources/types.py,sha256=nLPkTpyfGV4E6e99qcBWX4r8C3fE4I8Fvgx2EjvT9ic,5005
297
299
  airbyte_cdk/sources/utils/__init__.py,sha256=TTN6VUxVy6Is8BhYQZR5pxJGQh8yH4duXh4O1TiMiEY,118
298
300
  airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABLYYRSXA,256
299
301
  airbyte_cdk/sources/utils/record_helper.py,sha256=jeB0mucudzna7Zvj-pCBbwFrbLJ36SlAWZTh5O4Fb9Y,2168
300
302
  airbyte_cdk/sources/utils/schema_helpers.py,sha256=bR3I70-e11S6B8r6VK-pthQXtcYrXojgXFvuK7lRrpg,8545
301
303
  airbyte_cdk/sources/utils/slice_logger.py,sha256=qWWeFLAvigFz0b4O1_O3QDM1cy8PqZAMMgVPR2hEeb8,1778
302
- airbyte_cdk/sources/utils/transform.py,sha256=zXlZ00akGt0OpiuYQf6FCDL0eI_Qdo1tWPKxA88RTwk,10168
304
+ airbyte_cdk/sources/utils/transform.py,sha256=Sks6kiRbef1W-5I6PRqnFxksJe2NOPKCRXQLudaltf8,11015
303
305
  airbyte_cdk/sources/utils/types.py,sha256=41ZQR681t5TUnOScij58d088sb99klH_ZENFcaYro_g,175
304
306
  airbyte_cdk/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
305
307
  airbyte_cdk/sql/_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -343,8 +345,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
343
345
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
344
346
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
345
347
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
346
- airbyte_cdk-6.20.2.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
- airbyte_cdk-6.20.2.dev0.dist-info/METADATA,sha256=PY1jdRMhUMAzjoWjxgGIimwhSkbgqkgaCWEMrhQAIl0,6005
348
- airbyte_cdk-6.20.2.dev0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
- airbyte_cdk-6.20.2.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
- airbyte_cdk-6.20.2.dev0.dist-info/RECORD,,
348
+ airbyte_cdk-6.21.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
349
+ airbyte_cdk-6.21.0.dist-info/METADATA,sha256=6djJTSQ0PJieSZE0V6_FAaDKlwVCELJ0_YyMsez9oLE,6000
350
+ airbyte_cdk-6.21.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
351
+ airbyte_cdk-6.21.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
352
+ airbyte_cdk-6.21.0.dist-info/RECORD,,
@@ -1,331 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- import copy
6
- import logging
7
- import threading
8
- from collections import OrderedDict
9
- from copy import deepcopy
10
- from datetime import timedelta
11
- from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
12
-
13
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
15
- Timer,
16
- iterate_with_last_flag_and_state,
17
- )
18
- from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
19
- from airbyte_cdk.sources.message import MessageRepository
20
- from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
21
- PerPartitionKeySerializer,
22
- )
23
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
24
- from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
25
- from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
26
-
27
- logger = logging.getLogger("airbyte")
28
-
29
-
30
- class ConcurrentCursorFactory:
31
- def __init__(self, create_function: Callable[..., ConcurrentCursor]):
32
- self._create_function = create_function
33
-
34
- def create(
35
- self, stream_state: Mapping[str, Any], runtime_lookback_window: Any
36
- ) -> ConcurrentCursor:
37
- return self._create_function(
38
- stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
39
- )
40
-
41
-
42
- class ConcurrentPerPartitionCursor(Cursor):
43
- """
44
- Manages state per partition when a stream has many partitions, preventing data loss or duplication.
45
-
46
- Attributes:
47
- DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).
48
-
49
- - **Partition Limitation Logic**
50
- Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. Oldest partitions are removed when the limit is reached.
51
-
52
- - **Global Cursor Fallback**
53
- New partitions use global state as the initial state to progress the state for deleted or new partitions. The history data added after the initial sync will be missing.
54
- """
55
-
56
- DEFAULT_MAX_PARTITIONS_NUMBER = 10000
57
- _NO_STATE: Mapping[str, Any] = {}
58
- _NO_CURSOR_STATE: Mapping[str, Any] = {}
59
- _GLOBAL_STATE_KEY = "state"
60
- _PERPARTITION_STATE_KEY = "states"
61
- _KEY = 0
62
- _VALUE = 1
63
-
64
- def __init__(
65
- self,
66
- cursor_factory: ConcurrentCursorFactory,
67
- partition_router: PartitionRouter,
68
- stream_name: str,
69
- stream_namespace: Optional[str],
70
- stream_state: Any,
71
- message_repository: MessageRepository,
72
- connector_state_manager: ConnectorStateManager,
73
- cursor_field: CursorField,
74
- ) -> None:
75
- self._global_cursor: Optional[StreamState] = {}
76
- self._stream_name = stream_name
77
- self._stream_namespace = stream_namespace
78
- self._message_repository = message_repository
79
- self._connector_state_manager = connector_state_manager
80
- self._cursor_field = cursor_field
81
-
82
- self._cursor_factory = cursor_factory
83
- self._partition_router = partition_router
84
-
85
- # The dict is ordered to ensure that once the maximum number of partitions is reached,
86
- # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
87
- self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
88
- self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
89
- self._finished_partitions: set[str] = set()
90
- self._lock = threading.Lock()
91
- self._timer = Timer()
92
- self._new_global_cursor: Optional[StreamState] = None
93
- self._lookback_window: int = 0
94
- self._parent_state: Optional[StreamState] = None
95
- self._over_limit: int = 0
96
- self._partition_serializer = PerPartitionKeySerializer()
97
-
98
- self._set_initial_state(stream_state)
99
-
100
- @property
101
- def cursor_field(self) -> CursorField:
102
- return self._cursor_field
103
-
104
- @property
105
- def state(self) -> MutableMapping[str, Any]:
106
- states = []
107
- for partition_tuple, cursor in self._cursor_per_partition.items():
108
- if cursor.state:
109
- states.append(
110
- {
111
- "partition": self._to_dict(partition_tuple),
112
- "cursor": copy.deepcopy(cursor.state),
113
- }
114
- )
115
- state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}
116
-
117
- if self._global_cursor:
118
- state[self._GLOBAL_STATE_KEY] = self._global_cursor
119
- if self._lookback_window is not None:
120
- state["lookback_window"] = self._lookback_window
121
- if self._parent_state is not None:
122
- state["parent_state"] = self._parent_state
123
- return state
124
-
125
- def close_partition(self, partition: Partition) -> None:
126
- # Attempt to retrieve the stream slice
127
- stream_slice: Optional[StreamSlice] = partition.to_slice() # type: ignore[assignment]
128
-
129
- # Ensure stream_slice is not None
130
- if stream_slice is None:
131
- raise ValueError("stream_slice cannot be None")
132
-
133
- partition_key = self._to_partition_key(stream_slice.partition)
134
- self._cursor_per_partition[partition_key].close_partition(partition=partition)
135
- with self._lock:
136
- self._semaphore_per_partition[partition_key].acquire()
137
- cursor = self._cursor_per_partition[partition_key]
138
- if (
139
- partition_key in self._finished_partitions
140
- and self._semaphore_per_partition[partition_key]._value == 0
141
- ):
142
- if (
143
- self._new_global_cursor is None
144
- or self._new_global_cursor[self.cursor_field.cursor_field_key]
145
- < cursor.state[self.cursor_field.cursor_field_key]
146
- ):
147
- self._new_global_cursor = copy.deepcopy(cursor.state)
148
-
149
- def ensure_at_least_one_state_emitted(self) -> None:
150
- """
151
- The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
152
- called.
153
- """
154
- if not any(
155
- semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
156
- ):
157
- self._global_cursor = self._new_global_cursor
158
- self._lookback_window = self._timer.finish()
159
- self._parent_state = self._partition_router.get_stream_state()
160
- self._emit_state_message()
161
-
162
- def _emit_state_message(self) -> None:
163
- self._connector_state_manager.update_state_for_stream(
164
- self._stream_name,
165
- self._stream_namespace,
166
- self.state,
167
- )
168
- state_message = self._connector_state_manager.create_state_message(
169
- self._stream_name, self._stream_namespace
170
- )
171
- self._message_repository.emit_message(state_message)
172
-
173
- def stream_slices(self) -> Iterable[StreamSlice]:
174
- if self._timer.is_running():
175
- raise RuntimeError("stream_slices has been executed more than once.")
176
-
177
- slices = self._partition_router.stream_slices()
178
- self._timer.start()
179
- for partition in slices:
180
- yield from self._generate_slices_from_partition(partition)
181
-
182
- def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
183
- # Ensure the maximum number of partitions is not exceeded
184
- self._ensure_partition_limit()
185
-
186
- cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
187
- if not cursor:
188
- cursor = self._create_cursor(
189
- self._global_cursor,
190
- self._lookback_window if self._global_cursor else self._NO_CURSOR_STATE,
191
- )
192
- self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
193
- self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
194
- threading.Semaphore(0)
195
- )
196
-
197
- for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
198
- cursor.stream_slices(),
199
- lambda: None,
200
- ):
201
- self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
202
- if is_last_slice:
203
- self._finished_partitions.add(self._to_partition_key(partition.partition))
204
- yield StreamSlice(
205
- partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
206
- )
207
-
208
- def _ensure_partition_limit(self) -> None:
209
- """
210
- Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
211
- """
212
- while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
213
- self._over_limit += 1
214
- oldest_partition = self._cursor_per_partition.popitem(last=False)[
215
- 0
216
- ] # Remove the oldest partition
217
- logger.warning(
218
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
219
- )
220
-
221
- def limit_reached(self) -> bool:
222
- return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
223
-
224
- def _set_initial_state(self, stream_state: StreamState) -> None:
225
- """
226
- Initialize the cursor's state using the provided `stream_state`.
227
-
228
- This method supports global and per-partition state initialization.
229
-
230
- - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
231
- The `global state` holds a single cursor position representing the latest processed record across all partitions.
232
-
233
- - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
234
- This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.
235
-
236
- - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.
237
-
238
- - **Parent State**: (if available) Used to initialize partition routers based on parent streams.
239
-
240
- Args:
241
- stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
242
- {
243
- "states": [
244
- {
245
- "partition": {
246
- "partition_key": "value"
247
- },
248
- "cursor": {
249
- "last_updated": "2023-05-27T00:00:00Z"
250
- }
251
- }
252
- ],
253
- "state": {
254
- "last_updated": "2023-05-27T00:00:00Z"
255
- },
256
- lookback_window: 10,
257
- "parent_state": {
258
- "parent_stream_name": {
259
- "last_updated": "2023-05-27T00:00:00Z"
260
- }
261
- }
262
- }
263
- """
264
- if not stream_state:
265
- return
266
-
267
- if self._PERPARTITION_STATE_KEY not in stream_state:
268
- # We assume that `stream_state` is in a global format that can be applied to all partitions.
269
- # Example: {"global_state_format_key": "global_state_format_value"}
270
- self._global_cursor = deepcopy(stream_state)
271
- self._new_global_cursor = deepcopy(stream_state)
272
-
273
- else:
274
- self._lookback_window = int(stream_state.get("lookback_window", 0))
275
-
276
- for state in stream_state[self._PERPARTITION_STATE_KEY]:
277
- self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
278
- self._create_cursor(state["cursor"])
279
- )
280
- self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
281
- threading.Semaphore(0)
282
- )
283
-
284
- # set default state for missing partitions if it is per partition with fallback to global
285
- if self._GLOBAL_STATE_KEY in stream_state:
286
- self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
287
- self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
288
-
289
- # Set parent state for partition routers based on parent streams
290
- self._partition_router.set_initial_state(stream_state)
291
-
292
- def observe(self, record: Record) -> None:
293
- if not record.associated_slice:
294
- raise ValueError(
295
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
296
- )
297
- self._cursor_per_partition[
298
- self._to_partition_key(record.associated_slice.partition)
299
- ].observe(record)
300
-
301
- def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
302
- return self._partition_serializer.to_partition_key(partition)
303
-
304
- def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
305
- return self._partition_serializer.to_partition(partition_key)
306
-
307
- def _create_cursor(
308
- self, cursor_state: Any, runtime_lookback_window: Any = None
309
- ) -> ConcurrentCursor:
310
- if runtime_lookback_window:
311
- runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
312
- cursor = self._cursor_factory.create(
313
- stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
314
- )
315
- return cursor
316
-
317
- def should_be_synced(self, record: Record) -> bool:
318
- return self._get_cursor(record).should_be_synced(record)
319
-
320
- def _get_cursor(self, record: Record) -> ConcurrentCursor:
321
- if not record.associated_slice:
322
- raise ValueError(
323
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
324
- )
325
- partition_key = self._to_partition_key(record.associated_slice.partition)
326
- if partition_key not in self._cursor_per_partition:
327
- raise ValueError(
328
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
329
- )
330
- cursor = self._cursor_per_partition[partition_key]
331
- return cursor