airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. airbyte_cdk/__init__.py +17 -2
  2. airbyte_cdk/config_observation.py +10 -3
  3. airbyte_cdk/connector.py +19 -9
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
  5. airbyte_cdk/connector_builder/main.py +26 -6
  6. airbyte_cdk/connector_builder/message_grouper.py +95 -25
  7. airbyte_cdk/destinations/destination.py +47 -14
  8. airbyte_cdk/destinations/vector_db_based/config.py +36 -14
  9. airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
  10. airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
  11. airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
  12. airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
  13. airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
  14. airbyte_cdk/entrypoint.py +82 -26
  15. airbyte_cdk/exception_handler.py +13 -3
  16. airbyte_cdk/logger.py +10 -2
  17. airbyte_cdk/models/airbyte_protocol.py +11 -5
  18. airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
  19. airbyte_cdk/models/well_known_types.py +1 -1
  20. airbyte_cdk/sources/abstract_source.py +63 -17
  21. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
  22. airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
  23. airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
  24. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
  25. airbyte_cdk/sources/connector_state_manager.py +32 -10
  26. airbyte_cdk/sources/declarative/async_job/job.py +3 -1
  27. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
  28. airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
  29. airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
  30. airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
  31. airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
  32. airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
  33. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
  34. airbyte_cdk/sources/declarative/auth/token.py +25 -8
  35. airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
  36. airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
  37. airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
  38. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
  39. airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
  40. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
  41. airbyte_cdk/sources/declarative/declarative_source.py +3 -1
  42. airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
  43. airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
  44. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
  45. airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
  46. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
  47. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
  48. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
  49. airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
  50. airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
  51. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
  52. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
  53. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
  54. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
  55. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
  56. airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
  57. airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
  58. airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
  59. airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
  60. airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
  61. airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
  62. airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
  63. airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
  64. airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
  65. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
  66. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
  67. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
  68. airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
  69. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
  70. airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
  71. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
  72. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
  73. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
  74. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
  75. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
  76. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
  77. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
  78. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
  79. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
  80. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
  81. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
  82. airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
  83. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
  84. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
  85. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
  86. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
  87. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
  88. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
  89. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
  90. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
  91. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
  92. airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
  93. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
  94. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
  95. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
  96. airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
  97. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
  98. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
  99. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
  100. airbyte_cdk/sources/declarative/spec/spec.py +8 -2
  101. airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
  102. airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
  103. airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
  104. airbyte_cdk/sources/declarative/types.py +8 -1
  105. airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
  106. airbyte_cdk/sources/embedded/base_integration.py +14 -4
  107. airbyte_cdk/sources/embedded/catalog.py +16 -4
  108. airbyte_cdk/sources/embedded/runner.py +19 -3
  109. airbyte_cdk/sources/embedded/tools.py +3 -1
  110. airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
  111. airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
  112. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
  113. airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
  114. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
  115. airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
  116. airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
  117. airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
  118. airbyte_cdk/sources/file_based/exceptions.py +13 -15
  119. airbyte_cdk/sources/file_based/file_based_source.py +82 -24
  120. airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
  121. airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
  122. airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
  123. airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
  124. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
  125. airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
  126. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
  127. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
  128. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
  129. airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
  130. airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
  131. airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
  132. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
  133. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
  134. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
  135. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
  136. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
  137. airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
  138. airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
  139. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
  140. airbyte_cdk/sources/http_logger.py +5 -1
  141. airbyte_cdk/sources/message/repository.py +18 -4
  142. airbyte_cdk/sources/source.py +17 -7
  143. airbyte_cdk/sources/streams/availability_strategy.py +9 -3
  144. airbyte_cdk/sources/streams/call_rate.py +63 -19
  145. airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
  146. airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
  147. airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
  148. airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
  149. airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
  150. airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
  151. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
  152. airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
  153. airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
  154. airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
  155. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
  156. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
  157. airbyte_cdk/sources/streams/core.py +77 -22
  158. airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
  159. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
  160. airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
  161. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
  162. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
  163. airbyte_cdk/sources/streams/http/exceptions.py +2 -2
  164. airbyte_cdk/sources/streams/http/http.py +133 -33
  165. airbyte_cdk/sources/streams/http/http_client.py +91 -29
  166. airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
  167. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
  168. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
  169. airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
  170. airbyte_cdk/sources/types.py +5 -1
  171. airbyte_cdk/sources/utils/record_helper.py +12 -3
  172. airbyte_cdk/sources/utils/schema_helpers.py +9 -3
  173. airbyte_cdk/sources/utils/slice_logger.py +4 -1
  174. airbyte_cdk/sources/utils/transform.py +24 -9
  175. airbyte_cdk/sql/exceptions.py +19 -6
  176. airbyte_cdk/sql/secrets.py +3 -1
  177. airbyte_cdk/sql/shared/catalog_providers.py +13 -4
  178. airbyte_cdk/sql/shared/sql_processor.py +44 -14
  179. airbyte_cdk/test/catalog_builder.py +19 -8
  180. airbyte_cdk/test/entrypoint_wrapper.py +27 -8
  181. airbyte_cdk/test/mock_http/mocker.py +41 -11
  182. airbyte_cdk/test/mock_http/request.py +9 -3
  183. airbyte_cdk/test/mock_http/response.py +3 -1
  184. airbyte_cdk/test/mock_http/response_builder.py +29 -7
  185. airbyte_cdk/test/state_builder.py +10 -2
  186. airbyte_cdk/test/utils/data.py +6 -2
  187. airbyte_cdk/test/utils/http_mocking.py +3 -1
  188. airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
  189. airbyte_cdk/utils/analytics_message.py +10 -2
  190. airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
  191. airbyte_cdk/utils/mapping_helpers.py +3 -1
  192. airbyte_cdk/utils/message_utils.py +11 -4
  193. airbyte_cdk/utils/print_buffer.py +6 -1
  194. airbyte_cdk/utils/schema_inferrer.py +30 -9
  195. airbyte_cdk/utils/spec_schema_transformations.py +3 -1
  196. airbyte_cdk/utils/traced_exception.py +35 -9
  197. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
  198. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
  199. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
  200. {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
airbyte_cdk/sources/file_based/file_based_stream_reader.py

@@ -45,7 +45,9 @@ class AbstractFileBasedStreamReader(ABC):
         ...
 
     @abstractmethod
-    def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
+    def open_file(
+        self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger
+    ) -> IOBase:
         """
         Return a file handle for reading.
 

@@ -80,11 +82,17 @@ class AbstractFileBasedStreamReader(ABC):
         """
         ...
 
-    def filter_files_by_globs_and_start_date(self, files: List[RemoteFile], globs: List[str]) -> Iterable[RemoteFile]:
+    def filter_files_by_globs_and_start_date(
+        self, files: List[RemoteFile], globs: List[str]
+    ) -> Iterable[RemoteFile]:
         """
         Utility method for filtering files based on globs.
         """
-        start_date = datetime.strptime(self.config.start_date, self.DATE_TIME_FORMAT) if self.config and self.config.start_date else None
+        start_date = (
+            datetime.strptime(self.config.start_date, self.DATE_TIME_FORMAT)
+            if self.config and self.config.start_date
+            else None
+        )
         seen = set()
 
         for file in files:

@@ -120,13 +128,16 @@ class AbstractFileBasedStreamReader(ABC):
     def use_file_transfer(self) -> bool:
         if self.config:
             use_file_transfer = (
-                hasattr(self.config.delivery_method, "delivery_type") and self.config.delivery_method.delivery_type == "use_file_transfer"
+                hasattr(self.config.delivery_method, "delivery_type")
+                and self.config.delivery_method.delivery_type == "use_file_transfer"
             )
             return use_file_transfer
         return False
 
     @abstractmethod
-    def get_file(self, file: RemoteFile, local_directory: str, logger: logging.Logger) -> Dict[str, Any]:
+    def get_file(
+        self, file: RemoteFile, local_directory: str, logger: logging.Logger
+    ) -> Dict[str, Any]:
         """
         This is required for connectors that will support writing to
         files. It will handle the logic to download,get,read,acquire or
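None of the hunks above change behavior; they only re-wrap expressions that exceeded the new line-length limit. As a sanity check that the reshaped start_date ternary still reads as one guarded parse, here is a minimal standalone sketch (the format string and config shape are illustrative assumptions, not taken from the CDK):

    from datetime import datetime
    from typing import Optional

    DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"  # assumed for illustration only

    class _Config:
        start_date: Optional[str] = "2024-01-01T00:00:00Z"

    config = _Config()

    # Same guarded-parse shape as the reformatted hunk: parse only when a
    # config with a start_date is present, otherwise fall back to None.
    start_date = (
        datetime.strptime(config.start_date, DATE_TIME_FORMAT)
        if config and config.start_date
        else None
    )
    print(start_date)  # 2024-01-01 00:00:00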
airbyte_cdk/sources/file_based/file_types/avro_parser.py

@@ -9,7 +9,10 @@ import fastavro
 from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
-from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
+from airbyte_cdk.sources.file_based.file_based_stream_reader import (
+    AbstractFileBasedStreamReader,
+    FileReadMode,
+)
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 from airbyte_cdk.sources.file_based.schema_helpers import SchemaType

@@ -64,15 +67,21 @@ class AvroParser(FileTypeParser):
             avro_schema = avro_reader.writer_schema
         if not avro_schema["type"] == "record":
             unsupported_type = avro_schema["type"]
-            raise ValueError(f"Only record based avro files are supported. Found {unsupported_type}")
+            raise ValueError(
+                f"Only record based avro files are supported. Found {unsupported_type}"
+            )
         json_schema = {
-            field["name"]: AvroParser._convert_avro_type_to_json(avro_format, field["name"], field["type"])
+            field["name"]: AvroParser._convert_avro_type_to_json(
+                avro_format, field["name"], field["type"]
+            )
             for field in avro_schema["fields"]
         }
         return json_schema
 
     @classmethod
-    def _convert_avro_type_to_json(cls, avro_format: AvroFormat, field_name: str, avro_field: str) -> Mapping[str, Any]:
+    def _convert_avro_type_to_json(
+        cls, avro_format: AvroFormat, field_name: str, avro_field: str
+    ) -> Mapping[str, Any]:
         if isinstance(avro_field, str) and avro_field in AVRO_TYPE_TO_JSON_TYPE:
             # Legacy behavior to retain backwards compatibility. Long term we should always represent doubles as strings
             if avro_field == "double" and not avro_format.double_as_string:

@@ -83,17 +92,28 @@ class AvroParser(FileTypeParser):
             return {
                 "type": "object",
                 "properties": {
-                    object_field["name"]: AvroParser._convert_avro_type_to_json(avro_format, object_field["name"], object_field["type"])
+                    object_field["name"]: AvroParser._convert_avro_type_to_json(
+                        avro_format, object_field["name"], object_field["type"]
+                    )
                     for object_field in avro_field["fields"]
                 },
             }
         elif avro_field["type"] == "array":
             if "items" not in avro_field:
-                raise ValueError(f"{field_name} array type does not have a required field items")
-            return {"type": "array", "items": AvroParser._convert_avro_type_to_json(avro_format, "", avro_field["items"])}
+                raise ValueError(
+                    f"{field_name} array type does not have a required field items"
+                )
+            return {
+                "type": "array",
+                "items": AvroParser._convert_avro_type_to_json(
+                    avro_format, "", avro_field["items"]
+                ),
+            }
         elif avro_field["type"] == "enum":
             if "symbols" not in avro_field:
-                raise ValueError(f"{field_name} enum type does not have a required field symbols")
+                raise ValueError(
+                    f"{field_name} enum type does not have a required field symbols"
+                )
             if "name" not in avro_field:
                 raise ValueError(f"{field_name} enum type does not have a required field name")
             return {"type": "string", "enum": avro_field["symbols"]}

@@ -102,7 +122,9 @@ class AvroParser(FileTypeParser):
                 raise ValueError(f"{field_name} map type does not have a required field values")
             return {
                 "type": "object",
-                "additionalProperties": AvroParser._convert_avro_type_to_json(avro_format, "", avro_field["values"]),
+                "additionalProperties": AvroParser._convert_avro_type_to_json(
+                    avro_format, "", avro_field["values"]
+                ),
             }
         elif avro_field["type"] == "fixed" and avro_field.get("logicalType") != "duration":
             if "size" not in avro_field:

@@ -115,18 +137,27 @@ class AvroParser(FileTypeParser):
             }
         elif avro_field.get("logicalType") == "decimal":
             if "precision" not in avro_field:
-                raise ValueError(f"{field_name} decimal type does not have a required field precision")
+                raise ValueError(
+                    f"{field_name} decimal type does not have a required field precision"
+                )
             if "scale" not in avro_field:
-                raise ValueError(f"{field_name} decimal type does not have a required field scale")
+                raise ValueError(
+                    f"{field_name} decimal type does not have a required field scale"
+                )
             max_whole_number_range = avro_field["precision"] - avro_field["scale"]
             decimal_range = avro_field["scale"]
 
             # This regex looks like a mess, but it is validation for at least one whole number and optional fractional numbers
             # For example: ^-?\d{1,5}(?:\.\d{1,3})?$ would accept 12345.123 and 123456.12345 would be rejected
-            return {"type": "string", "pattern": f"^-?\\d{{{1,max_whole_number_range}}}(?:\\.\\d{1,decimal_range})?$"}
+            return {
+                "type": "string",
+                "pattern": f"^-?\\d{{{1,max_whole_number_range}}}(?:\\.\\d{1,decimal_range})?$",
+            }
         elif "logicalType" in avro_field:
             if avro_field["logicalType"] not in AVRO_LOGICAL_TYPE_TO_JSON:
-                raise ValueError(f"{avro_field['logicalType']} is not a valid Avro logical type")
+                raise ValueError(
+                    f"{avro_field['logicalType']} is not a valid Avro logical type"
+                )
             return AVRO_LOGICAL_TYPE_TO_JSON[avro_field["logicalType"]]
         else:
             raise ValueError(f"Unsupported avro type: {avro_field}")
@@ -150,22 +181,32 @@ class AvroParser(FileTypeParser):
             with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
                 avro_reader = fastavro.reader(fp)
                 schema = avro_reader.writer_schema
-                schema_field_name_to_type = {field["name"]: field["type"] for field in schema["fields"]}
+                schema_field_name_to_type = {
+                    field["name"]: field["type"] for field in schema["fields"]
+                }
                 for record in avro_reader:
                     line_no += 1
                     yield {
-                        record_field: self._to_output_value(avro_format, schema_field_name_to_type[record_field], record[record_field])
+                        record_field: self._to_output_value(
+                            avro_format,
+                            schema_field_name_to_type[record_field],
+                            record[record_field],
+                        )
                         for record_field, record_value in schema_field_name_to_type.items()
                     }
         except Exception as exc:
-            raise RecordParseError(FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri, lineno=line_no) from exc
+            raise RecordParseError(
+                FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri, lineno=line_no
+            ) from exc
 
     @property
     def file_read_mode(self) -> FileReadMode:
         return FileReadMode.READ_BINARY
 
     @staticmethod
-    def _to_output_value(avro_format: AvroFormat, record_type: Mapping[str, Any], record_value: Any) -> Any:
+    def _to_output_value(
+        avro_format: AvroFormat, record_type: Mapping[str, Any], record_value: Any
+    ) -> Any:
         if isinstance(record_value, bytes):
             return record_value.decode()
         elif not isinstance(record_type, Mapping):
airbyte_cdk/sources/file_based/file_types/csv_parser.py

@@ -13,10 +13,18 @@ from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Opti
 from uuid import uuid4
 
 from airbyte_cdk.models import FailureType
-from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat, CsvHeaderAutogenerated, CsvHeaderUserProvided, InferenceType
+from airbyte_cdk.sources.file_based.config.csv_format import (
+    CsvFormat,
+    CsvHeaderAutogenerated,
+    CsvHeaderUserProvided,
+    InferenceType,
+)
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
-from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
+from airbyte_cdk.sources.file_based.file_based_stream_reader import (
+    AbstractFileBasedStreamReader,
+    FileReadMode,
+)
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType

@@ -77,7 +85,9 @@ class _CsvReader:
                     # than headers or more headers dans columns
                     if None in row:
                         if config_format.ignore_errors_on_fields_mismatch:
-                            logger.error(f"Skipping record in line {lineno} of file {file.uri}; invalid CSV row with missing column.")
+                            logger.error(
+                                f"Skipping record in line {lineno} of file {file.uri}; invalid CSV row with missing column."
+                            )
                         else:
                             raise RecordParseError(
                                 FileBasedSourceError.ERROR_PARSING_RECORD_MISMATCHED_COLUMNS,

@@ -86,10 +96,14 @@ class _CsvReader:
                             )
                     if None in row.values():
                         if config_format.ignore_errors_on_fields_mismatch:
-                            logger.error(f"Skipping record in line {lineno} of file {file.uri}; invalid CSV row with extra column.")
+                            logger.error(
+                                f"Skipping record in line {lineno} of file {file.uri}; invalid CSV row with extra column."
+                            )
                         else:
                             raise RecordParseError(
-                                FileBasedSourceError.ERROR_PARSING_RECORD_MISMATCHED_ROWS, filename=file.uri, lineno=lineno
+                                FileBasedSourceError.ERROR_PARSING_RECORD_MISMATCHED_ROWS,
+                                filename=file.uri,
+                                lineno=lineno,
                             )
                     yield row
         finally:

@@ -105,7 +119,9 @@ class _CsvReader:
            return config_format.header_definition.column_names  # type: ignore # should be CsvHeaderUserProvided given the type
 
        if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
-            self._skip_rows(fp, config_format.skip_rows_before_header + config_format.skip_rows_after_header)
+            self._skip_rows(
+                fp, config_format.skip_rows_before_header + config_format.skip_rows_after_header
+            )
            headers = self._auto_generate_headers(fp, dialect_name)
        else:
            # Then read the header

@@ -165,11 +181,15 @@ class CsvParser(FileTypeParser):
        # sources will likely require one. Rather than modify the interface now we can wait until the real use case
        config_format = _extract_format(config)
        type_inferrer_by_field: Dict[str, _TypeInferrer] = defaultdict(
-            lambda: _JsonTypeInferrer(config_format.true_values, config_format.false_values, config_format.null_values)
+            lambda: _JsonTypeInferrer(
+                config_format.true_values, config_format.false_values, config_format.null_values
+            )
            if config_format.inference_type != InferenceType.NONE
            else _DisabledTypeInferrer()
        )
-        data_generator = self._csv_reader.read_data(config, file, stream_reader, logger, self.file_read_mode)
+        data_generator = self._csv_reader.read_data(
+            config, file, stream_reader, logger, self.file_read_mode
+        )
        read_bytes = 0
        for row in data_generator:
            for header, value in row.items():

@@ -187,7 +207,10 @@ class CsvParser(FileTypeParser):
                f"Else, please contact Airbyte.",
                failure_type=FailureType.config_error,
            )
-        schema = {header.strip(): {"type": type_inferred.infer()} for header, type_inferred in type_inferrer_by_field.items()}
+        schema = {
+            header.strip(): {"type": type_inferred.infer()}
+            for header, type_inferred in type_inferrer_by_field.items()
+        }
        data_generator.close()
        return schema
 

@@ -203,19 +226,30 @@ class CsvParser(FileTypeParser):
        try:
            config_format = _extract_format(config)
            if discovered_schema:
-                property_types = {col: prop["type"] for col, prop in discovered_schema["properties"].items()}  # type: ignore # discovered_schema["properties"] is known to be a mapping
+                property_types = {
+                    col: prop["type"] for col, prop in discovered_schema["properties"].items()
+                }  # type: ignore # discovered_schema["properties"] is known to be a mapping
                deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
            else:
                deduped_property_types = {}
-            cast_fn = CsvParser._get_cast_function(deduped_property_types, config_format, logger, config.schemaless)
-            data_generator = self._csv_reader.read_data(config, file, stream_reader, logger, self.file_read_mode)
+            cast_fn = CsvParser._get_cast_function(
+                deduped_property_types, config_format, logger, config.schemaless
+            )
+            data_generator = self._csv_reader.read_data(
+                config, file, stream_reader, logger, self.file_read_mode
+            )
            for row in data_generator:
                line_no += 1
                yield CsvParser._to_nullable(
-                    cast_fn(row), deduped_property_types, config_format.null_values, config_format.strings_can_be_null
+                    cast_fn(row),
+                    deduped_property_types,
+                    config_format.null_values,
+                    config_format.strings_can_be_null,
                )
        except RecordParseError as parse_err:
-            raise RecordParseError(FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri, lineno=line_no) from parse_err
+            raise RecordParseError(
+                FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri, lineno=line_no
+            ) from parse_err
        finally:
            data_generator.close()
 

@@ -225,27 +259,47 @@ class CsvParser(FileTypeParser):
 
    @staticmethod
    def _get_cast_function(
-        deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger, schemaless: bool
+        deduped_property_types: Mapping[str, str],
+        config_format: CsvFormat,
+        logger: logging.Logger,
+        schemaless: bool,
    ) -> Callable[[Mapping[str, str]], Mapping[str, str]]:
        # Only cast values if the schema is provided
        if deduped_property_types and not schemaless:
-            return partial(CsvParser._cast_types, deduped_property_types=deduped_property_types, config_format=config_format, logger=logger)
+            return partial(
+                CsvParser._cast_types,
+                deduped_property_types=deduped_property_types,
+                config_format=config_format,
+                logger=logger,
+            )
        else:
            # If no schema is provided, yield the rows as they are
            return _no_cast
 
    @staticmethod
    def _to_nullable(
-        row: Mapping[str, str], deduped_property_types: Mapping[str, str], null_values: Set[str], strings_can_be_null: bool
+        row: Mapping[str, str],
+        deduped_property_types: Mapping[str, str],
+        null_values: Set[str],
+        strings_can_be_null: bool,
    ) -> Dict[str, Optional[str]]:
        nullable = {
-            k: None if CsvParser._value_is_none(v, deduped_property_types.get(k), null_values, strings_can_be_null) else v
+            k: None
+            if CsvParser._value_is_none(
+                v, deduped_property_types.get(k), null_values, strings_can_be_null
+            )
+            else v
            for k, v in row.items()
        }
        return nullable
 
    @staticmethod
-    def _value_is_none(value: Any, deduped_property_type: Optional[str], null_values: Set[str], strings_can_be_null: bool) -> bool:
+    def _value_is_none(
+        value: Any,
+        deduped_property_type: Optional[str],
+        null_values: Set[str],
+        strings_can_be_null: bool,
+    ) -> bool:
        return value in null_values and (strings_can_be_null or deduped_property_type != "string")
 
    @staticmethod
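The `_get_cast_function` hunk above only reflows the `partial(...)` call onto one keyword per line; the binding itself is unchanged. For readers less familiar with the pattern, a self-contained sketch of how `functools.partial` pre-binds keyword arguments the way `_cast_types` is bound here (the names and the toy casting rule below are illustrative, not the CDK's):

    from functools import partial
    from typing import Any, Dict, Mapping

    def cast_types(row: Dict[str, str], *, property_types: Mapping[str, str]) -> Dict[str, Any]:
        # Illustrative stand-in for CsvParser._cast_types: cast "integer" columns to int.
        return {
            k: int(v) if property_types.get(k) == "integer" else v
            for k, v in row.items()
        }

    # Pre-bind the schema once; the returned callable takes only the row,
    # matching the Callable[[Mapping[str, str]], Mapping[str, str]] contract.
    cast_fn = partial(cast_types, property_types={"id": "integer"})
    print(cast_fn({"id": "42", "name": "airbyte"}))  # {'id': 42, 'name': 'airbyte'}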
@@ -280,7 +334,10 @@ class CsvParser(FileTypeParser):
 
    @staticmethod
    def _cast_types(
-        row: Dict[str, str], deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger
+        row: Dict[str, str],
+        deduped_property_types: Mapping[str, str],
+        config_format: CsvFormat,
+        logger: logging.Logger,
    ) -> Dict[str, Any]:
        """
        Casts the values in the input 'row' dictionary according to the types defined in the JSON schema.

@@ -305,20 +362,22 @@ class CsvParser(FileTypeParser):
                    else:
                        warnings.append(_format_warning(key, value, prop_type))
 
-                elif python_type == bool:
+                elif python_type is bool:
                    try:
-                        cast_value = _value_to_bool(value, config_format.true_values, config_format.false_values)
+                        cast_value = _value_to_bool(
+                            value, config_format.true_values, config_format.false_values
+                        )
                    except ValueError:
                        warnings.append(_format_warning(key, value, prop_type))
 
-                elif python_type == dict:
+                elif python_type is dict:
                    try:
                        # we don't re-use _value_to_object here because we type the column as object as long as there is only one object
                        cast_value = orjson.loads(value)
                    except orjson.JSONDecodeError:
                        warnings.append(_format_warning(key, value, prop_type))
 
-                elif python_type == list:
+                elif python_type is list:
                    try:
                        cast_value = _value_to_list(value)
                    except (ValueError, json.JSONDecodeError):
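The only non-whitespace edits in this hunk swap `==` for `is` when comparing against type objects. Built-in types are singletons, so identity comparison gives the same result here, and it is the form linters prefer (pycodestyle's E721 flags `==` on types). A quick check:

    python_type = bool

    # Equivalent results for built-in type objects, but `is` states the
    # intent (identity of a singleton) and avoids E721 warnings.
    print(python_type == bool)  # True
    print(python_type is bool)  # True

    # `==` can be overridden by metaclasses; `is` cannot, which is why the
    # identity form is the safer idiom for exact-type dispatch like this.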
@@ -364,7 +423,9 @@ class _JsonTypeInferrer(_TypeInferrer):
     _NUMBER_TYPE = "number"
     _STRING_TYPE = "string"
 
-    def __init__(self, boolean_trues: Set[str], boolean_falses: Set[str], null_values: Set[str]) -> None:
+    def __init__(
+        self, boolean_trues: Set[str], boolean_falses: Set[str], null_values: Set[str]
+    ) -> None:
         self._boolean_trues = boolean_trues
         self._boolean_falses = boolean_falses
         self._null_values = null_values

@@ -375,7 +436,9 @@ class _JsonTypeInferrer(_TypeInferrer):
 
     def infer(self) -> str:
         types_by_value = {value: self._infer_type(value) for value in self._values}
-        types_excluding_null_values = [types for types in types_by_value.values() if self._NULL_TYPE not in types]
+        types_excluding_null_values = [
+            types for types in types_by_value.values() if self._NULL_TYPE not in types
+        ]
         if not types_excluding_null_values:
             # this is highly unusual but we will consider the column as a string
             return self._STRING_TYPE
airbyte_cdk/sources/file_based/file_types/excel_parser.py

@@ -8,9 +8,19 @@ from pathlib import Path
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
 
 import pandas as pd
-from airbyte_cdk.sources.file_based.config.file_based_stream_config import ExcelFormat, FileBasedStreamConfig
-from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError, RecordParseError
-from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
+from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
+    ExcelFormat,
+    FileBasedStreamConfig,
+)
+from airbyte_cdk.sources.file_based.exceptions import (
+    ConfigValidationError,
+    FileBasedSourceError,
+    RecordParseError,
+)
+from airbyte_cdk.sources.file_based.file_based_stream_reader import (
+    AbstractFileBasedStreamReader,
+    FileReadMode,
+)
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 from airbyte_cdk.sources.file_based.schema_helpers import SchemaType

@@ -63,7 +73,11 @@ class ExcelParser(FileTypeParser):
             fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type)
 
         schema = {
-            field: ({"type": "string", "format": "date-time"} if fields[field] == "date-time" else {"type": fields[field]})
+            field: (
+                {"type": "string", "format": "date-time"}
+                if fields[field] == "date-time"
+                else {"type": fields[field]}
+            )
             for field in fields
         }
         return schema

@@ -101,11 +115,15 @@ class ExcelParser(FileTypeParser):
                # DataFrame.to_dict() method returns datetime values in pandas.Timestamp values, which are not serializable by orjson
                # DataFrame.to_json() returns string with datetime values serialized to iso8601 with microseconds to align with pydantic behavior
                # see PR description: https://github.com/airbytehq/airbyte/pull/44444/
-                yield from orjson.loads(df.to_json(orient="records", date_format="iso", date_unit="us"))
+                yield from orjson.loads(
+                    df.to_json(orient="records", date_format="iso", date_unit="us")
+                )
 
        except Exception as exc:
            # Raise a RecordParseError if any exception occurs during parsing
-            raise RecordParseError(FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri) from exc
+            raise RecordParseError(
+                FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri
+            ) from exc
 
    @property
    def file_read_mode(self) -> FileReadMode:

@@ -133,7 +151,7 @@ class ExcelParser(FileTypeParser):
        if current_type == "string":
            # Previous column values were of the string type, no need to look further.
            return current_type
-        if dtype == object:
+        if dtype is object:
            return "string"
        if dtype in number_types and (not current_type or current_type == "number"):
            return "number"
airbyte_cdk/sources/file_based/file_types/file_transfer.py

@@ -15,7 +15,11 @@ DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
 
 class FileTransfer:
     def __init__(self) -> None:
-        self._local_directory = AIRBYTE_STAGING_DIRECTORY if os.path.exists(AIRBYTE_STAGING_DIRECTORY) else DEFAULT_LOCAL_DIRECTORY
+        self._local_directory = (
+            AIRBYTE_STAGING_DIRECTORY
+            if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
+            else DEFAULT_LOCAL_DIRECTORY
+        )
 
     def get_file(
         self,

@@ -25,7 +29,9 @@ class FileTransfer:
         logger: logging.Logger,
     ) -> Iterable[Dict[str, Any]]:
         try:
-            yield stream_reader.get_file(file=file, local_directory=self._local_directory, logger=logger)
+            yield stream_reader.get_file(
+                file=file, local_directory=self._local_directory, logger=logger
+            )
         except Exception as ex:
             logger.error("An error has occurred while getting file: %s", str(ex))
             raise ex
airbyte_cdk/sources/file_based/file_types/file_type_parser.py

@@ -7,7 +7,10 @@ from abc import ABC, abstractmethod
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
 
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
-from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
+from airbyte_cdk.sources.file_based.file_based_stream_reader import (
+    AbstractFileBasedStreamReader,
+    FileReadMode,
+)
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
 
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py

@@ -8,15 +8,21 @@ from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
 
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
-from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
+from airbyte_cdk.sources.file_based.file_based_stream_reader import (
+    AbstractFileBasedStreamReader,
+    FileReadMode,
+)
 from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
-from airbyte_cdk.sources.file_based.schema_helpers import PYTHON_TYPE_MAPPING, SchemaType, merge_schemas
+from airbyte_cdk.sources.file_based.schema_helpers import (
+    PYTHON_TYPE_MAPPING,
+    SchemaType,
+    merge_schemas,
+)
 from orjson import orjson
 
 
 class JsonlParser(FileTypeParser):
-
     MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000
     ENCODING = "utf8"
 

@@ -103,7 +109,9 @@ class JsonlParser(FileTypeParser):
             try:
                 record = orjson.loads(accumulator)
                 if had_json_parsing_error and not has_warned_for_multiline_json_object:
-                    logger.warning(f"File at {file.uri} is using multiline JSON. Performance could be greatly reduced")
+                    logger.warning(
+                        f"File at {file.uri} is using multiline JSON. Performance could be greatly reduced"
+                    )
                     has_warned_for_multiline_json_object = True
 
                 yield record

@@ -112,7 +120,11 @@ class JsonlParser(FileTypeParser):
             except orjson.JSONDecodeError:
                 had_json_parsing_error = True
 
-            if read_limit and yielded_at_least_once and read_bytes >= self.MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE:
+            if (
+                read_limit
+                and yielded_at_least_once
+                and read_bytes >= self.MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE
+            ):
                 logger.warning(
                     f"Exceeded the maximum number of bytes per file for schema inference ({self.MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE}). "
                     f"Inferring schema from an incomplete set of records."

@@ -120,7 +132,9 @@ class JsonlParser(FileTypeParser):
                 break
 
         if had_json_parsing_error and not yielded_at_least_once:
-            raise RecordParseError(FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri, lineno=line)
+            raise RecordParseError(
+                FileBasedSourceError.ERROR_PARSING_RECORD, filename=file.uri, lineno=line
+            )
 
     @staticmethod
     def _instantiate_accumulator(line: Union[bytes, str]) -> Union[bytes, str]:
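The jsonl_parser hunks reflow the multiline-JSON warning, the schema-inference byte cap, and the final parse-error guard without changing control flow. A condensed sketch of the accumulate-then-parse loop those hunks sit inside (simplified and using stdlib json in place of orjson; the helper name and buffering details are illustrative, not the CDK's exact implementation):

    import json
    from typing import Any, Dict, Iterable, List

    def parse_jsonl(lines: Iterable[str]) -> Iterable[Dict[str, Any]]:
        # Accumulate lines until they form valid JSON, so pretty-printed
        # (multiline) objects still parse -- at the cost of per-line retries.
        accumulator: List[str] = []
        had_json_parsing_error = False
        for line in lines:
            accumulator.append(line)
            try:
                record = json.loads("".join(accumulator))
            except json.JSONDecodeError:
                had_json_parsing_error = True
                continue
            if had_json_parsing_error:
                print("multiline JSON detected; performance may be reduced")
                had_json_parsing_error = False
            yield record
            accumulator = []

    rows = list(parse_jsonl(['{"a": 1}', '{', '  "b": 2', '}']))
    print(rows)  # [{'a': 1}, {'b': 2}]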