azure-storage-blob 12.25.0b1__py3-none-any.whl → 12.26.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- azure/storage/blob/__init__.py +3 -2
- azure/storage/blob/_blob_client.py +94 -41
- azure/storage/blob/_blob_client_helpers.py +19 -4
- azure/storage/blob/_blob_service_client.py +16 -13
- azure/storage/blob/_container_client.py +25 -22
- azure/storage/blob/_deserialize.py +1 -1
- azure/storage/blob/_download.py +7 -7
- azure/storage/blob/_encryption.py +177 -184
- azure/storage/blob/_generated/_azure_blob_storage.py +1 -1
- azure/storage/blob/_generated/_configuration.py +2 -2
- azure/storage/blob/_generated/_serialization.py +3 -3
- azure/storage/blob/_generated/aio/_azure_blob_storage.py +1 -1
- azure/storage/blob/_generated/aio/_configuration.py +2 -2
- azure/storage/blob/_generated/aio/operations/_append_blob_operations.py +5 -4
- azure/storage/blob/_generated/aio/operations/_blob_operations.py +5 -25
- azure/storage/blob/_generated/aio/operations/_block_blob_operations.py +9 -7
- azure/storage/blob/_generated/aio/operations/_container_operations.py +1 -19
- azure/storage/blob/_generated/aio/operations/_page_blob_operations.py +5 -10
- azure/storage/blob/_generated/aio/operations/_service_operations.py +1 -8
- azure/storage/blob/_generated/models/__init__.py +2 -0
- azure/storage/blob/_generated/models/_azure_blob_storage_enums.py +6 -0
- azure/storage/blob/_generated/operations/_append_blob_operations.py +12 -9
- azure/storage/blob/_generated/operations/_blob_operations.py +32 -49
- azure/storage/blob/_generated/operations/_block_blob_operations.py +21 -13
- azure/storage/blob/_generated/operations/_container_operations.py +19 -37
- azure/storage/blob/_generated/operations/_page_blob_operations.py +17 -19
- azure/storage/blob/_generated/operations/_service_operations.py +9 -17
- azure/storage/blob/_lease.py +1 -0
- azure/storage/blob/_quick_query_helper.py +20 -24
- azure/storage/blob/_serialize.py +1 -0
- azure/storage/blob/_shared/__init__.py +7 -7
- azure/storage/blob/_shared/authentication.py +49 -32
- azure/storage/blob/_shared/avro/avro_io.py +45 -43
- azure/storage/blob/_shared/avro/avro_io_async.py +42 -41
- azure/storage/blob/_shared/avro/datafile.py +24 -21
- azure/storage/blob/_shared/avro/datafile_async.py +15 -15
- azure/storage/blob/_shared/avro/schema.py +196 -217
- azure/storage/blob/_shared/base_client.py +87 -61
- azure/storage/blob/_shared/base_client_async.py +58 -51
- azure/storage/blob/_shared/constants.py +1 -1
- azure/storage/blob/_shared/models.py +93 -92
- azure/storage/blob/_shared/parser.py +3 -3
- azure/storage/blob/_shared/policies.py +176 -145
- azure/storage/blob/_shared/policies_async.py +59 -70
- azure/storage/blob/_shared/request_handlers.py +51 -47
- azure/storage/blob/_shared/response_handlers.py +49 -45
- azure/storage/blob/_shared/shared_access_signature.py +67 -71
- azure/storage/blob/_shared/uploads.py +56 -49
- azure/storage/blob/_shared/uploads_async.py +72 -61
- azure/storage/blob/_shared_access_signature.py +3 -1
- azure/storage/blob/_version.py +1 -1
- azure/storage/blob/aio/__init__.py +3 -2
- azure/storage/blob/aio/_blob_client_async.py +241 -44
- azure/storage/blob/aio/_blob_service_client_async.py +13 -11
- azure/storage/blob/aio/_container_client_async.py +28 -25
- azure/storage/blob/aio/_download_async.py +16 -12
- azure/storage/blob/aio/_lease_async.py +1 -0
- azure/storage/blob/aio/_quick_query_helper_async.py +194 -0
- {azure_storage_blob-12.25.0b1.dist-info → azure_storage_blob-12.26.0.dist-info}/METADATA +7 -7
- azure_storage_blob-12.26.0.dist-info/RECORD +85 -0
- {azure_storage_blob-12.25.0b1.dist-info → azure_storage_blob-12.26.0.dist-info}/WHEEL +1 -1
- azure_storage_blob-12.25.0b1.dist-info/RECORD +0 -84
- {azure_storage_blob-12.25.0b1.dist-info → azure_storage_blob-12.26.0.dist-info}/LICENSE +0 -0
- {azure_storage_blob-12.25.0b1.dist-info → azure_storage_blob-12.26.0.dist-info}/top_level.txt +0 -0
azure/storage/blob/_shared/avro/avro_io_async.py

@@ -61,14 +61,14 @@ class AsyncBinaryDecoder(object):
         """Read n bytes.
 
         :param int n: Number of bytes to read.
-        :returns: The next n bytes from the input.
+        :return: The next n bytes from the input.
         :rtype: bytes
         """
-        assert (n >= 0), n
+        assert n >= 0, n
         input_bytes = await self.reader.read(n)
         if n > 0 and not input_bytes:
             raise StopAsyncIteration
-        assert (len(input_bytes) == n), input_bytes
+        assert len(input_bytes) == n, input_bytes
         return input_bytes
 
     @staticmethod
@@ -132,7 +132,7 @@ class AsyncBinaryDecoder(object):
         Bytes are encoded as a long followed by that many bytes of data.
         """
         nbytes = await self.read_long()
-        assert (nbytes >= 0), nbytes
+        assert nbytes >= 0, nbytes
         return await self.read(nbytes)
 
     async def read_utf8(self):
@@ -143,13 +143,13 @@ class AsyncBinaryDecoder(object):
         input_bytes = await self.read_bytes()
         if PY3:
             try:
-                return input_bytes.decode('utf-8')
+                return input_bytes.decode("utf-8")
             except UnicodeDecodeError as exn:
-                logger.error('Invalid UTF-8 input bytes: %r', input_bytes)
+                logger.error("Invalid UTF-8 input bytes: %r", input_bytes)  # pylint: disable=do-not-log-raised-errors
                 raise exn
         else:
             # PY2
-            return unicode(input_bytes, "utf-8")
+            return unicode(input_bytes, "utf-8")  # pylint: disable=undefined-variable
 
     def skip_null(self):
         pass
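For orientation, the decoder methods above follow the standard Avro wire encodings: a long is a zig-zag base-128 varint, `bytes` is a long byte count followed by that many raw bytes, and `string` is the `bytes` encoding decoded as UTF-8. A minimal synchronous sketch of those primitives (standalone toy code, not the SDK's `AsyncBinaryDecoder` API):

```python
import io

def read_long(buf: io.BytesIO) -> int:
    """Avro long: little-endian base-128 varint carrying a zig-zag value."""
    b = buf.read(1)[0]
    n, shift = b & 0x7F, 7
    while b & 0x80:
        b = buf.read(1)[0]
        n |= (b & 0x7F) << shift
        shift += 7
    return (n >> 1) ^ -(n & 1)  # undo zig-zag: 0,1,2,3,... -> 0,-1,1,-2,...

def read_bytes(buf: io.BytesIO) -> bytes:
    """Avro bytes: a long byte count followed by that many raw bytes."""
    nbytes = read_long(buf)
    assert nbytes >= 0, nbytes  # mirrors the guard in read_bytes above
    return buf.read(nbytes)

def read_utf8(buf: io.BytesIO) -> str:
    """Avro string: the bytes encoding, decoded as UTF-8."""
    return read_bytes(buf).decode("utf-8")

# b"\x06" is the zig-zag varint for 3, followed by three UTF-8 bytes.
assert read_utf8(io.BytesIO(b"\x06abc")) == "abc"
```

The asserts mirror the guards in the hunk above; a production decoder would raise richer errors instead.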
@@ -200,41 +200,40 @@ class AsyncDatumReader(object):
     def set_writer_schema(self, writer_schema):
         self._writer_schema = writer_schema
 
-    writer_schema = property(lambda self: self._writer_schema,
-                             set_writer_schema)
+    writer_schema = property(lambda self: self._writer_schema, set_writer_schema)
 
     async def read(self, decoder):
         return await self.read_data(self.writer_schema, decoder)
 
     async def read_data(self, writer_schema, decoder):
         # function dispatch for reading data based on type of writer's schema
-        if writer_schema.type == 'null':
+        if writer_schema.type == "null":
             result = decoder.read_null()
-        elif writer_schema.type == 'boolean':
+        elif writer_schema.type == "boolean":
             result = await decoder.read_boolean()
-        elif writer_schema.type == 'string':
+        elif writer_schema.type == "string":
             result = await decoder.read_utf8()
-        elif writer_schema.type == 'int':
+        elif writer_schema.type == "int":
             result = await decoder.read_int()
-        elif writer_schema.type == 'long':
+        elif writer_schema.type == "long":
             result = await decoder.read_long()
-        elif writer_schema.type == 'float':
+        elif writer_schema.type == "float":
             result = await decoder.read_float()
-        elif writer_schema.type == 'double':
+        elif writer_schema.type == "double":
             result = await decoder.read_double()
-        elif writer_schema.type == 'bytes':
+        elif writer_schema.type == "bytes":
             result = await decoder.read_bytes()
-        elif writer_schema.type == 'fixed':
+        elif writer_schema.type == "fixed":
             result = await self.read_fixed(writer_schema, decoder)
-        elif writer_schema.type == 'enum':
+        elif writer_schema.type == "enum":
             result = await self.read_enum(writer_schema, decoder)
-        elif writer_schema.type == 'array':
+        elif writer_schema.type == "array":
             result = await self.read_array(writer_schema, decoder)
-        elif writer_schema.type == 'map':
+        elif writer_schema.type == "map":
             result = await self.read_map(writer_schema, decoder)
-        elif writer_schema.type in ['union', 'error_union']:
+        elif writer_schema.type in ["union", "error_union"]:
             result = await self.read_union(writer_schema, decoder)
-        elif writer_schema.type in ['record', 'error', 'request']:
+        elif writer_schema.type in ["record", "error", "request"]:
             result = await self.read_record(writer_schema, decoder)
         else:
             fail_msg = f"Cannot read unknown schema type: {writer_schema.type}"
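One thing this dispatch relies on: Avro records carry no field names or tags on the wire, so the `record` branch simply decodes each field in schema declaration order. A self-contained toy showing that layout (the lat/lon schema and field names here are invented for illustration):

```python
import io
import struct

def read_double(buf: io.BytesIO) -> float:
    return struct.unpack("<d", buf.read(8))[0]  # 8 bytes, IEEE 754 little-endian

def read_boolean(buf: io.BytesIO) -> bool:
    return buf.read(1) == b"\x01"  # a single byte, 0 or 1

# Hypothetical schema: record { "lat": double, "lon": double, "valid": boolean }
FIELDS = [("lat", read_double), ("lon", read_double), ("valid", read_boolean)]

def read_record(buf: io.BytesIO) -> dict:
    # Fields decode back-to-back in declaration order; the schema, not the
    # wire data, determines where one field ends and the next begins.
    return {name: reader(buf) for name, reader in FIELDS}

wire = struct.pack("<dd", 47.6, -122.3) + b"\x01"
assert read_record(io.BytesIO(wire)) == {"lat": 47.6, "lon": -122.3, "valid": True}
```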
@@ -242,35 +241,35 @@ class AsyncDatumReader(object):
         return result
 
     async def skip_data(self, writer_schema, decoder):
-        if writer_schema.type == 'null':
+        if writer_schema.type == "null":
             result = decoder.skip_null()
-        elif writer_schema.type == 'boolean':
+        elif writer_schema.type == "boolean":
             result = await decoder.skip_boolean()
-        elif writer_schema.type == 'string':
+        elif writer_schema.type == "string":
             result = await decoder.skip_utf8()
-        elif writer_schema.type == 'int':
+        elif writer_schema.type == "int":
             result = await decoder.skip_int()
-        elif writer_schema.type == 'long':
+        elif writer_schema.type == "long":
             result = await decoder.skip_long()
-        elif writer_schema.type == 'float':
+        elif writer_schema.type == "float":
             result = await decoder.skip_float()
-        elif writer_schema.type == 'double':
+        elif writer_schema.type == "double":
             result = await decoder.skip_double()
-        elif writer_schema.type == 'bytes':
+        elif writer_schema.type == "bytes":
             result = await decoder.skip_bytes()
-        elif writer_schema.type == 'fixed':
+        elif writer_schema.type == "fixed":
             result = await self.skip_fixed(writer_schema, decoder)
-        elif writer_schema.type == 'enum':
+        elif writer_schema.type == "enum":
             result = await self.skip_enum(decoder)
-        elif writer_schema.type == 'array':
+        elif writer_schema.type == "array":
             await self.skip_array(writer_schema, decoder)
             result = None
-        elif writer_schema.type == 'map':
+        elif writer_schema.type == "map":
             await self.skip_map(writer_schema, decoder)
             result = None
-        elif writer_schema.type in ['union', 'error_union']:
+        elif writer_schema.type in ["union", "error_union"]:
             result = await self.skip_union(writer_schema, decoder)
-        elif writer_schema.type in ['record', 'error', 'request']:
+        elif writer_schema.type in ["record", "error", "request"]:
             await self.skip_record(writer_schema, decoder)
             result = None
         else:
@@ -373,8 +372,9 @@ class AsyncDatumReader(object):
         # schema resolution
         index_of_schema = int(await decoder.read_long())
         if index_of_schema >= len(writer_schema.schemas):
-            fail_msg = (f"Can't access branch index {index_of_schema} "
-                        f"for union with {len(writer_schema.schemas)} branches")
+            fail_msg = (
+                f"Can't access branch index {index_of_schema} " f"for union with {len(writer_schema.schemas)} branches"
+            )
             raise SchemaResolutionException(fail_msg, writer_schema)
         selected_writer_schema = writer_schema.schemas[index_of_schema]
 
@@ -384,8 +384,9 @@ class AsyncDatumReader(object):
     async def skip_union(self, writer_schema, decoder):
         index_of_schema = int(await decoder.read_long())
         if index_of_schema >= len(writer_schema.schemas):
-            fail_msg = (f"Can't access branch index {index_of_schema} "
-                        f"for union with {len(writer_schema.schemas)} branches")
+            fail_msg = (
+                f"Can't access branch index {index_of_schema} " f"for union with {len(writer_schema.schemas)} branches"
+            )
             raise SchemaResolutionException(fail_msg, writer_schema)
         return await self.skip_data(writer_schema.schemas[index_of_schema], decoder)
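The branch-index guard in `read_union`/`skip_union` reflects how Avro encodes unions: a zig-zag varint long selects the branch, then the value follows in that branch's encoding, with no other framing. A toy decoder for a nullable string (illustrative only, not the SDK's classes):

```python
import io

UNION = ["null", "string"]  # a nullable string, the most common union shape

def read_long(buf: io.BytesIO) -> int:
    b = buf.read(1)[0]
    n, shift = b & 0x7F, 7
    while b & 0x80:
        b = buf.read(1)[0]
        n |= (b & 0x7F) << shift
        shift += 7
    return (n >> 1) ^ -(n & 1)  # undo zig-zag

def read_optional_string(buf: io.BytesIO):
    index = read_long(buf)
    if index >= len(UNION):  # the same out-of-range guard the SDK raises on
        raise ValueError(f"Can't access branch index {index}")
    if UNION[index] == "null":
        return None  # branch 0 carries no body at all
    return buf.read(read_long(buf)).decode("utf-8")

assert read_optional_string(io.BytesIO(b"\x00")) is None        # branch 0: null
assert read_optional_string(io.BytesIO(b"\x02\x04hi")) == "hi"  # branch 1: string
```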
azure/storage/blob/_shared/avro/datafile.py

@@ -26,17 +26,18 @@ logger = logging.getLogger(__name__)
 VERSION = 1
 
 if PY3:
-    MAGIC = b'Obj' + bytes([VERSION])
+    MAGIC = b"Obj" + bytes([VERSION])
     MAGIC_SIZE = len(MAGIC)
 else:
-    MAGIC = 'Obj' + chr(VERSION)
+    MAGIC = "Obj" + chr(VERSION)
     MAGIC_SIZE = len(MAGIC)
 
 # Size of the synchronization marker, in number of bytes:
 SYNC_SIZE = 16
 
 # Schema of the container header:
-META_SCHEMA = schema.parse("""
+META_SCHEMA = schema.parse(
+    """
 {
   "type": "record", "name": "org.apache.avro.file.Header",
   "fields": [{
@@ -50,13 +51,15 @@ META_SCHEMA = schema.parse("""
     "type": {"type": "fixed", "name": "sync", "size": %(sync_size)d}
   }]
 }
-""" % {
-    'magic_size': MAGIC_SIZE,
-    'sync_size': SYNC_SIZE,
-})
+"""
+    % {
+        "magic_size": MAGIC_SIZE,
+        "sync_size": SYNC_SIZE,
+    }
+)
 
 # Codecs supported by container files:
-VALID_CODECS = frozenset(['null', 'deflate'])
+VALID_CODECS = frozenset(["null", "deflate"])
 
 # Metadata key associated to the schema:
 SCHEMA_KEY = "avro.schema"
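A quick sanity check of the constants above, assuming the PY3 branch: the magic is the ASCII bytes `Obj` plus a one-byte format version, and it is the first thing `_read_header` validates. An in-memory sketch:

```python
import io

VERSION = 1
MAGIC = b"Obj" + bytes([VERSION])
assert MAGIC == b"Obj\x01" and len(MAGIC) == 4

# The same check _read_header performs, against an in-memory stand-in:
stream = io.BytesIO(MAGIC + b"\x00" * 32)  # fake remainder of a header
if stream.read(len(MAGIC)) != MAGIC:
    raise ValueError("Not an Avro data file")
```

After the magic come the `meta` map (which carries `avro.codec` and `avro.schema`) and the 16-byte `sync` fixed, exactly as META_SCHEMA declares.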
@@ -69,6 +72,7 @@ SCHEMA_KEY = "avro.schema"
 class DataFileException(schema.AvroException):
     """Problem reading or writing file object containers."""
 
+
 # ------------------------------------------------------------------------------
 
 
@@ -84,7 +88,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
         """
         self._reader = reader
         self._raw_decoder = avro_io.BinaryDecoder(reader)
-        self._header_reader = kwargs.pop('header_reader', None)
+        self._header_reader = kwargs.pop("header_reader", None)
         self._header_decoder = None if self._header_reader is None else avro_io.BinaryDecoder(self._header_reader)
         self._datum_decoder = None  # Maybe reset at every block.
         self._datum_reader = datum_reader
@@ -97,11 +101,11 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
         self._read_header()
 
         # ensure codec is valid
-        avro_codec_raw = self.get_meta('avro.codec')
+        avro_codec_raw = self.get_meta("avro.codec")
         if avro_codec_raw is None:
             self.codec = "null"
         else:
-            self.codec = avro_codec_raw.decode('utf-8')
+            self.codec = avro_codec_raw.decode("utf-8")
         if self.codec not in VALID_CODECS:
             raise DataFileException(f"Unknown codec: {self.codec}.")
 
@@ -110,7 +114,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
 
         # object_position is to support reading from current position in the future read,
         # no need to downloading from the beginning of avro.
-        if hasattr(self._reader, 'object_position'):
+        if hasattr(self._reader, "object_position"):
             self.reader.track_object_position()
 
         self._cur_object_index = 0
@@ -120,8 +124,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
         if self._header_reader is not None:
             self._datum_decoder = self._raw_decoder
 
-        self.datum_reader.writer_schema = (
-            schema.parse(self.get_meta(SCHEMA_KEY).decode('utf-8')))
+        self.datum_reader.writer_schema = schema.parse(self.get_meta(SCHEMA_KEY).decode("utf-8"))
 
     def __enter__(self):
         return self
@@ -168,7 +171,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
         """Reports the value of a given metadata key.
 
         :param str key: Metadata key to report the value of.
-        :returns: Value associated to the metadata key, as bytes.
+        :return: Value associated to the metadata key, as bytes.
         :rtype: bytes
         """
         return self._meta.get(key)
@@ -184,15 +187,15 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
         header = self.datum_reader.read_data(META_SCHEMA, header_decoder)
 
         # check magic number
-        if header.get('magic') != MAGIC:
+        if header.get("magic") != MAGIC:
             fail_msg = f"Not an Avro data file: {header.get('magic')} doesn't match {MAGIC!r}."
             raise schema.AvroException(fail_msg)
 
         # set metadata
-        self._meta = header['meta']
+        self._meta = header["meta"]
 
         # set sync marker
-        self._sync_marker = header['sync']
+        self._sync_marker = header["sync"]
 
     def _read_block_header(self):
         self._block_count = self.raw_decoder.read_long()
@@ -200,7 +203,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
             # Skip a long; we don't need to use the length.
             self.raw_decoder.skip_long()
             self._datum_decoder = self._raw_decoder
-        elif self.codec == 'deflate':
+        elif self.codec == "deflate":
             # Compressed data is stored as (length, data), which
             # corresponds to how the "bytes" type is encoded.
             data = self.raw_decoder.read_bytes()
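For the deflate branch, the Avro spec stores block data as a raw DEFLATE stream (RFC 1951, no zlib header or checksum), which in Python round-trips with negative `wbits`. A standalone sketch of that step (illustrative; the SDK's exact inflate call is not shown in this hunk):

```python
import zlib

payload = b'{"example": "datum"} ' * 8

# wbits=-15 selects a raw DEFLATE stream with no zlib header or trailer.
compressor = zlib.compressobj(9, zlib.DEFLATED, -15)
compressed = compressor.compress(payload) + compressor.flush()

assert zlib.decompress(compressed, -15) == payload
```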
@@ -229,7 +232,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
 
         # object_position is to support reading from current position in the future read,
         # no need to downloading from the beginning of avro file with this attr.
-        if hasattr(self._reader, 'object_position'):
+        if hasattr(self._reader, "object_position"):
             self.reader.track_object_position()
         self._cur_object_index = 0
 
@@ -242,7 +245,7 @@ class DataFileReader(object):  # pylint: disable=too-many-instance-attributes
         # object_position is to support reading from current position in the future read,
         # This will track the index of the next item to be read.
         # This will also track the offset before the next sync marker.
-        if hasattr(self._reader, 'object_position'):
+        if hasattr(self._reader, "object_position"):
             if self.block_count == 0:
                 # the next event to be read is at index 0 in the new chunk of blocks,
                 self.reader.track_object_position()
azure/storage/blob/_shared/avro/datafile_async.py

@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
 # Constants
 
 # Codecs supported by container files:
-VALID_CODECS = frozenset(['null'])
+VALID_CODECS = frozenset(["null"])
 
 
 class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
@@ -39,9 +39,10 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
         """
         self._reader = reader
         self._raw_decoder = avro_io_async.AsyncBinaryDecoder(reader)
-        self._header_reader = kwargs.pop('header_reader', None)
-        self._header_decoder = None if self._header_reader is None else \
-            avro_io_async.AsyncBinaryDecoder(self._header_reader)
+        self._header_reader = kwargs.pop("header_reader", None)
+        self._header_decoder = (
+            None if self._header_reader is None else avro_io_async.AsyncBinaryDecoder(self._header_reader)
+        )
         self._datum_decoder = None  # Maybe reset at every block.
         self._datum_reader = datum_reader
         self.codec = "null"
@@ -59,11 +60,11 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
         await self._read_header()
 
         # ensure codec is valid
-        avro_codec_raw = self.get_meta('avro.codec')
+        avro_codec_raw = self.get_meta("avro.codec")
         if avro_codec_raw is None:
             self.codec = "null"
         else:
-            self.codec = avro_codec_raw.decode('utf-8')
+            self.codec = avro_codec_raw.decode("utf-8")
         if self.codec not in VALID_CODECS:
             raise DataFileException(f"Unknown codec: {self.codec}.")
 
@@ -72,7 +73,7 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
 
         # object_position is to support reading from current position in the future read,
         # no need to downloading from the beginning of avro.
-        if hasattr(self._reader, 'object_position'):
+        if hasattr(self._reader, "object_position"):
             self.reader.track_object_position()
 
         # header_reader indicates reader only has partial content. The reader doesn't have block header,
@@ -80,8 +81,7 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
         # Also ChangeFeed only has codec==null, so use _raw_decoder is good.
         if self._header_reader is not None:
             self._datum_decoder = self._raw_decoder
-        self.datum_reader.writer_schema = (
-            schema.parse(self.get_meta(SCHEMA_KEY).decode('utf-8')))
+        self.datum_reader.writer_schema = schema.parse(self.get_meta(SCHEMA_KEY).decode("utf-8"))
         return self
 
     async def __aenter__(self):
@@ -129,7 +129,7 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
         """Reports the value of a given metadata key.
 
         :param str key: Metadata key to report the value of.
-        :returns: Value associated to the metadata key, as bytes.
+        :return: Value associated to the metadata key, as bytes.
         :rtype: bytes
         """
         return self._meta.get(key)
@@ -145,15 +145,15 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
         header = await self.datum_reader.read_data(META_SCHEMA, header_decoder)
 
         # check magic number
-        if header.get('magic') != MAGIC:
+        if header.get("magic") != MAGIC:
             fail_msg = f"Not an Avro data file: {header.get('magic')} doesn't match {MAGIC!r}."
             raise schema.AvroException(fail_msg)
 
         # set metadata
-        self._meta = header['meta']
+        self._meta = header["meta"]
 
         # set sync marker
-        self._sync_marker = header['sync']
+        self._sync_marker = header["sync"]
 
     async def _read_block_header(self):
         self._block_count = await self.raw_decoder.read_long()
@@ -182,7 +182,7 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
 
         # object_position is to support reading from current position in the future read,
         # no need to downloading from the beginning of avro file with this attr.
-        if hasattr(self._reader, 'object_position'):
+        if hasattr(self._reader, "object_position"):
             await self.reader.track_object_position()
         self._cur_object_index = 0
 
@@ -195,7 +195,7 @@ class AsyncDataFileReader(object):  # pylint: disable=too-many-instance-attributes
         # object_position is to support reading from current position in the future read,
         # This will track the index of the next item to be read.
         # This will also track the offset before the next sync marker.
-        if hasattr(self._reader, 'object_position'):
+        if hasattr(self._reader, "object_position"):
             if self.block_count == 0:
                 # the next event to be read is at index 0 in the new chunk of blocks,
                 await self.reader.track_object_position()
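After the header, a container file is a sequence of blocks, each framed as a long object count, a long serialized size, the block data, and then the 16-byte sync marker the reader validates before continuing. A toy null-codec round trip of that framing (not the SDK's `AsyncDataFileReader` API; `tiny_long` handles only the small non-negative longs this example needs):

```python
import io
import os

SYNC_SIZE = 16

def tiny_long(n: int) -> bytes:
    """Zig-zag varint for a small non-negative long fits in one byte."""
    assert 0 <= n <= 63
    return bytes([n << 1])

def read_tiny_long(buf: io.BytesIO) -> int:
    return buf.read(1)[0] >> 1

sync_marker = os.urandom(SYNC_SIZE)
data = b"\x06foo\x06bar"  # two Avro strings: "foo" and "bar"
block = tiny_long(2) + tiny_long(len(data)) + data + sync_marker

buf = io.BytesIO(block)
count = read_tiny_long(buf)
size = read_tiny_long(buf)
assert (count, buf.read(size)) == (2, data)
assert buf.read(SYNC_SIZE) == sync_marker  # the reader checks/skips this
```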