flow.record 3.12.dev5__tar.gz → 3.13.dev2__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (78)
  1. {flow.record-3.12.dev5/flow.record.egg-info → flow.record-3.13.dev2}/PKG-INFO +14 -3
  2. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/__init__.py +6 -0
  3. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/avro.py +12 -9
  4. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/stream.py +16 -13
  5. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/base.py +163 -86
  6. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/exceptions.py +4 -0
  7. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/__init__.py +14 -2
  8. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/stream.py +1 -3
  9. flow.record-3.13.dev2/flow/record/version.py +16 -0
  10. {flow.record-3.12.dev5 → flow.record-3.13.dev2/flow.record.egg-info}/PKG-INFO +14 -3
  11. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow.record.egg-info/SOURCES.txt +1 -0
  12. flow.record-3.13.dev2/tests/test_avro.py +64 -0
  13. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_rdump.py +38 -1
  14. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_record_adapter.py +30 -0
  15. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_regression.py +7 -2
  16. flow.record-3.12.dev5/flow/record/version.py +0 -4
  17. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/COPYRIGHT +0 -0
  18. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/LICENSE +0 -0
  19. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/MANIFEST.in +0 -0
  20. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/README.md +0 -0
  21. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/examples/filesystem.py +0 -0
  22. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/examples/passivedns.py +0 -0
  23. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/examples/records.json +0 -0
  24. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/examples/tcpconn.py +0 -0
  25. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/__init__.py +0 -0
  26. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/archive.py +0 -0
  27. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/broker.py +0 -0
  28. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/csvfile.py +0 -0
  29. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/elastic.py +0 -0
  30. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/jsonfile.py +0 -0
  31. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/line.py +0 -0
  32. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/mongo.py +0 -0
  33. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/split.py +0 -0
  34. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/splunk.py +0 -0
  35. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/text.py +0 -0
  36. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/adapter/xlsx.py +0 -0
  37. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/credential.py +0 -0
  38. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/net/__init__.py +0 -0
  39. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/net/ip.py +0 -0
  40. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/net/ipv4.py +0 -0
  41. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/net/tcp.py +0 -0
  42. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/fieldtypes/net/udp.py +0 -0
  43. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/jsonpacker.py +0 -0
  44. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/packer.py +0 -0
  45. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/selector.py +0 -0
  46. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/tools/__init__.py +0 -0
  47. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/tools/geoip.py +0 -0
  48. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/tools/rdump.py +0 -0
  49. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/utils.py +0 -0
  50. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow/record/whitelist.py +0 -0
  51. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow.record.egg-info/dependency_links.txt +0 -0
  52. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow.record.egg-info/entry_points.txt +0 -0
  53. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow.record.egg-info/requires.txt +0 -0
  54. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/flow.record.egg-info/top_level.txt +0 -0
  55. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/pyproject.toml +0 -0
  56. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/setup.cfg +0 -0
  57. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/__init__.py +0 -0
  58. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/_utils.py +0 -0
  59. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/docs/Makefile +0 -0
  60. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/docs/conf.py +0 -0
  61. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/docs/index.rst +0 -0
  62. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/selector_explain_example.py +0 -0
  63. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/standalone_test.py +0 -0
  64. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_avro_adapter.py +0 -0
  65. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_compiled_selector.py +0 -0
  66. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_deprecations.py +0 -0
  67. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_fieldtype_ip.py +0 -0
  68. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_fieldtypes.py +0 -0
  69. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_json_packer.py +0 -0
  70. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_json_record_adapter.py +0 -0
  71. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_multi_timestamp.py +0 -0
  72. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_packer.py +0 -0
  73. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_record.py +0 -0
  74. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_record_descriptor.py +0 -0
  75. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_selector.py +0 -0
  76. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/test_splunk_adapter.py +0 -0
  77. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tests/utils_inspect.py +0 -0
  78. {flow.record-3.12.dev5 → flow.record-3.13.dev2}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.12.dev5
3
+ Version: 3.13.dev2
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -18,13 +18,24 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
18
18
  Classifier: Topic :: Utilities
19
19
  Requires-Python: ~=3.7
20
20
  Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ License-File: COPYRIGHT
23
+ Requires-Dist: msgpack>=0.5.2
24
+ Requires-Dist: backports.zoneinfo[tzdata]; python_version < "3.9"
25
+ Requires-Dist: tzdata; platform_system == "Windows"
21
26
  Provides-Extra: compression
27
+ Requires-Dist: lz4; extra == "compression"
28
+ Requires-Dist: zstandard; extra == "compression"
22
29
  Provides-Extra: elastic
30
+ Requires-Dist: elasticsearch; extra == "elastic"
23
31
  Provides-Extra: geoip
32
+ Requires-Dist: maxminddb; extra == "geoip"
24
33
  Provides-Extra: avro
34
+ Requires-Dist: fastavro[snappy]; extra == "avro"
25
35
  Provides-Extra: test
26
- License-File: LICENSE
27
- License-File: COPYRIGHT
36
+ Requires-Dist: lz4; extra == "test"
37
+ Requires-Dist: zstandard; extra == "test"
38
+ Requires-Dist: fastavro; extra == "test"
28
39
 
29
40
  # flow.record
30
41
 
@@ -3,6 +3,7 @@ import os
3
3
 
4
4
  from flow.record.base import (
5
5
  RECORD_VERSION,
6
+ RECORDSTREAM_MAGIC,
6
7
  DynamicDescriptor,
7
8
  FieldType,
8
9
  GroupedRecord,
@@ -16,7 +17,9 @@ from flow.record.base import (
16
17
  dynamic_fieldtype,
17
18
  extend_record,
18
19
  iter_timestamped_records,
20
+ open_file,
19
21
  open_path,
22
+ open_stream,
20
23
  stream,
21
24
  )
22
25
  from flow.record.jsonpacker import JsonRecordPacker
@@ -33,6 +36,7 @@ from flow.record.stream import (
33
36
 
34
37
  __all__ = [
35
38
  "RECORD_VERSION",
39
+ "RECORDSTREAM_MAGIC",
36
40
  "FieldType",
37
41
  "Record",
38
42
  "GroupedRecord",
@@ -47,7 +51,9 @@ __all__ = [
47
51
  "JsonRecordPacker",
48
52
  "RecordStreamWriter",
49
53
  "RecordStreamReader",
54
+ "open_file",
50
55
  "open_path",
56
+ "open_stream",
51
57
  "stream",
52
58
  "dynamic_fieldtype",
53
59
  "DynamicDescriptor",
@@ -1,6 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  from datetime import datetime, timedelta, timezone
3
5
  from importlib.util import find_spec
6
+ from typing import Any, Iterator
4
7
 
5
8
  import fastavro
6
9
 
@@ -50,7 +53,7 @@ class AvroWriter(AbstractWriter):
50
53
  writer = None
51
54
 
52
55
  def __init__(self, path, key=None, **kwargs):
53
- self.fp = record.open_path(path, "wb")
56
+ self.fp = record.open_file(path, "wb")
54
57
 
55
58
  self.desc = None
56
59
  self.schema = None
@@ -58,7 +61,7 @@ class AvroWriter(AbstractWriter):
58
61
  self.writer = None
59
62
  self.codec = "snappy" if find_spec("snappy") else "deflate"
60
63
 
61
- def write(self, r):
64
+ def write(self, r: record.Record) -> None:
62
65
  if not self.desc:
63
66
  self.desc = r._desc
64
67
  self.schema = descriptor_to_schema(self.desc)
@@ -79,7 +82,7 @@ class AvroWriter(AbstractWriter):
79
82
  )
80
83
  self.writer.flush()
81
84
 
82
- def close(self):
85
+ def close(self) -> None:
83
86
  if self.fp and not is_stdout(self.fp):
84
87
  self.fp.close()
85
88
  self.fp = None
@@ -90,7 +93,7 @@ class AvroReader(AbstractReader):
90
93
  fp = None
91
94
 
92
95
  def __init__(self, path, selector=None, **kwargs):
93
- self.fp = record.open_path(path, "rb")
96
+ self.fp = record.open_file(path, "rb")
94
97
  self.selector = make_selector(selector)
95
98
 
96
99
  self.reader = fastavro.reader(self.fp)
@@ -105,7 +108,7 @@ class AvroReader(AbstractReader):
105
108
  name for name, field in self.desc.get_all_fields().items() if field.typename == "datetime"
106
109
  )
107
110
 
108
- def __iter__(self):
111
+ def __iter__(self) -> Iterator[record.Record]:
109
112
  for obj in self.reader:
110
113
  # Convert timestamp-micros fields back to datetime fields
111
114
  for field_name in self.datetime_fields:
@@ -117,13 +120,13 @@ class AvroReader(AbstractReader):
117
120
  if not self.selector or self.selector.match(rec):
118
121
  yield rec
119
122
 
120
- def close(self):
123
+ def close(self) -> None:
121
124
  if self.fp:
122
125
  self.fp.close()
123
126
  self.fp = None
124
127
 
125
128
 
126
- def descriptor_to_schema(desc):
129
+ def descriptor_to_schema(desc: record.RecordDescriptor) -> dict[str, Any]:
127
130
  namespace, _, name = desc.name.rpartition("/")
128
131
  schema = {
129
132
  "type": "record",
@@ -156,7 +159,7 @@ def descriptor_to_schema(desc):
156
159
  return schema
157
160
 
158
161
 
159
- def schema_to_descriptor(schema):
162
+ def schema_to_descriptor(schema: dict) -> record.RecordDescriptor:
160
163
  doc = schema.get("doc")
161
164
 
162
165
  # Sketchy record descriptor detection
@@ -178,7 +181,7 @@ def schema_to_descriptor(schema):
178
181
  return record.RecordDescriptor(name, fields)
179
182
 
180
183
 
181
- def avro_type_to_flow_type(ftype):
184
+ def avro_type_to_flow_type(ftype: list) -> str:
182
185
  ftypes = [ftype] if not isinstance(ftype, list) else ftype
183
186
 
184
187
  # If a field can be null, it has an additional type of "null"
@@ -1,5 +1,8 @@
1
- from flow import record
1
+ from typing import Iterator, Union
2
+
3
+ from flow.record import Record, RecordOutput, RecordStreamReader, open_file, open_path
2
4
  from flow.record.adapter import AbstractReader, AbstractWriter
5
+ from flow.record.selector import Selector
3
6
  from flow.record.utils import is_stdout
4
7
 
5
8
  __usage__ = """
@@ -15,20 +18,20 @@ class StreamWriter(AbstractWriter):
15
18
  fp = None
16
19
  stream = None
17
20
 
18
- def __init__(self, path, clobber=True, **kwargs):
19
- self.fp = record.open_path(path, "wb", clobber=clobber)
20
- self.stream = record.RecordOutput(self.fp)
21
+ def __init__(self, path: str, clobber=True, **kwargs):
22
+ self.fp = open_path(path, "wb", clobber=clobber)
23
+ self.stream = RecordOutput(self.fp)
21
24
 
22
- def write(self, r):
23
- self.stream.write(r)
25
+ def write(self, record: Record) -> None:
26
+ self.stream.write(record)
24
27
 
25
- def flush(self):
28
+ def flush(self) -> None:
26
29
  if self.stream and hasattr(self.stream, "flush"):
27
30
  self.stream.flush()
28
31
  if self.fp:
29
32
  self.fp.flush()
30
33
 
31
- def close(self):
34
+ def close(self) -> None:
32
35
  if self.stream:
33
36
  self.stream.close()
34
37
  self.stream = None
@@ -42,14 +45,14 @@ class StreamReader(AbstractReader):
42
45
  fp = None
43
46
  stream = None
44
47
 
45
- def __init__(self, path, selector=None, **kwargs):
46
- self.fp = record.open_path(path, "rb")
47
- self.stream = record.RecordStreamReader(self.fp, selector=selector)
48
+ def __init__(self, path: str, selector: Union[str, Selector] = None, **kwargs):
49
+ self.fp = open_file(path, "rb")
50
+ self.stream = RecordStreamReader(self.fp, selector=selector)
48
51
 
49
- def __iter__(self):
52
+ def __iter__(self) -> Iterator[Record]:
50
53
  return iter(self.stream)
51
54
 
52
- def close(self):
55
+ def close(self) -> None:
53
56
  if self.stream:
54
57
  self.stream.close()
55
58
  self.stream = None
@@ -14,10 +14,12 @@ import sys
14
14
  import warnings
15
15
  from datetime import datetime, timezone
16
16
  from itertools import zip_longest
17
- from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple
17
+ from pathlib import Path
18
+ from typing import IO, Any, BinaryIO, Iterator, Mapping, Optional, Sequence, Union
18
19
  from urllib.parse import parse_qsl, urlparse
19
20
 
20
- from .exceptions import RecordDescriptorError
21
+ from flow.record.adapter import AbstractReader, AbstractWriter
22
+ from flow.record.exceptions import RecordAdapterNotFound, RecordDescriptorError
21
23
 
22
24
  try:
23
25
  import lz4.frame as lz4
@@ -38,6 +40,13 @@ try:
38
40
  except ImportError:
39
41
  HAS_ZSTD = False
40
42
 
43
+ try:
44
+ import fastavro as avro # noqa
45
+
46
+ HAS_AVRO = True
47
+ except ImportError:
48
+ HAS_AVRO = False
49
+
41
50
  from collections import OrderedDict
42
51
 
43
52
  from .utils import to_native_str, to_str
@@ -63,6 +72,10 @@ GZIP_MAGIC = b"\x1f\x8b"
63
72
  BZ2_MAGIC = b"BZh"
64
73
  LZ4_MAGIC = b"\x04\x22\x4d\x18"
65
74
  ZSTD_MAGIC = b"\x28\xb5\x2f\xfd"
75
+ AVRO_MAGIC = b"Obj"
76
+
77
+ RECORDSTREAM_MAGIC = b"RECORDSTREAM\n"
78
+ RECORDSTREAM_MAGIC_DEPTH = 4 + 2 + len(RECORDSTREAM_MAGIC)
66
79
 
67
80
  RE_VALID_FIELD_NAME = re.compile(r"^_?[a-zA-Z][a-zA-Z0-9_]*(?:\[\])?$")
68
81
  RE_VALID_RECORD_TYPE_NAME = re.compile("^[a-zA-Z][a-zA-Z0-9_]*(/[a-zA-Z][a-zA-Z0-9_]*)*$")
@@ -83,37 +96,6 @@ class {name}(Record):
83
96
  """
84
97
 
85
98
 
86
- class Peekable:
87
- """Wrapper class for adding .peek() to a file object."""
88
-
89
- def __init__(self, fd):
90
- self.fd = fd
91
- self.buffer = None
92
-
93
- def peek(self, size):
94
- if self.buffer is not None:
95
- raise BufferError("Only 1 peek allowed")
96
- data = self.fd.read(size)
97
- self.buffer = io.BytesIO(data)
98
- return data
99
-
100
- def read(self, size=None):
101
- data = b""
102
- if self.buffer is None:
103
- data = self.fd.read(size)
104
- else:
105
- data = self.buffer.read(size)
106
- if len(data) < size:
107
- data += self.fd.read(size - len(data))
108
- self.buffer = None
109
- return data
110
-
111
- def close(self):
112
- self.buffer = None
113
- self.fd.close()
114
- self.fd = None
115
-
116
-
117
99
  class FieldType:
118
100
  def _typename(self):
119
101
  t = type(self)
@@ -339,7 +321,7 @@ class RecordFieldSet(list):
339
321
 
340
322
 
341
323
  @functools.lru_cache(maxsize=4096)
342
- def _generate_record_class(name: str, fields: Tuple[Tuple[str, str]]) -> type:
324
+ def _generate_record_class(name: str, fields: tuple[tuple[str, str]]) -> type:
343
325
  """Generate a record class
344
326
 
345
327
  Args:
@@ -442,9 +424,9 @@ class RecordDescriptor:
442
424
  _desc_hash: int = None
443
425
  _fields: Mapping[str, RecordField] = None
444
426
  _all_fields: Mapping[str, RecordField] = None
445
- _field_tuples: Sequence[Tuple[str, str]] = None
427
+ _field_tuples: Sequence[tuple[str, str]] = None
446
428
 
447
- def __init__(self, name: str, fields: Optional[Sequence[Tuple[str, str]]] = None):
429
+ def __init__(self, name: str, fields: Optional[Sequence[tuple[str, str]]] = None):
448
430
  if not name:
449
431
  raise RecordDescriptorError("Record name is required")
450
432
 
@@ -548,7 +530,7 @@ class RecordDescriptor:
548
530
  """Create a new Record initialized with `args` and `kwargs`."""
549
531
  return self.recordType(*args, **kwargs)
550
532
 
551
- def init_from_dict(self, rdict: Dict[str, Any], raise_unknown=False) -> Record:
533
+ def init_from_dict(self, rdict: dict[str, Any], raise_unknown=False) -> Record:
552
534
  """Create a new Record initialized with key, value pairs from `rdict`.
553
535
 
554
536
  If `raise_unknown=True` then fields on `rdict` that are unknown to this
@@ -575,7 +557,7 @@ class RecordDescriptor:
575
557
  """
576
558
  return self.init_from_dict(record._asdict(), raise_unknown=raise_unknown)
577
559
 
578
- def extend(self, fields: Sequence[Tuple[str, str]]) -> RecordDescriptor:
560
+ def extend(self, fields: Sequence[tuple[str, str]]) -> RecordDescriptor:
579
561
  """Returns a new RecordDescriptor with the extended fields
580
562
 
581
563
  Returns:
@@ -584,7 +566,7 @@ class RecordDescriptor:
584
566
  new_fields = list(self.get_field_tuples()) + fields
585
567
  return RecordDescriptor(self.name, new_fields)
586
568
 
587
- def get_field_tuples(self) -> Tuple[Tuple[str, str]]:
569
+ def get_field_tuples(self) -> tuple[tuple[str, str]]:
588
570
  """Returns a tuple containing the (typename, name) tuples, eg:
589
571
 
590
572
  (('boolean', 'foo'), ('string', 'bar'))
@@ -596,7 +578,7 @@ class RecordDescriptor:
596
578
 
597
579
  @staticmethod
598
580
  @functools.lru_cache(maxsize=256)
599
- def calc_descriptor_hash(name, fields: Sequence[Tuple[str, str]]) -> int:
581
+ def calc_descriptor_hash(name, fields: Sequence[tuple[str, str]]) -> int:
600
582
  """Calculate and return the (cached) descriptor hash as a 32 bit integer.
601
583
 
602
584
  The descriptor hash is the first 4 bytes of the sha256sum of the descriptor name and field names and types.
@@ -612,7 +594,7 @@ class RecordDescriptor:
612
594
  return self._desc_hash
613
595
 
614
596
  @property
615
- def identifier(self) -> Tuple[str, int]:
597
+ def identifier(self) -> tuple[str, int]:
616
598
  """Returns a tuple containing the descriptor name and hash"""
617
599
  return (self.name, self.descriptor_hash)
618
600
 
@@ -650,11 +632,11 @@ class RecordDescriptor:
650
632
 
651
633
  return wrapper
652
634
 
653
- def _pack(self) -> Tuple[str, Tuple[Tuple[str, str]]]:
635
+ def _pack(self) -> tuple[str, tuple[tuple[str, str]]]:
654
636
  return (self.name, self._field_tuples)
655
637
 
656
638
  @staticmethod
657
- def _unpack(name, fields: Tuple[Tuple[str, str]]) -> RecordDescriptor:
639
+ def _unpack(name, fields: tuple[tuple[str, str]]) -> RecordDescriptor:
658
640
  return RecordDescriptor(name, fields)
659
641
 
660
642
 
@@ -662,17 +644,66 @@ def DynamicDescriptor(name, fields):
662
644
  return RecordDescriptor(name, [("dynamic", field) for field in fields])
663
645
 
664
646
 
665
- def open_path(path, mode, clobber=True):
647
+ def open_stream(fp: BinaryIO, mode: str) -> BinaryIO:
648
+ if not hasattr(fp, "peek"):
649
+ fp = io.BufferedReader(fp)
650
+
651
+ # We peek into the file at the maximum possible length we might need, which is the amount of bytes needed to
652
+ # determine whether a stream is a RECORDSTREAM or not.
653
+ peek_data = fp.peek(RECORDSTREAM_MAGIC_DEPTH)
654
+
655
+ # If the data stream is compressed, we wrap the file pointer in a reader that can decompress accordingly.
656
+ if peek_data[:2] == GZIP_MAGIC:
657
+ fp = gzip.GzipFile(fileobj=fp, mode=mode)
658
+ elif HAS_BZ2 and peek_data[:3] == BZ2_MAGIC:
659
+ fp = bz2.BZ2File(fp, mode=mode)
660
+ elif HAS_LZ4 and peek_data[:4] == LZ4_MAGIC:
661
+ fp = lz4.open(fp, mode=mode)
662
+ elif HAS_ZSTD and peek_data[:4] == ZSTD_MAGIC:
663
+ dctx = zstd.ZstdDecompressor()
664
+ fp = dctx.stream_reader(fp)
665
+
666
+ return fp
667
+
668
+
669
+ def find_adapter_for_stream(fp: BinaryIO) -> tuple[BinaryIO, Optional[str]]:
670
+ # We need to peek into the stream to be able to determine which adapter is needed. The fp given to this function
671
+ # might already be an instance of the 'Peekable' class, but might also be a different file pointer, for example
672
+ # a transparent decompressor. As calling peek() twice on the same peekable is not allowed, we wrap the fp into
673
+ # a Peekable again, so that we are able to determine the correct adapter.
674
+ if not hasattr(fp, "peek"):
675
+ fp = io.BufferedReader(fp)
676
+
677
+ peek_data = fp.peek(RECORDSTREAM_MAGIC_DEPTH)
678
+ if HAS_AVRO and peek_data[:3] == AVRO_MAGIC:
679
+ return fp, "avro"
680
+ elif RECORDSTREAM_MAGIC in peek_data[:RECORDSTREAM_MAGIC_DEPTH]:
681
+ return fp, "stream"
682
+ return fp, None
683
+
684
+
685
+ def open_file(path: Union[str, Path, BinaryIO], mode: str, clobber: bool = True) -> IO:
686
+ if isinstance(path, Path):
687
+ path = str(path)
688
+ if isinstance(path, str):
689
+ return open_path(path, mode, clobber)
690
+ elif isinstance(path, io.IOBase):
691
+ return open_stream(path, "rb")
692
+ else:
693
+ raise ValueError(f"Unsupported path type {path}")
694
+
695
+
696
+ def open_path(path: str, mode: str, clobber: bool = True) -> IO:
666
697
  """
667
- Open `path` using `mode` and returns a file object.
698
+ Open ``path`` using ``mode`` and returns a file object.
668
699
 
669
700
  It handles special cases if path is meant to be stdin or stdout.
670
701
  And also supports compression based on extension or file header of stream.
671
702
 
672
703
  Args:
673
- path (str): Filename or path to filename to open
674
- mode (str): Could be "r", "rb" to open file for reading, "w", "wb" for writing
675
- clobber (bool): Overwrite file if it already exists if `clobber=True`, else raises IOError.
704
+ path: Filename or path to filename to open
705
+ mode: Could be "r", "rb" to open file for reading, "w", "wb" for writing
706
+ clobber: Overwrite file if it already exists if `clobber=True`, else raises IOError.
676
707
 
677
708
  """
678
709
  binary = "b" in mode
@@ -724,24 +755,18 @@ def open_path(path, mode, clobber=True):
724
755
  fp = io.open(path, mode)
725
756
  # check if we are reading a compressed stream
726
757
  if not out and binary:
727
- if not hasattr(fp, "peek"):
728
- fp = Peekable(fp)
729
- peek_data = fp.peek(4)
730
- if peek_data[:2] == GZIP_MAGIC:
731
- fp = gzip.GzipFile(fileobj=fp, mode=mode)
732
- elif HAS_BZ2 and peek_data[:3] == BZ2_MAGIC:
733
- fp = bz2.BZ2File(fp, mode=mode)
734
- elif HAS_LZ4 and peek_data[:4] == LZ4_MAGIC:
735
- fp = lz4.open(fp, mode=mode)
736
- elif HAS_ZSTD and peek_data[:4] == ZSTD_MAGIC:
737
- dctx = zstd.ZstdDecompressor()
738
- fp = dctx.stream_reader(fp)
758
+ fp = open_stream(fp, mode)
739
759
  return fp
740
760
 
741
761
 
742
- def RecordAdapter(url, out, selector=None, clobber=True, **kwargs):
743
- url = str(url or "")
744
-
762
+ def RecordAdapter(
763
+ url: Optional[str] = None,
764
+ out: bool = False,
765
+ selector: Optional[str] = None,
766
+ clobber: bool = True,
767
+ fileobj: Optional[BinaryIO] = None,
768
+ **kwargs,
769
+ ) -> Union[AbstractWriter, AbstractReader]:
745
770
  # Guess adapter based on extension
746
771
  ext_to_adapter = {
747
772
  ".avro": "avro",
@@ -749,42 +774,94 @@ def RecordAdapter(url, out, selector=None, clobber=True, **kwargs):
749
774
  ".jsonl": "jsonfile",
750
775
  ".csv": "csvfile",
751
776
  }
752
- _, ext = os.path.splitext(url)
753
-
754
- adapter_scheme = ext_to_adapter.get(ext, "stream")
755
- if "://" not in url:
756
- url = f"{adapter_scheme}://{url}"
757
-
758
- p = urlparse(url, scheme=adapter_scheme)
759
- adapter, _, sub_adapter = p.scheme.partition("+")
760
-
777
+ cls_stream = None
778
+ cls_url = None
779
+ adapter = None
780
+
781
+ # When a url is given, we interpret it to determine what kind of adapter we need. This piece of logic is always
782
+ # necessary for the RecordWriter (as it does not currently support file-like objects), and only needed for
783
+ # RecordReader if a url is provided.
784
+ if out is True or url not in ("-", "", None):
785
+ # Either stdout / stdin is given, or a path-like string.
786
+ url = str(url or "")
787
+ _, ext = os.path.splitext(url)
788
+
789
+ adapter_scheme = ext_to_adapter.get(ext, "stream")
790
+ if "://" not in url:
791
+ url = f"{adapter_scheme}://{url}"
792
+ p = urlparse(url, scheme=adapter_scheme)
793
+ adapter, _, sub_adapter = p.scheme.partition("+")
794
+
795
+ arg_dict = dict(parse_qsl(p.query))
796
+ arg_dict.update(kwargs)
797
+
798
+ cls_url = p.netloc + p.path
799
+ if sub_adapter:
800
+ cls_url = sub_adapter + "://" + cls_url
801
+ elif url in ("-", ""):
802
+ # For reading stdin, we cannot rely on an extension to know what sort of stream is incoming. Thus, we will treat
803
+ # it as a 'fileobj', where we can peek into the stream and try to select the appropriate adapter.
804
+ fileobj = getattr(sys.stdin, "buffer", sys.stdin)
805
+ if fileobj is not None:
806
+ # This record adapter has received a file-like object for record reading
807
+ # We just need to find the right adapter by peeking into the first few bytes.
808
+
809
+ # First, we open the stream. If the stream is compressed, open_stream will wrap it for us into a decompressor.
810
+ cls_stream = open_stream(fileobj, "rb")
811
+
812
+ # Now, we have a stream that will be transparently decompressed but we still do not know what adapter to use.
813
+ # This requires a new peek into the transparent stream. This peek will cause the stream pointer to be moved.
814
+ # Therefore, find_adapter_for_stream returns both a BinaryIO-supportive object that can correctly read the
815
+ # adjusted stream, and a string indicating the type of adapter to be used on said stream.
816
+ arg_dict = kwargs.copy()
817
+
818
+ # If a user did not provide a url, we have to peek into the stream to be able to determine the right adapter
819
+ # based on magic bytes encountered in the first few bytes of the stream.
820
+ if adapter is None:
821
+ cls_stream, adapter = find_adapter_for_stream(cls_stream)
822
+ if adapter is None:
823
+ peek_data = cls_stream.peek(RECORDSTREAM_MAGIC_DEPTH)
824
+ if peek_data and peek_data.startswith(b"<"):
825
+ # As peek() can result in a larger buffer than requested, we make sure the peek_data variable isn't
826
+ # unnecessarily long in the error message.
827
+ peek_data = peek_data[:RECORDSTREAM_MAGIC_DEPTH]
828
+ raise RecordAdapterNotFound(
829
+ (
830
+ f"Could not find a reader for input {peek_data!r}. Are you perhaps "
831
+ "entering record text, rather than a record stream? This can be fixed by using "
832
+ "'rdump -w -' to write a record stream to stdout."
833
+ )
834
+ )
835
+ raise RecordAdapterNotFound("Could not find adapter for file-like object")
836
+
837
+ # Now that we know which adapter is needed, we import it.
761
838
  mod = importlib.import_module("flow.record.adapter.{}".format(adapter))
762
-
763
839
  clsname = ("{}Writer" if out else "{}Reader").format(adapter.title())
764
840
 
765
841
  cls = getattr(mod, clsname)
766
- arg_dict = dict(parse_qsl(p.query))
767
- arg_dict.update(kwargs)
768
- cls_url = p.netloc + p.path
769
- if sub_adapter:
770
- cls_url = sub_adapter + "://" + cls_url
771
-
772
842
  if not out and selector:
773
843
  arg_dict["selector"] = selector
774
844
 
775
845
  if out:
776
846
  arg_dict["clobber"] = clobber
777
-
778
847
  log.debug("Creating {!r} for {!r} with args {!r}".format(cls, url, arg_dict))
848
+
849
+ if fileobj is not None:
850
+ return cls(cls_stream, **arg_dict)
779
851
  return cls(cls_url, **arg_dict)
780
852
 
781
853
 
782
- def RecordReader(url=None, selector=None, **kwargs):
783
- return RecordAdapter(url, False, selector=selector, **kwargs)
854
+ def RecordReader(
855
+ url: Optional[str] = None,
856
+ selector: Optional[str] = None,
857
+ fileobj: Optional[BinaryIO] = None,
858
+ **kwargs,
859
+ ) -> AbstractReader:
860
+ return RecordAdapter(url=url, out=False, selector=selector, fileobj=fileobj, **kwargs)
784
861
 
785
862
 
786
- def RecordWriter(url=None, clobber=True, **kwargs):
787
- return RecordAdapter(url, True, clobber=clobber, **kwargs)
863
+ def RecordWriter(url: Optional[str] = None, clobber: bool = True, **kwargs) -> AbstractWriter:
864
+ return RecordAdapter(url=url, out=True, clobber=clobber, **kwargs)
788
865
 
789
866
 
790
867
  def stream(src, dst):
@@ -834,7 +911,7 @@ def fieldtype(clspath: str) -> FieldType:
834
911
 
835
912
  @functools.lru_cache(maxsize=4069)
836
913
  def merge_record_descriptors(
837
- descriptors: Tuple[RecordDescriptor], replace: bool = False, name: Optional[str] = None
914
+ descriptors: tuple[RecordDescriptor], replace: bool = False, name: Optional[str] = None
838
915
  ) -> RecordDescriptor:
839
916
  """Create a newly merged RecordDescriptor from a list of RecordDescriptors.
840
917
  This function uses a cache to avoid creating the same descriptor multiple times.
@@ -861,7 +938,7 @@ def merge_record_descriptors(
861
938
 
862
939
 
863
940
  def extend_record(
864
- record: Record, other_records: List[Record], replace: bool = False, name: Optional[str] = None
941
+ record: Record, other_records: list[Record], replace: bool = False, name: Optional[str] = None
865
942
  ) -> Record:
866
943
  """Extend ``record`` with fields and values from ``other_records``.
867
944
 
@@ -4,3 +4,7 @@ class RecordDescriptorError(Exception):
4
4
 
5
5
  class RecordDescriptorNotFound(Exception):
6
6
  """The specified record descriptor could not be found"""
7
+
8
+
9
+ class RecordAdapterNotFound(Exception):
10
+ """Could not find a fitting RecordAdapter for a given input"""
@@ -15,9 +15,14 @@ from typing import Any, Optional, Tuple
15
15
  from urllib.parse import urlparse
16
16
 
17
17
  try:
18
- from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
18
+ try:
19
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
20
+ except ImportError:
21
+ from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError
22
+ HAS_ZONE_INFO = True
19
23
  except ImportError:
20
- from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError
24
+ HAS_ZONE_INFO = False
25
+
21
26
 
22
27
  from flow.record.base import FieldType
23
28
 
@@ -50,9 +55,16 @@ def flow_record_tz(*, default_tz: str = "UTC") -> Optional[ZoneInfo | UTC]:
50
55
  Returns:
51
56
  None if ``FLOW_RECORD_TZ=NONE`` otherwise ``ZoneInfo(FLOW_RECORD_TZ)`` or ``UTC`` if ZoneInfo is not found.
52
57
  """
58
+
53
59
  tz = os.environ.get("FLOW_RECORD_TZ", default_tz)
54
60
  if tz.upper() == "NONE":
55
61
  return None
62
+
63
+ if not HAS_ZONE_INFO:
64
+ if tz != "UTC":
65
+ warnings.warn("Cannot use FLOW_RECORD_TZ due to missing zoneinfo module, defaulting to 'UTC'.")
66
+ return UTC
67
+
56
68
  try:
57
69
  return ZoneInfo(tz)
58
70
  except ZoneInfoNotFoundError as exc:
@@ -8,7 +8,7 @@ import sys
8
8
  from collections import ChainMap
9
9
  from functools import lru_cache
10
10
 
11
- from flow.record import RecordWriter
11
+ from flow.record import RECORDSTREAM_MAGIC, RecordWriter
12
12
  from flow.record.fieldtypes import fieldtype_for_value
13
13
  from flow.record.selector import make_selector
14
14
 
@@ -17,8 +17,6 @@ from .packer import RecordPacker
17
17
 
18
18
  log = logging.getLogger(__package__)
19
19
 
20
- RECORDSTREAM_MAGIC = b"RECORDSTREAM\n"
21
-
22
20
 
23
21
  def RecordOutput(fp):
24
22
  """Return a RecordPrinter if `fp` is a tty otherwise a RecordStreamWriter."""
@@ -0,0 +1,16 @@
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
+ TYPE_CHECKING = False
4
+ if TYPE_CHECKING:
5
+ from typing import Tuple, Union
6
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
7
+ else:
8
+ VERSION_TUPLE = object
9
+
10
+ version: str
11
+ __version__: str
12
+ __version_tuple__: VERSION_TUPLE
13
+ version_tuple: VERSION_TUPLE
14
+
15
+ __version__ = version = '3.13.dev2'
16
+ __version_tuple__ = version_tuple = (3, 13, 'dev2')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.12.dev5
3
+ Version: 3.13.dev2
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -18,13 +18,24 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
18
18
  Classifier: Topic :: Utilities
19
19
  Requires-Python: ~=3.7
20
20
  Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ License-File: COPYRIGHT
23
+ Requires-Dist: msgpack>=0.5.2
24
+ Requires-Dist: backports.zoneinfo[tzdata]; python_version < "3.9"
25
+ Requires-Dist: tzdata; platform_system == "Windows"
21
26
  Provides-Extra: compression
27
+ Requires-Dist: lz4; extra == "compression"
28
+ Requires-Dist: zstandard; extra == "compression"
22
29
  Provides-Extra: elastic
30
+ Requires-Dist: elasticsearch; extra == "elastic"
23
31
  Provides-Extra: geoip
32
+ Requires-Dist: maxminddb; extra == "geoip"
24
33
  Provides-Extra: avro
34
+ Requires-Dist: fastavro[snappy]; extra == "avro"
25
35
  Provides-Extra: test
26
- License-File: LICENSE
27
- License-File: COPYRIGHT
36
+ Requires-Dist: lz4; extra == "test"
37
+ Requires-Dist: zstandard; extra == "test"
38
+ Requires-Dist: fastavro; extra == "test"
28
39
 
29
40
  # flow.record
30
41
 
@@ -52,6 +52,7 @@ tests/__init__.py
52
52
  tests/_utils.py
53
53
  tests/selector_explain_example.py
54
54
  tests/standalone_test.py
55
+ tests/test_avro.py
55
56
  tests/test_avro_adapter.py
56
57
  tests/test_compiled_selector.py
57
58
  tests/test_deprecations.py
@@ -0,0 +1,64 @@
1
+ from io import BytesIO
2
+
3
+ import pytest
4
+
5
+ from flow.record import RecordDescriptor, RecordReader
6
+ from flow.record.adapter.avro import AvroReader, AvroWriter
7
+ from flow.record.base import HAS_AVRO
8
+
9
+
10
+ def generate_records(amount):
11
+ TestRecordWithFooBar = RecordDescriptor(
12
+ "test/record",
13
+ [
14
+ ("string", "name"),
15
+ ("string", "foo"),
16
+ ("string", "bar"),
17
+ ],
18
+ )
19
+ for i in range(amount):
20
+ yield TestRecordWithFooBar(name=f"record{i}", foo="bar", bar="baz")
21
+
22
+
23
+ def test_writing_reading_avrofile(tmp_path):
24
+ if not HAS_AVRO:
25
+ pytest.skip("fastavro module not installed")
26
+ avro_path = tmp_path / "test.avro"
27
+
28
+ out = AvroWriter(avro_path)
29
+ for rec in generate_records(100):
30
+ out.write(rec)
31
+ out.close()
32
+
33
+ reader = AvroReader(avro_path)
34
+ for index, rec in enumerate(reader):
35
+ assert rec.name == f"record{index}"
36
+ assert rec.foo == "bar"
37
+ assert rec.bar == "baz"
38
+
39
+
40
+ def test_avrostream_filelike_object(tmp_path):
41
+ if not HAS_AVRO:
42
+ pytest.skip("fastavro module not installed")
43
+ avro_path = tmp_path / "test.avro"
44
+
45
+ out = AvroWriter(avro_path)
46
+ for rec in generate_records(100):
47
+ out.write(rec)
48
+ out.close()
49
+
50
+ with open(avro_path, "rb") as avro_file:
51
+ avro_buffer = avro_file.read()
52
+
53
+ avro_io = BytesIO(avro_buffer)
54
+
55
+ reader = RecordReader(fileobj=avro_io)
56
+
57
+ # The record reader should automatically have created an 'AvroReader' to handle the Avro Record Stream
58
+ assert isinstance(reader, AvroReader)
59
+
60
+ # Verify if selector worked and records are the same
61
+ for index, rec in enumerate(reader):
62
+ assert rec.name == f"record{index}"
63
+ assert rec.foo == "bar"
64
+ assert rec.bar == "baz"
@@ -67,7 +67,7 @@ def test_rdump_pipe(tmp_path):
67
67
  )
68
68
  stdout, stderr = p2.communicate()
69
69
  assert stdout.strip() == b""
70
- assert b"Unknown file format, not a RecordStream" in stderr.strip()
70
+ assert b"Are you perhaps entering record text, rather than a record stream?" in stderr.strip()
71
71
 
72
72
  # rdump test.records -w - | rdump -s 'r.count in (1, 3, 9)' -w filtered.records
73
73
  path2 = tmp_path / "filtered.records"
@@ -461,6 +461,43 @@ def test_rdump_headerless_csv(tmp_path, capsysbinary):
461
461
  ]
462
462
 
463
463
 
464
+ def test_rdump_stdin_peek(tmp_path):
465
+ if platform.system() == "Windows":
466
+ pytest.skip("No Gzip on Windows")
467
+
468
+ TestRecord = RecordDescriptor(
469
+ "test/record",
470
+ [
471
+ ("varint", "count"),
472
+ ("string", "foo"),
473
+ ],
474
+ )
475
+
476
+ path = tmp_path / "test.records"
477
+ writer = RecordWriter(path)
478
+ # generate some test records
479
+ for i in range(10):
480
+ writer.write(TestRecord(count=i, foo="bar"))
481
+ writer.close()
482
+
483
+ # Gzip compress records file
484
+ compress_cmd = ["gzip", "--keep", str(path)]
485
+ subprocess.check_output(compress_cmd)
486
+ compressed_path = str(path) + ".gz"
487
+
488
+ # Rdump should transparently decompress and select the correct adapter
489
+ p1 = subprocess.Popen(["cat", compressed_path], stdout=subprocess.PIPE)
490
+ p2 = subprocess.Popen(
491
+ ["rdump", "-s", "r.count == 5"],
492
+ stdin=p1.stdout,
493
+ stdout=subprocess.PIPE,
494
+ stderr=subprocess.PIPE,
495
+ )
496
+ stdout, _ = p2.communicate()
497
+
498
+ assert stdout.strip() in (b"<test/record count=5 foo='bar'>", b"<test/record count=5L foo=u'bar'>")
499
+
500
+
464
501
  @pytest.mark.parametrize(
465
502
  ("total_records", "count", "skip", "expected_numbers"),
466
503
  [
@@ -18,6 +18,7 @@ from flow.record import (
18
18
  RecordStreamReader,
19
19
  RecordWriter,
20
20
  )
21
+ from flow.record.adapter.stream import StreamReader
21
22
  from flow.record.base import (
22
23
  BZ2_MAGIC,
23
24
  GZIP_MAGIC,
@@ -46,6 +47,26 @@ def test_stream_writer_reader():
46
47
  assert set([2, 7]) == set([r.number for r in records])
47
48
 
48
49
 
50
+ def test_recordstream_filelike_object():
51
+ fp = StringIO()
52
+ out = RecordOutput(fp)
53
+ for rec in generate_records():
54
+ out.write(rec)
55
+
56
+ fp.seek(0)
57
+ reader = RecordReader(fileobj=fp, selector="r.number in (6, 9)")
58
+
59
+ # The record reader should automatically have created a 'StreamReader' to handle the Record Stream.
60
+ assert isinstance(reader, StreamReader)
61
+
62
+ # Verify if selector worked and records are the same
63
+ records = []
64
+ for rec in reader:
65
+ records.append(rec)
66
+
67
+ assert set([6, 9]) == set([r.number for r in records])
68
+
69
+
49
70
  @pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
50
71
  def test_file_writer_reader(tmpdir, PSelector):
51
72
  p = tmpdir.join("test.records")
@@ -104,6 +125,15 @@ def test_compressed_writer_reader(tmpdir, compression):
104
125
 
105
126
  assert numbers == list(range(count))
106
127
 
128
+ # Using a file-handle instead of a path should also work
129
+ with open(path, "rb") as fh:
130
+ reader = RecordReader(fileobj=fh)
131
+ numbers = []
132
+ for rec in reader:
133
+ numbers.append(rec.number)
134
+
135
+ assert numbers == list(range(count))
136
+
107
137
 
108
138
  def test_path_template_writer(tmpdir):
109
139
  TestRecord = RecordDescriptor(
@@ -5,7 +5,8 @@ import pathlib
5
5
  import subprocess
6
6
  import sys
7
7
  from datetime import datetime, timezone
8
- from unittest.mock import mock_open, patch
8
+ from io import BytesIO
9
+ from unittest.mock import MagicMock, mock_open, patch
9
10
 
10
11
  import msgpack
11
12
  import pytest
@@ -589,7 +590,11 @@ def test_record_adapter_windows_path(tmp_path):
589
590
  writer.write(TestRecord("foo"))
590
591
  writer.write(TestRecord("bar"))
591
592
 
592
- with patch("io.open", mock_open(read_data=path_records.read_bytes())) as m:
593
+ test_read_buf = BytesIO(path_records.read_bytes())
594
+ mock_reader = MagicMock(wraps=test_read_buf, spec=BytesIO)
595
+
596
+ with patch("io.open", MagicMock(return_value=mock_reader)) as m:
597
+ m.return_value.closed = False
593
598
  adapter = RecordReader(r"c:\users\user\test.records")
594
599
  assert type(adapter).__name__ == "StreamReader"
595
600
  m.assert_called_once_with(r"c:\users\user\test.records", "rb")
@@ -1,4 +0,0 @@
1
- # file generated by setuptools_scm
2
- # don't change, don't track in version control
3
- __version__ = version = '3.12.dev5'
4
- __version_tuple__ = version_tuple = (3, 12, 'dev5')
File without changes
File without changes