flow.record 3.19.dev3__tar.gz → 3.19.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {flow_record-3.19.dev3/flow.record.egg-info → flow_record-3.19.dev5}/PKG-INFO +2 -2
  2. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/__init__.py +27 -35
  3. flow_record-3.19.dev5/flow/record/adapter/__init__.py +53 -0
  4. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/archive.py +12 -5
  5. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/avro.py +18 -15
  6. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/broker.py +16 -8
  7. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/csvfile.py +26 -12
  8. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/elastic.py +11 -4
  9. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/jsonfile.py +20 -9
  10. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/line.py +3 -6
  11. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/mongo.py +17 -8
  12. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/split.py +12 -5
  13. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/splunk.py +39 -41
  14. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/sqlite.py +7 -4
  15. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/stream.py +10 -4
  16. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/text.py +15 -9
  17. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/xlsx.py +17 -9
  18. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/base.py +107 -123
  19. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/__init__.py +140 -151
  20. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/credential.py +2 -0
  21. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/net/__init__.py +5 -4
  22. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/net/ip.py +5 -4
  23. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/net/ipv4.py +35 -34
  24. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/net/tcp.py +2 -0
  25. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/fieldtypes/net/udp.py +2 -0
  26. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/jsonpacker.py +19 -19
  27. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/packer.py +26 -22
  28. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/selector.py +105 -119
  29. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/stream.py +66 -53
  30. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/tools/geoip.py +18 -15
  31. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/tools/rdump.py +10 -8
  32. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/utils.py +11 -10
  33. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/version.py +2 -2
  34. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/whitelist.py +2 -0
  35. {flow_record-3.19.dev3 → flow_record-3.19.dev5/flow.record.egg-info}/PKG-INFO +2 -2
  36. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/pyproject.toml +48 -4
  37. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/_utils.py +10 -2
  38. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/selector_explain_example.py +4 -2
  39. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/standalone_test.py +6 -4
  40. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_adapter_line.py +3 -1
  41. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_adapter_text.py +3 -1
  42. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_avro.py +13 -7
  43. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_avro_adapter.py +13 -6
  44. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_compiled_selector.py +5 -3
  45. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_csv_adapter.py +6 -1
  46. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_deprecations.py +6 -4
  47. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_elastic_adapter.py +7 -1
  48. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_fieldtype_ip.py +18 -17
  49. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_fieldtypes.py +51 -61
  50. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_json_packer.py +5 -3
  51. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_json_record_adapter.py +24 -18
  52. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_multi_timestamp.py +13 -11
  53. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_packer.py +14 -12
  54. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_rdump.py +57 -39
  55. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_record.py +47 -42
  56. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_record_adapter.py +81 -81
  57. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_record_descriptor.py +10 -8
  58. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_regression.py +50 -45
  59. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_selector.py +53 -54
  60. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_splunk_adapter.py +22 -17
  61. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_sqlite_duckdb_adapter.py +21 -13
  62. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/test_xlsx_adapter.py +8 -3
  63. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tox.ini +5 -19
  64. flow_record-3.19.dev3/flow/record/adapter/__init__.py +0 -63
  65. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/COPYRIGHT +0 -0
  66. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/LICENSE +0 -0
  67. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/MANIFEST.in +0 -0
  68. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/README.md +0 -0
  69. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/examples/filesystem.py +0 -0
  70. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/examples/passivedns.py +0 -0
  71. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/examples/records.json +0 -0
  72. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/examples/tcpconn.py +0 -0
  73. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/adapter/duckdb.py +0 -0
  74. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/exceptions.py +0 -0
  75. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow/record/tools/__init__.py +0 -0
  76. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow.record.egg-info/SOURCES.txt +0 -0
  77. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow.record.egg-info/dependency_links.txt +0 -0
  78. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow.record.egg-info/entry_points.txt +0 -0
  79. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow.record.egg-info/requires.txt +0 -0
  80. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/flow.record.egg-info/top_level.txt +0 -0
  81. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/setup.cfg +0 -0
  82. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/__init__.py +0 -0
  83. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/docs/Makefile +0 -0
  84. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/docs/conf.py +0 -0
  85. {flow_record-3.19.dev3 → flow_record-3.19.dev5}/tests/docs/index.rst +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: flow.record
3
- Version: 3.19.dev3
3
+ Version: 3.19.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -1,5 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import gzip
2
- import os
4
+ from pathlib import Path
3
5
 
4
6
  from flow.record.base import (
5
7
  IGNORE_FIELDS_FOR_COMPARISON,
@@ -39,71 +41,61 @@ from flow.record.stream import (
39
41
 
40
42
  __all__ = [
41
43
  "IGNORE_FIELDS_FOR_COMPARISON",
42
- "RECORD_VERSION",
43
44
  "RECORDSTREAM_MAGIC",
45
+ "RECORD_VERSION",
46
+ "DynamicDescriptor",
44
47
  "FieldType",
45
- "Record",
46
48
  "GroupedRecord",
47
- "RecordDescriptor",
49
+ "JsonRecordPacker",
50
+ "PathTemplateWriter",
51
+ "Record",
48
52
  "RecordAdapter",
53
+ "RecordArchiver",
54
+ "RecordDescriptor",
55
+ "RecordDescriptorError",
49
56
  "RecordField",
50
- "RecordReader",
51
- "RecordWriter",
52
57
  "RecordOutput",
53
- "RecordPrinter",
54
58
  "RecordPacker",
55
- "JsonRecordPacker",
56
- "RecordStreamWriter",
59
+ "RecordPrinter",
60
+ "RecordReader",
57
61
  "RecordStreamReader",
58
- "open_path_or_stream",
62
+ "RecordStreamWriter",
63
+ "RecordWriter",
64
+ "dynamic_fieldtype",
65
+ "extend_record",
66
+ "ignore_fields_for_comparison",
67
+ "iter_timestamped_records",
59
68
  "open_path",
69
+ "open_path_or_stream",
60
70
  "open_stream",
61
- "ignore_fields_for_comparison",
71
+ "record_stream",
62
72
  "set_ignored_fields_for_comparison",
63
73
  "stream",
64
- "dynamic_fieldtype",
65
- "DynamicDescriptor",
66
- "PathTemplateWriter",
67
- "RecordArchiver",
68
- "RecordDescriptorError",
69
- "record_stream",
70
- "extend_record",
71
- "iter_timestamped_records",
72
74
  ]
73
75
 
74
76
 
75
- class View:
76
- fields = None
77
-
78
- def __init__(self, fields):
79
- self.fields = fields
80
-
81
- def __iter__(self, fields):
82
- pass
83
-
84
-
85
77
  class RecordDateSplitter:
86
78
  basepath = None
87
79
  out = None
88
80
 
89
- def __init__(self, basepath):
90
- self.basepath = basepath
81
+ def __init__(self, basepath: str | Path):
82
+ self.basepath = Path(basepath)
91
83
  self.out = {}
92
84
 
93
- def getstream(self, t):
85
+ def getstream(self, t: tuple[int, int, int]) -> RecordStreamWriter:
94
86
  if t not in self.out:
95
- path = os.path.join(self.basepath, "-".join(["{:2d}".format(v) for v in t]) + ".rec.gz")
87
+ path = self.basepath.joinpath("-".join([f"{v:2d}" for v in t]) + ".rec.gz")
96
88
  f = gzip.GzipFile(path, "wb")
97
89
  rs = RecordStreamWriter(f)
98
90
  self.out[t] = rs
99
91
  return self.out[t]
100
92
 
101
- def write(self, r):
93
+ def write(self, r: Record) -> None:
102
94
  t = (r.ts.year, r.ts.month, r.ts.day)
103
95
  rs = self.getstream(t)
104
96
  rs.write(r)
105
97
  rs.fp.flush()
106
98
 
107
- def close(self):
99
+ def close(self) -> None:
108
100
  for rs in self.out.values():
109
101
  rs.close()
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__) # make this namespace extensible from other packages
4
+ import abc
5
+ from typing import TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from collections.abc import Iterator
9
+
10
+ from flow.record.base import Record
11
+
12
+
13
+ class AbstractWriter(metaclass=abc.ABCMeta):
14
+ @abc.abstractmethod
15
+ def write(self, rec: Record) -> None:
16
+ """Write a record."""
17
+ raise NotImplementedError
18
+
19
+ @abc.abstractmethod
20
+ def flush(self) -> None:
21
+ """Flush any buffered writes."""
22
+ raise NotImplementedError
23
+
24
+ @abc.abstractmethod
25
+ def close(self) -> None:
26
+ """Close the Writer, no more writes will be possible."""
27
+ raise NotImplementedError
28
+
29
+ def __del__(self) -> None:
30
+ self.close()
31
+
32
+ def __enter__(self) -> AbstractWriter: # noqa: PYI034
33
+ return self
34
+
35
+ def __exit__(self, *args) -> None:
36
+ self.flush()
37
+ self.close()
38
+
39
+
40
+ class AbstractReader(metaclass=abc.ABCMeta):
41
+ @abc.abstractmethod
42
+ def __iter__(self) -> Iterator[Record]:
43
+ """Return a record iterator."""
44
+ raise NotImplementedError
45
+
46
+ def close(self) -> None: # noqa: B027
47
+ """Close the Reader, can be overridden to properly free resources."""
48
+
49
+ def __enter__(self) -> AbstractReader: # noqa: PYI034
50
+ return self
51
+
52
+ def __exit__(self, *args) -> None:
53
+ self.close()
@@ -1,6 +1,13 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  from flow.record.adapter import AbstractReader, AbstractWriter
2
6
  from flow.record.stream import RecordArchiver
3
7
 
8
+ if TYPE_CHECKING:
9
+ from flow.record.base import Record
10
+
4
11
  __usage__ = """
5
12
  Record archiver adapter, writes records to YYYY/mm/dd directories (writer only)
6
13
  ---
@@ -12,7 +19,7 @@ Write usage: rdump -w archive://[PATH]
12
19
  class ArchiveWriter(AbstractWriter):
13
20
  writer = None
14
21
 
15
- def __init__(self, path, **kwargs):
22
+ def __init__(self, path: str, **kwargs):
16
23
  self.path = path
17
24
 
18
25
  path_template = kwargs.get("path_template")
@@ -20,19 +27,19 @@ class ArchiveWriter(AbstractWriter):
20
27
 
21
28
  self.writer = RecordArchiver(self.path, path_template=path_template, name=name)
22
29
 
23
- def write(self, r):
30
+ def write(self, r: Record) -> None:
24
31
  self.writer.write(r)
25
32
 
26
- def flush(self):
33
+ def flush(self) -> None:
27
34
  # RecordArchiver already flushes after every write
28
35
  pass
29
36
 
30
- def close(self):
37
+ def close(self) -> None:
31
38
  if self.writer:
32
39
  self.writer.close()
33
40
  self.writer = None
34
41
 
35
42
 
36
43
  class ArchiveReader(AbstractReader):
37
- def __init__(self, path, **kwargs):
44
+ def __init__(self, path: str, **kwargs):
38
45
  raise NotImplementedError
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import json
4
4
  from datetime import datetime, timedelta, timezone
5
5
  from importlib.util import find_spec
6
- from typing import Any, Iterator
6
+ from typing import TYPE_CHECKING, Any, BinaryIO
7
7
 
8
8
  import fastavro
9
9
 
@@ -12,6 +12,10 @@ from flow.record.adapter import AbstractReader, AbstractWriter
12
12
  from flow.record.selector import make_selector
13
13
  from flow.record.utils import is_stdout
14
14
 
15
+ if TYPE_CHECKING:
16
+ from collections.abc import Iterator
17
+ from pathlib import Path
18
+
15
19
  __usage__ = """
16
20
  Apache AVRO adapter
17
21
  ---
@@ -52,7 +56,7 @@ class AvroWriter(AbstractWriter):
52
56
  fp = None
53
57
  writer = None
54
58
 
55
- def __init__(self, path, key=None, **kwargs):
59
+ def __init__(self, path: str | Path | BinaryIO, **kwargs):
56
60
  self.fp = record.open_path_or_stream(path, "wb")
57
61
 
58
62
  self.desc = None
@@ -69,11 +73,11 @@ class AvroWriter(AbstractWriter):
69
73
  self.writer = fastavro.write.Writer(self.fp, self.parsed_schema, codec=self.codec)
70
74
 
71
75
  if self.desc != r._desc:
72
- raise Exception("Mixed record types")
76
+ raise ValueError("Mixed record types")
73
77
 
74
78
  self.writer.write(r._packdict())
75
79
 
76
- def flush(self):
80
+ def flush(self) -> None:
77
81
  if not self.writer:
78
82
  self.writer = fastavro.write.Writer(
79
83
  self.fp,
@@ -92,21 +96,21 @@ class AvroWriter(AbstractWriter):
92
96
  class AvroReader(AbstractReader):
93
97
  fp = None
94
98
 
95
- def __init__(self, path, selector=None, **kwargs):
99
+ def __init__(self, path: str, selector: str | None = None, **kwargs):
96
100
  self.fp = record.open_path_or_stream(path, "rb")
97
101
  self.selector = make_selector(selector)
98
102
 
99
103
  self.reader = fastavro.reader(self.fp)
100
104
  self.schema = self.reader.writer_schema
101
105
  if not self.schema:
102
- raise Exception("Missing Avro schema")
106
+ raise ValueError("Missing Avro schema")
103
107
 
104
108
  self.desc = schema_to_descriptor(self.schema)
105
109
 
106
110
  # Store the fieldnames that are of type "datetime"
107
- self.datetime_fields = set(
111
+ self.datetime_fields = {
108
112
  name for name, field in self.desc.get_all_fields().items() if field.typename == "datetime"
109
- )
113
+ }
110
114
 
111
115
  def __iter__(self) -> Iterator[record.Record]:
112
116
  for obj in self.reader:
@@ -149,7 +153,7 @@ def descriptor_to_schema(desc: record.RecordDescriptor) -> dict[str, Any]:
149
153
  else:
150
154
  avro_type = AVRO_TYPE_MAP.get(field_type)
151
155
  if not avro_type:
152
- raise Exception("Unsupported Avro type: {}".format(field_type))
156
+ raise ValueError(f"Unsupported Avro type: {field_type}")
153
157
 
154
158
  field_schema["type"] = [avro_type, "null"]
155
159
 
@@ -190,11 +194,10 @@ def avro_type_to_flow_type(ftype: list) -> str:
190
194
  if isinstance(t, dict):
191
195
  if t.get("type") == "array":
192
196
  item_type = avro_type_to_flow_type(t.get("items"))
193
- return "{}[]".format(item_type)
194
- else:
195
- logical_type = t.get("logicalType")
196
- if logical_type and ("time" in logical_type or "date" in logical_type):
197
- return "datetime"
197
+ return f"{item_type}[]"
198
+ logical_type = t.get("logicalType")
199
+ if logical_type and ("time" in logical_type or "date" in logical_type):
200
+ return "datetime"
198
201
 
199
202
  if t == "null":
200
203
  continue
@@ -202,4 +205,4 @@ def avro_type_to_flow_type(ftype: list) -> str:
202
205
  if t in RECORD_TYPE_MAP:
203
206
  return RECORD_TYPE_MAP[t]
204
207
 
205
- raise TypeError("Can't map avro type to flow type: {}".format(t))
208
+ raise TypeError(f"Can't map avro type to flow type: {t}")
@@ -1,7 +1,15 @@
1
- from flow.broker import Publisher, Subscriber
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
2
4
 
5
+ from flow.broker import Publisher, Subscriber
3
6
  from flow.record.adapter import AbstractReader, AbstractWriter
4
7
 
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Iterator
10
+
11
+ from flow.record.base import Record
12
+
5
13
  __usage__ = """
6
14
  PubSub adapter using flow.broker
7
15
  ---
@@ -13,23 +21,23 @@ Read usage: rdump broker+tcp://[IP]:[PORT] -s True
13
21
  class BrokerWriter(AbstractWriter):
14
22
  publisher = None
15
23
 
16
- def __init__(self, uri, source=None, classification=None, **kwargs):
24
+ def __init__(self, uri: str, source: str | None = None, classification: str | None = None, **kwargs):
17
25
  self.publisher = Publisher(uri, **kwargs)
18
26
  self.source = source
19
27
  self.classification = classification
20
28
 
21
- def write(self, r):
29
+ def write(self, r: Record) -> None:
22
30
  record = r._replace(
23
31
  _source=self.source or r._source,
24
32
  _classification=self.classification or r._classification,
25
33
  )
26
34
  self.publisher.send(record)
27
35
 
28
- def flush(self):
36
+ def flush(self) -> None:
29
37
  if self.publisher:
30
38
  self.publisher.flush()
31
39
 
32
- def close(self):
40
+ def close(self) -> None:
33
41
  if self.publisher:
34
42
  if hasattr(self.publisher, "stop"):
35
43
  # Requires flow.broker >= 1.1.1
@@ -42,14 +50,14 @@ class BrokerWriter(AbstractWriter):
42
50
  class BrokerReader(AbstractReader):
43
51
  subscriber = None
44
52
 
45
- def __init__(self, uri, name=None, selector=None, **kwargs):
53
+ def __init__(self, uri: str, name: str | None = None, selector: str | None = None, **kwargs):
46
54
  self.subscriber = Subscriber(uri, **kwargs)
47
55
  self.subscription = self.subscriber.select(name, str(selector))
48
56
 
49
- def __iter__(self):
57
+ def __iter__(self) -> Iterator[Record]:
50
58
  return iter(self.subscription)
51
59
 
52
- def close(self):
60
+ def close(self) -> None:
53
61
  if self.subscriber:
54
62
  self.subscriber.stop()
55
63
  self.subscriber = None
@@ -1,14 +1,19 @@
1
- from __future__ import absolute_import
1
+ from __future__ import annotations
2
2
 
3
3
  import csv
4
4
  import sys
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from flow.record import RecordDescriptor
7
9
  from flow.record.adapter import AbstractReader, AbstractWriter
8
- from flow.record.base import normalize_fieldname
10
+ from flow.record.base import Record, normalize_fieldname
9
11
  from flow.record.selector import make_selector
10
12
  from flow.record.utils import is_stdout
11
13
 
14
+ if TYPE_CHECKING:
15
+ from collections.abc import Iterator
16
+
12
17
  __usage__ = """
13
18
  Comma-separated values (CSV) adapter
14
19
  ---
@@ -23,13 +28,20 @@ Optional parameters:
23
28
 
24
29
 
25
30
  class CsvfileWriter(AbstractWriter):
26
- def __init__(self, path, fields=None, exclude=None, lineterminator=None, **kwargs):
31
+ def __init__(
32
+ self,
33
+ path: str | Path | None,
34
+ fields: str | list[str] | None = None,
35
+ exclude: str | list[str] | None = None,
36
+ lineterminator: str = "\r\n",
37
+ **kwargs,
38
+ ):
27
39
  self.fp = None
28
40
  if path in (None, "", "-"):
29
41
  self.fp = sys.stdout
30
42
  else:
31
- self.fp = open(path, "w", newline="")
32
- self.lineterminator = lineterminator or "\r\n"
43
+ self.fp = Path(path).open("w", newline="") # noqa: SIM115
44
+ self.lineterminator = lineterminator
33
45
  for r, n in ((r"\r", "\r"), (r"\n", "\n"), (r"\t", "\t")):
34
46
  self.lineterminator = self.lineterminator.replace(r, n)
35
47
  self.desc = None
@@ -41,7 +53,7 @@ class CsvfileWriter(AbstractWriter):
41
53
  if isinstance(self.exclude, str):
42
54
  self.exclude = self.exclude.split(",")
43
55
 
44
- def write(self, r):
56
+ def write(self, r: Record) -> None:
45
57
  rdict = r._asdict(fields=self.fields, exclude=self.exclude)
46
58
  if not self.desc or self.desc != r._desc:
47
59
  self.desc = r._desc
@@ -49,24 +61,26 @@ class CsvfileWriter(AbstractWriter):
49
61
  self.writer.writeheader()
50
62
  self.writer.writerow(rdict)
51
63
 
52
- def flush(self):
64
+ def flush(self) -> None:
53
65
  if self.fp:
54
66
  self.fp.flush()
55
67
 
56
- def close(self):
68
+ def close(self) -> None:
57
69
  if self.fp and not is_stdout(self.fp):
58
70
  self.fp.close()
59
71
  self.fp = None
60
72
 
61
73
 
62
74
  class CsvfileReader(AbstractReader):
63
- def __init__(self, path, selector=None, fields=None, **kwargs):
75
+ def __init__(
76
+ self, path: str | Path | None, selector: str | None = None, fields: str | list[str] | None = None, **kwargs
77
+ ):
64
78
  self.fp = None
65
79
  self.selector = make_selector(selector)
66
80
  if path in (None, "", "-"):
67
81
  self.fp = sys.stdin
68
82
  else:
69
- self.fp = open(path, "r", newline="")
83
+ self.fp = Path(path).open("r", newline="") # noqa: SIM115
70
84
 
71
85
  self.dialect = "excel"
72
86
  if self.fp.seekable():
@@ -87,12 +101,12 @@ class CsvfileReader(AbstractReader):
87
101
  # Create RecordDescriptor from fields, skipping fields starting with "_" (reserved for internal use)
88
102
  self.desc = RecordDescriptor("csv/reader", [("string", col) for col in self.fields if not col.startswith("_")])
89
103
 
90
- def close(self):
104
+ def close(self) -> None:
91
105
  if self.fp:
92
106
  self.fp.close()
93
107
  self.fp = None
94
108
 
95
- def __iter__(self):
109
+ def __iter__(self) -> Iterator[Record]:
96
110
  for row in self.reader:
97
111
  rdict = dict(zip(self.fields, row))
98
112
  record = self.desc.init_from_dict(rdict)
@@ -4,7 +4,7 @@ import hashlib
4
4
  import logging
5
5
  import queue
6
6
  import threading
7
- from typing import Iterator
7
+ from typing import TYPE_CHECKING
8
8
 
9
9
  import elasticsearch
10
10
  import elasticsearch.helpers
@@ -13,7 +13,11 @@ from flow.record.adapter import AbstractReader, AbstractWriter
13
13
  from flow.record.base import Record, RecordDescriptor
14
14
  from flow.record.fieldtypes import fieldtype_for_value
15
15
  from flow.record.jsonpacker import JsonRecordPacker
16
- from flow.record.selector import CompiledSelector, Selector
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Iterator
19
+
20
+ from flow.record.selector import CompiledSelector, Selector
17
21
 
18
22
  __usage__ = """
19
23
  ElasticSearch adapter
@@ -25,6 +29,7 @@ Read usage: rdump elastic+[PROTOCOL]://[IP]:[PORT]?index=[INDEX]
25
29
 
26
30
  Optional arguments:
27
31
  [API_KEY]: base64 encoded api key to authenticate with (default: False)
32
+ [QUEUE_SIZE]: maximum queue size for writing records; limits memory usage (default: 100000)
28
33
  [INDEX]: name of the index to use (default: records)
29
34
  [VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
30
35
  [HASH_RECORD]: make record unique by hashing record [slow] (default: False)
@@ -43,6 +48,7 @@ class ElasticWriter(AbstractWriter):
43
48
  http_compress: str | bool = True,
44
49
  hash_record: str | bool = False,
45
50
  api_key: str | None = None,
51
+ queue_size: int = 100000,
46
52
  **kwargs,
47
53
  ) -> None:
48
54
  self.index = index
@@ -50,11 +56,12 @@ class ElasticWriter(AbstractWriter):
50
56
  verify_certs = str(verify_certs).lower() in ("1", "true")
51
57
  http_compress = str(http_compress).lower() in ("1", "true")
52
58
  self.hash_record = str(hash_record).lower() in ("1", "true")
59
+ queue_size = int(queue_size)
53
60
 
54
61
  if not uri.lower().startswith(("http://", "https://")):
55
62
  uri = "http://" + uri
56
63
 
57
- self.queue: queue.Queue[Record | StopIteration] = queue.Queue()
64
+ self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
58
65
  self.event = threading.Event()
59
66
 
60
67
  self.es = elasticsearch.Elasticsearch(
@@ -147,7 +154,7 @@ class ElasticWriter(AbstractWriter):
147
154
  self.event.set()
148
155
 
149
156
  def write(self, record: Record) -> None:
150
- self.queue.put_nowait(record)
157
+ self.queue.put(record)
151
158
 
152
159
  def flush(self) -> None:
153
160
  pass
@@ -1,4 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
4
+ from typing import TYPE_CHECKING, BinaryIO
2
5
 
3
6
  from flow import record
4
7
  from flow.record import JsonRecordPacker
@@ -7,6 +10,12 @@ from flow.record.fieldtypes import fieldtype_for_value
7
10
  from flow.record.selector import make_selector
8
11
  from flow.record.utils import is_stdout
9
12
 
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Iterator
15
+ from pathlib import Path
16
+
17
+ from flow.record.base import Record, RecordDescriptor
18
+
10
19
  __usage__ = """
11
20
  JSON adapter
12
21
  ---
@@ -21,7 +30,9 @@ Read usage: rdump jsonfile://[PATH]
21
30
  class JsonfileWriter(AbstractWriter):
22
31
  fp = None
23
32
 
24
- def __init__(self, path, indent=None, descriptors=True, **kwargs):
33
+ def __init__(
34
+ self, path: str | Path | BinaryIO, indent: str | int | None = None, descriptors: bool = True, **kwargs
35
+ ):
25
36
  self.descriptors = str(descriptors).lower() in ("true", "1")
26
37
  self.fp = record.open_path_or_stream(path, "w")
27
38
  if isinstance(indent, str):
@@ -30,21 +41,21 @@ class JsonfileWriter(AbstractWriter):
30
41
  if self.descriptors:
31
42
  self.packer.on_descriptor.add_handler(self.packer_on_new_descriptor)
32
43
 
33
- def packer_on_new_descriptor(self, descriptor):
44
+ def packer_on_new_descriptor(self, descriptor: RecordDescriptor) -> None:
34
45
  self._write(descriptor)
35
46
 
36
- def _write(self, obj):
47
+ def _write(self, obj: Record | RecordDescriptor) -> None:
37
48
  record_json = self.packer.pack(obj)
38
49
  self.fp.write(record_json + "\n")
39
50
 
40
- def write(self, r):
51
+ def write(self, r: Record) -> None:
41
52
  self._write(r)
42
53
 
43
- def flush(self):
54
+ def flush(self) -> None:
44
55
  if self.fp:
45
56
  self.fp.flush()
46
57
 
47
- def close(self):
58
+ def close(self) -> None:
48
59
  if self.fp and not is_stdout(self.fp):
49
60
  self.fp.close()
50
61
  self.fp = None
@@ -53,17 +64,17 @@ class JsonfileWriter(AbstractWriter):
53
64
  class JsonfileReader(AbstractReader):
54
65
  fp = None
55
66
 
56
- def __init__(self, path, selector=None, **kwargs):
67
+ def __init__(self, path: str | Path | BinaryIO, selector: str | None = None, **kwargs):
57
68
  self.selector = make_selector(selector)
58
69
  self.fp = record.open_path_or_stream(path, "r")
59
70
  self.packer = JsonRecordPacker()
60
71
 
61
- def close(self):
72
+ def close(self) -> None:
62
73
  if self.fp:
63
74
  self.fp.close()
64
75
  self.fp = None
65
76
 
66
- def __iter__(self):
77
+ def __iter__(self) -> Iterator[Record]:
67
78
  for line in self.fp:
68
79
  obj = self.packer.unpack(line)
69
80
  if isinstance(obj, record.Record):
@@ -60,12 +60,9 @@ class LineWriter(AbstractWriter):
60
60
  self.count += 1
61
61
  self.fp.write(f"--[ RECORD {self.count} ]--\n".encode())
62
62
  if rdict:
63
- if rdict_types:
64
- # also account for extra characters for fieldtype and whitespace + parenthesis
65
- width = max(len(k + rdict_types[k]) for k in rdict) + 3
66
- else:
67
- width = max(len(k) for k in rdict)
68
- fmt = "{{:>{width}}} = {{}}\n".format(width=width)
63
+ # also account for extra characters for fieldtype and whitespace + parenthesis
64
+ width = max(len(k + rdict_types[k]) for k in rdict) + 3 if rdict_types else max(len(k) for k in rdict)
65
+ fmt = f"{{:>{width}}} = {{}}\n"
69
66
  for key, value in rdict.items():
70
67
  if rdict_types:
71
68
  key = f"{key} ({rdict_types[key]})"