flow.record 3.19.dev4__tar.gz → 3.19.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flow_record-3.19.dev6/.git-blame-ignore-revs +6 -0
- {flow_record-3.19.dev4/flow.record.egg-info → flow_record-3.19.dev6}/PKG-INFO +1 -1
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/__init__.py +27 -35
- flow_record-3.19.dev6/flow/record/adapter/__init__.py +53 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/archive.py +12 -5
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/avro.py +18 -15
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/broker.py +16 -8
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/csvfile.py +26 -12
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/elastic.py +6 -2
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/jsonfile.py +20 -9
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/line.py +3 -6
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/mongo.py +17 -8
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/split.py +12 -5
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/splunk.py +39 -41
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/sqlite.py +7 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/stream.py +10 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/text.py +15 -9
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/xlsx.py +17 -9
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/base.py +107 -123
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/__init__.py +140 -151
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/credential.py +2 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/net/__init__.py +5 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/net/ip.py +5 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/net/ipv4.py +35 -34
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/net/tcp.py +2 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/fieldtypes/net/udp.py +2 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/jsonpacker.py +19 -19
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/packer.py +26 -22
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/selector.py +105 -119
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/stream.py +66 -53
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/tools/geoip.py +18 -15
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/tools/rdump.py +10 -8
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/utils.py +11 -10
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/version.py +2 -2
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/whitelist.py +2 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6/flow.record.egg-info}/PKG-INFO +1 -1
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow.record.egg-info/SOURCES.txt +1 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/pyproject.toml +48 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/_utils.py +10 -2
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/selector_explain_example.py +4 -2
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/standalone_test.py +6 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_adapter_line.py +3 -1
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_adapter_text.py +3 -1
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_avro.py +13 -7
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_avro_adapter.py +13 -6
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_compiled_selector.py +5 -3
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_csv_adapter.py +6 -1
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_deprecations.py +6 -4
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_elastic_adapter.py +7 -1
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_fieldtype_ip.py +18 -17
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_fieldtypes.py +51 -61
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_json_packer.py +5 -3
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_json_record_adapter.py +24 -18
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_multi_timestamp.py +13 -11
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_packer.py +14 -12
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_rdump.py +57 -39
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_record.py +47 -42
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_record_adapter.py +81 -81
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_record_descriptor.py +10 -8
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_regression.py +50 -45
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_selector.py +53 -54
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_splunk_adapter.py +22 -17
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_sqlite_duckdb_adapter.py +21 -13
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/test_xlsx_adapter.py +8 -3
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tox.ini +5 -19
- flow_record-3.19.dev4/flow/record/adapter/__init__.py +0 -63
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/COPYRIGHT +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/LICENSE +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/MANIFEST.in +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/README.md +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/examples/filesystem.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/examples/passivedns.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/examples/records.json +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/examples/tcpconn.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/exceptions.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/setup.cfg +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/__init__.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/docs/Makefile +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/docs/conf.py +0 -0
- {flow_record-3.19.dev4 → flow_record-3.19.dev6}/tests/docs/index.rst +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.19.
|
|
3
|
+
Version: 3.19.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import gzip
|
|
2
|
-
import
|
|
4
|
+
from pathlib import Path
|
|
3
5
|
|
|
4
6
|
from flow.record.base import (
|
|
5
7
|
IGNORE_FIELDS_FOR_COMPARISON,
|
|
@@ -39,71 +41,61 @@ from flow.record.stream import (
|
|
|
39
41
|
|
|
40
42
|
__all__ = [
|
|
41
43
|
"IGNORE_FIELDS_FOR_COMPARISON",
|
|
42
|
-
"RECORD_VERSION",
|
|
43
44
|
"RECORDSTREAM_MAGIC",
|
|
45
|
+
"RECORD_VERSION",
|
|
46
|
+
"DynamicDescriptor",
|
|
44
47
|
"FieldType",
|
|
45
|
-
"Record",
|
|
46
48
|
"GroupedRecord",
|
|
47
|
-
"
|
|
49
|
+
"JsonRecordPacker",
|
|
50
|
+
"PathTemplateWriter",
|
|
51
|
+
"Record",
|
|
48
52
|
"RecordAdapter",
|
|
53
|
+
"RecordArchiver",
|
|
54
|
+
"RecordDescriptor",
|
|
55
|
+
"RecordDescriptorError",
|
|
49
56
|
"RecordField",
|
|
50
|
-
"RecordReader",
|
|
51
|
-
"RecordWriter",
|
|
52
57
|
"RecordOutput",
|
|
53
|
-
"RecordPrinter",
|
|
54
58
|
"RecordPacker",
|
|
55
|
-
"
|
|
56
|
-
"
|
|
59
|
+
"RecordPrinter",
|
|
60
|
+
"RecordReader",
|
|
57
61
|
"RecordStreamReader",
|
|
58
|
-
"
|
|
62
|
+
"RecordStreamWriter",
|
|
63
|
+
"RecordWriter",
|
|
64
|
+
"dynamic_fieldtype",
|
|
65
|
+
"extend_record",
|
|
66
|
+
"ignore_fields_for_comparison",
|
|
67
|
+
"iter_timestamped_records",
|
|
59
68
|
"open_path",
|
|
69
|
+
"open_path_or_stream",
|
|
60
70
|
"open_stream",
|
|
61
|
-
"
|
|
71
|
+
"record_stream",
|
|
62
72
|
"set_ignored_fields_for_comparison",
|
|
63
73
|
"stream",
|
|
64
|
-
"dynamic_fieldtype",
|
|
65
|
-
"DynamicDescriptor",
|
|
66
|
-
"PathTemplateWriter",
|
|
67
|
-
"RecordArchiver",
|
|
68
|
-
"RecordDescriptorError",
|
|
69
|
-
"record_stream",
|
|
70
|
-
"extend_record",
|
|
71
|
-
"iter_timestamped_records",
|
|
72
74
|
]
|
|
73
75
|
|
|
74
76
|
|
|
75
|
-
class View:
|
|
76
|
-
fields = None
|
|
77
|
-
|
|
78
|
-
def __init__(self, fields):
|
|
79
|
-
self.fields = fields
|
|
80
|
-
|
|
81
|
-
def __iter__(self, fields):
|
|
82
|
-
pass
|
|
83
|
-
|
|
84
|
-
|
|
85
77
|
class RecordDateSplitter:
|
|
86
78
|
basepath = None
|
|
87
79
|
out = None
|
|
88
80
|
|
|
89
|
-
def __init__(self, basepath):
|
|
90
|
-
self.basepath = basepath
|
|
81
|
+
def __init__(self, basepath: str | Path):
|
|
82
|
+
self.basepath = Path(basepath)
|
|
91
83
|
self.out = {}
|
|
92
84
|
|
|
93
|
-
def getstream(self, t):
|
|
85
|
+
def getstream(self, t: tuple[int, int, int]) -> RecordStreamWriter:
|
|
94
86
|
if t not in self.out:
|
|
95
|
-
path =
|
|
87
|
+
path = self.basepath.joinpath("-".join([f"{v:2d}" for v in t]) + ".rec.gz")
|
|
96
88
|
f = gzip.GzipFile(path, "wb")
|
|
97
89
|
rs = RecordStreamWriter(f)
|
|
98
90
|
self.out[t] = rs
|
|
99
91
|
return self.out[t]
|
|
100
92
|
|
|
101
|
-
def write(self, r):
|
|
93
|
+
def write(self, r: Record) -> None:
|
|
102
94
|
t = (r.ts.year, r.ts.month, r.ts.day)
|
|
103
95
|
rs = self.getstream(t)
|
|
104
96
|
rs.write(r)
|
|
105
97
|
rs.fp.flush()
|
|
106
98
|
|
|
107
|
-
def close(self):
|
|
99
|
+
def close(self) -> None:
|
|
108
100
|
for rs in self.out.values():
|
|
109
101
|
rs.close()
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
__path__ = __import__("pkgutil").extend_path(__path__, __name__) # make this namespace extensible from other packages
|
|
4
|
+
import abc
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from collections.abc import Iterator
|
|
9
|
+
|
|
10
|
+
from flow.record.base import Record
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AbstractWriter(metaclass=abc.ABCMeta):
|
|
14
|
+
@abc.abstractmethod
|
|
15
|
+
def write(self, rec: Record) -> None:
|
|
16
|
+
"""Write a record."""
|
|
17
|
+
raise NotImplementedError
|
|
18
|
+
|
|
19
|
+
@abc.abstractmethod
|
|
20
|
+
def flush(self) -> None:
|
|
21
|
+
"""Flush any buffered writes."""
|
|
22
|
+
raise NotImplementedError
|
|
23
|
+
|
|
24
|
+
@abc.abstractmethod
|
|
25
|
+
def close(self) -> None:
|
|
26
|
+
"""Close the Writer, no more writes will be possible."""
|
|
27
|
+
raise NotImplementedError
|
|
28
|
+
|
|
29
|
+
def __del__(self) -> None:
|
|
30
|
+
self.close()
|
|
31
|
+
|
|
32
|
+
def __enter__(self) -> AbstractWriter: # noqa: PYI034
|
|
33
|
+
return self
|
|
34
|
+
|
|
35
|
+
def __exit__(self, *args) -> None:
|
|
36
|
+
self.flush()
|
|
37
|
+
self.close()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class AbstractReader(metaclass=abc.ABCMeta):
|
|
41
|
+
@abc.abstractmethod
|
|
42
|
+
def __iter__(self) -> Iterator[Record]:
|
|
43
|
+
"""Return a record iterator."""
|
|
44
|
+
raise NotImplementedError
|
|
45
|
+
|
|
46
|
+
def close(self) -> None: # noqa: B027
|
|
47
|
+
"""Close the Reader, can be overriden to properly free resources."""
|
|
48
|
+
|
|
49
|
+
def __enter__(self) -> AbstractReader: # noqa: PYI034
|
|
50
|
+
return self
|
|
51
|
+
|
|
52
|
+
def __exit__(self, *args) -> None:
|
|
53
|
+
self.close()
|
|
@@ -1,6 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
2
6
|
from flow.record.stream import RecordArchiver
|
|
3
7
|
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from flow.record.base import Record
|
|
10
|
+
|
|
4
11
|
__usage__ = """
|
|
5
12
|
Record archiver adapter, writes records to YYYY/mm/dd directories (writer only)
|
|
6
13
|
---
|
|
@@ -12,7 +19,7 @@ Write usage: rdump -w archive://[PATH]
|
|
|
12
19
|
class ArchiveWriter(AbstractWriter):
|
|
13
20
|
writer = None
|
|
14
21
|
|
|
15
|
-
def __init__(self, path, **kwargs):
|
|
22
|
+
def __init__(self, path: str, **kwargs):
|
|
16
23
|
self.path = path
|
|
17
24
|
|
|
18
25
|
path_template = kwargs.get("path_template")
|
|
@@ -20,19 +27,19 @@ class ArchiveWriter(AbstractWriter):
|
|
|
20
27
|
|
|
21
28
|
self.writer = RecordArchiver(self.path, path_template=path_template, name=name)
|
|
22
29
|
|
|
23
|
-
def write(self, r):
|
|
30
|
+
def write(self, r: Record) -> None:
|
|
24
31
|
self.writer.write(r)
|
|
25
32
|
|
|
26
|
-
def flush(self):
|
|
33
|
+
def flush(self) -> None:
|
|
27
34
|
# RecordArchiver already flushes after every write
|
|
28
35
|
pass
|
|
29
36
|
|
|
30
|
-
def close(self):
|
|
37
|
+
def close(self) -> None:
|
|
31
38
|
if self.writer:
|
|
32
39
|
self.writer.close()
|
|
33
40
|
self.writer = None
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
class ArchiveReader(AbstractReader):
|
|
37
|
-
def __init__(self, path, **kwargs):
|
|
44
|
+
def __init__(self, path: str, **kwargs):
|
|
38
45
|
raise NotImplementedError
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
from datetime import datetime, timedelta, timezone
|
|
5
5
|
from importlib.util import find_spec
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import TYPE_CHECKING, Any, BinaryIO
|
|
7
7
|
|
|
8
8
|
import fastavro
|
|
9
9
|
|
|
@@ -12,6 +12,10 @@ from flow.record.adapter import AbstractReader, AbstractWriter
|
|
|
12
12
|
from flow.record.selector import make_selector
|
|
13
13
|
from flow.record.utils import is_stdout
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Iterator
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
15
19
|
__usage__ = """
|
|
16
20
|
Apache AVRO adapter
|
|
17
21
|
---
|
|
@@ -52,7 +56,7 @@ class AvroWriter(AbstractWriter):
|
|
|
52
56
|
fp = None
|
|
53
57
|
writer = None
|
|
54
58
|
|
|
55
|
-
def __init__(self, path
|
|
59
|
+
def __init__(self, path: str | Path | BinaryIO, **kwargs):
|
|
56
60
|
self.fp = record.open_path_or_stream(path, "wb")
|
|
57
61
|
|
|
58
62
|
self.desc = None
|
|
@@ -69,11 +73,11 @@ class AvroWriter(AbstractWriter):
|
|
|
69
73
|
self.writer = fastavro.write.Writer(self.fp, self.parsed_schema, codec=self.codec)
|
|
70
74
|
|
|
71
75
|
if self.desc != r._desc:
|
|
72
|
-
raise
|
|
76
|
+
raise ValueError("Mixed record types")
|
|
73
77
|
|
|
74
78
|
self.writer.write(r._packdict())
|
|
75
79
|
|
|
76
|
-
def flush(self):
|
|
80
|
+
def flush(self) -> None:
|
|
77
81
|
if not self.writer:
|
|
78
82
|
self.writer = fastavro.write.Writer(
|
|
79
83
|
self.fp,
|
|
@@ -92,21 +96,21 @@ class AvroWriter(AbstractWriter):
|
|
|
92
96
|
class AvroReader(AbstractReader):
|
|
93
97
|
fp = None
|
|
94
98
|
|
|
95
|
-
def __init__(self, path, selector=None, **kwargs):
|
|
99
|
+
def __init__(self, path: str, selector: str | None = None, **kwargs):
|
|
96
100
|
self.fp = record.open_path_or_stream(path, "rb")
|
|
97
101
|
self.selector = make_selector(selector)
|
|
98
102
|
|
|
99
103
|
self.reader = fastavro.reader(self.fp)
|
|
100
104
|
self.schema = self.reader.writer_schema
|
|
101
105
|
if not self.schema:
|
|
102
|
-
raise
|
|
106
|
+
raise ValueError("Missing Avro schema")
|
|
103
107
|
|
|
104
108
|
self.desc = schema_to_descriptor(self.schema)
|
|
105
109
|
|
|
106
110
|
# Store the fieldnames that are of type "datetime"
|
|
107
|
-
self.datetime_fields =
|
|
111
|
+
self.datetime_fields = {
|
|
108
112
|
name for name, field in self.desc.get_all_fields().items() if field.typename == "datetime"
|
|
109
|
-
|
|
113
|
+
}
|
|
110
114
|
|
|
111
115
|
def __iter__(self) -> Iterator[record.Record]:
|
|
112
116
|
for obj in self.reader:
|
|
@@ -149,7 +153,7 @@ def descriptor_to_schema(desc: record.RecordDescriptor) -> dict[str, Any]:
|
|
|
149
153
|
else:
|
|
150
154
|
avro_type = AVRO_TYPE_MAP.get(field_type)
|
|
151
155
|
if not avro_type:
|
|
152
|
-
raise
|
|
156
|
+
raise ValueError(f"Unsupported Avro type: {field_type}")
|
|
153
157
|
|
|
154
158
|
field_schema["type"] = [avro_type, "null"]
|
|
155
159
|
|
|
@@ -190,11 +194,10 @@ def avro_type_to_flow_type(ftype: list) -> str:
|
|
|
190
194
|
if isinstance(t, dict):
|
|
191
195
|
if t.get("type") == "array":
|
|
192
196
|
item_type = avro_type_to_flow_type(t.get("items"))
|
|
193
|
-
return "{}[]"
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
return "datetime"
|
|
197
|
+
return f"{item_type}[]"
|
|
198
|
+
logical_type = t.get("logicalType")
|
|
199
|
+
if logical_type and ("time" in logical_type or "date" in logical_type):
|
|
200
|
+
return "datetime"
|
|
198
201
|
|
|
199
202
|
if t == "null":
|
|
200
203
|
continue
|
|
@@ -202,4 +205,4 @@ def avro_type_to_flow_type(ftype: list) -> str:
|
|
|
202
205
|
if t in RECORD_TYPE_MAP:
|
|
203
206
|
return RECORD_TYPE_MAP[t]
|
|
204
207
|
|
|
205
|
-
raise TypeError("Can't map avro type to flow type: {}"
|
|
208
|
+
raise TypeError(f"Can't map avro type to flow type: {t}")
|
|
@@ -1,7 +1,15 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
2
4
|
|
|
5
|
+
from flow.broker import Publisher, Subscriber
|
|
3
6
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
4
7
|
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from collections.abc import Iterator
|
|
10
|
+
|
|
11
|
+
from flow.record.base import Record
|
|
12
|
+
|
|
5
13
|
__usage__ = """
|
|
6
14
|
PubSub adapter using flow.broker
|
|
7
15
|
---
|
|
@@ -13,23 +21,23 @@ Read usage: rdump broker+tcp://[IP]:[PORT] -s True
|
|
|
13
21
|
class BrokerWriter(AbstractWriter):
|
|
14
22
|
publisher = None
|
|
15
23
|
|
|
16
|
-
def __init__(self, uri, source=None, classification=None, **kwargs):
|
|
24
|
+
def __init__(self, uri: str, source: str | None = None, classification: str | None = None, **kwargs):
|
|
17
25
|
self.publisher = Publisher(uri, **kwargs)
|
|
18
26
|
self.source = source
|
|
19
27
|
self.classification = classification
|
|
20
28
|
|
|
21
|
-
def write(self, r):
|
|
29
|
+
def write(self, r: Record) -> None:
|
|
22
30
|
record = r._replace(
|
|
23
31
|
_source=self.source or r._source,
|
|
24
32
|
_classification=self.classification or r._classification,
|
|
25
33
|
)
|
|
26
34
|
self.publisher.send(record)
|
|
27
35
|
|
|
28
|
-
def flush(self):
|
|
36
|
+
def flush(self) -> None:
|
|
29
37
|
if self.publisher:
|
|
30
38
|
self.publisher.flush()
|
|
31
39
|
|
|
32
|
-
def close(self):
|
|
40
|
+
def close(self) -> None:
|
|
33
41
|
if self.publisher:
|
|
34
42
|
if hasattr(self.publisher, "stop"):
|
|
35
43
|
# Requires flow.broker >= 1.1.1
|
|
@@ -42,14 +50,14 @@ class BrokerWriter(AbstractWriter):
|
|
|
42
50
|
class BrokerReader(AbstractReader):
|
|
43
51
|
subscriber = None
|
|
44
52
|
|
|
45
|
-
def __init__(self, uri, name=None, selector=None, **kwargs):
|
|
53
|
+
def __init__(self, uri: str, name: str | None = None, selector: str | None = None, **kwargs):
|
|
46
54
|
self.subscriber = Subscriber(uri, **kwargs)
|
|
47
55
|
self.subscription = self.subscriber.select(name, str(selector))
|
|
48
56
|
|
|
49
|
-
def __iter__(self):
|
|
57
|
+
def __iter__(self) -> Iterator[Record]:
|
|
50
58
|
return iter(self.subscription)
|
|
51
59
|
|
|
52
|
-
def close(self):
|
|
60
|
+
def close(self) -> None:
|
|
53
61
|
if self.subscriber:
|
|
54
62
|
self.subscriber.stop()
|
|
55
63
|
self.subscriber = None
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
from __future__ import
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import csv
|
|
4
4
|
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
5
7
|
|
|
6
8
|
from flow.record import RecordDescriptor
|
|
7
9
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
8
|
-
from flow.record.base import normalize_fieldname
|
|
10
|
+
from flow.record.base import Record, normalize_fieldname
|
|
9
11
|
from flow.record.selector import make_selector
|
|
10
12
|
from flow.record.utils import is_stdout
|
|
11
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Iterator
|
|
16
|
+
|
|
12
17
|
__usage__ = """
|
|
13
18
|
Comma-separated values (CSV) adapter
|
|
14
19
|
---
|
|
@@ -23,13 +28,20 @@ Optional parameters:
|
|
|
23
28
|
|
|
24
29
|
|
|
25
30
|
class CsvfileWriter(AbstractWriter):
|
|
26
|
-
def __init__(
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
path: str | Path | None,
|
|
34
|
+
fields: str | list[str] | None = None,
|
|
35
|
+
exclude: str | list[str] | None = None,
|
|
36
|
+
lineterminator: str = "\r\n",
|
|
37
|
+
**kwargs,
|
|
38
|
+
):
|
|
27
39
|
self.fp = None
|
|
28
40
|
if path in (None, "", "-"):
|
|
29
41
|
self.fp = sys.stdout
|
|
30
42
|
else:
|
|
31
|
-
self.fp = open(
|
|
32
|
-
self.lineterminator = lineterminator
|
|
43
|
+
self.fp = Path(path).open("w", newline="") # noqa: SIM115
|
|
44
|
+
self.lineterminator = lineterminator
|
|
33
45
|
for r, n in ((r"\r", "\r"), (r"\n", "\n"), (r"\t", "\t")):
|
|
34
46
|
self.lineterminator = self.lineterminator.replace(r, n)
|
|
35
47
|
self.desc = None
|
|
@@ -41,7 +53,7 @@ class CsvfileWriter(AbstractWriter):
|
|
|
41
53
|
if isinstance(self.exclude, str):
|
|
42
54
|
self.exclude = self.exclude.split(",")
|
|
43
55
|
|
|
44
|
-
def write(self, r):
|
|
56
|
+
def write(self, r: Record) -> None:
|
|
45
57
|
rdict = r._asdict(fields=self.fields, exclude=self.exclude)
|
|
46
58
|
if not self.desc or self.desc != r._desc:
|
|
47
59
|
self.desc = r._desc
|
|
@@ -49,24 +61,26 @@ class CsvfileWriter(AbstractWriter):
|
|
|
49
61
|
self.writer.writeheader()
|
|
50
62
|
self.writer.writerow(rdict)
|
|
51
63
|
|
|
52
|
-
def flush(self):
|
|
64
|
+
def flush(self) -> None:
|
|
53
65
|
if self.fp:
|
|
54
66
|
self.fp.flush()
|
|
55
67
|
|
|
56
|
-
def close(self):
|
|
68
|
+
def close(self) -> None:
|
|
57
69
|
if self.fp and not is_stdout(self.fp):
|
|
58
70
|
self.fp.close()
|
|
59
71
|
self.fp = None
|
|
60
72
|
|
|
61
73
|
|
|
62
74
|
class CsvfileReader(AbstractReader):
|
|
63
|
-
def __init__(
|
|
75
|
+
def __init__(
|
|
76
|
+
self, path: str | Path | None, selector: str | None = None, fields: str | list[str] | None = None, **kwargs
|
|
77
|
+
):
|
|
64
78
|
self.fp = None
|
|
65
79
|
self.selector = make_selector(selector)
|
|
66
80
|
if path in (None, "", "-"):
|
|
67
81
|
self.fp = sys.stdin
|
|
68
82
|
else:
|
|
69
|
-
self.fp = open(
|
|
83
|
+
self.fp = Path(path).open("r", newline="") # noqa: SIM115
|
|
70
84
|
|
|
71
85
|
self.dialect = "excel"
|
|
72
86
|
if self.fp.seekable():
|
|
@@ -87,12 +101,12 @@ class CsvfileReader(AbstractReader):
|
|
|
87
101
|
# Create RecordDescriptor from fields, skipping fields starting with "_" (reserved for internal use)
|
|
88
102
|
self.desc = RecordDescriptor("csv/reader", [("string", col) for col in self.fields if not col.startswith("_")])
|
|
89
103
|
|
|
90
|
-
def close(self):
|
|
104
|
+
def close(self) -> None:
|
|
91
105
|
if self.fp:
|
|
92
106
|
self.fp.close()
|
|
93
107
|
self.fp = None
|
|
94
108
|
|
|
95
|
-
def __iter__(self):
|
|
109
|
+
def __iter__(self) -> Iterator[Record]:
|
|
96
110
|
for row in self.reader:
|
|
97
111
|
rdict = dict(zip(self.fields, row))
|
|
98
112
|
record = self.desc.init_from_dict(rdict)
|
|
@@ -4,7 +4,7 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
import queue
|
|
6
6
|
import threading
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
8
|
|
|
9
9
|
import elasticsearch
|
|
10
10
|
import elasticsearch.helpers
|
|
@@ -13,7 +13,11 @@ from flow.record.adapter import AbstractReader, AbstractWriter
|
|
|
13
13
|
from flow.record.base import Record, RecordDescriptor
|
|
14
14
|
from flow.record.fieldtypes import fieldtype_for_value
|
|
15
15
|
from flow.record.jsonpacker import JsonRecordPacker
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Iterator
|
|
19
|
+
|
|
20
|
+
from flow.record.selector import CompiledSelector, Selector
|
|
17
21
|
|
|
18
22
|
__usage__ = """
|
|
19
23
|
ElasticSearch adapter
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
4
|
+
from typing import TYPE_CHECKING, BinaryIO
|
|
2
5
|
|
|
3
6
|
from flow import record
|
|
4
7
|
from flow.record import JsonRecordPacker
|
|
@@ -7,6 +10,12 @@ from flow.record.fieldtypes import fieldtype_for_value
|
|
|
7
10
|
from flow.record.selector import make_selector
|
|
8
11
|
from flow.record.utils import is_stdout
|
|
9
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Iterator
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from flow.record.base import Record, RecordDescriptor
|
|
18
|
+
|
|
10
19
|
__usage__ = """
|
|
11
20
|
JSON adapter
|
|
12
21
|
---
|
|
@@ -21,7 +30,9 @@ Read usage: rdump jsonfile://[PATH]
|
|
|
21
30
|
class JsonfileWriter(AbstractWriter):
|
|
22
31
|
fp = None
|
|
23
32
|
|
|
24
|
-
def __init__(
|
|
33
|
+
def __init__(
|
|
34
|
+
self, path: str | Path | BinaryIO, indent: str | int | None = None, descriptors: bool = True, **kwargs
|
|
35
|
+
):
|
|
25
36
|
self.descriptors = str(descriptors).lower() in ("true", "1")
|
|
26
37
|
self.fp = record.open_path_or_stream(path, "w")
|
|
27
38
|
if isinstance(indent, str):
|
|
@@ -30,21 +41,21 @@ class JsonfileWriter(AbstractWriter):
|
|
|
30
41
|
if self.descriptors:
|
|
31
42
|
self.packer.on_descriptor.add_handler(self.packer_on_new_descriptor)
|
|
32
43
|
|
|
33
|
-
def packer_on_new_descriptor(self, descriptor):
|
|
44
|
+
def packer_on_new_descriptor(self, descriptor: RecordDescriptor) -> None:
|
|
34
45
|
self._write(descriptor)
|
|
35
46
|
|
|
36
|
-
def _write(self, obj):
|
|
47
|
+
def _write(self, obj: Record | RecordDescriptor) -> None:
|
|
37
48
|
record_json = self.packer.pack(obj)
|
|
38
49
|
self.fp.write(record_json + "\n")
|
|
39
50
|
|
|
40
|
-
def write(self, r):
|
|
51
|
+
def write(self, r: Record) -> None:
|
|
41
52
|
self._write(r)
|
|
42
53
|
|
|
43
|
-
def flush(self):
|
|
54
|
+
def flush(self) -> None:
|
|
44
55
|
if self.fp:
|
|
45
56
|
self.fp.flush()
|
|
46
57
|
|
|
47
|
-
def close(self):
|
|
58
|
+
def close(self) -> None:
|
|
48
59
|
if self.fp and not is_stdout(self.fp):
|
|
49
60
|
self.fp.close()
|
|
50
61
|
self.fp = None
|
|
@@ -53,17 +64,17 @@ class JsonfileWriter(AbstractWriter):
|
|
|
53
64
|
class JsonfileReader(AbstractReader):
|
|
54
65
|
fp = None
|
|
55
66
|
|
|
56
|
-
def __init__(self, path, selector=None, **kwargs):
|
|
67
|
+
def __init__(self, path: str | Path | BinaryIO, selector: str | None = None, **kwargs):
|
|
57
68
|
self.selector = make_selector(selector)
|
|
58
69
|
self.fp = record.open_path_or_stream(path, "r")
|
|
59
70
|
self.packer = JsonRecordPacker()
|
|
60
71
|
|
|
61
|
-
def close(self):
|
|
72
|
+
def close(self) -> None:
|
|
62
73
|
if self.fp:
|
|
63
74
|
self.fp.close()
|
|
64
75
|
self.fp = None
|
|
65
76
|
|
|
66
|
-
def __iter__(self):
|
|
77
|
+
def __iter__(self) -> Iterator[Record]:
|
|
67
78
|
for line in self.fp:
|
|
68
79
|
obj = self.packer.unpack(line)
|
|
69
80
|
if isinstance(obj, record.Record):
|
|
@@ -60,12 +60,9 @@ class LineWriter(AbstractWriter):
|
|
|
60
60
|
self.count += 1
|
|
61
61
|
self.fp.write(f"--[ RECORD {self.count} ]--\n".encode())
|
|
62
62
|
if rdict:
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
else:
|
|
67
|
-
width = max(len(k) for k in rdict)
|
|
68
|
-
fmt = "{{:>{width}}} = {{}}\n".format(width=width)
|
|
63
|
+
# also account for extra characters for fieldtype and whitespace + parenthesis
|
|
64
|
+
width = max(len(k + rdict_types[k]) for k in rdict) + 3 if rdict_types else max(len(k) for k in rdict)
|
|
65
|
+
fmt = f"{{:>{width}}} = {{}}\n"
|
|
69
66
|
for key, value in rdict.items():
|
|
70
67
|
if rdict_types:
|
|
71
68
|
key = f"{key} ({rdict_types[key]})"
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
import bson
|
|
2
6
|
from pymongo import MongoClient
|
|
3
7
|
|
|
@@ -5,6 +9,11 @@ from flow import record
|
|
|
5
9
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
6
10
|
from flow.record.selector import make_selector
|
|
7
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterator
|
|
14
|
+
|
|
15
|
+
from flow.record.base import Record
|
|
16
|
+
|
|
8
17
|
__usage__ = """
|
|
9
18
|
MongoDB adapter
|
|
10
19
|
---
|
|
@@ -16,7 +25,7 @@ Read usage: rdump mongo://[IP]:[PORT]/[DBNAME]/[COLLECTION]
|
|
|
16
25
|
"""
|
|
17
26
|
|
|
18
27
|
|
|
19
|
-
def parse_path(path):
|
|
28
|
+
def parse_path(path: str) -> tuple[str, str, str]:
|
|
20
29
|
elements = path.strip("/").split("/", 2) # max 3 elements
|
|
21
30
|
if len(elements) == 2:
|
|
22
31
|
return "localhost", elements[0], elements[1]
|
|
@@ -28,7 +37,7 @@ def parse_path(path):
|
|
|
28
37
|
class MongoWriter(AbstractWriter):
|
|
29
38
|
client = None
|
|
30
39
|
|
|
31
|
-
def __init__(self, path, key=None, **kwargs):
|
|
40
|
+
def __init__(self, path: str, key: str | None = None, **kwargs):
|
|
32
41
|
dbhost, dbname, collection = parse_path(path)
|
|
33
42
|
|
|
34
43
|
self.key = key
|
|
@@ -38,7 +47,7 @@ class MongoWriter(AbstractWriter):
|
|
|
38
47
|
self.coll_descriptors = self.db["_descriptors"]
|
|
39
48
|
self.descriptors = {}
|
|
40
49
|
|
|
41
|
-
def write(self, r):
|
|
50
|
+
def write(self, r: Record) -> None:
|
|
42
51
|
d = r._packdict()
|
|
43
52
|
d["_type"] = r._desc.identifier
|
|
44
53
|
|
|
@@ -53,10 +62,10 @@ class MongoWriter(AbstractWriter):
|
|
|
53
62
|
else:
|
|
54
63
|
self.collection.insert(d)
|
|
55
64
|
|
|
56
|
-
def flush(self):
|
|
65
|
+
def flush(self) -> None:
|
|
57
66
|
pass
|
|
58
67
|
|
|
59
|
-
def close(self):
|
|
68
|
+
def close(self) -> None:
|
|
60
69
|
if self.client:
|
|
61
70
|
self.client.close()
|
|
62
71
|
self.client = None
|
|
@@ -65,7 +74,7 @@ class MongoWriter(AbstractWriter):
|
|
|
65
74
|
class MongoReader(AbstractReader):
|
|
66
75
|
client = None
|
|
67
76
|
|
|
68
|
-
def __init__(self, path, selector=None, **kwargs):
|
|
77
|
+
def __init__(self, path: str, selector: str | None = None, **kwargs):
|
|
69
78
|
dbhost, dbname, collection = parse_path(path)
|
|
70
79
|
|
|
71
80
|
self.selector = make_selector(selector)
|
|
@@ -75,12 +84,12 @@ class MongoReader(AbstractReader):
|
|
|
75
84
|
self.coll_descriptors = self.db["_descriptors"]
|
|
76
85
|
self.descriptors = {}
|
|
77
86
|
|
|
78
|
-
def close(self):
|
|
87
|
+
def close(self) -> None:
|
|
79
88
|
if self.client:
|
|
80
89
|
self.client.close()
|
|
81
90
|
self.client = None
|
|
82
91
|
|
|
83
|
-
def __iter__(self):
|
|
92
|
+
def __iter__(self) -> Iterator[Record]:
|
|
84
93
|
desc = None
|
|
85
94
|
for r in self.collection.find():
|
|
86
95
|
if r["_type"] not in self.descriptors:
|