flow.record 3.20.dev1__tar.gz → 3.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flow_record-3.21/.gitattributes +1 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/PKG-INFO +11 -16
- {flow_record-3.20.dev1 → flow_record-3.21}/examples/filesystem.py +28 -29
- {flow_record-3.20.dev1 → flow_record-3.21}/examples/passivedns.py +12 -9
- {flow_record-3.20.dev1 → flow_record-3.21}/examples/tcpconn.py +5 -3
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/avro.py +4 -1
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/broker.py +1 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/csvfile.py +16 -6
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/elastic.py +58 -19
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/jsonfile.py +7 -4
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/mongo.py +4 -1
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/splunk.py +3 -3
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/sqlite.py +5 -2
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/xlsx.py +5 -2
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/base.py +11 -5
- flow_record-3.21/flow/record/context.py +69 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/__init__.py +10 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/ip.py +6 -18
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/ipv4.py +3 -3
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/jsonpacker.py +3 -2
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/selector.py +2 -2
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/stream.py +25 -8
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/tools/rdump.py +209 -46
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/utils.py +35 -1
- flow_record-3.21/flow/record/version.py +34 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/PKG-INFO +11 -16
- {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/SOURCES.txt +38 -27
- flow_record-3.21/flow.record.egg-info/requires.txt +36 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/pyproject.toml +65 -21
- flow_record-3.21/tests/__init__.py +0 -0
- flow_record-3.21/tests/_data/.gitkeep +0 -0
- flow_record-3.21/tests/adapter/__init__.py +0 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/adapter}/test_avro.py +50 -1
- flow_record-3.20.dev1/tests/test_json_record_adapter.py → flow_record-3.21/tests/adapter/test_json.py +1 -2
- flow_record-3.20.dev1/tests/test_splunk_adapter.py → flow_record-3.21/tests/adapter/test_splunk.py +7 -3
- flow_record-3.20.dev1/tests/test_sqlite_duckdb_adapter.py → flow_record-3.21/tests/adapter/test_sqlite_duckdb.py +3 -2
- flow_record-3.20.dev1/tests/test_xlsx_adapter.py → flow_record-3.21/tests/adapter/test_xlsx.py +1 -2
- flow_record-3.21/tests/conftest.py +13 -0
- flow_record-3.21/tests/fieldtypes/__init__.py +0 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/fieldtypes}/test_fieldtypes.py +15 -6
- flow_record-3.20.dev1/tests/test_fieldtype_ip.py → flow_record-3.21/tests/fieldtypes/test_ip.py +2 -2
- flow_record-3.21/tests/packer/__init__.py +0 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/packer}/test_json_packer.py +27 -1
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/packer}/test_packer.py +8 -6
- flow_record-3.21/tests/record/__init__.py +0 -0
- flow_record-3.20.dev1/tests/test_record_adapter.py → flow_record-3.21/tests/record/test_adapter.py +8 -9
- flow_record-3.21/tests/record/test_context.py +66 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/record}/test_record.py +27 -7
- flow_record-3.21/tests/selector/__init__.py +0 -0
- flow_record-3.20.dev1/tests/test_selector.py → flow_record-3.21/tests/selector/test_selectors.py +7 -3
- flow_record-3.20.dev1/tests/test_regression.py → flow_record-3.21/tests/test_regressions.py +79 -19
- flow_record-3.21/tests/test_utils.py +25 -0
- flow_record-3.21/tests/tools/__init__.py +0 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/tools}/test_rdump.py +82 -7
- {flow_record-3.20.dev1 → flow_record-3.21}/tox.ini +12 -14
- flow_record-3.20.dev1/flow/record/version.py +0 -16
- flow_record-3.20.dev1/flow.record.egg-info/requires.txt +0 -41
- flow_record-3.20.dev1/tests/standalone_test.py +0 -19
- flow_record-3.20.dev1/tests/test_avro_adapter.py +0 -58
- {flow_record-3.20.dev1 → flow_record-3.21}/.git-blame-ignore-revs +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/COPYRIGHT +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/LICENSE +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/MANIFEST.in +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/README.md +0 -0
- {flow_record-3.20.dev1/flow/record/tools → flow_record-3.21/examples}/__init__.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/examples/records.json +0 -0
- /flow_record-3.20.dev1/tests/selector_explain_example.py → /flow_record-3.21/examples/selectors.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/__init__.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/line.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/split.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/text.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/exceptions.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/packer.py +0 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/flow/record/tools}/__init__.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/whitelist.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/setup.cfg +0 -0
- {flow_record-3.20.dev1/tests/docs → flow_record-3.21/tests/_docs}/Makefile +0 -0
- {flow_record-3.20.dev1/tests/docs → flow_record-3.21/tests/_docs}/conf.py +0 -0
- {flow_record-3.20.dev1/tests/docs → flow_record-3.21/tests/_docs}/index.rst +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/tests/_utils.py +0 -0
- /flow_record-3.20.dev1/tests/test_csv_adapter.py → /flow_record-3.21/tests/adapter/test_csv.py +0 -0
- /flow_record-3.20.dev1/tests/test_elastic_adapter.py → /flow_record-3.21/tests/adapter/test_elastic.py +0 -0
- /flow_record-3.20.dev1/tests/test_adapter_line.py → /flow_record-3.21/tests/adapter/test_line.py +0 -0
- /flow_record-3.20.dev1/tests/test_adapter_text.py → /flow_record-3.21/tests/adapter/test_text.py +0 -0
- /flow_record-3.20.dev1/tests/test_record_descriptor.py → /flow_record-3.21/tests/record/test_descriptor.py +0 -0
- {flow_record-3.20.dev1/tests → flow_record-3.21/tests/record}/test_multi_timestamp.py +0 -0
- /flow_record-3.20.dev1/tests/test_compiled_selector.py → /flow_record-3.21/tests/selector/test_compiled.py +0 -0
- {flow_record-3.20.dev1 → flow_record-3.21}/tests/test_deprecations.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tests/_data/** filter=lfs diff=lfs merge=lfs -text
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.21
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
|
-
License:
|
|
6
|
+
License-Expression: AGPL-3.0-or-later
|
|
7
7
|
Project-URL: homepage, https://dissect.tools
|
|
8
8
|
Project-URL: documentation, https://docs.dissect.tools/en/latest/projects/flow.record
|
|
9
9
|
Project-URL: repository, https://github.com/fox-it/flow.record
|
|
@@ -11,12 +11,11 @@ Classifier: Development Status :: 5 - Production/Stable
|
|
|
11
11
|
Classifier: Environment :: Console
|
|
12
12
|
Classifier: Intended Audience :: Developers
|
|
13
13
|
Classifier: Intended Audience :: Information Technology
|
|
14
|
-
Classifier: License :: OSI Approved
|
|
15
14
|
Classifier: Operating System :: OS Independent
|
|
16
15
|
Classifier: Programming Language :: Python :: 3
|
|
17
16
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
18
17
|
Classifier: Topic :: Utilities
|
|
19
|
-
Requires-Python:
|
|
18
|
+
Requires-Python: >=3.10
|
|
20
19
|
Description-Content-Type: text/markdown
|
|
21
20
|
License-File: LICENSE
|
|
22
21
|
License-File: COPYRIGHT
|
|
@@ -24,29 +23,25 @@ Requires-Dist: msgpack>=0.5.2
|
|
|
24
23
|
Requires-Dist: tzdata; platform_system == "Windows"
|
|
25
24
|
Provides-Extra: compression
|
|
26
25
|
Requires-Dist: lz4; extra == "compression"
|
|
27
|
-
Requires-Dist: zstandard; extra == "compression"
|
|
26
|
+
Requires-Dist: zstandard; platform_python_implementation != "PyPy" and extra == "compression"
|
|
28
27
|
Provides-Extra: elastic
|
|
29
28
|
Requires-Dist: elasticsearch; extra == "elastic"
|
|
30
29
|
Provides-Extra: geoip
|
|
31
30
|
Requires-Dist: maxminddb; extra == "geoip"
|
|
32
31
|
Provides-Extra: avro
|
|
33
|
-
Requires-Dist: cramjam<2.8.4; (platform_python_implementation == "PyPy" and python_version == "3.9") and extra == "avro"
|
|
34
32
|
Requires-Dist: fastavro[snappy]; extra == "avro"
|
|
35
33
|
Provides-Extra: duckdb
|
|
36
|
-
Requires-Dist: duckdb; extra == "duckdb"
|
|
37
|
-
Requires-Dist: pytz; extra == "duckdb"
|
|
34
|
+
Requires-Dist: duckdb; platform_python_implementation != "PyPy" and extra == "duckdb"
|
|
35
|
+
Requires-Dist: pytz; platform_python_implementation != "PyPy" and extra == "duckdb"
|
|
38
36
|
Provides-Extra: splunk
|
|
39
37
|
Requires-Dist: httpx; extra == "splunk"
|
|
40
|
-
Provides-Extra:
|
|
41
|
-
Requires-Dist:
|
|
42
|
-
Requires-Dist: flow.record[avro]; extra == "test"
|
|
43
|
-
Requires-Dist: flow.record[elastic]; extra == "test"
|
|
44
|
-
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
45
|
-
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
46
|
-
Requires-Dist: tqdm; extra == "test"
|
|
38
|
+
Provides-Extra: xlsx
|
|
39
|
+
Requires-Dist: openpyxl; extra == "xlsx"
|
|
47
40
|
Provides-Extra: full
|
|
48
41
|
Requires-Dist: flow.record[compression]; extra == "full"
|
|
49
42
|
Requires-Dist: tqdm; extra == "full"
|
|
43
|
+
Requires-Dist: structlog; extra == "full"
|
|
44
|
+
Dynamic: license-file
|
|
50
45
|
|
|
51
46
|
# flow.record
|
|
52
47
|
|
|
@@ -1,10 +1,15 @@
|
|
|
1
|
-
import
|
|
2
|
-
import stat
|
|
1
|
+
from __future__ import annotations
|
|
3
2
|
|
|
4
|
-
|
|
3
|
+
import stat
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
5
6
|
|
|
6
7
|
from flow.record import RecordDescriptor, RecordWriter
|
|
7
8
|
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from collections.abc import Iterator
|
|
11
|
+
|
|
12
|
+
|
|
8
13
|
descriptor = """
|
|
9
14
|
filesystem/unix/entry
|
|
10
15
|
string path;
|
|
@@ -22,34 +27,32 @@ filesystem/unix/entry
|
|
|
22
27
|
FilesystemFile = RecordDescriptor(descriptor)
|
|
23
28
|
|
|
24
29
|
|
|
25
|
-
def hash_file(path
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
f.close()
|
|
30
|
+
def hash_file(path: str | Path) -> None:
|
|
31
|
+
with Path(path).open("rb") as f:
|
|
32
|
+
while True:
|
|
33
|
+
d = f.read(4096)
|
|
34
|
+
if not d:
|
|
35
|
+
break
|
|
32
36
|
|
|
33
37
|
|
|
34
38
|
class FilesystemIterator:
|
|
35
39
|
basepath = None
|
|
36
40
|
|
|
37
|
-
def __init__(self, basepath):
|
|
41
|
+
def __init__(self, basepath: str | None):
|
|
38
42
|
self.basepath = basepath
|
|
39
43
|
self.recordType = FilesystemFile
|
|
40
44
|
|
|
41
|
-
def classify(self, source, classification):
|
|
45
|
+
def classify(self, source: str, classification: str) -> None:
|
|
42
46
|
self.recordType = FilesystemFile.base(_source=source, _classification=classification)
|
|
43
47
|
|
|
44
|
-
def iter(self, path):
|
|
45
|
-
|
|
46
|
-
return self._iter(path)
|
|
48
|
+
def iter(self, path: str | Path) -> Iterator[FilesystemFile]:
|
|
49
|
+
return self._iter(Path(path).resolve())
|
|
47
50
|
|
|
48
|
-
def _iter(self, path):
|
|
49
|
-
if path.
|
|
51
|
+
def _iter(self, path: Path) -> Iterator[FilesystemFile]:
|
|
52
|
+
if path.is_relative_to("/proc"):
|
|
50
53
|
return
|
|
51
54
|
|
|
52
|
-
st =
|
|
55
|
+
st = path.lstat()
|
|
53
56
|
|
|
54
57
|
abspath = path
|
|
55
58
|
if self.basepath and abspath.startswith(self.basepath):
|
|
@@ -59,7 +62,7 @@ class FilesystemIterator:
|
|
|
59
62
|
|
|
60
63
|
link = None
|
|
61
64
|
if ifmt == stat.S_IFLNK:
|
|
62
|
-
link =
|
|
65
|
+
link = path.readlink()
|
|
63
66
|
|
|
64
67
|
yield self.recordType(
|
|
65
68
|
path=abspath,
|
|
@@ -69,20 +72,16 @@ class FilesystemIterator:
|
|
|
69
72
|
size=st.st_size,
|
|
70
73
|
uid=st.st_uid,
|
|
71
74
|
gid=st.st_gid,
|
|
72
|
-
ctime=
|
|
73
|
-
mtime=
|
|
74
|
-
atime=
|
|
75
|
+
ctime=st.st_ctime,
|
|
76
|
+
mtime=st.st_mtime,
|
|
77
|
+
atime=st.st_atime,
|
|
75
78
|
link=link,
|
|
76
79
|
)
|
|
77
80
|
|
|
78
81
|
if ifmt == stat.S_IFDIR:
|
|
79
|
-
for i in
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
fullpath = os.path.join(path, i)
|
|
84
|
-
for e in self.iter(fullpath):
|
|
85
|
-
yield e
|
|
82
|
+
for i in path.iterdir():
|
|
83
|
+
fullpath = path.joinpath(i)
|
|
84
|
+
yield from self.iter(fullpath)
|
|
86
85
|
|
|
87
86
|
|
|
88
87
|
chunk = []
|
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
#!/usr/bin/env pypy
|
|
2
|
-
import
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
3
4
|
import sys
|
|
4
|
-
import datetime
|
|
5
|
+
from datetime import datetime, timezone
|
|
5
6
|
|
|
6
7
|
import net.ipv4
|
|
7
|
-
|
|
8
|
+
import record
|
|
8
9
|
from fileprocessing import DirectoryProcessor
|
|
9
10
|
|
|
11
|
+
UTC_TIMEZONE = timezone.utc
|
|
12
|
+
|
|
10
13
|
|
|
11
|
-
def ts(s):
|
|
12
|
-
return datetime.
|
|
14
|
+
def ts(s: float) -> datetime:
|
|
15
|
+
return datetime.fromtimestamp(float(s), tz=UTC_TIMEZONE)
|
|
13
16
|
|
|
14
17
|
|
|
15
|
-
def ip(s):
|
|
18
|
+
def ip(s: str) -> net.ipv4.Address:
|
|
16
19
|
return net.ipv4.Address(s)
|
|
17
20
|
|
|
18
21
|
|
|
@@ -21,7 +24,7 @@ class SeparatedFile:
|
|
|
21
24
|
seperator = None
|
|
22
25
|
format = None
|
|
23
26
|
|
|
24
|
-
def __init__(self, fp, seperator, format):
|
|
27
|
+
def __init__(self, fp: list[str], seperator: str | None, format: list[tuple]):
|
|
25
28
|
self.fp = fp
|
|
26
29
|
self.seperator = seperator
|
|
27
30
|
self.format = format
|
|
@@ -46,7 +49,7 @@ class SeparatedFile:
|
|
|
46
49
|
yield recordtype(**r)
|
|
47
50
|
|
|
48
51
|
|
|
49
|
-
def PassiveDnsFile(fp):
|
|
52
|
+
def PassiveDnsFile(fp: list[str]) -> SeparatedFile:
|
|
50
53
|
return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT)
|
|
51
54
|
|
|
52
55
|
|
|
@@ -63,7 +66,7 @@ PASSIVEDNS_FORMAT = [
|
|
|
63
66
|
]
|
|
64
67
|
|
|
65
68
|
|
|
66
|
-
def main():
|
|
69
|
+
def main() -> None:
|
|
67
70
|
rs = record.RecordOutput(sys.stdout)
|
|
68
71
|
for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"):
|
|
69
72
|
rs.write(r)
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import random
|
|
2
|
+
from datetime import datetime, timezone
|
|
2
3
|
|
|
3
|
-
from datetime import datetime
|
|
4
4
|
from flow import record
|
|
5
5
|
|
|
6
|
+
UTC_TIMEZONE = timezone.utc
|
|
7
|
+
|
|
6
8
|
descriptor = """
|
|
7
9
|
network/traffic/tcp/connection
|
|
8
10
|
datetime ts;
|
|
@@ -32,9 +34,9 @@ port_list = [
|
|
|
32
34
|
|
|
33
35
|
rs = record.RecordWriter()
|
|
34
36
|
|
|
35
|
-
for
|
|
37
|
+
for _ in range(500):
|
|
36
38
|
r = conn(
|
|
37
|
-
ts=datetime.now(),
|
|
39
|
+
ts=datetime.now(tz=UTC_TIMEZONE),
|
|
38
40
|
src=random.choice(ip_list),
|
|
39
41
|
srcport=random.choice(port_list),
|
|
40
42
|
dst=random.choice(ip_list),
|
|
@@ -9,6 +9,7 @@ import fastavro
|
|
|
9
9
|
|
|
10
10
|
from flow import record
|
|
11
11
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
12
|
+
from flow.record.context import get_app_context, match_record_with_context
|
|
12
13
|
from flow.record.selector import make_selector
|
|
13
14
|
from flow.record.utils import is_stdout
|
|
14
15
|
|
|
@@ -113,6 +114,8 @@ class AvroReader(AbstractReader):
|
|
|
113
114
|
}
|
|
114
115
|
|
|
115
116
|
def __iter__(self) -> Iterator[record.Record]:
|
|
117
|
+
ctx = get_app_context()
|
|
118
|
+
selector = self.selector
|
|
116
119
|
for obj in self.reader:
|
|
117
120
|
# Convert timestamp-micros fields back to datetime fields
|
|
118
121
|
for field_name in self.datetime_fields:
|
|
@@ -121,7 +124,7 @@ class AvroReader(AbstractReader):
|
|
|
121
124
|
obj[field_name] = EPOCH + timedelta(microseconds=value)
|
|
122
125
|
|
|
123
126
|
rec = self.desc.recordType(**obj)
|
|
124
|
-
if
|
|
127
|
+
if match_record_with_context(rec, selector, ctx):
|
|
125
128
|
yield rec
|
|
126
129
|
|
|
127
130
|
def close(self) -> None:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import contextlib
|
|
3
4
|
import csv
|
|
4
5
|
import sys
|
|
5
6
|
from pathlib import Path
|
|
@@ -8,8 +9,9 @@ from typing import TYPE_CHECKING
|
|
|
8
9
|
from flow.record import RecordDescriptor
|
|
9
10
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
10
11
|
from flow.record.base import Record, normalize_fieldname
|
|
12
|
+
from flow.record.context import get_app_context, match_record_with_context
|
|
11
13
|
from flow.record.selector import make_selector
|
|
12
|
-
from flow.record.utils import is_stdout
|
|
14
|
+
from flow.record.utils import boolean_argument, is_stdout
|
|
13
15
|
|
|
14
16
|
if TYPE_CHECKING:
|
|
15
17
|
from collections.abc import Iterator
|
|
@@ -17,11 +19,12 @@ if TYPE_CHECKING:
|
|
|
17
19
|
__usage__ = """
|
|
18
20
|
Comma-separated values (CSV) adapter
|
|
19
21
|
---
|
|
20
|
-
Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]
|
|
22
|
+
Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]&header=[HEADER]
|
|
21
23
|
Read usage: rdump csvfile://[PATH]?fields=[FIELDS]
|
|
22
24
|
[PATH]: path to file. Leave empty or "-" to output to stdout
|
|
23
25
|
|
|
24
26
|
Optional parameters:
|
|
27
|
+
[HEADER]: if set to false, it will not print the CSV header (default: true)
|
|
25
28
|
[TERMINATOR]: line terminator, default is \\r\\n
|
|
26
29
|
[FIELDS]: comma-separated list of CSV fields (in case of missing CSV header)
|
|
27
30
|
"""
|
|
@@ -34,6 +37,7 @@ class CsvfileWriter(AbstractWriter):
|
|
|
34
37
|
fields: str | list[str] | None = None,
|
|
35
38
|
exclude: str | list[str] | None = None,
|
|
36
39
|
lineterminator: str = "\r\n",
|
|
40
|
+
header: str = "true",
|
|
37
41
|
**kwargs,
|
|
38
42
|
):
|
|
39
43
|
self.fp = None
|
|
@@ -52,13 +56,16 @@ class CsvfileWriter(AbstractWriter):
|
|
|
52
56
|
self.fields = self.fields.split(",")
|
|
53
57
|
if isinstance(self.exclude, str):
|
|
54
58
|
self.exclude = self.exclude.split(",")
|
|
59
|
+
self.header = boolean_argument(header)
|
|
55
60
|
|
|
56
61
|
def write(self, r: Record) -> None:
|
|
57
62
|
rdict = r._asdict(fields=self.fields, exclude=self.exclude)
|
|
58
63
|
if not self.desc or self.desc != r._desc:
|
|
59
64
|
self.desc = r._desc
|
|
60
65
|
self.writer = csv.DictWriter(self.fp, rdict, lineterminator=self.lineterminator)
|
|
61
|
-
self.
|
|
66
|
+
if self.header:
|
|
67
|
+
# Write header only if it is requested
|
|
68
|
+
self.writer.writeheader()
|
|
62
69
|
self.writer.writerow(rdict)
|
|
63
70
|
|
|
64
71
|
def flush(self) -> None:
|
|
@@ -84,7 +91,8 @@ class CsvfileReader(AbstractReader):
|
|
|
84
91
|
|
|
85
92
|
self.dialect = "excel"
|
|
86
93
|
if self.fp.seekable():
|
|
87
|
-
|
|
94
|
+
with contextlib.suppress(csv.Error):
|
|
95
|
+
self.dialect = csv.Sniffer().sniff(self.fp.read(1024))
|
|
88
96
|
self.fp.seek(0)
|
|
89
97
|
self.reader = csv.reader(self.fp, dialect=self.dialect)
|
|
90
98
|
|
|
@@ -107,8 +115,10 @@ class CsvfileReader(AbstractReader):
|
|
|
107
115
|
self.fp = None
|
|
108
116
|
|
|
109
117
|
def __iter__(self) -> Iterator[Record]:
|
|
118
|
+
ctx = get_app_context()
|
|
119
|
+
selector = self.selector
|
|
110
120
|
for row in self.reader:
|
|
111
|
-
rdict = dict(zip(self.fields, row))
|
|
121
|
+
rdict = dict(zip(self.fields, row, strict=False))
|
|
112
122
|
record = self.desc.init_from_dict(rdict)
|
|
113
|
-
if
|
|
123
|
+
if match_record_with_context(record, selector, ctx):
|
|
114
124
|
yield record
|
|
@@ -4,8 +4,11 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
import queue
|
|
6
6
|
import threading
|
|
7
|
+
from contextlib import suppress
|
|
7
8
|
from typing import TYPE_CHECKING
|
|
8
9
|
|
|
10
|
+
import urllib3
|
|
11
|
+
|
|
9
12
|
try:
|
|
10
13
|
import elasticsearch
|
|
11
14
|
import elasticsearch.helpers
|
|
@@ -17,8 +20,10 @@ except ImportError:
|
|
|
17
20
|
|
|
18
21
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
19
22
|
from flow.record.base import Record, RecordDescriptor
|
|
23
|
+
from flow.record.context import get_app_context, match_record_with_context
|
|
20
24
|
from flow.record.fieldtypes import fieldtype_for_value
|
|
21
25
|
from flow.record.jsonpacker import JsonRecordPacker
|
|
26
|
+
from flow.record.utils import boolean_argument
|
|
22
27
|
|
|
23
28
|
if TYPE_CHECKING:
|
|
24
29
|
from collections.abc import Iterator
|
|
@@ -72,10 +77,12 @@ class ElasticWriter(AbstractWriter):
|
|
|
72
77
|
|
|
73
78
|
self.index = index
|
|
74
79
|
self.uri = uri
|
|
75
|
-
verify_certs =
|
|
76
|
-
http_compress =
|
|
77
|
-
self.hash_record =
|
|
80
|
+
verify_certs = boolean_argument(verify_certs)
|
|
81
|
+
http_compress = boolean_argument(http_compress)
|
|
82
|
+
self.hash_record = boolean_argument(hash_record)
|
|
78
83
|
queue_size = int(queue_size)
|
|
84
|
+
request_timeout = int(request_timeout)
|
|
85
|
+
self.max_retries = int(max_retries)
|
|
79
86
|
|
|
80
87
|
if not uri.lower().startswith(("http://", "https://")):
|
|
81
88
|
uri = "http://" + uri
|
|
@@ -92,7 +99,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
92
99
|
api_key=api_key,
|
|
93
100
|
request_timeout=request_timeout,
|
|
94
101
|
retry_on_timeout=True,
|
|
95
|
-
max_retries=max_retries,
|
|
102
|
+
max_retries=self.max_retries,
|
|
96
103
|
)
|
|
97
104
|
|
|
98
105
|
self.json_packer = JsonRecordPacker()
|
|
@@ -102,8 +109,6 @@ class ElasticWriter(AbstractWriter):
|
|
|
102
109
|
|
|
103
110
|
if not verify_certs:
|
|
104
111
|
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
105
|
-
import urllib3
|
|
106
|
-
|
|
107
112
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
108
113
|
|
|
109
114
|
self.metadata_fields = {}
|
|
@@ -112,10 +117,9 @@ class ElasticWriter(AbstractWriter):
|
|
|
112
117
|
self.metadata_fields[arg_key[6:]] = arg_val
|
|
113
118
|
|
|
114
119
|
def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
|
|
115
|
-
log.error("Exception in thread: %s", exc)
|
|
116
120
|
self.exception = getattr(exc, "exc_value", exc)
|
|
121
|
+
self.exception = enrich_elastic_exception(self.exception)
|
|
117
122
|
self.event.set()
|
|
118
|
-
self.close()
|
|
119
123
|
|
|
120
124
|
def record_to_document(self, record: Record, index: str) -> dict:
|
|
121
125
|
"""Convert a record to a Elasticsearch compatible document dictionary"""
|
|
@@ -168,13 +172,13 @@ class ElasticWriter(AbstractWriter):
|
|
|
168
172
|
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/helpers.html#elasticsearch.helpers.streaming_bulk
|
|
169
173
|
- https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/helpers/actions.py#L362
|
|
170
174
|
"""
|
|
175
|
+
|
|
171
176
|
for _ok, _item in elasticsearch.helpers.streaming_bulk(
|
|
172
177
|
self.es,
|
|
173
178
|
self.document_stream(),
|
|
174
179
|
raise_on_error=True,
|
|
175
180
|
raise_on_exception=True,
|
|
176
|
-
|
|
177
|
-
max_retries=3,
|
|
181
|
+
max_retries=self.max_retries,
|
|
178
182
|
):
|
|
179
183
|
pass
|
|
180
184
|
|
|
@@ -190,13 +194,17 @@ class ElasticWriter(AbstractWriter):
|
|
|
190
194
|
pass
|
|
191
195
|
|
|
192
196
|
def close(self) -> None:
|
|
193
|
-
self
|
|
194
|
-
|
|
197
|
+
if hasattr(self, "queue"):
|
|
198
|
+
self.queue.put(StopIteration)
|
|
199
|
+
|
|
200
|
+
if hasattr(self, "event"):
|
|
201
|
+
self.event.wait()
|
|
195
202
|
|
|
196
203
|
if hasattr(self, "es"):
|
|
197
|
-
|
|
204
|
+
with suppress(Exception):
|
|
205
|
+
self.es.close()
|
|
198
206
|
|
|
199
|
-
if self.exception:
|
|
207
|
+
if hasattr(self, "exception") and self.exception:
|
|
200
208
|
raise self.exception
|
|
201
209
|
|
|
202
210
|
|
|
@@ -216,8 +224,10 @@ class ElasticReader(AbstractReader):
|
|
|
216
224
|
self.index = index
|
|
217
225
|
self.uri = uri
|
|
218
226
|
self.selector = selector
|
|
219
|
-
verify_certs =
|
|
220
|
-
http_compress =
|
|
227
|
+
verify_certs = boolean_argument(verify_certs)
|
|
228
|
+
http_compress = boolean_argument(http_compress)
|
|
229
|
+
request_timeout = int(request_timeout)
|
|
230
|
+
max_retries = int(max_retries)
|
|
221
231
|
|
|
222
232
|
if not uri.lower().startswith(("http://", "https://")):
|
|
223
233
|
uri = "http://" + uri
|
|
@@ -234,11 +244,11 @@ class ElasticReader(AbstractReader):
|
|
|
234
244
|
|
|
235
245
|
if not verify_certs:
|
|
236
246
|
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
237
|
-
import urllib3
|
|
238
|
-
|
|
239
247
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
240
248
|
|
|
241
249
|
def __iter__(self) -> Iterator[Record]:
|
|
250
|
+
ctx = get_app_context()
|
|
251
|
+
selector = self.selector
|
|
242
252
|
res = self.es.search(index=self.index)
|
|
243
253
|
log.debug("ElasticSearch returned %u hits", res["hits"]["total"]["value"])
|
|
244
254
|
for hit in res["hits"]["hits"]:
|
|
@@ -248,9 +258,38 @@ class ElasticReader(AbstractReader):
|
|
|
248
258
|
fields = [(fieldtype_for_value(val, "string"), key) for key, val in source.items()]
|
|
249
259
|
desc = RecordDescriptor("elastic/record", fields)
|
|
250
260
|
obj = desc(**source)
|
|
251
|
-
if
|
|
261
|
+
if match_record_with_context(obj, selector, ctx):
|
|
252
262
|
yield obj
|
|
253
263
|
|
|
254
264
|
def close(self) -> None:
|
|
255
265
|
if hasattr(self, "es"):
|
|
256
266
|
self.es.close()
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def enrich_elastic_exception(exception: Exception) -> Exception:
|
|
270
|
+
"""Extend the exception with error information from Elastic.
|
|
271
|
+
|
|
272
|
+
Resources:
|
|
273
|
+
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
|
|
274
|
+
"""
|
|
275
|
+
errors = set()
|
|
276
|
+
if hasattr(exception, "errors"):
|
|
277
|
+
try:
|
|
278
|
+
for error in exception.errors:
|
|
279
|
+
index_dict = error.get("index", {})
|
|
280
|
+
status = index_dict.get("status")
|
|
281
|
+
error_dict = index_dict.get("error", {})
|
|
282
|
+
error_type = error_dict.get("type")
|
|
283
|
+
error_reason = error_dict.get("reason", "")
|
|
284
|
+
|
|
285
|
+
errors.add(f"({status} {error_type} {error_reason})")
|
|
286
|
+
except Exception:
|
|
287
|
+
errors.add("unable to extend errors")
|
|
288
|
+
|
|
289
|
+
# append errors to original exception message
|
|
290
|
+
error_str = ", ".join(errors)
|
|
291
|
+
original_message = exception.args[0] if exception.args else ""
|
|
292
|
+
new_message = f"{original_message} {error_str}"
|
|
293
|
+
exception.args = (new_message, *exception.args[1:])
|
|
294
|
+
|
|
295
|
+
return exception
|
|
@@ -6,9 +6,10 @@ from typing import TYPE_CHECKING, BinaryIO
|
|
|
6
6
|
from flow import record
|
|
7
7
|
from flow.record import JsonRecordPacker
|
|
8
8
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
9
|
+
from flow.record.context import get_app_context, match_record_with_context
|
|
9
10
|
from flow.record.fieldtypes import fieldtype_for_value
|
|
10
11
|
from flow.record.selector import make_selector
|
|
11
|
-
from flow.record.utils import is_stdout
|
|
12
|
+
from flow.record.utils import boolean_argument, is_stdout
|
|
12
13
|
|
|
13
14
|
if TYPE_CHECKING:
|
|
14
15
|
from collections.abc import Iterator
|
|
@@ -33,7 +34,7 @@ class JsonfileWriter(AbstractWriter):
|
|
|
33
34
|
def __init__(
|
|
34
35
|
self, path: str | Path | BinaryIO, indent: str | int | None = None, descriptors: bool = True, **kwargs
|
|
35
36
|
):
|
|
36
|
-
self.descriptors =
|
|
37
|
+
self.descriptors = boolean_argument(descriptors)
|
|
37
38
|
self.fp = record.open_path_or_stream(path, "w")
|
|
38
39
|
if isinstance(indent, str):
|
|
39
40
|
indent = int(indent)
|
|
@@ -75,10 +76,12 @@ class JsonfileReader(AbstractReader):
|
|
|
75
76
|
self.fp = None
|
|
76
77
|
|
|
77
78
|
def __iter__(self) -> Iterator[Record]:
|
|
79
|
+
ctx = get_app_context()
|
|
80
|
+
selector = self.selector
|
|
78
81
|
for line in self.fp:
|
|
79
82
|
obj = self.packer.unpack(line)
|
|
80
83
|
if isinstance(obj, record.Record):
|
|
81
|
-
if
|
|
84
|
+
if match_record_with_context(obj, selector, ctx):
|
|
82
85
|
yield obj
|
|
83
86
|
elif isinstance(obj, record.RecordDescriptor):
|
|
84
87
|
pass
|
|
@@ -90,5 +93,5 @@ class JsonfileReader(AbstractReader):
|
|
|
90
93
|
]
|
|
91
94
|
desc = record.RecordDescriptor("json/record", fields)
|
|
92
95
|
obj = desc(**jd)
|
|
93
|
-
if
|
|
96
|
+
if match_record_with_context(obj, selector, ctx):
|
|
94
97
|
yield obj
|
|
@@ -7,6 +7,7 @@ from pymongo import MongoClient
|
|
|
7
7
|
|
|
8
8
|
from flow import record
|
|
9
9
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
10
|
+
from flow.record.context import get_app_context, match_record_with_context
|
|
10
11
|
from flow.record.selector import make_selector
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
@@ -91,6 +92,8 @@ class MongoReader(AbstractReader):
|
|
|
91
92
|
|
|
92
93
|
def __iter__(self) -> Iterator[Record]:
|
|
93
94
|
desc = None
|
|
95
|
+
ctx = get_app_context()
|
|
96
|
+
selector = self.selector
|
|
94
97
|
for r in self.collection.find():
|
|
95
98
|
if r["_type"] not in self.descriptors:
|
|
96
99
|
packed_desc = self.coll_descriptors.find({"name": r["_type"]})[0]["descriptor"]
|
|
@@ -106,5 +109,5 @@ class MongoReader(AbstractReader):
|
|
|
106
109
|
r[k] = int(r[k])
|
|
107
110
|
|
|
108
111
|
obj = desc(**r)
|
|
109
|
-
if
|
|
112
|
+
if match_record_with_context(obj, selector, ctx):
|
|
110
113
|
yield obj
|
|
@@ -18,7 +18,7 @@ except ImportError:
|
|
|
18
18
|
|
|
19
19
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
20
20
|
from flow.record.jsonpacker import JsonRecordPacker
|
|
21
|
-
from flow.record.utils import to_base64, to_bytes, to_str
|
|
21
|
+
from flow.record.utils import boolean_argument, to_base64, to_bytes, to_str
|
|
22
22
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
24
24
|
from flow.record.base import Record
|
|
@@ -35,7 +35,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
|
|
|
35
35
|
[SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
-
log = logging.getLogger(
|
|
38
|
+
log = logging.getLogger(__name__)
|
|
39
39
|
|
|
40
40
|
# Amount of records to bundle into a single request when sending data over HTTP(S).
|
|
41
41
|
RECORD_BUFFER_LIMIT = 20
|
|
@@ -218,7 +218,7 @@ class SplunkWriter(AbstractWriter):
|
|
|
218
218
|
self.token = f"Splunk {self.token}"
|
|
219
219
|
|
|
220
220
|
# Assume verify=True unless specified otherwise.
|
|
221
|
-
self.verify =
|
|
221
|
+
self.verify = boolean_argument(ssl_verify)
|
|
222
222
|
if not self.verify:
|
|
223
223
|
log.warning("Certificate verification is disabled")
|
|
224
224
|
|
|
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING
|
|
|
9
9
|
from flow.record import Record, RecordDescriptor
|
|
10
10
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
11
11
|
from flow.record.base import RESERVED_FIELDS, normalize_fieldname
|
|
12
|
+
from flow.record.context import get_app_context, match_record_with_context
|
|
12
13
|
from flow.record.selector import Selector, make_selector
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
@@ -191,14 +192,16 @@ class SqliteReader(AbstractReader):
|
|
|
191
192
|
row[idx] = None
|
|
192
193
|
elif isinstance(value, str):
|
|
193
194
|
row[idx] = value.encode(errors="surrogateescape")
|
|
194
|
-
yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
|
|
195
|
+
yield descriptor_cls.init_from_dict(dict(zip(fnames, row, strict=False)))
|
|
195
196
|
|
|
196
197
|
def __iter__(self) -> Iterator[Record]:
|
|
197
198
|
"""Iterate over all tables in the database and yield records."""
|
|
199
|
+
ctx = get_app_context()
|
|
200
|
+
selector = self.selector
|
|
198
201
|
for table_name in self.table_names():
|
|
199
202
|
self.logger.debug("Reading table: %s", table_name)
|
|
200
203
|
for record in self.read_table(table_name):
|
|
201
|
-
if
|
|
204
|
+
if match_record_with_context(record, selector, ctx):
|
|
202
205
|
yield record
|
|
203
206
|
|
|
204
207
|
|