flow.record 3.21.dev4__tar.gz → 3.21.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/PKG-INFO +3 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/csvfile.py +3 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/elastic.py +46 -9
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/splunk.py +1 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/base.py +1 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/jsonpacker.py +1 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/stream.py +17 -6
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/tools/rdump.py +60 -6
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/utils.py +2 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/version.py +2 -2
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow.record.egg-info/PKG-INFO +3 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow.record.egg-info/requires.txt +2 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/pyproject.toml +3 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_rdump.py +0 -1
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_regression.py +10 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tox.ini +1 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/.git-blame-ignore-revs +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/COPYRIGHT +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/LICENSE +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/MANIFEST.in +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/README.md +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/examples/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/examples/filesystem.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/examples/passivedns.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/examples/records.json +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/examples/tcpconn.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/line.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/split.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/sqlite.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/text.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/adapter/xlsx.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/exceptions.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/packer.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/selector.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow/record/whitelist.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow.record.egg-info/SOURCES.txt +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/setup.cfg +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/__init__.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/_utils.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/docs/Makefile +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/docs/conf.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/docs/index.rst +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/selector_explain_example.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/standalone_test.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_adapter_line.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_adapter_text.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_avro.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_deprecations.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_elastic_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_fieldtypes.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_json_packer.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_packer.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_record.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_record_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_selector.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_splunk_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_utils.py +0 -0
- {flow_record-3.21.dev4 → flow_record-3.21.dev6}/tests/test_xlsx_adapter.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.21.dev4
|
|
3
|
+
Version: 3.21.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -47,9 +47,11 @@ Requires-Dist: flow.record[xlsx]; extra == "test"
|
|
|
47
47
|
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
48
48
|
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
49
49
|
Requires-Dist: tqdm; extra == "test"
|
|
50
|
+
Requires-Dist: structlog; extra == "test"
|
|
50
51
|
Provides-Extra: full
|
|
51
52
|
Requires-Dist: flow.record[compression]; extra == "full"
|
|
52
53
|
Requires-Dist: tqdm; extra == "full"
|
|
54
|
+
Requires-Dist: structlog; extra == "full"
|
|
53
55
|
Dynamic: license-file
|
|
54
56
|
|
|
55
57
|
# flow.record
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import contextlib
|
|
3
4
|
import csv
|
|
4
5
|
import sys
|
|
5
6
|
from pathlib import Path
|
|
@@ -89,7 +90,8 @@ class CsvfileReader(AbstractReader):
|
|
|
89
90
|
|
|
90
91
|
self.dialect = "excel"
|
|
91
92
|
if self.fp.seekable():
|
|
92
|
-
|
|
93
|
+
with contextlib.suppress(csv.Error):
|
|
94
|
+
self.dialect = csv.Sniffer().sniff(self.fp.read(1024))
|
|
93
95
|
self.fp.seek(0)
|
|
94
96
|
self.reader = csv.reader(self.fp, dialect=self.dialect)
|
|
95
97
|
|
|
@@ -4,6 +4,7 @@ import hashlib
|
|
|
4
4
|
import logging
|
|
5
5
|
import queue
|
|
6
6
|
import threading
|
|
7
|
+
from contextlib import suppress
|
|
7
8
|
from typing import TYPE_CHECKING
|
|
8
9
|
|
|
9
10
|
import urllib3
|
|
@@ -79,6 +80,8 @@ class ElasticWriter(AbstractWriter):
|
|
|
79
80
|
http_compress = boolean_argument(http_compress)
|
|
80
81
|
self.hash_record = boolean_argument(hash_record)
|
|
81
82
|
queue_size = int(queue_size)
|
|
83
|
+
request_timeout = int(request_timeout)
|
|
84
|
+
self.max_retries = int(max_retries)
|
|
82
85
|
|
|
83
86
|
if not uri.lower().startswith(("http://", "https://")):
|
|
84
87
|
uri = "http://" + uri
|
|
@@ -95,7 +98,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
95
98
|
api_key=api_key,
|
|
96
99
|
request_timeout=request_timeout,
|
|
97
100
|
retry_on_timeout=True,
|
|
98
|
-
max_retries=max_retries,
|
|
101
|
+
max_retries=self.max_retries,
|
|
99
102
|
)
|
|
100
103
|
|
|
101
104
|
self.json_packer = JsonRecordPacker()
|
|
@@ -113,10 +116,9 @@ class ElasticWriter(AbstractWriter):
|
|
|
113
116
|
self.metadata_fields[arg_key[6:]] = arg_val
|
|
114
117
|
|
|
115
118
|
def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
|
|
116
|
-
log.error("Exception in thread: %s", exc)
|
|
117
119
|
self.exception = getattr(exc, "exc_value", exc)
|
|
120
|
+
self.exception = enrich_elastic_exception(self.exception)
|
|
118
121
|
self.event.set()
|
|
119
|
-
self.close()
|
|
120
122
|
|
|
121
123
|
def record_to_document(self, record: Record, index: str) -> dict:
|
|
122
124
|
"""Convert a record to a Elasticsearch compatible document dictionary"""
|
|
@@ -169,13 +171,13 @@ class ElasticWriter(AbstractWriter):
|
|
|
169
171
|
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/helpers.html#elasticsearch.helpers.streaming_bulk
|
|
170
172
|
- https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/helpers/actions.py#L362
|
|
171
173
|
"""
|
|
174
|
+
|
|
172
175
|
for _ok, _item in elasticsearch.helpers.streaming_bulk(
|
|
173
176
|
self.es,
|
|
174
177
|
self.document_stream(),
|
|
175
178
|
raise_on_error=True,
|
|
176
179
|
raise_on_exception=True,
|
|
177
|
-
|
|
178
|
-
max_retries=3,
|
|
180
|
+
max_retries=self.max_retries,
|
|
179
181
|
):
|
|
180
182
|
pass
|
|
181
183
|
|
|
@@ -191,13 +193,17 @@ class ElasticWriter(AbstractWriter):
|
|
|
191
193
|
pass
|
|
192
194
|
|
|
193
195
|
def close(self) -> None:
|
|
194
|
-
self
|
|
195
|
-
|
|
196
|
+
if hasattr(self, "queue"):
|
|
197
|
+
self.queue.put(StopIteration)
|
|
198
|
+
|
|
199
|
+
if hasattr(self, "event"):
|
|
200
|
+
self.event.wait()
|
|
196
201
|
|
|
197
202
|
if hasattr(self, "es"):
|
|
198
|
-
|
|
203
|
+
with suppress(Exception):
|
|
204
|
+
self.es.close()
|
|
199
205
|
|
|
200
|
-
if self.exception:
|
|
206
|
+
if hasattr(self, "exception") and self.exception:
|
|
201
207
|
raise self.exception
|
|
202
208
|
|
|
203
209
|
|
|
@@ -219,6 +225,8 @@ class ElasticReader(AbstractReader):
|
|
|
219
225
|
self.selector = selector
|
|
220
226
|
verify_certs = boolean_argument(verify_certs)
|
|
221
227
|
http_compress = boolean_argument(http_compress)
|
|
228
|
+
request_timeout = int(request_timeout)
|
|
229
|
+
max_retries = int(max_retries)
|
|
222
230
|
|
|
223
231
|
if not uri.lower().startswith(("http://", "https://")):
|
|
224
232
|
uri = "http://" + uri
|
|
@@ -253,3 +261,32 @@ class ElasticReader(AbstractReader):
|
|
|
253
261
|
def close(self) -> None:
|
|
254
262
|
if hasattr(self, "es"):
|
|
255
263
|
self.es.close()
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def enrich_elastic_exception(exception: Exception) -> Exception:
|
|
267
|
+
"""Extend the exception with error information from Elastic.
|
|
268
|
+
|
|
269
|
+
Resources:
|
|
270
|
+
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
|
|
271
|
+
"""
|
|
272
|
+
errors = set()
|
|
273
|
+
if hasattr(exception, "errors"):
|
|
274
|
+
try:
|
|
275
|
+
for error in exception.errors:
|
|
276
|
+
index_dict = error.get("index", {})
|
|
277
|
+
status = index_dict.get("status")
|
|
278
|
+
error_dict = index_dict.get("error", {})
|
|
279
|
+
error_type = error_dict.get("type")
|
|
280
|
+
error_reason = error_dict.get("reason", "")
|
|
281
|
+
|
|
282
|
+
errors.add(f"({status} {error_type} {error_reason})")
|
|
283
|
+
except Exception:
|
|
284
|
+
errors.add("unable to extend errors")
|
|
285
|
+
|
|
286
|
+
# append errors to original exception message
|
|
287
|
+
error_str = ", ".join(errors)
|
|
288
|
+
original_message = exception.args[0] if exception.args else ""
|
|
289
|
+
new_message = f"{original_message} {error_str}"
|
|
290
|
+
exception.args = (new_message,) + exception.args[1:]
|
|
291
|
+
|
|
292
|
+
return exception
|
|
@@ -35,7 +35,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
|
|
|
35
35
|
[SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
|
-
log = logging.getLogger(
|
|
38
|
+
log = logging.getLogger(__name__)
|
|
39
39
|
|
|
40
40
|
# Amount of records to bundle into a single request when sending data over HTTP(S).
|
|
41
41
|
RECORD_BUFFER_LIMIT = 20
|
|
@@ -11,7 +11,7 @@ from flow.record.base import Record, RecordDescriptor
|
|
|
11
11
|
from flow.record.exceptions import RecordDescriptorNotFound
|
|
12
12
|
from flow.record.utils import EventHandler
|
|
13
13
|
|
|
14
|
-
log = logging.getLogger(
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class JsonRecordPacker:
|
|
@@ -15,14 +15,14 @@ from flow.record.base import Record, RecordDescriptor, RecordReader
|
|
|
15
15
|
from flow.record.fieldtypes import fieldtype_for_value
|
|
16
16
|
from flow.record.packer import RecordPacker
|
|
17
17
|
from flow.record.selector import make_selector
|
|
18
|
-
from flow.record.utils import is_stdout
|
|
18
|
+
from flow.record.utils import LOGGING_TRACE_LEVEL, is_stdout
|
|
19
19
|
|
|
20
20
|
if TYPE_CHECKING:
|
|
21
21
|
from collections.abc import Iterator
|
|
22
22
|
|
|
23
23
|
from flow.record.adapter import AbstractWriter
|
|
24
24
|
|
|
25
|
-
log = logging.getLogger(
|
|
25
|
+
log = logging.getLogger(__name__)
|
|
26
26
|
|
|
27
27
|
aRepr = reprlib.Repr()
|
|
28
28
|
aRepr.maxother = 255
|
|
@@ -146,8 +146,11 @@ class RecordStreamReader:
|
|
|
146
146
|
def record_stream(sources: list[str], selector: str | None = None) -> Iterator[Record]:
|
|
147
147
|
"""Return a Record stream generator from the given Record sources.
|
|
148
148
|
|
|
149
|
-
|
|
149
|
+
If there are multiple sources, exceptions are caught and logged, and the stream continues with the next source.
|
|
150
150
|
"""
|
|
151
|
+
|
|
152
|
+
trace = log.isEnabledFor(LOGGING_TRACE_LEVEL)
|
|
153
|
+
|
|
151
154
|
log.debug("Record stream with selector: %r", selector)
|
|
152
155
|
for src in sources:
|
|
153
156
|
# Inform user that we are reading from stdin
|
|
@@ -161,12 +164,20 @@ def record_stream(sources: list[str], selector: str | None = None) -> Iterator[R
|
|
|
161
164
|
yield from reader
|
|
162
165
|
reader.close()
|
|
163
166
|
except IOError as e:
|
|
164
|
-
|
|
167
|
+
if len(sources) == 1:
|
|
168
|
+
raise
|
|
169
|
+
else:
|
|
170
|
+
log.error("%s(%r): %s", reader, src, e)
|
|
171
|
+
if trace:
|
|
172
|
+
log.exception("Full traceback")
|
|
165
173
|
except KeyboardInterrupt:
|
|
166
174
|
raise
|
|
167
175
|
except Exception as e:
|
|
168
|
-
|
|
169
|
-
|
|
176
|
+
if len(sources) == 1:
|
|
177
|
+
raise
|
|
178
|
+
else:
|
|
179
|
+
log.warning("Exception in %r for %r: %s -- skipping to next reader", reader, src, aRepr.repr(e))
|
|
180
|
+
continue
|
|
170
181
|
|
|
171
182
|
|
|
172
183
|
class PathTemplateWriter:
|
|
@@ -15,7 +15,7 @@ import flow.record.adapter
|
|
|
15
15
|
from flow.record import RecordWriter, iter_timestamped_records, record_stream
|
|
16
16
|
from flow.record.selector import make_selector
|
|
17
17
|
from flow.record.stream import RecordFieldRewriter
|
|
18
|
-
from flow.record.utils import catch_sigpipe
|
|
18
|
+
from flow.record.utils import LOGGING_TRACE_LEVEL, catch_sigpipe
|
|
19
19
|
|
|
20
20
|
try:
|
|
21
21
|
from flow.record.version import version
|
|
@@ -30,6 +30,15 @@ try:
|
|
|
30
30
|
except ImportError:
|
|
31
31
|
HAS_TQDM = False
|
|
32
32
|
|
|
33
|
+
try:
|
|
34
|
+
import structlog
|
|
35
|
+
|
|
36
|
+
HAS_STRUCTLOG = True
|
|
37
|
+
|
|
38
|
+
except ImportError:
|
|
39
|
+
HAS_STRUCTLOG = False
|
|
40
|
+
|
|
41
|
+
|
|
33
42
|
log = logging.getLogger(__name__)
|
|
34
43
|
|
|
35
44
|
|
|
@@ -129,6 +138,11 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
129
138
|
action="store_true",
|
|
130
139
|
help="Show progress bar (requires tqdm)",
|
|
131
140
|
)
|
|
141
|
+
output.add_argument(
|
|
142
|
+
"--stats",
|
|
143
|
+
action="store_true",
|
|
144
|
+
help="Show count of processed records",
|
|
145
|
+
)
|
|
132
146
|
|
|
133
147
|
advanced = parser.add_argument_group("advanced")
|
|
134
148
|
advanced.add_argument(
|
|
@@ -195,10 +209,30 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
195
209
|
|
|
196
210
|
args = parser.parse_args(argv)
|
|
197
211
|
|
|
198
|
-
levels = [logging.WARNING, logging.INFO, logging.DEBUG]
|
|
212
|
+
levels = [logging.WARNING, logging.INFO, logging.DEBUG, LOGGING_TRACE_LEVEL]
|
|
199
213
|
level = levels[min(len(levels) - 1, args.verbose)]
|
|
200
214
|
logging.basicConfig(level=level, format="%(asctime)s %(levelname)s %(message)s")
|
|
201
215
|
|
|
216
|
+
if HAS_STRUCTLOG:
|
|
217
|
+
# We have structlog, configure Python logging to use it for rendering
|
|
218
|
+
console_renderer = structlog.dev.ConsoleRenderer()
|
|
219
|
+
handler = logging.StreamHandler()
|
|
220
|
+
handler.setFormatter(
|
|
221
|
+
structlog.stdlib.ProcessorFormatter(
|
|
222
|
+
processor=console_renderer,
|
|
223
|
+
foreign_pre_chain=[
|
|
224
|
+
structlog.stdlib.add_logger_name,
|
|
225
|
+
structlog.stdlib.add_log_level,
|
|
226
|
+
structlog.processors.TimeStamper(fmt="iso"),
|
|
227
|
+
],
|
|
228
|
+
)
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# Clear existing handlers and add our structlog handler
|
|
232
|
+
root_logger = logging.getLogger()
|
|
233
|
+
root_logger.handlers.clear()
|
|
234
|
+
root_logger.addHandler(handler)
|
|
235
|
+
|
|
202
236
|
fields_to_exclude = args.exclude.split(",") if args.exclude else []
|
|
203
237
|
fields = args.fields.split(",") if args.fields else []
|
|
204
238
|
|
|
@@ -252,6 +286,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
252
286
|
|
|
253
287
|
count = 0
|
|
254
288
|
record_writer = None
|
|
289
|
+
ret = 0
|
|
255
290
|
|
|
256
291
|
try:
|
|
257
292
|
record_writer = RecordWriter(uri)
|
|
@@ -279,14 +314,33 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
279
314
|
else:
|
|
280
315
|
record_writer.write(rec)
|
|
281
316
|
|
|
317
|
+
except Exception as e:
|
|
318
|
+
print_error(e)
|
|
319
|
+
|
|
320
|
+
# Prevent throwing an exception twice when deconstructing the record writer.
|
|
321
|
+
if hasattr(record_writer, "exception") and record_writer.exception is e:
|
|
322
|
+
record_writer.exception = None
|
|
323
|
+
|
|
324
|
+
ret = 1
|
|
325
|
+
|
|
282
326
|
finally:
|
|
283
327
|
if record_writer:
|
|
284
|
-
|
|
328
|
+
# Exceptions raised in threads can be thrown when deconstructing the writer.
|
|
329
|
+
try:
|
|
330
|
+
record_writer.__exit__()
|
|
331
|
+
except Exception as e:
|
|
332
|
+
print_error(e)
|
|
333
|
+
|
|
334
|
+
if (args.list or args.stats) and not args.progress:
|
|
335
|
+
print(f"Processed {count} records", file=sys.stdout if args.list else sys.stderr)
|
|
336
|
+
|
|
337
|
+
return ret
|
|
285
338
|
|
|
286
|
-
if args.list:
|
|
287
|
-
print(f"Processed {count} records")
|
|
288
339
|
|
|
289
|
-
|
|
340
|
+
def print_error(e: Exception) -> None:
|
|
341
|
+
log.error("rdump encountered a fatal error: %s", e)
|
|
342
|
+
if log.isEnabledFor(LOGGING_TRACE_LEVEL):
|
|
343
|
+
log.exception("Full traceback")
|
|
290
344
|
|
|
291
345
|
|
|
292
346
|
if __name__ == "__main__":
|
|
@@ -17,5 +17,5 @@ __version__: str
|
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
|
18
18
|
version_tuple: VERSION_TUPLE
|
|
19
19
|
|
|
20
|
-
__version__ = version = '3.21.dev4'
|
|
21
|
-
__version_tuple__ = version_tuple = (3, 21, 'dev4')
|
|
20
|
+
__version__ = version = '3.21.dev6'
|
|
21
|
+
__version_tuple__ = version_tuple = (3, 21, 'dev6')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.21.dev4
|
|
3
|
+
Version: 3.21.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -47,9 +47,11 @@ Requires-Dist: flow.record[xlsx]; extra == "test"
|
|
|
47
47
|
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
48
48
|
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
49
49
|
Requires-Dist: tqdm; extra == "test"
|
|
50
|
+
Requires-Dist: structlog; extra == "test"
|
|
50
51
|
Provides-Extra: full
|
|
51
52
|
Requires-Dist: flow.record[compression]; extra == "full"
|
|
52
53
|
Requires-Dist: tqdm; extra == "full"
|
|
54
|
+
Requires-Dist: structlog; extra == "full"
|
|
53
55
|
Dynamic: license-file
|
|
54
56
|
|
|
55
57
|
# flow.record
|
|
@@ -23,6 +23,7 @@ elasticsearch
|
|
|
23
23
|
[full]
|
|
24
24
|
flow.record[compression]
|
|
25
25
|
tqdm
|
|
26
|
+
structlog
|
|
26
27
|
|
|
27
28
|
[geoip]
|
|
28
29
|
maxminddb
|
|
@@ -36,6 +37,7 @@ flow.record[avro]
|
|
|
36
37
|
flow.record[elastic]
|
|
37
38
|
flow.record[xlsx]
|
|
38
39
|
tqdm
|
|
40
|
+
structlog
|
|
39
41
|
|
|
40
42
|
[test:platform_python_implementation != "PyPy" and python_version < "3.12"]
|
|
41
43
|
duckdb
|
|
@@ -67,10 +67,12 @@ test = [
|
|
|
67
67
|
"duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
|
|
68
68
|
"pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
|
|
69
69
|
"tqdm",
|
|
70
|
+
"structlog",
|
|
70
71
|
]
|
|
71
72
|
full = [
|
|
72
73
|
"flow.record[compression]",
|
|
73
74
|
"tqdm",
|
|
75
|
+
"structlog",
|
|
74
76
|
]
|
|
75
77
|
|
|
76
78
|
[project.scripts]
|
|
@@ -120,7 +122,7 @@ select = [
|
|
|
120
122
|
"FURB",
|
|
121
123
|
"RUF",
|
|
122
124
|
]
|
|
123
|
-
ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"]
|
|
125
|
+
ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003", "TRY400"]
|
|
124
126
|
|
|
125
127
|
[tool.ruff.lint.per-file-ignores]
|
|
126
128
|
"tests/docs/**" = ["INP001"]
|
|
@@ -720,4 +720,3 @@ def test_rdump_list_progress(tmp_path: Path, capsys: pytest.CaptureFixture) -> N
|
|
|
720
720
|
|
|
721
721
|
# stdout should contain the RecordDescriptor definition and count
|
|
722
722
|
assert "# <RecordDescriptor test/rdump/progress, hash=eeb21156>" in captured.out
|
|
723
|
-
assert "Processed 100 records" in captured.out
|
|
@@ -711,5 +711,15 @@ def test_rdump_selected_fields(capsysbinary: pytest.CaptureFixture) -> None:
|
|
|
711
711
|
assert captured.out == b"Q42eWSaF,A sample pastebin record,text\r\n"
|
|
712
712
|
|
|
713
713
|
|
|
714
|
+
def test_rdump_csv_sniff(tmp_path: Path, capsysbinary: pytest.CaptureFixture) -> None:
|
|
715
|
+
csv_path = tmp_path / "test.csv"
|
|
716
|
+
csv_path.write_text("ip,common_name,vulnerable\n127.0.0.1,localhost,1\n192.168.4.20,")
|
|
717
|
+
rdump.main([str(csv_path)])
|
|
718
|
+
|
|
719
|
+
captured = capsysbinary.readouterr()
|
|
720
|
+
assert b"<csv/reader ip='127.0.0.1' common_name='localhost' vulnerable='1'>" in captured.out
|
|
721
|
+
assert b"<csv/reader ip='192.168.4.20' common_name='' vulnerable=None>" in captured.out
|
|
722
|
+
|
|
723
|
+
|
|
714
724
|
if __name__ == "__main__":
|
|
715
725
|
__import__("standalone_test").main(globals())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|