flow.record 3.22.dev6__tar.gz → 3.22.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/PKG-INFO +1 -1
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/elastic.py +71 -32
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/tools/rdump.py +10 -3
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/version.py +3 -3
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow.record.egg-info/PKG-INFO +1 -1
- flow_record-3.22.dev7/tests/adapter/test_elastic.py +208 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/tools/test_rdump.py +34 -0
- flow_record-3.22.dev6/tests/adapter/test_elastic.py +0 -59
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/.git-blame-ignore-revs +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/.gitattributes +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/COPYRIGHT +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/LICENSE +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/MANIFEST.in +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/README.md +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/examples/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/examples/filesystem.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/examples/passivedns.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/examples/records.json +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/examples/selectors.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/examples/tcpconn.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/line.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/split.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/splunk.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/sqlite.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/text.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/adapter/xlsx.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/base.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/context.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/exceptions.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/jsonpacker.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/packer.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/selector.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/stream.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/utils.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow/record/whitelist.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow.record.egg-info/SOURCES.txt +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/pyproject.toml +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/setup.cfg +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/_data/.gitkeep +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/_docs/Makefile +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/_docs/conf.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/_docs/index.rst +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/_utils.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_avro.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_csv.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_json.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_line.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_splunk.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_sqlite_duckdb.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_text.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/adapter/test_xlsx.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/conftest.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/fieldtypes/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/fieldtypes/test_boolean.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/fieldtypes/test_fieldtypes.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/fieldtypes/test_ip.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/packer/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/packer/test_json_packer.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/packer/test_packer.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/record/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/record/test_adapter.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/record/test_context.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/record/test_descriptor.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/record/test_multi_timestamp.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/record/test_record.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/selector/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/selector/test_compiled.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/selector/test_selectors.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/test_deprecations.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/test_regressions.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/test_utils.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tests/tools/__init__.py +0 -0
- {flow_record-3.22.dev6 → flow_record-3.22.dev7}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.22.dev6
|
|
3
|
+
Version: 3.22.dev7
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
|
+
import json
|
|
4
5
|
import logging
|
|
5
6
|
import queue
|
|
7
|
+
import sys
|
|
6
8
|
import threading
|
|
7
9
|
from contextlib import suppress
|
|
8
10
|
from typing import TYPE_CHECKING
|
|
9
11
|
|
|
10
|
-
import urllib3
|
|
11
|
-
|
|
12
12
|
try:
|
|
13
13
|
import elasticsearch
|
|
14
14
|
import elasticsearch.helpers
|
|
@@ -85,7 +85,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
85
85
|
self.max_retries = int(max_retries)
|
|
86
86
|
|
|
87
87
|
if not uri.lower().startswith(("http://", "https://")):
|
|
88
|
-
uri = "
|
|
88
|
+
uri = "https://" + uri
|
|
89
89
|
|
|
90
90
|
self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
|
|
91
91
|
self.event = threading.Event()
|
|
@@ -95,6 +95,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
95
95
|
self.es = elasticsearch.Elasticsearch(
|
|
96
96
|
uri,
|
|
97
97
|
verify_certs=verify_certs,
|
|
98
|
+
ssl_show_warn=verify_certs,
|
|
98
99
|
http_compress=http_compress,
|
|
99
100
|
api_key=api_key,
|
|
100
101
|
request_timeout=request_timeout,
|
|
@@ -107,10 +108,6 @@ class ElasticWriter(AbstractWriter):
|
|
|
107
108
|
self.thread = threading.Thread(target=self.streaming_bulk_thread)
|
|
108
109
|
self.thread.start()
|
|
109
110
|
|
|
110
|
-
if not verify_certs:
|
|
111
|
-
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
112
|
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
113
|
-
|
|
114
111
|
self.metadata_fields = {}
|
|
115
112
|
for arg_key, arg_val in kwargs.items():
|
|
116
113
|
if arg_key.startswith("_meta_"):
|
|
@@ -118,7 +115,13 @@ class ElasticWriter(AbstractWriter):
|
|
|
118
115
|
|
|
119
116
|
def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
|
|
120
117
|
self.exception = getattr(exc, "exc_value", exc)
|
|
121
|
-
|
|
118
|
+
|
|
119
|
+
# version guard for add_note(), which was added in Python 3.11
|
|
120
|
+
# TODO: Remove version guard after dropping support for Python 3.10
|
|
121
|
+
if sys.version_info >= (3, 11):
|
|
122
|
+
for note in create_elasticsearch_error_notes(getattr(self.exception, "errors", []), max_notes=5):
|
|
123
|
+
self.exception.add_note(note)
|
|
124
|
+
|
|
122
125
|
self.event.set()
|
|
123
126
|
|
|
124
127
|
def record_to_document(self, record: Record, index: str) -> dict:
|
|
@@ -230,11 +233,12 @@ class ElasticReader(AbstractReader):
|
|
|
230
233
|
max_retries = int(max_retries)
|
|
231
234
|
|
|
232
235
|
if not uri.lower().startswith(("http://", "https://")):
|
|
233
|
-
uri = "
|
|
236
|
+
uri = "https://" + uri
|
|
234
237
|
|
|
235
238
|
self.es = elasticsearch.Elasticsearch(
|
|
236
239
|
uri,
|
|
237
240
|
verify_certs=verify_certs,
|
|
241
|
+
ssl_show_warn=verify_certs,
|
|
238
242
|
http_compress=http_compress,
|
|
239
243
|
api_key=api_key,
|
|
240
244
|
request_timeout=request_timeout,
|
|
@@ -242,10 +246,6 @@ class ElasticReader(AbstractReader):
|
|
|
242
246
|
max_retries=max_retries,
|
|
243
247
|
)
|
|
244
248
|
|
|
245
|
-
if not verify_certs:
|
|
246
|
-
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
247
|
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
248
|
-
|
|
249
249
|
def __iter__(self) -> Iterator[Record]:
|
|
250
250
|
ctx = get_app_context()
|
|
251
251
|
selector = self.selector
|
|
@@ -266,30 +266,69 @@ class ElasticReader(AbstractReader):
|
|
|
266
266
|
self.es.close()
|
|
267
267
|
|
|
268
268
|
|
|
269
|
-
def
|
|
270
|
-
"""
|
|
269
|
+
def create_elasticsearch_error_notes(errors: list[dict] | dict, max_notes: int = 0) -> list[str]:
|
|
270
|
+
"""Convert Elasticsearch Exception errors into pretty formatted notes.
|
|
271
271
|
|
|
272
272
|
Resources:
|
|
273
273
|
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
|
|
274
|
+
|
|
275
|
+
Arguments:
|
|
276
|
+
errors: A list of error items from an Elasticsearch exception, or a single error
|
|
277
|
+
max_notes: Maximum number of notes to create. If 0, all errors will be converted into notes.
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
A list of formatted error notes.
|
|
274
281
|
"""
|
|
275
|
-
errors
|
|
276
|
-
|
|
282
|
+
if isinstance(errors, dict):
|
|
283
|
+
errors = [errors]
|
|
284
|
+
|
|
285
|
+
notes = []
|
|
286
|
+
for idx, error in enumerate(errors, 1):
|
|
287
|
+
# Extract index information
|
|
288
|
+
index = error.get("index", {})
|
|
289
|
+
index_name = index.get("_index", "unknown _index")
|
|
290
|
+
doc_id = index.get("_id", "unknown _id")
|
|
291
|
+
status = index.get("status")
|
|
292
|
+
|
|
293
|
+
# Extract error details
|
|
294
|
+
error = index.get("error", {})
|
|
295
|
+
error_type = error.get("type", "unknown error type")
|
|
296
|
+
error_reason = error.get("reason", "unknown reason")
|
|
297
|
+
|
|
298
|
+
# Create formatted note
|
|
299
|
+
note_parts = [
|
|
300
|
+
f"Error {idx}, {error_type!r} ({status=}):",
|
|
301
|
+
f" index: {index_name}",
|
|
302
|
+
f" document_id: {doc_id}",
|
|
303
|
+
f" reason: {error_reason}",
|
|
304
|
+
]
|
|
305
|
+
|
|
306
|
+
# Include caused_by information if available
|
|
307
|
+
if caused_by := error.get("caused_by"):
|
|
308
|
+
cause_type = caused_by.get("type")
|
|
309
|
+
cause_reason = caused_by.get("reason")
|
|
310
|
+
note_parts.append(f" caused_by: {cause_type}, reason: {cause_reason}")
|
|
311
|
+
|
|
312
|
+
# Extract the record_descriptor name from the "data" field if possible
|
|
277
313
|
try:
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
errors.add(f"({status} {error_type} {error_reason})")
|
|
314
|
+
data = json.loads(index.get("data", "{}"))
|
|
315
|
+
record_metadata = data.pop("_record_metadata", {})
|
|
316
|
+
descriptor = record_metadata.get("descriptor", {})
|
|
317
|
+
if descriptor_name := descriptor.get("name"):
|
|
318
|
+
note_parts.append(f" descriptor_name: {descriptor_name}")
|
|
319
|
+
if data:
|
|
320
|
+
note_parts.append(f" data: {json.dumps(data)}")
|
|
286
321
|
except Exception:
|
|
287
|
-
|
|
322
|
+
# failed to get descriptor_name and data, ignore
|
|
323
|
+
pass
|
|
324
|
+
|
|
325
|
+
notes.append("\n".join(note_parts) + "\n")
|
|
288
326
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
327
|
+
# if max_notes is reached, stop processing and add a final note about remaining errors
|
|
328
|
+
if max_notes > 0 and idx >= max_notes:
|
|
329
|
+
remaining = len(errors) - idx
|
|
330
|
+
if remaining > 0:
|
|
331
|
+
notes.append(f"... and {remaining} more error(s) not shown.")
|
|
332
|
+
break
|
|
294
333
|
|
|
295
|
-
return
|
|
334
|
+
return notes
|
|
@@ -433,10 +433,17 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
433
433
|
return ret
|
|
434
434
|
|
|
435
435
|
|
|
436
|
-
def print_error(
|
|
437
|
-
log.error("rdump encountered a fatal error: %s",
|
|
436
|
+
def print_error(exc: Exception) -> None:
|
|
437
|
+
log.error("rdump encountered a fatal error: %s", exc)
|
|
438
|
+
|
|
438
439
|
if log.isEnabledFor(LOGGING_TRACE_LEVEL):
|
|
439
|
-
|
|
440
|
+
raise
|
|
441
|
+
|
|
442
|
+
# Print any additional notes attached to the exception (e.g. from adapters) at error level
|
|
443
|
+
for note in getattr(exc, "__notes__", []):
|
|
444
|
+
log.error(note)
|
|
445
|
+
|
|
446
|
+
log.warning("To show full traceback, run with -vvv")
|
|
440
447
|
|
|
441
448
|
|
|
442
449
|
if __name__ == "__main__":
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '3.22.dev6'
|
|
32
|
-
__version_tuple__ = version_tuple = (3, 22, 'dev6')
|
|
31
|
+
__version__ = version = '3.22.dev7'
|
|
32
|
+
__version_tuple__ = version_tuple = (3, 22, 'dev7')
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'ge9118cf22'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.22.dev6
|
|
3
|
+
Version: 3.22.dev7
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# ruff: noqa: E501
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from elasticsearch.helpers import BulkIndexError
|
|
10
|
+
|
|
11
|
+
from flow.record import RecordDescriptor
|
|
12
|
+
from flow.record.adapter.elastic import ElasticWriter, create_elasticsearch_error_notes
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from flow.record.base import Record
|
|
16
|
+
|
|
17
|
+
MyRecord = RecordDescriptor(
|
|
18
|
+
"my/record",
|
|
19
|
+
[
|
|
20
|
+
("string", "field_one"),
|
|
21
|
+
("string", "field_two"),
|
|
22
|
+
],
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.mark.parametrize(
|
|
27
|
+
"record",
|
|
28
|
+
[
|
|
29
|
+
MyRecord("first", "record"),
|
|
30
|
+
MyRecord("second", "record"),
|
|
31
|
+
],
|
|
32
|
+
)
|
|
33
|
+
def test_elastic_writer_metadata(record: Record) -> None:
|
|
34
|
+
options = {
|
|
35
|
+
"_meta_foo": "some value",
|
|
36
|
+
"_meta_bar": "another value",
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
|
|
40
|
+
assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
|
|
41
|
+
|
|
42
|
+
assert writer.record_to_document(record, "some-index") == {
|
|
43
|
+
"_index": "some-index",
|
|
44
|
+
"_source": json.dumps(
|
|
45
|
+
{
|
|
46
|
+
"field_one": record.field_one,
|
|
47
|
+
"field_two": record.field_two,
|
|
48
|
+
"_record_metadata": {
|
|
49
|
+
"descriptor": {
|
|
50
|
+
"name": "my/record",
|
|
51
|
+
"hash": record._desc.descriptor_hash,
|
|
52
|
+
},
|
|
53
|
+
"source": None,
|
|
54
|
+
"classification": None,
|
|
55
|
+
"generated": record._generated.isoformat(),
|
|
56
|
+
"version": 1,
|
|
57
|
+
"foo": "some value",
|
|
58
|
+
"bar": "another value",
|
|
59
|
+
},
|
|
60
|
+
}
|
|
61
|
+
),
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_elastic_writer_metadata_exception() -> None:
|
|
66
|
+
with ElasticWriter(uri="elasticsearch:9200") as writer:
|
|
67
|
+
writer.excepthook(
|
|
68
|
+
BulkIndexError(
|
|
69
|
+
"1 document(s) failed to index.",
|
|
70
|
+
errors=[
|
|
71
|
+
{
|
|
72
|
+
"index": {
|
|
73
|
+
"_index": "example-index",
|
|
74
|
+
"_id": "bWFkZSB5b3UgbG9vayDwn5GA",
|
|
75
|
+
"status": 400,
|
|
76
|
+
"error": {
|
|
77
|
+
"type": "document_parsing_exception",
|
|
78
|
+
"reason": "[1:225] failed to parse field [example] of type [long] in document with id "
|
|
79
|
+
"'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
|
|
80
|
+
"caused_by": {
|
|
81
|
+
"type": "illegal_argument_exception",
|
|
82
|
+
"reason": 'For input string: "Foo"',
|
|
83
|
+
},
|
|
84
|
+
},
|
|
85
|
+
"data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
|
|
86
|
+
'"hash":1234567890},"source":"/path/to/source","classification":null,'
|
|
87
|
+
'"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
],
|
|
91
|
+
)
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
with pytest.raises(BulkIndexError) as exc_info:
|
|
95
|
+
writer.__exit__()
|
|
96
|
+
|
|
97
|
+
writer.exception = None
|
|
98
|
+
exception = exc_info.value
|
|
99
|
+
assert isinstance(exception, BulkIndexError)
|
|
100
|
+
|
|
101
|
+
# version guard for __notes__ attribute, which was added in Python 3.11
|
|
102
|
+
# TODO: Remove after we drop support for Python 3.10
|
|
103
|
+
if sys.version_info >= (3, 11):
|
|
104
|
+
assert exception.__notes__ == [
|
|
105
|
+
"""\
|
|
106
|
+
Error 1, 'document_parsing_exception' (status=400):
|
|
107
|
+
index: example-index
|
|
108
|
+
document_id: bWFkZSB5b3UgbG9vayDwn5GA
|
|
109
|
+
reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
|
|
110
|
+
caused_by: illegal_argument_exception, reason: For input string: "Foo"
|
|
111
|
+
descriptor_name: example/record
|
|
112
|
+
data: {"example": "Foo"}
|
|
113
|
+
"""
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_create_elastic_notes() -> None:
|
|
118
|
+
exception = BulkIndexError(
|
|
119
|
+
"1 document(s) failed to index.",
|
|
120
|
+
errors=[
|
|
121
|
+
{
|
|
122
|
+
"index": {
|
|
123
|
+
"_index": "example-index",
|
|
124
|
+
"_id": "bWFkZSB5b3UgbG9vayDwn5GA",
|
|
125
|
+
"status": 400,
|
|
126
|
+
"error": {
|
|
127
|
+
"type": "document_parsing_exception",
|
|
128
|
+
"reason": "[1:225] failed to parse field [example] of type [long] in document with id "
|
|
129
|
+
"'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
|
|
130
|
+
"caused_by": {
|
|
131
|
+
"type": "illegal_argument_exception",
|
|
132
|
+
"reason": 'For input string: "Foo"',
|
|
133
|
+
},
|
|
134
|
+
},
|
|
135
|
+
"data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
|
|
136
|
+
'"hash":1234567890},"source":"/path/to/source","classification":null,'
|
|
137
|
+
'"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
|
|
138
|
+
},
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"index": {
|
|
142
|
+
"_index": "my-index",
|
|
143
|
+
"_id": "4XuIRpwBbjwxMKSCr8TE",
|
|
144
|
+
"status": 400,
|
|
145
|
+
"error": {
|
|
146
|
+
"type": "document_parsing_exception",
|
|
147
|
+
"reason": "[1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'",
|
|
148
|
+
"caused_by": {
|
|
149
|
+
"type": "illegal_argument_exception",
|
|
150
|
+
"reason": "failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]",
|
|
151
|
+
"caused_by": {
|
|
152
|
+
"type": "date_time_parse_exception",
|
|
153
|
+
"reason": "Failed to parse with all enclosed parsers",
|
|
154
|
+
},
|
|
155
|
+
},
|
|
156
|
+
},
|
|
157
|
+
"data": '{"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text", "_record_metadata": {"descriptor": {"name": "text/paste", "hash": 831446724}, "source": "external/pastebin", "classification": "PUBLIC", "generated": "2019-03-19T09:11:04.706581+00:00", "version": 1}}',
|
|
158
|
+
}
|
|
159
|
+
},
|
|
160
|
+
],
|
|
161
|
+
)
|
|
162
|
+
errors = exception.errors
|
|
163
|
+
assert len(errors) == 2
|
|
164
|
+
|
|
165
|
+
# Test with max_notes=1, which should only include the first error and a summary note about the remaining errors
|
|
166
|
+
notes = create_elasticsearch_error_notes(errors, max_notes=1)
|
|
167
|
+
assert len(notes) == 2
|
|
168
|
+
assert (
|
|
169
|
+
notes[0]
|
|
170
|
+
== """\
|
|
171
|
+
Error 1, 'document_parsing_exception' (status=400):
|
|
172
|
+
index: example-index
|
|
173
|
+
document_id: bWFkZSB5b3UgbG9vayDwn5GA
|
|
174
|
+
reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
|
|
175
|
+
caused_by: illegal_argument_exception, reason: For input string: "Foo"
|
|
176
|
+
descriptor_name: example/record
|
|
177
|
+
data: {"example": "Foo"}
|
|
178
|
+
"""
|
|
179
|
+
)
|
|
180
|
+
assert notes[-1] == "... and 1 more error(s) not shown."
|
|
181
|
+
|
|
182
|
+
# Test with max_notes=2, which should show both errors without the summary note
|
|
183
|
+
notes = create_elasticsearch_error_notes(errors, max_notes=2)
|
|
184
|
+
assert len(notes) == 2
|
|
185
|
+
assert (
|
|
186
|
+
notes[0]
|
|
187
|
+
== """\
|
|
188
|
+
Error 1, 'document_parsing_exception' (status=400):
|
|
189
|
+
index: example-index
|
|
190
|
+
document_id: bWFkZSB5b3UgbG9vayDwn5GA
|
|
191
|
+
reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
|
|
192
|
+
caused_by: illegal_argument_exception, reason: For input string: "Foo"
|
|
193
|
+
descriptor_name: example/record
|
|
194
|
+
data: {"example": "Foo"}
|
|
195
|
+
"""
|
|
196
|
+
)
|
|
197
|
+
assert (
|
|
198
|
+
notes[1]
|
|
199
|
+
== """\
|
|
200
|
+
Error 2, 'document_parsing_exception' (status=400):
|
|
201
|
+
index: my-index
|
|
202
|
+
document_id: 4XuIRpwBbjwxMKSCr8TE
|
|
203
|
+
reason: [1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'
|
|
204
|
+
caused_by: illegal_argument_exception, reason: failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]
|
|
205
|
+
descriptor_name: text/paste
|
|
206
|
+
data: {"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text"}
|
|
207
|
+
"""
|
|
208
|
+
)
|
|
@@ -20,6 +20,7 @@ from flow.record import RecordDescriptor, RecordReader, RecordWriter
|
|
|
20
20
|
from flow.record.adapter.line import field_types_for_record_descriptor
|
|
21
21
|
from flow.record.fieldtypes import flow_record_tz
|
|
22
22
|
from flow.record.tools import rdump
|
|
23
|
+
from flow.record.utils import LOGGING_TRACE_LEVEL
|
|
23
24
|
from tests._utils import generate_plain_records
|
|
24
25
|
|
|
25
26
|
|
|
@@ -870,3 +871,36 @@ def test_rdump_invalid_stdin_pipe(stdin_bytes: bytes) -> None:
|
|
|
870
871
|
assert pipe.returncode == 1, "rdump should exit with error code 1 on invalid input"
|
|
871
872
|
assert b"rdump encountered a fatal error: Could not find adapter for file-like object" in stderr
|
|
872
873
|
assert b"Processed 0 records (matched=0, unmatched=0)" in stdout
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
@pytest.mark.skipif(sys.version_info < (3, 11), reason="skip on python 3.10 or lower")
|
|
877
|
+
def test_rdump_print_error_notes(
|
|
878
|
+
tmp_path: Path,
|
|
879
|
+
capsys: pytest.CaptureFixture,
|
|
880
|
+
caplog: pytest.LogCaptureFixture,
|
|
881
|
+
) -> None:
|
|
882
|
+
"""Test that rdump prints error notes when an exception occurs."""
|
|
883
|
+
|
|
884
|
+
path = tmp_path / "test.records"
|
|
885
|
+
path.touch() # create an empty file
|
|
886
|
+
|
|
887
|
+
exc = ValueError("something went wrong")
|
|
888
|
+
exc.add_note("Check the input format")
|
|
889
|
+
|
|
890
|
+
with mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc):
|
|
891
|
+
rdump.main([str(path)])
|
|
892
|
+
_out, err = capsys.readouterr()
|
|
893
|
+
|
|
894
|
+
assert "something went wrong" in err
|
|
895
|
+
assert "Check the input format" in err
|
|
896
|
+
assert "To show full traceback, run with -vvv" in err
|
|
897
|
+
|
|
898
|
+
# with full traceback
|
|
899
|
+
with (
|
|
900
|
+
caplog.at_level(LOGGING_TRACE_LEVEL),
|
|
901
|
+
mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc),
|
|
902
|
+
pytest.raises(ValueError, match="something went wrong\nCheck the input format"),
|
|
903
|
+
):
|
|
904
|
+
rdump.main([str(path), "-vvv"])
|
|
905
|
+
|
|
906
|
+
capsys.readouterr()
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
|
|
8
|
-
from flow.record import RecordDescriptor
|
|
9
|
-
from flow.record.adapter.elastic import ElasticWriter
|
|
10
|
-
|
|
11
|
-
if TYPE_CHECKING:
|
|
12
|
-
from flow.record.base import Record
|
|
13
|
-
|
|
14
|
-
MyRecord = RecordDescriptor(
|
|
15
|
-
"my/record",
|
|
16
|
-
[
|
|
17
|
-
("string", "field_one"),
|
|
18
|
-
("string", "field_two"),
|
|
19
|
-
],
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@pytest.mark.parametrize(
|
|
24
|
-
"record",
|
|
25
|
-
[
|
|
26
|
-
MyRecord("first", "record"),
|
|
27
|
-
MyRecord("second", "record"),
|
|
28
|
-
],
|
|
29
|
-
)
|
|
30
|
-
def test_elastic_writer_metadata(record: Record) -> None:
|
|
31
|
-
options = {
|
|
32
|
-
"_meta_foo": "some value",
|
|
33
|
-
"_meta_bar": "another value",
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
|
|
37
|
-
assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
|
|
38
|
-
|
|
39
|
-
assert writer.record_to_document(record, "some-index") == {
|
|
40
|
-
"_index": "some-index",
|
|
41
|
-
"_source": json.dumps(
|
|
42
|
-
{
|
|
43
|
-
"field_one": record.field_one,
|
|
44
|
-
"field_two": record.field_two,
|
|
45
|
-
"_record_metadata": {
|
|
46
|
-
"descriptor": {
|
|
47
|
-
"name": "my/record",
|
|
48
|
-
"hash": record._desc.descriptor_hash,
|
|
49
|
-
},
|
|
50
|
-
"source": None,
|
|
51
|
-
"classification": None,
|
|
52
|
-
"generated": record._generated.isoformat(),
|
|
53
|
-
"version": 1,
|
|
54
|
-
"foo": "some value",
|
|
55
|
-
"bar": "another value",
|
|
56
|
-
},
|
|
57
|
-
}
|
|
58
|
-
),
|
|
59
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|