PyPI - flow.record - Versions diffs - 3.22.dev6__tar.gz → 3.22.dev8__tar.gz - Mend

flow.record 3.22.dev6.tar.gz → 3.22.dev8.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

{flow_record-3.22.dev6 → flow_record-3.22.dev8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flow.record
-Version: 3.22.dev6
+Version: 3.22.dev8
 Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
 Author-email: Dissect Team <dissect@fox-it.com>
 License-Expression: AGPL-3.0-or-later

{flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/elastic.py RENAMED Viewed

@@ -1,14 +1,14 @@
 from __future__ import annotations
 import hashlib
+import json
 import logging
 import queue
+import sys
 import threading
 from contextlib import suppress
 from typing import TYPE_CHECKING
-import urllib3
 try:
     import elasticsearch
     import elasticsearch.helpers
@@ -85,7 +85,7 @@ class ElasticWriter(AbstractWriter):
         self.max_retries = int(max_retries)
         if not uri.lower().startswith(("http://", "https://")):
-            uri = "http://" + uri
+            uri = "https://" + uri
         self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
         self.event = threading.Event()
@@ -95,6 +95,7 @@ class ElasticWriter(AbstractWriter):
         self.es = elasticsearch.Elasticsearch(
             uri,
             verify_certs=verify_certs,
+            ssl_show_warn=verify_certs,
             http_compress=http_compress,
             api_key=api_key,
             request_timeout=request_timeout,
@@ -107,10 +108,6 @@ class ElasticWriter(AbstractWriter):
         self.thread = threading.Thread(target=self.streaming_bulk_thread)
         self.thread.start()
-        if not verify_certs:
-            # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
-            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
         self.metadata_fields = {}
         for arg_key, arg_val in kwargs.items():
             if arg_key.startswith("_meta_"):
@@ -118,7 +115,13 @@ class ElasticWriter(AbstractWriter):
     def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
         self.exception = getattr(exc, "exc_value", exc)
-        self.exception = enrich_elastic_exception(self.exception)
+        # version guard for add_note(), which was added in Python 3.11
+        # TODO: Remove version guard after dropping support for Python 3.10
+        if sys.version_info >= (3, 11):
+            for note in create_elasticsearch_error_notes(getattr(self.exception, "errors", []), max_notes=5):
+                self.exception.add_note(note)
         self.event.set()
     def record_to_document(self, record: Record, index: str) -> dict:
@@ -230,11 +233,12 @@ class ElasticReader(AbstractReader):
         max_retries = int(max_retries)
         if not uri.lower().startswith(("http://", "https://")):
-            uri = "http://" + uri
+            uri = "https://" + uri
         self.es = elasticsearch.Elasticsearch(
             uri,
             verify_certs=verify_certs,
+            ssl_show_warn=verify_certs,
             http_compress=http_compress,
             api_key=api_key,
             request_timeout=request_timeout,
@@ -242,10 +246,6 @@ class ElasticReader(AbstractReader):
             max_retries=max_retries,
         )
-        if not verify_certs:
-            # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
-            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
     def __iter__(self) -> Iterator[Record]:
         ctx = get_app_context()
         selector = self.selector
@@ -266,30 +266,69 @@ class ElasticReader(AbstractReader):
             self.es.close()
-def enrich_elastic_exception(exception: Exception) -> Exception:
-    """Extend the exception with error information from Elastic.
+def create_elasticsearch_error_notes(errors: list[dict] | dict, max_notes: int = 0) -> list[str]:
+    """Convert Elasticsearch Exception errors into pretty formatted notes.
     Resources:
         - https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
+    Arguments:
+        errors: A list of error items from an Elasticsearch exception, or a single error
+        max_notes: Maximum number of notes to create. If 0, all errors will be converted into notes.
+    Returns:
+        A list of formatted error notes.
     """
-    errors = set()
-    if hasattr(exception, "errors"):
+    if isinstance(errors, dict):
+        errors = [errors]
+    notes = []
+    for idx, error in enumerate(errors, 1):
+        # Extract index information
+        index = error.get("index", {})
+        index_name = index.get("_index", "unknown _index")
+        doc_id = index.get("_id", "unknown _id")
+        status = index.get("status")
+        # Extract error details
+        error = index.get("error", {})
+        error_type = error.get("type", "unknown error type")
+        error_reason = error.get("reason", "unknown reason")
+        # Create formatted note
+        note_parts = [
+            f"Error {idx}, {error_type!r} ({status=}):",
+            f"  index: {index_name}",
+            f"  document_id: {doc_id}",
+            f"  reason: {error_reason}",
+        ]
+        # Include caused_by information if available
+        if caused_by := error.get("caused_by"):
+            cause_type = caused_by.get("type")
+            cause_reason = caused_by.get("reason")
+            note_parts.append(f"  caused_by: {cause_type}, reason: {cause_reason}")
+        # Extract the record_descriptor name from the "data" field if possible
         try:
-            for error in exception.errors:
-                index_dict = error.get("index", {})
-                status = index_dict.get("status")
-                error_dict = index_dict.get("error", {})
-                error_type = error_dict.get("type")
-                error_reason = error_dict.get("reason", "")
-                errors.add(f"({status} {error_type} {error_reason})")
+            data = json.loads(index.get("data", "{}"))
+            record_metadata = data.pop("_record_metadata", {})
+            descriptor = record_metadata.get("descriptor", {})
+            if descriptor_name := descriptor.get("name"):
+                note_parts.append(f"  descriptor_name: {descriptor_name}")
+            if data:
+                note_parts.append(f"  data: {json.dumps(data)}")
         except Exception:
-            errors.add("unable to extend errors")
+            # failed to get descriptor_name and data, ignore
+            pass
+        notes.append("\n".join(note_parts) + "\n")
-    # append errors to original exception message
-    error_str = ", ".join(errors)
-    original_message = exception.args[0] if exception.args else ""
-    new_message = f"{original_message} {error_str}"
-    exception.args = (new_message, *exception.args[1:])
+        # if max_notes is reached, stop processing and add a final note about remaining errors
+        if max_notes > 0 and idx >= max_notes:
+            remaining = len(errors) - idx
+            if remaining > 0:
+                notes.append(f"... and {remaining} more error(s) not shown.")
+            break
-    return exception
+    return notes

{flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/tools/rdump.py RENAMED Viewed

@@ -321,8 +321,14 @@ def main(argv: list[str] | None = None) -> int:
         root_logger.handlers.clear()
         root_logger.addHandler(handler)
-    fields_to_exclude = args.exclude.split(",") if args.exclude else []
-    fields = args.fields.split(",") if args.fields else []
+    fields_to_exclude = list(filter(None, map(str.strip, args.exclude.split(",")))) if args.exclude else []
+    fields = list(filter(None, map(str.strip, args.fields.split(",")))) if args.fields else []
+    writer_options = {}
+    if fields:
+        writer_options["fields"] = fields
+    if fields_to_exclude:
+        writer_options["exclude"] = fields_to_exclude
     if args.list_adapters:
         list_adapters()
@@ -340,8 +346,6 @@ def main(argv: list[str] | None = None) -> int:
         }
         uri = mode_to_uri.get(args.mode, uri)
         qparams = {
-            "fields": args.fields,
-            "exclude": args.exclude,
             "format_spec": args.format,
         }
         query = urlencode({k: v for k, v in qparams.items() if v})
@@ -393,7 +397,7 @@ def main(argv: list[str] | None = None) -> int:
     ret = 0
     try:
-        with RecordWriter(uri) as record_writer:
+        with RecordWriter(uri, **writer_options) as record_writer:
             for count, rec in enumerate(record_iterator, start=1):  # noqa: B007
                 if args.record_source is not None:
                     rec._source = args.record_source
@@ -433,10 +437,17 @@ def main(argv: list[str] | None = None) -> int:
     return ret
-def print_error(e: Exception) -> None:
-    log.error("rdump encountered a fatal error: %s", e)
+def print_error(exc: Exception) -> None:
+    log.error("rdump encountered a fatal error: %s", exc)
     if log.isEnabledFor(LOGGING_TRACE_LEVEL):
-        log.exception("Full traceback")
+        raise
+    # Print any additional notes attached to the exception (e.g. from adapters) at warning level
+    for note in getattr(exc, "__notes__", []):
+        log.error(note)
+    log.warning("To show full traceback, run with -vvv")
 if __name__ == "__main__":

{flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '3.22.dev6'
-__version_tuple__ = version_tuple = (3, 22, 'dev6')
+__version__ = version = '3.22.dev8'
+__version_tuple__ = version_tuple = (3, 22, 'dev8')
-__commit_id__ = commit_id = 'g668138538'
+__commit_id__ = commit_id = 'g1ab6b5481'

{flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flow.record
-Version: 3.22.dev6
+Version: 3.22.dev8
 Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
 Author-email: Dissect Team <dissect@fox-it.com>
 License-Expression: AGPL-3.0-or-later

flow_record-3.22.dev8/tests/adapter/test_elastic.py ADDED Viewed

@@ -0,0 +1,208 @@
+# ruff: noqa: E501
+from __future__ import annotations
+import json
+import sys
+from typing import TYPE_CHECKING
+import pytest
+from elasticsearch.helpers import BulkIndexError
+from flow.record import RecordDescriptor
+from flow.record.adapter.elastic import ElasticWriter, create_elasticsearch_error_notes
+if TYPE_CHECKING:
+    from flow.record.base import Record
+MyRecord = RecordDescriptor(
+    "my/record",
+    [
+        ("string", "field_one"),
+        ("string", "field_two"),
+    ],
+)
+@pytest.mark.parametrize(
+    "record",
+    [
+        MyRecord("first", "record"),
+        MyRecord("second", "record"),
+    ],
+)
+def test_elastic_writer_metadata(record: Record) -> None:
+    options = {
+        "_meta_foo": "some value",
+        "_meta_bar": "another value",
+    }
+    with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
+        assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
+        assert writer.record_to_document(record, "some-index") == {
+            "_index": "some-index",
+            "_source": json.dumps(
+                {
+                    "field_one": record.field_one,
+                    "field_two": record.field_two,
+                    "_record_metadata": {
+                        "descriptor": {
+                            "name": "my/record",
+                            "hash": record._desc.descriptor_hash,
+                        },
+                        "source": None,
+                        "classification": None,
+                        "generated": record._generated.isoformat(),
+                        "version": 1,
+                        "foo": "some value",
+                        "bar": "another value",
+                    },
+                }
+            ),
+        }
+def test_elastic_writer_metadata_exception() -> None:
+    with ElasticWriter(uri="elasticsearch:9200") as writer:
+        writer.excepthook(
+            BulkIndexError(
+                "1 document(s) failed to index.",
+                errors=[
+                    {
+                        "index": {
+                            "_index": "example-index",
+                            "_id": "bWFkZSB5b3UgbG9vayDwn5GA",
+                            "status": 400,
+                            "error": {
+                                "type": "document_parsing_exception",
+                                "reason": "[1:225] failed to parse field [example] of type [long] in document with id "
+                                "'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
+                                "caused_by": {
+                                    "type": "illegal_argument_exception",
+                                    "reason": 'For input string: "Foo"',
+                                },
+                            },
+                            "data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
+                            '"hash":1234567890},"source":"/path/to/source","classification":null,'
+                            '"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
+                        }
+                    }
+                ],
+            )
+        )
+        with pytest.raises(BulkIndexError) as exc_info:
+            writer.__exit__()
+        writer.exception = None
+        exception = exc_info.value
+        assert isinstance(exception, BulkIndexError)
+        # version guard for __notes__ attribute, which was added in Python 3.11
+        # TODO: Remove after we drop support for Python 3.10
+        if sys.version_info >= (3, 11):
+            assert exception.__notes__ == [
+                """\
+Error 1, 'document_parsing_exception' (status=400):
+  index: example-index
+  document_id: bWFkZSB5b3UgbG9vayDwn5GA
+  reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
+  caused_by: illegal_argument_exception, reason: For input string: "Foo"
+  descriptor_name: example/record
+  data: {"example": "Foo"}
+"""
+            ]
+def test_create_elastic_notes() -> None:
+    exception = BulkIndexError(
+        "1 document(s) failed to index.",
+        errors=[
+            {
+                "index": {
+                    "_index": "example-index",
+                    "_id": "bWFkZSB5b3UgbG9vayDwn5GA",
+                    "status": 400,
+                    "error": {
+                        "type": "document_parsing_exception",
+                        "reason": "[1:225] failed to parse field [example] of type [long] in document with id "
+                        "'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
+                        "caused_by": {
+                            "type": "illegal_argument_exception",
+                            "reason": 'For input string: "Foo"',
+                        },
+                    },
+                    "data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
+                    '"hash":1234567890},"source":"/path/to/source","classification":null,'
+                    '"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
+                },
+            },
+            {
+                "index": {
+                    "_index": "my-index",
+                    "_id": "4XuIRpwBbjwxMKSCr8TE",
+                    "status": 400,
+                    "error": {
+                        "type": "document_parsing_exception",
+                        "reason": "[1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'",
+                        "caused_by": {
+                            "type": "illegal_argument_exception",
+                            "reason": "failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]",
+                            "caused_by": {
+                                "type": "date_time_parse_exception",
+                                "reason": "Failed to parse with all enclosed parsers",
+                            },
+                        },
+                    },
+                    "data": '{"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text", "_record_metadata": {"descriptor": {"name": "text/paste", "hash": 831446724}, "source": "external/pastebin", "classification": "PUBLIC", "generated": "2019-03-19T09:11:04.706581+00:00", "version": 1}}',
+                }
+            },
+        ],
+    )
+    errors = exception.errors
+    assert len(errors) == 2
+    # Test with max_notes=1, which should only include the first error and a summary note about the remaining errors
+    notes = create_elasticsearch_error_notes(errors, max_notes=1)
+    assert len(notes) == 2
+    assert (
+        notes[0]
+        == """\
+Error 1, 'document_parsing_exception' (status=400):
+  index: example-index
+  document_id: bWFkZSB5b3UgbG9vayDwn5GA
+  reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
+  caused_by: illegal_argument_exception, reason: For input string: "Foo"
+  descriptor_name: example/record
+  data: {"example": "Foo"}
+"""
+    )
+    assert notes[-1] == "... and 1 more error(s) not shown."
+    # Test with max_notes=2, which should show both errors without the summary note
+    notes = create_elasticsearch_error_notes(errors, max_notes=2)
+    assert len(notes) == 2
+    assert (
+        notes[0]
+        == """\
+Error 1, 'document_parsing_exception' (status=400):
+  index: example-index
+  document_id: bWFkZSB5b3UgbG9vayDwn5GA
+  reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
+  caused_by: illegal_argument_exception, reason: For input string: "Foo"
+  descriptor_name: example/record
+  data: {"example": "Foo"}
+"""
+    )
+    assert (
+        notes[1]
+        == """\
+Error 2, 'document_parsing_exception' (status=400):
+  index: my-index
+  document_id: 4XuIRpwBbjwxMKSCr8TE
+  reason: [1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'
+  caused_by: illegal_argument_exception, reason: failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]
+  descriptor_name: text/paste
+  data: {"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text"}
+"""
+    )

{flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/tools/test_rdump.py RENAMED Viewed

@@ -20,6 +20,7 @@ from flow.record import RecordDescriptor, RecordReader, RecordWriter
 from flow.record.adapter.line import field_types_for_record_descriptor
 from flow.record.fieldtypes import flow_record_tz
 from flow.record.tools import rdump
+from flow.record.utils import LOGGING_TRACE_LEVEL
 from tests._utils import generate_plain_records
@@ -870,3 +871,73 @@ def test_rdump_invalid_stdin_pipe(stdin_bytes: bytes) -> None:
     assert pipe.returncode == 1, "rdump should exit with error code 1 on invalid input"
     assert b"rdump encountered a fatal error: Could not find adapter for file-like object" in stderr
     assert b"Processed 0 records (matched=0, unmatched=0)" in stdout
+@pytest.mark.skipif(sys.version_info < (3, 11), reason="skip on python 3.10 or lower")
+def test_rdump_print_error_notes(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test that rdump prints error notes when an exception occurs."""
+    path = tmp_path / "test.records"
+    path.touch()  # create an empty file
+    exc = ValueError("something went wrong")
+    exc.add_note("Check the input format")
+    with mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc):
+        rdump.main([str(path)])
+    _out, err = capsys.readouterr()
+    assert "something went wrong" in err
+    assert "Check the input format" in err
+    assert "To show full traceback, run with -vvv" in err
+    # with full traceback
+    with (
+        caplog.at_level(LOGGING_TRACE_LEVEL),
+        mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc),
+        pytest.raises(ValueError, match="something went wrong\nCheck the input format"),
+    ):
+        rdump.main([str(path), "-vvv"])
+    capsys.readouterr()
+def test_rdump_fields_with_spaces(tmp_path: Path, capsysbinary: pytest.CaptureFixture) -> None:
+    """Test if rdump handles spaces in field names gracefully."""
+    TestRecord = RecordDescriptor(
+        "test/record",
+        [
+            ("varint", "count"),
+            ("string", "foo"),
+            ("string", "bar"),
+        ],
+    )
+    path = tmp_path / "test.records"
+    out_path = tmp_path / "out.records"
+    with RecordWriter(path) as writer:
+        writer.write(TestRecord(count=0, foo="bar", bar="baz"))
+    # test if fields works with spaces in the name
+    rdump.main([str(path), "--fields", "foo, count  ", "-w", str(out_path)])
+    with RecordReader(out_path) as reader:
+        records = list(reader)
+    assert len(records) == 1
+    assert list(records[0]._desc.fields.keys()) == ["foo", "count"]
+    # test if exclude works with spaces in the field names
+    rdump.main([str(path), "--exclude", "  foo,   bar  ", "-w", str(out_path)])
+    with RecordReader(out_path) as reader:
+        records = list(reader)
+    assert len(records) == 1
+    assert list(records[0]._desc.fields.keys()) == ["count"]
+    # also test an adapter
+    rdump.main([str(path), "--exclude", "  foo,   bar  ", "--csv"])
+    captured = capsysbinary.readouterr()
+    assert captured.err == b""
+    assert b"count,_source,_classification,_generated,_version\r\n" in captured.out

flow_record-3.22.dev6/tests/adapter/test_elastic.py DELETED Viewed

@@ -1,59 +0,0 @@
-from __future__ import annotations
-import json
-from typing import TYPE_CHECKING
-import pytest
-from flow.record import RecordDescriptor
-from flow.record.adapter.elastic import ElasticWriter
-if TYPE_CHECKING:
-    from flow.record.base import Record
-MyRecord = RecordDescriptor(
-    "my/record",
-    [
-        ("string", "field_one"),
-        ("string", "field_two"),
-    ],
-)
-@pytest.mark.parametrize(
-    "record",
-    [
-        MyRecord("first", "record"),
-        MyRecord("second", "record"),
-    ],
-)
-def test_elastic_writer_metadata(record: Record) -> None:
-    options = {
-        "_meta_foo": "some value",
-        "_meta_bar": "another value",
-    }
-    with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
-        assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
-        assert writer.record_to_document(record, "some-index") == {
-            "_index": "some-index",
-            "_source": json.dumps(
-                {
-                    "field_one": record.field_one,
-                    "field_two": record.field_two,
-                    "_record_metadata": {
-                        "descriptor": {
-                            "name": "my/record",
-                            "hash": record._desc.descriptor_hash,
-                        },
-                        "source": None,
-                        "classification": None,
-                        "generated": record._generated.isoformat(),
-                        "version": 1,
-                        "foo": "some value",
-                        "bar": "another value",
-                    },
-                }
-            ),
-        }