flow.record 3.22.dev6__tar.gz → 3.22.dev8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/PKG-INFO +1 -1
  2. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/elastic.py +71 -32
  3. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/tools/rdump.py +19 -8
  4. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/version.py +3 -3
  5. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/PKG-INFO +1 -1
  6. flow_record-3.22.dev8/tests/adapter/test_elastic.py +208 -0
  7. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/tools/test_rdump.py +71 -0
  8. flow_record-3.22.dev6/tests/adapter/test_elastic.py +0 -59
  9. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/.git-blame-ignore-revs +0 -0
  10. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/.gitattributes +0 -0
  11. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/COPYRIGHT +0 -0
  12. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/LICENSE +0 -0
  13. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/MANIFEST.in +0 -0
  14. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/README.md +0 -0
  15. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/examples/__init__.py +0 -0
  16. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/examples/filesystem.py +0 -0
  17. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/examples/passivedns.py +0 -0
  18. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/examples/records.json +0 -0
  19. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/examples/selectors.py +0 -0
  20. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/examples/tcpconn.py +0 -0
  21. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/__init__.py +0 -0
  22. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/__init__.py +0 -0
  23. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/archive.py +0 -0
  24. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/avro.py +0 -0
  25. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/broker.py +0 -0
  26. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/csvfile.py +0 -0
  27. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/duckdb.py +0 -0
  28. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/jsonfile.py +0 -0
  29. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/line.py +0 -0
  30. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/mongo.py +0 -0
  31. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/split.py +0 -0
  32. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/splunk.py +0 -0
  33. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/sqlite.py +0 -0
  34. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/stream.py +0 -0
  35. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/text.py +0 -0
  36. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/adapter/xlsx.py +0 -0
  37. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/base.py +0 -0
  38. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/context.py +0 -0
  39. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/exceptions.py +0 -0
  40. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/__init__.py +0 -0
  41. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/credential.py +0 -0
  42. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/net/__init__.py +0 -0
  43. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/net/ip.py +0 -0
  44. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/net/ipv4.py +0 -0
  45. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/net/tcp.py +0 -0
  46. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/fieldtypes/net/udp.py +0 -0
  47. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/jsonpacker.py +0 -0
  48. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/packer.py +0 -0
  49. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/selector.py +0 -0
  50. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/stream.py +0 -0
  51. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/tools/__init__.py +0 -0
  52. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/tools/geoip.py +0 -0
  53. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/utils.py +0 -0
  54. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow/record/whitelist.py +0 -0
  55. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/SOURCES.txt +0 -0
  56. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/dependency_links.txt +0 -0
  57. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/entry_points.txt +0 -0
  58. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/requires.txt +0 -0
  59. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/flow.record.egg-info/top_level.txt +0 -0
  60. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/pyproject.toml +0 -0
  61. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/setup.cfg +0 -0
  62. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/__init__.py +0 -0
  63. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/_data/.gitkeep +0 -0
  64. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/_docs/Makefile +0 -0
  65. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/_docs/conf.py +0 -0
  66. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/_docs/index.rst +0 -0
  67. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/_utils.py +0 -0
  68. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/__init__.py +0 -0
  69. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_avro.py +0 -0
  70. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_csv.py +0 -0
  71. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_json.py +0 -0
  72. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_line.py +0 -0
  73. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_splunk.py +0 -0
  74. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_sqlite_duckdb.py +0 -0
  75. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_text.py +0 -0
  76. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/adapter/test_xlsx.py +0 -0
  77. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/conftest.py +0 -0
  78. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/fieldtypes/__init__.py +0 -0
  79. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/fieldtypes/test_boolean.py +0 -0
  80. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/fieldtypes/test_fieldtypes.py +0 -0
  81. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/fieldtypes/test_ip.py +0 -0
  82. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/packer/__init__.py +0 -0
  83. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/packer/test_json_packer.py +0 -0
  84. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/packer/test_packer.py +0 -0
  85. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/record/__init__.py +0 -0
  86. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/record/test_adapter.py +0 -0
  87. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/record/test_context.py +0 -0
  88. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/record/test_descriptor.py +0 -0
  89. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/record/test_multi_timestamp.py +0 -0
  90. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/record/test_record.py +0 -0
  91. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/selector/__init__.py +0 -0
  92. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/selector/test_compiled.py +0 -0
  93. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/selector/test_selectors.py +0 -0
  94. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/test_deprecations.py +0 -0
  95. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/test_regressions.py +0 -0
  96. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/test_utils.py +0 -0
  97. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tests/tools/__init__.py +0 -0
  98. {flow_record-3.22.dev6 → flow_record-3.22.dev8}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.22.dev6
3
+ Version: 3.22.dev8
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import hashlib
4
+ import json
4
5
  import logging
5
6
  import queue
7
+ import sys
6
8
  import threading
7
9
  from contextlib import suppress
8
10
  from typing import TYPE_CHECKING
9
11
 
10
- import urllib3
11
-
12
12
  try:
13
13
  import elasticsearch
14
14
  import elasticsearch.helpers
@@ -85,7 +85,7 @@ class ElasticWriter(AbstractWriter):
85
85
  self.max_retries = int(max_retries)
86
86
 
87
87
  if not uri.lower().startswith(("http://", "https://")):
88
- uri = "http://" + uri
88
+ uri = "https://" + uri
89
89
 
90
90
  self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
91
91
  self.event = threading.Event()
@@ -95,6 +95,7 @@ class ElasticWriter(AbstractWriter):
95
95
  self.es = elasticsearch.Elasticsearch(
96
96
  uri,
97
97
  verify_certs=verify_certs,
98
+ ssl_show_warn=verify_certs,
98
99
  http_compress=http_compress,
99
100
  api_key=api_key,
100
101
  request_timeout=request_timeout,
@@ -107,10 +108,6 @@ class ElasticWriter(AbstractWriter):
107
108
  self.thread = threading.Thread(target=self.streaming_bulk_thread)
108
109
  self.thread.start()
109
110
 
110
- if not verify_certs:
111
- # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
112
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
113
-
114
111
  self.metadata_fields = {}
115
112
  for arg_key, arg_val in kwargs.items():
116
113
  if arg_key.startswith("_meta_"):
@@ -118,7 +115,13 @@ class ElasticWriter(AbstractWriter):
118
115
 
119
116
  def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
120
117
  self.exception = getattr(exc, "exc_value", exc)
121
- self.exception = enrich_elastic_exception(self.exception)
118
+
119
+ # version guard for add_note(), which was added in Python 3.11
120
+ # TODO: Remove version guard after dropping support for Python 3.10
121
+ if sys.version_info >= (3, 11):
122
+ for note in create_elasticsearch_error_notes(getattr(self.exception, "errors", []), max_notes=5):
123
+ self.exception.add_note(note)
124
+
122
125
  self.event.set()
123
126
 
124
127
  def record_to_document(self, record: Record, index: str) -> dict:
@@ -230,11 +233,12 @@ class ElasticReader(AbstractReader):
230
233
  max_retries = int(max_retries)
231
234
 
232
235
  if not uri.lower().startswith(("http://", "https://")):
233
- uri = "http://" + uri
236
+ uri = "https://" + uri
234
237
 
235
238
  self.es = elasticsearch.Elasticsearch(
236
239
  uri,
237
240
  verify_certs=verify_certs,
241
+ ssl_show_warn=verify_certs,
238
242
  http_compress=http_compress,
239
243
  api_key=api_key,
240
244
  request_timeout=request_timeout,
@@ -242,10 +246,6 @@ class ElasticReader(AbstractReader):
242
246
  max_retries=max_retries,
243
247
  )
244
248
 
245
- if not verify_certs:
246
- # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
247
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
248
-
249
249
  def __iter__(self) -> Iterator[Record]:
250
250
  ctx = get_app_context()
251
251
  selector = self.selector
@@ -266,30 +266,69 @@ class ElasticReader(AbstractReader):
266
266
  self.es.close()
267
267
 
268
268
 
269
- def enrich_elastic_exception(exception: Exception) -> Exception:
270
- """Extend the exception with error information from Elastic.
269
+ def create_elasticsearch_error_notes(errors: list[dict] | dict, max_notes: int = 0) -> list[str]:
270
+ """Convert Elasticsearch Exception errors into pretty formatted notes.
271
271
 
272
272
  Resources:
273
273
  - https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
274
+
275
+ Arguments:
276
+ errors: A list of error items from an Elasticsearch exception, or a single error
277
+ max_notes: Maximum number of notes to create. If 0, all errors will be converted into notes.
278
+
279
+ Returns:
280
+ A list of formatted error notes.
274
281
  """
275
- errors = set()
276
- if hasattr(exception, "errors"):
282
+ if isinstance(errors, dict):
283
+ errors = [errors]
284
+
285
+ notes = []
286
+ for idx, error in enumerate(errors, 1):
287
+ # Extract index information
288
+ index = error.get("index", {})
289
+ index_name = index.get("_index", "unknown _index")
290
+ doc_id = index.get("_id", "unknown _id")
291
+ status = index.get("status")
292
+
293
+ # Extract error details
294
+ error = index.get("error", {})
295
+ error_type = error.get("type", "unknown error type")
296
+ error_reason = error.get("reason", "unknown reason")
297
+
298
+ # Create formatted note
299
+ note_parts = [
300
+ f"Error {idx}, {error_type!r} ({status=}):",
301
+ f" index: {index_name}",
302
+ f" document_id: {doc_id}",
303
+ f" reason: {error_reason}",
304
+ ]
305
+
306
+ # Include caused_by information if available
307
+ if caused_by := error.get("caused_by"):
308
+ cause_type = caused_by.get("type")
309
+ cause_reason = caused_by.get("reason")
310
+ note_parts.append(f" caused_by: {cause_type}, reason: {cause_reason}")
311
+
312
+ # Extract the record_descriptor name from the "data" field if possible
277
313
  try:
278
- for error in exception.errors:
279
- index_dict = error.get("index", {})
280
- status = index_dict.get("status")
281
- error_dict = index_dict.get("error", {})
282
- error_type = error_dict.get("type")
283
- error_reason = error_dict.get("reason", "")
284
-
285
- errors.add(f"({status} {error_type} {error_reason})")
314
+ data = json.loads(index.get("data", "{}"))
315
+ record_metadata = data.pop("_record_metadata", {})
316
+ descriptor = record_metadata.get("descriptor", {})
317
+ if descriptor_name := descriptor.get("name"):
318
+ note_parts.append(f" descriptor_name: {descriptor_name}")
319
+ if data:
320
+ note_parts.append(f" data: {json.dumps(data)}")
286
321
  except Exception:
287
- errors.add("unable to extend errors")
322
+ # failed to get descriptor_name and data, ignore
323
+ pass
324
+
325
+ notes.append("\n".join(note_parts) + "\n")
288
326
 
289
- # append errors to original exception message
290
- error_str = ", ".join(errors)
291
- original_message = exception.args[0] if exception.args else ""
292
- new_message = f"{original_message} {error_str}"
293
- exception.args = (new_message, *exception.args[1:])
327
+ # if max_notes is reached, stop processing and add a final note about remaining errors
328
+ if max_notes > 0 and idx >= max_notes:
329
+ remaining = len(errors) - idx
330
+ if remaining > 0:
331
+ notes.append(f"... and {remaining} more error(s) not shown.")
332
+ break
294
333
 
295
- return exception
334
+ return notes
@@ -321,8 +321,14 @@ def main(argv: list[str] | None = None) -> int:
321
321
  root_logger.handlers.clear()
322
322
  root_logger.addHandler(handler)
323
323
 
324
- fields_to_exclude = args.exclude.split(",") if args.exclude else []
325
- fields = args.fields.split(",") if args.fields else []
324
+ fields_to_exclude = list(filter(None, map(str.strip, args.exclude.split(",")))) if args.exclude else []
325
+ fields = list(filter(None, map(str.strip, args.fields.split(",")))) if args.fields else []
326
+
327
+ writer_options = {}
328
+ if fields:
329
+ writer_options["fields"] = fields
330
+ if fields_to_exclude:
331
+ writer_options["exclude"] = fields_to_exclude
326
332
 
327
333
  if args.list_adapters:
328
334
  list_adapters()
@@ -340,8 +346,6 @@ def main(argv: list[str] | None = None) -> int:
340
346
  }
341
347
  uri = mode_to_uri.get(args.mode, uri)
342
348
  qparams = {
343
- "fields": args.fields,
344
- "exclude": args.exclude,
345
349
  "format_spec": args.format,
346
350
  }
347
351
  query = urlencode({k: v for k, v in qparams.items() if v})
@@ -393,7 +397,7 @@ def main(argv: list[str] | None = None) -> int:
393
397
  ret = 0
394
398
 
395
399
  try:
396
- with RecordWriter(uri) as record_writer:
400
+ with RecordWriter(uri, **writer_options) as record_writer:
397
401
  for count, rec in enumerate(record_iterator, start=1): # noqa: B007
398
402
  if args.record_source is not None:
399
403
  rec._source = args.record_source
@@ -433,10 +437,17 @@ def main(argv: list[str] | None = None) -> int:
433
437
  return ret
434
438
 
435
439
 
436
- def print_error(e: Exception) -> None:
437
- log.error("rdump encountered a fatal error: %s", e)
440
+ def print_error(exc: Exception) -> None:
441
+ log.error("rdump encountered a fatal error: %s", exc)
442
+
438
443
  if log.isEnabledFor(LOGGING_TRACE_LEVEL):
439
- log.exception("Full traceback")
444
+ raise
445
+
446
+ # Print any additional notes attached to the exception (e.g. from adapters) at error level
447
+ for note in getattr(exc, "__notes__", []):
448
+ log.error(note)
449
+
450
+ log.warning("To show full traceback, run with -vvv")
440
451
 
441
452
 
442
453
  if __name__ == "__main__":
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.22.dev6'
32
- __version_tuple__ = version_tuple = (3, 22, 'dev6')
31
+ __version__ = version = '3.22.dev8'
32
+ __version_tuple__ = version_tuple = (3, 22, 'dev8')
33
33
 
34
- __commit_id__ = commit_id = 'g668138538'
34
+ __commit_id__ = commit_id = 'g1ab6b5481'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.22.dev6
3
+ Version: 3.22.dev8
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -0,0 +1,208 @@
1
+ # ruff: noqa: E501
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import sys
6
+ from typing import TYPE_CHECKING
7
+
8
+ import pytest
9
+ from elasticsearch.helpers import BulkIndexError
10
+
11
+ from flow.record import RecordDescriptor
12
+ from flow.record.adapter.elastic import ElasticWriter, create_elasticsearch_error_notes
13
+
14
+ if TYPE_CHECKING:
15
+ from flow.record.base import Record
16
+
17
+ MyRecord = RecordDescriptor(
18
+ "my/record",
19
+ [
20
+ ("string", "field_one"),
21
+ ("string", "field_two"),
22
+ ],
23
+ )
24
+
25
+
26
+ @pytest.mark.parametrize(
27
+ "record",
28
+ [
29
+ MyRecord("first", "record"),
30
+ MyRecord("second", "record"),
31
+ ],
32
+ )
33
+ def test_elastic_writer_metadata(record: Record) -> None:
34
+ options = {
35
+ "_meta_foo": "some value",
36
+ "_meta_bar": "another value",
37
+ }
38
+
39
+ with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
40
+ assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
41
+
42
+ assert writer.record_to_document(record, "some-index") == {
43
+ "_index": "some-index",
44
+ "_source": json.dumps(
45
+ {
46
+ "field_one": record.field_one,
47
+ "field_two": record.field_two,
48
+ "_record_metadata": {
49
+ "descriptor": {
50
+ "name": "my/record",
51
+ "hash": record._desc.descriptor_hash,
52
+ },
53
+ "source": None,
54
+ "classification": None,
55
+ "generated": record._generated.isoformat(),
56
+ "version": 1,
57
+ "foo": "some value",
58
+ "bar": "another value",
59
+ },
60
+ }
61
+ ),
62
+ }
63
+
64
+
65
+ def test_elastic_writer_metadata_exception() -> None:
66
+ with ElasticWriter(uri="elasticsearch:9200") as writer:
67
+ writer.excepthook(
68
+ BulkIndexError(
69
+ "1 document(s) failed to index.",
70
+ errors=[
71
+ {
72
+ "index": {
73
+ "_index": "example-index",
74
+ "_id": "bWFkZSB5b3UgbG9vayDwn5GA",
75
+ "status": 400,
76
+ "error": {
77
+ "type": "document_parsing_exception",
78
+ "reason": "[1:225] failed to parse field [example] of type [long] in document with id "
79
+ "'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
80
+ "caused_by": {
81
+ "type": "illegal_argument_exception",
82
+ "reason": 'For input string: "Foo"',
83
+ },
84
+ },
85
+ "data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
86
+ '"hash":1234567890},"source":"/path/to/source","classification":null,'
87
+ '"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
88
+ }
89
+ }
90
+ ],
91
+ )
92
+ )
93
+
94
+ with pytest.raises(BulkIndexError) as exc_info:
95
+ writer.__exit__()
96
+
97
+ writer.exception = None
98
+ exception = exc_info.value
99
+ assert isinstance(exception, BulkIndexError)
100
+
101
+ # version guard for __notes__ attribute, which was added in Python 3.11
102
+ # TODO: Remove after we drop support for Python 3.10
103
+ if sys.version_info >= (3, 11):
104
+ assert exception.__notes__ == [
105
+ """\
106
+ Error 1, 'document_parsing_exception' (status=400):
107
+ index: example-index
108
+ document_id: bWFkZSB5b3UgbG9vayDwn5GA
109
+ reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
110
+ caused_by: illegal_argument_exception, reason: For input string: "Foo"
111
+ descriptor_name: example/record
112
+ data: {"example": "Foo"}
113
+ """
114
+ ]
115
+
116
+
117
+ def test_create_elastic_notes() -> None:
118
+ exception = BulkIndexError(
119
+ "1 document(s) failed to index.",
120
+ errors=[
121
+ {
122
+ "index": {
123
+ "_index": "example-index",
124
+ "_id": "bWFkZSB5b3UgbG9vayDwn5GA",
125
+ "status": 400,
126
+ "error": {
127
+ "type": "document_parsing_exception",
128
+ "reason": "[1:225] failed to parse field [example] of type [long] in document with id "
129
+ "'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
130
+ "caused_by": {
131
+ "type": "illegal_argument_exception",
132
+ "reason": 'For input string: "Foo"',
133
+ },
134
+ },
135
+ "data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
136
+ '"hash":1234567890},"source":"/path/to/source","classification":null,'
137
+ '"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
138
+ },
139
+ },
140
+ {
141
+ "index": {
142
+ "_index": "my-index",
143
+ "_id": "4XuIRpwBbjwxMKSCr8TE",
144
+ "status": 400,
145
+ "error": {
146
+ "type": "document_parsing_exception",
147
+ "reason": "[1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'",
148
+ "caused_by": {
149
+ "type": "illegal_argument_exception",
150
+ "reason": "failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]",
151
+ "caused_by": {
152
+ "type": "date_time_parse_exception",
153
+ "reason": "Failed to parse with all enclosed parsers",
154
+ },
155
+ },
156
+ },
157
+ "data": '{"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text", "_record_metadata": {"descriptor": {"name": "text/paste", "hash": 831446724}, "source": "external/pastebin", "classification": "PUBLIC", "generated": "2019-03-19T09:11:04.706581+00:00", "version": 1}}',
158
+ }
159
+ },
160
+ ],
161
+ )
162
+ errors = exception.errors
163
+ assert len(errors) == 2
164
+
165
+ # Test with max_notes=1, which should only include the first error and a summary note about the remaining errors
166
+ notes = create_elasticsearch_error_notes(errors, max_notes=1)
167
+ assert len(notes) == 2
168
+ assert (
169
+ notes[0]
170
+ == """\
171
+ Error 1, 'document_parsing_exception' (status=400):
172
+ index: example-index
173
+ document_id: bWFkZSB5b3UgbG9vayDwn5GA
174
+ reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
175
+ caused_by: illegal_argument_exception, reason: For input string: "Foo"
176
+ descriptor_name: example/record
177
+ data: {"example": "Foo"}
178
+ """
179
+ )
180
+ assert notes[-1] == "... and 1 more error(s) not shown."
181
+
182
+ # Test with max_notes=2, which should show both errors without the summary note
183
+ notes = create_elasticsearch_error_notes(errors, max_notes=2)
184
+ assert len(notes) == 2
185
+ assert (
186
+ notes[0]
187
+ == """\
188
+ Error 1, 'document_parsing_exception' (status=400):
189
+ index: example-index
190
+ document_id: bWFkZSB5b3UgbG9vayDwn5GA
191
+ reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
192
+ caused_by: illegal_argument_exception, reason: For input string: "Foo"
193
+ descriptor_name: example/record
194
+ data: {"example": "Foo"}
195
+ """
196
+ )
197
+ assert (
198
+ notes[1]
199
+ == """\
200
+ Error 2, 'document_parsing_exception' (status=400):
201
+ index: my-index
202
+ document_id: 4XuIRpwBbjwxMKSCr8TE
203
+ reason: [1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'
204
+ caused_by: illegal_argument_exception, reason: failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]
205
+ descriptor_name: text/paste
206
+ data: {"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text"}
207
+ """
208
+ )
@@ -20,6 +20,7 @@ from flow.record import RecordDescriptor, RecordReader, RecordWriter
20
20
  from flow.record.adapter.line import field_types_for_record_descriptor
21
21
  from flow.record.fieldtypes import flow_record_tz
22
22
  from flow.record.tools import rdump
23
+ from flow.record.utils import LOGGING_TRACE_LEVEL
23
24
  from tests._utils import generate_plain_records
24
25
 
25
26
 
@@ -870,3 +871,73 @@ def test_rdump_invalid_stdin_pipe(stdin_bytes: bytes) -> None:
870
871
  assert pipe.returncode == 1, "rdump should exit with error code 1 on invalid input"
871
872
  assert b"rdump encountered a fatal error: Could not find adapter for file-like object" in stderr
872
873
  assert b"Processed 0 records (matched=0, unmatched=0)" in stdout
874
+
875
+
876
+ @pytest.mark.skipif(sys.version_info < (3, 11), reason="skip on python 3.10 or lower")
877
+ def test_rdump_print_error_notes(
878
+ tmp_path: Path,
879
+ capsys: pytest.CaptureFixture,
880
+ caplog: pytest.LogCaptureFixture,
881
+ ) -> None:
882
+ """Test that rdump prints error notes when an exception occurs."""
883
+
884
+ path = tmp_path / "test.records"
885
+ path.touch() # create an empty file
886
+
887
+ exc = ValueError("something went wrong")
888
+ exc.add_note("Check the input format")
889
+
890
+ with mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc):
891
+ rdump.main([str(path)])
892
+ _out, err = capsys.readouterr()
893
+
894
+ assert "something went wrong" in err
895
+ assert "Check the input format" in err
896
+ assert "To show full traceback, run with -vvv" in err
897
+
898
+ # with full traceback
899
+ with (
900
+ caplog.at_level(LOGGING_TRACE_LEVEL),
901
+ mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc),
902
+ pytest.raises(ValueError, match="something went wrong\nCheck the input format"),
903
+ ):
904
+ rdump.main([str(path), "-vvv"])
905
+
906
+ capsys.readouterr()
907
+
908
+
909
+ def test_rdump_fields_with_spaces(tmp_path: Path, capsysbinary: pytest.CaptureFixture) -> None:
910
+ """Test if rdump handles spaces in field names gracefully."""
911
+ TestRecord = RecordDescriptor(
912
+ "test/record",
913
+ [
914
+ ("varint", "count"),
915
+ ("string", "foo"),
916
+ ("string", "bar"),
917
+ ],
918
+ )
919
+
920
+ path = tmp_path / "test.records"
921
+ out_path = tmp_path / "out.records"
922
+ with RecordWriter(path) as writer:
923
+ writer.write(TestRecord(count=0, foo="bar", bar="baz"))
924
+
925
+ # test if fields works with spaces in the name
926
+ rdump.main([str(path), "--fields", "foo, count ", "-w", str(out_path)])
927
+ with RecordReader(out_path) as reader:
928
+ records = list(reader)
929
+ assert len(records) == 1
930
+ assert list(records[0]._desc.fields.keys()) == ["foo", "count"]
931
+
932
+ # test if exclude works with spaces in the field names
933
+ rdump.main([str(path), "--exclude", " foo, bar ", "-w", str(out_path)])
934
+ with RecordReader(out_path) as reader:
935
+ records = list(reader)
936
+ assert len(records) == 1
937
+ assert list(records[0]._desc.fields.keys()) == ["count"]
938
+
939
+ # also test an adapter
940
+ rdump.main([str(path), "--exclude", " foo, bar ", "--csv"])
941
+ captured = capsysbinary.readouterr()
942
+ assert captured.err == b""
943
+ assert b"count,_source,_classification,_generated,_version\r\n" in captured.out
@@ -1,59 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- from typing import TYPE_CHECKING
5
-
6
- import pytest
7
-
8
- from flow.record import RecordDescriptor
9
- from flow.record.adapter.elastic import ElasticWriter
10
-
11
- if TYPE_CHECKING:
12
- from flow.record.base import Record
13
-
14
- MyRecord = RecordDescriptor(
15
- "my/record",
16
- [
17
- ("string", "field_one"),
18
- ("string", "field_two"),
19
- ],
20
- )
21
-
22
-
23
- @pytest.mark.parametrize(
24
- "record",
25
- [
26
- MyRecord("first", "record"),
27
- MyRecord("second", "record"),
28
- ],
29
- )
30
- def test_elastic_writer_metadata(record: Record) -> None:
31
- options = {
32
- "_meta_foo": "some value",
33
- "_meta_bar": "another value",
34
- }
35
-
36
- with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
37
- assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
38
-
39
- assert writer.record_to_document(record, "some-index") == {
40
- "_index": "some-index",
41
- "_source": json.dumps(
42
- {
43
- "field_one": record.field_one,
44
- "field_two": record.field_two,
45
- "_record_metadata": {
46
- "descriptor": {
47
- "name": "my/record",
48
- "hash": record._desc.descriptor_hash,
49
- },
50
- "source": None,
51
- "classification": None,
52
- "generated": record._generated.isoformat(),
53
- "version": 1,
54
- "foo": "some value",
55
- "bar": "another value",
56
- },
57
- }
58
- ),
59
- }
File without changes
File without changes