flow.record 3.22.dev5__tar.gz → 3.22.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/PKG-INFO +1 -1
  2. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/elastic.py +71 -32
  3. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/sqlite.py +5 -0
  4. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/tools/rdump.py +10 -3
  5. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/version.py +3 -3
  6. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow.record.egg-info/PKG-INFO +1 -1
  7. flow_record-3.22.dev7/tests/adapter/test_elastic.py +208 -0
  8. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_sqlite_duckdb.py +13 -8
  9. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/tools/test_rdump.py +34 -0
  10. flow_record-3.22.dev5/tests/adapter/test_elastic.py +0 -59
  11. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/.git-blame-ignore-revs +0 -0
  12. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/.gitattributes +0 -0
  13. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/COPYRIGHT +0 -0
  14. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/LICENSE +0 -0
  15. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/MANIFEST.in +0 -0
  16. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/README.md +0 -0
  17. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/examples/__init__.py +0 -0
  18. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/examples/filesystem.py +0 -0
  19. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/examples/passivedns.py +0 -0
  20. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/examples/records.json +0 -0
  21. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/examples/selectors.py +0 -0
  22. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/examples/tcpconn.py +0 -0
  23. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/__init__.py +0 -0
  24. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/__init__.py +0 -0
  25. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/archive.py +0 -0
  26. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/avro.py +0 -0
  27. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/broker.py +0 -0
  28. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/csvfile.py +0 -0
  29. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/duckdb.py +0 -0
  30. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/jsonfile.py +0 -0
  31. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/line.py +0 -0
  32. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/mongo.py +0 -0
  33. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/split.py +0 -0
  34. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/splunk.py +0 -0
  35. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/stream.py +0 -0
  36. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/text.py +0 -0
  37. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/adapter/xlsx.py +0 -0
  38. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/base.py +0 -0
  39. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/context.py +0 -0
  40. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/exceptions.py +0 -0
  41. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/__init__.py +0 -0
  42. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/credential.py +0 -0
  43. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/__init__.py +0 -0
  44. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/ip.py +0 -0
  45. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/ipv4.py +0 -0
  46. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/tcp.py +0 -0
  47. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/fieldtypes/net/udp.py +0 -0
  48. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/jsonpacker.py +0 -0
  49. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/packer.py +0 -0
  50. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/selector.py +0 -0
  51. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/stream.py +0 -0
  52. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/tools/__init__.py +0 -0
  53. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/tools/geoip.py +0 -0
  54. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/utils.py +0 -0
  55. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow/record/whitelist.py +0 -0
  56. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow.record.egg-info/SOURCES.txt +0 -0
  57. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow.record.egg-info/dependency_links.txt +0 -0
  58. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow.record.egg-info/entry_points.txt +0 -0
  59. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow.record.egg-info/requires.txt +0 -0
  60. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/flow.record.egg-info/top_level.txt +0 -0
  61. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/pyproject.toml +0 -0
  62. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/setup.cfg +0 -0
  63. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/__init__.py +0 -0
  64. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/_data/.gitkeep +0 -0
  65. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/_docs/Makefile +0 -0
  66. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/_docs/conf.py +0 -0
  67. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/_docs/index.rst +0 -0
  68. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/_utils.py +0 -0
  69. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/__init__.py +0 -0
  70. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_avro.py +0 -0
  71. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_csv.py +0 -0
  72. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_json.py +0 -0
  73. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_line.py +0 -0
  74. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_splunk.py +0 -0
  75. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_text.py +0 -0
  76. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/adapter/test_xlsx.py +0 -0
  77. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/conftest.py +0 -0
  78. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/fieldtypes/__init__.py +0 -0
  79. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/fieldtypes/test_boolean.py +0 -0
  80. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/fieldtypes/test_fieldtypes.py +0 -0
  81. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/fieldtypes/test_ip.py +0 -0
  82. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/packer/__init__.py +0 -0
  83. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/packer/test_json_packer.py +0 -0
  84. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/packer/test_packer.py +0 -0
  85. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/record/__init__.py +0 -0
  86. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/record/test_adapter.py +0 -0
  87. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/record/test_context.py +0 -0
  88. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/record/test_descriptor.py +0 -0
  89. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/record/test_multi_timestamp.py +0 -0
  90. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/record/test_record.py +0 -0
  91. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/selector/__init__.py +0 -0
  92. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/selector/test_compiled.py +0 -0
  93. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/selector/test_selectors.py +0 -0
  94. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/test_deprecations.py +0 -0
  95. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/test_regressions.py +0 -0
  96. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/test_utils.py +0 -0
  97. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tests/tools/__init__.py +0 -0
  98. {flow_record-3.22.dev5 → flow_record-3.22.dev7}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.22.dev5
3
+ Version: 3.22.dev7
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import hashlib
4
+ import json
4
5
  import logging
5
6
  import queue
7
+ import sys
6
8
  import threading
7
9
  from contextlib import suppress
8
10
  from typing import TYPE_CHECKING
9
11
 
10
- import urllib3
11
-
12
12
  try:
13
13
  import elasticsearch
14
14
  import elasticsearch.helpers
@@ -85,7 +85,7 @@ class ElasticWriter(AbstractWriter):
85
85
  self.max_retries = int(max_retries)
86
86
 
87
87
  if not uri.lower().startswith(("http://", "https://")):
88
- uri = "http://" + uri
88
+ uri = "https://" + uri
89
89
 
90
90
  self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
91
91
  self.event = threading.Event()
@@ -95,6 +95,7 @@ class ElasticWriter(AbstractWriter):
95
95
  self.es = elasticsearch.Elasticsearch(
96
96
  uri,
97
97
  verify_certs=verify_certs,
98
+ ssl_show_warn=verify_certs,
98
99
  http_compress=http_compress,
99
100
  api_key=api_key,
100
101
  request_timeout=request_timeout,
@@ -107,10 +108,6 @@ class ElasticWriter(AbstractWriter):
107
108
  self.thread = threading.Thread(target=self.streaming_bulk_thread)
108
109
  self.thread.start()
109
110
 
110
- if not verify_certs:
111
- # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
112
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
113
-
114
111
  self.metadata_fields = {}
115
112
  for arg_key, arg_val in kwargs.items():
116
113
  if arg_key.startswith("_meta_"):
@@ -118,7 +115,13 @@ class ElasticWriter(AbstractWriter):
118
115
 
119
116
  def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
120
117
  self.exception = getattr(exc, "exc_value", exc)
121
- self.exception = enrich_elastic_exception(self.exception)
118
+
119
+ # version guard for add_note(), which was added in Python 3.11
120
+ # TODO: Remove version guard after dropping support for Python 3.10
121
+ if sys.version_info >= (3, 11):
122
+ for note in create_elasticsearch_error_notes(getattr(self.exception, "errors", []), max_notes=5):
123
+ self.exception.add_note(note)
124
+
122
125
  self.event.set()
123
126
 
124
127
  def record_to_document(self, record: Record, index: str) -> dict:
@@ -230,11 +233,12 @@ class ElasticReader(AbstractReader):
230
233
  max_retries = int(max_retries)
231
234
 
232
235
  if not uri.lower().startswith(("http://", "https://")):
233
- uri = "http://" + uri
236
+ uri = "https://" + uri
234
237
 
235
238
  self.es = elasticsearch.Elasticsearch(
236
239
  uri,
237
240
  verify_certs=verify_certs,
241
+ ssl_show_warn=verify_certs,
238
242
  http_compress=http_compress,
239
243
  api_key=api_key,
240
244
  request_timeout=request_timeout,
@@ -242,10 +246,6 @@ class ElasticReader(AbstractReader):
242
246
  max_retries=max_retries,
243
247
  )
244
248
 
245
- if not verify_certs:
246
- # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
247
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
248
-
249
249
  def __iter__(self) -> Iterator[Record]:
250
250
  ctx = get_app_context()
251
251
  selector = self.selector
@@ -266,30 +266,69 @@ class ElasticReader(AbstractReader):
266
266
  self.es.close()
267
267
 
268
268
 
269
- def enrich_elastic_exception(exception: Exception) -> Exception:
270
- """Extend the exception with error information from Elastic.
269
+ def create_elasticsearch_error_notes(errors: list[dict] | dict, max_notes: int = 0) -> list[str]:
270
+ """Convert Elasticsearch Exception errors into pretty formatted notes.
271
271
 
272
272
  Resources:
273
273
  - https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
274
+
275
+ Arguments:
276
+ errors: A list of error items from an Elasticsearch exception, or a single error
277
+ max_notes: Maximum number of notes to create. If 0, all errors will be converted into notes.
278
+
279
+ Returns:
280
+ A list of formatted error notes.
274
281
  """
275
- errors = set()
276
- if hasattr(exception, "errors"):
282
+ if isinstance(errors, dict):
283
+ errors = [errors]
284
+
285
+ notes = []
286
+ for idx, error in enumerate(errors, 1):
287
+ # Extract index information
288
+ index = error.get("index", {})
289
+ index_name = index.get("_index", "unknown _index")
290
+ doc_id = index.get("_id", "unknown _id")
291
+ status = index.get("status")
292
+
293
+ # Extract error details
294
+ error = index.get("error", {})
295
+ error_type = error.get("type", "unknown error type")
296
+ error_reason = error.get("reason", "unknown reason")
297
+
298
+ # Create formatted note
299
+ note_parts = [
300
+ f"Error {idx}, {error_type!r} ({status=}):",
301
+ f" index: {index_name}",
302
+ f" document_id: {doc_id}",
303
+ f" reason: {error_reason}",
304
+ ]
305
+
306
+ # Include caused_by information if available
307
+ if caused_by := error.get("caused_by"):
308
+ cause_type = caused_by.get("type")
309
+ cause_reason = caused_by.get("reason")
310
+ note_parts.append(f" caused_by: {cause_type}, reason: {cause_reason}")
311
+
312
+ # Extract the record_descriptor name from the "data" field if possible
277
313
  try:
278
- for error in exception.errors:
279
- index_dict = error.get("index", {})
280
- status = index_dict.get("status")
281
- error_dict = index_dict.get("error", {})
282
- error_type = error_dict.get("type")
283
- error_reason = error_dict.get("reason", "")
284
-
285
- errors.add(f"({status} {error_type} {error_reason})")
314
+ data = json.loads(index.get("data", "{}"))
315
+ record_metadata = data.pop("_record_metadata", {})
316
+ descriptor = record_metadata.get("descriptor", {})
317
+ if descriptor_name := descriptor.get("name"):
318
+ note_parts.append(f" descriptor_name: {descriptor_name}")
319
+ if data:
320
+ note_parts.append(f" data: {json.dumps(data)}")
286
321
  except Exception:
287
- errors.add("unable to extend errors")
322
+ # failed to get descriptor_name and data, ignore
323
+ pass
324
+
325
+ notes.append("\n".join(note_parts) + "\n")
288
326
 
289
- # append errors to original exception message
290
- error_str = ", ".join(errors)
291
- original_message = exception.args[0] if exception.args else ""
292
- new_message = f"{original_message} {error_str}"
293
- exception.args = (new_message, *exception.args[1:])
327
+ # if max_notes is reached, stop processing and add a final note about remaining errors
328
+ if max_notes > 0 and idx >= max_notes:
329
+ remaining = len(errors) - idx
330
+ if remaining > 0:
331
+ notes.append(f"... and {remaining} more error(s) not shown.")
332
+ break
294
333
 
295
- return exception
334
+ return notes
@@ -204,6 +204,11 @@ class SqliteReader(AbstractReader):
204
204
  if match_record_with_context(record, selector, ctx):
205
205
  yield record
206
206
 
207
+ def close(self) -> None:
208
+ if self.con:
209
+ self.con.close()
210
+ self.con = None
211
+
207
212
 
208
213
  class SqliteWriter(AbstractWriter):
209
214
  """SQLite writer."""
@@ -433,10 +433,17 @@ def main(argv: list[str] | None = None) -> int:
433
433
  return ret
434
434
 
435
435
 
436
- def print_error(e: Exception) -> None:
437
- log.error("rdump encountered a fatal error: %s", e)
436
+ def print_error(exc: Exception) -> None:
437
+ log.error("rdump encountered a fatal error: %s", exc)
438
+
438
439
  if log.isEnabledFor(LOGGING_TRACE_LEVEL):
439
- log.exception("Full traceback")
440
+ raise
441
+
442
+ # Print any additional notes attached to the exception (e.g. from adapters) at error level
443
+ for note in getattr(exc, "__notes__", []):
444
+ log.error(note)
445
+
446
+ log.warning("To show full traceback, run with -vvv")
440
447
 
441
448
 
442
449
  if __name__ == "__main__":
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.22.dev5'
32
- __version_tuple__ = version_tuple = (3, 22, 'dev5')
31
+ __version__ = version = '3.22.dev7'
32
+ __version_tuple__ = version_tuple = (3, 22, 'dev7')
33
33
 
34
- __commit_id__ = commit_id = 'gc3f8cd8c6'
34
+ __commit_id__ = commit_id = 'ge9118cf22'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.22.dev5
3
+ Version: 3.22.dev7
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -0,0 +1,208 @@
1
+ # ruff: noqa: E501
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import sys
6
+ from typing import TYPE_CHECKING
7
+
8
+ import pytest
9
+ from elasticsearch.helpers import BulkIndexError
10
+
11
+ from flow.record import RecordDescriptor
12
+ from flow.record.adapter.elastic import ElasticWriter, create_elasticsearch_error_notes
13
+
14
+ if TYPE_CHECKING:
15
+ from flow.record.base import Record
16
+
17
+ MyRecord = RecordDescriptor(
18
+ "my/record",
19
+ [
20
+ ("string", "field_one"),
21
+ ("string", "field_two"),
22
+ ],
23
+ )
24
+
25
+
26
+ @pytest.mark.parametrize(
27
+ "record",
28
+ [
29
+ MyRecord("first", "record"),
30
+ MyRecord("second", "record"),
31
+ ],
32
+ )
33
+ def test_elastic_writer_metadata(record: Record) -> None:
34
+ options = {
35
+ "_meta_foo": "some value",
36
+ "_meta_bar": "another value",
37
+ }
38
+
39
+ with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
40
+ assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
41
+
42
+ assert writer.record_to_document(record, "some-index") == {
43
+ "_index": "some-index",
44
+ "_source": json.dumps(
45
+ {
46
+ "field_one": record.field_one,
47
+ "field_two": record.field_two,
48
+ "_record_metadata": {
49
+ "descriptor": {
50
+ "name": "my/record",
51
+ "hash": record._desc.descriptor_hash,
52
+ },
53
+ "source": None,
54
+ "classification": None,
55
+ "generated": record._generated.isoformat(),
56
+ "version": 1,
57
+ "foo": "some value",
58
+ "bar": "another value",
59
+ },
60
+ }
61
+ ),
62
+ }
63
+
64
+
65
+ def test_elastic_writer_metadata_exception() -> None:
66
+ with ElasticWriter(uri="elasticsearch:9200") as writer:
67
+ writer.excepthook(
68
+ BulkIndexError(
69
+ "1 document(s) failed to index.",
70
+ errors=[
71
+ {
72
+ "index": {
73
+ "_index": "example-index",
74
+ "_id": "bWFkZSB5b3UgbG9vayDwn5GA",
75
+ "status": 400,
76
+ "error": {
77
+ "type": "document_parsing_exception",
78
+ "reason": "[1:225] failed to parse field [example] of type [long] in document with id "
79
+ "'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
80
+ "caused_by": {
81
+ "type": "illegal_argument_exception",
82
+ "reason": 'For input string: "Foo"',
83
+ },
84
+ },
85
+ "data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
86
+ '"hash":1234567890},"source":"/path/to/source","classification":null,'
87
+ '"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
88
+ }
89
+ }
90
+ ],
91
+ )
92
+ )
93
+
94
+ with pytest.raises(BulkIndexError) as exc_info:
95
+ writer.__exit__()
96
+
97
+ writer.exception = None
98
+ exception = exc_info.value
99
+ assert isinstance(exception, BulkIndexError)
100
+
101
+ # version guard for __notes__ attribute, which was added in Python 3.11
102
+ # TODO: Remove after we drop support for Python 3.10
103
+ if sys.version_info >= (3, 11):
104
+ assert exception.__notes__ == [
105
+ """\
106
+ Error 1, 'document_parsing_exception' (status=400):
107
+ index: example-index
108
+ document_id: bWFkZSB5b3UgbG9vayDwn5GA
109
+ reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
110
+ caused_by: illegal_argument_exception, reason: For input string: "Foo"
111
+ descriptor_name: example/record
112
+ data: {"example": "Foo"}
113
+ """
114
+ ]
115
+
116
+
117
+ def test_create_elastic_notes() -> None:
118
+ exception = BulkIndexError(
119
+ "1 document(s) failed to index.",
120
+ errors=[
121
+ {
122
+ "index": {
123
+ "_index": "example-index",
124
+ "_id": "bWFkZSB5b3UgbG9vayDwn5GA",
125
+ "status": 400,
126
+ "error": {
127
+ "type": "document_parsing_exception",
128
+ "reason": "[1:225] failed to parse field [example] of type [long] in document with id "
129
+ "'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'",
130
+ "caused_by": {
131
+ "type": "illegal_argument_exception",
132
+ "reason": 'For input string: "Foo"',
133
+ },
134
+ },
135
+ "data": '{"example":"Foo","_record_metadata":{"descriptor":{"name":"example/record",'
136
+ '"hash":1234567890},"source":"/path/to/source","classification":null,'
137
+ '"generated":"2025-12-31T12:34:56.789012+00:00","version":1}}',
138
+ },
139
+ },
140
+ {
141
+ "index": {
142
+ "_index": "my-index",
143
+ "_id": "4XuIRpwBbjwxMKSCr8TE",
144
+ "status": 400,
145
+ "error": {
146
+ "type": "document_parsing_exception",
147
+ "reason": "[1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'",
148
+ "caused_by": {
149
+ "type": "illegal_argument_exception",
150
+ "reason": "failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]",
151
+ "caused_by": {
152
+ "type": "date_time_parse_exception",
153
+ "reason": "Failed to parse with all enclosed parsers",
154
+ },
155
+ },
156
+ },
157
+ "data": '{"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text", "_record_metadata": {"descriptor": {"name": "text/paste", "hash": 831446724}, "source": "external/pastebin", "classification": "PUBLIC", "generated": "2019-03-19T09:11:04.706581+00:00", "version": 1}}',
158
+ }
159
+ },
160
+ ],
161
+ )
162
+ errors = exception.errors
163
+ assert len(errors) == 2
164
+
165
+ # Test with max_notes=1, which should only include the first error and a summary note about the remaining errors
166
+ notes = create_elasticsearch_error_notes(errors, max_notes=1)
167
+ assert len(notes) == 2
168
+ assert (
169
+ notes[0]
170
+ == """\
171
+ Error 1, 'document_parsing_exception' (status=400):
172
+ index: example-index
173
+ document_id: bWFkZSB5b3UgbG9vayDwn5GA
174
+ reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
175
+ caused_by: illegal_argument_exception, reason: For input string: "Foo"
176
+ descriptor_name: example/record
177
+ data: {"example": "Foo"}
178
+ """
179
+ )
180
+ assert notes[-1] == "... and 1 more error(s) not shown."
181
+
182
+ # Test with max_notes=2, which should show both errors without the summary note
183
+ notes = create_elasticsearch_error_notes(errors, max_notes=2)
184
+ assert len(notes) == 2
185
+ assert (
186
+ notes[0]
187
+ == """\
188
+ Error 1, 'document_parsing_exception' (status=400):
189
+ index: example-index
190
+ document_id: bWFkZSB5b3UgbG9vayDwn5GA
191
+ reason: [1:225] failed to parse field [example] of type [long] in document with id 'bWFkZSB5b3UgbG9vayDwn5GA'. Preview of field's value: 'Foo'
192
+ caused_by: illegal_argument_exception, reason: For input string: "Foo"
193
+ descriptor_name: example/record
194
+ data: {"example": "Foo"}
195
+ """
196
+ )
197
+ assert (
198
+ notes[1]
199
+ == """\
200
+ Error 2, 'document_parsing_exception' (status=400):
201
+ index: my-index
202
+ document_id: 4XuIRpwBbjwxMKSCr8TE
203
+ reason: [1:150] failed to parse field [content] of type [date] in document with id '4XuIRpwBbjwxMKSCr8TE'. Preview of field's value: 'This is the content of a sampe pastebin record'
204
+ caused_by: illegal_argument_exception, reason: failed to parse date field [This is the content of a sampe pastebin record] with format [strict_date_optional_time||epoch_millis]
205
+ descriptor_name: text/paste
206
+ data: {"key": "Q42eWSaF", "date": "2019-03-19T09:09:47+00:00", "expire_date": "1970-01-01T00:00:00+00:00", "title": "A sample pastebin record", "content": "This is the content of a sampe pastebin record", "user": "", "syntax": "text"}
207
+ """
208
+ )
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import sqlite3
4
+ from contextlib import closing
4
5
  from datetime import datetime, timezone
5
6
  from typing import TYPE_CHECKING, Any, NamedTuple
6
7
 
@@ -136,7 +137,7 @@ def test_write_to_sqlite(tmp_path: Path, count: int, db: Database) -> None:
136
137
  writer.write(record)
137
138
 
138
139
  record_count = 0
139
- with db.connector.connect(str(db_path)) as con:
140
+ with closing(db.connector.connect(str(db_path))) as con:
140
141
  cursor = con.execute("SELECT COUNT(*) FROM 'test/record'")
141
142
  record_count = cursor.fetchone()[0]
142
143
 
@@ -157,7 +158,7 @@ def test_read_from_sqlite(tmp_path: Path, db: Database) -> None:
157
158
  """Tests basic reading from a SQLite database."""
158
159
  # Generate a SQLite database
159
160
  db_path = tmp_path / "records.db"
160
- with db.connector.connect(str(db_path)) as con:
161
+ with closing(db.connector.connect(str(db_path))) as con:
161
162
  con.execute(
162
163
  """
163
164
  CREATE TABLE 'test/record' (
@@ -176,6 +177,7 @@ def test_read_from_sqlite(tmp_path: Path, db: Database) -> None:
176
177
  """,
177
178
  (f"record{i}", f"foobar{i}".encode(), dt_isoformat, 3.14 + i),
178
179
  )
180
+ con.commit()
179
181
 
180
182
  # Read the SQLite database using flow.record
181
183
  with RecordReader(f"{db.scheme}://{db_path}") as reader:
@@ -251,7 +253,7 @@ def test_write_zero_records(tmp_path: Path, db: Database) -> None:
251
253
  assert writer
252
254
 
253
255
  # test if it's a valid database
254
- with db.connector.connect(str(db_path)) as con:
256
+ with closing(db.connector.connect(str(db_path))) as con:
255
257
  assert con.execute("SELECT * FROM sqlite_master").fetchall() == []
256
258
 
257
259
 
@@ -272,9 +274,10 @@ def test_write_zero_records(tmp_path: Path, db: Database) -> None:
272
274
  def test_non_strict_sqlite_fields(tmp_path: Path, sqlite_coltype: str, sqlite_value: Any, expected_value: Any) -> None:
273
275
  """SQLite by default is non strict, meaning that the value could be of different type than the column type."""
274
276
  db = tmp_path / "records.db"
275
- with sqlite3.connect(db) as con:
277
+ with closing(sqlite3.connect(db)) as con:
276
278
  con.execute(f"CREATE TABLE 'strict-test' (field {sqlite_coltype})")
277
279
  con.execute("INSERT INTO 'strict-test' VALUES(?)", (sqlite_value,))
280
+ con.commit()
278
281
 
279
282
  with RecordReader(f"sqlite://{db}") as reader:
280
283
  record = next(iter(reader))
@@ -294,10 +297,11 @@ def test_invalid_table_names_quoting(tmp_path: Path, invalid_table_name: str) ->
294
297
 
295
298
  # Creating the tables with these invalid_table_names in SQLite is no problem
296
299
  db = tmp_path / "records.db"
297
- with sqlite3.connect(db) as con:
300
+ with closing(sqlite3.connect(db)) as con:
298
301
  con.execute(f"CREATE TABLE [{invalid_table_name}] (field TEXT, field2 TEXT)")
299
302
  con.execute(f"INSERT INTO [{invalid_table_name}] VALUES(?, ?)", ("hello", "world"))
300
303
  con.execute(f"INSERT INTO [{invalid_table_name}] VALUES(?, ?)", ("goodbye", "planet"))
304
+ con.commit()
301
305
 
302
306
  # However, these invalid_table_names should raise an exception when reading
303
307
  with (
@@ -320,10 +324,11 @@ def test_invalid_field_names_quoting(tmp_path: Path, invalid_field_name: str) ->
320
324
 
321
325
  # Creating the table with invalid field name in SQLite is no problem
322
326
  db = tmp_path / "records.db"
323
- with sqlite3.connect(db) as con:
327
+ with closing(sqlite3.connect(db)) as con:
324
328
  con.execute(f"CREATE TABLE [test] (field TEXT, [{invalid_field_name}] TEXT)")
325
329
  con.execute("INSERT INTO [test] VALUES(?, ?)", ("hello", "world"))
326
330
  con.execute("INSERT INTO [test] VALUES(?, ?)", ("goodbye", "planet"))
331
+ con.commit()
327
332
 
328
333
  # However, these field names are invalid in flow.record and should raise an exception
329
334
  with (
@@ -365,7 +370,7 @@ def test_batch_size(
365
370
  writer.write(next(records))
366
371
 
367
372
  # test count of records in table (no flush yet if batch_size > 1)
368
- with db.connector.connect(str(db_path)) as con:
373
+ with closing(db.connector.connect(str(db_path))) as con:
369
374
  x = con.execute('SELECT COUNT(*) FROM "test/record"')
370
375
  assert x.fetchone()[0] is expected_first
371
376
 
@@ -374,7 +379,7 @@ def test_batch_size(
374
379
  writer.write(next(records))
375
380
 
376
381
  # test count of records in table after flush
377
- with db.connector.connect(str(db_path)) as con:
382
+ with closing(db.connector.connect(str(db_path))) as con:
378
383
  x = con.execute('SELECT COUNT(*) FROM "test/record"')
379
384
  assert x.fetchone()[0] == expected_second
380
385
 
@@ -20,6 +20,7 @@ from flow.record import RecordDescriptor, RecordReader, RecordWriter
20
20
  from flow.record.adapter.line import field_types_for_record_descriptor
21
21
  from flow.record.fieldtypes import flow_record_tz
22
22
  from flow.record.tools import rdump
23
+ from flow.record.utils import LOGGING_TRACE_LEVEL
23
24
  from tests._utils import generate_plain_records
24
25
 
25
26
 
@@ -870,3 +871,36 @@ def test_rdump_invalid_stdin_pipe(stdin_bytes: bytes) -> None:
870
871
  assert pipe.returncode == 1, "rdump should exit with error code 1 on invalid input"
871
872
  assert b"rdump encountered a fatal error: Could not find adapter for file-like object" in stderr
872
873
  assert b"Processed 0 records (matched=0, unmatched=0)" in stdout
874
+
875
+
876
+ @pytest.mark.skipif(sys.version_info < (3, 11), reason="skip on python 3.10 or lower")
877
+ def test_rdump_print_error_notes(
878
+ tmp_path: Path,
879
+ capsys: pytest.CaptureFixture,
880
+ caplog: pytest.LogCaptureFixture,
881
+ ) -> None:
882
+ """Test that rdump prints error notes when an exception occurs."""
883
+
884
+ path = tmp_path / "test.records"
885
+ path.touch() # create an empty file
886
+
887
+ exc = ValueError("something went wrong")
888
+ exc.add_note("Check the input format")
889
+
890
+ with mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc):
891
+ rdump.main([str(path)])
892
+ _out, err = capsys.readouterr()
893
+
894
+ assert "something went wrong" in err
895
+ assert "Check the input format" in err
896
+ assert "To show full traceback, run with -vvv" in err
897
+
898
+ # with full traceback
899
+ with (
900
+ caplog.at_level(LOGGING_TRACE_LEVEL),
901
+ mock.patch("flow.record.tools.rdump.RecordWriter", side_effect=exc),
902
+ pytest.raises(ValueError, match="something went wrong\nCheck the input format"),
903
+ ):
904
+ rdump.main([str(path), "-vvv"])
905
+
906
+ capsys.readouterr()
@@ -1,59 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- from typing import TYPE_CHECKING
5
-
6
- import pytest
7
-
8
- from flow.record import RecordDescriptor
9
- from flow.record.adapter.elastic import ElasticWriter
10
-
11
- if TYPE_CHECKING:
12
- from flow.record.base import Record
13
-
14
- MyRecord = RecordDescriptor(
15
- "my/record",
16
- [
17
- ("string", "field_one"),
18
- ("string", "field_two"),
19
- ],
20
- )
21
-
22
-
23
- @pytest.mark.parametrize(
24
- "record",
25
- [
26
- MyRecord("first", "record"),
27
- MyRecord("second", "record"),
28
- ],
29
- )
30
- def test_elastic_writer_metadata(record: Record) -> None:
31
- options = {
32
- "_meta_foo": "some value",
33
- "_meta_bar": "another value",
34
- }
35
-
36
- with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
37
- assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
38
-
39
- assert writer.record_to_document(record, "some-index") == {
40
- "_index": "some-index",
41
- "_source": json.dumps(
42
- {
43
- "field_one": record.field_one,
44
- "field_two": record.field_two,
45
- "_record_metadata": {
46
- "descriptor": {
47
- "name": "my/record",
48
- "hash": record._desc.descriptor_hash,
49
- },
50
- "source": None,
51
- "classification": None,
52
- "generated": record._generated.isoformat(),
53
- "version": 1,
54
- "foo": "some value",
55
- "bar": "another value",
56
- },
57
- }
58
- ),
59
- }
File without changes
File without changes