flow.record 3.21.dev3__tar.gz → 3.21.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/PKG-INFO +6 -1
  2. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/examples/filesystem.py +28 -29
  3. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/examples/passivedns.py +12 -9
  4. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/examples/tcpconn.py +5 -3
  5. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/elastic.py +48 -13
  6. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/splunk.py +1 -1
  7. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/base.py +2 -1
  8. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/jsonpacker.py +1 -1
  9. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/stream.py +17 -6
  10. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/tools/rdump.py +61 -8
  11. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/utils.py +2 -0
  12. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/version.py +2 -2
  13. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow.record.egg-info/PKG-INFO +6 -1
  14. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow.record.egg-info/SOURCES.txt +1 -0
  15. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow.record.egg-info/requires.txt +6 -0
  16. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/pyproject.toml +7 -1
  17. flow_record-3.21.dev5/tests/__init__.py +0 -0
  18. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/standalone_test.py +1 -1
  19. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_fieldtypes.py +9 -3
  20. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_packer.py +7 -5
  21. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_rdump.py +1 -3
  22. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_xlsx_adapter.py +1 -2
  23. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tox.ini +1 -0
  24. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/.git-blame-ignore-revs +0 -0
  25. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/COPYRIGHT +0 -0
  26. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/LICENSE +0 -0
  27. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/MANIFEST.in +0 -0
  28. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/README.md +0 -0
  29. {flow_record-3.21.dev3/flow/record/tools → flow_record-3.21.dev5/examples}/__init__.py +0 -0
  30. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/examples/records.json +0 -0
  31. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/__init__.py +0 -0
  32. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/__init__.py +0 -0
  33. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/archive.py +0 -0
  34. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/avro.py +0 -0
  35. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/broker.py +0 -0
  36. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/csvfile.py +0 -0
  37. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/duckdb.py +0 -0
  38. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/jsonfile.py +0 -0
  39. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/line.py +0 -0
  40. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/mongo.py +0 -0
  41. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/split.py +0 -0
  42. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/sqlite.py +0 -0
  43. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/stream.py +0 -0
  44. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/text.py +0 -0
  45. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/adapter/xlsx.py +0 -0
  46. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/exceptions.py +0 -0
  47. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/__init__.py +0 -0
  48. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/credential.py +0 -0
  49. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/net/__init__.py +0 -0
  50. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/net/ip.py +0 -0
  51. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/net/ipv4.py +0 -0
  52. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/net/tcp.py +0 -0
  53. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/fieldtypes/net/udp.py +0 -0
  54. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/packer.py +0 -0
  55. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/selector.py +0 -0
  56. {flow_record-3.21.dev3/tests → flow_record-3.21.dev5/flow/record/tools}/__init__.py +0 -0
  57. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/tools/geoip.py +0 -0
  58. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow/record/whitelist.py +0 -0
  59. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow.record.egg-info/dependency_links.txt +0 -0
  60. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow.record.egg-info/entry_points.txt +0 -0
  61. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/flow.record.egg-info/top_level.txt +0 -0
  62. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/setup.cfg +0 -0
  63. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/_utils.py +0 -0
  64. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/docs/Makefile +0 -0
  65. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/docs/conf.py +0 -0
  66. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/docs/index.rst +0 -0
  67. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/selector_explain_example.py +0 -0
  68. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_adapter_line.py +0 -0
  69. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_adapter_text.py +0 -0
  70. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_avro.py +0 -0
  71. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_avro_adapter.py +0 -0
  72. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_compiled_selector.py +0 -0
  73. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_csv_adapter.py +0 -0
  74. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_deprecations.py +0 -0
  75. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_elastic_adapter.py +0 -0
  76. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_fieldtype_ip.py +0 -0
  77. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_json_packer.py +0 -0
  78. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_json_record_adapter.py +0 -0
  79. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_multi_timestamp.py +0 -0
  80. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_record.py +0 -0
  81. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_record_adapter.py +0 -0
  82. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_record_descriptor.py +0 -0
  83. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_regression.py +0 -0
  84. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_selector.py +0 -0
  85. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_splunk_adapter.py +0 -0
  86. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_sqlite_duckdb_adapter.py +0 -0
  87. {flow_record-3.21.dev3 → flow_record-3.21.dev5}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.21.dev3
3
+ Version: 3.21.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -37,16 +37,21 @@ Requires-Dist: duckdb; extra == "duckdb"
37
37
  Requires-Dist: pytz; extra == "duckdb"
38
38
  Provides-Extra: splunk
39
39
  Requires-Dist: httpx; extra == "splunk"
40
+ Provides-Extra: xlsx
41
+ Requires-Dist: openpyxl; extra == "xlsx"
40
42
  Provides-Extra: test
41
43
  Requires-Dist: flow.record[compression]; extra == "test"
42
44
  Requires-Dist: flow.record[avro]; extra == "test"
43
45
  Requires-Dist: flow.record[elastic]; extra == "test"
46
+ Requires-Dist: flow.record[xlsx]; extra == "test"
44
47
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
48
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
46
49
  Requires-Dist: tqdm; extra == "test"
50
+ Requires-Dist: structlog; extra == "test"
47
51
  Provides-Extra: full
48
52
  Requires-Dist: flow.record[compression]; extra == "full"
49
53
  Requires-Dist: tqdm; extra == "full"
54
+ Requires-Dist: structlog; extra == "full"
50
55
  Dynamic: license-file
51
56
 
52
57
  # flow.record
@@ -1,10 +1,15 @@
1
- import os
2
- import stat
1
+ from __future__ import annotations
3
2
 
4
- from datetime import datetime
3
+ import stat
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
5
6
 
6
7
  from flow.record import RecordDescriptor, RecordWriter
7
8
 
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Iterator
11
+
12
+
8
13
  descriptor = """
9
14
  filesystem/unix/entry
10
15
  string path;
@@ -22,34 +27,32 @@ filesystem/unix/entry
22
27
  FilesystemFile = RecordDescriptor(descriptor)
23
28
 
24
29
 
25
- def hash_file(path, t):
26
- f = open(path, "rb")
27
- while 1:
28
- d = f.read(4096)
29
- if d == "":
30
- break
31
- f.close()
30
+ def hash_file(path: str | Path) -> None:
31
+ with Path(path).open("rb") as f:
32
+ while True:
33
+ d = f.read(4096)
34
+ if not d:
35
+ break
32
36
 
33
37
 
34
38
  class FilesystemIterator:
35
39
  basepath = None
36
40
 
37
- def __init__(self, basepath):
41
+ def __init__(self, basepath: str | None):
38
42
  self.basepath = basepath
39
43
  self.recordType = FilesystemFile
40
44
 
41
- def classify(self, source, classification):
45
+ def classify(self, source: str, classification: str) -> None:
42
46
  self.recordType = FilesystemFile.base(_source=source, _classification=classification)
43
47
 
44
- def iter(self, path):
45
- path = os.path.abspath(path)
46
- return self._iter(path)
48
+ def iter(self, path: str | Path) -> Iterator[FilesystemFile]:
49
+ return self._iter(Path(path).resolve())
47
50
 
48
- def _iter(self, path):
49
- if path.startswith("/proc"):
51
+ def _iter(self, path: Path) -> Iterator[FilesystemFile]:
52
+ if path.is_relative_to("/proc"):
50
53
  return
51
54
 
52
- st = os.lstat(path)
55
+ st = path.lstat()
53
56
 
54
57
  abspath = path
55
58
  if self.basepath and abspath.startswith(self.basepath):
@@ -59,7 +62,7 @@ class FilesystemIterator:
59
62
 
60
63
  link = None
61
64
  if ifmt == stat.S_IFLNK:
62
- link = os.readlink(path)
65
+ link = path.readlink()
63
66
 
64
67
  yield self.recordType(
65
68
  path=abspath,
@@ -69,20 +72,16 @@ class FilesystemIterator:
69
72
  size=st.st_size,
70
73
  uid=st.st_uid,
71
74
  gid=st.st_gid,
72
- ctime=datetime.fromtimestamp(st.st_ctime),
73
- mtime=datetime.fromtimestamp(st.st_mtime),
74
- atime=datetime.fromtimestamp(st.st_atime),
75
+ ctime=st.st_ctime,
76
+ mtime=st.st_mtime,
77
+ atime=st.st_atime,
75
78
  link=link,
76
79
  )
77
80
 
78
81
  if ifmt == stat.S_IFDIR:
79
- for i in os.listdir(path):
80
- if i in (".", ".."):
81
- continue
82
-
83
- fullpath = os.path.join(path, i)
84
- for e in self.iter(fullpath):
85
- yield e
82
+ for i in path.iterdir():
83
+ fullpath = path.joinpath(i)
84
+ yield from self.iter(fullpath)
86
85
 
87
86
 
88
87
  chunk = []
@@ -1,18 +1,21 @@
1
1
  #!/usr/bin/env pypy
2
- import record
2
+ from __future__ import annotations
3
+
3
4
  import sys
4
- import datetime
5
+ from datetime import datetime, timezone
5
6
 
6
7
  import net.ipv4
7
-
8
+ import record
8
9
  from fileprocessing import DirectoryProcessor
9
10
 
11
+ UTC_TIMEZONE = timezone.utc
12
+
10
13
 
11
- def ts(s):
12
- return datetime.datetime.fromtimestamp(float(s))
14
+ def ts(s: float) -> datetime:
15
+ return datetime.fromtimestamp(float(s), tz=UTC_TIMEZONE)
13
16
 
14
17
 
15
- def ip(s):
18
+ def ip(s: str) -> net.ipv4.Address:
16
19
  return net.ipv4.Address(s)
17
20
 
18
21
 
@@ -21,7 +24,7 @@ class SeparatedFile:
21
24
  seperator = None
22
25
  format = None
23
26
 
24
- def __init__(self, fp, seperator, format):
27
+ def __init__(self, fp: list[str], seperator: str | None, format: list[tuple]):
25
28
  self.fp = fp
26
29
  self.seperator = seperator
27
30
  self.format = format
@@ -46,7 +49,7 @@ class SeparatedFile:
46
49
  yield recordtype(**r)
47
50
 
48
51
 
49
- def PassiveDnsFile(fp):
52
+ def PassiveDnsFile(fp: list[str]) -> SeparatedFile:
50
53
  return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT)
51
54
 
52
55
 
@@ -63,7 +66,7 @@ PASSIVEDNS_FORMAT = [
63
66
  ]
64
67
 
65
68
 
66
- def main():
69
+ def main() -> None:
67
70
  rs = record.RecordOutput(sys.stdout)
68
71
  for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"):
69
72
  rs.write(r)
@@ -1,8 +1,10 @@
1
1
  import random
2
+ from datetime import datetime, timezone
2
3
 
3
- from datetime import datetime
4
4
  from flow import record
5
5
 
6
+ UTC_TIMEZONE = timezone.utc
7
+
6
8
  descriptor = """
7
9
  network/traffic/tcp/connection
8
10
  datetime ts;
@@ -32,9 +34,9 @@ port_list = [
32
34
 
33
35
  rs = record.RecordWriter()
34
36
 
35
- for i in range(500):
37
+ for _ in range(500):
36
38
  r = conn(
37
- ts=datetime.now(),
39
+ ts=datetime.now(tz=UTC_TIMEZONE),
38
40
  src=random.choice(ip_list),
39
41
  srcport=random.choice(port_list),
40
42
  dst=random.choice(ip_list),
@@ -4,8 +4,11 @@ import hashlib
4
4
  import logging
5
5
  import queue
6
6
  import threading
7
+ from contextlib import suppress
7
8
  from typing import TYPE_CHECKING
8
9
 
10
+ import urllib3
11
+
9
12
  try:
10
13
  import elasticsearch
11
14
  import elasticsearch.helpers
@@ -77,6 +80,8 @@ class ElasticWriter(AbstractWriter):
77
80
  http_compress = boolean_argument(http_compress)
78
81
  self.hash_record = boolean_argument(hash_record)
79
82
  queue_size = int(queue_size)
83
+ request_timeout = int(request_timeout)
84
+ self.max_retries = int(max_retries)
80
85
 
81
86
  if not uri.lower().startswith(("http://", "https://")):
82
87
  uri = "http://" + uri
@@ -93,7 +98,7 @@ class ElasticWriter(AbstractWriter):
93
98
  api_key=api_key,
94
99
  request_timeout=request_timeout,
95
100
  retry_on_timeout=True,
96
- max_retries=max_retries,
101
+ max_retries=self.max_retries,
97
102
  )
98
103
 
99
104
  self.json_packer = JsonRecordPacker()
@@ -103,8 +108,6 @@ class ElasticWriter(AbstractWriter):
103
108
 
104
109
  if not verify_certs:
105
110
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
106
- import urllib3
107
-
108
111
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
109
112
 
110
113
  self.metadata_fields = {}
@@ -113,10 +116,9 @@ class ElasticWriter(AbstractWriter):
113
116
  self.metadata_fields[arg_key[6:]] = arg_val
114
117
 
115
118
  def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
116
- log.error("Exception in thread: %s", exc)
117
119
  self.exception = getattr(exc, "exc_value", exc)
120
+ self.exception = enrich_elastic_exception(self.exception)
118
121
  self.event.set()
119
- self.close()
120
122
 
121
123
  def record_to_document(self, record: Record, index: str) -> dict:
122
124
  """Convert a record to a Elasticsearch compatible document dictionary"""
@@ -169,13 +171,13 @@ class ElasticWriter(AbstractWriter):
169
171
  - https://elasticsearch-py.readthedocs.io/en/v8.17.1/helpers.html#elasticsearch.helpers.streaming_bulk
170
172
  - https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/helpers/actions.py#L362
171
173
  """
174
+
172
175
  for _ok, _item in elasticsearch.helpers.streaming_bulk(
173
176
  self.es,
174
177
  self.document_stream(),
175
178
  raise_on_error=True,
176
179
  raise_on_exception=True,
177
- # Some settings have to be redefined because streaming_bulk does not inherit them from the self.es instance.
178
- max_retries=3,
180
+ max_retries=self.max_retries,
179
181
  ):
180
182
  pass
181
183
 
@@ -191,13 +193,17 @@ class ElasticWriter(AbstractWriter):
191
193
  pass
192
194
 
193
195
  def close(self) -> None:
194
- self.queue.put(StopIteration)
195
- self.event.wait()
196
+ if hasattr(self, "queue"):
197
+ self.queue.put(StopIteration)
198
+
199
+ if hasattr(self, "event"):
200
+ self.event.wait()
196
201
 
197
202
  if hasattr(self, "es"):
198
- self.es.close()
203
+ with suppress(Exception):
204
+ self.es.close()
199
205
 
200
- if self.exception:
206
+ if hasattr(self, "exception") and self.exception:
201
207
  raise self.exception
202
208
 
203
209
 
@@ -219,6 +225,8 @@ class ElasticReader(AbstractReader):
219
225
  self.selector = selector
220
226
  verify_certs = boolean_argument(verify_certs)
221
227
  http_compress = boolean_argument(http_compress)
228
+ request_timeout = int(request_timeout)
229
+ max_retries = int(max_retries)
222
230
 
223
231
  if not uri.lower().startswith(("http://", "https://")):
224
232
  uri = "http://" + uri
@@ -235,8 +243,6 @@ class ElasticReader(AbstractReader):
235
243
 
236
244
  if not verify_certs:
237
245
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
238
- import urllib3
239
-
240
246
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
241
247
 
242
248
  def __iter__(self) -> Iterator[Record]:
@@ -255,3 +261,32 @@ class ElasticReader(AbstractReader):
255
261
  def close(self) -> None:
256
262
  if hasattr(self, "es"):
257
263
  self.es.close()
264
+
265
+
266
+ def enrich_elastic_exception(exception: Exception) -> Exception:
267
+ """Extend the exception with error information from Elastic.
268
+
269
+ Resources:
270
+ - https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
271
+ """
272
+ errors = set()
273
+ if hasattr(exception, "errors"):
274
+ try:
275
+ for error in exception.errors:
276
+ index_dict = error.get("index", {})
277
+ status = index_dict.get("status")
278
+ error_dict = index_dict.get("error", {})
279
+ error_type = error_dict.get("type")
280
+ error_reason = error_dict.get("reason", "")
281
+
282
+ errors.add(f"({status} {error_type} {error_reason})")
283
+ except Exception:
284
+ errors.add("unable to extend errors")
285
+
286
+ # append errors to original exception message
287
+ error_str = ", ".join(errors)
288
+ original_message = exception.args[0] if exception.args else ""
289
+ new_message = f"{original_message} {error_str}"
290
+ exception.args = (new_message,) + exception.args[1:]
291
+
292
+ return exception
@@ -35,7 +35,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
35
35
  [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
36
36
  """
37
37
 
38
- log = logging.getLogger(__package__)
38
+ log = logging.getLogger(__name__)
39
39
 
40
40
  # Amount of records to bundle into a single request when sending data over HTTP(S).
41
41
  RECORD_BUFFER_LIMIT = 20
@@ -64,7 +64,7 @@ if TYPE_CHECKING:
64
64
 
65
65
  from flow.record.adapter import AbstractReader, AbstractWriter
66
66
 
67
- log = logging.getLogger(__package__)
67
+ log = logging.getLogger(__name__)
68
68
  _utcnow = functools.partial(datetime.now, timezone.utc)
69
69
 
70
70
  RECORD_VERSION = 1
@@ -186,6 +186,7 @@ class Record:
186
186
  return OrderedDict((k, getattr(self, k)) for k in self.__slots__ if k not in exclude)
187
187
 
188
188
  if TYPE_CHECKING:
189
+
189
190
  def __getattr__(self, name: str) -> Any: ...
190
191
 
191
192
  def __setattr__(self, k: str, v: Any) -> None:
@@ -11,7 +11,7 @@ from flow.record.base import Record, RecordDescriptor
11
11
  from flow.record.exceptions import RecordDescriptorNotFound
12
12
  from flow.record.utils import EventHandler
13
13
 
14
- log = logging.getLogger(__package__)
14
+ log = logging.getLogger(__name__)
15
15
 
16
16
 
17
17
  class JsonRecordPacker:
@@ -15,14 +15,14 @@ from flow.record.base import Record, RecordDescriptor, RecordReader
15
15
  from flow.record.fieldtypes import fieldtype_for_value
16
16
  from flow.record.packer import RecordPacker
17
17
  from flow.record.selector import make_selector
18
- from flow.record.utils import is_stdout
18
+ from flow.record.utils import LOGGING_TRACE_LEVEL, is_stdout
19
19
 
20
20
  if TYPE_CHECKING:
21
21
  from collections.abc import Iterator
22
22
 
23
23
  from flow.record.adapter import AbstractWriter
24
24
 
25
- log = logging.getLogger(__package__)
25
+ log = logging.getLogger(__name__)
26
26
 
27
27
  aRepr = reprlib.Repr()
28
28
  aRepr.maxother = 255
@@ -146,8 +146,11 @@ class RecordStreamReader:
146
146
  def record_stream(sources: list[str], selector: str | None = None) -> Iterator[Record]:
147
147
  """Return a Record stream generator from the given Record sources.
148
148
 
149
- Exceptions in a Record source will be caught so the stream is not interrupted.
149
+ If there are multiple sources, exceptions are caught and logged, and the stream continues with the next source.
150
150
  """
151
+
152
+ trace = log.isEnabledFor(LOGGING_TRACE_LEVEL)
153
+
151
154
  log.debug("Record stream with selector: %r", selector)
152
155
  for src in sources:
153
156
  # Inform user that we are reading from stdin
@@ -161,12 +164,20 @@ def record_stream(sources: list[str], selector: str | None = None) -> Iterator[R
161
164
  yield from reader
162
165
  reader.close()
163
166
  except IOError as e:
164
- log.exception("%s(%r): %s", reader, src, e) # noqa: TRY401
167
+ if len(sources) == 1:
168
+ raise
169
+ else:
170
+ log.error("%s(%r): %s", reader, src, e)
171
+ if trace:
172
+ log.exception("Full traceback")
165
173
  except KeyboardInterrupt:
166
174
  raise
167
175
  except Exception as e:
168
- log.warning("Exception in %r for %r: %s -- skipping to next reader", reader, src, aRepr.repr(e))
169
- continue
176
+ if len(sources) == 1:
177
+ raise
178
+ else:
179
+ log.warning("Exception in %r for %r: %s -- skipping to next reader", reader, src, aRepr.repr(e))
180
+ continue
170
181
 
171
182
 
172
183
  class PathTemplateWriter:
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  from __future__ import annotations
3
3
 
4
+ import argparse
4
5
  import logging
5
6
  import sys
6
7
  from importlib import import_module
@@ -14,7 +15,7 @@ import flow.record.adapter
14
15
  from flow.record import RecordWriter, iter_timestamped_records, record_stream
15
16
  from flow.record.selector import make_selector
16
17
  from flow.record.stream import RecordFieldRewriter
17
- from flow.record.utils import catch_sigpipe
18
+ from flow.record.utils import LOGGING_TRACE_LEVEL, catch_sigpipe
18
19
 
19
20
  try:
20
21
  from flow.record.version import version
@@ -29,6 +30,15 @@ try:
29
30
  except ImportError:
30
31
  HAS_TQDM = False
31
32
 
33
+ try:
34
+ import structlog
35
+
36
+ HAS_STRUCTLOG = True
37
+
38
+ except ImportError:
39
+ HAS_STRUCTLOG = False
40
+
41
+
32
42
  log = logging.getLogger(__name__)
33
43
 
34
44
 
@@ -69,8 +79,6 @@ def list_adapters() -> None:
69
79
 
70
80
  @catch_sigpipe
71
81
  def main(argv: list[str] | None = None) -> int:
72
- import argparse
73
-
74
82
  parser = argparse.ArgumentParser(
75
83
  description="Record dumper, a tool that can read, write and filter records",
76
84
  formatter_class=argparse.ArgumentDefaultsHelpFormatter,
@@ -130,6 +138,11 @@ def main(argv: list[str] | None = None) -> int:
130
138
  action="store_true",
131
139
  help="Show progress bar (requires tqdm)",
132
140
  )
141
+ output.add_argument(
142
+ "--stats",
143
+ action="store_true",
144
+ help="Show count of processed records",
145
+ )
133
146
 
134
147
  advanced = parser.add_argument_group("advanced")
135
148
  advanced.add_argument(
@@ -196,10 +209,30 @@ def main(argv: list[str] | None = None) -> int:
196
209
 
197
210
  args = parser.parse_args(argv)
198
211
 
199
- levels = [logging.WARNING, logging.INFO, logging.DEBUG]
212
+ levels = [logging.WARNING, logging.INFO, logging.DEBUG, LOGGING_TRACE_LEVEL]
200
213
  level = levels[min(len(levels) - 1, args.verbose)]
201
214
  logging.basicConfig(level=level, format="%(asctime)s %(levelname)s %(message)s")
202
215
 
216
+ if HAS_STRUCTLOG:
217
+ # We have structlog, configure Python logging to use it for rendering
218
+ console_renderer = structlog.dev.ConsoleRenderer()
219
+ handler = logging.StreamHandler()
220
+ handler.setFormatter(
221
+ structlog.stdlib.ProcessorFormatter(
222
+ processor=console_renderer,
223
+ foreign_pre_chain=[
224
+ structlog.stdlib.add_logger_name,
225
+ structlog.stdlib.add_log_level,
226
+ structlog.processors.TimeStamper(fmt="iso"),
227
+ ],
228
+ )
229
+ )
230
+
231
+ # Clear existing handlers and add our structlog handler
232
+ root_logger = logging.getLogger()
233
+ root_logger.handlers.clear()
234
+ root_logger.addHandler(handler)
235
+
203
236
  fields_to_exclude = args.exclude.split(",") if args.exclude else []
204
237
  fields = args.fields.split(",") if args.fields else []
205
238
 
@@ -253,6 +286,7 @@ def main(argv: list[str] | None = None) -> int:
253
286
 
254
287
  count = 0
255
288
  record_writer = None
289
+ ret = 0
256
290
 
257
291
  try:
258
292
  record_writer = RecordWriter(uri)
@@ -280,14 +314,33 @@ def main(argv: list[str] | None = None) -> int:
280
314
  else:
281
315
  record_writer.write(rec)
282
316
 
317
+ except Exception as e:
318
+ print_error(e)
319
+
320
+ # Prevent throwing an exception twice when deconstructing the record writer.
321
+ if hasattr(record_writer, "exception") and record_writer.exception is e:
322
+ record_writer.exception = None
323
+
324
+ ret = 1
325
+
283
326
  finally:
284
327
  if record_writer:
285
- record_writer.__exit__()
328
+ # Exceptions raised in threads can be thrown when deconstructing the writer.
329
+ try:
330
+ record_writer.__exit__()
331
+ except Exception as e:
332
+ print_error(e)
333
+
334
+ if (args.list or args.stats) and not args.progress:
335
+ print(f"Processed {count} records", file=sys.stdout if args.list else sys.stderr)
336
+
337
+ return ret
286
338
 
287
- if args.list:
288
- print(f"Processed {count} records")
289
339
 
290
- return 0
340
+ def print_error(e: Exception) -> None:
341
+ log.error("rdump encountered a fatal error: %s", e)
342
+ if log.isEnabledFor(LOGGING_TRACE_LEVEL):
343
+ log.exception("Full traceback")
291
344
 
292
345
 
293
346
  if __name__ == "__main__":
@@ -7,6 +7,8 @@ import warnings
7
7
  from functools import wraps
8
8
  from typing import Any, BinaryIO, Callable, TextIO
9
9
 
10
+ LOGGING_TRACE_LEVEL = 5
11
+
10
12
 
11
13
  def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
12
14
  """Return the stdout stream as binary or text stream.
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.21.dev3'
21
- __version_tuple__ = version_tuple = (3, 21, 'dev3')
20
+ __version__ = version = '3.21.dev5'
21
+ __version_tuple__ = version_tuple = (3, 21, 'dev5')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.21.dev3
3
+ Version: 3.21.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -37,16 +37,21 @@ Requires-Dist: duckdb; extra == "duckdb"
37
37
  Requires-Dist: pytz; extra == "duckdb"
38
38
  Provides-Extra: splunk
39
39
  Requires-Dist: httpx; extra == "splunk"
40
+ Provides-Extra: xlsx
41
+ Requires-Dist: openpyxl; extra == "xlsx"
40
42
  Provides-Extra: test
41
43
  Requires-Dist: flow.record[compression]; extra == "test"
42
44
  Requires-Dist: flow.record[avro]; extra == "test"
43
45
  Requires-Dist: flow.record[elastic]; extra == "test"
46
+ Requires-Dist: flow.record[xlsx]; extra == "test"
44
47
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
48
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
46
49
  Requires-Dist: tqdm; extra == "test"
50
+ Requires-Dist: structlog; extra == "test"
47
51
  Provides-Extra: full
48
52
  Requires-Dist: flow.record[compression]; extra == "full"
49
53
  Requires-Dist: tqdm; extra == "full"
54
+ Requires-Dist: structlog; extra == "full"
50
55
  Dynamic: license-file
51
56
 
52
57
  # flow.record
@@ -5,6 +5,7 @@ MANIFEST.in
5
5
  README.md
6
6
  pyproject.toml
7
7
  tox.ini
8
+ examples/__init__.py
8
9
  examples/filesystem.py
9
10
  examples/passivedns.py
10
11
  examples/records.json
@@ -23,6 +23,7 @@ elasticsearch
23
23
  [full]
24
24
  flow.record[compression]
25
25
  tqdm
26
+ structlog
26
27
 
27
28
  [geoip]
28
29
  maxminddb
@@ -34,8 +35,13 @@ httpx
34
35
  flow.record[compression]
35
36
  flow.record[avro]
36
37
  flow.record[elastic]
38
+ flow.record[xlsx]
37
39
  tqdm
40
+ structlog
38
41
 
39
42
  [test:platform_python_implementation != "PyPy" and python_version < "3.12"]
40
43
  duckdb
41
44
  pytz
45
+
46
+ [xlsx]
47
+ openpyxl
@@ -56,17 +56,23 @@ duckdb = [
56
56
  splunk = [
57
57
  "httpx",
58
58
  ]
59
+ xlsx = [
60
+ "openpyxl",
61
+ ]
59
62
  test = [
60
63
  "flow.record[compression]",
61
64
  "flow.record[avro]",
62
65
  "flow.record[elastic]",
66
+ "flow.record[xlsx]",
63
67
  "duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
64
68
  "pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
65
69
  "tqdm",
70
+ "structlog",
66
71
  ]
67
72
  full = [
68
73
  "flow.record[compression]",
69
74
  "tqdm",
75
+ "structlog",
70
76
  ]
71
77
 
72
78
  [project.scripts]
@@ -116,7 +122,7 @@ select = [
116
122
  "FURB",
117
123
  "RUF",
118
124
  ]
119
- ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"]
125
+ ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003", "TRY400"]
120
126
 
121
127
  [tool.ruff.lint.per-file-ignores]
122
128
  "tests/docs/**" = ["INP001"]
File without changes
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import traceback
3
4
  from typing import Callable
4
5
 
5
6
 
@@ -14,6 +15,5 @@ def main(glob: dict[str, Callable[..., None]]) -> None:
14
15
  print("PASSED")
15
16
  except Exception:
16
17
  print("FAILED")
17
- import traceback
18
18
 
19
19
  traceback.print_exc()
@@ -376,15 +376,21 @@ def test_uri_type() -> None:
376
376
  assert r.path.protocol == "http"
377
377
  assert r.path.hostname == "example.com"
378
378
 
379
- with pytest.warns(DeprecationWarning):
379
+ with pytest.warns(
380
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
381
+ ):
380
382
  r = TestRecord(uri.from_windows(r"c:\windows\program files\Fox-IT B.V\flow.exe"))
381
383
  assert r.path.filename == "flow.exe"
382
384
 
383
385
  r = TestRecord()
384
- with pytest.warns(DeprecationWarning):
386
+ with pytest.warns(
387
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
388
+ ):
385
389
  r.path = uri.normalize(r"c:\Users\Fox-IT\Downloads\autoruns.exe")
386
390
  assert r.path.filename == "autoruns.exe"
387
- with pytest.warns(DeprecationWarning):
391
+ with pytest.warns(
392
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
393
+ ):
388
394
  assert r.path.dirname == uri.normalize(r"\Users\Fox-IT\Downloads")
389
395
  assert r.path.dirname == "/Users/Fox-IT/Downloads"
390
396
 
@@ -22,7 +22,7 @@ def test_uri_packing() -> None:
22
22
  ],
23
23
  )
24
24
 
25
- # construct with an url
25
+ # Construct with an url
26
26
  record = TestRecord("http://www.google.com/evil.bin")
27
27
  data = packer.pack(record)
28
28
  record = packer.unpack(data)
@@ -30,8 +30,9 @@ def test_uri_packing() -> None:
30
30
  assert record.path.filename == "evil.bin"
31
31
  assert record.path.dirname == "/"
32
32
 
33
- # construct from uri() -> for windows=True
34
- with pytest.warns(DeprecationWarning):
33
+ with pytest.warns(
34
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
35
+ ):
35
36
  path = uri.from_windows(r"c:\Program Files\Fox-IT\flow is awesome.exe")
36
37
  record = TestRecord(path)
37
38
  data = packer.pack(record)
@@ -40,8 +41,9 @@ def test_uri_packing() -> None:
40
41
  assert record.path.filename == "flow is awesome.exe"
41
42
  assert record.path.dirname == "/Program Files/Fox-IT"
42
43
 
43
- # construct using uri.from_windows()
44
- with pytest.warns(DeprecationWarning):
44
+ with pytest.warns(
45
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
46
+ ):
45
47
  path = uri.from_windows(r"c:\Users\Hello World\foo.bar.exe")
46
48
  record = TestRecord(path)
47
49
  data = packer.pack(record)
@@ -17,6 +17,7 @@ import pytest
17
17
 
18
18
  import flow.record.fieldtypes
19
19
  from flow.record import RecordDescriptor, RecordReader, RecordWriter
20
+ from flow.record.adapter.line import field_types_for_record_descriptor
20
21
  from flow.record.fieldtypes import flow_record_tz
21
22
  from flow.record.tools import rdump
22
23
 
@@ -681,8 +682,6 @@ def test_rdump_line_verbose(tmp_path: Path, capsys: pytest.CaptureFixture, rdump
681
682
  writer.write(TestRecord(counter=2))
682
683
  writer.write(TestRecord(counter=3))
683
684
 
684
- from flow.record.adapter.line import field_types_for_record_descriptor
685
-
686
685
  field_types_for_record_descriptor.cache_clear()
687
686
  assert field_types_for_record_descriptor.cache_info().currsize == 0
688
687
  rdump.main([str(record_path), *rdump_params])
@@ -721,4 +720,3 @@ def test_rdump_list_progress(tmp_path: Path, capsys: pytest.CaptureFixture) -> N
721
720
 
722
721
  # stdout should contain the RecordDescriptor definition and count
723
722
  assert "# <RecordDescriptor test/rdump/progress, hash=eeb21156>" in captured.out
724
- assert "Processed 100 records" in captured.out
@@ -9,6 +9,7 @@ from unittest.mock import MagicMock
9
9
  import pytest
10
10
 
11
11
  from flow.record import fieldtypes
12
+ from flow.record.adapter.xlsx import sanitize_fieldvalues
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from collections.abc import Iterator
@@ -27,8 +28,6 @@ def mock_openpyxl_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock
27
28
 
28
29
 
29
30
  def test_sanitize_field_values(mock_openpyxl_package: MagicMock) -> None:
30
- from flow.record.adapter.xlsx import sanitize_fieldvalues
31
-
32
31
  assert list(
33
32
  sanitize_fieldvalues(
34
33
  [
@@ -35,6 +35,7 @@ package = skip
35
35
  deps =
36
36
  ruff==0.9.2
37
37
  commands =
38
+ ruff check --fix flow tests
38
39
  ruff format flow tests
39
40
 
40
41
  [testenv:lint]
File without changes