flow.record 3.21.dev2__tar.gz → 3.21.dev4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/PKG-INFO +4 -1
  2. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/examples/filesystem.py +28 -29
  3. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/examples/passivedns.py +12 -9
  4. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/examples/tcpconn.py +5 -3
  5. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/csvfile.py +8 -3
  6. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/elastic.py +8 -9
  7. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/jsonfile.py +2 -2
  8. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/splunk.py +2 -2
  9. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/base.py +1 -0
  10. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/tools/rdump.py +18 -5
  11. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/utils.py +29 -0
  12. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/version.py +2 -2
  13. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow.record.egg-info/PKG-INFO +4 -1
  14. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow.record.egg-info/SOURCES.txt +2 -0
  15. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow.record.egg-info/requires.txt +4 -0
  16. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/pyproject.toml +4 -0
  17. flow_record-3.21.dev4/tests/__init__.py +0 -0
  18. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/standalone_test.py +1 -1
  19. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_fieldtypes.py +9 -3
  20. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_packer.py +7 -5
  21. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_rdump.py +1 -2
  22. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_record_adapter.py +1 -1
  23. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_regression.py +20 -0
  24. flow_record-3.21.dev4/tests/test_utils.py +25 -0
  25. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_xlsx_adapter.py +1 -2
  26. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/.git-blame-ignore-revs +0 -0
  27. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/COPYRIGHT +0 -0
  28. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/LICENSE +0 -0
  29. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/MANIFEST.in +0 -0
  30. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/README.md +0 -0
  31. {flow_record-3.21.dev2/flow/record/tools → flow_record-3.21.dev4/examples}/__init__.py +0 -0
  32. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/examples/records.json +0 -0
  33. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/__init__.py +0 -0
  34. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/__init__.py +0 -0
  35. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/archive.py +0 -0
  36. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/avro.py +0 -0
  37. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/broker.py +0 -0
  38. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/duckdb.py +0 -0
  39. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/line.py +0 -0
  40. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/mongo.py +0 -0
  41. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/split.py +0 -0
  42. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/sqlite.py +0 -0
  43. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/stream.py +0 -0
  44. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/text.py +0 -0
  45. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/adapter/xlsx.py +0 -0
  46. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/exceptions.py +0 -0
  47. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/__init__.py +0 -0
  48. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/credential.py +0 -0
  49. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/net/__init__.py +0 -0
  50. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/net/ip.py +0 -0
  51. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/net/ipv4.py +0 -0
  52. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/net/tcp.py +0 -0
  53. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/fieldtypes/net/udp.py +0 -0
  54. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/jsonpacker.py +0 -0
  55. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/packer.py +0 -0
  56. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/selector.py +0 -0
  57. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/stream.py +0 -0
  58. {flow_record-3.21.dev2/tests → flow_record-3.21.dev4/flow/record/tools}/__init__.py +0 -0
  59. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/tools/geoip.py +0 -0
  60. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow/record/whitelist.py +0 -0
  61. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow.record.egg-info/dependency_links.txt +0 -0
  62. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow.record.egg-info/entry_points.txt +0 -0
  63. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/flow.record.egg-info/top_level.txt +0 -0
  64. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/setup.cfg +0 -0
  65. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/_utils.py +0 -0
  66. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/docs/Makefile +0 -0
  67. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/docs/conf.py +0 -0
  68. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/docs/index.rst +0 -0
  69. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/selector_explain_example.py +0 -0
  70. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_adapter_line.py +0 -0
  71. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_adapter_text.py +0 -0
  72. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_avro.py +0 -0
  73. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_avro_adapter.py +0 -0
  74. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_compiled_selector.py +0 -0
  75. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_csv_adapter.py +0 -0
  76. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_deprecations.py +0 -0
  77. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_elastic_adapter.py +0 -0
  78. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_fieldtype_ip.py +0 -0
  79. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_json_packer.py +0 -0
  80. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_json_record_adapter.py +0 -0
  81. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_multi_timestamp.py +0 -0
  82. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_record.py +0 -0
  83. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_record_descriptor.py +0 -0
  84. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_selector.py +0 -0
  85. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_splunk_adapter.py +0 -0
  86. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tests/test_sqlite_duckdb_adapter.py +0 -0
  87. {flow_record-3.21.dev2 → flow_record-3.21.dev4}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.21.dev2
3
+ Version: 3.21.dev4
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -37,10 +37,13 @@ Requires-Dist: duckdb; extra == "duckdb"
37
37
  Requires-Dist: pytz; extra == "duckdb"
38
38
  Provides-Extra: splunk
39
39
  Requires-Dist: httpx; extra == "splunk"
40
+ Provides-Extra: xlsx
41
+ Requires-Dist: openpyxl; extra == "xlsx"
40
42
  Provides-Extra: test
41
43
  Requires-Dist: flow.record[compression]; extra == "test"
42
44
  Requires-Dist: flow.record[avro]; extra == "test"
43
45
  Requires-Dist: flow.record[elastic]; extra == "test"
46
+ Requires-Dist: flow.record[xlsx]; extra == "test"
44
47
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
48
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
46
49
  Requires-Dist: tqdm; extra == "test"
@@ -1,10 +1,15 @@
1
- import os
2
- import stat
1
+ from __future__ import annotations
3
2
 
4
- from datetime import datetime
3
+ import stat
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
5
6
 
6
7
  from flow.record import RecordDescriptor, RecordWriter
7
8
 
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Iterator
11
+
12
+
8
13
  descriptor = """
9
14
  filesystem/unix/entry
10
15
  string path;
@@ -22,34 +27,32 @@ filesystem/unix/entry
22
27
  FilesystemFile = RecordDescriptor(descriptor)
23
28
 
24
29
 
25
- def hash_file(path, t):
26
- f = open(path, "rb")
27
- while 1:
28
- d = f.read(4096)
29
- if d == "":
30
- break
31
- f.close()
30
+ def hash_file(path: str | Path) -> None:
31
+ with Path(path).open("rb") as f:
32
+ while True:
33
+ d = f.read(4096)
34
+ if not d:
35
+ break
32
36
 
33
37
 
34
38
  class FilesystemIterator:
35
39
  basepath = None
36
40
 
37
- def __init__(self, basepath):
41
+ def __init__(self, basepath: str | None):
38
42
  self.basepath = basepath
39
43
  self.recordType = FilesystemFile
40
44
 
41
- def classify(self, source, classification):
45
+ def classify(self, source: str, classification: str) -> None:
42
46
  self.recordType = FilesystemFile.base(_source=source, _classification=classification)
43
47
 
44
- def iter(self, path):
45
- path = os.path.abspath(path)
46
- return self._iter(path)
48
+ def iter(self, path: str | Path) -> Iterator[FilesystemFile]:
49
+ return self._iter(Path(path).resolve())
47
50
 
48
- def _iter(self, path):
49
- if path.startswith("/proc"):
51
+ def _iter(self, path: Path) -> Iterator[FilesystemFile]:
52
+ if path.is_relative_to("/proc"):
50
53
  return
51
54
 
52
- st = os.lstat(path)
55
+ st = path.lstat()
53
56
 
54
57
  abspath = path
55
58
  if self.basepath and abspath.startswith(self.basepath):
@@ -59,7 +62,7 @@ class FilesystemIterator:
59
62
 
60
63
  link = None
61
64
  if ifmt == stat.S_IFLNK:
62
- link = os.readlink(path)
65
+ link = path.readlink()
63
66
 
64
67
  yield self.recordType(
65
68
  path=abspath,
@@ -69,20 +72,16 @@ class FilesystemIterator:
69
72
  size=st.st_size,
70
73
  uid=st.st_uid,
71
74
  gid=st.st_gid,
72
- ctime=datetime.fromtimestamp(st.st_ctime),
73
- mtime=datetime.fromtimestamp(st.st_mtime),
74
- atime=datetime.fromtimestamp(st.st_atime),
75
+ ctime=st.st_ctime,
76
+ mtime=st.st_mtime,
77
+ atime=st.st_atime,
75
78
  link=link,
76
79
  )
77
80
 
78
81
  if ifmt == stat.S_IFDIR:
79
- for i in os.listdir(path):
80
- if i in (".", ".."):
81
- continue
82
-
83
- fullpath = os.path.join(path, i)
84
- for e in self.iter(fullpath):
85
- yield e
82
+ for i in path.iterdir():
83
+ fullpath = path.joinpath(i)
84
+ yield from self.iter(fullpath)
86
85
 
87
86
 
88
87
  chunk = []
@@ -1,18 +1,21 @@
1
1
  #!/usr/bin/env pypy
2
- import record
2
+ from __future__ import annotations
3
+
3
4
  import sys
4
- import datetime
5
+ from datetime import datetime, timezone
5
6
 
6
7
  import net.ipv4
7
-
8
+ import record
8
9
  from fileprocessing import DirectoryProcessor
9
10
 
11
+ UTC_TIMEZONE = timezone.utc
12
+
10
13
 
11
- def ts(s):
12
- return datetime.datetime.fromtimestamp(float(s))
14
+ def ts(s: float) -> datetime:
15
+ return datetime.fromtimestamp(float(s), tz=UTC_TIMEZONE)
13
16
 
14
17
 
15
- def ip(s):
18
+ def ip(s: str) -> net.ipv4.Address:
16
19
  return net.ipv4.Address(s)
17
20
 
18
21
 
@@ -21,7 +24,7 @@ class SeparatedFile:
21
24
  seperator = None
22
25
  format = None
23
26
 
24
- def __init__(self, fp, seperator, format):
27
+ def __init__(self, fp: list[str], seperator: str | None, format: list[tuple]):
25
28
  self.fp = fp
26
29
  self.seperator = seperator
27
30
  self.format = format
@@ -46,7 +49,7 @@ class SeparatedFile:
46
49
  yield recordtype(**r)
47
50
 
48
51
 
49
- def PassiveDnsFile(fp):
52
+ def PassiveDnsFile(fp: list[str]) -> SeparatedFile:
50
53
  return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT)
51
54
 
52
55
 
@@ -63,7 +66,7 @@ PASSIVEDNS_FORMAT = [
63
66
  ]
64
67
 
65
68
 
66
- def main():
69
+ def main() -> None:
67
70
  rs = record.RecordOutput(sys.stdout)
68
71
  for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"):
69
72
  rs.write(r)
@@ -1,8 +1,10 @@
1
1
  import random
2
+ from datetime import datetime, timezone
2
3
 
3
- from datetime import datetime
4
4
  from flow import record
5
5
 
6
+ UTC_TIMEZONE = timezone.utc
7
+
6
8
  descriptor = """
7
9
  network/traffic/tcp/connection
8
10
  datetime ts;
@@ -32,9 +34,9 @@ port_list = [
32
34
 
33
35
  rs = record.RecordWriter()
34
36
 
35
- for i in range(500):
37
+ for _ in range(500):
36
38
  r = conn(
37
- ts=datetime.now(),
39
+ ts=datetime.now(tz=UTC_TIMEZONE),
38
40
  src=random.choice(ip_list),
39
41
  srcport=random.choice(port_list),
40
42
  dst=random.choice(ip_list),
@@ -9,7 +9,7 @@ from flow.record import RecordDescriptor
9
9
  from flow.record.adapter import AbstractReader, AbstractWriter
10
10
  from flow.record.base import Record, normalize_fieldname
11
11
  from flow.record.selector import make_selector
12
- from flow.record.utils import is_stdout
12
+ from flow.record.utils import boolean_argument, is_stdout
13
13
 
14
14
  if TYPE_CHECKING:
15
15
  from collections.abc import Iterator
@@ -17,11 +17,12 @@ if TYPE_CHECKING:
17
17
  __usage__ = """
18
18
  Comma-separated values (CSV) adapter
19
19
  ---
20
- Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]
20
+ Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]&header=[HEADER]
21
21
  Read usage: rdump csvfile://[PATH]?fields=[FIELDS]
22
22
  [PATH]: path to file. Leave empty or "-" to output to stdout
23
23
 
24
24
  Optional parameters:
25
+ [HEADER]: if set to false, it will not print the CSV header (default: true)
25
26
  [TERMINATOR]: line terminator, default is \\r\\n
26
27
  [FIELDS]: comma-separated list of CSV fields (in case of missing CSV header)
27
28
  """
@@ -34,6 +35,7 @@ class CsvfileWriter(AbstractWriter):
34
35
  fields: str | list[str] | None = None,
35
36
  exclude: str | list[str] | None = None,
36
37
  lineterminator: str = "\r\n",
38
+ header: str = "true",
37
39
  **kwargs,
38
40
  ):
39
41
  self.fp = None
@@ -52,13 +54,16 @@ class CsvfileWriter(AbstractWriter):
52
54
  self.fields = self.fields.split(",")
53
55
  if isinstance(self.exclude, str):
54
56
  self.exclude = self.exclude.split(",")
57
+ self.header = boolean_argument(header)
55
58
 
56
59
  def write(self, r: Record) -> None:
57
60
  rdict = r._asdict(fields=self.fields, exclude=self.exclude)
58
61
  if not self.desc or self.desc != r._desc:
59
62
  self.desc = r._desc
60
63
  self.writer = csv.DictWriter(self.fp, rdict, lineterminator=self.lineterminator)
61
- self.writer.writeheader()
64
+ if self.header:
65
+ # Write header only if it is requested
66
+ self.writer.writeheader()
62
67
  self.writer.writerow(rdict)
63
68
 
64
69
  def flush(self) -> None:
@@ -6,6 +6,8 @@ import queue
6
6
  import threading
7
7
  from typing import TYPE_CHECKING
8
8
 
9
+ import urllib3
10
+
9
11
  try:
10
12
  import elasticsearch
11
13
  import elasticsearch.helpers
@@ -19,6 +21,7 @@ from flow.record.adapter import AbstractReader, AbstractWriter
19
21
  from flow.record.base import Record, RecordDescriptor
20
22
  from flow.record.fieldtypes import fieldtype_for_value
21
23
  from flow.record.jsonpacker import JsonRecordPacker
24
+ from flow.record.utils import boolean_argument
22
25
 
23
26
  if TYPE_CHECKING:
24
27
  from collections.abc import Iterator
@@ -72,9 +75,9 @@ class ElasticWriter(AbstractWriter):
72
75
 
73
76
  self.index = index
74
77
  self.uri = uri
75
- verify_certs = str(verify_certs).lower() in ("1", "true")
76
- http_compress = str(http_compress).lower() in ("1", "true")
77
- self.hash_record = str(hash_record).lower() in ("1", "true")
78
+ verify_certs = boolean_argument(verify_certs)
79
+ http_compress = boolean_argument(http_compress)
80
+ self.hash_record = boolean_argument(hash_record)
78
81
  queue_size = int(queue_size)
79
82
 
80
83
  if not uri.lower().startswith(("http://", "https://")):
@@ -102,8 +105,6 @@ class ElasticWriter(AbstractWriter):
102
105
 
103
106
  if not verify_certs:
104
107
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
105
- import urllib3
106
-
107
108
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
108
109
 
109
110
  self.metadata_fields = {}
@@ -216,8 +217,8 @@ class ElasticReader(AbstractReader):
216
217
  self.index = index
217
218
  self.uri = uri
218
219
  self.selector = selector
219
- verify_certs = str(verify_certs).lower() in ("1", "true")
220
- http_compress = str(http_compress).lower() in ("1", "true")
220
+ verify_certs = boolean_argument(verify_certs)
221
+ http_compress = boolean_argument(http_compress)
221
222
 
222
223
  if not uri.lower().startswith(("http://", "https://")):
223
224
  uri = "http://" + uri
@@ -234,8 +235,6 @@ class ElasticReader(AbstractReader):
234
235
 
235
236
  if not verify_certs:
236
237
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
237
- import urllib3
238
-
239
238
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
240
239
 
241
240
  def __iter__(self) -> Iterator[Record]:
@@ -8,7 +8,7 @@ from flow.record import JsonRecordPacker
8
8
  from flow.record.adapter import AbstractReader, AbstractWriter
9
9
  from flow.record.fieldtypes import fieldtype_for_value
10
10
  from flow.record.selector import make_selector
11
- from flow.record.utils import is_stdout
11
+ from flow.record.utils import boolean_argument, is_stdout
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  from collections.abc import Iterator
@@ -33,7 +33,7 @@ class JsonfileWriter(AbstractWriter):
33
33
  def __init__(
34
34
  self, path: str | Path | BinaryIO, indent: str | int | None = None, descriptors: bool = True, **kwargs
35
35
  ):
36
- self.descriptors = str(descriptors).lower() in ("true", "1")
36
+ self.descriptors = boolean_argument(descriptors)
37
37
  self.fp = record.open_path_or_stream(path, "w")
38
38
  if isinstance(indent, str):
39
39
  indent = int(indent)
@@ -18,7 +18,7 @@ except ImportError:
18
18
 
19
19
  from flow.record.adapter import AbstractReader, AbstractWriter
20
20
  from flow.record.jsonpacker import JsonRecordPacker
21
- from flow.record.utils import to_base64, to_bytes, to_str
21
+ from flow.record.utils import boolean_argument, to_base64, to_bytes, to_str
22
22
 
23
23
  if TYPE_CHECKING:
24
24
  from flow.record.base import Record
@@ -218,7 +218,7 @@ class SplunkWriter(AbstractWriter):
218
218
  self.token = f"Splunk {self.token}"
219
219
 
220
220
  # Assume verify=True unless specified otherwise.
221
- self.verify = str(ssl_verify).lower() not in ("0", "false")
221
+ self.verify = boolean_argument(ssl_verify)
222
222
  if not self.verify:
223
223
  log.warning("Certificate verification is disabled")
224
224
 
@@ -186,6 +186,7 @@ class Record:
186
186
  return OrderedDict((k, getattr(self, k)) for k in self.__slots__ if k not in exclude)
187
187
 
188
188
  if TYPE_CHECKING:
189
+
189
190
  def __getattr__(self, name: str) -> Any: ...
190
191
 
191
192
  def __setattr__(self, k: str, v: Any) -> None:
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  from __future__ import annotations
3
3
 
4
+ import argparse
4
5
  import logging
5
6
  import sys
6
7
  from importlib import import_module
@@ -69,8 +70,6 @@ def list_adapters() -> None:
69
70
 
70
71
  @catch_sigpipe
71
72
  def main(argv: list[str] | None = None) -> int:
72
- import argparse
73
-
74
73
  parser = argparse.ArgumentParser(
75
74
  description="Record dumper, a tool that can read, write and filter records",
76
75
  formatter_class=argparse.ArgumentDefaultsHelpFormatter,
@@ -107,7 +106,11 @@ def main(argv: list[str] | None = None) -> int:
107
106
  output.add_argument("--skip", metavar="COUNT", type=int, default=0, help="Skip the first COUNT records")
108
107
  output.add_argument("-w", "--writer", metavar="OUTPUT", default=None, help="Write records to output")
109
108
  output.add_argument(
110
- "-m", "--mode", default=None, choices=("csv", "json", "jsonlines", "line", "line-verbose"), help="Output mode"
109
+ "-m",
110
+ "--mode",
111
+ default=None,
112
+ choices=("csv", "csv-no-header", "json", "jsonlines", "line", "line-verbose"),
113
+ help="Output mode",
111
114
  )
112
115
  output.add_argument(
113
116
  "--split", metavar="COUNT", default=None, type=int, help="Write record files smaller than COUNT records"
@@ -180,6 +183,15 @@ def main(argv: list[str] | None = None) -> int:
180
183
  default=argparse.SUPPRESS,
181
184
  help="Short for --mode=line-verbose",
182
185
  )
186
+ aliases.add_argument(
187
+ "-Cn",
188
+ "--csv-no-header",
189
+ action="store_const",
190
+ const="csv-no-header",
191
+ dest="mode",
192
+ default=argparse.SUPPRESS,
193
+ help="Short for --mode=csv-no-header",
194
+ )
183
195
 
184
196
  args = parser.parse_args(argv)
185
197
 
@@ -198,6 +210,7 @@ def main(argv: list[str] | None = None) -> int:
198
210
  if not args.writer:
199
211
  mode_to_uri = {
200
212
  "csv": "csvfile://",
213
+ "csv-no-header": "csvfile://?header=false",
201
214
  "json": "jsonfile://?indent=2&descriptors=false",
202
215
  "jsonlines": "jsonfile://?descriptors=false",
203
216
  "line": "line://",
@@ -210,7 +223,7 @@ def main(argv: list[str] | None = None) -> int:
210
223
  "format_spec": args.format,
211
224
  }
212
225
  query = urlencode({k: v for k, v in qparams.items() if v})
213
- uri += "&" if urlparse(uri).query else "?" + query
226
+ uri += f"&{query}" if urlparse(uri).query else f"?{query}"
214
227
 
215
228
  if args.split:
216
229
  if not args.writer:
@@ -221,7 +234,7 @@ def main(argv: list[str] | None = None) -> int:
221
234
  query_dict = dict(parse_qsl(parsed.query))
222
235
  query_dict.update({"count": args.split, "suffix-length": args.suffix_length})
223
236
  query = urlencode(query_dict)
224
- uri = parsed.scheme + "://" + parsed.netloc + parsed.path + "?" + query
237
+ uri = f"{parsed.scheme}://{parsed.netloc}{parsed.path}?{query}"
225
238
 
226
239
  record_field_rewriter = None
227
240
  if fields or fields_to_exclude or args.exec_expression:
@@ -117,3 +117,32 @@ class EventHandler:
117
117
  def __call__(self, *args, **kwargs) -> None:
118
118
  for h in self.handlers:
119
119
  h(*args, **kwargs)
120
+
121
+
122
+ def boolean_argument(value: str | bool | int) -> bool:
123
+ """Convert a string, boolean, or integer to a boolean value.
124
+
125
+ This function interprets various string representations of boolean values,
126
+ such as "true", "false", "1", "0", "yes", "no".
127
+ It also accepts boolean and integer values directly.
128
+
129
+ Arguments:
130
+ value: The value to convert. Can be a string, boolean, or integer.
131
+
132
+ Returns:
133
+ bool: The converted boolean value.
134
+
135
+ Raises:
136
+ ValueError: If the value cannot be interpreted as a boolean.
137
+ """
138
+ if isinstance(value, bool):
139
+ return value
140
+ if isinstance(value, int):
141
+ return bool(value)
142
+ if isinstance(value, str):
143
+ value = value.lower()
144
+ if value in ("true", "1", "y", "yes", "on"):
145
+ return True
146
+ if value in ("false", "0", "n", "no", "off"):
147
+ return False
148
+ raise ValueError(f"Invalid boolean argument: {value}")
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '3.21.dev2'
21
- __version_tuple__ = version_tuple = (3, 21, 'dev2')
20
+ __version__ = version = '3.21.dev4'
21
+ __version_tuple__ = version_tuple = (3, 21, 'dev4')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.21.dev2
3
+ Version: 3.21.dev4
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -37,10 +37,13 @@ Requires-Dist: duckdb; extra == "duckdb"
37
37
  Requires-Dist: pytz; extra == "duckdb"
38
38
  Provides-Extra: splunk
39
39
  Requires-Dist: httpx; extra == "splunk"
40
+ Provides-Extra: xlsx
41
+ Requires-Dist: openpyxl; extra == "xlsx"
40
42
  Provides-Extra: test
41
43
  Requires-Dist: flow.record[compression]; extra == "test"
42
44
  Requires-Dist: flow.record[avro]; extra == "test"
43
45
  Requires-Dist: flow.record[elastic]; extra == "test"
46
+ Requires-Dist: flow.record[xlsx]; extra == "test"
44
47
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
48
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
46
49
  Requires-Dist: tqdm; extra == "test"
@@ -5,6 +5,7 @@ MANIFEST.in
5
5
  README.md
6
6
  pyproject.toml
7
7
  tox.ini
8
+ examples/__init__.py
8
9
  examples/filesystem.py
9
10
  examples/passivedns.py
10
11
  examples/records.json
@@ -77,6 +78,7 @@ tests/test_regression.py
77
78
  tests/test_selector.py
78
79
  tests/test_splunk_adapter.py
79
80
  tests/test_sqlite_duckdb_adapter.py
81
+ tests/test_utils.py
80
82
  tests/test_xlsx_adapter.py
81
83
  tests/docs/Makefile
82
84
  tests/docs/conf.py
@@ -34,8 +34,12 @@ httpx
34
34
  flow.record[compression]
35
35
  flow.record[avro]
36
36
  flow.record[elastic]
37
+ flow.record[xlsx]
37
38
  tqdm
38
39
 
39
40
  [test:platform_python_implementation != "PyPy" and python_version < "3.12"]
40
41
  duckdb
41
42
  pytz
43
+
44
+ [xlsx]
45
+ openpyxl
@@ -56,10 +56,14 @@ duckdb = [
56
56
  splunk = [
57
57
  "httpx",
58
58
  ]
59
+ xlsx = [
60
+ "openpyxl",
61
+ ]
59
62
  test = [
60
63
  "flow.record[compression]",
61
64
  "flow.record[avro]",
62
65
  "flow.record[elastic]",
66
+ "flow.record[xlsx]",
63
67
  "duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
64
68
  "pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
65
69
  "tqdm",
File without changes
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import traceback
3
4
  from typing import Callable
4
5
 
5
6
 
@@ -14,6 +15,5 @@ def main(glob: dict[str, Callable[..., None]]) -> None:
14
15
  print("PASSED")
15
16
  except Exception:
16
17
  print("FAILED")
17
- import traceback
18
18
 
19
19
  traceback.print_exc()
@@ -376,15 +376,21 @@ def test_uri_type() -> None:
376
376
  assert r.path.protocol == "http"
377
377
  assert r.path.hostname == "example.com"
378
378
 
379
- with pytest.warns(DeprecationWarning):
379
+ with pytest.warns(
380
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
381
+ ):
380
382
  r = TestRecord(uri.from_windows(r"c:\windows\program files\Fox-IT B.V\flow.exe"))
381
383
  assert r.path.filename == "flow.exe"
382
384
 
383
385
  r = TestRecord()
384
- with pytest.warns(DeprecationWarning):
386
+ with pytest.warns(
387
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
388
+ ):
385
389
  r.path = uri.normalize(r"c:\Users\Fox-IT\Downloads\autoruns.exe")
386
390
  assert r.path.filename == "autoruns.exe"
387
- with pytest.warns(DeprecationWarning):
391
+ with pytest.warns(
392
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
393
+ ):
388
394
  assert r.path.dirname == uri.normalize(r"\Users\Fox-IT\Downloads")
389
395
  assert r.path.dirname == "/Users/Fox-IT/Downloads"
390
396
 
@@ -22,7 +22,7 @@ def test_uri_packing() -> None:
22
22
  ],
23
23
  )
24
24
 
25
- # construct with an url
25
+ # Construct with an url
26
26
  record = TestRecord("http://www.google.com/evil.bin")
27
27
  data = packer.pack(record)
28
28
  record = packer.unpack(data)
@@ -30,8 +30,9 @@ def test_uri_packing() -> None:
30
30
  assert record.path.filename == "evil.bin"
31
31
  assert record.path.dirname == "/"
32
32
 
33
- # construct from uri() -> for windows=True
34
- with pytest.warns(DeprecationWarning):
33
+ with pytest.warns(
34
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
35
+ ):
35
36
  path = uri.from_windows(r"c:\Program Files\Fox-IT\flow is awesome.exe")
36
37
  record = TestRecord(path)
37
38
  data = packer.pack(record)
@@ -40,8 +41,9 @@ def test_uri_packing() -> None:
40
41
  assert record.path.filename == "flow is awesome.exe"
41
42
  assert record.path.dirname == "/Program Files/Fox-IT"
42
43
 
43
- # construct using uri.from_windows()
44
- with pytest.warns(DeprecationWarning):
44
+ with pytest.warns(
45
+ DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
46
+ ):
45
47
  path = uri.from_windows(r"c:\Users\Hello World\foo.bar.exe")
46
48
  record = TestRecord(path)
47
49
  data = packer.pack(record)
@@ -17,6 +17,7 @@ import pytest
17
17
 
18
18
  import flow.record.fieldtypes
19
19
  from flow.record import RecordDescriptor, RecordReader, RecordWriter
20
+ from flow.record.adapter.line import field_types_for_record_descriptor
20
21
  from flow.record.fieldtypes import flow_record_tz
21
22
  from flow.record.tools import rdump
22
23
 
@@ -681,8 +682,6 @@ def test_rdump_line_verbose(tmp_path: Path, capsys: pytest.CaptureFixture, rdump
681
682
  writer.write(TestRecord(counter=2))
682
683
  writer.write(TestRecord(counter=3))
683
684
 
684
- from flow.record.adapter.line import field_types_for_record_descriptor
685
-
686
685
  field_types_for_record_descriptor.cache_clear()
687
686
  assert field_types_for_record_descriptor.cache_info().currsize == 0
688
687
  rdump.main([str(record_path), *rdump_params])
@@ -242,7 +242,7 @@ def test_record_adapter_archive(tmp_path: Path) -> None:
242
242
 
243
243
  # defaults to always archive by /YEAR/MONTH/DAY/ dir structure
244
244
  outdir = tmp_path.joinpath(f"{dt:%Y/%m/%d}")
245
- assert len(list(outdir.iterdir()))
245
+ assert list(outdir.iterdir())
246
246
 
247
247
  # read the archived records and test filename and counts
248
248
  count2 = 0
@@ -8,6 +8,7 @@ import subprocess
8
8
  import sys
9
9
  from datetime import datetime, timezone
10
10
  from io import BytesIO
11
+ from pathlib import Path
11
12
  from typing import Callable
12
13
  from unittest.mock import MagicMock, patch
13
14
 
@@ -691,5 +692,24 @@ def test_record_writer_default_stdout(capsysbinary: pytest.CaptureFixture) -> No
691
692
  assert stdout.startswith(b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n")
692
693
 
693
694
 
695
+ def test_rdump_selected_fields(capsysbinary: pytest.CaptureFixture) -> None:
696
+ """Test rdump regression where selected fields was not propagated properly to adapter."""
697
+
698
+ # Pastebin record used for this test
699
+ example_records_json_path = Path(__file__).parent.parent / "examples" / "records.json"
700
+
701
+ # rdump --fields key,title,syntax --csv
702
+ rdump.main([str(example_records_json_path), "--fields", "key,title,syntax", "--csv"])
703
+ captured = capsysbinary.readouterr()
704
+ assert captured.err == b""
705
+ assert captured.out == b"key,title,syntax\r\nQ42eWSaF,A sample pastebin record,text\r\n"
706
+
707
+ # rdump --fields key,title,syntax --csv
708
+ rdump.main([str(example_records_json_path), "--fields", "key,title,syntax", "--csv-no-header"])
709
+ captured = capsysbinary.readouterr()
710
+ assert captured.err == b""
711
+ assert captured.out == b"Q42eWSaF,A sample pastebin record,text\r\n"
712
+
713
+
694
714
  if __name__ == "__main__":
695
715
  __import__("standalone_test").main(globals())
@@ -0,0 +1,25 @@
1
+ import pytest
2
+
3
+ from flow.record.utils import boolean_argument
4
+
5
+
6
+ def test_boolean_argument() -> None:
7
+ assert boolean_argument("True") is True
8
+ assert boolean_argument("true") is True
9
+ assert boolean_argument("trUe") is True
10
+ assert boolean_argument("False") is False
11
+ assert boolean_argument("false") is False
12
+ assert boolean_argument("1") is True
13
+ assert boolean_argument("0") is False
14
+ assert boolean_argument("yes") is True
15
+ assert boolean_argument("no") is False
16
+ assert boolean_argument("y") is True
17
+ assert boolean_argument("n") is False
18
+ assert boolean_argument("on") is True
19
+ assert boolean_argument("off") is False
20
+ assert boolean_argument(True) is True
21
+ assert boolean_argument(False) is False
22
+ assert boolean_argument(1) is True
23
+ assert boolean_argument(0) is False
24
+ with pytest.raises(ValueError, match="Invalid boolean argument: .*"):
25
+ boolean_argument("maybe")
@@ -9,6 +9,7 @@ from unittest.mock import MagicMock
9
9
  import pytest
10
10
 
11
11
  from flow.record import fieldtypes
12
+ from flow.record.adapter.xlsx import sanitize_fieldvalues
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from collections.abc import Iterator
@@ -27,8 +28,6 @@ def mock_openpyxl_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock
27
28
 
28
29
 
29
30
  def test_sanitize_field_values(mock_openpyxl_package: MagicMock) -> None:
30
- from flow.record.adapter.xlsx import sanitize_fieldvalues
31
-
32
31
  assert list(
33
32
  sanitize_fieldvalues(
34
33
  [
File without changes
File without changes