flow.record 3.20.dev1__tar.gz → 3.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. flow_record-3.21/.gitattributes +1 -0
  2. {flow_record-3.20.dev1 → flow_record-3.21}/PKG-INFO +11 -16
  3. {flow_record-3.20.dev1 → flow_record-3.21}/examples/filesystem.py +28 -29
  4. {flow_record-3.20.dev1 → flow_record-3.21}/examples/passivedns.py +12 -9
  5. {flow_record-3.20.dev1 → flow_record-3.21}/examples/tcpconn.py +5 -3
  6. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/avro.py +4 -1
  7. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/broker.py +1 -0
  8. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/csvfile.py +16 -6
  9. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/elastic.py +58 -19
  10. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/jsonfile.py +7 -4
  11. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/mongo.py +4 -1
  12. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/splunk.py +3 -3
  13. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/sqlite.py +5 -2
  14. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/xlsx.py +5 -2
  15. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/base.py +11 -5
  16. flow_record-3.21/flow/record/context.py +69 -0
  17. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/__init__.py +10 -0
  18. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/ip.py +6 -18
  19. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/ipv4.py +3 -3
  20. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/jsonpacker.py +3 -2
  21. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/selector.py +2 -2
  22. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/stream.py +25 -8
  23. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/tools/rdump.py +209 -46
  24. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/utils.py +35 -1
  25. flow_record-3.21/flow/record/version.py +34 -0
  26. {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/PKG-INFO +11 -16
  27. {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/SOURCES.txt +38 -27
  28. flow_record-3.21/flow.record.egg-info/requires.txt +36 -0
  29. {flow_record-3.20.dev1 → flow_record-3.21}/pyproject.toml +65 -21
  30. flow_record-3.21/tests/__init__.py +0 -0
  31. flow_record-3.21/tests/_data/.gitkeep +0 -0
  32. flow_record-3.21/tests/adapter/__init__.py +0 -0
  33. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/adapter}/test_avro.py +50 -1
  34. flow_record-3.20.dev1/tests/test_json_record_adapter.py → flow_record-3.21/tests/adapter/test_json.py +1 -2
  35. flow_record-3.20.dev1/tests/test_splunk_adapter.py → flow_record-3.21/tests/adapter/test_splunk.py +7 -3
  36. flow_record-3.20.dev1/tests/test_sqlite_duckdb_adapter.py → flow_record-3.21/tests/adapter/test_sqlite_duckdb.py +3 -2
  37. flow_record-3.20.dev1/tests/test_xlsx_adapter.py → flow_record-3.21/tests/adapter/test_xlsx.py +1 -2
  38. flow_record-3.21/tests/conftest.py +13 -0
  39. flow_record-3.21/tests/fieldtypes/__init__.py +0 -0
  40. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/fieldtypes}/test_fieldtypes.py +15 -6
  41. flow_record-3.20.dev1/tests/test_fieldtype_ip.py → flow_record-3.21/tests/fieldtypes/test_ip.py +2 -2
  42. flow_record-3.21/tests/packer/__init__.py +0 -0
  43. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/packer}/test_json_packer.py +27 -1
  44. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/packer}/test_packer.py +8 -6
  45. flow_record-3.21/tests/record/__init__.py +0 -0
  46. flow_record-3.20.dev1/tests/test_record_adapter.py → flow_record-3.21/tests/record/test_adapter.py +8 -9
  47. flow_record-3.21/tests/record/test_context.py +66 -0
  48. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/record}/test_record.py +27 -7
  49. flow_record-3.21/tests/selector/__init__.py +0 -0
  50. flow_record-3.20.dev1/tests/test_selector.py → flow_record-3.21/tests/selector/test_selectors.py +7 -3
  51. flow_record-3.20.dev1/tests/test_regression.py → flow_record-3.21/tests/test_regressions.py +79 -19
  52. flow_record-3.21/tests/test_utils.py +25 -0
  53. flow_record-3.21/tests/tools/__init__.py +0 -0
  54. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/tools}/test_rdump.py +82 -7
  55. {flow_record-3.20.dev1 → flow_record-3.21}/tox.ini +12 -14
  56. flow_record-3.20.dev1/flow/record/version.py +0 -16
  57. flow_record-3.20.dev1/flow.record.egg-info/requires.txt +0 -41
  58. flow_record-3.20.dev1/tests/standalone_test.py +0 -19
  59. flow_record-3.20.dev1/tests/test_avro_adapter.py +0 -58
  60. {flow_record-3.20.dev1 → flow_record-3.21}/.git-blame-ignore-revs +0 -0
  61. {flow_record-3.20.dev1 → flow_record-3.21}/COPYRIGHT +0 -0
  62. {flow_record-3.20.dev1 → flow_record-3.21}/LICENSE +0 -0
  63. {flow_record-3.20.dev1 → flow_record-3.21}/MANIFEST.in +0 -0
  64. {flow_record-3.20.dev1 → flow_record-3.21}/README.md +0 -0
  65. {flow_record-3.20.dev1/flow/record/tools → flow_record-3.21/examples}/__init__.py +0 -0
  66. {flow_record-3.20.dev1 → flow_record-3.21}/examples/records.json +0 -0
  67. /flow_record-3.20.dev1/tests/selector_explain_example.py → /flow_record-3.21/examples/selectors.py +0 -0
  68. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/__init__.py +0 -0
  69. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/__init__.py +0 -0
  70. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/archive.py +0 -0
  71. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/duckdb.py +0 -0
  72. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/line.py +0 -0
  73. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/split.py +0 -0
  74. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/stream.py +0 -0
  75. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/adapter/text.py +0 -0
  76. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/exceptions.py +0 -0
  77. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/credential.py +0 -0
  78. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/__init__.py +0 -0
  79. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/tcp.py +0 -0
  80. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/fieldtypes/net/udp.py +0 -0
  81. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/packer.py +0 -0
  82. {flow_record-3.20.dev1/tests → flow_record-3.21/flow/record/tools}/__init__.py +0 -0
  83. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/tools/geoip.py +0 -0
  84. {flow_record-3.20.dev1 → flow_record-3.21}/flow/record/whitelist.py +0 -0
  85. {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/dependency_links.txt +0 -0
  86. {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/entry_points.txt +0 -0
  87. {flow_record-3.20.dev1 → flow_record-3.21}/flow.record.egg-info/top_level.txt +0 -0
  88. {flow_record-3.20.dev1 → flow_record-3.21}/setup.cfg +0 -0
  89. {flow_record-3.20.dev1/tests/docs → flow_record-3.21/tests/_docs}/Makefile +0 -0
  90. {flow_record-3.20.dev1/tests/docs → flow_record-3.21/tests/_docs}/conf.py +0 -0
  91. {flow_record-3.20.dev1/tests/docs → flow_record-3.21/tests/_docs}/index.rst +0 -0
  92. {flow_record-3.20.dev1 → flow_record-3.21}/tests/_utils.py +0 -0
  93. /flow_record-3.20.dev1/tests/test_csv_adapter.py → /flow_record-3.21/tests/adapter/test_csv.py +0 -0
  94. /flow_record-3.20.dev1/tests/test_elastic_adapter.py → /flow_record-3.21/tests/adapter/test_elastic.py +0 -0
  95. /flow_record-3.20.dev1/tests/test_adapter_line.py → /flow_record-3.21/tests/adapter/test_line.py +0 -0
  96. /flow_record-3.20.dev1/tests/test_adapter_text.py → /flow_record-3.21/tests/adapter/test_text.py +0 -0
  97. /flow_record-3.20.dev1/tests/test_record_descriptor.py → /flow_record-3.21/tests/record/test_descriptor.py +0 -0
  98. {flow_record-3.20.dev1/tests → flow_record-3.21/tests/record}/test_multi_timestamp.py +0 -0
  99. /flow_record-3.20.dev1/tests/test_compiled_selector.py → /flow_record-3.21/tests/selector/test_compiled.py +0 -0
  100. {flow_record-3.20.dev1 → flow_record-3.21}/tests/test_deprecations.py +0 -0
@@ -0,0 +1 @@
1
+ tests/_data/** filter=lfs diff=lfs merge=lfs -text
@@ -1,9 +1,9 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: flow.record
3
- Version: 3.20.dev1
3
+ Version: 3.21
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
- License: Affero General Public License v3
6
+ License-Expression: AGPL-3.0-or-later
7
7
  Project-URL: homepage, https://dissect.tools
8
8
  Project-URL: documentation, https://docs.dissect.tools/en/latest/projects/flow.record
9
9
  Project-URL: repository, https://github.com/fox-it/flow.record
@@ -11,12 +11,11 @@ Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Environment :: Console
12
12
  Classifier: Intended Audience :: Developers
13
13
  Classifier: Intended Audience :: Information Technology
14
- Classifier: License :: OSI Approved
15
14
  Classifier: Operating System :: OS Independent
16
15
  Classifier: Programming Language :: Python :: 3
17
16
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
18
17
  Classifier: Topic :: Utilities
19
- Requires-Python: ~=3.9
18
+ Requires-Python: >=3.10
20
19
  Description-Content-Type: text/markdown
21
20
  License-File: LICENSE
22
21
  License-File: COPYRIGHT
@@ -24,29 +23,25 @@ Requires-Dist: msgpack>=0.5.2
24
23
  Requires-Dist: tzdata; platform_system == "Windows"
25
24
  Provides-Extra: compression
26
25
  Requires-Dist: lz4; extra == "compression"
27
- Requires-Dist: zstandard; extra == "compression"
26
+ Requires-Dist: zstandard; platform_python_implementation != "PyPy" and extra == "compression"
28
27
  Provides-Extra: elastic
29
28
  Requires-Dist: elasticsearch; extra == "elastic"
30
29
  Provides-Extra: geoip
31
30
  Requires-Dist: maxminddb; extra == "geoip"
32
31
  Provides-Extra: avro
33
- Requires-Dist: cramjam<2.8.4; (platform_python_implementation == "PyPy" and python_version == "3.9") and extra == "avro"
34
32
  Requires-Dist: fastavro[snappy]; extra == "avro"
35
33
  Provides-Extra: duckdb
36
- Requires-Dist: duckdb; extra == "duckdb"
37
- Requires-Dist: pytz; extra == "duckdb"
34
+ Requires-Dist: duckdb; platform_python_implementation != "PyPy" and extra == "duckdb"
35
+ Requires-Dist: pytz; platform_python_implementation != "PyPy" and extra == "duckdb"
38
36
  Provides-Extra: splunk
39
37
  Requires-Dist: httpx; extra == "splunk"
40
- Provides-Extra: test
41
- Requires-Dist: flow.record[compression]; extra == "test"
42
- Requires-Dist: flow.record[avro]; extra == "test"
43
- Requires-Dist: flow.record[elastic]; extra == "test"
44
- Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
- Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
46
- Requires-Dist: tqdm; extra == "test"
38
+ Provides-Extra: xlsx
39
+ Requires-Dist: openpyxl; extra == "xlsx"
47
40
  Provides-Extra: full
48
41
  Requires-Dist: flow.record[compression]; extra == "full"
49
42
  Requires-Dist: tqdm; extra == "full"
43
+ Requires-Dist: structlog; extra == "full"
44
+ Dynamic: license-file
50
45
 
51
46
  # flow.record
52
47
 
@@ -1,10 +1,15 @@
1
- import os
2
- import stat
1
+ from __future__ import annotations
3
2
 
4
- from datetime import datetime
3
+ import stat
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
5
6
 
6
7
  from flow.record import RecordDescriptor, RecordWriter
7
8
 
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Iterator
11
+
12
+
8
13
  descriptor = """
9
14
  filesystem/unix/entry
10
15
  string path;
@@ -22,34 +27,32 @@ filesystem/unix/entry
22
27
  FilesystemFile = RecordDescriptor(descriptor)
23
28
 
24
29
 
25
- def hash_file(path, t):
26
- f = open(path, "rb")
27
- while 1:
28
- d = f.read(4096)
29
- if d == "":
30
- break
31
- f.close()
30
+ def hash_file(path: str | Path) -> None:
31
+ with Path(path).open("rb") as f:
32
+ while True:
33
+ d = f.read(4096)
34
+ if not d:
35
+ break
32
36
 
33
37
 
34
38
  class FilesystemIterator:
35
39
  basepath = None
36
40
 
37
- def __init__(self, basepath):
41
+ def __init__(self, basepath: str | None):
38
42
  self.basepath = basepath
39
43
  self.recordType = FilesystemFile
40
44
 
41
- def classify(self, source, classification):
45
+ def classify(self, source: str, classification: str) -> None:
42
46
  self.recordType = FilesystemFile.base(_source=source, _classification=classification)
43
47
 
44
- def iter(self, path):
45
- path = os.path.abspath(path)
46
- return self._iter(path)
48
+ def iter(self, path: str | Path) -> Iterator[FilesystemFile]:
49
+ return self._iter(Path(path).resolve())
47
50
 
48
- def _iter(self, path):
49
- if path.startswith("/proc"):
51
+ def _iter(self, path: Path) -> Iterator[FilesystemFile]:
52
+ if path.is_relative_to("/proc"):
50
53
  return
51
54
 
52
- st = os.lstat(path)
55
+ st = path.lstat()
53
56
 
54
57
  abspath = path
55
58
  if self.basepath and abspath.startswith(self.basepath):
@@ -59,7 +62,7 @@ class FilesystemIterator:
59
62
 
60
63
  link = None
61
64
  if ifmt == stat.S_IFLNK:
62
- link = os.readlink(path)
65
+ link = path.readlink()
63
66
 
64
67
  yield self.recordType(
65
68
  path=abspath,
@@ -69,20 +72,16 @@ class FilesystemIterator:
69
72
  size=st.st_size,
70
73
  uid=st.st_uid,
71
74
  gid=st.st_gid,
72
- ctime=datetime.fromtimestamp(st.st_ctime),
73
- mtime=datetime.fromtimestamp(st.st_mtime),
74
- atime=datetime.fromtimestamp(st.st_atime),
75
+ ctime=st.st_ctime,
76
+ mtime=st.st_mtime,
77
+ atime=st.st_atime,
75
78
  link=link,
76
79
  )
77
80
 
78
81
  if ifmt == stat.S_IFDIR:
79
- for i in os.listdir(path):
80
- if i in (".", ".."):
81
- continue
82
-
83
- fullpath = os.path.join(path, i)
84
- for e in self.iter(fullpath):
85
- yield e
82
+ for i in path.iterdir():
83
+ fullpath = path.joinpath(i)
84
+ yield from self.iter(fullpath)
86
85
 
87
86
 
88
87
  chunk = []
@@ -1,18 +1,21 @@
1
1
  #!/usr/bin/env pypy
2
- import record
2
+ from __future__ import annotations
3
+
3
4
  import sys
4
- import datetime
5
+ from datetime import datetime, timezone
5
6
 
6
7
  import net.ipv4
7
-
8
+ import record
8
9
  from fileprocessing import DirectoryProcessor
9
10
 
11
+ UTC_TIMEZONE = timezone.utc
12
+
10
13
 
11
- def ts(s):
12
- return datetime.datetime.fromtimestamp(float(s))
14
+ def ts(s: float) -> datetime:
15
+ return datetime.fromtimestamp(float(s), tz=UTC_TIMEZONE)
13
16
 
14
17
 
15
- def ip(s):
18
+ def ip(s: str) -> net.ipv4.Address:
16
19
  return net.ipv4.Address(s)
17
20
 
18
21
 
@@ -21,7 +24,7 @@ class SeparatedFile:
21
24
  seperator = None
22
25
  format = None
23
26
 
24
- def __init__(self, fp, seperator, format):
27
+ def __init__(self, fp: list[str], seperator: str | None, format: list[tuple]):
25
28
  self.fp = fp
26
29
  self.seperator = seperator
27
30
  self.format = format
@@ -46,7 +49,7 @@ class SeparatedFile:
46
49
  yield recordtype(**r)
47
50
 
48
51
 
49
- def PassiveDnsFile(fp):
52
+ def PassiveDnsFile(fp: list[str]) -> SeparatedFile:
50
53
  return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT)
51
54
 
52
55
 
@@ -63,7 +66,7 @@ PASSIVEDNS_FORMAT = [
63
66
  ]
64
67
 
65
68
 
66
- def main():
69
+ def main() -> None:
67
70
  rs = record.RecordOutput(sys.stdout)
68
71
  for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"):
69
72
  rs.write(r)
@@ -1,8 +1,10 @@
1
1
  import random
2
+ from datetime import datetime, timezone
2
3
 
3
- from datetime import datetime
4
4
  from flow import record
5
5
 
6
+ UTC_TIMEZONE = timezone.utc
7
+
6
8
  descriptor = """
7
9
  network/traffic/tcp/connection
8
10
  datetime ts;
@@ -32,9 +34,9 @@ port_list = [
32
34
 
33
35
  rs = record.RecordWriter()
34
36
 
35
- for i in range(500):
37
+ for _ in range(500):
36
38
  r = conn(
37
- ts=datetime.now(),
39
+ ts=datetime.now(tz=UTC_TIMEZONE),
38
40
  src=random.choice(ip_list),
39
41
  srcport=random.choice(port_list),
40
42
  dst=random.choice(ip_list),
@@ -9,6 +9,7 @@ import fastavro
9
9
 
10
10
  from flow import record
11
11
  from flow.record.adapter import AbstractReader, AbstractWriter
12
+ from flow.record.context import get_app_context, match_record_with_context
12
13
  from flow.record.selector import make_selector
13
14
  from flow.record.utils import is_stdout
14
15
 
@@ -113,6 +114,8 @@ class AvroReader(AbstractReader):
113
114
  }
114
115
 
115
116
  def __iter__(self) -> Iterator[record.Record]:
117
+ ctx = get_app_context()
118
+ selector = self.selector
116
119
  for obj in self.reader:
117
120
  # Convert timestamp-micros fields back to datetime fields
118
121
  for field_name in self.datetime_fields:
@@ -121,7 +124,7 @@ class AvroReader(AbstractReader):
121
124
  obj[field_name] = EPOCH + timedelta(microseconds=value)
122
125
 
123
126
  rec = self.desc.recordType(**obj)
124
- if not self.selector or self.selector.match(rec):
127
+ if match_record_with_context(rec, selector, ctx):
125
128
  yield rec
126
129
 
127
130
  def close(self) -> None:
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  from typing import TYPE_CHECKING
4
4
 
5
5
  from flow.broker import Publisher, Subscriber
6
+
6
7
  from flow.record.adapter import AbstractReader, AbstractWriter
7
8
 
8
9
  if TYPE_CHECKING:
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import contextlib
3
4
  import csv
4
5
  import sys
5
6
  from pathlib import Path
@@ -8,8 +9,9 @@ from typing import TYPE_CHECKING
8
9
  from flow.record import RecordDescriptor
9
10
  from flow.record.adapter import AbstractReader, AbstractWriter
10
11
  from flow.record.base import Record, normalize_fieldname
12
+ from flow.record.context import get_app_context, match_record_with_context
11
13
  from flow.record.selector import make_selector
12
- from flow.record.utils import is_stdout
14
+ from flow.record.utils import boolean_argument, is_stdout
13
15
 
14
16
  if TYPE_CHECKING:
15
17
  from collections.abc import Iterator
@@ -17,11 +19,12 @@ if TYPE_CHECKING:
17
19
  __usage__ = """
18
20
  Comma-separated values (CSV) adapter
19
21
  ---
20
- Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]
22
+ Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]&header=[HEADER]
21
23
  Read usage: rdump csvfile://[PATH]?fields=[FIELDS]
22
24
  [PATH]: path to file. Leave empty or "-" to output to stdout
23
25
 
24
26
  Optional parameters:
27
+ [HEADER]: if set to false, it will not print the CSV header (default: true)
25
28
  [TERMINATOR]: line terminator, default is \\r\\n
26
29
  [FIELDS]: comma-separated list of CSV fields (in case of missing CSV header)
27
30
  """
@@ -34,6 +37,7 @@ class CsvfileWriter(AbstractWriter):
34
37
  fields: str | list[str] | None = None,
35
38
  exclude: str | list[str] | None = None,
36
39
  lineterminator: str = "\r\n",
40
+ header: str = "true",
37
41
  **kwargs,
38
42
  ):
39
43
  self.fp = None
@@ -52,13 +56,16 @@ class CsvfileWriter(AbstractWriter):
52
56
  self.fields = self.fields.split(",")
53
57
  if isinstance(self.exclude, str):
54
58
  self.exclude = self.exclude.split(",")
59
+ self.header = boolean_argument(header)
55
60
 
56
61
  def write(self, r: Record) -> None:
57
62
  rdict = r._asdict(fields=self.fields, exclude=self.exclude)
58
63
  if not self.desc or self.desc != r._desc:
59
64
  self.desc = r._desc
60
65
  self.writer = csv.DictWriter(self.fp, rdict, lineterminator=self.lineterminator)
61
- self.writer.writeheader()
66
+ if self.header:
67
+ # Write header only if it is requested
68
+ self.writer.writeheader()
62
69
  self.writer.writerow(rdict)
63
70
 
64
71
  def flush(self) -> None:
@@ -84,7 +91,8 @@ class CsvfileReader(AbstractReader):
84
91
 
85
92
  self.dialect = "excel"
86
93
  if self.fp.seekable():
87
- self.dialect = csv.Sniffer().sniff(self.fp.read(1024))
94
+ with contextlib.suppress(csv.Error):
95
+ self.dialect = csv.Sniffer().sniff(self.fp.read(1024))
88
96
  self.fp.seek(0)
89
97
  self.reader = csv.reader(self.fp, dialect=self.dialect)
90
98
 
@@ -107,8 +115,10 @@ class CsvfileReader(AbstractReader):
107
115
  self.fp = None
108
116
 
109
117
  def __iter__(self) -> Iterator[Record]:
118
+ ctx = get_app_context()
119
+ selector = self.selector
110
120
  for row in self.reader:
111
- rdict = dict(zip(self.fields, row))
121
+ rdict = dict(zip(self.fields, row, strict=False))
112
122
  record = self.desc.init_from_dict(rdict)
113
- if not self.selector or self.selector.match(record):
123
+ if match_record_with_context(record, selector, ctx):
114
124
  yield record
@@ -4,8 +4,11 @@ import hashlib
4
4
  import logging
5
5
  import queue
6
6
  import threading
7
+ from contextlib import suppress
7
8
  from typing import TYPE_CHECKING
8
9
 
10
+ import urllib3
11
+
9
12
  try:
10
13
  import elasticsearch
11
14
  import elasticsearch.helpers
@@ -17,8 +20,10 @@ except ImportError:
17
20
 
18
21
  from flow.record.adapter import AbstractReader, AbstractWriter
19
22
  from flow.record.base import Record, RecordDescriptor
23
+ from flow.record.context import get_app_context, match_record_with_context
20
24
  from flow.record.fieldtypes import fieldtype_for_value
21
25
  from flow.record.jsonpacker import JsonRecordPacker
26
+ from flow.record.utils import boolean_argument
22
27
 
23
28
  if TYPE_CHECKING:
24
29
  from collections.abc import Iterator
@@ -72,10 +77,12 @@ class ElasticWriter(AbstractWriter):
72
77
 
73
78
  self.index = index
74
79
  self.uri = uri
75
- verify_certs = str(verify_certs).lower() in ("1", "true")
76
- http_compress = str(http_compress).lower() in ("1", "true")
77
- self.hash_record = str(hash_record).lower() in ("1", "true")
80
+ verify_certs = boolean_argument(verify_certs)
81
+ http_compress = boolean_argument(http_compress)
82
+ self.hash_record = boolean_argument(hash_record)
78
83
  queue_size = int(queue_size)
84
+ request_timeout = int(request_timeout)
85
+ self.max_retries = int(max_retries)
79
86
 
80
87
  if not uri.lower().startswith(("http://", "https://")):
81
88
  uri = "http://" + uri
@@ -92,7 +99,7 @@ class ElasticWriter(AbstractWriter):
92
99
  api_key=api_key,
93
100
  request_timeout=request_timeout,
94
101
  retry_on_timeout=True,
95
- max_retries=max_retries,
102
+ max_retries=self.max_retries,
96
103
  )
97
104
 
98
105
  self.json_packer = JsonRecordPacker()
@@ -102,8 +109,6 @@ class ElasticWriter(AbstractWriter):
102
109
 
103
110
  if not verify_certs:
104
111
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
105
- import urllib3
106
-
107
112
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
108
113
 
109
114
  self.metadata_fields = {}
@@ -112,10 +117,9 @@ class ElasticWriter(AbstractWriter):
112
117
  self.metadata_fields[arg_key[6:]] = arg_val
113
118
 
114
119
  def excepthook(self, exc: threading.ExceptHookArgs, *args, **kwargs) -> None:
115
- log.error("Exception in thread: %s", exc)
116
120
  self.exception = getattr(exc, "exc_value", exc)
121
+ self.exception = enrich_elastic_exception(self.exception)
117
122
  self.event.set()
118
- self.close()
119
123
 
120
124
  def record_to_document(self, record: Record, index: str) -> dict:
121
125
  """Convert a record to a Elasticsearch compatible document dictionary"""
@@ -168,13 +172,13 @@ class ElasticWriter(AbstractWriter):
168
172
  - https://elasticsearch-py.readthedocs.io/en/v8.17.1/helpers.html#elasticsearch.helpers.streaming_bulk
169
173
  - https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/helpers/actions.py#L362
170
174
  """
175
+
171
176
  for _ok, _item in elasticsearch.helpers.streaming_bulk(
172
177
  self.es,
173
178
  self.document_stream(),
174
179
  raise_on_error=True,
175
180
  raise_on_exception=True,
176
- # Some settings have to be redefined because streaming_bulk does not inherit them from the self.es instance.
177
- max_retries=3,
181
+ max_retries=self.max_retries,
178
182
  ):
179
183
  pass
180
184
 
@@ -190,13 +194,17 @@ class ElasticWriter(AbstractWriter):
190
194
  pass
191
195
 
192
196
  def close(self) -> None:
193
- self.queue.put(StopIteration)
194
- self.event.wait()
197
+ if hasattr(self, "queue"):
198
+ self.queue.put(StopIteration)
199
+
200
+ if hasattr(self, "event"):
201
+ self.event.wait()
195
202
 
196
203
  if hasattr(self, "es"):
197
- self.es.close()
204
+ with suppress(Exception):
205
+ self.es.close()
198
206
 
199
- if self.exception:
207
+ if hasattr(self, "exception") and self.exception:
200
208
  raise self.exception
201
209
 
202
210
 
@@ -216,8 +224,10 @@ class ElasticReader(AbstractReader):
216
224
  self.index = index
217
225
  self.uri = uri
218
226
  self.selector = selector
219
- verify_certs = str(verify_certs).lower() in ("1", "true")
220
- http_compress = str(http_compress).lower() in ("1", "true")
227
+ verify_certs = boolean_argument(verify_certs)
228
+ http_compress = boolean_argument(http_compress)
229
+ request_timeout = int(request_timeout)
230
+ max_retries = int(max_retries)
221
231
 
222
232
  if not uri.lower().startswith(("http://", "https://")):
223
233
  uri = "http://" + uri
@@ -234,11 +244,11 @@ class ElasticReader(AbstractReader):
234
244
 
235
245
  if not verify_certs:
236
246
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
237
- import urllib3
238
-
239
247
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
240
248
 
241
249
  def __iter__(self) -> Iterator[Record]:
250
+ ctx = get_app_context()
251
+ selector = self.selector
242
252
  res = self.es.search(index=self.index)
243
253
  log.debug("ElasticSearch returned %u hits", res["hits"]["total"]["value"])
244
254
  for hit in res["hits"]["hits"]:
@@ -248,9 +258,38 @@ class ElasticReader(AbstractReader):
248
258
  fields = [(fieldtype_for_value(val, "string"), key) for key, val in source.items()]
249
259
  desc = RecordDescriptor("elastic/record", fields)
250
260
  obj = desc(**source)
251
- if not self.selector or self.selector.match(obj):
261
+ if match_record_with_context(obj, selector, ctx):
252
262
  yield obj
253
263
 
254
264
  def close(self) -> None:
255
265
  if hasattr(self, "es"):
256
266
  self.es.close()
267
+
268
+
269
+ def enrich_elastic_exception(exception: Exception) -> Exception:
270
+ """Extend the exception with error information from Elastic.
271
+
272
+ Resources:
273
+ - https://elasticsearch-py.readthedocs.io/en/v8.17.1/exceptions.html
274
+ """
275
+ errors = set()
276
+ if hasattr(exception, "errors"):
277
+ try:
278
+ for error in exception.errors:
279
+ index_dict = error.get("index", {})
280
+ status = index_dict.get("status")
281
+ error_dict = index_dict.get("error", {})
282
+ error_type = error_dict.get("type")
283
+ error_reason = error_dict.get("reason", "")
284
+
285
+ errors.add(f"({status} {error_type} {error_reason})")
286
+ except Exception:
287
+ errors.add("unable to extend errors")
288
+
289
+ # append errors to original exception message
290
+ error_str = ", ".join(errors)
291
+ original_message = exception.args[0] if exception.args else ""
292
+ new_message = f"{original_message} {error_str}"
293
+ exception.args = (new_message, *exception.args[1:])
294
+
295
+ return exception
@@ -6,9 +6,10 @@ from typing import TYPE_CHECKING, BinaryIO
6
6
  from flow import record
7
7
  from flow.record import JsonRecordPacker
8
8
  from flow.record.adapter import AbstractReader, AbstractWriter
9
+ from flow.record.context import get_app_context, match_record_with_context
9
10
  from flow.record.fieldtypes import fieldtype_for_value
10
11
  from flow.record.selector import make_selector
11
- from flow.record.utils import is_stdout
12
+ from flow.record.utils import boolean_argument, is_stdout
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from collections.abc import Iterator
@@ -33,7 +34,7 @@ class JsonfileWriter(AbstractWriter):
33
34
  def __init__(
34
35
  self, path: str | Path | BinaryIO, indent: str | int | None = None, descriptors: bool = True, **kwargs
35
36
  ):
36
- self.descriptors = str(descriptors).lower() in ("true", "1")
37
+ self.descriptors = boolean_argument(descriptors)
37
38
  self.fp = record.open_path_or_stream(path, "w")
38
39
  if isinstance(indent, str):
39
40
  indent = int(indent)
@@ -75,10 +76,12 @@ class JsonfileReader(AbstractReader):
75
76
  self.fp = None
76
77
 
77
78
  def __iter__(self) -> Iterator[Record]:
79
+ ctx = get_app_context()
80
+ selector = self.selector
78
81
  for line in self.fp:
79
82
  obj = self.packer.unpack(line)
80
83
  if isinstance(obj, record.Record):
81
- if not self.selector or self.selector.match(obj):
84
+ if match_record_with_context(obj, selector, ctx):
82
85
  yield obj
83
86
  elif isinstance(obj, record.RecordDescriptor):
84
87
  pass
@@ -90,5 +93,5 @@ class JsonfileReader(AbstractReader):
90
93
  ]
91
94
  desc = record.RecordDescriptor("json/record", fields)
92
95
  obj = desc(**jd)
93
- if not self.selector or self.selector.match(obj):
96
+ if match_record_with_context(obj, selector, ctx):
94
97
  yield obj
@@ -7,6 +7,7 @@ from pymongo import MongoClient
7
7
 
8
8
  from flow import record
9
9
  from flow.record.adapter import AbstractReader, AbstractWriter
10
+ from flow.record.context import get_app_context, match_record_with_context
10
11
  from flow.record.selector import make_selector
11
12
 
12
13
  if TYPE_CHECKING:
@@ -91,6 +92,8 @@ class MongoReader(AbstractReader):
91
92
 
92
93
  def __iter__(self) -> Iterator[Record]:
93
94
  desc = None
95
+ ctx = get_app_context()
96
+ selector = self.selector
94
97
  for r in self.collection.find():
95
98
  if r["_type"] not in self.descriptors:
96
99
  packed_desc = self.coll_descriptors.find({"name": r["_type"]})[0]["descriptor"]
@@ -106,5 +109,5 @@ class MongoReader(AbstractReader):
106
109
  r[k] = int(r[k])
107
110
 
108
111
  obj = desc(**r)
109
- if not self.selector or self.selector.match(obj):
112
+ if match_record_with_context(obj, selector, ctx):
110
113
  yield obj
@@ -18,7 +18,7 @@ except ImportError:
18
18
 
19
19
  from flow.record.adapter import AbstractReader, AbstractWriter
20
20
  from flow.record.jsonpacker import JsonRecordPacker
21
- from flow.record.utils import to_base64, to_bytes, to_str
21
+ from flow.record.utils import boolean_argument, to_base64, to_bytes, to_str
22
22
 
23
23
  if TYPE_CHECKING:
24
24
  from flow.record.base import Record
@@ -35,7 +35,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
35
35
  [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
36
36
  """
37
37
 
38
- log = logging.getLogger(__package__)
38
+ log = logging.getLogger(__name__)
39
39
 
40
40
  # Amount of records to bundle into a single request when sending data over HTTP(S).
41
41
  RECORD_BUFFER_LIMIT = 20
@@ -218,7 +218,7 @@ class SplunkWriter(AbstractWriter):
218
218
  self.token = f"Splunk {self.token}"
219
219
 
220
220
  # Assume verify=True unless specified otherwise.
221
- self.verify = str(ssl_verify).lower() not in ("0", "false")
221
+ self.verify = boolean_argument(ssl_verify)
222
222
  if not self.verify:
223
223
  log.warning("Certificate verification is disabled")
224
224
 
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING
9
9
  from flow.record import Record, RecordDescriptor
10
10
  from flow.record.adapter import AbstractReader, AbstractWriter
11
11
  from flow.record.base import RESERVED_FIELDS, normalize_fieldname
12
+ from flow.record.context import get_app_context, match_record_with_context
12
13
  from flow.record.selector import Selector, make_selector
13
14
 
14
15
  if TYPE_CHECKING:
@@ -191,14 +192,16 @@ class SqliteReader(AbstractReader):
191
192
  row[idx] = None
192
193
  elif isinstance(value, str):
193
194
  row[idx] = value.encode(errors="surrogateescape")
194
- yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
195
+ yield descriptor_cls.init_from_dict(dict(zip(fnames, row, strict=False)))
195
196
 
196
197
  def __iter__(self) -> Iterator[Record]:
197
198
  """Iterate over all tables in the database and yield records."""
199
+ ctx = get_app_context()
200
+ selector = self.selector
198
201
  for table_name in self.table_names():
199
202
  self.logger.debug("Reading table: %s", table_name)
200
203
  for record in self.read_table(table_name):
201
- if not self.selector or self.selector.match(record):
204
+ if match_record_with_context(record, selector, ctx):
202
205
  yield record
203
206
 
204
207