flow.record 3.17.dev4__tar.gz → 3.17.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.17.dev4/flow.record.egg-info → flow_record-3.17.dev6}/PKG-INFO +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/elastic.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/line.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/sqlite.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/text.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/xlsx.py +2 -2
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/base.py +2 -2
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/__init__.py +2 -27
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/net/ip.py +37 -18
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/net/ipv4.py +0 -7
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/jsonpacker.py +1 -5
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/utils.py +18 -22
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/version.py +2 -2
- {flow_record-3.17.dev4 → flow_record-3.17.dev6/flow.record.egg-info}/PKG-INFO +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow.record.egg-info/SOURCES.txt +2 -1
- flow_record-3.17.dev6/tests/test_adapter_line.py +29 -0
- flow_record-3.17.dev6/tests/test_adapter_text.py +28 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_fieldtype_ip.py +15 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_fieldtypes.py +2 -9
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_json_packer.py +20 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_record.py +25 -4
- flow_record-3.17.dev4/tests/utils_inspect.py +0 -58
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/COPYRIGHT +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/LICENSE +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/MANIFEST.in +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/README.md +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/examples/filesystem.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/examples/passivedns.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/examples/records.json +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/examples/tcpconn.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/split.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/splunk.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/exceptions.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/packer.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/selector.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/stream.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow/record/whitelist.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/pyproject.toml +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/setup.cfg +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/_utils.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/docs/Makefile +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/docs/conf.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/docs/index.rst +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/selector_explain_example.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/standalone_test.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_avro.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_deprecations.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_elastic_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_packer.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_rdump.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_record_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_regression.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_selector.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_splunk_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tests/test_xlsx_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev6}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.17.dev4
|
|
3
|
+
Version: 3.17.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -106,7 +106,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
if self.hash_record:
|
|
109
|
-
document["_id"] = hashlib.md5(document["_source"].encode()).hexdigest()
|
|
109
|
+
document["_id"] = hashlib.md5(document["_source"].encode(errors="surrogateescape")).hexdigest()
|
|
110
110
|
|
|
111
111
|
return document
|
|
112
112
|
|
|
@@ -69,7 +69,7 @@ class LineWriter(AbstractWriter):
|
|
|
69
69
|
for key, value in rdict.items():
|
|
70
70
|
if rdict_types:
|
|
71
71
|
key = f"{key} ({rdict_types[key]})"
|
|
72
|
-
self.fp.write(fmt.format(key, value).encode())
|
|
72
|
+
self.fp.write(fmt.format(key, value).encode(errors="surrogateescape"))
|
|
73
73
|
|
|
74
74
|
def flush(self) -> None:
|
|
75
75
|
if self.fp:
|
|
@@ -187,7 +187,7 @@ class SqliteReader(AbstractReader):
|
|
|
187
187
|
if value == 0:
|
|
188
188
|
row[idx] = None
|
|
189
189
|
elif isinstance(value, str):
|
|
190
|
-
row[idx] = value.encode("
|
|
190
|
+
row[idx] = value.encode(errors="surrogateescape")
|
|
191
191
|
yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
|
|
192
192
|
|
|
193
193
|
def __iter__(self) -> Iterator[Record]:
|
|
@@ -41,7 +41,7 @@ class TextWriter(AbstractWriter):
|
|
|
41
41
|
buf = self.format_spec.format_map(DefaultMissing(rec._asdict()))
|
|
42
42
|
else:
|
|
43
43
|
buf = repr(rec)
|
|
44
|
-
self.fp.write(buf.encode() + b"\n")
|
|
44
|
+
self.fp.write(buf.encode(errors="surrogateescape") + b"\n")
|
|
45
45
|
|
|
46
46
|
# because stdout is usually line buffered we force flush here if wanted
|
|
47
47
|
if self.auto_flush:
|
|
@@ -36,7 +36,7 @@ def sanitize_fieldvalues(values: Iterator[Any]) -> Iterator[Any]:
|
|
|
36
36
|
elif isinstance(value, bytes):
|
|
37
37
|
base64_encode = False
|
|
38
38
|
try:
|
|
39
|
-
new_value = 'b"' + value.decode() + '"'
|
|
39
|
+
new_value = 'b"' + value.decode(errors="surrogateescape") + '"'
|
|
40
40
|
if ILLEGAL_CHARACTERS_RE.search(new_value):
|
|
41
41
|
base64_encode = True
|
|
42
42
|
else:
|
|
@@ -142,7 +142,7 @@ class XlsxReader(AbstractReader):
|
|
|
142
142
|
if field_types[idx] == "bytes":
|
|
143
143
|
if value[1] == '"': # If so, we know this is b""
|
|
144
144
|
# Cut of the b" at the start and the trailing "
|
|
145
|
-
value = value[2:-1].encode()
|
|
145
|
+
value = value[2:-1].encode(errors="surrogateescape")
|
|
146
146
|
else:
|
|
147
147
|
# If not, we know it is base64 encoded (so we cut of the starting 'base64:')
|
|
148
148
|
value = b64decode(value[7:])
|
|
@@ -61,7 +61,7 @@ except ImportError:
|
|
|
61
61
|
|
|
62
62
|
from collections import OrderedDict
|
|
63
63
|
|
|
64
|
-
from .utils import
|
|
64
|
+
from .utils import to_str
|
|
65
65
|
from .whitelist import WHITELIST, WHITELIST_TREE
|
|
66
66
|
|
|
67
67
|
log = logging.getLogger(__package__)
|
|
@@ -513,7 +513,7 @@ class RecordDescriptor:
|
|
|
513
513
|
name, fields = parse_def(name)
|
|
514
514
|
|
|
515
515
|
self.name = name
|
|
516
|
-
self._field_tuples = tuple([(
|
|
516
|
+
self._field_tuples = tuple([(to_str(k), to_str(v)) for k, v in fields])
|
|
517
517
|
self.recordType = _generate_record_class(name, self._field_tuples)
|
|
518
518
|
self.recordType._desc = self
|
|
519
519
|
|
|
@@ -28,7 +28,6 @@ except ImportError:
|
|
|
28
28
|
from flow.record.base import FieldType
|
|
29
29
|
|
|
30
30
|
RE_NORMALIZE_PATH = re.compile(r"[\\/]+")
|
|
31
|
-
NATIVE_UNICODE = isinstance("", str)
|
|
32
31
|
|
|
33
32
|
UTC = timezone.utc
|
|
34
33
|
|
|
@@ -207,10 +206,7 @@ class stringlist(list, FieldType):
|
|
|
207
206
|
class string(string_type, FieldType):
|
|
208
207
|
def __new__(cls, value):
|
|
209
208
|
if isinstance(value, bytes_type):
|
|
210
|
-
value =
|
|
211
|
-
if isinstance(value, bytes_type):
|
|
212
|
-
# Still bytes, so decoding failed (Python 2)
|
|
213
|
-
return bytes(value)
|
|
209
|
+
value = value.decode(errors="surrogateescape")
|
|
214
210
|
return super().__new__(cls, value)
|
|
215
211
|
|
|
216
212
|
def _pack(self):
|
|
@@ -221,27 +217,6 @@ class string(string_type, FieldType):
|
|
|
221
217
|
return defang(self)
|
|
222
218
|
return str.__format__(self, spec)
|
|
223
219
|
|
|
224
|
-
@classmethod
|
|
225
|
-
def _decode(cls, data, encoding):
|
|
226
|
-
"""Decode a byte-string into a unicode-string.
|
|
227
|
-
|
|
228
|
-
Python 3: When `data` contains invalid unicode characters a `UnicodeDecodeError` is raised.
|
|
229
|
-
Python 2: When `data` contains invalid unicode characters the original byte-string is returned.
|
|
230
|
-
"""
|
|
231
|
-
if NATIVE_UNICODE:
|
|
232
|
-
# Raises exception on decode error
|
|
233
|
-
return data.decode(encoding)
|
|
234
|
-
try:
|
|
235
|
-
return data.decode(encoding)
|
|
236
|
-
except UnicodeDecodeError:
|
|
237
|
-
# Fallback to bytes (Python 2 only)
|
|
238
|
-
preview = data[:16].encode("hex_codec") + (".." if len(data) > 16 else "")
|
|
239
|
-
warnings.warn(
|
|
240
|
-
"Got binary data in string field (hex: {}). Compatibility is not guaranteed.".format(preview),
|
|
241
|
-
RuntimeWarning,
|
|
242
|
-
)
|
|
243
|
-
return data
|
|
244
|
-
|
|
245
220
|
|
|
246
221
|
# Alias for backwards compatibility
|
|
247
222
|
wstring = string
|
|
@@ -278,7 +253,7 @@ class datetime(_dt, FieldType):
|
|
|
278
253
|
if len(args) == 1 and not kwargs:
|
|
279
254
|
arg = args[0]
|
|
280
255
|
if isinstance(arg, bytes_type):
|
|
281
|
-
arg = arg.decode("
|
|
256
|
+
arg = arg.decode(errors="surrogateescape")
|
|
282
257
|
if isinstance(arg, string_type):
|
|
283
258
|
# If we are on Python 3.11 or newer, we can use fromisoformat() to parse the string (fast path)
|
|
284
259
|
#
|
|
@@ -1,38 +1,54 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from ipaddress import (
|
|
4
|
+
IPv4Address,
|
|
5
|
+
IPv4Network,
|
|
6
|
+
IPv6Address,
|
|
7
|
+
IPv6Network,
|
|
8
|
+
ip_address,
|
|
9
|
+
ip_network,
|
|
10
|
+
)
|
|
11
|
+
from typing import Union
|
|
2
12
|
|
|
3
13
|
from flow.record.base import FieldType
|
|
4
14
|
from flow.record.fieldtypes import defang
|
|
5
15
|
|
|
16
|
+
_IPNetwork = Union[IPv4Network, IPv6Network]
|
|
17
|
+
_IPAddress = Union[IPv4Address, IPv6Address]
|
|
18
|
+
|
|
6
19
|
|
|
7
20
|
class ipaddress(FieldType):
|
|
8
21
|
val = None
|
|
9
22
|
_type = "net.ipaddress"
|
|
10
23
|
|
|
11
|
-
def __init__(self, addr):
|
|
24
|
+
def __init__(self, addr: str | int | bytes):
|
|
12
25
|
self.val = ip_address(addr)
|
|
13
26
|
|
|
14
|
-
def __eq__(self, b):
|
|
27
|
+
def __eq__(self, b: str | int | bytes | _IPAddress) -> bool:
|
|
15
28
|
try:
|
|
16
29
|
return self.val == ip_address(b)
|
|
17
30
|
except ValueError:
|
|
18
31
|
return False
|
|
19
32
|
|
|
20
|
-
def
|
|
33
|
+
def __hash__(self) -> int:
|
|
34
|
+
return hash(self.val)
|
|
35
|
+
|
|
36
|
+
def __str__(self) -> str:
|
|
21
37
|
return str(self.val)
|
|
22
38
|
|
|
23
|
-
def __repr__(self):
|
|
24
|
-
return "{}({!r})"
|
|
39
|
+
def __repr__(self) -> str:
|
|
40
|
+
return f"{self._type}({str(self)!r})"
|
|
25
41
|
|
|
26
|
-
def __format__(self, spec):
|
|
42
|
+
def __format__(self, spec: str) -> str:
|
|
27
43
|
if spec == "defang":
|
|
28
44
|
return defang(str(self))
|
|
29
45
|
return str.__format__(str(self), spec)
|
|
30
46
|
|
|
31
|
-
def _pack(self):
|
|
47
|
+
def _pack(self) -> int:
|
|
32
48
|
return int(self.val)
|
|
33
49
|
|
|
34
50
|
@staticmethod
|
|
35
|
-
def _unpack(data):
|
|
51
|
+
def _unpack(data: int) -> ipaddress:
|
|
36
52
|
return ipaddress(data)
|
|
37
53
|
|
|
38
54
|
|
|
@@ -40,17 +56,20 @@ class ipnetwork(FieldType):
|
|
|
40
56
|
val = None
|
|
41
57
|
_type = "net.ipnetwork"
|
|
42
58
|
|
|
43
|
-
def __init__(self, addr):
|
|
59
|
+
def __init__(self, addr: str | int | bytes):
|
|
44
60
|
self.val = ip_network(addr)
|
|
45
61
|
|
|
46
|
-
def __eq__(self, b):
|
|
62
|
+
def __eq__(self, b: str | int | bytes | _IPNetwork) -> bool:
|
|
47
63
|
try:
|
|
48
64
|
return self.val == ip_network(b)
|
|
49
65
|
except ValueError:
|
|
50
66
|
return False
|
|
51
67
|
|
|
68
|
+
def __hash__(self) -> int:
|
|
69
|
+
return hash(self.val)
|
|
70
|
+
|
|
52
71
|
@staticmethod
|
|
53
|
-
def _is_subnet_of(a, b):
|
|
72
|
+
def _is_subnet_of(a: _IPNetwork, b: _IPNetwork) -> bool:
|
|
54
73
|
try:
|
|
55
74
|
# Always false if one is v4 and the other is v6.
|
|
56
75
|
if a._version != b._version:
|
|
@@ -59,23 +78,23 @@ class ipnetwork(FieldType):
|
|
|
59
78
|
except AttributeError:
|
|
60
79
|
raise TypeError("Unable to test subnet containment " "between {} and {}".format(a, b))
|
|
61
80
|
|
|
62
|
-
def __contains__(self, b):
|
|
81
|
+
def __contains__(self, b: str | int | bytes | _IPAddress) -> bool:
|
|
63
82
|
try:
|
|
64
83
|
return self._is_subnet_of(ip_network(b), self.val)
|
|
65
84
|
except (ValueError, TypeError):
|
|
66
85
|
return False
|
|
67
86
|
|
|
68
|
-
def __str__(self):
|
|
87
|
+
def __str__(self) -> str:
|
|
69
88
|
return str(self.val)
|
|
70
89
|
|
|
71
|
-
def __repr__(self):
|
|
72
|
-
return "{}({!r})"
|
|
90
|
+
def __repr__(self) -> str:
|
|
91
|
+
return f"{self._type}({str(self)!r})"
|
|
73
92
|
|
|
74
|
-
def _pack(self):
|
|
93
|
+
def _pack(self) -> str:
|
|
75
94
|
return self.val.compressed
|
|
76
95
|
|
|
77
96
|
@staticmethod
|
|
78
|
-
def _unpack(data):
|
|
97
|
+
def _unpack(data: str) -> ipnetwork:
|
|
79
98
|
return ipnetwork(data)
|
|
80
99
|
|
|
81
100
|
|
|
@@ -3,7 +3,6 @@ import struct
|
|
|
3
3
|
import warnings
|
|
4
4
|
|
|
5
5
|
from flow.record import FieldType
|
|
6
|
-
from flow.record.utils import to_native_str
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
def addr_long(s):
|
|
@@ -45,9 +44,6 @@ class subnet(FieldType):
|
|
|
45
44
|
DeprecationWarning,
|
|
46
45
|
stacklevel=5,
|
|
47
46
|
)
|
|
48
|
-
if isinstance(addr, type("")):
|
|
49
|
-
addr = to_native_str(addr)
|
|
50
|
-
|
|
51
47
|
if not isinstance(addr, str):
|
|
52
48
|
raise TypeError("Subnet() argument 1 must be string, not {}".format(type(addr).__name__))
|
|
53
49
|
|
|
@@ -67,9 +63,6 @@ class subnet(FieldType):
|
|
|
67
63
|
if addr is None:
|
|
68
64
|
return False
|
|
69
65
|
|
|
70
|
-
if isinstance(addr, type("")):
|
|
71
|
-
addr = to_native_str(addr)
|
|
72
|
-
|
|
73
66
|
if isinstance(addr, str):
|
|
74
67
|
addr = addr_long(addr)
|
|
75
68
|
|
|
@@ -47,12 +47,8 @@ class JsonRecordPacker:
|
|
|
47
47
|
serial["_recorddescriptor"] = obj._desc.identifier
|
|
48
48
|
|
|
49
49
|
for field_type, field_name in obj._desc.get_field_tuples():
|
|
50
|
-
# PYTHON2: Because "bytes" are also "str" we have to handle this here
|
|
51
|
-
if field_type == "bytes" and isinstance(serial[field_name], str):
|
|
52
|
-
serial[field_name] = base64.b64encode(serial[field_name]).decode()
|
|
53
|
-
|
|
54
50
|
# Boolean field types should be cast to a bool instead of staying ints
|
|
55
|
-
|
|
51
|
+
if field_type == "boolean" and isinstance(serial[field_name], int):
|
|
56
52
|
serial[field_name] = bool(serial[field_name])
|
|
57
53
|
|
|
58
54
|
return serial
|
|
@@ -3,13 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import base64
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
|
+
import warnings
|
|
6
7
|
from functools import wraps
|
|
7
8
|
from typing import BinaryIO, TextIO
|
|
8
9
|
|
|
9
|
-
_native = str
|
|
10
|
-
_unicode = type("")
|
|
11
|
-
_bytes = type(b"")
|
|
12
|
-
|
|
13
10
|
|
|
14
11
|
def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
|
|
15
12
|
"""Return the stdout stream as binary or text stream.
|
|
@@ -50,33 +47,32 @@ def is_stdout(fp: TextIO | BinaryIO) -> bool:
|
|
|
50
47
|
|
|
51
48
|
def to_bytes(value):
|
|
52
49
|
"""Convert a value to a byte string."""
|
|
53
|
-
if value is None or isinstance(value,
|
|
50
|
+
if value is None or isinstance(value, bytes):
|
|
54
51
|
return value
|
|
55
|
-
if isinstance(value,
|
|
56
|
-
return value.encode("
|
|
57
|
-
return
|
|
52
|
+
if isinstance(value, str):
|
|
53
|
+
return value.encode(errors="surrogateescape")
|
|
54
|
+
return bytes(value)
|
|
58
55
|
|
|
59
56
|
|
|
60
57
|
def to_str(value):
|
|
61
58
|
"""Convert a value to a unicode string."""
|
|
62
|
-
if value is None or isinstance(value,
|
|
59
|
+
if value is None or isinstance(value, str):
|
|
63
60
|
return value
|
|
64
|
-
if isinstance(value,
|
|
65
|
-
return value.decode("
|
|
66
|
-
return
|
|
61
|
+
if isinstance(value, bytes):
|
|
62
|
+
return value.decode(errors="surrogateescape")
|
|
63
|
+
return str(value)
|
|
67
64
|
|
|
68
65
|
|
|
69
66
|
def to_native_str(value):
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return _native(value)
|
|
67
|
+
warnings.warn(
|
|
68
|
+
(
|
|
69
|
+
"The to_native_str() function is deprecated, "
|
|
70
|
+
"this function will be removed in flow.record 3.20, "
|
|
71
|
+
"use to_str() instead"
|
|
72
|
+
),
|
|
73
|
+
DeprecationWarning,
|
|
74
|
+
)
|
|
75
|
+
return to_str(value)
|
|
80
76
|
|
|
81
77
|
|
|
82
78
|
def to_base64(value):
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.17.dev4'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 17, 'dev4')
|
|
15
|
+
__version__ = version = '3.17.dev6'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 17, 'dev6')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.17.dev4
|
|
3
|
+
Version: 3.17.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -54,6 +54,8 @@ tests/__init__.py
|
|
|
54
54
|
tests/_utils.py
|
|
55
55
|
tests/selector_explain_example.py
|
|
56
56
|
tests/standalone_test.py
|
|
57
|
+
tests/test_adapter_line.py
|
|
58
|
+
tests/test_adapter_text.py
|
|
57
59
|
tests/test_avro.py
|
|
58
60
|
tests/test_avro_adapter.py
|
|
59
61
|
tests/test_compiled_selector.py
|
|
@@ -75,7 +77,6 @@ tests/test_selector.py
|
|
|
75
77
|
tests/test_splunk_adapter.py
|
|
76
78
|
tests/test_sqlite_duckdb_adapter.py
|
|
77
79
|
tests/test_xlsx_adapter.py
|
|
78
|
-
tests/utils_inspect.py
|
|
79
80
|
tests/docs/Makefile
|
|
80
81
|
tests/docs/conf.py
|
|
81
82
|
tests/docs/index.rst
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from flow.record import RecordDescriptor
|
|
4
|
+
from flow.record.adapter.line import LineWriter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_line_writer_write_surrogateescape():
|
|
8
|
+
output = BytesIO()
|
|
9
|
+
|
|
10
|
+
lw = LineWriter(
|
|
11
|
+
path=output,
|
|
12
|
+
fields="name",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
TestRecord = RecordDescriptor(
|
|
16
|
+
"test/string",
|
|
17
|
+
[
|
|
18
|
+
("string", "name"),
|
|
19
|
+
],
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# construct from 'bytes' but with invalid unicode bytes
|
|
23
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
24
|
+
lw.write(record)
|
|
25
|
+
|
|
26
|
+
output.seek(0)
|
|
27
|
+
data = output.read()
|
|
28
|
+
|
|
29
|
+
assert data == b"--[ RECORD 1 ]--\nname = R\xc3\xa9\xeamy\n"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from flow.record import RecordDescriptor
|
|
4
|
+
from flow.record.adapter.text import TextWriter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_text_writer_write_surrogateescape():
|
|
8
|
+
output = BytesIO()
|
|
9
|
+
|
|
10
|
+
tw = TextWriter(
|
|
11
|
+
path=output,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
TestRecord = RecordDescriptor(
|
|
15
|
+
"test/string",
|
|
16
|
+
[
|
|
17
|
+
("string", "name"),
|
|
18
|
+
],
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# construct from 'bytes' but with invalid unicode bytes
|
|
22
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
23
|
+
tw.write(record)
|
|
24
|
+
|
|
25
|
+
output.seek(0)
|
|
26
|
+
data = output.read()
|
|
27
|
+
|
|
28
|
+
assert data == b"<test/string name='R\xc3\xa9\\udceamy'>\n"
|
|
@@ -48,12 +48,19 @@ def test_record_ipaddress():
|
|
|
48
48
|
assert TestRecord("0.0.0.0").ip == "0.0.0.0"
|
|
49
49
|
assert TestRecord("192.168.0.1").ip == "192.168.0.1"
|
|
50
50
|
assert TestRecord("255.255.255.255").ip == "255.255.255.255"
|
|
51
|
+
assert hash(TestRecord("192.168.0.1").ip) == hash(net.ipaddress("192.168.0.1"))
|
|
51
52
|
|
|
52
53
|
# ipv6
|
|
53
54
|
assert TestRecord("::1").ip == "::1"
|
|
54
55
|
assert TestRecord("2001:4860:4860::8888").ip == "2001:4860:4860::8888"
|
|
55
56
|
assert TestRecord("2001:4860:4860::4444").ip == "2001:4860:4860::4444"
|
|
56
57
|
|
|
58
|
+
# Test whether it functions in a set
|
|
59
|
+
data = {TestRecord(ip).ip for ip in ["192.168.0.1", "192.168.0.1", "::1", "::1"]}
|
|
60
|
+
assert len(data) == 2
|
|
61
|
+
assert net.ipaddress("::1") in data
|
|
62
|
+
assert net.ipaddress("192.168.0.1") in data
|
|
63
|
+
|
|
57
64
|
# instantiate from different types
|
|
58
65
|
assert TestRecord(1).ip == "0.0.0.1"
|
|
59
66
|
assert TestRecord(0x7F0000FF).ip == "127.0.0.255"
|
|
@@ -90,6 +97,7 @@ def test_record_ipnetwork():
|
|
|
90
97
|
assert "192.168.1.1" not in r.subnet
|
|
91
98
|
assert isinstance(r.subnet, net.ipnetwork)
|
|
92
99
|
assert repr(r.subnet) == "net.ipnetwork('192.168.0.0/24')"
|
|
100
|
+
assert hash(r.subnet) == hash(net.ipnetwork("192.168.0.0/24"))
|
|
93
101
|
|
|
94
102
|
r = TestRecord("192.168.1.1/32")
|
|
95
103
|
assert r.subnet == "192.168.1.1"
|
|
@@ -111,6 +119,13 @@ def test_record_ipnetwork():
|
|
|
111
119
|
assert "64:ff9b::0.0.0.0" in r.subnet
|
|
112
120
|
assert "64:ff9b::255.255.255.255" in r.subnet
|
|
113
121
|
|
|
122
|
+
# Test whether it functions in a set
|
|
123
|
+
data = {TestRecord(x).subnet for x in ["192.168.0.0/24", "192.168.0.0/24", "::1", "::1"]}
|
|
124
|
+
assert len(data) == 2
|
|
125
|
+
assert net.ipnetwork("::1") in data
|
|
126
|
+
assert net.ipnetwork("192.168.0.0/24") in data
|
|
127
|
+
assert "::1" not in data
|
|
128
|
+
|
|
114
129
|
|
|
115
130
|
@pytest.mark.parametrize("PSelector", [Selector, CompiledSelector])
|
|
116
131
|
def test_selector_ipaddress(PSelector):
|
|
@@ -213,15 +213,8 @@ def test_string():
|
|
|
213
213
|
assert r.name == "Rémy"
|
|
214
214
|
|
|
215
215
|
# construct from 'bytes' but with invalid unicode bytes
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
with pytest.raises(UnicodeDecodeError):
|
|
219
|
-
TestRecord(b"R\xc3\xa9\xeamy")
|
|
220
|
-
else:
|
|
221
|
-
# Python 2
|
|
222
|
-
with pytest.warns(RuntimeWarning):
|
|
223
|
-
r = TestRecord(b"R\xc3\xa9\xeamy")
|
|
224
|
-
assert r.name
|
|
216
|
+
r = TestRecord(b"R\xc3\xa9\xeamy")
|
|
217
|
+
assert r.name == "Ré\udceamy"
|
|
225
218
|
|
|
226
219
|
|
|
227
220
|
def test_wstring():
|
|
@@ -90,3 +90,23 @@ def test_record_pack_bool_regression() -> None:
|
|
|
90
90
|
|
|
91
91
|
# pack the json string back to a record and make sure it is the same as before
|
|
92
92
|
assert packer.unpack(data) == record
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_record_pack_surrogateescape() -> None:
|
|
96
|
+
TestRecord = RecordDescriptor(
|
|
97
|
+
"test/string",
|
|
98
|
+
[
|
|
99
|
+
("string", "name"),
|
|
100
|
+
],
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
104
|
+
packer = JsonRecordPacker()
|
|
105
|
+
|
|
106
|
+
data = packer.pack(record)
|
|
107
|
+
|
|
108
|
+
# pack to json string and check if the 3rd and 4th byte are properly surrogate escaped
|
|
109
|
+
assert data.startswith('{"name": "R\\u00e9\\udceamy",')
|
|
110
|
+
|
|
111
|
+
# pack the json string back to a record and make sure it is the same as before
|
|
112
|
+
assert packer.unpack(data) == record
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import importlib
|
|
2
|
+
import inspect
|
|
2
3
|
import os
|
|
3
4
|
import sys
|
|
4
5
|
from unittest.mock import patch
|
|
@@ -27,8 +28,6 @@ from flow.record.base import (
|
|
|
27
28
|
from flow.record.exceptions import RecordDescriptorError
|
|
28
29
|
from flow.record.stream import RecordFieldRewriter
|
|
29
30
|
|
|
30
|
-
from . import utils_inspect as inspect
|
|
31
|
-
|
|
32
31
|
|
|
33
32
|
def test_record_creation():
|
|
34
33
|
TestRecord = RecordDescriptor(
|
|
@@ -288,8 +287,30 @@ def test_record_printer_stdout(capsys):
|
|
|
288
287
|
writer.write(record)
|
|
289
288
|
|
|
290
289
|
out, err = capsys.readouterr()
|
|
291
|
-
|
|
292
|
-
|
|
290
|
+
expected = "<test/a a_string='hello' common='world' a_count=10>\n"
|
|
291
|
+
assert out == expected
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_record_printer_stdout_surrogateescape(capsys):
|
|
295
|
+
Record = RecordDescriptor(
|
|
296
|
+
"test/a",
|
|
297
|
+
[
|
|
298
|
+
("string", "name"),
|
|
299
|
+
],
|
|
300
|
+
)
|
|
301
|
+
record = Record(b"R\xc3\xa9\xeamy")
|
|
302
|
+
|
|
303
|
+
# fake capsys to be a tty.
|
|
304
|
+
def isatty():
|
|
305
|
+
return True
|
|
306
|
+
|
|
307
|
+
capsys._capture.out.tmpfile.isatty = isatty
|
|
308
|
+
|
|
309
|
+
writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout))
|
|
310
|
+
writer.write(record)
|
|
311
|
+
|
|
312
|
+
out, err = capsys.readouterr()
|
|
313
|
+
expected = "<test/a name='Ré\\udceamy'>\n"
|
|
293
314
|
assert out == expected
|
|
294
315
|
|
|
295
316
|
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Backport of `inspect.signature` for Python 2.
|
|
3
|
-
|
|
4
|
-
Based on: https://github.com/python/cpython/blob/3.7/Lib/inspect.py
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import collections
|
|
8
|
-
import inspect
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class _empty:
|
|
12
|
-
pass
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class Parameter:
|
|
16
|
-
POSITIONAL_ONLY = 0
|
|
17
|
-
POSITIONAL_OR_KEYWORD = 1
|
|
18
|
-
VAR_POSITIONAL = 2
|
|
19
|
-
KEYWORD_ONLY = 3
|
|
20
|
-
VAR_KEYWORD = 4
|
|
21
|
-
|
|
22
|
-
empty = _empty
|
|
23
|
-
|
|
24
|
-
def __init__(self, name, kind, default=_empty):
|
|
25
|
-
self.name = name
|
|
26
|
-
self.kind = kind
|
|
27
|
-
self.default = default
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class Signature:
|
|
31
|
-
empty = _empty
|
|
32
|
-
|
|
33
|
-
def __init__(self, parameters=None):
|
|
34
|
-
self.parameters = parameters
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def signature(obj):
|
|
38
|
-
try:
|
|
39
|
-
# Python 3
|
|
40
|
-
return inspect.signature(obj)
|
|
41
|
-
except AttributeError:
|
|
42
|
-
# Python 2
|
|
43
|
-
spec = inspect.getargspec(obj)
|
|
44
|
-
|
|
45
|
-
# Create parameter objects which are compatible with python 3 objects
|
|
46
|
-
parameters = collections.OrderedDict()
|
|
47
|
-
for i in range(0, len(spec.args)):
|
|
48
|
-
arg = spec.args[i]
|
|
49
|
-
default = _empty
|
|
50
|
-
if spec.defaults and (len(spec.args) - i <= len(spec.defaults)):
|
|
51
|
-
default = spec.defaults[i - len(spec.args)]
|
|
52
|
-
parameters[arg] = Parameter(name=arg, default=default, kind=Parameter.POSITIONAL_OR_KEYWORD)
|
|
53
|
-
if spec.varargs:
|
|
54
|
-
parameters[spec.varargs] = Parameter(name=spec.varargs, kind=Parameter.VAR_POSITIONAL)
|
|
55
|
-
if spec.keywords:
|
|
56
|
-
parameters[spec.keywords] = Parameter(name=spec.keywords, kind=Parameter.VAR_KEYWORD)
|
|
57
|
-
|
|
58
|
-
return Signature(parameters=parameters)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|