flow.record 3.17.dev4__tar.gz → 3.17.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.17.dev4/flow.record.egg-info → flow_record-3.17.dev5}/PKG-INFO +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/elastic.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/line.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/sqlite.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/text.py +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/xlsx.py +2 -2
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/base.py +2 -2
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/__init__.py +2 -27
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ipv4.py +0 -7
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/jsonpacker.py +1 -5
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/utils.py +18 -22
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/version.py +2 -2
- {flow_record-3.17.dev4 → flow_record-3.17.dev5/flow.record.egg-info}/PKG-INFO +1 -1
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/SOURCES.txt +2 -1
- flow_record-3.17.dev5/tests/test_adapter_line.py +29 -0
- flow_record-3.17.dev5/tests/test_adapter_text.py +28 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_fieldtypes.py +2 -9
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_json_packer.py +20 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_record.py +25 -4
- flow_record-3.17.dev4/tests/utils_inspect.py +0 -58
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/COPYRIGHT +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/LICENSE +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/MANIFEST.in +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/README.md +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/filesystem.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/passivedns.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/records.json +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/tcpconn.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/split.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/splunk.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/exceptions.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/packer.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/selector.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/stream.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/whitelist.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/pyproject.toml +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/setup.cfg +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/__init__.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/_utils.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/docs/Makefile +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/docs/conf.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/docs/index.rst +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/selector_explain_example.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/standalone_test.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_avro.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_deprecations.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_elastic_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_packer.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_rdump.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_record_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_regression.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_selector.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_splunk_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_xlsx_adapter.py +0 -0
- {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.17.dev4
|
|
3
|
+
Version: 3.17.dev5
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -106,7 +106,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
if self.hash_record:
|
|
109
|
-
document["_id"] = hashlib.md5(document["_source"].encode()).hexdigest()
|
|
109
|
+
document["_id"] = hashlib.md5(document["_source"].encode(errors="surrogateescape")).hexdigest()
|
|
110
110
|
|
|
111
111
|
return document
|
|
112
112
|
|
|
@@ -69,7 +69,7 @@ class LineWriter(AbstractWriter):
|
|
|
69
69
|
for key, value in rdict.items():
|
|
70
70
|
if rdict_types:
|
|
71
71
|
key = f"{key} ({rdict_types[key]})"
|
|
72
|
-
self.fp.write(fmt.format(key, value).encode())
|
|
72
|
+
self.fp.write(fmt.format(key, value).encode(errors="surrogateescape"))
|
|
73
73
|
|
|
74
74
|
def flush(self) -> None:
|
|
75
75
|
if self.fp:
|
|
@@ -187,7 +187,7 @@ class SqliteReader(AbstractReader):
|
|
|
187
187
|
if value == 0:
|
|
188
188
|
row[idx] = None
|
|
189
189
|
elif isinstance(value, str):
|
|
190
|
-
row[idx] = value.encode("
|
|
190
|
+
row[idx] = value.encode(errors="surrogateescape")
|
|
191
191
|
yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
|
|
192
192
|
|
|
193
193
|
def __iter__(self) -> Iterator[Record]:
|
|
@@ -41,7 +41,7 @@ class TextWriter(AbstractWriter):
|
|
|
41
41
|
buf = self.format_spec.format_map(DefaultMissing(rec._asdict()))
|
|
42
42
|
else:
|
|
43
43
|
buf = repr(rec)
|
|
44
|
-
self.fp.write(buf.encode() + b"\n")
|
|
44
|
+
self.fp.write(buf.encode(errors="surrogateescape") + b"\n")
|
|
45
45
|
|
|
46
46
|
# because stdout is usually line buffered we force flush here if wanted
|
|
47
47
|
if self.auto_flush:
|
|
@@ -36,7 +36,7 @@ def sanitize_fieldvalues(values: Iterator[Any]) -> Iterator[Any]:
|
|
|
36
36
|
elif isinstance(value, bytes):
|
|
37
37
|
base64_encode = False
|
|
38
38
|
try:
|
|
39
|
-
new_value = 'b"' + value.decode() + '"'
|
|
39
|
+
new_value = 'b"' + value.decode(errors="surrogateescape") + '"'
|
|
40
40
|
if ILLEGAL_CHARACTERS_RE.search(new_value):
|
|
41
41
|
base64_encode = True
|
|
42
42
|
else:
|
|
@@ -142,7 +142,7 @@ class XlsxReader(AbstractReader):
|
|
|
142
142
|
if field_types[idx] == "bytes":
|
|
143
143
|
if value[1] == '"': # If so, we know this is b""
|
|
144
144
|
# Cut of the b" at the start and the trailing "
|
|
145
|
-
value = value[2:-1].encode()
|
|
145
|
+
value = value[2:-1].encode(errors="surrogateescape")
|
|
146
146
|
else:
|
|
147
147
|
# If not, we know it is base64 encoded (so we cut of the starting 'base64:')
|
|
148
148
|
value = b64decode(value[7:])
|
|
@@ -61,7 +61,7 @@ except ImportError:
|
|
|
61
61
|
|
|
62
62
|
from collections import OrderedDict
|
|
63
63
|
|
|
64
|
-
from .utils import
|
|
64
|
+
from .utils import to_str
|
|
65
65
|
from .whitelist import WHITELIST, WHITELIST_TREE
|
|
66
66
|
|
|
67
67
|
log = logging.getLogger(__package__)
|
|
@@ -513,7 +513,7 @@ class RecordDescriptor:
|
|
|
513
513
|
name, fields = parse_def(name)
|
|
514
514
|
|
|
515
515
|
self.name = name
|
|
516
|
-
self._field_tuples = tuple([(
|
|
516
|
+
self._field_tuples = tuple([(to_str(k), to_str(v)) for k, v in fields])
|
|
517
517
|
self.recordType = _generate_record_class(name, self._field_tuples)
|
|
518
518
|
self.recordType._desc = self
|
|
519
519
|
|
|
@@ -28,7 +28,6 @@ except ImportError:
|
|
|
28
28
|
from flow.record.base import FieldType
|
|
29
29
|
|
|
30
30
|
RE_NORMALIZE_PATH = re.compile(r"[\\/]+")
|
|
31
|
-
NATIVE_UNICODE = isinstance("", str)
|
|
32
31
|
|
|
33
32
|
UTC = timezone.utc
|
|
34
33
|
|
|
@@ -207,10 +206,7 @@ class stringlist(list, FieldType):
|
|
|
207
206
|
class string(string_type, FieldType):
|
|
208
207
|
def __new__(cls, value):
|
|
209
208
|
if isinstance(value, bytes_type):
|
|
210
|
-
value =
|
|
211
|
-
if isinstance(value, bytes_type):
|
|
212
|
-
# Still bytes, so decoding failed (Python 2)
|
|
213
|
-
return bytes(value)
|
|
209
|
+
value = value.decode(errors="surrogateescape")
|
|
214
210
|
return super().__new__(cls, value)
|
|
215
211
|
|
|
216
212
|
def _pack(self):
|
|
@@ -221,27 +217,6 @@ class string(string_type, FieldType):
|
|
|
221
217
|
return defang(self)
|
|
222
218
|
return str.__format__(self, spec)
|
|
223
219
|
|
|
224
|
-
@classmethod
|
|
225
|
-
def _decode(cls, data, encoding):
|
|
226
|
-
"""Decode a byte-string into a unicode-string.
|
|
227
|
-
|
|
228
|
-
Python 3: When `data` contains invalid unicode characters a `UnicodeDecodeError` is raised.
|
|
229
|
-
Python 2: When `data` contains invalid unicode characters the original byte-string is returned.
|
|
230
|
-
"""
|
|
231
|
-
if NATIVE_UNICODE:
|
|
232
|
-
# Raises exception on decode error
|
|
233
|
-
return data.decode(encoding)
|
|
234
|
-
try:
|
|
235
|
-
return data.decode(encoding)
|
|
236
|
-
except UnicodeDecodeError:
|
|
237
|
-
# Fallback to bytes (Python 2 only)
|
|
238
|
-
preview = data[:16].encode("hex_codec") + (".." if len(data) > 16 else "")
|
|
239
|
-
warnings.warn(
|
|
240
|
-
"Got binary data in string field (hex: {}). Compatibility is not guaranteed.".format(preview),
|
|
241
|
-
RuntimeWarning,
|
|
242
|
-
)
|
|
243
|
-
return data
|
|
244
|
-
|
|
245
220
|
|
|
246
221
|
# Alias for backwards compatibility
|
|
247
222
|
wstring = string
|
|
@@ -278,7 +253,7 @@ class datetime(_dt, FieldType):
|
|
|
278
253
|
if len(args) == 1 and not kwargs:
|
|
279
254
|
arg = args[0]
|
|
280
255
|
if isinstance(arg, bytes_type):
|
|
281
|
-
arg = arg.decode("
|
|
256
|
+
arg = arg.decode(errors="surrogateescape")
|
|
282
257
|
if isinstance(arg, string_type):
|
|
283
258
|
# If we are on Python 3.11 or newer, we can use fromisoformat() to parse the string (fast path)
|
|
284
259
|
#
|
|
@@ -3,7 +3,6 @@ import struct
|
|
|
3
3
|
import warnings
|
|
4
4
|
|
|
5
5
|
from flow.record import FieldType
|
|
6
|
-
from flow.record.utils import to_native_str
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
def addr_long(s):
|
|
@@ -45,9 +44,6 @@ class subnet(FieldType):
|
|
|
45
44
|
DeprecationWarning,
|
|
46
45
|
stacklevel=5,
|
|
47
46
|
)
|
|
48
|
-
if isinstance(addr, type("")):
|
|
49
|
-
addr = to_native_str(addr)
|
|
50
|
-
|
|
51
47
|
if not isinstance(addr, str):
|
|
52
48
|
raise TypeError("Subnet() argument 1 must be string, not {}".format(type(addr).__name__))
|
|
53
49
|
|
|
@@ -67,9 +63,6 @@ class subnet(FieldType):
|
|
|
67
63
|
if addr is None:
|
|
68
64
|
return False
|
|
69
65
|
|
|
70
|
-
if isinstance(addr, type("")):
|
|
71
|
-
addr = to_native_str(addr)
|
|
72
|
-
|
|
73
66
|
if isinstance(addr, str):
|
|
74
67
|
addr = addr_long(addr)
|
|
75
68
|
|
|
@@ -47,12 +47,8 @@ class JsonRecordPacker:
|
|
|
47
47
|
serial["_recorddescriptor"] = obj._desc.identifier
|
|
48
48
|
|
|
49
49
|
for field_type, field_name in obj._desc.get_field_tuples():
|
|
50
|
-
# PYTHON2: Because "bytes" are also "str" we have to handle this here
|
|
51
|
-
if field_type == "bytes" and isinstance(serial[field_name], str):
|
|
52
|
-
serial[field_name] = base64.b64encode(serial[field_name]).decode()
|
|
53
|
-
|
|
54
50
|
# Boolean field types should be cast to a bool instead of staying ints
|
|
55
|
-
|
|
51
|
+
if field_type == "boolean" and isinstance(serial[field_name], int):
|
|
56
52
|
serial[field_name] = bool(serial[field_name])
|
|
57
53
|
|
|
58
54
|
return serial
|
|
@@ -3,13 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import base64
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
|
+
import warnings
|
|
6
7
|
from functools import wraps
|
|
7
8
|
from typing import BinaryIO, TextIO
|
|
8
9
|
|
|
9
|
-
_native = str
|
|
10
|
-
_unicode = type("")
|
|
11
|
-
_bytes = type(b"")
|
|
12
|
-
|
|
13
10
|
|
|
14
11
|
def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
|
|
15
12
|
"""Return the stdout stream as binary or text stream.
|
|
@@ -50,33 +47,32 @@ def is_stdout(fp: TextIO | BinaryIO) -> bool:
|
|
|
50
47
|
|
|
51
48
|
def to_bytes(value):
|
|
52
49
|
"""Convert a value to a byte string."""
|
|
53
|
-
if value is None or isinstance(value,
|
|
50
|
+
if value is None or isinstance(value, bytes):
|
|
54
51
|
return value
|
|
55
|
-
if isinstance(value,
|
|
56
|
-
return value.encode("
|
|
57
|
-
return
|
|
52
|
+
if isinstance(value, str):
|
|
53
|
+
return value.encode(errors="surrogateescape")
|
|
54
|
+
return bytes(value)
|
|
58
55
|
|
|
59
56
|
|
|
60
57
|
def to_str(value):
|
|
61
58
|
"""Convert a value to a unicode string."""
|
|
62
|
-
if value is None or isinstance(value,
|
|
59
|
+
if value is None or isinstance(value, str):
|
|
63
60
|
return value
|
|
64
|
-
if isinstance(value,
|
|
65
|
-
return value.decode("
|
|
66
|
-
return
|
|
61
|
+
if isinstance(value, bytes):
|
|
62
|
+
return value.decode(errors="surrogateescape")
|
|
63
|
+
return str(value)
|
|
67
64
|
|
|
68
65
|
|
|
69
66
|
def to_native_str(value):
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return _native(value)
|
|
67
|
+
warnings.warn(
|
|
68
|
+
(
|
|
69
|
+
"The to_native_str() function is deprecated, "
|
|
70
|
+
"this function will be removed in flow.record 3.20, "
|
|
71
|
+
"use to_str() instead"
|
|
72
|
+
),
|
|
73
|
+
DeprecationWarning,
|
|
74
|
+
)
|
|
75
|
+
return to_str(value)
|
|
80
76
|
|
|
81
77
|
|
|
82
78
|
def to_base64(value):
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.17.dev4'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 17, 'dev4')
|
|
15
|
+
__version__ = version = '3.17.dev5'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 17, 'dev5')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.17.dev4
|
|
3
|
+
Version: 3.17.dev5
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -54,6 +54,8 @@ tests/__init__.py
|
|
|
54
54
|
tests/_utils.py
|
|
55
55
|
tests/selector_explain_example.py
|
|
56
56
|
tests/standalone_test.py
|
|
57
|
+
tests/test_adapter_line.py
|
|
58
|
+
tests/test_adapter_text.py
|
|
57
59
|
tests/test_avro.py
|
|
58
60
|
tests/test_avro_adapter.py
|
|
59
61
|
tests/test_compiled_selector.py
|
|
@@ -75,7 +77,6 @@ tests/test_selector.py
|
|
|
75
77
|
tests/test_splunk_adapter.py
|
|
76
78
|
tests/test_sqlite_duckdb_adapter.py
|
|
77
79
|
tests/test_xlsx_adapter.py
|
|
78
|
-
tests/utils_inspect.py
|
|
79
80
|
tests/docs/Makefile
|
|
80
81
|
tests/docs/conf.py
|
|
81
82
|
tests/docs/index.rst
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from flow.record import RecordDescriptor
|
|
4
|
+
from flow.record.adapter.line import LineWriter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_line_writer_write_surrogateescape():
|
|
8
|
+
output = BytesIO()
|
|
9
|
+
|
|
10
|
+
lw = LineWriter(
|
|
11
|
+
path=output,
|
|
12
|
+
fields="name",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
TestRecord = RecordDescriptor(
|
|
16
|
+
"test/string",
|
|
17
|
+
[
|
|
18
|
+
("string", "name"),
|
|
19
|
+
],
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# construct from 'bytes' but with invalid unicode bytes
|
|
23
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
24
|
+
lw.write(record)
|
|
25
|
+
|
|
26
|
+
output.seek(0)
|
|
27
|
+
data = output.read()
|
|
28
|
+
|
|
29
|
+
assert data == b"--[ RECORD 1 ]--\nname = R\xc3\xa9\xeamy\n"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from flow.record import RecordDescriptor
|
|
4
|
+
from flow.record.adapter.text import TextWriter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_text_writer_write_surrogateescape():
|
|
8
|
+
output = BytesIO()
|
|
9
|
+
|
|
10
|
+
tw = TextWriter(
|
|
11
|
+
path=output,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
TestRecord = RecordDescriptor(
|
|
15
|
+
"test/string",
|
|
16
|
+
[
|
|
17
|
+
("string", "name"),
|
|
18
|
+
],
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# construct from 'bytes' but with invalid unicode bytes
|
|
22
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
23
|
+
tw.write(record)
|
|
24
|
+
|
|
25
|
+
output.seek(0)
|
|
26
|
+
data = output.read()
|
|
27
|
+
|
|
28
|
+
assert data == b"<test/string name='R\xc3\xa9\\udceamy'>\n"
|
|
@@ -213,15 +213,8 @@ def test_string():
|
|
|
213
213
|
assert r.name == "Rémy"
|
|
214
214
|
|
|
215
215
|
# construct from 'bytes' but with invalid unicode bytes
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
with pytest.raises(UnicodeDecodeError):
|
|
219
|
-
TestRecord(b"R\xc3\xa9\xeamy")
|
|
220
|
-
else:
|
|
221
|
-
# Python 2
|
|
222
|
-
with pytest.warns(RuntimeWarning):
|
|
223
|
-
r = TestRecord(b"R\xc3\xa9\xeamy")
|
|
224
|
-
assert r.name
|
|
216
|
+
r = TestRecord(b"R\xc3\xa9\xeamy")
|
|
217
|
+
assert r.name == "Ré\udceamy"
|
|
225
218
|
|
|
226
219
|
|
|
227
220
|
def test_wstring():
|
|
@@ -90,3 +90,23 @@ def test_record_pack_bool_regression() -> None:
|
|
|
90
90
|
|
|
91
91
|
# pack the json string back to a record and make sure it is the same as before
|
|
92
92
|
assert packer.unpack(data) == record
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_record_pack_surrogateescape() -> None:
|
|
96
|
+
TestRecord = RecordDescriptor(
|
|
97
|
+
"test/string",
|
|
98
|
+
[
|
|
99
|
+
("string", "name"),
|
|
100
|
+
],
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
104
|
+
packer = JsonRecordPacker()
|
|
105
|
+
|
|
106
|
+
data = packer.pack(record)
|
|
107
|
+
|
|
108
|
+
# pack to json string and check if the 3rd and 4th byte are properly surrogate escaped
|
|
109
|
+
assert data.startswith('{"name": "R\\u00e9\\udceamy",')
|
|
110
|
+
|
|
111
|
+
# pack the json string back to a record and make sure it is the same as before
|
|
112
|
+
assert packer.unpack(data) == record
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import importlib
|
|
2
|
+
import inspect
|
|
2
3
|
import os
|
|
3
4
|
import sys
|
|
4
5
|
from unittest.mock import patch
|
|
@@ -27,8 +28,6 @@ from flow.record.base import (
|
|
|
27
28
|
from flow.record.exceptions import RecordDescriptorError
|
|
28
29
|
from flow.record.stream import RecordFieldRewriter
|
|
29
30
|
|
|
30
|
-
from . import utils_inspect as inspect
|
|
31
|
-
|
|
32
31
|
|
|
33
32
|
def test_record_creation():
|
|
34
33
|
TestRecord = RecordDescriptor(
|
|
@@ -288,8 +287,30 @@ def test_record_printer_stdout(capsys):
|
|
|
288
287
|
writer.write(record)
|
|
289
288
|
|
|
290
289
|
out, err = capsys.readouterr()
|
|
291
|
-
|
|
292
|
-
|
|
290
|
+
expected = "<test/a a_string='hello' common='world' a_count=10>\n"
|
|
291
|
+
assert out == expected
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_record_printer_stdout_surrogateescape(capsys):
|
|
295
|
+
Record = RecordDescriptor(
|
|
296
|
+
"test/a",
|
|
297
|
+
[
|
|
298
|
+
("string", "name"),
|
|
299
|
+
],
|
|
300
|
+
)
|
|
301
|
+
record = Record(b"R\xc3\xa9\xeamy")
|
|
302
|
+
|
|
303
|
+
# fake capsys to be a tty.
|
|
304
|
+
def isatty():
|
|
305
|
+
return True
|
|
306
|
+
|
|
307
|
+
capsys._capture.out.tmpfile.isatty = isatty
|
|
308
|
+
|
|
309
|
+
writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout))
|
|
310
|
+
writer.write(record)
|
|
311
|
+
|
|
312
|
+
out, err = capsys.readouterr()
|
|
313
|
+
expected = "<test/a name='Ré\\udceamy'>\n"
|
|
293
314
|
assert out == expected
|
|
294
315
|
|
|
295
316
|
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Backport of `inspect.signature` for Python 2.
|
|
3
|
-
|
|
4
|
-
Based on: https://github.com/python/cpython/blob/3.7/Lib/inspect.py
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import collections
|
|
8
|
-
import inspect
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class _empty:
|
|
12
|
-
pass
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class Parameter:
|
|
16
|
-
POSITIONAL_ONLY = 0
|
|
17
|
-
POSITIONAL_OR_KEYWORD = 1
|
|
18
|
-
VAR_POSITIONAL = 2
|
|
19
|
-
KEYWORD_ONLY = 3
|
|
20
|
-
VAR_KEYWORD = 4
|
|
21
|
-
|
|
22
|
-
empty = _empty
|
|
23
|
-
|
|
24
|
-
def __init__(self, name, kind, default=_empty):
|
|
25
|
-
self.name = name
|
|
26
|
-
self.kind = kind
|
|
27
|
-
self.default = default
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class Signature:
|
|
31
|
-
empty = _empty
|
|
32
|
-
|
|
33
|
-
def __init__(self, parameters=None):
|
|
34
|
-
self.parameters = parameters
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def signature(obj):
|
|
38
|
-
try:
|
|
39
|
-
# Python 3
|
|
40
|
-
return inspect.signature(obj)
|
|
41
|
-
except AttributeError:
|
|
42
|
-
# Python 2
|
|
43
|
-
spec = inspect.getargspec(obj)
|
|
44
|
-
|
|
45
|
-
# Create parameter objects which are compatible with python 3 objects
|
|
46
|
-
parameters = collections.OrderedDict()
|
|
47
|
-
for i in range(0, len(spec.args)):
|
|
48
|
-
arg = spec.args[i]
|
|
49
|
-
default = _empty
|
|
50
|
-
if spec.defaults and (len(spec.args) - i <= len(spec.defaults)):
|
|
51
|
-
default = spec.defaults[i - len(spec.args)]
|
|
52
|
-
parameters[arg] = Parameter(name=arg, default=default, kind=Parameter.POSITIONAL_OR_KEYWORD)
|
|
53
|
-
if spec.varargs:
|
|
54
|
-
parameters[spec.varargs] = Parameter(name=spec.varargs, kind=Parameter.VAR_POSITIONAL)
|
|
55
|
-
if spec.keywords:
|
|
56
|
-
parameters[spec.keywords] = Parameter(name=spec.keywords, kind=Parameter.VAR_KEYWORD)
|
|
57
|
-
|
|
58
|
-
return Signature(parameters=parameters)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|