flow.record 3.17.dev3__tar.gz → 3.17.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.17.dev3/flow.record.egg-info → flow_record-3.17.dev5}/PKG-INFO +1 -1
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/elastic.py +1 -1
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/line.py +1 -1
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/sqlite.py +1 -1
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/text.py +1 -1
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/xlsx.py +2 -2
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/base.py +48 -37
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/__init__.py +2 -27
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ipv4.py +0 -7
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/jsonpacker.py +1 -5
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/utils.py +18 -22
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/version.py +2 -2
- {flow_record-3.17.dev3 → flow_record-3.17.dev5/flow.record.egg-info}/PKG-INFO +1 -1
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/SOURCES.txt +2 -1
- flow_record-3.17.dev5/tests/test_adapter_line.py +29 -0
- flow_record-3.17.dev5/tests/test_adapter_text.py +28 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_fieldtypes.py +2 -9
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_json_packer.py +20 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_record.py +25 -4
- flow_record-3.17.dev3/tests/utils_inspect.py +0 -58
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/COPYRIGHT +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/LICENSE +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/MANIFEST.in +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/README.md +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/filesystem.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/passivedns.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/records.json +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/tcpconn.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/__init__.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/split.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/splunk.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/exceptions.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/packer.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/selector.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/stream.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/whitelist.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/pyproject.toml +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/setup.cfg +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/__init__.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/_utils.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/docs/Makefile +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/docs/conf.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/docs/index.rst +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/selector_explain_example.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/standalone_test.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_avro.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_deprecations.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_elastic_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_packer.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_rdump.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_record_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_regression.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_selector.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_splunk_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_xlsx_adapter.py +0 -0
- {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.17.dev3
|
|
3
|
+
Version: 3.17.dev5
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -106,7 +106,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
if self.hash_record:
|
|
109
|
-
document["_id"] = hashlib.md5(document["_source"].encode()).hexdigest()
|
|
109
|
+
document["_id"] = hashlib.md5(document["_source"].encode(errors="surrogateescape")).hexdigest()
|
|
110
110
|
|
|
111
111
|
return document
|
|
112
112
|
|
|
@@ -69,7 +69,7 @@ class LineWriter(AbstractWriter):
|
|
|
69
69
|
for key, value in rdict.items():
|
|
70
70
|
if rdict_types:
|
|
71
71
|
key = f"{key} ({rdict_types[key]})"
|
|
72
|
-
self.fp.write(fmt.format(key, value).encode())
|
|
72
|
+
self.fp.write(fmt.format(key, value).encode(errors="surrogateescape"))
|
|
73
73
|
|
|
74
74
|
def flush(self) -> None:
|
|
75
75
|
if self.fp:
|
|
@@ -187,7 +187,7 @@ class SqliteReader(AbstractReader):
|
|
|
187
187
|
if value == 0:
|
|
188
188
|
row[idx] = None
|
|
189
189
|
elif isinstance(value, str):
|
|
190
|
-
row[idx] = value.encode("
|
|
190
|
+
row[idx] = value.encode(errors="surrogateescape")
|
|
191
191
|
yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
|
|
192
192
|
|
|
193
193
|
def __iter__(self) -> Iterator[Record]:
|
|
@@ -41,7 +41,7 @@ class TextWriter(AbstractWriter):
|
|
|
41
41
|
buf = self.format_spec.format_map(DefaultMissing(rec._asdict()))
|
|
42
42
|
else:
|
|
43
43
|
buf = repr(rec)
|
|
44
|
-
self.fp.write(buf.encode() + b"\n")
|
|
44
|
+
self.fp.write(buf.encode(errors="surrogateescape") + b"\n")
|
|
45
45
|
|
|
46
46
|
# because stdout is usually line buffered we force flush here if wanted
|
|
47
47
|
if self.auto_flush:
|
|
@@ -36,7 +36,7 @@ def sanitize_fieldvalues(values: Iterator[Any]) -> Iterator[Any]:
|
|
|
36
36
|
elif isinstance(value, bytes):
|
|
37
37
|
base64_encode = False
|
|
38
38
|
try:
|
|
39
|
-
new_value = 'b"' + value.decode() + '"'
|
|
39
|
+
new_value = 'b"' + value.decode(errors="surrogateescape") + '"'
|
|
40
40
|
if ILLEGAL_CHARACTERS_RE.search(new_value):
|
|
41
41
|
base64_encode = True
|
|
42
42
|
else:
|
|
@@ -142,7 +142,7 @@ class XlsxReader(AbstractReader):
|
|
|
142
142
|
if field_types[idx] == "bytes":
|
|
143
143
|
if value[1] == '"': # If so, we know this is b""
|
|
144
144
|
# Cut off the b" at the start and the trailing "
|
|
145
|
-
value = value[2:-1].encode()
|
|
145
|
+
value = value[2:-1].encode(errors="surrogateescape")
|
|
146
146
|
else:
|
|
147
147
|
# If not, we know it is base64 encoded (so we cut off the starting 'base64:')
|
|
148
148
|
value = b64decode(value[7:])
|
|
@@ -61,7 +61,7 @@ except ImportError:
|
|
|
61
61
|
|
|
62
62
|
from collections import OrderedDict
|
|
63
63
|
|
|
64
|
-
from .utils import
|
|
64
|
+
from .utils import to_str
|
|
65
65
|
from .whitelist import WHITELIST, WHITELIST_TREE
|
|
66
66
|
|
|
67
67
|
log = logging.getLogger(__package__)
|
|
@@ -513,7 +513,7 @@ class RecordDescriptor:
|
|
|
513
513
|
name, fields = parse_def(name)
|
|
514
514
|
|
|
515
515
|
self.name = name
|
|
516
|
-
self._field_tuples = tuple([(
|
|
516
|
+
self._field_tuples = tuple([(to_str(k), to_str(v)) for k, v in fields])
|
|
517
517
|
self.recordType = _generate_record_class(name, self._field_tuples)
|
|
518
518
|
self.recordType._desc = self
|
|
519
519
|
|
|
@@ -523,12 +523,14 @@ class RecordDescriptor:
|
|
|
523
523
|
"""
|
|
524
524
|
Get required fields mapping. eg:
|
|
525
525
|
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
526
|
+
.. code-block:: text
|
|
527
|
+
|
|
528
|
+
{
|
|
529
|
+
"_source": RecordField("_source", "string"),
|
|
530
|
+
"_classification": RecordField("_classification", "datetime"),
|
|
531
|
+
"_generated": RecordField("_generated", "datetime"),
|
|
532
|
+
"_version": RecordField("_version", "varint"),
|
|
533
|
+
}
|
|
532
534
|
|
|
533
535
|
Returns:
|
|
534
536
|
Mapping of required fields
|
|
@@ -540,10 +542,12 @@ class RecordDescriptor:
|
|
|
540
542
|
"""
|
|
541
543
|
Get fields mapping (without required fields). eg:
|
|
542
544
|
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
545
|
+
.. code-block:: text
|
|
546
|
+
|
|
547
|
+
{
|
|
548
|
+
"foo": RecordField("foo", "string"),
|
|
549
|
+
"bar": RecordField("bar", "varint"),
|
|
550
|
+
}
|
|
547
551
|
|
|
548
552
|
Returns:
|
|
549
553
|
Mapping of Record fields
|
|
@@ -556,15 +560,17 @@ class RecordDescriptor:
|
|
|
556
560
|
"""
|
|
557
561
|
Get all fields including required meta fields. eg:
|
|
558
562
|
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
563
|
+
.. code-block:: text
|
|
564
|
+
|
|
565
|
+
{
|
|
566
|
+
"ts": RecordField("ts", "datetime"),
|
|
567
|
+
"foo": RecordField("foo", "string"),
|
|
568
|
+
"bar": RecordField("bar", "varint"),
|
|
569
|
+
"_source": RecordField("_source", "string"),
|
|
570
|
+
"_classification": RecordField("_classification", "datetime"),
|
|
571
|
+
"_generated": RecordField("_generated", "datetime"),
|
|
572
|
+
"_version": RecordField("_version", "varint"),
|
|
573
|
+
}
|
|
568
574
|
|
|
569
575
|
Returns:
|
|
570
576
|
Mapping of all Record fields
|
|
@@ -591,18 +597,18 @@ class RecordDescriptor:
|
|
|
591
597
|
return RecordFieldSet(field for field in self.fields.values() if field.typename == name)
|
|
592
598
|
|
|
593
599
|
def __call__(self, *args, **kwargs) -> Record:
|
|
594
|
-
"""Create a new Record initialized with
|
|
600
|
+
"""Create a new Record initialized with ``args`` and ``kwargs``."""
|
|
595
601
|
return self.recordType(*args, **kwargs)
|
|
596
602
|
|
|
597
603
|
def init_from_dict(self, rdict: dict[str, Any], raise_unknown=False) -> Record:
|
|
598
|
-
"""Create a new Record initialized with key, value pairs from
|
|
604
|
+
"""Create a new Record initialized with key, value pairs from ``rdict``.
|
|
599
605
|
|
|
600
|
-
If
|
|
606
|
+
If ``raise_unknown=True`` then fields on ``rdict`` that are unknown to this
|
|
601
607
|
RecordDescriptor will raise a TypeError exception due to initializing
|
|
602
608
|
with unknown keyword arguments. (default: False)
|
|
603
609
|
|
|
604
610
|
Returns:
|
|
605
|
-
Record with data from
|
|
611
|
+
Record with data from ``rdict``
|
|
606
612
|
"""
|
|
607
613
|
|
|
608
614
|
if not raise_unknown:
|
|
@@ -610,14 +616,14 @@ class RecordDescriptor:
|
|
|
610
616
|
return self.recordType(**rdict)
|
|
611
617
|
|
|
612
618
|
def init_from_record(self, record: Record, raise_unknown=False) -> Record:
|
|
613
|
-
"""Create a new Record initialized with data from another
|
|
619
|
+
"""Create a new Record initialized with data from another ``record``.
|
|
614
620
|
|
|
615
|
-
If
|
|
621
|
+
If ``raise_unknown=True`` then fields on ``record`` that are unknown to this
|
|
616
622
|
RecordDescriptor will raise a TypeError exception due to initializing
|
|
617
623
|
with unknown keyword arguments. (default: False)
|
|
618
624
|
|
|
619
625
|
Returns:
|
|
620
|
-
Record with data from
|
|
626
|
+
Record with data from ``record``
|
|
621
627
|
"""
|
|
622
628
|
return self.init_from_dict(record._asdict(), raise_unknown=raise_unknown)
|
|
623
629
|
|
|
@@ -633,7 +639,9 @@ class RecordDescriptor:
|
|
|
633
639
|
def get_field_tuples(self) -> tuple[tuple[str, str]]:
|
|
634
640
|
"""Returns a tuple containing the (typename, name) tuples, eg:
|
|
635
641
|
|
|
636
|
-
|
|
642
|
+
.. code-block:: text
|
|
643
|
+
|
|
644
|
+
(('boolean', 'foo'), ('string', 'bar'))
|
|
637
645
|
|
|
638
646
|
Returns:
|
|
639
647
|
Tuple of (typename, name) tuples
|
|
@@ -676,7 +684,7 @@ class RecordDescriptor:
|
|
|
676
684
|
def definition(self, reserved: bool = True) -> str:
|
|
677
685
|
"""Return the RecordDescriptor as Python definition string.
|
|
678
686
|
|
|
679
|
-
If
|
|
687
|
+
If ``reserved`` is True it will also return the reserved fields.
|
|
680
688
|
|
|
681
689
|
Returns:
|
|
682
690
|
Descriptor definition string
|
|
@@ -769,7 +777,7 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO:
|
|
|
769
777
|
Args:
|
|
770
778
|
path: Filename or path to filename to open
|
|
771
779
|
mode: Could be "r", "rb" to open file for reading, "w", "wb" for writing
|
|
772
|
-
clobber: Overwrite file if it already exists if
|
|
780
|
+
clobber: Overwrite file if it already exists if ``clobber=True``, else raises IOError.
|
|
773
781
|
|
|
774
782
|
"""
|
|
775
783
|
binary = "b" in mode
|
|
@@ -1040,6 +1048,8 @@ def normalize_fieldname(field_name: str) -> str:
|
|
|
1040
1048
|
This normalizes the name so it can still be used in flow.record.
|
|
1041
1049
|
Reserved field_names are not normalized.
|
|
1042
1050
|
|
|
1051
|
+
.. code-block:: text
|
|
1052
|
+
|
|
1043
1053
|
>>> normalize_fieldname("my-variable-name-with-dashes")
|
|
1044
1054
|
'my_variable_name_with_dashes'
|
|
1045
1055
|
>>> normalize_fieldname("_my_name_starting_with_underscore")
|
|
@@ -1100,25 +1110,26 @@ TimestampRecord = RecordDescriptor(
|
|
|
1100
1110
|
|
|
1101
1111
|
|
|
1102
1112
|
def iter_timestamped_records(record: Record) -> Iterator[Record]:
|
|
1103
|
-
"""Yields timestamped annotated records for each
|
|
1104
|
-
If
|
|
1113
|
+
"""Yields timestamped annotated records for each ``datetime`` fieldtype in ``record``.
|
|
1114
|
+
If ``record`` does not have any ``datetime`` fields the original record is returned.
|
|
1105
1115
|
|
|
1106
1116
|
Args:
|
|
1107
1117
|
record: Record to add timestamp fields for.
|
|
1108
1118
|
|
|
1109
1119
|
Yields:
|
|
1110
|
-
Record annotated with
|
|
1120
|
+
Record annotated with ``ts`` and ``ts_description`` fields for each ``datetime`` fieldtype.
|
|
1111
1121
|
"""
|
|
1112
|
-
|
|
1122
|
+
|
|
1123
|
+
# get all ``datetime`` fields. (excluding _generated).
|
|
1113
1124
|
dt_fields = record._desc.getfields("datetime")
|
|
1114
1125
|
if not dt_fields:
|
|
1115
1126
|
yield record
|
|
1116
1127
|
return
|
|
1117
1128
|
|
|
1118
|
-
# yield a new record for each
|
|
1129
|
+
# yield a new record for each ``datetime`` field assigned as ``ts``.
|
|
1119
1130
|
record_name = record._desc.name
|
|
1120
1131
|
for field in dt_fields:
|
|
1121
1132
|
ts_record = TimestampRecord(getattr(record, field.name), field.name)
|
|
1122
|
-
# we extend
|
|
1133
|
+
# we extend ``ts_record`` with original ``record`` so TSRecord info goes first.
|
|
1123
1134
|
record = extend_record(ts_record, [record], name=record_name)
|
|
1124
1135
|
yield record
|
|
@@ -28,7 +28,6 @@ except ImportError:
|
|
|
28
28
|
from flow.record.base import FieldType
|
|
29
29
|
|
|
30
30
|
RE_NORMALIZE_PATH = re.compile(r"[\\/]+")
|
|
31
|
-
NATIVE_UNICODE = isinstance("", str)
|
|
32
31
|
|
|
33
32
|
UTC = timezone.utc
|
|
34
33
|
|
|
@@ -207,10 +206,7 @@ class stringlist(list, FieldType):
|
|
|
207
206
|
class string(string_type, FieldType):
|
|
208
207
|
def __new__(cls, value):
|
|
209
208
|
if isinstance(value, bytes_type):
|
|
210
|
-
value =
|
|
211
|
-
if isinstance(value, bytes_type):
|
|
212
|
-
# Still bytes, so decoding failed (Python 2)
|
|
213
|
-
return bytes(value)
|
|
209
|
+
value = value.decode(errors="surrogateescape")
|
|
214
210
|
return super().__new__(cls, value)
|
|
215
211
|
|
|
216
212
|
def _pack(self):
|
|
@@ -221,27 +217,6 @@ class string(string_type, FieldType):
|
|
|
221
217
|
return defang(self)
|
|
222
218
|
return str.__format__(self, spec)
|
|
223
219
|
|
|
224
|
-
@classmethod
|
|
225
|
-
def _decode(cls, data, encoding):
|
|
226
|
-
"""Decode a byte-string into a unicode-string.
|
|
227
|
-
|
|
228
|
-
Python 3: When `data` contains invalid unicode characters a `UnicodeDecodeError` is raised.
|
|
229
|
-
Python 2: When `data` contains invalid unicode characters the original byte-string is returned.
|
|
230
|
-
"""
|
|
231
|
-
if NATIVE_UNICODE:
|
|
232
|
-
# Raises exception on decode error
|
|
233
|
-
return data.decode(encoding)
|
|
234
|
-
try:
|
|
235
|
-
return data.decode(encoding)
|
|
236
|
-
except UnicodeDecodeError:
|
|
237
|
-
# Fallback to bytes (Python 2 only)
|
|
238
|
-
preview = data[:16].encode("hex_codec") + (".." if len(data) > 16 else "")
|
|
239
|
-
warnings.warn(
|
|
240
|
-
"Got binary data in string field (hex: {}). Compatibility is not guaranteed.".format(preview),
|
|
241
|
-
RuntimeWarning,
|
|
242
|
-
)
|
|
243
|
-
return data
|
|
244
|
-
|
|
245
220
|
|
|
246
221
|
# Alias for backwards compatibility
|
|
247
222
|
wstring = string
|
|
@@ -278,7 +253,7 @@ class datetime(_dt, FieldType):
|
|
|
278
253
|
if len(args) == 1 and not kwargs:
|
|
279
254
|
arg = args[0]
|
|
280
255
|
if isinstance(arg, bytes_type):
|
|
281
|
-
arg = arg.decode("
|
|
256
|
+
arg = arg.decode(errors="surrogateescape")
|
|
282
257
|
if isinstance(arg, string_type):
|
|
283
258
|
# If we are on Python 3.11 or newer, we can use fromisoformat() to parse the string (fast path)
|
|
284
259
|
#
|
|
@@ -3,7 +3,6 @@ import struct
|
|
|
3
3
|
import warnings
|
|
4
4
|
|
|
5
5
|
from flow.record import FieldType
|
|
6
|
-
from flow.record.utils import to_native_str
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
def addr_long(s):
|
|
@@ -45,9 +44,6 @@ class subnet(FieldType):
|
|
|
45
44
|
DeprecationWarning,
|
|
46
45
|
stacklevel=5,
|
|
47
46
|
)
|
|
48
|
-
if isinstance(addr, type("")):
|
|
49
|
-
addr = to_native_str(addr)
|
|
50
|
-
|
|
51
47
|
if not isinstance(addr, str):
|
|
52
48
|
raise TypeError("Subnet() argument 1 must be string, not {}".format(type(addr).__name__))
|
|
53
49
|
|
|
@@ -67,9 +63,6 @@ class subnet(FieldType):
|
|
|
67
63
|
if addr is None:
|
|
68
64
|
return False
|
|
69
65
|
|
|
70
|
-
if isinstance(addr, type("")):
|
|
71
|
-
addr = to_native_str(addr)
|
|
72
|
-
|
|
73
66
|
if isinstance(addr, str):
|
|
74
67
|
addr = addr_long(addr)
|
|
75
68
|
|
|
@@ -47,12 +47,8 @@ class JsonRecordPacker:
|
|
|
47
47
|
serial["_recorddescriptor"] = obj._desc.identifier
|
|
48
48
|
|
|
49
49
|
for field_type, field_name in obj._desc.get_field_tuples():
|
|
50
|
-
# PYTHON2: Because "bytes" are also "str" we have to handle this here
|
|
51
|
-
if field_type == "bytes" and isinstance(serial[field_name], str):
|
|
52
|
-
serial[field_name] = base64.b64encode(serial[field_name]).decode()
|
|
53
|
-
|
|
54
50
|
# Boolean field types should be cast to a bool instead of staying ints
|
|
55
|
-
|
|
51
|
+
if field_type == "boolean" and isinstance(serial[field_name], int):
|
|
56
52
|
serial[field_name] = bool(serial[field_name])
|
|
57
53
|
|
|
58
54
|
return serial
|
|
@@ -3,13 +3,10 @@ from __future__ import annotations
|
|
|
3
3
|
import base64
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
|
+
import warnings
|
|
6
7
|
from functools import wraps
|
|
7
8
|
from typing import BinaryIO, TextIO
|
|
8
9
|
|
|
9
|
-
_native = str
|
|
10
|
-
_unicode = type("")
|
|
11
|
-
_bytes = type(b"")
|
|
12
|
-
|
|
13
10
|
|
|
14
11
|
def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
|
|
15
12
|
"""Return the stdout stream as binary or text stream.
|
|
@@ -50,33 +47,32 @@ def is_stdout(fp: TextIO | BinaryIO) -> bool:
|
|
|
50
47
|
|
|
51
48
|
def to_bytes(value):
|
|
52
49
|
"""Convert a value to a byte string."""
|
|
53
|
-
if value is None or isinstance(value,
|
|
50
|
+
if value is None or isinstance(value, bytes):
|
|
54
51
|
return value
|
|
55
|
-
if isinstance(value,
|
|
56
|
-
return value.encode("
|
|
57
|
-
return
|
|
52
|
+
if isinstance(value, str):
|
|
53
|
+
return value.encode(errors="surrogateescape")
|
|
54
|
+
return bytes(value)
|
|
58
55
|
|
|
59
56
|
|
|
60
57
|
def to_str(value):
|
|
61
58
|
"""Convert a value to a unicode string."""
|
|
62
|
-
if value is None or isinstance(value,
|
|
59
|
+
if value is None or isinstance(value, str):
|
|
63
60
|
return value
|
|
64
|
-
if isinstance(value,
|
|
65
|
-
return value.decode("
|
|
66
|
-
return
|
|
61
|
+
if isinstance(value, bytes):
|
|
62
|
+
return value.decode(errors="surrogateescape")
|
|
63
|
+
return str(value)
|
|
67
64
|
|
|
68
65
|
|
|
69
66
|
def to_native_str(value):
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return _native(value)
|
|
67
|
+
warnings.warn(
|
|
68
|
+
(
|
|
69
|
+
"The to_native_str() function is deprecated, "
|
|
70
|
+
"this function will be removed in flow.record 3.20, "
|
|
71
|
+
"use to_str() instead"
|
|
72
|
+
),
|
|
73
|
+
DeprecationWarning,
|
|
74
|
+
)
|
|
75
|
+
return to_str(value)
|
|
80
76
|
|
|
81
77
|
|
|
82
78
|
def to_base64(value):
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.17.dev3'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 17, 'dev3')
|
|
15
|
+
__version__ = version = '3.17.dev5'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 17, 'dev5')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.17.dev3
|
|
3
|
+
Version: 3.17.dev5
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -54,6 +54,8 @@ tests/__init__.py
|
|
|
54
54
|
tests/_utils.py
|
|
55
55
|
tests/selector_explain_example.py
|
|
56
56
|
tests/standalone_test.py
|
|
57
|
+
tests/test_adapter_line.py
|
|
58
|
+
tests/test_adapter_text.py
|
|
57
59
|
tests/test_avro.py
|
|
58
60
|
tests/test_avro_adapter.py
|
|
59
61
|
tests/test_compiled_selector.py
|
|
@@ -75,7 +77,6 @@ tests/test_selector.py
|
|
|
75
77
|
tests/test_splunk_adapter.py
|
|
76
78
|
tests/test_sqlite_duckdb_adapter.py
|
|
77
79
|
tests/test_xlsx_adapter.py
|
|
78
|
-
tests/utils_inspect.py
|
|
79
80
|
tests/docs/Makefile
|
|
80
81
|
tests/docs/conf.py
|
|
81
82
|
tests/docs/index.rst
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from flow.record import RecordDescriptor
|
|
4
|
+
from flow.record.adapter.line import LineWriter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_line_writer_write_surrogateescape():
|
|
8
|
+
output = BytesIO()
|
|
9
|
+
|
|
10
|
+
lw = LineWriter(
|
|
11
|
+
path=output,
|
|
12
|
+
fields="name",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
TestRecord = RecordDescriptor(
|
|
16
|
+
"test/string",
|
|
17
|
+
[
|
|
18
|
+
("string", "name"),
|
|
19
|
+
],
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# construct from 'bytes' but with invalid unicode bytes
|
|
23
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
24
|
+
lw.write(record)
|
|
25
|
+
|
|
26
|
+
output.seek(0)
|
|
27
|
+
data = output.read()
|
|
28
|
+
|
|
29
|
+
assert data == b"--[ RECORD 1 ]--\nname = R\xc3\xa9\xeamy\n"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from flow.record import RecordDescriptor
|
|
4
|
+
from flow.record.adapter.text import TextWriter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_text_writer_write_surrogateescape():
|
|
8
|
+
output = BytesIO()
|
|
9
|
+
|
|
10
|
+
tw = TextWriter(
|
|
11
|
+
path=output,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
TestRecord = RecordDescriptor(
|
|
15
|
+
"test/string",
|
|
16
|
+
[
|
|
17
|
+
("string", "name"),
|
|
18
|
+
],
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# construct from 'bytes' but with invalid unicode bytes
|
|
22
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
23
|
+
tw.write(record)
|
|
24
|
+
|
|
25
|
+
output.seek(0)
|
|
26
|
+
data = output.read()
|
|
27
|
+
|
|
28
|
+
assert data == b"<test/string name='R\xc3\xa9\\udceamy'>\n"
|
|
@@ -213,15 +213,8 @@ def test_string():
|
|
|
213
213
|
assert r.name == "Rémy"
|
|
214
214
|
|
|
215
215
|
# construct from 'bytes' but with invalid unicode bytes
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
with pytest.raises(UnicodeDecodeError):
|
|
219
|
-
TestRecord(b"R\xc3\xa9\xeamy")
|
|
220
|
-
else:
|
|
221
|
-
# Python 2
|
|
222
|
-
with pytest.warns(RuntimeWarning):
|
|
223
|
-
r = TestRecord(b"R\xc3\xa9\xeamy")
|
|
224
|
-
assert r.name
|
|
216
|
+
r = TestRecord(b"R\xc3\xa9\xeamy")
|
|
217
|
+
assert r.name == "Ré\udceamy"
|
|
225
218
|
|
|
226
219
|
|
|
227
220
|
def test_wstring():
|
|
@@ -90,3 +90,23 @@ def test_record_pack_bool_regression() -> None:
|
|
|
90
90
|
|
|
91
91
|
# pack the json string back to a record and make sure it is the same as before
|
|
92
92
|
assert packer.unpack(data) == record
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_record_pack_surrogateescape() -> None:
|
|
96
|
+
TestRecord = RecordDescriptor(
|
|
97
|
+
"test/string",
|
|
98
|
+
[
|
|
99
|
+
("string", "name"),
|
|
100
|
+
],
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
record = TestRecord(b"R\xc3\xa9\xeamy")
|
|
104
|
+
packer = JsonRecordPacker()
|
|
105
|
+
|
|
106
|
+
data = packer.pack(record)
|
|
107
|
+
|
|
108
|
+
# pack to json string and check if the 3rd and 4th byte are properly surrogate escaped
|
|
109
|
+
assert data.startswith('{"name": "R\\u00e9\\udceamy",')
|
|
110
|
+
|
|
111
|
+
# pack the json string back to a record and make sure it is the same as before
|
|
112
|
+
assert packer.unpack(data) == record
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import importlib
|
|
2
|
+
import inspect
|
|
2
3
|
import os
|
|
3
4
|
import sys
|
|
4
5
|
from unittest.mock import patch
|
|
@@ -27,8 +28,6 @@ from flow.record.base import (
|
|
|
27
28
|
from flow.record.exceptions import RecordDescriptorError
|
|
28
29
|
from flow.record.stream import RecordFieldRewriter
|
|
29
30
|
|
|
30
|
-
from . import utils_inspect as inspect
|
|
31
|
-
|
|
32
31
|
|
|
33
32
|
def test_record_creation():
|
|
34
33
|
TestRecord = RecordDescriptor(
|
|
@@ -288,8 +287,30 @@ def test_record_printer_stdout(capsys):
|
|
|
288
287
|
writer.write(record)
|
|
289
288
|
|
|
290
289
|
out, err = capsys.readouterr()
|
|
291
|
-
|
|
292
|
-
|
|
290
|
+
expected = "<test/a a_string='hello' common='world' a_count=10>\n"
|
|
291
|
+
assert out == expected
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_record_printer_stdout_surrogateescape(capsys):
|
|
295
|
+
Record = RecordDescriptor(
|
|
296
|
+
"test/a",
|
|
297
|
+
[
|
|
298
|
+
("string", "name"),
|
|
299
|
+
],
|
|
300
|
+
)
|
|
301
|
+
record = Record(b"R\xc3\xa9\xeamy")
|
|
302
|
+
|
|
303
|
+
# fake capsys to be a tty.
|
|
304
|
+
def isatty():
|
|
305
|
+
return True
|
|
306
|
+
|
|
307
|
+
capsys._capture.out.tmpfile.isatty = isatty
|
|
308
|
+
|
|
309
|
+
writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout))
|
|
310
|
+
writer.write(record)
|
|
311
|
+
|
|
312
|
+
out, err = capsys.readouterr()
|
|
313
|
+
expected = "<test/a name='Ré\\udceamy'>\n"
|
|
293
314
|
assert out == expected
|
|
294
315
|
|
|
295
316
|
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Backport of `inspect.signature` for Python 2.
|
|
3
|
-
|
|
4
|
-
Based on: https://github.com/python/cpython/blob/3.7/Lib/inspect.py
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import collections
|
|
8
|
-
import inspect
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class _empty:
|
|
12
|
-
pass
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class Parameter:
|
|
16
|
-
POSITIONAL_ONLY = 0
|
|
17
|
-
POSITIONAL_OR_KEYWORD = 1
|
|
18
|
-
VAR_POSITIONAL = 2
|
|
19
|
-
KEYWORD_ONLY = 3
|
|
20
|
-
VAR_KEYWORD = 4
|
|
21
|
-
|
|
22
|
-
empty = _empty
|
|
23
|
-
|
|
24
|
-
def __init__(self, name, kind, default=_empty):
|
|
25
|
-
self.name = name
|
|
26
|
-
self.kind = kind
|
|
27
|
-
self.default = default
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class Signature:
|
|
31
|
-
empty = _empty
|
|
32
|
-
|
|
33
|
-
def __init__(self, parameters=None):
|
|
34
|
-
self.parameters = parameters
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def signature(obj):
|
|
38
|
-
try:
|
|
39
|
-
# Python 3
|
|
40
|
-
return inspect.signature(obj)
|
|
41
|
-
except AttributeError:
|
|
42
|
-
# Python 2
|
|
43
|
-
spec = inspect.getargspec(obj)
|
|
44
|
-
|
|
45
|
-
# Create parameter objects which are compatible with python 3 objects
|
|
46
|
-
parameters = collections.OrderedDict()
|
|
47
|
-
for i in range(0, len(spec.args)):
|
|
48
|
-
arg = spec.args[i]
|
|
49
|
-
default = _empty
|
|
50
|
-
if spec.defaults and (len(spec.args) - i <= len(spec.defaults)):
|
|
51
|
-
default = spec.defaults[i - len(spec.args)]
|
|
52
|
-
parameters[arg] = Parameter(name=arg, default=default, kind=Parameter.POSITIONAL_OR_KEYWORD)
|
|
53
|
-
if spec.varargs:
|
|
54
|
-
parameters[spec.varargs] = Parameter(name=spec.varargs, kind=Parameter.VAR_POSITIONAL)
|
|
55
|
-
if spec.keywords:
|
|
56
|
-
parameters[spec.keywords] = Parameter(name=spec.keywords, kind=Parameter.VAR_KEYWORD)
|
|
57
|
-
|
|
58
|
-
return Signature(parameters=parameters)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|