flow.record 3.14.dev2__tar.gz → 3.14.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow.record-3.14.dev2/flow.record.egg-info → flow.record-3.14.dev4}/PKG-INFO +1 -1
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/csvfile.py +20 -10
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/sqlite.py +4 -24
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/base.py +30 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/stream.py +5 -1
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/version.py +2 -2
- {flow.record-3.14.dev2 → flow.record-3.14.dev4/flow.record.egg-info}/PKG-INFO +1 -1
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow.record.egg-info/SOURCES.txt +1 -0
- flow.record-3.14.dev4/tests/test_csv_adapter.py +75 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_record.py +12 -1
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_sqlite_adapter.py +3 -2
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/COPYRIGHT +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/LICENSE +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/MANIFEST.in +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/README.md +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/examples/filesystem.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/examples/passivedns.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/examples/records.json +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/examples/tcpconn.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/__init__.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/__init__.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/archive.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/avro.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/broker.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/elastic.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/jsonfile.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/line.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/mongo.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/split.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/splunk.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/stream.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/text.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/adapter/xlsx.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/exceptions.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/__init__.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/credential.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/jsonpacker.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/packer.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/selector.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/tools/__init__.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/tools/geoip.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/tools/rdump.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/utils.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow/record/whitelist.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow.record.egg-info/entry_points.txt +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow.record.egg-info/requires.txt +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/flow.record.egg-info/top_level.txt +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/pyproject.toml +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/setup.cfg +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/__init__.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/_utils.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/docs/Makefile +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/docs/conf.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/docs/index.rst +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/selector_explain_example.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/standalone_test.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_avro.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_avro_adapter.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_compiled_selector.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_deprecations.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_fieldtype_ip.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_fieldtypes.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_json_packer.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_json_record_adapter.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_multi_timestamp.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_packer.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_rdump.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_record_adapter.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_record_descriptor.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_regression.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_selector.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/test_splunk_adapter.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tests/utils_inspect.py +0 -0
- {flow.record-3.14.dev2 → flow.record-3.14.dev4}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.14.dev2
|
|
3
|
+
Version: 3.14.dev4
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -5,6 +5,7 @@ import sys
|
|
|
5
5
|
|
|
6
6
|
from flow.record import RecordDescriptor
|
|
7
7
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
8
|
+
from flow.record.base import normalize_fieldname
|
|
8
9
|
from flow.record.selector import make_selector
|
|
9
10
|
from flow.record.utils import is_stdout
|
|
10
11
|
|
|
@@ -14,15 +15,16 @@ Comma-separated values (CSV) adapter
|
|
|
14
15
|
Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]
|
|
15
16
|
Read usage: rdump csvfile://[PATH]?fields=[FIELDS]
|
|
16
17
|
[PATH]: path to file. Leave empty or "-" to output to stdout
|
|
17
|
-
|
|
18
|
-
|
|
18
|
+
|
|
19
|
+
Optional parameters:
|
|
20
|
+
[TERMINATOR]: line terminator, default is \\r\\n
|
|
21
|
+
[FIELDS]: comma-separated list of CSV fields (in case of missing CSV header)
|
|
19
22
|
"""
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
class CsvfileWriter(AbstractWriter):
|
|
23
|
-
fp = None
|
|
24
|
-
|
|
25
26
|
def __init__(self, path, fields=None, exclude=None, lineterminator=None, **kwargs):
|
|
27
|
+
self.fp = None
|
|
26
28
|
if path in (None, "", "-"):
|
|
27
29
|
self.fp = sys.stdout
|
|
28
30
|
else:
|
|
@@ -58,15 +60,19 @@ class CsvfileWriter(AbstractWriter):
|
|
|
58
60
|
|
|
59
61
|
|
|
60
62
|
class CsvfileReader(AbstractReader):
|
|
61
|
-
fp = None
|
|
62
|
-
|
|
63
63
|
def __init__(self, path, selector=None, fields=None, **kwargs):
|
|
64
|
+
self.fp = None
|
|
64
65
|
self.selector = make_selector(selector)
|
|
65
66
|
if path in (None, "", "-"):
|
|
66
67
|
self.fp = sys.stdin
|
|
67
68
|
else:
|
|
68
69
|
self.fp = open(path, "r", newline="")
|
|
69
|
-
|
|
70
|
+
|
|
71
|
+
self.dialect = "excel"
|
|
72
|
+
if self.fp.seekable():
|
|
73
|
+
self.dialect = csv.Sniffer().sniff(self.fp.read(1024))
|
|
74
|
+
self.fp.seek(0)
|
|
75
|
+
self.reader = csv.reader(self.fp, dialect=self.dialect)
|
|
70
76
|
|
|
71
77
|
if isinstance(fields, str):
|
|
72
78
|
# parse fields from fields argument (comma-separated string)
|
|
@@ -75,8 +81,11 @@ class CsvfileReader(AbstractReader):
|
|
|
75
81
|
# parse fields from first CSV row
|
|
76
82
|
self.fields = next(self.reader)
|
|
77
83
|
|
|
78
|
-
# Create RecordDescriptor from fields
|
|
79
|
-
self.desc = RecordDescriptor("csv/reader", [("string", col) for col in self.fields])
|
|
84
|
+
# clean field names
|
|
85
|
+
self.fields = [normalize_fieldname(col) for col in self.fields]
|
|
86
|
+
|
|
87
|
+
# Create RecordDescriptor from fields, skipping fields starting with "_" (reserved for internal use)
|
|
88
|
+
self.desc = RecordDescriptor("csv/reader", [("string", col) for col in self.fields if not col.startswith("_")])
|
|
80
89
|
|
|
81
90
|
def close(self):
|
|
82
91
|
if self.fp:
|
|
@@ -85,6 +94,7 @@ class CsvfileReader(AbstractReader):
|
|
|
85
94
|
|
|
86
95
|
def __iter__(self):
|
|
87
96
|
for row in self.reader:
|
|
88
|
-
|
|
97
|
+
rdict = dict(zip(self.fields, row))
|
|
98
|
+
record = self.desc.init_from_dict(rdict)
|
|
89
99
|
if not self.selector or self.selector.match(record):
|
|
90
100
|
yield record
|
|
@@ -8,7 +8,7 @@ from typing import Iterator
|
|
|
8
8
|
|
|
9
9
|
from flow.record import Record, RecordDescriptor
|
|
10
10
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
11
|
-
from flow.record.base import RESERVED_FIELDS
|
|
11
|
+
from flow.record.base import RESERVED_FIELDS, normalize_fieldname
|
|
12
12
|
from flow.record.selector import Selector, make_selector
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
@@ -50,27 +50,6 @@ SQLITE_FIELD_MAP = {
|
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
def sanitized_name(name: str) -> str:
|
|
54
|
-
"""Returns a sanitized version of name.
|
|
55
|
-
|
|
56
|
-
Some (field) names are not allowed in flow.record, while they can be allowed in SQLite.
|
|
57
|
-
This sanitizes the name so it can still be used in flow.record.
|
|
58
|
-
|
|
59
|
-
>>> sanitized_name("my-variable-name-with-dashes")
|
|
60
|
-
'my_variable_name_with_dashes'
|
|
61
|
-
>>> sanitized_name("_my_name_starting_with_underscore")
|
|
62
|
-
'n__my_name_starting_with_underscore'
|
|
63
|
-
>>> sanitized_name("1337")
|
|
64
|
-
'n_1337'
|
|
65
|
-
"""
|
|
66
|
-
|
|
67
|
-
if name not in RESERVED_FIELDS:
|
|
68
|
-
name = name.replace("-", "_")
|
|
69
|
-
if name.startswith("_") or name[0].isdecimal():
|
|
70
|
-
name = "n_" + name
|
|
71
|
-
return name
|
|
72
|
-
|
|
73
|
-
|
|
74
53
|
def create_descriptor_table(con: sqlite3.Connection, descriptor: RecordDescriptor) -> None:
|
|
75
54
|
"""Create table for a RecordDescriptor if it doesn't exists yet."""
|
|
76
55
|
table_name = descriptor.name
|
|
@@ -162,7 +141,7 @@ class SqliteReader(AbstractReader):
|
|
|
162
141
|
# flow.record is quite strict with what is allowed in fieldnames or decriptor name.
|
|
163
142
|
# While SQLite is less strict, we need to sanitize the names to make them compatible.
|
|
164
143
|
table_name_org = table_name
|
|
165
|
-
table_name = sanitized_name(table_name)
|
|
144
|
+
table_name = normalize_fieldname(table_name)
|
|
166
145
|
|
|
167
146
|
schema = self.con.execute(
|
|
168
147
|
"SELECT c.type, c.name FROM pragma_table_info(?) c",
|
|
@@ -174,7 +153,7 @@ class SqliteReader(AbstractReader):
|
|
|
174
153
|
fname_to_type = {}
|
|
175
154
|
for idx, row in enumerate(schema):
|
|
176
155
|
ftype, fname = row
|
|
177
|
-
fname = sanitized_name(fname)
|
|
156
|
+
fname = normalize_fieldname(fname)
|
|
178
157
|
ftype = SQLITE_FIELD_MAP.get(ftype, "string")
|
|
179
158
|
fname_to_type[fname] = ftype
|
|
180
159
|
if fname not in RESERVED_FIELDS:
|
|
@@ -182,6 +161,7 @@ class SqliteReader(AbstractReader):
|
|
|
182
161
|
fnames.append(fname)
|
|
183
162
|
|
|
184
163
|
descriptor_cls = RecordDescriptor(table_name, fields)
|
|
164
|
+
table_name_org = table_name_org.replace("`", r"\\\`")
|
|
185
165
|
cursor = self.con.execute(f"SELECT * FROM `{table_name_org}`")
|
|
186
166
|
while True:
|
|
187
167
|
rows = cursor.fetchmany(self.batch_size)
|
|
@@ -971,6 +971,36 @@ def extend_record(
|
|
|
971
971
|
return ExtendedRecord.init_from_dict(collections.ChainMap(*kv_maps))
|
|
972
972
|
|
|
973
973
|
|
|
974
|
+
@functools.lru_cache(maxsize=4096)
|
|
975
|
+
def normalize_fieldname(field_name: str) -> str:
|
|
976
|
+
"""Returns a normalized version of ``field_name``.
|
|
977
|
+
|
|
978
|
+
Some (field) names are not allowed in flow.record, while they can be allowed in other formats.
|
|
979
|
+
This normalizes the name so it can still be used in flow.record.
|
|
980
|
+
Reserved field_names are not normalized.
|
|
981
|
+
|
|
982
|
+
>>> normalize_fieldname("my-variable-name-with-dashes")
|
|
983
|
+
'my_variable_name_with_dashes'
|
|
984
|
+
>>> normalize_fieldname("_my_name_starting_with_underscore")
|
|
985
|
+
'x__my_name_starting_with_underscore'
|
|
986
|
+
>>> normalize_fieldname("1337")
|
|
987
|
+
'x_1337'
|
|
988
|
+
>>> normalize_fieldname("my name with spaces")
|
|
989
|
+
'my_name_with_spaces'
|
|
990
|
+
>>> normalize_fieldname("my name (with) parentheses")
|
|
991
|
+
'my_name__with__parentheses'
|
|
992
|
+
>>> normalize_fieldname("_generated")
|
|
993
|
+
'_generated'
|
|
994
|
+
"""
|
|
995
|
+
|
|
996
|
+
if field_name not in RESERVED_FIELDS:
|
|
997
|
+
field_name = re.sub(r"[- ()]", "_", field_name)
|
|
998
|
+
# prepend `n_` if field_name is empty or starts with underscore or digit
|
|
999
|
+
if len(field_name) == 0 or field_name.startswith("_") or field_name[0].isdecimal():
|
|
1000
|
+
field_name = "x_" + field_name
|
|
1001
|
+
return field_name
|
|
1002
|
+
|
|
1003
|
+
|
|
974
1004
|
class DynamicFieldtypeModule:
|
|
975
1005
|
def __init__(self, path=""):
|
|
976
1006
|
self.path = path
|
|
@@ -3,6 +3,7 @@ from __future__ import print_function
|
|
|
3
3
|
import datetime
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
|
+
import reprlib
|
|
6
7
|
import struct
|
|
7
8
|
import sys
|
|
8
9
|
from collections import ChainMap
|
|
@@ -17,6 +18,9 @@ from .packer import RecordPacker
|
|
|
17
18
|
|
|
18
19
|
log = logging.getLogger(__package__)
|
|
19
20
|
|
|
21
|
+
aRepr = reprlib.Repr()
|
|
22
|
+
aRepr.maxother = 255
|
|
23
|
+
|
|
20
24
|
|
|
21
25
|
def RecordOutput(fp):
|
|
22
26
|
"""Return a RecordPrinter if `fp` is a tty otherwise a RecordStreamWriter."""
|
|
@@ -156,7 +160,7 @@ def record_stream(sources, selector=None):
|
|
|
156
160
|
except KeyboardInterrupt:
|
|
157
161
|
raise
|
|
158
162
|
except Exception as e: # noqa: B902
|
|
159
|
-
log.warning("Exception in %r for %r: %s -- skipping to next reader", reader, src, e)
|
|
163
|
+
log.warning("Exception in %r for %r: %s -- skipping to next reader", reader, src, aRepr.repr(e))
|
|
160
164
|
continue
|
|
161
165
|
|
|
162
166
|
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.14.dev2'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 14, 'dev2')
|
|
15
|
+
__version__ = version = '3.14.dev4'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 14, 'dev4')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.14.dev2
|
|
3
|
+
Version: 3.14.dev4
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from flow.record import RecordReader
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.mark.parametrize("delimiter", [",", ";", "\t", "|"])
|
|
10
|
+
def test_csv_sniff(tmp_path: Path, delimiter: str) -> None:
|
|
11
|
+
"""Test CSV adapter with sniffing the dialect."""
|
|
12
|
+
input_data = delimiter.join(["title", "year", "imdb"]) + "\n"
|
|
13
|
+
input_data += delimiter.join(["The Shawshank Redemption", "1994", "tt0111161"]) + "\n"
|
|
14
|
+
input_data += delimiter.join(["The Matrix", "1998", "tt0133093"]) + "\n"
|
|
15
|
+
|
|
16
|
+
csv_path = tmp_path / "test.csv"
|
|
17
|
+
csv_path.write_text(input_data)
|
|
18
|
+
|
|
19
|
+
with RecordReader(csv_path) as reader:
|
|
20
|
+
records = list(reader)
|
|
21
|
+
assert len(records) == 2
|
|
22
|
+
|
|
23
|
+
assert records[0].title == "The Shawshank Redemption"
|
|
24
|
+
assert records[0].year == "1994"
|
|
25
|
+
assert records[0].imdb == "tt0111161"
|
|
26
|
+
|
|
27
|
+
assert records[1].title == "The Matrix"
|
|
28
|
+
assert records[1].year == "1998"
|
|
29
|
+
assert records[1].imdb == "tt0133093"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_csv_non_standard_headers(tmp_path: Path) -> None:
|
|
33
|
+
"""Test CSV adapter with header names that need to be cleaned up."""
|
|
34
|
+
input_data = "Filename,Full Path,Size (bytes)\n"
|
|
35
|
+
input_data += "passwd,/etc/passwd,2370\n"
|
|
36
|
+
input_data += "shadow,/etc/shadow,1290\n"
|
|
37
|
+
|
|
38
|
+
csv_path = tmp_path / "test.csv"
|
|
39
|
+
csv_path.write_text(input_data)
|
|
40
|
+
|
|
41
|
+
with RecordReader(csv_path) as reader:
|
|
42
|
+
records = list(reader)
|
|
43
|
+
assert len(records) == 2
|
|
44
|
+
|
|
45
|
+
assert records[0].Filename == "passwd"
|
|
46
|
+
assert records[0].Full_Path == "/etc/passwd"
|
|
47
|
+
assert records[0].Size__bytes_ == "2370"
|
|
48
|
+
|
|
49
|
+
assert records[1].Filename == "shadow"
|
|
50
|
+
assert records[1].Full_Path == "/etc/shadow"
|
|
51
|
+
assert records[1].Size__bytes_ == "1290"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_csv_read_reserved_fields(tmp_path: Path) -> None:
|
|
55
|
+
"""Test CSV adapter with reading reserved field names."""
|
|
56
|
+
input_data = "_generated,_source,foo,bar\n"
|
|
57
|
+
input_data += "2023-11-11 11:11:11.111111+11:11,single,hello,world\n"
|
|
58
|
+
input_data += "2023-11-14T22:13:20+00:00,epoch,goodbye,planet\n"
|
|
59
|
+
|
|
60
|
+
csv_path = tmp_path / "test.csv"
|
|
61
|
+
csv_path.write_text(input_data)
|
|
62
|
+
|
|
63
|
+
with RecordReader(csv_path) as reader:
|
|
64
|
+
records = list(reader)
|
|
65
|
+
assert len(records) == 2
|
|
66
|
+
|
|
67
|
+
assert records[0]._generated == datetime.fromisoformat("2023-11-11 11:11:11.111111+11:11")
|
|
68
|
+
assert records[0]._source == "single"
|
|
69
|
+
assert records[0].foo == "hello"
|
|
70
|
+
assert records[0].bar == "world"
|
|
71
|
+
|
|
72
|
+
assert records[1]._generated == datetime.fromtimestamp(1700000000, tz=timezone.utc)
|
|
73
|
+
assert records[1]._source == "epoch"
|
|
74
|
+
assert records[1].foo == "goodbye"
|
|
75
|
+
assert records[1].bar == "planet"
|
|
@@ -15,7 +15,7 @@ from flow.record import (
|
|
|
15
15
|
fieldtypes,
|
|
16
16
|
record_stream,
|
|
17
17
|
)
|
|
18
|
-
from flow.record.base import merge_record_descriptors
|
|
18
|
+
from flow.record.base import merge_record_descriptors, normalize_fieldname
|
|
19
19
|
from flow.record.exceptions import RecordDescriptorError
|
|
20
20
|
from flow.record.stream import RecordFieldRewriter
|
|
21
21
|
|
|
@@ -781,3 +781,14 @@ def test_merge_record_descriptor_name():
|
|
|
781
781
|
assert MergedRecord.name == "test/ip_record"
|
|
782
782
|
record = MergedRecord()
|
|
783
783
|
assert record._desc.name == "test/ip_record"
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def test_normalize_fieldname():
|
|
787
|
+
assert normalize_fieldname("hello") == "hello"
|
|
788
|
+
assert normalize_fieldname("my-variable-name-with-dashes") == "my_variable_name_with_dashes"
|
|
789
|
+
assert normalize_fieldname("_my_name_starting_with_underscore") == "x__my_name_starting_with_underscore"
|
|
790
|
+
assert normalize_fieldname("1337") == "x_1337"
|
|
791
|
+
assert normalize_fieldname("my name with spaces") == "my_name_with_spaces"
|
|
792
|
+
assert normalize_fieldname("my name (with) parentheses") == "my_name__with__parentheses"
|
|
793
|
+
assert normalize_fieldname("_generated") == "_generated"
|
|
794
|
+
assert normalize_fieldname("_source") == "_source"
|
|
@@ -6,7 +6,8 @@ from typing import Any, Iterator
|
|
|
6
6
|
import pytest
|
|
7
7
|
|
|
8
8
|
from flow.record import Record, RecordDescriptor, RecordReader, RecordWriter
|
|
9
|
-
from flow.record.adapter.sqlite import prepare_insert_sql, sanitized_name
|
|
9
|
+
from flow.record.adapter.sqlite import prepare_insert_sql
|
|
10
|
+
from flow.record.base import normalize_fieldname
|
|
10
11
|
from flow.record.exceptions import RecordDescriptorError
|
|
11
12
|
|
|
12
13
|
|
|
@@ -78,7 +79,7 @@ def test_field_name_sanitization(tmp_path: Path, field_name: str) -> None:
|
|
|
78
79
|
con.close()
|
|
79
80
|
|
|
80
81
|
data_records = []
|
|
81
|
-
sanitized_field_name = sanitized_name(field_name)
|
|
82
|
+
sanitized_field_name = normalize_fieldname(field_name)
|
|
82
83
|
|
|
83
84
|
with RecordReader(f"sqlite://{db}") as reader:
|
|
84
85
|
data_records = [(getattr(record, sanitized_field_name),) for record in reader]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|