flow.record 3.15.dev16__tar.gz → 3.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.15.dev16/flow.record.egg-info → flow_record-3.16}/PKG-INFO +2 -1
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/__init__.py +2 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/elastic.py +39 -6
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/splunk.py +79 -51
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/base.py +3 -5
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/__init__.py +29 -9
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/jsonpacker.py +6 -1
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/selector.py +1 -24
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/stream.py +2 -1
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/utils.py +38 -2
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/version.py +2 -2
- {flow_record-3.15.dev16 → flow_record-3.16/flow.record.egg-info}/PKG-INFO +2 -1
- {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/SOURCES.txt +1 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/requires.txt +1 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/pyproject.toml +1 -1
- flow_record-3.16/tests/test_elastic_adapter.py +53 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_fieldtypes.py +126 -7
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_json_packer.py +21 -0
- flow_record-3.16/tests/test_splunk_adapter.py +433 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tox.ini +5 -4
- flow_record-3.15.dev16/tests/test_splunk_adapter.py +0 -403
- {flow_record-3.15.dev16 → flow_record-3.16}/COPYRIGHT +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/LICENSE +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/MANIFEST.in +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/README.md +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/examples/filesystem.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/examples/passivedns.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/examples/records.json +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/examples/tcpconn.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/line.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/split.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/sqlite.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/text.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/xlsx.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/exceptions.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/packer.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/whitelist.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/setup.cfg +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/_utils.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/docs/Makefile +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/docs/conf.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/docs/index.rst +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/selector_explain_example.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/standalone_test.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_avro.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_deprecations.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_packer.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_rdump.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_record.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_record_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_regression.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_selector.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16}/tests/utils_inspect.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev16
|
|
3
|
+
Version: 3.16
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
|
|
|
40
40
|
Provides-Extra: test
|
|
41
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
42
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
43
|
+
Requires-Dist: flow.record[elastic]; extra == "test"
|
|
43
44
|
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
44
45
|
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
45
46
|
|
|
@@ -17,6 +17,7 @@ from flow.record.base import (
|
|
|
17
17
|
RecordWriter,
|
|
18
18
|
dynamic_fieldtype,
|
|
19
19
|
extend_record,
|
|
20
|
+
ignore_fields_for_comparison,
|
|
20
21
|
iter_timestamped_records,
|
|
21
22
|
open_path,
|
|
22
23
|
open_path_or_stream,
|
|
@@ -57,6 +58,7 @@ __all__ = [
|
|
|
57
58
|
"open_path_or_stream",
|
|
58
59
|
"open_path",
|
|
59
60
|
"open_stream",
|
|
61
|
+
"ignore_fields_for_comparison",
|
|
60
62
|
"set_ignored_fields_for_comparison",
|
|
61
63
|
"stream",
|
|
62
64
|
"dynamic_fieldtype",
|
|
@@ -2,7 +2,7 @@ import hashlib
|
|
|
2
2
|
import logging
|
|
3
3
|
import queue
|
|
4
4
|
import threading
|
|
5
|
-
from typing import Iterator, Union
|
|
5
|
+
from typing import Iterator, Optional, Union
|
|
6
6
|
|
|
7
7
|
import elasticsearch
|
|
8
8
|
import elasticsearch.helpers
|
|
@@ -22,9 +22,11 @@ Read usage: rdump elastic+[PROTOCOL]://[IP]:[PORT]?index=[INDEX]
|
|
|
22
22
|
[PROTOCOL]: http or https. Defaults to https when "+[PROTOCOL]" is omitted
|
|
23
23
|
|
|
24
24
|
Optional arguments:
|
|
25
|
+
[API_KEY]: base64 encoded api key to authenticate with (default: False)
|
|
25
26
|
[INDEX]: name of the index to use (default: records)
|
|
26
27
|
[VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
|
|
27
28
|
[HASH_RECORD]: make record unique by hashing record [slow] (default: False)
|
|
29
|
+
[_META_*]: record metadata fields (default: None)
|
|
28
30
|
"""
|
|
29
31
|
|
|
30
32
|
log = logging.getLogger(__name__)
|
|
@@ -38,6 +40,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
38
40
|
verify_certs: Union[str, bool] = True,
|
|
39
41
|
http_compress: Union[str, bool] = True,
|
|
40
42
|
hash_record: Union[str, bool] = False,
|
|
43
|
+
api_key: Optional[str] = None,
|
|
41
44
|
**kwargs,
|
|
42
45
|
) -> None:
|
|
43
46
|
self.index = index
|
|
@@ -45,7 +48,17 @@ class ElasticWriter(AbstractWriter):
|
|
|
45
48
|
verify_certs = str(verify_certs).lower() in ("1", "true")
|
|
46
49
|
http_compress = str(http_compress).lower() in ("1", "true")
|
|
47
50
|
self.hash_record = str(hash_record).lower() in ("1", "true")
|
|
48
|
-
|
|
51
|
+
|
|
52
|
+
if not uri.lower().startswith(("http://", "https://")):
|
|
53
|
+
uri = "http://" + uri
|
|
54
|
+
|
|
55
|
+
self.es = elasticsearch.Elasticsearch(
|
|
56
|
+
uri,
|
|
57
|
+
verify_certs=verify_certs,
|
|
58
|
+
http_compress=http_compress,
|
|
59
|
+
api_key=api_key,
|
|
60
|
+
)
|
|
61
|
+
|
|
49
62
|
self.json_packer = JsonRecordPacker()
|
|
50
63
|
self.queue: queue.Queue[Union[Record, StopIteration]] = queue.Queue()
|
|
51
64
|
self.event = threading.Event()
|
|
@@ -58,25 +71,34 @@ class ElasticWriter(AbstractWriter):
|
|
|
58
71
|
|
|
59
72
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
60
73
|
|
|
74
|
+
self.metadata_fields = {}
|
|
75
|
+
for arg_key, arg_val in kwargs.items():
|
|
76
|
+
if arg_key.startswith("_meta_"):
|
|
77
|
+
self.metadata_fields[arg_key[6:]] = arg_val
|
|
78
|
+
|
|
61
79
|
def record_to_document(self, record: Record, index: str) -> dict:
|
|
62
80
|
"""Convert a record to a Elasticsearch compatible document dictionary"""
|
|
63
81
|
rdict = record._asdict()
|
|
64
82
|
|
|
65
|
-
# Store record metadata under `_record_metadata
|
|
83
|
+
# Store record metadata under `_record_metadata`.
|
|
66
84
|
rdict_meta = {
|
|
67
85
|
"descriptor": {
|
|
68
86
|
"name": record._desc.name,
|
|
69
87
|
"hash": record._desc.descriptor_hash,
|
|
70
88
|
},
|
|
71
89
|
}
|
|
90
|
+
|
|
72
91
|
# Move all dunder fields to `_record_metadata` to avoid naming clash with ES.
|
|
73
92
|
dunder_keys = [key for key in rdict if key.startswith("_")]
|
|
74
93
|
for key in dunder_keys:
|
|
75
94
|
rdict_meta[key.lstrip("_")] = rdict.pop(key)
|
|
76
|
-
|
|
95
|
+
|
|
96
|
+
# Remove _generated field from metadata to ensure determinstic documents.
|
|
77
97
|
if self.hash_record:
|
|
78
98
|
rdict_meta.pop("generated", None)
|
|
79
|
-
|
|
99
|
+
|
|
100
|
+
rdict["_record_metadata"] = rdict_meta.copy()
|
|
101
|
+
rdict["_record_metadata"].update(self.metadata_fields)
|
|
80
102
|
|
|
81
103
|
document = {
|
|
82
104
|
"_index": index,
|
|
@@ -106,6 +128,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
106
128
|
):
|
|
107
129
|
if not ok:
|
|
108
130
|
log.error("Failed to insert %r", item)
|
|
131
|
+
|
|
109
132
|
self.event.set()
|
|
110
133
|
|
|
111
134
|
def write(self, record: Record) -> None:
|
|
@@ -129,6 +152,7 @@ class ElasticReader(AbstractReader):
|
|
|
129
152
|
verify_certs: Union[str, bool] = True,
|
|
130
153
|
http_compress: Union[str, bool] = True,
|
|
131
154
|
selector: Union[None, Selector, CompiledSelector] = None,
|
|
155
|
+
api_key: Optional[str] = None,
|
|
132
156
|
**kwargs,
|
|
133
157
|
) -> None:
|
|
134
158
|
self.index = index
|
|
@@ -136,7 +160,16 @@ class ElasticReader(AbstractReader):
|
|
|
136
160
|
self.selector = selector
|
|
137
161
|
verify_certs = str(verify_certs).lower() in ("1", "true")
|
|
138
162
|
http_compress = str(http_compress).lower() in ("1", "true")
|
|
139
|
-
|
|
163
|
+
|
|
164
|
+
if not uri.lower().startswith(("http://", "https://")):
|
|
165
|
+
uri = "http://" + uri
|
|
166
|
+
|
|
167
|
+
self.es = elasticsearch.Elasticsearch(
|
|
168
|
+
uri,
|
|
169
|
+
verify_certs=verify_certs,
|
|
170
|
+
http_compress=http_compress,
|
|
171
|
+
api_key=api_key,
|
|
172
|
+
)
|
|
140
173
|
|
|
141
174
|
if not verify_certs:
|
|
142
175
|
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
@@ -28,7 +28,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
|
|
|
28
28
|
[TAG]: optional value to add as "rdtag" output field when writing
|
|
29
29
|
[TOKEN]: Authentication token for sending data over HTTP(S)
|
|
30
30
|
[SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
|
|
31
|
-
[SSL_VERIFY]: Whether to verify the server certificate when sending data over
|
|
31
|
+
[SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
|
|
32
32
|
"""
|
|
33
33
|
|
|
34
34
|
log = logging.getLogger(__package__)
|
|
@@ -36,21 +36,38 @@ log = logging.getLogger(__package__)
|
|
|
36
36
|
# Amount of records to bundle into a single request when sending data over HTTP(S).
|
|
37
37
|
RECORD_BUFFER_LIMIT = 20
|
|
38
38
|
|
|
39
|
-
#
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
39
|
+
# List of reserved splunk fields that do not start with an `_`, as those will be escaped anyway.
|
|
40
|
+
# See: https://docs.splunk.com/Documentation/Splunk/9.2.1/Data/Aboutdefaultfields
|
|
41
|
+
RESERVED_SPLUNK_FIELDS = set(
|
|
42
|
+
[
|
|
43
|
+
"host",
|
|
44
|
+
"index",
|
|
45
|
+
"linecount",
|
|
46
|
+
"punct",
|
|
47
|
+
"source",
|
|
48
|
+
"sourcetype",
|
|
49
|
+
"splunk_server",
|
|
50
|
+
"timestamp",
|
|
51
|
+
],
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
RESERVED_SPLUNK_APP_FIELDS = set(
|
|
55
|
+
[
|
|
56
|
+
"tag",
|
|
57
|
+
"type",
|
|
58
|
+
]
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
RESERVED_RDUMP_FIELDS = set(
|
|
62
|
+
[
|
|
63
|
+
"rdtag",
|
|
64
|
+
"rdtype",
|
|
65
|
+
],
|
|
66
|
+
)
|
|
50
67
|
|
|
51
|
-
|
|
68
|
+
RESERVED_FIELDS = RESERVED_SPLUNK_FIELDS.union(RESERVED_SPLUNK_APP_FIELDS.union(RESERVED_RDUMP_FIELDS))
|
|
52
69
|
|
|
53
|
-
|
|
70
|
+
ESCAPE = "rd_"
|
|
54
71
|
|
|
55
72
|
|
|
56
73
|
class Protocol(Enum):
|
|
@@ -64,7 +81,13 @@ class SourceType(Enum):
|
|
|
64
81
|
RECORDS = "records"
|
|
65
82
|
|
|
66
83
|
|
|
67
|
-
def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
84
|
+
def escape_field_name(field: str) -> str:
|
|
85
|
+
if field.startswith(("_", ESCAPE)) or field in RESERVED_FIELDS:
|
|
86
|
+
field = f"{ESCAPE}{field}"
|
|
87
|
+
return field
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def record_to_splunk_kv_line(record: Record, tag: Optional[str] = None) -> str:
|
|
68
91
|
ret = []
|
|
69
92
|
|
|
70
93
|
ret.append(f'rdtype="{record._desc.name}"')
|
|
@@ -81,8 +104,7 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
|
81
104
|
|
|
82
105
|
val = getattr(record, field)
|
|
83
106
|
|
|
84
|
-
|
|
85
|
-
field = f"rd_{field}"
|
|
107
|
+
field = escape_field_name(field)
|
|
86
108
|
|
|
87
109
|
if val is None:
|
|
88
110
|
ret.append(f"{field}=None")
|
|
@@ -94,7 +116,25 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
|
94
116
|
return " ".join(ret)
|
|
95
117
|
|
|
96
118
|
|
|
97
|
-
def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
|
|
119
|
+
def record_to_splunk_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> dict:
|
|
120
|
+
record_as_dict = packer.pack_obj(record)
|
|
121
|
+
json_dict = {}
|
|
122
|
+
|
|
123
|
+
for field, value in record_as_dict.items():
|
|
124
|
+
# Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
|
|
125
|
+
if field == "_version":
|
|
126
|
+
continue
|
|
127
|
+
escaped_field = escape_field_name(field)
|
|
128
|
+
json_dict[escaped_field] = value
|
|
129
|
+
|
|
130
|
+
# Add rdump specific fields
|
|
131
|
+
json_dict["rdtag"] = tag
|
|
132
|
+
json_dict["rdtype"] = record._desc.name
|
|
133
|
+
|
|
134
|
+
return json_dict
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def record_to_splunk_http_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
|
|
98
138
|
ret = {}
|
|
99
139
|
|
|
100
140
|
indexer_fields = [
|
|
@@ -115,29 +155,13 @@ def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str]
|
|
|
115
155
|
continue
|
|
116
156
|
ret[splunk_name] = to_str(val)
|
|
117
157
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
# Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
|
|
121
|
-
del record_as_dict["_version"]
|
|
122
|
-
|
|
123
|
-
# These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
|
|
124
|
-
# record, we prefix them with 'rd_' (short for record descriptor)
|
|
125
|
-
for field in PREFIX_WITH_RD:
|
|
126
|
-
if field not in record_as_dict:
|
|
127
|
-
continue
|
|
128
|
-
new_field = f"rd_{field}"
|
|
129
|
-
|
|
130
|
-
record_as_dict[new_field] = record_as_dict[field]
|
|
131
|
-
del record_as_dict[field]
|
|
132
|
-
|
|
133
|
-
# Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
|
|
134
|
-
record_as_dict["rdtag"] = tag
|
|
158
|
+
ret["event"] = record_to_splunk_json(packer, record, tag)
|
|
159
|
+
return json.dumps(ret, default=packer.pack_obj)
|
|
135
160
|
|
|
136
|
-
# Yes.
|
|
137
|
-
record_as_dict["rdtype"] = record._desc.name
|
|
138
161
|
|
|
139
|
-
|
|
140
|
-
|
|
162
|
+
def record_to_splunk_tcp_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
|
|
163
|
+
record_dict = record_to_splunk_json(packer, record, tag)
|
|
164
|
+
return json.dumps(record_dict, default=packer.pack_obj)
|
|
141
165
|
|
|
142
166
|
|
|
143
167
|
class SplunkWriter(AbstractWriter):
|
|
@@ -159,31 +183,31 @@ class SplunkWriter(AbstractWriter):
|
|
|
159
183
|
|
|
160
184
|
if sourcetype is None:
|
|
161
185
|
log.warning("No sourcetype provided, assuming 'records' sourcetype")
|
|
162
|
-
sourcetype = SourceType.RECORDS
|
|
186
|
+
self.sourcetype = SourceType.RECORDS
|
|
187
|
+
else:
|
|
188
|
+
self.sourcetype = SourceType(sourcetype)
|
|
163
189
|
|
|
164
190
|
parsed_url = urlparse(uri)
|
|
165
191
|
url_scheme = parsed_url.scheme.lower()
|
|
166
|
-
|
|
167
|
-
self.sourcetype = SourceType(sourcetype)
|
|
168
192
|
self.protocol = Protocol(url_scheme)
|
|
169
|
-
|
|
170
|
-
if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
|
|
171
|
-
raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")
|
|
172
|
-
|
|
173
193
|
self.host = parsed_url.hostname
|
|
174
194
|
self.port = parsed_url.port
|
|
195
|
+
|
|
175
196
|
self.tag = tag
|
|
176
197
|
self.record_buffer = []
|
|
177
198
|
self._warned = False
|
|
178
199
|
self.packer = None
|
|
179
|
-
|
|
180
|
-
if self.sourcetype == SourceType.JSON:
|
|
181
|
-
self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
|
|
200
|
+
self.json_converter = None
|
|
182
201
|
|
|
183
202
|
if self.protocol == Protocol.TCP:
|
|
184
203
|
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
|
|
185
204
|
self.sock.connect((self.host, self.port))
|
|
186
205
|
self._send = self._send_tcp
|
|
206
|
+
|
|
207
|
+
if self.sourcetype == SourceType.JSON:
|
|
208
|
+
self.packer = JsonRecordPacker(indent=None, pack_descriptors=False)
|
|
209
|
+
self.json_converter = record_to_splunk_tcp_api_json
|
|
210
|
+
|
|
187
211
|
elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
|
|
188
212
|
if not HAS_HTTPX:
|
|
189
213
|
raise ImportError("The httpx library is required for sending data over HTTP(S)")
|
|
@@ -214,6 +238,10 @@ class SplunkWriter(AbstractWriter):
|
|
|
214
238
|
|
|
215
239
|
self._send = self._send_http
|
|
216
240
|
|
|
241
|
+
if self.sourcetype == SourceType.JSON:
|
|
242
|
+
self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
|
|
243
|
+
self.json_converter = record_to_splunk_http_api_json
|
|
244
|
+
|
|
217
245
|
def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
|
|
218
246
|
# It's possible to call this function without any data, purely to flush. Hence this check.
|
|
219
247
|
if data:
|
|
@@ -252,9 +280,9 @@ class SplunkWriter(AbstractWriter):
|
|
|
252
280
|
)
|
|
253
281
|
|
|
254
282
|
if self.sourcetype == SourceType.RECORDS:
|
|
255
|
-
rec =
|
|
283
|
+
rec = record_to_splunk_kv_line(record, self.tag)
|
|
256
284
|
else:
|
|
257
|
-
rec =
|
|
285
|
+
rec = self.json_converter(self.packer, record, self.tag)
|
|
258
286
|
|
|
259
287
|
# Trail with a newline for line breaking.
|
|
260
288
|
data = to_bytes(rec) + b"\n"
|
|
@@ -31,6 +31,7 @@ from urllib.parse import parse_qsl, urlparse
|
|
|
31
31
|
|
|
32
32
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
33
33
|
from flow.record.exceptions import RecordAdapterNotFound, RecordDescriptorError
|
|
34
|
+
from flow.record.utils import get_stdin, get_stdout
|
|
34
35
|
|
|
35
36
|
try:
|
|
36
37
|
import lz4.frame as lz4
|
|
@@ -812,10 +813,7 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO:
|
|
|
812
813
|
# normal file or stdio for reading or writing
|
|
813
814
|
if not fp:
|
|
814
815
|
if is_stdio:
|
|
815
|
-
if binary:
|
|
816
|
-
fp = getattr(sys.stdout, "buffer", sys.stdout) if out else getattr(sys.stdin, "buffer", sys.stdin)
|
|
817
|
-
else:
|
|
818
|
-
fp = sys.stdout if out else sys.stdin
|
|
816
|
+
fp = get_stdout(binary=binary) if out else get_stdin(binary=binary)
|
|
819
817
|
else:
|
|
820
818
|
fp = io.open(path, mode)
|
|
821
819
|
# check if we are reading a compressed stream
|
|
@@ -867,7 +865,7 @@ def RecordAdapter(
|
|
|
867
865
|
if url in ("-", "", None) and fileobj is None:
|
|
868
866
|
# For reading stdin, we cannot rely on an extension to know what sort of stream is incoming. Thus, we will
|
|
869
867
|
# treat it as a 'fileobj', where we can peek into the stream and try to select the appropriate adapter.
|
|
870
|
-
fileobj =
|
|
868
|
+
fileobj = get_stdin(binary=True)
|
|
871
869
|
if fileobj is not None:
|
|
872
870
|
# This record adapter has received a file-like object for record reading
|
|
873
871
|
# We just need to find the right adapter by peeking into the first few bytes.
|
|
@@ -32,8 +32,8 @@ NATIVE_UNICODE = isinstance("", str)
|
|
|
32
32
|
|
|
33
33
|
UTC = timezone.utc
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
PY_311_OR_HIGHER = sys.version_info >= (3, 11, 0)
|
|
36
|
+
PY_312_OR_HIGHER = sys.version_info >= (3, 12, 0)
|
|
37
37
|
|
|
38
38
|
TYPE_POSIX = 0
|
|
39
39
|
TYPE_WINDOWS = 1
|
|
@@ -288,7 +288,7 @@ class datetime(_dt, FieldType):
|
|
|
288
288
|
# - Python 3.10 and older requires "T" between date and time in fromisoformat()
|
|
289
289
|
#
|
|
290
290
|
# There are other incompatibilities, but we don't care about those for now.
|
|
291
|
-
if not
|
|
291
|
+
if not PY_311_OR_HIGHER:
|
|
292
292
|
# Convert Z to +00:00 so that fromisoformat() works correctly on Python 3.10 and older
|
|
293
293
|
if arg[-1] == "Z":
|
|
294
294
|
arg = arg[:-1] + "+00:00"
|
|
@@ -633,6 +633,8 @@ def _is_windowslike_path(path: Any):
|
|
|
633
633
|
|
|
634
634
|
|
|
635
635
|
class path(pathlib.PurePath, FieldType):
|
|
636
|
+
_empty_path = False
|
|
637
|
+
|
|
636
638
|
def __new__(cls, *args):
|
|
637
639
|
# This is modelled after pathlib.PurePath's __new__(), which means you
|
|
638
640
|
# will never get an instance of path, only instances of either
|
|
@@ -647,7 +649,7 @@ class path(pathlib.PurePath, FieldType):
|
|
|
647
649
|
for path_part in args:
|
|
648
650
|
if isinstance(path_part, pathlib.PureWindowsPath):
|
|
649
651
|
cls = windows_path
|
|
650
|
-
if not
|
|
652
|
+
if not PY_312_OR_HIGHER:
|
|
651
653
|
# For Python < 3.12, the (string) representation of a
|
|
652
654
|
# pathlib.PureWindowsPath is not round trip equivalent if a path
|
|
653
655
|
# starts with a \ or / followed by a drive letter, e.g.: \C:\...
|
|
@@ -663,15 +665,15 @@ class path(pathlib.PurePath, FieldType):
|
|
|
663
665
|
#
|
|
664
666
|
# This construction works around that by converting all path parts
|
|
665
667
|
# to strings first.
|
|
666
|
-
args = (str(arg) for arg in args)
|
|
668
|
+
args = tuple(str(arg) for arg in args)
|
|
667
669
|
elif isinstance(path_part, pathlib.PurePosixPath):
|
|
668
670
|
cls = posix_path
|
|
669
671
|
elif _is_windowslike_path(path_part):
|
|
670
672
|
# This handles any custom PurePath based implementations that have a windows
|
|
671
673
|
# like path separator (\).
|
|
672
674
|
cls = windows_path
|
|
673
|
-
if not
|
|
674
|
-
args = (str(arg) for arg in args)
|
|
675
|
+
if not PY_312_OR_HIGHER:
|
|
676
|
+
args = tuple(str(arg) for arg in args)
|
|
675
677
|
elif _is_posixlike_path(path_part):
|
|
676
678
|
# This handles any custom PurePath based implementations that don't have a
|
|
677
679
|
# windows like path separator (\).
|
|
@@ -680,20 +682,37 @@ class path(pathlib.PurePath, FieldType):
|
|
|
680
682
|
continue
|
|
681
683
|
break
|
|
682
684
|
|
|
683
|
-
if
|
|
685
|
+
if PY_312_OR_HIGHER:
|
|
684
686
|
obj = super().__new__(cls)
|
|
685
687
|
else:
|
|
686
688
|
obj = cls._from_parts(args)
|
|
689
|
+
|
|
690
|
+
obj._empty_path = False
|
|
691
|
+
if not args or args == ("",):
|
|
692
|
+
obj._empty_path = True
|
|
687
693
|
return obj
|
|
688
694
|
|
|
689
695
|
def __eq__(self, other: Any) -> bool:
|
|
690
696
|
if isinstance(other, str):
|
|
691
697
|
return str(self) == other or self == self.__class__(other)
|
|
698
|
+
elif isinstance(other, self.__class__) and (self._empty_path or other._empty_path):
|
|
699
|
+
return self._empty_path == other._empty_path
|
|
692
700
|
return super().__eq__(other)
|
|
693
701
|
|
|
702
|
+
def __str__(self) -> str:
|
|
703
|
+
if self._empty_path:
|
|
704
|
+
return ""
|
|
705
|
+
return super().__str__()
|
|
706
|
+
|
|
694
707
|
def __repr__(self) -> str:
|
|
695
708
|
return repr(str(self))
|
|
696
709
|
|
|
710
|
+
@property
|
|
711
|
+
def parent(self):
|
|
712
|
+
if self._empty_path:
|
|
713
|
+
return self
|
|
714
|
+
return super().parent
|
|
715
|
+
|
|
697
716
|
def _pack(self):
|
|
698
717
|
path_type = TYPE_WINDOWS if isinstance(self, windows_path) else TYPE_POSIX
|
|
699
718
|
return (str(self), path_type)
|
|
@@ -756,7 +775,8 @@ class command(FieldType):
|
|
|
756
775
|
# an '%' for an environment variable
|
|
757
776
|
# r'\\' for a UNC path
|
|
758
777
|
# the strip and check for ":" on the second line is for `<drive_letter>:`
|
|
759
|
-
|
|
778
|
+
stripped_value = value.lstrip("\"'")
|
|
779
|
+
windows = value.startswith((r"\\", "%")) or (len(stripped_value) >= 2 and stripped_value[1] == ":")
|
|
760
780
|
|
|
761
781
|
if windows:
|
|
762
782
|
cls = windows_command
|
|
@@ -41,15 +41,20 @@ class JsonRecordPacker:
|
|
|
41
41
|
if obj._desc.identifier not in self.descriptors:
|
|
42
42
|
self.register(obj._desc, True)
|
|
43
43
|
serial = obj._asdict()
|
|
44
|
+
|
|
44
45
|
if self.pack_descriptors:
|
|
45
46
|
serial["_type"] = "record"
|
|
46
47
|
serial["_recorddescriptor"] = obj._desc.identifier
|
|
47
48
|
|
|
48
|
-
# PYTHON2: Because "bytes" are also "str" we have to handle this here
|
|
49
49
|
for field_type, field_name in obj._desc.get_field_tuples():
|
|
50
|
+
# PYTHON2: Because "bytes" are also "str" we have to handle this here
|
|
50
51
|
if field_type == "bytes" and isinstance(serial[field_name], str):
|
|
51
52
|
serial[field_name] = base64.b64encode(serial[field_name]).decode()
|
|
52
53
|
|
|
54
|
+
# Boolean field types should be cast to a bool instead of staying ints
|
|
55
|
+
elif field_type == "boolean" and isinstance(serial[field_name], int):
|
|
56
|
+
serial[field_name] = bool(serial[field_name])
|
|
57
|
+
|
|
53
58
|
return serial
|
|
54
59
|
if isinstance(obj, RecordDescriptor):
|
|
55
60
|
serial = {
|
|
@@ -17,25 +17,6 @@ except ImportError:
|
|
|
17
17
|
|
|
18
18
|
string_types = (str, type(""))
|
|
19
19
|
|
|
20
|
-
AST_NODE_S_TYPES = tuple(
|
|
21
|
-
filter(
|
|
22
|
-
None,
|
|
23
|
-
[
|
|
24
|
-
getattr(ast, "Str", None),
|
|
25
|
-
getattr(ast, "Bytes", None),
|
|
26
|
-
],
|
|
27
|
-
),
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
AST_NODE_VALUE_TYPES = tuple(
|
|
31
|
-
filter(
|
|
32
|
-
None,
|
|
33
|
-
[
|
|
34
|
-
getattr(ast, "NameConstant", None),
|
|
35
|
-
getattr(ast, "Constant", None),
|
|
36
|
-
],
|
|
37
|
-
),
|
|
38
|
-
)
|
|
39
20
|
|
|
40
21
|
AST_OPERATORS = {
|
|
41
22
|
ast.Add: operator.add,
|
|
@@ -581,11 +562,7 @@ class RecordContextMatcher:
|
|
|
581
562
|
return r
|
|
582
563
|
|
|
583
564
|
def _eval(self, node):
|
|
584
|
-
if isinstance(node, ast.
|
|
585
|
-
return node.n
|
|
586
|
-
elif isinstance(node, AST_NODE_S_TYPES):
|
|
587
|
-
return node.s
|
|
588
|
-
elif isinstance(node, AST_NODE_VALUE_TYPES):
|
|
565
|
+
if isinstance(node, ast.Constant):
|
|
589
566
|
return node.value
|
|
590
567
|
elif isinstance(node, ast.List):
|
|
591
568
|
return list(map(self.eval, node.elts))
|
|
@@ -12,6 +12,7 @@ from functools import lru_cache
|
|
|
12
12
|
from flow.record import RECORDSTREAM_MAGIC, RecordWriter
|
|
13
13
|
from flow.record.fieldtypes import fieldtype_for_value
|
|
14
14
|
from flow.record.selector import make_selector
|
|
15
|
+
from flow.record.utils import is_stdout
|
|
15
16
|
|
|
16
17
|
from .base import RecordDescriptor, RecordReader
|
|
17
18
|
from .packer import RecordPacker
|
|
@@ -70,7 +71,7 @@ class RecordStreamWriter:
|
|
|
70
71
|
self.write(descriptor)
|
|
71
72
|
|
|
72
73
|
def close(self):
|
|
73
|
-
if self.fp and self.fp
|
|
74
|
+
if self.fp and not is_stdout(self.fp):
|
|
74
75
|
self.fp.close()
|
|
75
76
|
self.fp = None
|
|
76
77
|
|
|
@@ -1,15 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import base64
|
|
2
4
|
import os
|
|
3
5
|
import sys
|
|
4
6
|
from functools import wraps
|
|
7
|
+
from typing import BinaryIO, TextIO
|
|
5
8
|
|
|
6
9
|
_native = str
|
|
7
10
|
_unicode = type("")
|
|
8
11
|
_bytes = type(b"")
|
|
9
12
|
|
|
10
13
|
|
|
11
|
-
def
|
|
12
|
-
|
|
14
|
+
def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
|
|
15
|
+
"""Return the stdout stream as binary or text stream.
|
|
16
|
+
|
|
17
|
+
This function is the preferred way to get the stdout stream in flow.record.
|
|
18
|
+
|
|
19
|
+
Arguments:
|
|
20
|
+
binary: Whether to return the stream as binary stream.
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
The stdout stream.
|
|
24
|
+
"""
|
|
25
|
+
fp = getattr(sys.stdout, "buffer", sys.stdout) if binary else sys.stdout
|
|
26
|
+
fp._is_stdout = True
|
|
27
|
+
return fp
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_stdin(binary: bool = False) -> TextIO | BinaryIO:
|
|
31
|
+
"""Return the stdin stream as binary or text stream.
|
|
32
|
+
|
|
33
|
+
This function is the preferred way to get the stdin stream in flow.record.
|
|
34
|
+
|
|
35
|
+
Arguments:
|
|
36
|
+
binary: Whether to return the stream as binary stream.
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
The stdin stream.
|
|
40
|
+
"""
|
|
41
|
+
fp = getattr(sys.stdin, "buffer", sys.stdin) if binary else sys.stdin
|
|
42
|
+
fp._is_stdin = True
|
|
43
|
+
return fp
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def is_stdout(fp: TextIO | BinaryIO) -> bool:
|
|
47
|
+
"""Returns True if ``fp`` is the stdout stream."""
|
|
48
|
+
return fp in (sys.stdout, sys.stdout.buffer) or hasattr(fp, "_is_stdout")
|
|
13
49
|
|
|
14
50
|
|
|
15
51
|
def to_bytes(value):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev16
|
|
3
|
+
Version: 3.16
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
|
|
|
40
40
|
Provides-Extra: test
|
|
41
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
42
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
43
|
+
Requires-Dist: flow.record[elastic]; extra == "test"
|
|
43
44
|
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
44
45
|
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
45
46
|
|