flow.record 3.16.dev5__tar.gz → 3.16.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.16.dev5/flow.record.egg-info → flow_record-3.16.dev6}/PKG-INFO +1 -1
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/splunk.py +79 -51
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/version.py +2 -2
- {flow_record-3.16.dev5 → flow_record-3.16.dev6/flow.record.egg-info}/PKG-INFO +1 -1
- flow_record-3.16.dev6/tests/test_splunk_adapter.py +433 -0
- flow_record-3.16.dev5/tests/test_splunk_adapter.py +0 -403
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/COPYRIGHT +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/LICENSE +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/MANIFEST.in +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/README.md +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/examples/filesystem.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/examples/passivedns.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/examples/records.json +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/examples/tcpconn.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/__init__.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/elastic.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/line.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/split.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/sqlite.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/text.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/adapter/xlsx.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/base.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/exceptions.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/__init__.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/jsonpacker.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/packer.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/selector.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/stream.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/utils.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow/record/whitelist.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow.record.egg-info/SOURCES.txt +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/pyproject.toml +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/setup.cfg +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/__init__.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/_utils.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/docs/Makefile +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/docs/conf.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/docs/index.rst +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/selector_explain_example.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/standalone_test.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_avro.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_deprecations.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_elastic_adapter.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_fieldtypes.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_json_packer.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_packer.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_rdump.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_record.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_record_adapter.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_regression.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_selector.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tests/utils_inspect.py +0 -0
- {flow_record-3.16.dev5 → flow_record-3.16.dev6}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.16.dev5
|
|
3
|
+
Version: 3.16.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -28,7 +28,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
|
|
|
28
28
|
[TAG]: optional value to add as "rdtag" output field when writing
|
|
29
29
|
[TOKEN]: Authentication token for sending data over HTTP(S)
|
|
30
30
|
[SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
|
|
31
|
-
[SSL_VERIFY]: Whether to verify the server certificate when sending data over
|
|
31
|
+
[SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
|
|
32
32
|
"""
|
|
33
33
|
|
|
34
34
|
log = logging.getLogger(__package__)
|
|
@@ -36,21 +36,38 @@ log = logging.getLogger(__package__)
|
|
|
36
36
|
# Amount of records to bundle into a single request when sending data over HTTP(S).
|
|
37
37
|
RECORD_BUFFER_LIMIT = 20
|
|
38
38
|
|
|
39
|
-
#
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
39
|
+
# List of reserved splunk fields that do not start with an `_`, as those will be escaped anyway.
|
|
40
|
+
# See: https://docs.splunk.com/Documentation/Splunk/9.2.1/Data/Aboutdefaultfields
|
|
41
|
+
RESERVED_SPLUNK_FIELDS = set(
|
|
42
|
+
[
|
|
43
|
+
"host",
|
|
44
|
+
"index",
|
|
45
|
+
"linecount",
|
|
46
|
+
"punct",
|
|
47
|
+
"source",
|
|
48
|
+
"sourcetype",
|
|
49
|
+
"splunk_server",
|
|
50
|
+
"timestamp",
|
|
51
|
+
],
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
RESERVED_SPLUNK_APP_FIELDS = set(
|
|
55
|
+
[
|
|
56
|
+
"tag",
|
|
57
|
+
"type",
|
|
58
|
+
]
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
RESERVED_RDUMP_FIELDS = set(
|
|
62
|
+
[
|
|
63
|
+
"rdtag",
|
|
64
|
+
"rdtype",
|
|
65
|
+
],
|
|
66
|
+
)
|
|
50
67
|
|
|
51
|
-
|
|
68
|
+
RESERVED_FIELDS = RESERVED_SPLUNK_FIELDS.union(RESERVED_SPLUNK_APP_FIELDS.union(RESERVED_RDUMP_FIELDS))
|
|
52
69
|
|
|
53
|
-
|
|
70
|
+
ESCAPE = "rd_"
|
|
54
71
|
|
|
55
72
|
|
|
56
73
|
class Protocol(Enum):
|
|
@@ -64,7 +81,13 @@ class SourceType(Enum):
|
|
|
64
81
|
RECORDS = "records"
|
|
65
82
|
|
|
66
83
|
|
|
67
|
-
def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
84
|
+
def escape_field_name(field: str) -> str:
|
|
85
|
+
if field.startswith(("_", ESCAPE)) or field in RESERVED_FIELDS:
|
|
86
|
+
field = f"{ESCAPE}{field}"
|
|
87
|
+
return field
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def record_to_splunk_kv_line(record: Record, tag: Optional[str] = None) -> str:
|
|
68
91
|
ret = []
|
|
69
92
|
|
|
70
93
|
ret.append(f'rdtype="{record._desc.name}"')
|
|
@@ -81,8 +104,7 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
|
81
104
|
|
|
82
105
|
val = getattr(record, field)
|
|
83
106
|
|
|
84
|
-
|
|
85
|
-
field = f"rd_{field}"
|
|
107
|
+
field = escape_field_name(field)
|
|
86
108
|
|
|
87
109
|
if val is None:
|
|
88
110
|
ret.append(f"{field}=None")
|
|
@@ -94,7 +116,25 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
|
94
116
|
return " ".join(ret)
|
|
95
117
|
|
|
96
118
|
|
|
97
|
-
def
|
|
119
|
+
def record_to_splunk_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> dict:
|
|
120
|
+
record_as_dict = packer.pack_obj(record)
|
|
121
|
+
json_dict = {}
|
|
122
|
+
|
|
123
|
+
for field, value in record_as_dict.items():
|
|
124
|
+
# Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
|
|
125
|
+
if field == "_version":
|
|
126
|
+
continue
|
|
127
|
+
escaped_field = escape_field_name(field)
|
|
128
|
+
json_dict[escaped_field] = value
|
|
129
|
+
|
|
130
|
+
# Add rdump specific fields
|
|
131
|
+
json_dict["rdtag"] = tag
|
|
132
|
+
json_dict["rdtype"] = record._desc.name
|
|
133
|
+
|
|
134
|
+
return json_dict
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def record_to_splunk_http_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
|
|
98
138
|
ret = {}
|
|
99
139
|
|
|
100
140
|
indexer_fields = [
|
|
@@ -115,29 +155,13 @@ def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str]
|
|
|
115
155
|
continue
|
|
116
156
|
ret[splunk_name] = to_str(val)
|
|
117
157
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
# Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
|
|
121
|
-
del record_as_dict["_version"]
|
|
122
|
-
|
|
123
|
-
# These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
|
|
124
|
-
# record, we prefix them with 'rd_' (short for record descriptor)
|
|
125
|
-
for field in PREFIX_WITH_RD:
|
|
126
|
-
if field not in record_as_dict:
|
|
127
|
-
continue
|
|
128
|
-
new_field = f"rd_{field}"
|
|
129
|
-
|
|
130
|
-
record_as_dict[new_field] = record_as_dict[field]
|
|
131
|
-
del record_as_dict[field]
|
|
132
|
-
|
|
133
|
-
# Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
|
|
134
|
-
record_as_dict["rdtag"] = tag
|
|
158
|
+
ret["event"] = record_to_splunk_json(packer, record, tag)
|
|
159
|
+
return json.dumps(ret, default=packer.pack_obj)
|
|
135
160
|
|
|
136
|
-
# Yes.
|
|
137
|
-
record_as_dict["rdtype"] = record._desc.name
|
|
138
161
|
|
|
139
|
-
|
|
140
|
-
|
|
162
|
+
def record_to_splunk_tcp_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
|
|
163
|
+
record_dict = record_to_splunk_json(packer, record, tag)
|
|
164
|
+
return json.dumps(record_dict, default=packer.pack_obj)
|
|
141
165
|
|
|
142
166
|
|
|
143
167
|
class SplunkWriter(AbstractWriter):
|
|
@@ -159,31 +183,31 @@ class SplunkWriter(AbstractWriter):
|
|
|
159
183
|
|
|
160
184
|
if sourcetype is None:
|
|
161
185
|
log.warning("No sourcetype provided, assuming 'records' sourcetype")
|
|
162
|
-
sourcetype = SourceType.RECORDS
|
|
186
|
+
self.sourcetype = SourceType.RECORDS
|
|
187
|
+
else:
|
|
188
|
+
self.sourcetype = SourceType(sourcetype)
|
|
163
189
|
|
|
164
190
|
parsed_url = urlparse(uri)
|
|
165
191
|
url_scheme = parsed_url.scheme.lower()
|
|
166
|
-
|
|
167
|
-
self.sourcetype = SourceType(sourcetype)
|
|
168
192
|
self.protocol = Protocol(url_scheme)
|
|
169
|
-
|
|
170
|
-
if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
|
|
171
|
-
raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")
|
|
172
|
-
|
|
173
193
|
self.host = parsed_url.hostname
|
|
174
194
|
self.port = parsed_url.port
|
|
195
|
+
|
|
175
196
|
self.tag = tag
|
|
176
197
|
self.record_buffer = []
|
|
177
198
|
self._warned = False
|
|
178
199
|
self.packer = None
|
|
179
|
-
|
|
180
|
-
if self.sourcetype == SourceType.JSON:
|
|
181
|
-
self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
|
|
200
|
+
self.json_converter = None
|
|
182
201
|
|
|
183
202
|
if self.protocol == Protocol.TCP:
|
|
184
203
|
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
|
|
185
204
|
self.sock.connect((self.host, self.port))
|
|
186
205
|
self._send = self._send_tcp
|
|
206
|
+
|
|
207
|
+
if self.sourcetype == SourceType.JSON:
|
|
208
|
+
self.packer = JsonRecordPacker(indent=None, pack_descriptors=False)
|
|
209
|
+
self.json_converter = record_to_splunk_tcp_api_json
|
|
210
|
+
|
|
187
211
|
elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
|
|
188
212
|
if not HAS_HTTPX:
|
|
189
213
|
raise ImportError("The httpx library is required for sending data over HTTP(S)")
|
|
@@ -214,6 +238,10 @@ class SplunkWriter(AbstractWriter):
|
|
|
214
238
|
|
|
215
239
|
self._send = self._send_http
|
|
216
240
|
|
|
241
|
+
if self.sourcetype == SourceType.JSON:
|
|
242
|
+
self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
|
|
243
|
+
self.json_converter = record_to_splunk_http_api_json
|
|
244
|
+
|
|
217
245
|
def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
|
|
218
246
|
# It's possible to call this function without any data, purely to flush. Hence this check.
|
|
219
247
|
if data:
|
|
@@ -252,9 +280,9 @@ class SplunkWriter(AbstractWriter):
|
|
|
252
280
|
)
|
|
253
281
|
|
|
254
282
|
if self.sourcetype == SourceType.RECORDS:
|
|
255
|
-
rec =
|
|
283
|
+
rec = record_to_splunk_kv_line(record, self.tag)
|
|
256
284
|
else:
|
|
257
|
-
rec =
|
|
285
|
+
rec = self.json_converter(self.packer, record, self.tag)
|
|
258
286
|
|
|
259
287
|
# Trail with a newline for line breaking.
|
|
260
288
|
data = to_bytes(rec) + b"\n"
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.16.dev5'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 16, 'dev5')
|
|
15
|
+
__version__ = version = '3.16.dev6'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 16, 'dev6')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.16.dev5
|
|
3
|
+
Version: 3.16.dev6
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Iterator
|
|
5
|
+
from unittest.mock import ANY, MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
import flow.record.adapter.splunk
|
|
10
|
+
from flow.record import RecordDescriptor
|
|
11
|
+
from flow.record.adapter.splunk import (
|
|
12
|
+
ESCAPE,
|
|
13
|
+
RESERVED_FIELDS,
|
|
14
|
+
Protocol,
|
|
15
|
+
SourceType,
|
|
16
|
+
SplunkWriter,
|
|
17
|
+
escape_field_name,
|
|
18
|
+
record_to_splunk_http_api_json,
|
|
19
|
+
record_to_splunk_kv_line,
|
|
20
|
+
record_to_splunk_tcp_api_json,
|
|
21
|
+
)
|
|
22
|
+
from flow.record.jsonpacker import JsonRecordPacker
|
|
23
|
+
|
|
24
|
+
# These base fields are always part of the splunk output. As they are ordered
|
|
25
|
+
# and ordered last in the record fields we can append them to any check of the
|
|
26
|
+
# splunk output values.
|
|
27
|
+
BASE_FIELD_JSON_VALUES = {
|
|
28
|
+
f"{ESCAPE}_source": None,
|
|
29
|
+
f"{ESCAPE}_classification": None,
|
|
30
|
+
f"{ESCAPE}_generated": ANY,
|
|
31
|
+
}
|
|
32
|
+
BASE_FIELDS_KV_SUFFIX = f'{ESCAPE}_source=None {ESCAPE}_classification=None {ESCAPE}_generated="'
|
|
33
|
+
|
|
34
|
+
JSON_PACKER = JsonRecordPacker(pack_descriptors=False)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.fixture
|
|
38
|
+
def mock_httpx_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock]:
|
|
39
|
+
with monkeypatch.context() as m:
|
|
40
|
+
mock_httpx = MagicMock()
|
|
41
|
+
m.setitem(sys.modules, "httpx", mock_httpx)
|
|
42
|
+
|
|
43
|
+
yield mock_httpx
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
escaped_fields = list(
|
|
47
|
+
RESERVED_FIELDS.union(
|
|
48
|
+
set(["_underscore_field"]),
|
|
49
|
+
),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@pytest.mark.parametrize(
|
|
54
|
+
"field, escaped", list(zip(escaped_fields, [True] * len(escaped_fields))) + [("not_escaped", False)]
|
|
55
|
+
)
|
|
56
|
+
def test_escape_field_name(field, escaped):
|
|
57
|
+
if escaped:
|
|
58
|
+
assert escape_field_name(field) == f"{ESCAPE}{field}"
|
|
59
|
+
else:
|
|
60
|
+
assert escape_field_name(field) == field
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_splunkify_reserved_field():
|
|
64
|
+
test_record_descriptor = RecordDescriptor(
|
|
65
|
+
"test/record",
|
|
66
|
+
[("string", "rdtag")],
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
test_record = test_record_descriptor(rdtag="bar")
|
|
70
|
+
|
|
71
|
+
output_key_value = record_to_splunk_kv_line(test_record)
|
|
72
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
73
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
|
|
74
|
+
|
|
75
|
+
json_dict = dict(
|
|
76
|
+
{
|
|
77
|
+
"rdtag": None,
|
|
78
|
+
"rdtype": "test/record",
|
|
79
|
+
f"{ESCAPE}rdtag": "bar",
|
|
80
|
+
},
|
|
81
|
+
**BASE_FIELD_JSON_VALUES,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
assert output_key_value.startswith(f'rdtype="test/record" rdtag=None {ESCAPE}rdtag="bar" {BASE_FIELDS_KV_SUFFIX}')
|
|
85
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
86
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_splunkify_normal_field():
|
|
90
|
+
test_record_descriptor = RecordDescriptor(
|
|
91
|
+
"test/record",
|
|
92
|
+
[("string", "foo")],
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
test_record = test_record_descriptor(foo="bar")
|
|
96
|
+
|
|
97
|
+
output_key_value = record_to_splunk_kv_line(test_record)
|
|
98
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
99
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
|
|
100
|
+
|
|
101
|
+
json_dict = dict(
|
|
102
|
+
{
|
|
103
|
+
"rdtag": None,
|
|
104
|
+
"rdtype": "test/record",
|
|
105
|
+
"foo": "bar",
|
|
106
|
+
},
|
|
107
|
+
**BASE_FIELD_JSON_VALUES,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
assert output_key_value.startswith(f'rdtype="test/record" rdtag=None foo="bar" {BASE_FIELDS_KV_SUFFIX}')
|
|
111
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
112
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_splunkify_source_field():
|
|
116
|
+
test_record_descriptor = RecordDescriptor(
|
|
117
|
+
"test/record",
|
|
118
|
+
[("string", "source")],
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
test_record = test_record_descriptor(source="file_on_target")
|
|
122
|
+
test_record._source = "path_of_target"
|
|
123
|
+
|
|
124
|
+
output_key_value = record_to_splunk_kv_line(test_record)
|
|
125
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
126
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
|
|
127
|
+
|
|
128
|
+
base_fields_kv_suffix = BASE_FIELDS_KV_SUFFIX.replace(
|
|
129
|
+
f"{ESCAPE}_source=None",
|
|
130
|
+
f'{ESCAPE}_source="{test_record._source}"',
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
base_field_json_values = BASE_FIELD_JSON_VALUES.copy()
|
|
134
|
+
base_field_json_values[f"{ESCAPE}_source"] = test_record._source
|
|
135
|
+
|
|
136
|
+
json_dict = dict(
|
|
137
|
+
{
|
|
138
|
+
"rdtag": None,
|
|
139
|
+
"rdtype": "test/record",
|
|
140
|
+
f"{ESCAPE}source": "file_on_target",
|
|
141
|
+
},
|
|
142
|
+
**base_field_json_values,
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
assert output_key_value.startswith(
|
|
146
|
+
f'rdtype="test/record" rdtag=None {ESCAPE}source="file_on_target" {base_fields_kv_suffix}'
|
|
147
|
+
)
|
|
148
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
149
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_splunkify_rdtag_field():
|
|
153
|
+
test_record_descriptor = RecordDescriptor("test/record", [])
|
|
154
|
+
|
|
155
|
+
test_record = test_record_descriptor()
|
|
156
|
+
|
|
157
|
+
output_key_value = record_to_splunk_kv_line(test_record, tag="bar")
|
|
158
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record, tag="bar")
|
|
159
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record, tag="bar")
|
|
160
|
+
|
|
161
|
+
json_dict = dict(
|
|
162
|
+
{
|
|
163
|
+
"rdtag": "bar",
|
|
164
|
+
"rdtype": "test/record",
|
|
165
|
+
},
|
|
166
|
+
**BASE_FIELD_JSON_VALUES,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
assert output_key_value.startswith(f'rdtype="test/record" rdtag="bar" {BASE_FIELDS_KV_SUFFIX}')
|
|
170
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
171
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_splunkify_none_field():
|
|
175
|
+
test_record_descriptor = RecordDescriptor(
|
|
176
|
+
"test/record",
|
|
177
|
+
[("string", "foo")],
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
test_record = test_record_descriptor()
|
|
181
|
+
|
|
182
|
+
output_key_value = record_to_splunk_kv_line(test_record)
|
|
183
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
184
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
|
|
185
|
+
|
|
186
|
+
json_dict = dict(
|
|
187
|
+
{
|
|
188
|
+
"rdtag": None,
|
|
189
|
+
"rdtype": "test/record",
|
|
190
|
+
"foo": None,
|
|
191
|
+
},
|
|
192
|
+
**BASE_FIELD_JSON_VALUES,
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
assert output_key_value.startswith(f'rdtype="test/record" rdtag=None foo=None {BASE_FIELDS_KV_SUFFIX}')
|
|
196
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
197
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_splunkify_byte_field():
|
|
201
|
+
test_record_descriptor = RecordDescriptor(
|
|
202
|
+
"test/record",
|
|
203
|
+
[("bytes", "foo")],
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
test_record = test_record_descriptor(foo=b"bar")
|
|
207
|
+
|
|
208
|
+
output_key_value = record_to_splunk_kv_line(test_record)
|
|
209
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
210
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
|
|
211
|
+
|
|
212
|
+
json_dict = dict(
|
|
213
|
+
{
|
|
214
|
+
"rdtag": None,
|
|
215
|
+
"rdtype": "test/record",
|
|
216
|
+
"foo": "YmFy",
|
|
217
|
+
},
|
|
218
|
+
**BASE_FIELD_JSON_VALUES,
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
assert output_key_value.startswith(f'rdtype="test/record" rdtag=None foo="YmFy" {BASE_FIELDS_KV_SUFFIX}')
|
|
222
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
223
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_splunkify_backslash_quote_field():
|
|
227
|
+
test_record_descriptor = RecordDescriptor(
|
|
228
|
+
"test/record",
|
|
229
|
+
[("string", "foo")],
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
test_record = test_record_descriptor(foo=b'\\"')
|
|
233
|
+
|
|
234
|
+
output = record_to_splunk_kv_line(test_record)
|
|
235
|
+
output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
236
|
+
output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
|
|
237
|
+
|
|
238
|
+
json_dict = dict(
|
|
239
|
+
{
|
|
240
|
+
"rdtag": None,
|
|
241
|
+
"rdtype": "test/record",
|
|
242
|
+
"foo": '\\"',
|
|
243
|
+
},
|
|
244
|
+
**BASE_FIELD_JSON_VALUES,
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
assert output.startswith(f'rdtype="test/record" rdtag=None foo="\\\\\\"" {BASE_FIELDS_KV_SUFFIX}')
|
|
248
|
+
assert json.loads(output_http_json) == {"event": json_dict}
|
|
249
|
+
assert json.loads(output_tcp_json) == json_dict
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def test_record_to_splunk_http_api_json_special_fields():
|
|
253
|
+
test_record_descriptor = RecordDescriptor(
|
|
254
|
+
"test/record",
|
|
255
|
+
[
|
|
256
|
+
("datetime", "ts"),
|
|
257
|
+
("string", "hostname"),
|
|
258
|
+
("string", "foo"),
|
|
259
|
+
],
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# Datetimes should be converted to epoch
|
|
263
|
+
test_record = test_record_descriptor(ts=datetime.datetime(1970, 1, 1, 4, 0), hostname="RECYCLOPS", foo="bar")
|
|
264
|
+
|
|
265
|
+
output = record_to_splunk_http_api_json(JSON_PACKER, test_record)
|
|
266
|
+
assert '"time": 14400.0,' in output
|
|
267
|
+
assert '"host": "RECYCLOPS"' in output
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def test_tcp_protocol_records_sourcetype():
|
|
271
|
+
with patch("socket.socket") as mock_socket:
|
|
272
|
+
tcp_writer = SplunkWriter("splunk:1337")
|
|
273
|
+
assert tcp_writer.host == "splunk"
|
|
274
|
+
assert tcp_writer.port == 1337
|
|
275
|
+
assert tcp_writer.protocol == Protocol.TCP
|
|
276
|
+
assert tcp_writer.sourcetype == SourceType.RECORDS
|
|
277
|
+
|
|
278
|
+
mock_socket.assert_called()
|
|
279
|
+
mock_socket.return_value.connect.assert_called_with(("splunk", 1337))
|
|
280
|
+
|
|
281
|
+
test_record_descriptor = RecordDescriptor(
|
|
282
|
+
"test/record",
|
|
283
|
+
[("string", "foo")],
|
|
284
|
+
)
|
|
285
|
+
|
|
286
|
+
test_record = test_record_descriptor(foo="bar")
|
|
287
|
+
tcp_writer.write(test_record)
|
|
288
|
+
|
|
289
|
+
args, _ = mock_socket.return_value.sendall.call_args
|
|
290
|
+
written_to_splunk = args[0]
|
|
291
|
+
|
|
292
|
+
assert written_to_splunk.startswith(
|
|
293
|
+
b'rdtype="test/record" rdtag=None foo="bar" ' + BASE_FIELDS_KV_SUFFIX.encode()
|
|
294
|
+
)
|
|
295
|
+
assert written_to_splunk.endswith(b'"\n')
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def test_tcp_protocol_json_sourcetype():
|
|
299
|
+
with patch("socket.socket") as mock_socket:
|
|
300
|
+
tcp_writer = SplunkWriter("splunk:1337", sourcetype="json")
|
|
301
|
+
assert tcp_writer.host == "splunk"
|
|
302
|
+
assert tcp_writer.port == 1337
|
|
303
|
+
assert tcp_writer.protocol == Protocol.TCP
|
|
304
|
+
assert tcp_writer.sourcetype == SourceType.JSON
|
|
305
|
+
|
|
306
|
+
mock_socket.assert_called()
|
|
307
|
+
mock_socket.return_value.connect.assert_called_with(("splunk", 1337))
|
|
308
|
+
|
|
309
|
+
test_record_descriptor = RecordDescriptor(
|
|
310
|
+
"test/record",
|
|
311
|
+
[("string", "foo")],
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
test_record = test_record_descriptor(foo="bar")
|
|
315
|
+
tcp_writer.write(test_record)
|
|
316
|
+
|
|
317
|
+
args, _ = mock_socket.return_value.sendall.call_args
|
|
318
|
+
written_to_splunk = args[0]
|
|
319
|
+
|
|
320
|
+
json_dict = dict(
|
|
321
|
+
{
|
|
322
|
+
"rdtag": None,
|
|
323
|
+
"rdtype": "test/record",
|
|
324
|
+
"foo": "bar",
|
|
325
|
+
},
|
|
326
|
+
**BASE_FIELD_JSON_VALUES,
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
assert json.loads(written_to_splunk) == json_dict
|
|
330
|
+
assert written_to_splunk.endswith(b"\n")
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def test_https_protocol_records_sourcetype(mock_httpx_package: MagicMock):
|
|
334
|
+
if "flow.record.adapter.splunk" in sys.modules:
|
|
335
|
+
del sys.modules["flow.record.adapter.splunk"]
|
|
336
|
+
|
|
337
|
+
from flow.record.adapter.splunk import Protocol, SourceType, SplunkWriter
|
|
338
|
+
|
|
339
|
+
with patch.object(
|
|
340
|
+
flow.record.adapter.splunk,
|
|
341
|
+
"HAS_HTTPX",
|
|
342
|
+
True,
|
|
343
|
+
):
|
|
344
|
+
mock_httpx_package.Client.return_value.post.return_value.status_code = 200
|
|
345
|
+
https_writer = SplunkWriter("https://splunk:8088", token="password123")
|
|
346
|
+
|
|
347
|
+
assert https_writer.host == "splunk"
|
|
348
|
+
assert https_writer.protocol == Protocol.HTTPS
|
|
349
|
+
assert https_writer.sourcetype == SourceType.RECORDS
|
|
350
|
+
assert https_writer.verify is True
|
|
351
|
+
assert https_writer.url == "https://splunk:8088/services/collector/raw?auto_extract_timestamp=true"
|
|
352
|
+
|
|
353
|
+
_, kwargs = mock_httpx_package.Client.call_args
|
|
354
|
+
assert kwargs["verify"] is True
|
|
355
|
+
|
|
356
|
+
given_headers = kwargs["headers"]
|
|
357
|
+
assert given_headers["Authorization"] == "Splunk password123"
|
|
358
|
+
assert "X-Splunk-Request-Channel" in given_headers
|
|
359
|
+
|
|
360
|
+
test_record_descriptor = RecordDescriptor(
|
|
361
|
+
"test/record",
|
|
362
|
+
[("string", "foo")],
|
|
363
|
+
)
|
|
364
|
+
|
|
365
|
+
test_record = test_record_descriptor(foo="bar")
|
|
366
|
+
https_writer.write(test_record)
|
|
367
|
+
|
|
368
|
+
mock_httpx_package.Client.return_value.post.assert_not_called()
|
|
369
|
+
|
|
370
|
+
https_writer.close()
|
|
371
|
+
mock_httpx_package.Client.return_value.post.assert_called_with(
|
|
372
|
+
"https://splunk:8088/services/collector/raw?auto_extract_timestamp=true",
|
|
373
|
+
data=ANY,
|
|
374
|
+
)
|
|
375
|
+
_, kwargs = mock_httpx_package.Client.return_value.post.call_args
|
|
376
|
+
sent_data = kwargs["data"]
|
|
377
|
+
assert sent_data.startswith(b'rdtype="test/record" rdtag=None foo="bar" ' + BASE_FIELDS_KV_SUFFIX.encode())
|
|
378
|
+
assert sent_data.endswith(b'"\n')
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def test_https_protocol_json_sourcetype(mock_httpx_package: MagicMock):
|
|
382
|
+
if "flow.record.adapter.splunk" in sys.modules:
|
|
383
|
+
del sys.modules["flow.record.adapter.splunk"]
|
|
384
|
+
|
|
385
|
+
from flow.record.adapter.splunk import SplunkWriter
|
|
386
|
+
|
|
387
|
+
with patch.object(
|
|
388
|
+
flow.record.adapter.splunk,
|
|
389
|
+
"HAS_HTTPX",
|
|
390
|
+
True,
|
|
391
|
+
):
|
|
392
|
+
mock_httpx_package.Client.return_value.post.return_value.status_code = 200
|
|
393
|
+
|
|
394
|
+
https_writer = SplunkWriter("https://splunk:8088", token="password123", sourcetype="json")
|
|
395
|
+
|
|
396
|
+
test_record_descriptor = RecordDescriptor(
|
|
397
|
+
"test/record",
|
|
398
|
+
[("string", "foo")],
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
https_writer.write(test_record_descriptor(foo="bar"))
|
|
402
|
+
https_writer.write(test_record_descriptor(foo="baz"))
|
|
403
|
+
mock_httpx_package.Client.return_value.post.assert_not_called()
|
|
404
|
+
|
|
405
|
+
https_writer.close()
|
|
406
|
+
mock_httpx_package.Client.return_value.post.assert_called_with(
|
|
407
|
+
"https://splunk:8088/services/collector/event?auto_extract_timestamp=true",
|
|
408
|
+
data=ANY,
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
_, kwargs = mock_httpx_package.Client.return_value.post.call_args
|
|
412
|
+
sent_data = kwargs["data"]
|
|
413
|
+
first_record_json, _, second_record_json = sent_data.partition(b"\n")
|
|
414
|
+
assert json.loads(first_record_json) == {
|
|
415
|
+
"event": dict(
|
|
416
|
+
{
|
|
417
|
+
"rdtag": None,
|
|
418
|
+
"rdtype": "test/record",
|
|
419
|
+
"foo": "bar",
|
|
420
|
+
},
|
|
421
|
+
**BASE_FIELD_JSON_VALUES,
|
|
422
|
+
)
|
|
423
|
+
}
|
|
424
|
+
assert json.loads(second_record_json) == {
|
|
425
|
+
"event": dict(
|
|
426
|
+
{
|
|
427
|
+
"rdtag": None,
|
|
428
|
+
"rdtype": "test/record",
|
|
429
|
+
"foo": "baz",
|
|
430
|
+
},
|
|
431
|
+
**BASE_FIELD_JSON_VALUES,
|
|
432
|
+
)
|
|
433
|
+
}
|