flow.record 3.15.dev13__tar.gz → 3.15.dev15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow.record-3.15.dev13/flow.record.egg-info → flow.record-3.15.dev15}/PKG-INFO +3 -1
- flow.record-3.15.dev15/flow/record/adapter/splunk.py +282 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/version.py +2 -2
- {flow.record-3.15.dev13 → flow.record-3.15.dev15/flow.record.egg-info}/PKG-INFO +3 -1
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow.record.egg-info/requires.txt +3 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/pyproject.toml +3 -0
- flow.record-3.15.dev15/tests/test_splunk_adapter.py +403 -0
- flow.record-3.15.dev13/flow/record/adapter/splunk.py +0 -90
- flow.record-3.15.dev13/tests/test_splunk_adapter.py +0 -104
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/COPYRIGHT +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/LICENSE +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/MANIFEST.in +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/README.md +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/examples/filesystem.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/examples/passivedns.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/examples/records.json +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/examples/tcpconn.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/__init__.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/__init__.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/archive.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/avro.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/broker.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/csvfile.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/duckdb.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/elastic.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/jsonfile.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/line.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/mongo.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/split.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/sqlite.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/stream.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/text.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/adapter/xlsx.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/base.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/exceptions.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/__init__.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/credential.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/jsonpacker.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/packer.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/selector.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/stream.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/tools/__init__.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/tools/geoip.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/tools/rdump.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/utils.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow/record/whitelist.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow.record.egg-info/SOURCES.txt +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow.record.egg-info/entry_points.txt +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/flow.record.egg-info/top_level.txt +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/setup.cfg +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/__init__.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/_utils.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/docs/Makefile +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/docs/conf.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/docs/index.rst +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/selector_explain_example.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/standalone_test.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_avro.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_avro_adapter.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_compiled_selector.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_csv_adapter.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_deprecations.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_fieldtype_ip.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_fieldtypes.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_json_packer.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_json_record_adapter.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_multi_timestamp.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_packer.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_rdump.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_record.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_record_adapter.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_record_descriptor.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_regression.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_selector.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tests/utils_inspect.py +0 -0
- {flow.record-3.15.dev13 → flow.record-3.15.dev15}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev13
|
|
3
|
+
Version: 3.15.dev15
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -35,6 +35,8 @@ Requires-Dist: fastavro[snappy]; extra == "avro"
|
|
|
35
35
|
Provides-Extra: duckdb
|
|
36
36
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
37
37
|
Requires-Dist: pytz; extra == "duckdb"
|
|
38
|
+
Provides-Extra: splunk
|
|
39
|
+
Requires-Dist: httpx; extra == "splunk"
|
|
38
40
|
Provides-Extra: test
|
|
39
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
40
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import socket
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
HAS_HTTPX = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
HAS_HTTPX = False
|
|
16
|
+
|
|
17
|
+
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
18
|
+
from flow.record.base import Record
|
|
19
|
+
from flow.record.jsonpacker import JsonRecordPacker
|
|
20
|
+
from flow.record.utils import to_base64, to_bytes, to_str
|
|
21
|
+
|
|
22
|
+
# Help text describing how to address this adapter from the command line.
# The URL template lists every query parameter that is documented below it.
__usage__ = """
Splunk output adapter (writer only)
---
Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&sourcetype=[SOURCETYPE]&ssl_verify=[SSL_VERIFY]
[PROTOCOL]: Protocol to use for forwarding data. Can be tcp, http or https, defaults to tcp if omitted.
[IP]:[PORT]: ip and port to a splunk instance
[TAG]: optional value to add as "rdtag" output field when writing
[TOKEN]: Authentication token for sending data over HTTP(S)
[SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
[SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTP(S). Defaults to True.
"""

log = logging.getLogger(__package__)

# Amount of records to bundle into a single request when sending data over HTTP(S).
RECORD_BUFFER_LIMIT = 20

# Field names that Splunk reserves for its own use.
# https://docs.splunk.com/Documentation/Splunk/7.3.1/Data/Configureindex-timefieldextraction
RESERVED_SPLUNK_FIELDS = [
    "_indextime",
    "_time",
    "index",
    "punct",
    "source",
    "sourcetype",
    "tag",
    "type",
]

# Internal record fields that are also renamed on output.
RESERVED_RECORD_FIELDS = ["_classification", "_generated", "_source"]

# Every field name in this set is written with an "rd_" prefix by the splunkify_* functions.
PREFIX_WITH_RD = set(RESERVED_SPLUNK_FIELDS + RESERVED_RECORD_FIELDS)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Protocol(Enum):
    """Transport protocols the Splunk writer accepts as URI scheme."""

    HTTP = "http"
    HTTPS = "https"
    TCP = "tcp"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class SourceType(Enum):
    """Output formats the Splunk writer can produce."""

    JSON = "json"
    RECORDS = "records"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
    """Serialise a record to a single line of space-separated ``key="value"`` pairs.

    The line starts with ``rdtype`` (the record descriptor name) and ``rdtag``, followed by every field of the
    record except ``_version``. Field names listed in ``PREFIX_WITH_RD`` are written with an ``rd_`` prefix.
    ``None`` values are written unquoted as ``None``; bytes values are base64 encoded; all other values are
    converted to a string. Backslashes and double quotes in quoted values are escaped.

    Args:
        record: The record to serialise.
        tag: Optional value for the ``rdtag`` field.

    Returns:
        The key-value line, without a trailing newline.
    """
    pairs = [f'rdtype="{record._desc.name}"']

    if tag is None:
        pairs.append("rdtag=None")
    else:
        # Escape the tag like any other quoted value, otherwise a quote in the tag breaks the line format.
        pairs.append(f'rdtag="{_escape_splunk_value(str(tag))}"')

    for field in record._desc.get_all_fields():
        # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
        if field == "_version":
            continue

        val = getattr(record, field)
        key = f"rd_{field}" if field in PREFIX_WITH_RD else field

        if val is None:
            pairs.append(f"{key}=None")
            continue

        text = to_base64(val) if isinstance(val, bytes) else to_str(val)
        pairs.append(f'{key}="{_escape_splunk_value(text)}"')

    return " ".join(pairs)


def _escape_splunk_value(value: str) -> str:
    """Escape backslashes and double quotes so the value can be wrapped in double quotes."""
    return value.replace("\\", "\\\\").replace('"', '\\"')
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
    """Serialise a record to a JSON document with Splunk indexer fields and an ``event`` object.

    Top-level ``host`` and ``time`` keys are filled from the record's ``host``/``hostname`` and ``ts`` fields
    when those exist and are truthy. The record itself ends up under ``event``, without ``_version``, with
    reserved field names prefixed by ``rd_`` and with ``rdtag`` and ``rdtype`` added.

    Args:
        packer: Packer used to turn the record (and any non-JSON-native values) into plain objects.
        record: The record to serialise.
        tag: Optional value for the ``rdtag`` field of the event.

    Returns:
        The JSON text, without a trailing newline.
    """
    ret = {}

    # (splunk name, record field name). A later entry overrides an earlier one with the same splunk name.
    indexer_fields = [
        ("host", "host"),
        ("host", "hostname"),
        ("time", "ts"),
    ]

    # When converting a record to json text for splunk, we distinguish between the 'event' (containing the data) and a
    # few other fields that are splunk-specific for indexing. We add those 'indexer_fields' to the return object first.
    for splunk_name, field_name in indexer_fields:
        if not hasattr(record, field_name):
            continue

        val = getattr(record, field_name)
        if not val:
            continue

        if isinstance(val, datetime):
            # Convert datetime objects to epoch timestamp for reserved fields.
            ret[splunk_name] = val.timestamp()
        else:
            ret[splunk_name] = to_str(val)

    record_as_dict = packer.pack_obj(record)

    # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
    # pop() with a default keeps this from failing should the packer not emit the key.
    record_as_dict.pop("_version", None)

    # These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
    # record, we prefix them with 'rd_' (short for record descriptor)
    for field in PREFIX_WITH_RD:
        if field in record_as_dict:
            record_as_dict[f"rd_{field}"] = record_as_dict.pop(field)

    # Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
    record_as_dict["rdtag"] = tag
    record_as_dict["rdtype"] = record._desc.name

    ret["event"] = record_as_dict
    return json.dumps(ret, default=packer.pack_obj)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class SplunkWriter(AbstractWriter):
    """Record writer that forwards records to Splunk over TCP or the HTTP(S) collector.

    Over TCP every record is sent immediately as a key-value line. Over HTTP(S) records are buffered and posted
    in batches of ``RECORD_BUFFER_LIMIT``; ``flush()`` and ``close()`` send whatever is still buffered.
    """

    # Class-level defaults so close() can run even when __init__ did not get as far as creating them.
    sock = None
    session = None

    def __init__(
        self,
        uri: str,
        tag: Optional[str] = None,
        token: Optional[str] = None,
        sourcetype: Optional[str] = None,
        ssl_verify: bool = True,
        **kwargs,
    ):
        """Parse the target URI and set up the TCP socket or HTTP session.

        Args:
            uri: ``[scheme://]host[:port]``; the scheme must be tcp, http or https and defaults to tcp.
            tag: Optional value written as ``rdtag`` with every record.
            token: Authorization token, required for HTTP(S). A ``Splunk `` prefix is added when missing.
            sourcetype: ``records`` or ``json``; defaults to ``records`` with a warning.
            ssl_verify: Certificate verification for HTTP(S). The values ``0`` and ``false`` (any case,
                also as strings) disable verification.

        Raises:
            ValueError: Unknown scheme or sourcetype, a non-records sourcetype over TCP, or a missing token.
            ImportError: HTTP(S) was requested but httpx is not installed.
        """
        # If the writer is initiated without a protocol, we assume we will be writing over tcp
        if "://" not in uri:
            uri = f"tcp://{uri}"

        if sourcetype is None:
            log.warning("No sourcetype provided, assuming 'records' sourcetype")
            sourcetype = SourceType.RECORDS

        parsed_url = urlparse(uri)
        url_scheme = parsed_url.scheme.lower()

        self.sourcetype = SourceType(sourcetype)
        self.protocol = Protocol(url_scheme)

        if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
            raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")

        self.host = parsed_url.hostname
        self.port = parsed_url.port
        self.tag = tag
        self.record_buffer = []
        self._warned = False
        self.packer = None

        if self.sourcetype == SourceType.JSON:
            self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)

        if self.protocol == Protocol.TCP:
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
            try:
                self.sock.connect((self.host, self.port))
            except OSError:
                # Do not leave a half-initialised socket behind when the connection cannot be made.
                self.sock.close()
                self.sock = None
                raise
            self._send = self._send_tcp
        elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
            if not HAS_HTTPX:
                raise ImportError("The httpx library is required for sending data over HTTP(S)")

            scheme = self.protocol.value
            self.token = token
            if not self.token:
                raise ValueError("An authorization token is required for the HTTP collector")
            if not self.token.startswith("Splunk "):
                self.token = f"Splunk {self.token}"

            # Assume verify=True unless specified otherwise.
            self.verify = str(ssl_verify).lower() not in ("0", "false")
            if not self.verify:
                log.warning("Certificate verification is disabled")

            endpoint = "event" if self.sourcetype != SourceType.RECORDS else "raw"
            port = f":{self.port}" if self.port else ""
            self.url = f"{scheme}://{self.host}{port}/services/collector/{endpoint}?auto_extract_timestamp=true"

            self.headers = {
                "Authorization": self.token,
                # A randomized value so that Splunk can loadbalance between different incoming datastreams
                "X-Splunk-Request-Channel": str(uuid.uuid4()),
            }

            self.session = httpx.Client(verify=self.verify, headers=self.headers)

            self._send = self._send_http

    def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
        """Buffer ``data`` and return the joined buffer once it is full or ``flush`` is set.

        Returns ``None`` while the buffer is still filling up, or when there is nothing to send.
        """
        # It's possible to call this function without any data, purely to flush. Hence this check.
        if data:
            self.record_buffer.append(data)

        if len(self.record_buffer) < RECORD_BUFFER_LIMIT and not flush:
            # Buffer limit not exceeded yet, so we do not return a buffer yet, unless buffer is explicitly flushed.
            return None

        buf = b"".join(self.record_buffer)
        if not buf:
            return None

        # We're going to be returning a buffer for the writer to send, so we can clear the internal record buffer.
        self.record_buffer.clear()
        return buf

    def _send(self, data: bytes) -> None:
        """Placeholder; ``__init__`` rebinds ``self._send`` to the TCP or HTTP sender."""
        raise RuntimeError("This method should be overridden at runtime")

    def _send_http(self, data: Optional[bytes] = None, flush: bool = False) -> None:
        """Buffer ``data`` and post the batch once it is due.

        Raises:
            ConnectionError: The collector answered with a status code other than 200.
        """
        buf = self._cache_records_for_http(data, flush)
        if not buf:
            return

        response = self.session.post(self.url, data=buf)
        if response.status_code != 200:
            raise ConnectionError(f"{response.text} ({response.status_code})")

    def _send_tcp(self, data: bytes) -> None:
        """Send ``data`` over the TCP socket right away."""
        self.sock.sendall(data)

    def write(self, record: Record) -> None:
        """Serialise ``record`` according to the sourcetype and hand it to the active sender."""
        if not self._warned and "rdtag" in record._desc.fields:
            # Warn only once per writer.
            self._warned = True
            log.warning(
                "Record has 'rdtag' field which conflicts with the Splunk adapter -- "
                "Splunk output will have duplicate 'rdtag' fields",
            )

        if self.sourcetype == SourceType.RECORDS:
            rec = splunkify_key_value(record, self.tag)
        else:
            rec = splunkify_json(self.packer, record, self.tag)

        # Trail with a newline for line breaking.
        data = to_bytes(rec) + b"\n"

        self._send(data)

    def flush(self) -> None:
        """Send any buffered records; only the HTTP(S) protocols buffer."""
        if self.protocol in (Protocol.HTTP, Protocol.HTTPS):
            self._send_http(flush=True)

    def close(self) -> None:
        """Flush pending HTTP data and release the socket or session."""
        # For TCP
        if self.sock:
            self.sock.close()
            self.sock = None

        # For HTTP(S)
        if self.session:
            try:
                self.flush()
            finally:
                # Release the session even when the final flush fails.
                self.session.close()
                self.session = None
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class SplunkReader(AbstractReader):
    """Placeholder reader; the Splunk adapter can only write."""

    def __init__(self, path, selector=None, **kwargs):
        """Always raises, as reading records back from Splunk is not implemented.

        Raises:
            NotImplementedError: On every call.
        """
        raise NotImplementedError("The Splunk adapter is writer only and does not support reading records")
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.15.
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 15, '
|
|
15
|
+
__version__ = version = '3.15.dev15'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 15, 'dev15')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev13
|
|
3
|
+
Version: 3.15.dev15
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -35,6 +35,8 @@ Requires-Dist: fastavro[snappy]; extra == "avro"
|
|
|
35
35
|
Provides-Extra: duckdb
|
|
36
36
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
37
37
|
Requires-Dist: pytz; extra == "duckdb"
|
|
38
|
+
Provides-Extra: splunk
|
|
39
|
+
Requires-Dist: httpx; extra == "splunk"
|
|
38
40
|
Provides-Extra: test
|
|
39
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
40
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Iterator
|
|
5
|
+
from unittest.mock import ANY, MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
import flow.record.adapter.splunk
|
|
10
|
+
from flow.record import RecordDescriptor
|
|
11
|
+
from flow.record.adapter.splunk import (
|
|
12
|
+
Protocol,
|
|
13
|
+
SplunkWriter,
|
|
14
|
+
splunkify_json,
|
|
15
|
+
splunkify_key_value,
|
|
16
|
+
)
|
|
17
|
+
from flow.record.jsonpacker import JsonRecordPacker
|
|
18
|
+
|
|
19
|
+
# Values the tests expect for the reserved record fields in JSON output.
BASE_FIELD_VALUES = {
    "rd__source": None,
    "rd__classification": None,
    "rd__generated": ANY,
}

JSON_PACKER = JsonRecordPacker(pack_descriptors=False)

# The tests expect the reserved record fields at the end of the key-value output in this fixed order, so the
# output can be matched against a static string. The value of rd__generated varies per record, hence the open quote.
RESERVED_FIELDS_KEY_VALUE_SUFFIX = 'rd__source=None rd__classification=None rd__generated="'
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.fixture
def mock_httpx_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock]:
    """Install a ``MagicMock`` as the ``httpx`` module in ``sys.modules`` for the duration of a test.

    Yields:
        The mock standing in for the ``httpx`` package.
    """
    with monkeypatch.context() as m:
        mock_httpx = MagicMock()
        m.setitem(sys.modules, "httpx", mock_httpx)

        yield mock_httpx
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_splunkify_reserved_field():
    """A field named in ``PREFIX_WITH_RD`` is written with an ``rd_`` prefix in both output formats."""
    with patch.object(
        flow.record.adapter.splunk,
        "PREFIX_WITH_RD",
        {"foo", "_generated", "_classification", "_source"},
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "foo")],
        )

        test_record = test_record_descriptor(foo="bar")

        output_key_value = splunkify_key_value(test_record)
        output_json = splunkify_json(JSON_PACKER, test_record)

        assert output_key_value.startswith(
            f'rdtype="test/record" rdtag=None rd_foo="bar" {RESERVED_FIELDS_KEY_VALUE_SUFFIX}'
        )

        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": None,
                    "rdtype": "test/record",
                    "rd_foo": "bar",
                },
                **BASE_FIELD_VALUES,
            )
        }
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_splunkify_normal_field():
    """A field that is not reserved keeps its own name in both output formats."""
    # NOTE(review): the splunkify_* functions consult PREFIX_WITH_RD, which is computed when the module is
    # imported, so patching RESERVED_SPLUNK_FIELDS presumably has no effect here -- confirm and drop if so.
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "foo")],
        )

        test_record = test_record_descriptor(foo="bar")

        output_key_value = splunkify_key_value(test_record)
        output_json = splunkify_json(JSON_PACKER, test_record)
        assert output_key_value.startswith(
            f'rdtype="test/record" rdtag=None foo="bar" {RESERVED_FIELDS_KEY_VALUE_SUFFIX}'
        )
        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": None,
                    "rdtype": "test/record",
                    "foo": "bar",
                },
                **BASE_FIELD_VALUES,
            )
        }
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def test_splunkify_source_field():
    """A user field ``source`` and the record's ``_source`` both get prefixed and do not collide."""
    # NOTE(review): the assertions below expect 'source' to be prefixed even though RESERVED_SPLUNK_FIELDS is
    # patched to an empty set, which suggests this patch has no effect on PREFIX_WITH_RD -- confirm.
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "source")],
        )

        test_record = test_record_descriptor(source="file_on_target")
        test_record._source = "path_of_target"

        output_key_value = splunkify_key_value(test_record)
        output_json = splunkify_json(JSON_PACKER, test_record)
        assert output_key_value.startswith(
            'rdtype="test/record" rdtag=None rd_source="file_on_target" rd__source="path_of_target"'
        )

        # Same as the base values, except that _source is set on this record.
        expected_base_field_values = BASE_FIELD_VALUES.copy()
        expected_base_field_values["rd__source"] = "path_of_target"

        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": None,
                    "rdtype": "test/record",
                    "rd_source": "file_on_target",
                },
                **expected_base_field_values,
            ),
        }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_splunkify_rdtag_field():
    """The ``tag`` argument ends up as ``rdtag`` in both output formats."""
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor("test/record", [])

        test_record = test_record_descriptor()

        output_key_value = splunkify_key_value(test_record, tag="bar")
        output_json = splunkify_json(JSON_PACKER, test_record, tag="bar")
        assert output_key_value.startswith(f'rdtype="test/record" rdtag="bar" {RESERVED_FIELDS_KEY_VALUE_SUFFIX}')
        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": "bar",
                    "rdtype": "test/record",
                },
                **BASE_FIELD_VALUES,
            )
        }
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_splunkify_none_field():
    """An unset field is written as an unquoted ``None`` (key-value) or ``null`` (JSON)."""
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "foo")],
        )

        test_record = test_record_descriptor()

        output_key_value = splunkify_key_value(test_record)
        output_json = splunkify_json(JSON_PACKER, test_record)
        assert output_key_value.startswith(
            f'rdtype="test/record" rdtag=None foo=None {RESERVED_FIELDS_KEY_VALUE_SUFFIX}'
        )
        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": None,
                    "rdtype": "test/record",
                    "foo": None,
                },
                **BASE_FIELD_VALUES,
            )
        }
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def test_splunkify_byte_field():
    """A bytes field is written base64 encoded in both output formats."""
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("bytes", "foo")],
        )

        test_record = test_record_descriptor(foo=b"bar")

        output_key_value = splunkify_key_value(test_record)
        output_json = splunkify_json(JSON_PACKER, test_record)
        assert output_key_value.startswith(
            f'rdtype="test/record" rdtag=None foo="YmFy" {RESERVED_FIELDS_KEY_VALUE_SUFFIX}'
        )
        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": None,
                    "rdtype": "test/record",
                    "foo": "YmFy",
                },
                **BASE_FIELD_VALUES,
            )
        }
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def test_splunkify_backslash_quote_field():
    """Backslashes and double quotes are escaped in key-value output and survive a JSON round trip."""
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "foo")],
        )

        # The value is a backslash followed by a double quote, passed as bytes to a string field.
        test_record = test_record_descriptor(foo=b'\\"')

        output = splunkify_key_value(test_record)
        output_json = splunkify_json(JSON_PACKER, test_record)
        assert output.startswith(f'rdtype="test/record" rdtag=None foo="\\\\\\"" {RESERVED_FIELDS_KEY_VALUE_SUFFIX}')
        assert json.loads(output_json) == {
            "event": dict(
                {
                    "rdtag": None,
                    "rdtype": "test/record",
                    "foo": '\\"',
                },
                **BASE_FIELD_VALUES,
            )
        }
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_splunkify_json_special_fields():
    """The ``ts`` and ``hostname`` fields are lifted to the top-level ``time`` and ``host`` keys."""
    with patch.object(
        flow.record.adapter.splunk,
        "RESERVED_SPLUNK_FIELDS",
        set(),
    ):
        test_record_descriptor = RecordDescriptor(
            "test/record",
            [
                ("datetime", "ts"),
                ("string", "hostname"),
                ("string", "foo"),
            ],
        )

        # Datetimes should be converted to epoch
        test_record = test_record_descriptor(ts=datetime.datetime(1970, 1, 1, 4, 0), hostname="RECYCLOPS", foo="bar")

        output = splunkify_json(JSON_PACKER, test_record)
        assert '"time": 14400.0,' in output
        assert '"host": "RECYCLOPS"' in output
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def test_tcp_protocol():
    """A URI without scheme selects TCP, connects the socket and sends each record as a key-value line."""
    with patch("socket.socket") as mock_socket:
        tcp_writer = SplunkWriter("splunk:1337")
        assert tcp_writer.host == "splunk"
        assert tcp_writer.port == 1337
        assert tcp_writer.protocol == Protocol.TCP

        mock_socket.assert_called()
        mock_socket.return_value.connect.assert_called_with(("splunk", 1337))

        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "foo")],
        )

        test_record = test_record_descriptor(foo="bar")
        tcp_writer.write(test_record)

        # First positional argument of the last sendall() call.
        args, _ = mock_socket.return_value.sendall.call_args
        written_to_splunk = args[0]

        assert written_to_splunk.startswith(
            b'rdtype="test/record" rdtag=None foo="bar" ' + RESERVED_FIELDS_KEY_VALUE_SUFFIX.encode()
        )
        assert written_to_splunk.endswith(b'"\n')
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def test_https_protocol_records_sourcetype(mock_httpx_package: MagicMock):
    """Over HTTPS with the default sourcetype, records are buffered and posted to the raw endpoint on close."""
    # Force a fresh import of the adapter so that it picks up the mocked httpx module.
    sys.modules.pop("flow.record.adapter.splunk", None)

    from flow.record.adapter.splunk import Protocol, SourceType, SplunkWriter

    with patch.object(
        flow.record.adapter.splunk,
        "HAS_HTTPX",
        True,
    ):
        mock_httpx_package.Client.return_value.post.return_value.status_code = 200
        https_writer = SplunkWriter("https://splunk:8088", token="password123")

        assert https_writer.host == "splunk"
        assert https_writer.protocol == Protocol.HTTPS
        assert https_writer.sourcetype == SourceType.RECORDS
        assert https_writer.verify is True
        assert https_writer.url == "https://splunk:8088/services/collector/raw?auto_extract_timestamp=true"

        _, kwargs = mock_httpx_package.Client.call_args
        assert kwargs["verify"] is True

        given_headers = kwargs["headers"]
        assert given_headers["Authorization"] == "Splunk password123"
        assert "X-Splunk-Request-Channel" in given_headers

        test_record_descriptor = RecordDescriptor(
            "test/record",
            [("string", "foo")],
        )

        test_record = test_record_descriptor(foo="bar")
        https_writer.write(test_record)

        # A single record stays in the buffer; nothing is posted until the writer is closed.
        mock_httpx_package.Client.return_value.post.assert_not_called()

        https_writer.close()
        mock_httpx_package.Client.return_value.post.assert_called_with(
            "https://splunk:8088/services/collector/raw?auto_extract_timestamp=true",
            data=ANY,
        )
        _, kwargs = mock_httpx_package.Client.return_value.post.call_args
        sent_data = kwargs["data"]
        assert sent_data.startswith(
            b'rdtype="test/record" rdtag=None foo="bar" ' + RESERVED_FIELDS_KEY_VALUE_SUFFIX.encode()
        )
        assert sent_data.endswith(b'"\n')
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def test_https_protocol_json_sourcetype(mock_httpx_package: MagicMock):
|
|
352
|
+
if "flow.record.adapter.splunk" in sys.modules:
|
|
353
|
+
del sys.modules["flow.record.adapter.splunk"]
|
|
354
|
+
|
|
355
|
+
from flow.record.adapter.splunk import SplunkWriter
|
|
356
|
+
|
|
357
|
+
with patch.object(
|
|
358
|
+
flow.record.adapter.splunk,
|
|
359
|
+
"HAS_HTTPX",
|
|
360
|
+
True,
|
|
361
|
+
):
|
|
362
|
+
mock_httpx_package.Client.return_value.post.return_value.status_code = 200
|
|
363
|
+
|
|
364
|
+
https_writer = SplunkWriter("https://splunk:8088", token="password123", sourcetype="json")
|
|
365
|
+
|
|
366
|
+
test_record_descriptor = RecordDescriptor(
|
|
367
|
+
"test/record",
|
|
368
|
+
[("string", "foo")],
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
https_writer.write(test_record_descriptor(foo="bar"))
|
|
372
|
+
https_writer.write(test_record_descriptor(foo="baz"))
|
|
373
|
+
mock_httpx_package.Client.return_value.post.assert_not_called()
|
|
374
|
+
|
|
375
|
+
https_writer.close()
|
|
376
|
+
mock_httpx_package.Client.return_value.post.assert_called_with(
|
|
377
|
+
"https://splunk:8088/services/collector/event?auto_extract_timestamp=true",
|
|
378
|
+
data=ANY,
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
_, kwargs = mock_httpx_package.Client.return_value.post.call_args
|
|
382
|
+
sent_data = kwargs["data"]
|
|
383
|
+
first_record_json, _, second_record_json = sent_data.partition(b"\n")
|
|
384
|
+
assert json.loads(first_record_json) == {
|
|
385
|
+
"event": dict(
|
|
386
|
+
{
|
|
387
|
+
"rdtag": None,
|
|
388
|
+
"rdtype": "test/record",
|
|
389
|
+
"foo": "bar",
|
|
390
|
+
},
|
|
391
|
+
**BASE_FIELD_VALUES,
|
|
392
|
+
)
|
|
393
|
+
}
|
|
394
|
+
assert json.loads(second_record_json) == {
|
|
395
|
+
"event": dict(
|
|
396
|
+
{
|
|
397
|
+
"rdtag": None,
|
|
398
|
+
"rdtype": "test/record",
|
|
399
|
+
"foo": "baz",
|
|
400
|
+
},
|
|
401
|
+
**BASE_FIELD_VALUES,
|
|
402
|
+
)
|
|
403
|
+
}
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import socket
|
|
3
|
-
|
|
4
|
-
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
5
|
-
from flow.record.utils import to_base64, to_bytes, to_str
|
|
6
|
-
|
|
7
|
-
__usage__ = """
|
|
8
|
-
Splunk output adapter (writer only)
|
|
9
|
-
---
|
|
10
|
-
Write usage: rdump -w splunk://[IP]:[PORT]?tag=[TAG]
|
|
11
|
-
[IP]:[PORT]: ip and port to a splunk instance
|
|
12
|
-
[TAG]: optional value to add as "rdtag" output field when writing
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
log = logging.getLogger(__package__)
|
|
16
|
-
|
|
17
|
-
RESERVED_SPLUNK_FIELDS = set(
|
|
18
|
-
[
|
|
19
|
-
"_indextime",
|
|
20
|
-
"_time",
|
|
21
|
-
"index",
|
|
22
|
-
"punct",
|
|
23
|
-
"source",
|
|
24
|
-
"sourcetype",
|
|
25
|
-
"tag",
|
|
26
|
-
]
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def splunkify(record, tag=None):
|
|
31
|
-
ret = []
|
|
32
|
-
|
|
33
|
-
ret.append(f'type="{record._desc.name}"')
|
|
34
|
-
|
|
35
|
-
if tag is None:
|
|
36
|
-
ret.append("rdtag=None")
|
|
37
|
-
else:
|
|
38
|
-
ret.append(f'rdtag="{tag}"')
|
|
39
|
-
|
|
40
|
-
for field in record._desc.fields:
|
|
41
|
-
val = getattr(record, field)
|
|
42
|
-
if val is None:
|
|
43
|
-
ret.append(f"{field}=None")
|
|
44
|
-
else:
|
|
45
|
-
val = to_base64(val) if isinstance(val, bytes) else to_str(val)
|
|
46
|
-
val = val.replace("\\", "\\\\").replace('"', '\\"')
|
|
47
|
-
if field in RESERVED_SPLUNK_FIELDS:
|
|
48
|
-
field = f"rd_{field}"
|
|
49
|
-
ret.append(f'{field}="{val}"')
|
|
50
|
-
|
|
51
|
-
return " ".join(ret)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class SplunkWriter(AbstractWriter):
|
|
55
|
-
sock = None
|
|
56
|
-
|
|
57
|
-
def __init__(self, path, tag=None, **kwargs):
|
|
58
|
-
p = path.strip("/").split("/")
|
|
59
|
-
host, port = p[0].split(":")
|
|
60
|
-
port = int(port)
|
|
61
|
-
|
|
62
|
-
self.tag = tag
|
|
63
|
-
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
|
|
64
|
-
self.sock.connect((host, port))
|
|
65
|
-
self.descriptors = {}
|
|
66
|
-
self._warned = False
|
|
67
|
-
|
|
68
|
-
def write(self, record):
|
|
69
|
-
if not self._warned and "rdtag" in record._desc.fields:
|
|
70
|
-
self._warned = True
|
|
71
|
-
log.warning(
|
|
72
|
-
"Record has 'rdtag' field which conflicts with the Splunk adapter -- "
|
|
73
|
-
"Splunk output will have duplicate 'rdtag' fields",
|
|
74
|
-
)
|
|
75
|
-
rec = splunkify(record, tag=self.tag)
|
|
76
|
-
data = to_bytes(rec) + b"\n"
|
|
77
|
-
self.sock.sendall(data)
|
|
78
|
-
|
|
79
|
-
def flush(self):
|
|
80
|
-
pass
|
|
81
|
-
|
|
82
|
-
def close(self):
|
|
83
|
-
if self.sock:
|
|
84
|
-
self.sock.close()
|
|
85
|
-
self.sock = None
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
class SplunkReader(AbstractReader):
|
|
89
|
-
def __init__(self, path, selector=None, **kwargs):
|
|
90
|
-
raise NotImplementedError()
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
from unittest import mock
|
|
2
|
-
|
|
3
|
-
import flow.record.adapter.splunk
|
|
4
|
-
from flow.record import RecordDescriptor
|
|
5
|
-
from flow.record.adapter.splunk import splunkify
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def test_splunkify_reserved_field():
|
|
9
|
-
with mock.patch.object(
|
|
10
|
-
flow.record.adapter.splunk,
|
|
11
|
-
"RESERVED_SPLUNK_FIELDS",
|
|
12
|
-
set(["foo"]),
|
|
13
|
-
):
|
|
14
|
-
test_record_descriptor = RecordDescriptor(
|
|
15
|
-
"test/record",
|
|
16
|
-
[("string", "foo")],
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
test_record = test_record_descriptor(foo="bar")
|
|
20
|
-
|
|
21
|
-
output = splunkify(test_record)
|
|
22
|
-
assert output == 'type="test/record" rdtag=None rd_foo="bar"'
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def test_splunkify_normal_field():
|
|
26
|
-
with mock.patch.object(
|
|
27
|
-
flow.record.adapter.splunk,
|
|
28
|
-
"RESERVED_SPLUNK_FIELDS",
|
|
29
|
-
set(),
|
|
30
|
-
):
|
|
31
|
-
test_record_descriptor = RecordDescriptor(
|
|
32
|
-
"test/record",
|
|
33
|
-
[("string", "foo")],
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
test_record = test_record_descriptor(foo="bar")
|
|
37
|
-
|
|
38
|
-
output = splunkify(test_record)
|
|
39
|
-
assert output == 'type="test/record" rdtag=None foo="bar"'
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def test_splunkify_rdtag_field():
|
|
43
|
-
with mock.patch.object(
|
|
44
|
-
flow.record.adapter.splunk,
|
|
45
|
-
"RESERVED_SPLUNK_FIELDS",
|
|
46
|
-
set(),
|
|
47
|
-
):
|
|
48
|
-
test_record_descriptor = RecordDescriptor("test/record", [])
|
|
49
|
-
|
|
50
|
-
test_record = test_record_descriptor()
|
|
51
|
-
|
|
52
|
-
output = splunkify(test_record, tag="bar")
|
|
53
|
-
assert output == 'type="test/record" rdtag="bar"'
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def test_splunkify_none_field():
|
|
57
|
-
with mock.patch.object(
|
|
58
|
-
flow.record.adapter.splunk,
|
|
59
|
-
"RESERVED_SPLUNK_FIELDS",
|
|
60
|
-
set(),
|
|
61
|
-
):
|
|
62
|
-
test_record_descriptor = RecordDescriptor(
|
|
63
|
-
"test/record",
|
|
64
|
-
[("string", "foo")],
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
test_record = test_record_descriptor()
|
|
68
|
-
|
|
69
|
-
output = splunkify(test_record)
|
|
70
|
-
assert output == 'type="test/record" rdtag=None foo=None'
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def test_splunkify_byte_field():
|
|
74
|
-
with mock.patch.object(
|
|
75
|
-
flow.record.adapter.splunk,
|
|
76
|
-
"RESERVED_SPLUNK_FIELDS",
|
|
77
|
-
set(),
|
|
78
|
-
):
|
|
79
|
-
test_record_descriptor = RecordDescriptor(
|
|
80
|
-
"test/record",
|
|
81
|
-
[("bytes", "foo")],
|
|
82
|
-
)
|
|
83
|
-
|
|
84
|
-
test_record = test_record_descriptor(foo=b"bar")
|
|
85
|
-
|
|
86
|
-
output = splunkify(test_record)
|
|
87
|
-
assert output == 'type="test/record" rdtag=None foo="YmFy"'
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def test_splunkify_backslash_quote_field():
|
|
91
|
-
with mock.patch.object(
|
|
92
|
-
flow.record.adapter.splunk,
|
|
93
|
-
"RESERVED_SPLUNK_FIELDS",
|
|
94
|
-
set(),
|
|
95
|
-
):
|
|
96
|
-
test_record_descriptor = RecordDescriptor(
|
|
97
|
-
"test/record",
|
|
98
|
-
[("string", "foo")],
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
test_record = test_record_descriptor(foo=b'\\"')
|
|
102
|
-
|
|
103
|
-
output = splunkify(test_record)
|
|
104
|
-
assert output == 'type="test/record" rdtag=None foo="\\\\\\""'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|