flow.record 3.15.dev14__tar.gz → 3.15.dev16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow.record-3.15.dev14/flow.record.egg-info → flow_record-3.15.dev16}/PKG-INFO +3 -1
- flow_record-3.15.dev16/flow/record/adapter/splunk.py +282 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/__init__.py +120 -7
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/jsonpacker.py +5 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/version.py +2 -2
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/whitelist.py +1 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16/flow.record.egg-info}/PKG-INFO +3 -1
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/requires.txt +3 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/pyproject.toml +3 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_fieldtypes.py +141 -7
- flow_record-3.15.dev16/tests/test_splunk_adapter.py +403 -0
- flow.record-3.15.dev14/flow/record/adapter/splunk.py +0 -90
- flow.record-3.15.dev14/tests/test_splunk_adapter.py +0 -104
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/COPYRIGHT +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/LICENSE +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/MANIFEST.in +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/README.md +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/filesystem.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/passivedns.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/records.json +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/tcpconn.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/__init__.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/__init__.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/archive.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/avro.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/broker.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/csvfile.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/duckdb.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/elastic.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/jsonfile.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/line.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/mongo.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/split.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/sqlite.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/stream.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/text.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/xlsx.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/base.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/exceptions.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/credential.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/packer.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/selector.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/stream.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/tools/__init__.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/tools/geoip.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/tools/rdump.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/utils.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/SOURCES.txt +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/entry_points.txt +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/top_level.txt +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/setup.cfg +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/__init__.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/_utils.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/docs/Makefile +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/docs/conf.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/docs/index.rst +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/selector_explain_example.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/standalone_test.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_avro.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_avro_adapter.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_compiled_selector.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_csv_adapter.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_deprecations.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_fieldtype_ip.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_json_packer.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_json_record_adapter.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_multi_timestamp.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_packer.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_rdump.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_record.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_record_adapter.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_record_descriptor.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_regression.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_selector.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/utils_inspect.py +0 -0
- {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev14
|
|
3
|
+
Version: 3.15.dev16
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -35,6 +35,8 @@ Requires-Dist: fastavro[snappy]; extra == "avro"
|
|
|
35
35
|
Provides-Extra: duckdb
|
|
36
36
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
37
37
|
Requires-Dist: pytz; extra == "duckdb"
|
|
38
|
+
Provides-Extra: splunk
|
|
39
|
+
Requires-Dist: httpx; extra == "splunk"
|
|
38
40
|
Provides-Extra: test
|
|
39
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
40
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import socket
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
HAS_HTTPX = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
HAS_HTTPX = False
|
|
16
|
+
|
|
17
|
+
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
18
|
+
from flow.record.base import Record
|
|
19
|
+
from flow.record.jsonpacker import JsonRecordPacker
|
|
20
|
+
from flow.record.utils import to_base64, to_bytes, to_str
|
|
21
|
+
|
|
22
|
+
__usage__ = """
|
|
23
|
+
Splunk output adapter (writer only)
|
|
24
|
+
---
|
|
25
|
+
Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&sourcetype=[SOURCETYPE]
|
|
26
|
+
[PROTOCOL]: Protocol to use for forwarding data. Can be tcp, http or https, defaults to tcp if omitted.
|
|
27
|
+
[IP]:[PORT]: ip and port to a splunk instance
|
|
28
|
+
[TAG]: optional value to add as "rdtag" output field when writing
|
|
29
|
+
[TOKEN]: Authentication token for sending data over HTTP(S)
|
|
30
|
+
[SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
|
|
31
|
+
[SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTP(S). Defaults to True.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
log = logging.getLogger(__package__)
|
|
35
|
+
|
|
36
|
+
# Amount of records to bundle into a single request when sending data over HTTP(S).
|
|
37
|
+
RECORD_BUFFER_LIMIT = 20
|
|
38
|
+
|
|
39
|
+
# https://docs.splunk.com/Documentation/Splunk/7.3.1/Data/Configureindex-timefieldextraction
|
|
40
|
+
RESERVED_SPLUNK_FIELDS = [
|
|
41
|
+
"_indextime",
|
|
42
|
+
"_time",
|
|
43
|
+
"index",
|
|
44
|
+
"punct",
|
|
45
|
+
"source",
|
|
46
|
+
"sourcetype",
|
|
47
|
+
"tag",
|
|
48
|
+
"type",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
RESERVED_RECORD_FIELDS = ["_classification", "_generated", "_source"]
|
|
52
|
+
|
|
53
|
+
PREFIX_WITH_RD = set(RESERVED_SPLUNK_FIELDS + RESERVED_RECORD_FIELDS)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Protocol(Enum):
|
|
57
|
+
HTTP = "http"
|
|
58
|
+
HTTPS = "https"
|
|
59
|
+
TCP = "tcp"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class SourceType(Enum):
|
|
63
|
+
JSON = "json"
|
|
64
|
+
RECORDS = "records"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
|
|
68
|
+
ret = []
|
|
69
|
+
|
|
70
|
+
ret.append(f'rdtype="{record._desc.name}"')
|
|
71
|
+
|
|
72
|
+
if tag is None:
|
|
73
|
+
ret.append("rdtag=None")
|
|
74
|
+
else:
|
|
75
|
+
ret.append(f'rdtag="{tag}"')
|
|
76
|
+
|
|
77
|
+
for field in record._desc.get_all_fields():
|
|
78
|
+
# Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
|
|
79
|
+
if field == "_version":
|
|
80
|
+
continue
|
|
81
|
+
|
|
82
|
+
val = getattr(record, field)
|
|
83
|
+
|
|
84
|
+
if field in PREFIX_WITH_RD:
|
|
85
|
+
field = f"rd_{field}"
|
|
86
|
+
|
|
87
|
+
if val is None:
|
|
88
|
+
ret.append(f"{field}=None")
|
|
89
|
+
else:
|
|
90
|
+
val = to_base64(val) if isinstance(val, bytes) else to_str(val)
|
|
91
|
+
val = val.replace("\\", "\\\\").replace('"', '\\"')
|
|
92
|
+
ret.append(f'{field}="{val}"')
|
|
93
|
+
|
|
94
|
+
return " ".join(ret)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
|
|
98
|
+
ret = {}
|
|
99
|
+
|
|
100
|
+
indexer_fields = [
|
|
101
|
+
("host", "host"),
|
|
102
|
+
("host", "hostname"),
|
|
103
|
+
("time", "ts"),
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
# When converting a record to json text for splunk, we distinguish between the 'event' (containing the data) and a
|
|
107
|
+
# few other fields that are splunk-specific for indexing. We add those 'indexer_fields' to the return object first.
|
|
108
|
+
for splunk_name, field_name in indexer_fields:
|
|
109
|
+
if hasattr(record, field_name):
|
|
110
|
+
val = getattr(record, field_name)
|
|
111
|
+
if val:
|
|
112
|
+
if isinstance(val, datetime):
|
|
113
|
+
# Convert datetime objects to epoch timestamp for reserved fields.
|
|
114
|
+
ret[splunk_name] = val.timestamp()
|
|
115
|
+
continue
|
|
116
|
+
ret[splunk_name] = to_str(val)
|
|
117
|
+
|
|
118
|
+
record_as_dict = packer.pack_obj(record)
|
|
119
|
+
|
|
120
|
+
# Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
|
|
121
|
+
del record_as_dict["_version"]
|
|
122
|
+
|
|
123
|
+
# These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
|
|
124
|
+
# record, we prefix them with 'rd_' (short for record descriptor)
|
|
125
|
+
for field in PREFIX_WITH_RD:
|
|
126
|
+
if field not in record_as_dict:
|
|
127
|
+
continue
|
|
128
|
+
new_field = f"rd_{field}"
|
|
129
|
+
|
|
130
|
+
record_as_dict[new_field] = record_as_dict[field]
|
|
131
|
+
del record_as_dict[field]
|
|
132
|
+
|
|
133
|
+
# Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
|
|
134
|
+
record_as_dict["rdtag"] = tag
|
|
135
|
+
|
|
136
|
+
# Yes.
|
|
137
|
+
record_as_dict["rdtype"] = record._desc.name
|
|
138
|
+
|
|
139
|
+
ret["event"] = record_as_dict
|
|
140
|
+
return json.dumps(ret, default=packer.pack_obj)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class SplunkWriter(AbstractWriter):
|
|
144
|
+
sock = None
|
|
145
|
+
session = None
|
|
146
|
+
|
|
147
|
+
def __init__(
|
|
148
|
+
self,
|
|
149
|
+
uri: str,
|
|
150
|
+
tag: Optional[str] = None,
|
|
151
|
+
token: Optional[str] = None,
|
|
152
|
+
sourcetype: Optional[str] = None,
|
|
153
|
+
ssl_verify: bool = True,
|
|
154
|
+
**kwargs,
|
|
155
|
+
):
|
|
156
|
+
# If the writer is initiated without a protocol, we assume we will be writing over tcp
|
|
157
|
+
if "://" not in uri:
|
|
158
|
+
uri = f"tcp://{uri}"
|
|
159
|
+
|
|
160
|
+
if sourcetype is None:
|
|
161
|
+
log.warning("No sourcetype provided, assuming 'records' sourcetype")
|
|
162
|
+
sourcetype = SourceType.RECORDS
|
|
163
|
+
|
|
164
|
+
parsed_url = urlparse(uri)
|
|
165
|
+
url_scheme = parsed_url.scheme.lower()
|
|
166
|
+
|
|
167
|
+
self.sourcetype = SourceType(sourcetype)
|
|
168
|
+
self.protocol = Protocol(url_scheme)
|
|
169
|
+
|
|
170
|
+
if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
|
|
171
|
+
raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")
|
|
172
|
+
|
|
173
|
+
self.host = parsed_url.hostname
|
|
174
|
+
self.port = parsed_url.port
|
|
175
|
+
self.tag = tag
|
|
176
|
+
self.record_buffer = []
|
|
177
|
+
self._warned = False
|
|
178
|
+
self.packer = None
|
|
179
|
+
|
|
180
|
+
if self.sourcetype == SourceType.JSON:
|
|
181
|
+
self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
|
|
182
|
+
|
|
183
|
+
if self.protocol == Protocol.TCP:
|
|
184
|
+
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
|
|
185
|
+
self.sock.connect((self.host, self.port))
|
|
186
|
+
self._send = self._send_tcp
|
|
187
|
+
elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
|
|
188
|
+
if not HAS_HTTPX:
|
|
189
|
+
raise ImportError("The httpx library is required for sending data over HTTP(S)")
|
|
190
|
+
|
|
191
|
+
scheme = self.protocol.value
|
|
192
|
+
self.token = token
|
|
193
|
+
if not self.token:
|
|
194
|
+
raise ValueError("An authorization token is required for the HTTP collector")
|
|
195
|
+
if not self.token.startswith("Splunk "):
|
|
196
|
+
self.token = f"Splunk {self.token}"
|
|
197
|
+
|
|
198
|
+
# Assume verify=True unless specified otherwise.
|
|
199
|
+
self.verify = str(ssl_verify).lower() not in ("0", "false")
|
|
200
|
+
if not self.verify:
|
|
201
|
+
log.warning("Certificate verification is disabled")
|
|
202
|
+
|
|
203
|
+
endpoint = "event" if self.sourcetype != SourceType.RECORDS else "raw"
|
|
204
|
+
port = f":{self.port}" if self.port else ""
|
|
205
|
+
self.url = f"{scheme}://{self.host}{port}/services/collector/{endpoint}?auto_extract_timestamp=true"
|
|
206
|
+
|
|
207
|
+
self.headers = {
|
|
208
|
+
"Authorization": self.token,
|
|
209
|
+
# A randomized value so that Splunk can loadbalance between different incoming datastreams
|
|
210
|
+
"X-Splunk-Request-Channel": str(uuid.uuid4()),
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
self.session = httpx.Client(verify=self.verify, headers=self.headers)
|
|
214
|
+
|
|
215
|
+
self._send = self._send_http
|
|
216
|
+
|
|
217
|
+
def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
|
|
218
|
+
# It's possible to call this function without any data, purely to flush. Hence this check.
|
|
219
|
+
if data:
|
|
220
|
+
self.record_buffer.append(data)
|
|
221
|
+
if len(self.record_buffer) < RECORD_BUFFER_LIMIT and not flush:
|
|
222
|
+
# Buffer limit not exceeded yet, so we do not return a buffer yet, unless buffer is explicitly flushed.
|
|
223
|
+
return
|
|
224
|
+
buf = b"".join(self.record_buffer)
|
|
225
|
+
if not buf:
|
|
226
|
+
return
|
|
227
|
+
|
|
228
|
+
# We're going to be returning a buffer for the writer to send, so we can clear the internal record buffer.
|
|
229
|
+
self.record_buffer.clear()
|
|
230
|
+
return buf
|
|
231
|
+
|
|
232
|
+
def _send(self, data: bytes) -> None:
|
|
233
|
+
raise RuntimeError("This method should be overridden at runtime")
|
|
234
|
+
|
|
235
|
+
def _send_http(self, data: Optional[bytes] = None, flush: bool = False) -> None:
|
|
236
|
+
buf = self._cache_records_for_http(data, flush)
|
|
237
|
+
if not buf:
|
|
238
|
+
return
|
|
239
|
+
response = self.session.post(self.url, data=buf)
|
|
240
|
+
if response.status_code != 200:
|
|
241
|
+
raise ConnectionError(f"{response.text} ({response.status_code})")
|
|
242
|
+
|
|
243
|
+
def _send_tcp(self, data: bytes) -> None:
|
|
244
|
+
self.sock.sendall(data)
|
|
245
|
+
|
|
246
|
+
def write(self, record: Record) -> None:
|
|
247
|
+
if not self._warned and "rdtag" in record._desc.fields:
|
|
248
|
+
self._warned = True
|
|
249
|
+
log.warning(
|
|
250
|
+
"Record has 'rdtag' field which conflicts with the Splunk adapter -- "
|
|
251
|
+
"Splunk output will have duplicate 'rdtag' fields",
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
if self.sourcetype == SourceType.RECORDS:
|
|
255
|
+
rec = splunkify_key_value(record, self.tag)
|
|
256
|
+
else:
|
|
257
|
+
rec = splunkify_json(self.packer, record, self.tag)
|
|
258
|
+
|
|
259
|
+
# Trail with a newline for line breaking.
|
|
260
|
+
data = to_bytes(rec) + b"\n"
|
|
261
|
+
|
|
262
|
+
self._send(data)
|
|
263
|
+
|
|
264
|
+
def flush(self) -> None:
|
|
265
|
+
if self.protocol in [Protocol.HTTP, Protocol.HTTPS]:
|
|
266
|
+
self._send_http(flush=True)
|
|
267
|
+
|
|
268
|
+
def close(self) -> None:
|
|
269
|
+
# For TCP
|
|
270
|
+
if self.sock:
|
|
271
|
+
self.sock.close()
|
|
272
|
+
self.sock = None
|
|
273
|
+
|
|
274
|
+
if self.session:
|
|
275
|
+
self.flush()
|
|
276
|
+
self.session.close()
|
|
277
|
+
self.session = None
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class SplunkReader(AbstractReader):
|
|
281
|
+
def __init__(self, path, selector=None, **kwargs):
|
|
282
|
+
raise NotImplementedError()
|
|
@@ -5,13 +5,14 @@ import math
|
|
|
5
5
|
import os
|
|
6
6
|
import pathlib
|
|
7
7
|
import re
|
|
8
|
+
import shlex
|
|
8
9
|
import sys
|
|
9
10
|
import warnings
|
|
10
11
|
from binascii import a2b_hex, b2a_hex
|
|
11
12
|
from datetime import datetime as _dt
|
|
12
13
|
from datetime import timezone
|
|
13
14
|
from posixpath import basename, dirname
|
|
14
|
-
from typing import Any, Optional
|
|
15
|
+
from typing import Any, Optional
|
|
15
16
|
from urllib.parse import urlparse
|
|
16
17
|
|
|
17
18
|
try:
|
|
@@ -34,8 +35,8 @@ UTC = timezone.utc
|
|
|
34
35
|
PY_311 = sys.version_info >= (3, 11, 0)
|
|
35
36
|
PY_312 = sys.version_info >= (3, 12, 0)
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
TYPE_POSIX = 0
|
|
39
|
+
TYPE_WINDOWS = 1
|
|
39
40
|
|
|
40
41
|
string_type = str
|
|
41
42
|
varint_type = int
|
|
@@ -694,15 +695,15 @@ class path(pathlib.PurePath, FieldType):
|
|
|
694
695
|
return repr(str(self))
|
|
695
696
|
|
|
696
697
|
def _pack(self):
|
|
697
|
-
path_type =
|
|
698
|
+
path_type = TYPE_WINDOWS if isinstance(self, windows_path) else TYPE_POSIX
|
|
698
699
|
return (str(self), path_type)
|
|
699
700
|
|
|
700
701
|
@classmethod
|
|
701
|
-
def _unpack(cls, data:
|
|
702
|
+
def _unpack(cls, data: tuple[str, str]):
|
|
702
703
|
path_, path_type = data
|
|
703
|
-
if path_type ==
|
|
704
|
+
if path_type == TYPE_POSIX:
|
|
704
705
|
return posix_path(path_)
|
|
705
|
-
elif path_type ==
|
|
706
|
+
elif path_type == TYPE_WINDOWS:
|
|
706
707
|
return windows_path(path_)
|
|
707
708
|
else:
|
|
708
709
|
# Catch all: default to posix_path
|
|
@@ -734,3 +735,115 @@ class windows_path(pathlib.PureWindowsPath, path):
|
|
|
734
735
|
quote = '"'
|
|
735
736
|
|
|
736
737
|
return f"{quote}{s}{quote}"
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
class command(FieldType):
|
|
741
|
+
executable: Optional[path] = None
|
|
742
|
+
args: Optional[list[str]] = None
|
|
743
|
+
|
|
744
|
+
_path_type: type[path] = None
|
|
745
|
+
_posix: bool
|
|
746
|
+
|
|
747
|
+
def __new__(cls, value: str) -> command:
|
|
748
|
+
if cls is not command:
|
|
749
|
+
return super().__new__(cls)
|
|
750
|
+
|
|
751
|
+
if not isinstance(value, str):
|
|
752
|
+
raise ValueError(f"Expected a value of type 'str' not {type(value)}")
|
|
753
|
+
|
|
754
|
+
# pre checking for windows like paths
|
|
755
|
+
# This checks for windows like starts of a path:
|
|
756
|
+
# an '%' for an environment variable
|
|
757
|
+
# r'\\' for a UNC path
|
|
758
|
+
# the strip and check for ":" on the second line is for `<drive_letter>:`
|
|
759
|
+
windows = value.startswith((r"\\", "%")) or value.lstrip("\"'")[1] == ":"
|
|
760
|
+
|
|
761
|
+
if windows:
|
|
762
|
+
cls = windows_command
|
|
763
|
+
else:
|
|
764
|
+
cls = posix_command
|
|
765
|
+
return super().__new__(cls)
|
|
766
|
+
|
|
767
|
+
def __init__(self, value: str | tuple[str, tuple[str]] | None):
|
|
768
|
+
if value is None:
|
|
769
|
+
return
|
|
770
|
+
|
|
771
|
+
if isinstance(value, str):
|
|
772
|
+
self.executable, self.args = self._split(value)
|
|
773
|
+
return
|
|
774
|
+
|
|
775
|
+
executable, self.args = value
|
|
776
|
+
self.executable = self._path_type(executable)
|
|
777
|
+
self.args = list(self.args)
|
|
778
|
+
|
|
779
|
+
def __repr__(self) -> str:
|
|
780
|
+
return f"(executable={self.executable!r}, args={self.args})"
|
|
781
|
+
|
|
782
|
+
def __eq__(self, other: Any) -> bool:
|
|
783
|
+
if isinstance(other, command):
|
|
784
|
+
return self.executable == other.executable and self.args == other.args
|
|
785
|
+
elif isinstance(other, str):
|
|
786
|
+
return self._join() == other
|
|
787
|
+
elif isinstance(other, (tuple, list)):
|
|
788
|
+
return self.executable == other[0] and self.args == list(other[1:])
|
|
789
|
+
|
|
790
|
+
return False
|
|
791
|
+
|
|
792
|
+
def _split(self, value: str) -> tuple[str, list[str]]:
|
|
793
|
+
executable, *args = shlex.split(value, posix=self._posix)
|
|
794
|
+
executable = executable.strip("'\" ")
|
|
795
|
+
|
|
796
|
+
return self._path_type(executable), args
|
|
797
|
+
|
|
798
|
+
def _join(self) -> str:
|
|
799
|
+
return shlex.join([str(self.executable)] + self.args)
|
|
800
|
+
|
|
801
|
+
def _pack(self) -> tuple[tuple[str, list], str]:
|
|
802
|
+
command_type = TYPE_WINDOWS if isinstance(self, windows_command) else TYPE_POSIX
|
|
803
|
+
if self.executable:
|
|
804
|
+
_exec, _ = self.executable._pack()
|
|
805
|
+
return ((_exec, self.args), command_type)
|
|
806
|
+
else:
|
|
807
|
+
return (None, command_type)
|
|
808
|
+
|
|
809
|
+
@classmethod
|
|
810
|
+
def _unpack(cls, data: tuple[tuple[str, tuple] | None, int]) -> command:
|
|
811
|
+
_value, _type = data
|
|
812
|
+
if _type == TYPE_WINDOWS:
|
|
813
|
+
return windows_command(_value)
|
|
814
|
+
|
|
815
|
+
return posix_command(_value)
|
|
816
|
+
|
|
817
|
+
@classmethod
|
|
818
|
+
def from_posix(cls, value: str) -> command:
|
|
819
|
+
return posix_command(value)
|
|
820
|
+
|
|
821
|
+
@classmethod
|
|
822
|
+
def from_windows(cls, value: str) -> command:
|
|
823
|
+
return windows_command(value)
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
class posix_command(command):
|
|
827
|
+
_posix = True
|
|
828
|
+
_path_type = posix_path
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
class windows_command(command):
|
|
832
|
+
_posix = False
|
|
833
|
+
_path_type = windows_path
|
|
834
|
+
|
|
835
|
+
def _split(self, value: str) -> tuple[str, list[str]]:
|
|
836
|
+
executable, args = super()._split(value)
|
|
837
|
+
if args:
|
|
838
|
+
args = [" ".join(args)]
|
|
839
|
+
|
|
840
|
+
return executable, args
|
|
841
|
+
|
|
842
|
+
def _join(self) -> str:
|
|
843
|
+
arg = f" {self.args[0]}" if self.args else ""
|
|
844
|
+
executable_str = str(self.executable)
|
|
845
|
+
|
|
846
|
+
if " " in executable_str:
|
|
847
|
+
return f"'{executable_str}'{arg}"
|
|
848
|
+
|
|
849
|
+
return f"{executable_str}{arg}"
|
|
@@ -72,6 +72,11 @@ class JsonRecordPacker:
|
|
|
72
72
|
return base64.b64encode(obj).decode()
|
|
73
73
|
if isinstance(obj, fieldtypes.path):
|
|
74
74
|
return str(obj)
|
|
75
|
+
if isinstance(obj, fieldtypes.command):
|
|
76
|
+
return {
|
|
77
|
+
"executable": obj.executable,
|
|
78
|
+
"args": obj.args,
|
|
79
|
+
}
|
|
75
80
|
|
|
76
81
|
raise Exception("Unpackable type " + str(type(obj)))
|
|
77
82
|
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.15.dev14'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 15, 'dev14')
|
|
15
|
+
__version__ = version = '3.15.dev16'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 15, 'dev16')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev14
|
|
3
|
+
Version: 3.15.dev16
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -35,6 +35,8 @@ Requires-Dist: fastavro[snappy]; extra == "avro"
|
|
|
35
35
|
Provides-Extra: duckdb
|
|
36
36
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
37
37
|
Requires-Dist: pytz; extra == "duckdb"
|
|
38
|
+
Provides-Extra: splunk
|
|
39
|
+
Requires-Dist: httpx; extra == "splunk"
|
|
38
40
|
Provides-Extra: test
|
|
39
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
40
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|