flow.record 3.15.dev14__tar.gz → 3.15.dev16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {flow.record-3.15.dev14/flow.record.egg-info → flow_record-3.15.dev16}/PKG-INFO +3 -1
  2. flow_record-3.15.dev16/flow/record/adapter/splunk.py +282 -0
  3. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/__init__.py +120 -7
  4. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/jsonpacker.py +5 -0
  5. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/version.py +2 -2
  6. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/whitelist.py +1 -0
  7. {flow.record-3.15.dev14 → flow_record-3.15.dev16/flow.record.egg-info}/PKG-INFO +3 -1
  8. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/requires.txt +3 -0
  9. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/pyproject.toml +3 -0
  10. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_fieldtypes.py +141 -7
  11. flow_record-3.15.dev16/tests/test_splunk_adapter.py +403 -0
  12. flow.record-3.15.dev14/flow/record/adapter/splunk.py +0 -90
  13. flow.record-3.15.dev14/tests/test_splunk_adapter.py +0 -104
  14. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/COPYRIGHT +0 -0
  15. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/LICENSE +0 -0
  16. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/MANIFEST.in +0 -0
  17. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/README.md +0 -0
  18. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/filesystem.py +0 -0
  19. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/passivedns.py +0 -0
  20. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/records.json +0 -0
  21. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/examples/tcpconn.py +0 -0
  22. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/__init__.py +0 -0
  23. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/__init__.py +0 -0
  24. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/archive.py +0 -0
  25. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/avro.py +0 -0
  26. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/broker.py +0 -0
  27. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/csvfile.py +0 -0
  28. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/duckdb.py +0 -0
  29. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/elastic.py +0 -0
  30. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/jsonfile.py +0 -0
  31. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/line.py +0 -0
  32. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/mongo.py +0 -0
  33. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/split.py +0 -0
  34. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/sqlite.py +0 -0
  35. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/stream.py +0 -0
  36. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/text.py +0 -0
  37. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/adapter/xlsx.py +0 -0
  38. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/base.py +0 -0
  39. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/exceptions.py +0 -0
  40. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/credential.py +0 -0
  41. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/__init__.py +0 -0
  42. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/ip.py +0 -0
  43. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/ipv4.py +0 -0
  44. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/tcp.py +0 -0
  45. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/fieldtypes/net/udp.py +0 -0
  46. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/packer.py +0 -0
  47. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/selector.py +0 -0
  48. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/stream.py +0 -0
  49. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/tools/__init__.py +0 -0
  50. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/tools/geoip.py +0 -0
  51. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/tools/rdump.py +0 -0
  52. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow/record/utils.py +0 -0
  53. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/SOURCES.txt +0 -0
  54. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/dependency_links.txt +0 -0
  55. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/entry_points.txt +0 -0
  56. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/flow.record.egg-info/top_level.txt +0 -0
  57. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/setup.cfg +0 -0
  58. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/__init__.py +0 -0
  59. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/_utils.py +0 -0
  60. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/docs/Makefile +0 -0
  61. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/docs/conf.py +0 -0
  62. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/docs/index.rst +0 -0
  63. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/selector_explain_example.py +0 -0
  64. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/standalone_test.py +0 -0
  65. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_avro.py +0 -0
  66. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_avro_adapter.py +0 -0
  67. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_compiled_selector.py +0 -0
  68. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_csv_adapter.py +0 -0
  69. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_deprecations.py +0 -0
  70. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_fieldtype_ip.py +0 -0
  71. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_json_packer.py +0 -0
  72. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_json_record_adapter.py +0 -0
  73. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_multi_timestamp.py +0 -0
  74. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_packer.py +0 -0
  75. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_rdump.py +0 -0
  76. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_record.py +0 -0
  77. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_record_adapter.py +0 -0
  78. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_record_descriptor.py +0 -0
  79. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_regression.py +0 -0
  80. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_selector.py +0 -0
  81. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/test_sqlite_duckdb_adapter.py +0 -0
  82. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tests/utils_inspect.py +0 -0
  83. {flow.record-3.15.dev14 → flow_record-3.15.dev16}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.15.dev14
3
+ Version: 3.15.dev16
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -35,6 +35,8 @@ Requires-Dist: fastavro[snappy]; extra == "avro"
35
35
  Provides-Extra: duckdb
36
36
  Requires-Dist: duckdb; extra == "duckdb"
37
37
  Requires-Dist: pytz; extra == "duckdb"
38
+ Provides-Extra: splunk
39
+ Requires-Dist: httpx; extra == "splunk"
38
40
  Provides-Extra: test
39
41
  Requires-Dist: flow.record[compression]; extra == "test"
40
42
  Requires-Dist: flow.record[avro]; extra == "test"
@@ -0,0 +1,282 @@
1
+ import json
2
+ import logging
3
+ import socket
4
+ import uuid
5
+ from datetime import datetime
6
+ from enum import Enum
7
+ from typing import Optional
8
+ from urllib.parse import urlparse
9
+
10
+ try:
11
+ import httpx
12
+
13
+ HAS_HTTPX = True
14
+ except ImportError:
15
+ HAS_HTTPX = False
16
+
17
+ from flow.record.adapter import AbstractReader, AbstractWriter
18
+ from flow.record.base import Record
19
+ from flow.record.jsonpacker import JsonRecordPacker
20
+ from flow.record.utils import to_base64, to_bytes, to_str
21
+
22
+ __usage__ = """
23
+ Splunk output adapter (writer only)
24
+ ---
25
+ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&sourcetype=[SOURCETYPE]
26
+ [PROTOCOL]: Protocol to use for forwarding data. Can be tcp, http or https, defaults to tcp if omitted.
27
+ [IP]:[PORT]: ip and port to a splunk instance
28
+ [TAG]: optional value to add as "rdtag" output field when writing
29
+ [TOKEN]: Authentication token for sending data over HTTP(S)
30
+ [SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
31
+ [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTP(S). Defaults to True.
32
+ """
33
+
34
+ log = logging.getLogger(__package__)
35
+
36
+ # Amount of records to bundle into a single request when sending data over HTTP(S).
37
+ RECORD_BUFFER_LIMIT = 20
38
+
39
+ # https://docs.splunk.com/Documentation/Splunk/7.3.1/Data/Configureindex-timefieldextraction
40
+ RESERVED_SPLUNK_FIELDS = [
41
+ "_indextime",
42
+ "_time",
43
+ "index",
44
+ "punct",
45
+ "source",
46
+ "sourcetype",
47
+ "tag",
48
+ "type",
49
+ ]
50
+
51
+ RESERVED_RECORD_FIELDS = ["_classification", "_generated", "_source"]
52
+
53
+ PREFIX_WITH_RD = set(RESERVED_SPLUNK_FIELDS + RESERVED_RECORD_FIELDS)
54
+
55
+
56
class Protocol(Enum):
    """Transport protocols accepted in the adapter URI scheme (tcp/http/https)."""

    HTTP = "http"
    HTTPS = "https"
    TCP = "tcp"
60
+
61
+
62
class SourceType(Enum):
    """Supported Splunk sourcetypes; TCP transport only allows RECORDS."""

    JSON = "json"
    RECORDS = "records"
65
+
66
+
67
def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
    """Render a record as a single line of Splunk ``key="value"`` pairs.

    ``rdtype`` (the record descriptor name) and ``rdtag`` always come first.
    Field names that collide with reserved Splunk or record fields are
    prefixed with ``rd_``. Bytes values are base64-encoded; backslashes and
    double quotes are escaped so values survive Splunk's quoting.
    """
    pairs = [f'rdtype="{record._desc.name}"']
    pairs.append("rdtag=None" if tag is None else f'rdtag="{tag}"')

    for name in record._desc.get_all_fields():
        # Omit the _version field as the Splunk adapter has no reader support
        # for deserialising records back.
        if name == "_version":
            continue

        value = getattr(record, name)
        key = f"rd_{name}" if name in PREFIX_WITH_RD else name

        if value is None:
            pairs.append(f"{key}=None")
            continue

        text = to_base64(value) if isinstance(value, bytes) else to_str(value)
        text = text.replace("\\", "\\\\").replace('"', '\\"')
        pairs.append(f'{key}="{text}"')

    return " ".join(pairs)
95
+
96
+
97
def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
    """Render a record as a Splunk HTTP Event Collector JSON payload.

    The payload separates Splunk indexer fields (``host``, ``time``) from the
    ``event`` body that carries the record data. Reserved field names in the
    event are prefixed with ``rd_`` (short for record descriptor), and the
    record descriptor name / tag are added as ``rdtype`` / ``rdtag``.
    """
    envelope = {}

    # Splunk-specific indexer fields, mapped as (splunk name, record field).
    # Note: a "hostname" field overwrites a "host" field since both map to "host".
    for splunk_name, field_name in (("host", "host"), ("host", "hostname"), ("time", "ts")):
        if not hasattr(record, field_name):
            continue
        value = getattr(record, field_name)
        if not value:
            continue
        # Datetime objects become epoch timestamps for the reserved fields.
        envelope[splunk_name] = value.timestamp() if isinstance(value, datetime) else to_str(value)

    event = packer.pack_obj(record)

    # Omit the _version field as the Splunk adapter has no reader support for
    # deserialising records back.
    del event["_version"]

    # Reserved field names present in the record are renamed with an 'rd_' prefix.
    for reserved in PREFIX_WITH_RD:
        if reserved in event:
            event[f"rd_{reserved}"] = event.pop(reserved)

    event["rdtag"] = tag
    event["rdtype"] = record._desc.name

    envelope["event"] = event
    return json.dumps(envelope, default=packer.pack_obj)
141
+
142
+
143
class SplunkWriter(AbstractWriter):
    """Write records to a Splunk instance over TCP or the HTTP(S) Event Collector.

    TCP sends each record immediately as key="value" text (sourcetype
    'records' only). HTTP(S) buffers up to RECORD_BUFFER_LIMIT records and
    posts them to the collector endpoint in one request.
    """

    # TCP socket; only set when writing over tcp://.
    sock = None
    # httpx.Client; only set when writing over http(s)://.
    session = None

    def __init__(
        self,
        uri: str,
        tag: Optional[str] = None,
        token: Optional[str] = None,
        sourcetype: Optional[str] = None,
        ssl_verify: bool = True,
        **kwargs,
    ):
        """Parse the target URI and set up the TCP socket or HTTP(S) session.

        Args:
            uri: Target, e.g. ``tcp://host:port`` or ``https://host:port``.
                 A URI without a scheme defaults to tcp.
            tag: Optional value written as the ``rdtag`` output field.
            token: HEC authentication token; required for HTTP(S).
            sourcetype: 'records' (default) or 'json'.
            ssl_verify: Whether to verify the server certificate over HTTP(S).

        Raises:
            ValueError: On an invalid sourcetype/protocol combination or a
                missing HTTP token.
            ImportError: If httpx is not installed and HTTP(S) is requested.
        """
        # If the writer is initiated without a protocol, we assume we will be writing over tcp
        if "://" not in uri:
            uri = f"tcp://{uri}"

        if sourcetype is None:
            log.warning("No sourcetype provided, assuming 'records' sourcetype")
            sourcetype = SourceType.RECORDS

        parsed_url = urlparse(uri)
        url_scheme = parsed_url.scheme.lower()

        # Enum construction doubles as validation of the user-supplied values.
        self.sourcetype = SourceType(sourcetype)
        self.protocol = Protocol(url_scheme)

        if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
            raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")

        self.host = parsed_url.hostname
        self.port = parsed_url.port
        self.tag = tag
        self.record_buffer = []
        self._warned = False
        self.packer = None

        if self.sourcetype == SourceType.JSON:
            self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)

        if self.protocol == Protocol.TCP:
            self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
            self.sock.connect((self.host, self.port))
            # _send is bound at runtime to the transport-specific sender.
            self._send = self._send_tcp
        elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
            if not HAS_HTTPX:
                raise ImportError("The httpx library is required for sending data over HTTP(S)")

            scheme = self.protocol.value
            self.token = token
            if not self.token:
                raise ValueError("An authorization token is required for the HTTP collector")
            if not self.token.startswith("Splunk "):
                self.token = f"Splunk {self.token}"

            # Assume verify=True unless specified otherwise.
            self.verify = str(ssl_verify).lower() not in ("0", "false")
            if not self.verify:
                log.warning("Certificate verification is disabled")

            # 'records' goes to the raw endpoint, json to the event endpoint.
            endpoint = "event" if self.sourcetype != SourceType.RECORDS else "raw"
            port = f":{self.port}" if self.port else ""
            self.url = f"{scheme}://{self.host}{port}/services/collector/{endpoint}?auto_extract_timestamp=true"

            self.headers = {
                "Authorization": self.token,
                # A randomized value so that Splunk can loadbalance between different incoming datastreams
                "X-Splunk-Request-Channel": str(uuid.uuid4()),
            }

            self.session = httpx.Client(verify=self.verify, headers=self.headers)

            self._send = self._send_http

    def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
        """Buffer *data* and return the joined buffer once it should be sent.

        Returns None while the buffer is below RECORD_BUFFER_LIMIT (unless
        *flush* is set) or when there is nothing to send.
        """
        # It's possible to call this function without any data, purely to flush. Hence this check.
        if data:
            self.record_buffer.append(data)
        if len(self.record_buffer) < RECORD_BUFFER_LIMIT and not flush:
            # Buffer limit not exceeded yet, so we do not return a buffer yet, unless buffer is explicitly flushed.
            return
        buf = b"".join(self.record_buffer)
        if not buf:
            return

        # We're going to be returning a buffer for the writer to send, so we can clear the internal record buffer.
        self.record_buffer.clear()
        return buf

    def _send(self, data: bytes) -> None:
        # Placeholder; __init__ rebinds this to _send_tcp or _send_http.
        raise RuntimeError("This method should be overridden at runtime")

    def _send_http(self, data: Optional[bytes] = None, flush: bool = False) -> None:
        """POST buffered records to the HEC endpoint; raises on non-200 responses."""
        buf = self._cache_records_for_http(data, flush)
        if not buf:
            return
        response = self.session.post(self.url, data=buf)
        if response.status_code != 200:
            raise ConnectionError(f"{response.text} ({response.status_code})")

    def _send_tcp(self, data: bytes) -> None:
        """Send *data* directly over the TCP socket."""
        self.sock.sendall(data)

    def write(self, record: Record) -> None:
        """Serialize *record* per the configured sourcetype and send it."""
        # Warn once if the record itself carries an 'rdtag' field, which the
        # adapter also emits.
        if not self._warned and "rdtag" in record._desc.fields:
            self._warned = True
            log.warning(
                "Record has 'rdtag' field which conflicts with the Splunk adapter -- "
                "Splunk output will have duplicate 'rdtag' fields",
            )

        if self.sourcetype == SourceType.RECORDS:
            rec = splunkify_key_value(record, self.tag)
        else:
            rec = splunkify_json(self.packer, record, self.tag)

        # Trail with a newline for line breaking.
        data = to_bytes(rec) + b"\n"

        self._send(data)

    def flush(self) -> None:
        """Force out any records still buffered for HTTP(S); no-op for TCP."""
        if self.protocol in [Protocol.HTTP, Protocol.HTTPS]:
            self._send_http(flush=True)

    def close(self) -> None:
        """Release the TCP socket and/or flush and close the HTTP session."""
        # For TCP
        if self.sock:
            self.sock.close()
            self.sock = None

        if self.session:
            self.flush()
            self.session.close()
            self.session = None
278
+
279
+
280
class SplunkReader(AbstractReader):
    """Reading records back from Splunk is not supported; this adapter is writer-only."""

    def __init__(self, path, selector=None, **kwargs):
        raise NotImplementedError()
@@ -5,13 +5,14 @@ import math
5
5
  import os
6
6
  import pathlib
7
7
  import re
8
+ import shlex
8
9
  import sys
9
10
  import warnings
10
11
  from binascii import a2b_hex, b2a_hex
11
12
  from datetime import datetime as _dt
12
13
  from datetime import timezone
13
14
  from posixpath import basename, dirname
14
- from typing import Any, Optional, Tuple
15
+ from typing import Any, Optional
15
16
  from urllib.parse import urlparse
16
17
 
17
18
  try:
@@ -34,8 +35,8 @@ UTC = timezone.utc
34
35
  PY_311 = sys.version_info >= (3, 11, 0)
35
36
  PY_312 = sys.version_info >= (3, 12, 0)
36
37
 
37
- PATH_POSIX = 0
38
- PATH_WINDOWS = 1
38
+ TYPE_POSIX = 0
39
+ TYPE_WINDOWS = 1
39
40
 
40
41
  string_type = str
41
42
  varint_type = int
@@ -694,15 +695,15 @@ class path(pathlib.PurePath, FieldType):
694
695
  return repr(str(self))
695
696
 
696
697
  def _pack(self):
697
- path_type = PATH_WINDOWS if isinstance(self, windows_path) else PATH_POSIX
698
+ path_type = TYPE_WINDOWS if isinstance(self, windows_path) else TYPE_POSIX
698
699
  return (str(self), path_type)
699
700
 
700
701
  @classmethod
701
- def _unpack(cls, data: Tuple[str, str]):
702
+ def _unpack(cls, data: tuple[str, str]):
702
703
  path_, path_type = data
703
- if path_type == PATH_POSIX:
704
+ if path_type == TYPE_POSIX:
704
705
  return posix_path(path_)
705
- elif path_type == PATH_WINDOWS:
706
+ elif path_type == TYPE_WINDOWS:
706
707
  return windows_path(path_)
707
708
  else:
708
709
  # Catch all: default to posix_path
@@ -734,3 +735,115 @@ class windows_path(pathlib.PureWindowsPath, path):
734
735
  quote = '"'
735
736
 
736
737
  return f"{quote}{s}{quote}"
738
+
739
+
740
class command(FieldType):
    """A command line, split into an executable ``path`` and its arguments.

    Constructing ``command(value)`` auto-detects the platform flavour from the
    string and instantiates ``windows_command`` or ``posix_command``.
    """

    executable: Optional[path] = None
    args: Optional[list[str]] = None

    # Set by the concrete subclasses: path flavour for the executable and
    # whether shlex splits with POSIX rules.
    _path_type: type[path] = None
    _posix: bool

    def __new__(cls, value: str) -> command:
        # Subclasses are constructed directly, without platform detection.
        if cls is not command:
            return super().__new__(cls)

        if not isinstance(value, str):
            raise ValueError(f"Expected a value of type 'str' not {type(value)}")

        # pre checking for windows like paths
        # This checks for windows like starts of a path:
        # an '%' for an environment variable
        # r'\\' for a UNC path
        # a `<drive_letter>:` prefix (after stripping leading quotes)
        # NOTE: use a [1:2] slice instead of [1] so that values shorter than
        # two characters compare as non-Windows instead of raising IndexError.
        windows = value.startswith((r"\\", "%")) or value.lstrip("\"'")[1:2] == ":"

        cls = windows_command if windows else posix_command
        return super().__new__(cls)

    def __init__(self, value: str | tuple[str, tuple[str]] | None):
        """Initialize from a raw command string, a packed (executable, args) tuple, or None."""
        if value is None:
            return

        if isinstance(value, str):
            self.executable, self.args = self._split(value)
            return

        # Packed form: restore the executable with the flavour's path type.
        executable, self.args = value
        self.executable = self._path_type(executable)
        self.args = list(self.args)

    def __repr__(self) -> str:
        return f"(executable={self.executable!r}, args={self.args})"

    def __eq__(self, other: Any) -> bool:
        """Compare against another command, a joined string, or an argv sequence."""
        if isinstance(other, command):
            return self.executable == other.executable and self.args == other.args
        elif isinstance(other, str):
            return self._join() == other
        elif isinstance(other, (tuple, list)):
            return self.executable == other[0] and self.args == list(other[1:])

        return False

    def _split(self, value: str) -> tuple[str, list[str]]:
        """Split a command string into (executable path, argument list)."""
        executable, *args = shlex.split(value, posix=self._posix)
        executable = executable.strip("'\" ")

        return self._path_type(executable), args

    def _join(self) -> str:
        """Join the executable and arguments back into a shell-quoted string."""
        return shlex.join([str(self.executable)] + self.args)

    def _pack(self) -> tuple[tuple[str, list], str]:
        """Pack into ((executable, args), type) for serialization; executable may be None."""
        command_type = TYPE_WINDOWS if isinstance(self, windows_command) else TYPE_POSIX
        if self.executable:
            _exec, _ = self.executable._pack()
            return ((_exec, self.args), command_type)
        else:
            return (None, command_type)

    @classmethod
    def _unpack(cls, data: tuple[tuple[str, tuple] | None, int]) -> command:
        """Restore a command of the correct flavour from its packed form."""
        _value, _type = data
        if _type == TYPE_WINDOWS:
            return windows_command(_value)

        return posix_command(_value)

    @classmethod
    def from_posix(cls, value: str) -> command:
        """Explicitly parse *value* with POSIX splitting rules."""
        return posix_command(value)

    @classmethod
    def from_windows(cls, value: str) -> command:
        """Explicitly parse *value* with Windows splitting rules."""
        return windows_command(value)
824
+
825
+
826
class posix_command(command):
    """POSIX flavour of ``command``: POSIX shlex splitting, ``posix_path`` executable."""

    _posix = True
    _path_type = posix_path
829
+
830
+
831
class windows_command(command):
    """Windows flavour of ``command``: non-POSIX splitting, ``windows_path`` executable.

    Unlike POSIX, the arguments are kept as a single string rather than a list
    of individual tokens.
    """

    _posix = False
    _path_type = windows_path

    def _split(self, value: str) -> tuple[str, list[str]]:
        executable, tokens = super()._split(value)
        # Collapse all argument tokens into one string, Windows-style.
        return executable, ([" ".join(tokens)] if tokens else tokens)

    def _join(self) -> str:
        exe = str(self.executable)
        suffix = f" {self.args[0]}" if self.args else ""

        # Quote the executable when its path contains spaces.
        return f"'{exe}'{suffix}" if " " in exe else f"{exe}{suffix}"
@@ -72,6 +72,11 @@ class JsonRecordPacker:
72
72
  return base64.b64encode(obj).decode()
73
73
  if isinstance(obj, fieldtypes.path):
74
74
  return str(obj)
75
+ if isinstance(obj, fieldtypes.command):
76
+ return {
77
+ "executable": obj.executable,
78
+ "args": obj.args,
79
+ }
75
80
 
76
81
  raise Exception("Unpackable type " + str(type(obj)))
77
82
 
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.15.dev14'
16
- __version_tuple__ = version_tuple = (3, 15, 'dev14')
15
+ __version__ = version = '3.15.dev16'
16
+ __version_tuple__ = version_tuple = (3, 15, 'dev16')
@@ -1,5 +1,6 @@
1
1
  WHITELIST = [
2
2
  "boolean",
3
+ "command",
3
4
  "dynamic",
4
5
  "datetime",
5
6
  "filesize",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.15.dev14
3
+ Version: 3.15.dev16
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -35,6 +35,8 @@ Requires-Dist: fastavro[snappy]; extra == "avro"
35
35
  Provides-Extra: duckdb
36
36
  Requires-Dist: duckdb; extra == "duckdb"
37
37
  Requires-Dist: pytz; extra == "duckdb"
38
+ Provides-Extra: splunk
39
+ Requires-Dist: httpx; extra == "splunk"
38
40
  Provides-Extra: test
39
41
  Requires-Dist: flow.record[compression]; extra == "test"
40
42
  Requires-Dist: flow.record[avro]; extra == "test"
@@ -23,6 +23,9 @@ elasticsearch
23
23
  [geoip]
24
24
  maxminddb
25
25
 
26
+ [splunk]
27
+ httpx
28
+
26
29
  [test]
27
30
  flow.record[compression]
28
31
  flow.record[avro]
@@ -53,6 +53,9 @@ duckdb = [
53
53
  "duckdb",
54
54
  "pytz", # duckdb requires pytz for timezone support
55
55
  ]
56
+ splunk = [
57
+ "httpx",
58
+ ]
56
59
  test = [
57
60
  "flow.record[compression]",
58
61
  "flow.record[avro]",