flow.record 3.16.dev4__tar.gz → 3.16.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. {flow_record-3.16.dev4/flow.record.egg-info → flow_record-3.16.dev6}/PKG-INFO +1 -1
  2. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/__init__.py +2 -0
  3. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/splunk.py +79 -51
  4. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/version.py +2 -2
  5. {flow_record-3.16.dev4 → flow_record-3.16.dev6/flow.record.egg-info}/PKG-INFO +1 -1
  6. flow_record-3.16.dev6/tests/test_splunk_adapter.py +433 -0
  7. flow_record-3.16.dev4/tests/test_splunk_adapter.py +0 -403
  8. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/COPYRIGHT +0 -0
  9. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/LICENSE +0 -0
  10. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/MANIFEST.in +0 -0
  11. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/README.md +0 -0
  12. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/examples/filesystem.py +0 -0
  13. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/examples/passivedns.py +0 -0
  14. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/examples/records.json +0 -0
  15. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/examples/tcpconn.py +0 -0
  16. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/__init__.py +0 -0
  17. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/archive.py +0 -0
  18. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/avro.py +0 -0
  19. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/broker.py +0 -0
  20. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/csvfile.py +0 -0
  21. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/duckdb.py +0 -0
  22. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/elastic.py +0 -0
  23. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/jsonfile.py +0 -0
  24. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/line.py +0 -0
  25. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/mongo.py +0 -0
  26. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/split.py +0 -0
  27. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/sqlite.py +0 -0
  28. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/stream.py +0 -0
  29. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/text.py +0 -0
  30. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/adapter/xlsx.py +0 -0
  31. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/base.py +0 -0
  32. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/exceptions.py +0 -0
  33. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/__init__.py +0 -0
  34. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/credential.py +0 -0
  35. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/__init__.py +0 -0
  36. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/ip.py +0 -0
  37. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/ipv4.py +0 -0
  38. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/tcp.py +0 -0
  39. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/fieldtypes/net/udp.py +0 -0
  40. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/jsonpacker.py +0 -0
  41. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/packer.py +0 -0
  42. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/selector.py +0 -0
  43. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/stream.py +0 -0
  44. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/tools/__init__.py +0 -0
  45. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/tools/geoip.py +0 -0
  46. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/tools/rdump.py +0 -0
  47. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/utils.py +0 -0
  48. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow/record/whitelist.py +0 -0
  49. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow.record.egg-info/SOURCES.txt +0 -0
  50. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow.record.egg-info/dependency_links.txt +0 -0
  51. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow.record.egg-info/entry_points.txt +0 -0
  52. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow.record.egg-info/requires.txt +0 -0
  53. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/flow.record.egg-info/top_level.txt +0 -0
  54. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/pyproject.toml +0 -0
  55. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/setup.cfg +0 -0
  56. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/__init__.py +0 -0
  57. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/_utils.py +0 -0
  58. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/docs/Makefile +0 -0
  59. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/docs/conf.py +0 -0
  60. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/docs/index.rst +0 -0
  61. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/selector_explain_example.py +0 -0
  62. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/standalone_test.py +0 -0
  63. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_avro.py +0 -0
  64. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_avro_adapter.py +0 -0
  65. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_compiled_selector.py +0 -0
  66. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_csv_adapter.py +0 -0
  67. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_deprecations.py +0 -0
  68. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_elastic_adapter.py +0 -0
  69. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_fieldtype_ip.py +0 -0
  70. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_fieldtypes.py +0 -0
  71. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_json_packer.py +0 -0
  72. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_json_record_adapter.py +0 -0
  73. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_multi_timestamp.py +0 -0
  74. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_packer.py +0 -0
  75. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_rdump.py +0 -0
  76. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_record.py +0 -0
  77. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_record_adapter.py +0 -0
  78. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_record_descriptor.py +0 -0
  79. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_regression.py +0 -0
  80. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_selector.py +0 -0
  81. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/test_sqlite_duckdb_adapter.py +0 -0
  82. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tests/utils_inspect.py +0 -0
  83. {flow_record-3.16.dev4 → flow_record-3.16.dev6}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.16.dev4
3
+ Version: 3.16.dev6
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -17,6 +17,7 @@ from flow.record.base import (
17
17
  RecordWriter,
18
18
  dynamic_fieldtype,
19
19
  extend_record,
20
+ ignore_fields_for_comparison,
20
21
  iter_timestamped_records,
21
22
  open_path,
22
23
  open_path_or_stream,
@@ -57,6 +58,7 @@ __all__ = [
57
58
  "open_path_or_stream",
58
59
  "open_path",
59
60
  "open_stream",
61
+ "ignore_fields_for_comparison",
60
62
  "set_ignored_fields_for_comparison",
61
63
  "stream",
62
64
  "dynamic_fieldtype",
@@ -28,7 +28,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
28
28
  [TAG]: optional value to add as "rdtag" output field when writing
29
29
  [TOKEN]: Authentication token for sending data over HTTP(S)
30
30
  [SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
31
- [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTP(S). Defaults to True.
31
+ [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
32
32
  """
33
33
 
34
34
  log = logging.getLogger(__package__)
@@ -36,21 +36,38 @@ log = logging.getLogger(__package__)
36
36
  # Amount of records to bundle into a single request when sending data over HTTP(S).
37
37
  RECORD_BUFFER_LIMIT = 20
38
38
 
39
- # https://docs.splunk.com/Documentation/Splunk/7.3.1/Data/Configureindex-timefieldextraction
40
- RESERVED_SPLUNK_FIELDS = [
41
- "_indextime",
42
- "_time",
43
- "index",
44
- "punct",
45
- "source",
46
- "sourcetype",
47
- "tag",
48
- "type",
49
- ]
39
+ # List of reserved splunk fields that do not start with an `_`, as those will be escaped anyway.
40
+ # See: https://docs.splunk.com/Documentation/Splunk/9.2.1/Data/Aboutdefaultfields
41
+ RESERVED_SPLUNK_FIELDS = set(
42
+ [
43
+ "host",
44
+ "index",
45
+ "linecount",
46
+ "punct",
47
+ "source",
48
+ "sourcetype",
49
+ "splunk_server",
50
+ "timestamp",
51
+ ],
52
+ )
53
+
54
+ RESERVED_SPLUNK_APP_FIELDS = set(
55
+ [
56
+ "tag",
57
+ "type",
58
+ ]
59
+ )
60
+
61
+ RESERVED_RDUMP_FIELDS = set(
62
+ [
63
+ "rdtag",
64
+ "rdtype",
65
+ ],
66
+ )
50
67
 
51
- RESERVED_RECORD_FIELDS = ["_classification", "_generated", "_source"]
68
+ RESERVED_FIELDS = RESERVED_SPLUNK_FIELDS.union(RESERVED_SPLUNK_APP_FIELDS.union(RESERVED_RDUMP_FIELDS))
52
69
 
53
- PREFIX_WITH_RD = set(RESERVED_SPLUNK_FIELDS + RESERVED_RECORD_FIELDS)
70
+ ESCAPE = "rd_"
54
71
 
55
72
 
56
73
  class Protocol(Enum):
@@ -64,7 +81,13 @@ class SourceType(Enum):
64
81
  RECORDS = "records"
65
82
 
66
83
 
67
- def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
84
+ def escape_field_name(field: str) -> str:
85
+ if field.startswith(("_", ESCAPE)) or field in RESERVED_FIELDS:
86
+ field = f"{ESCAPE}{field}"
87
+ return field
88
+
89
+
90
+ def record_to_splunk_kv_line(record: Record, tag: Optional[str] = None) -> str:
68
91
  ret = []
69
92
 
70
93
  ret.append(f'rdtype="{record._desc.name}"')
@@ -81,8 +104,7 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
81
104
 
82
105
  val = getattr(record, field)
83
106
 
84
- if field in PREFIX_WITH_RD:
85
- field = f"rd_{field}"
107
+ field = escape_field_name(field)
86
108
 
87
109
  if val is None:
88
110
  ret.append(f"{field}=None")
@@ -94,7 +116,25 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
94
116
  return " ".join(ret)
95
117
 
96
118
 
97
- def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
119
+ def record_to_splunk_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> dict:
120
+ record_as_dict = packer.pack_obj(record)
121
+ json_dict = {}
122
+
123
+ for field, value in record_as_dict.items():
124
+ # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
125
+ if field == "_version":
126
+ continue
127
+ escaped_field = escape_field_name(field)
128
+ json_dict[escaped_field] = value
129
+
130
+ # Add rdump specific fields
131
+ json_dict["rdtag"] = tag
132
+ json_dict["rdtype"] = record._desc.name
133
+
134
+ return json_dict
135
+
136
+
137
+ def record_to_splunk_http_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
98
138
  ret = {}
99
139
 
100
140
  indexer_fields = [
@@ -115,29 +155,13 @@ def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str]
115
155
  continue
116
156
  ret[splunk_name] = to_str(val)
117
157
 
118
- record_as_dict = packer.pack_obj(record)
119
-
120
- # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
121
- del record_as_dict["_version"]
122
-
123
- # These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
124
- # record, we prefix them with 'rd_' (short for record descriptor)
125
- for field in PREFIX_WITH_RD:
126
- if field not in record_as_dict:
127
- continue
128
- new_field = f"rd_{field}"
129
-
130
- record_as_dict[new_field] = record_as_dict[field]
131
- del record_as_dict[field]
132
-
133
- # Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
134
- record_as_dict["rdtag"] = tag
158
+ ret["event"] = record_to_splunk_json(packer, record, tag)
159
+ return json.dumps(ret, default=packer.pack_obj)
135
160
 
136
- # Yes.
137
- record_as_dict["rdtype"] = record._desc.name
138
161
 
139
- ret["event"] = record_as_dict
140
- return json.dumps(ret, default=packer.pack_obj)
162
+ def record_to_splunk_tcp_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
163
+ record_dict = record_to_splunk_json(packer, record, tag)
164
+ return json.dumps(record_dict, default=packer.pack_obj)
141
165
 
142
166
 
143
167
  class SplunkWriter(AbstractWriter):
@@ -159,31 +183,31 @@ class SplunkWriter(AbstractWriter):
159
183
 
160
184
  if sourcetype is None:
161
185
  log.warning("No sourcetype provided, assuming 'records' sourcetype")
162
- sourcetype = SourceType.RECORDS
186
+ self.sourcetype = SourceType.RECORDS
187
+ else:
188
+ self.sourcetype = SourceType(sourcetype)
163
189
 
164
190
  parsed_url = urlparse(uri)
165
191
  url_scheme = parsed_url.scheme.lower()
166
-
167
- self.sourcetype = SourceType(sourcetype)
168
192
  self.protocol = Protocol(url_scheme)
169
-
170
- if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
171
- raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")
172
-
173
193
  self.host = parsed_url.hostname
174
194
  self.port = parsed_url.port
195
+
175
196
  self.tag = tag
176
197
  self.record_buffer = []
177
198
  self._warned = False
178
199
  self.packer = None
179
-
180
- if self.sourcetype == SourceType.JSON:
181
- self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
200
+ self.json_converter = None
182
201
 
183
202
  if self.protocol == Protocol.TCP:
184
203
  self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
185
204
  self.sock.connect((self.host, self.port))
186
205
  self._send = self._send_tcp
206
+
207
+ if self.sourcetype == SourceType.JSON:
208
+ self.packer = JsonRecordPacker(indent=None, pack_descriptors=False)
209
+ self.json_converter = record_to_splunk_tcp_api_json
210
+
187
211
  elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
188
212
  if not HAS_HTTPX:
189
213
  raise ImportError("The httpx library is required for sending data over HTTP(S)")
@@ -214,6 +238,10 @@ class SplunkWriter(AbstractWriter):
214
238
 
215
239
  self._send = self._send_http
216
240
 
241
+ if self.sourcetype == SourceType.JSON:
242
+ self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
243
+ self.json_converter = record_to_splunk_http_api_json
244
+
217
245
  def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
218
246
  # It's possible to call this function without any data, purely to flush. Hence this check.
219
247
  if data:
@@ -252,9 +280,9 @@ class SplunkWriter(AbstractWriter):
252
280
  )
253
281
 
254
282
  if self.sourcetype == SourceType.RECORDS:
255
- rec = splunkify_key_value(record, self.tag)
283
+ rec = record_to_splunk_kv_line(record, self.tag)
256
284
  else:
257
- rec = splunkify_json(self.packer, record, self.tag)
285
+ rec = self.json_converter(self.packer, record, self.tag)
258
286
 
259
287
  # Trail with a newline for line breaking.
260
288
  data = to_bytes(rec) + b"\n"
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.16.dev4'
16
- __version_tuple__ = version_tuple = (3, 16, 'dev4')
15
+ __version__ = version = '3.16.dev6'
16
+ __version_tuple__ = version_tuple = (3, 16, 'dev6')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.16.dev4
3
+ Version: 3.16.dev6
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -0,0 +1,433 @@
1
+ import datetime
2
+ import json
3
+ import sys
4
+ from typing import Iterator
5
+ from unittest.mock import ANY, MagicMock, patch
6
+
7
+ import pytest
8
+
9
+ import flow.record.adapter.splunk
10
+ from flow.record import RecordDescriptor
11
+ from flow.record.adapter.splunk import (
12
+ ESCAPE,
13
+ RESERVED_FIELDS,
14
+ Protocol,
15
+ SourceType,
16
+ SplunkWriter,
17
+ escape_field_name,
18
+ record_to_splunk_http_api_json,
19
+ record_to_splunk_kv_line,
20
+ record_to_splunk_tcp_api_json,
21
+ )
22
+ from flow.record.jsonpacker import JsonRecordPacker
23
+
24
+ # These base fields are always part of the splunk output. As they are ordered
25
+ # and ordered last in the record fields we can append them to any check of the
26
+ # splunk output values.
27
+ BASE_FIELD_JSON_VALUES = {
28
+ f"{ESCAPE}_source": None,
29
+ f"{ESCAPE}_classification": None,
30
+ f"{ESCAPE}_generated": ANY,
31
+ }
32
+ BASE_FIELDS_KV_SUFFIX = f'{ESCAPE}_source=None {ESCAPE}_classification=None {ESCAPE}_generated="'
33
+
34
+ JSON_PACKER = JsonRecordPacker(pack_descriptors=False)
35
+
36
+
37
+ @pytest.fixture
38
+ def mock_httpx_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock]:
39
+ with monkeypatch.context() as m:
40
+ mock_httpx = MagicMock()
41
+ m.setitem(sys.modules, "httpx", mock_httpx)
42
+
43
+ yield mock_httpx
44
+
45
+
46
+ escaped_fields = list(
47
+ RESERVED_FIELDS.union(
48
+ set(["_underscore_field"]),
49
+ ),
50
+ )
51
+
52
+
53
+ @pytest.mark.parametrize(
54
+ "field, escaped", list(zip(escaped_fields, [True] * len(escaped_fields))) + [("not_escaped", False)]
55
+ )
56
+ def test_escape_field_name(field, escaped):
57
+ if escaped:
58
+ assert escape_field_name(field) == f"{ESCAPE}{field}"
59
+ else:
60
+ assert escape_field_name(field) == field
61
+
62
+
63
+ def test_splunkify_reserved_field():
64
+ test_record_descriptor = RecordDescriptor(
65
+ "test/record",
66
+ [("string", "rdtag")],
67
+ )
68
+
69
+ test_record = test_record_descriptor(rdtag="bar")
70
+
71
+ output_key_value = record_to_splunk_kv_line(test_record)
72
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
73
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
74
+
75
+ json_dict = dict(
76
+ {
77
+ "rdtag": None,
78
+ "rdtype": "test/record",
79
+ f"{ESCAPE}rdtag": "bar",
80
+ },
81
+ **BASE_FIELD_JSON_VALUES,
82
+ )
83
+
84
+ assert output_key_value.startswith(f'rdtype="test/record" rdtag=None {ESCAPE}rdtag="bar" {BASE_FIELDS_KV_SUFFIX}')
85
+ assert json.loads(output_http_json) == {"event": json_dict}
86
+ assert json.loads(output_tcp_json) == json_dict
87
+
88
+
89
+ def test_splunkify_normal_field():
90
+ test_record_descriptor = RecordDescriptor(
91
+ "test/record",
92
+ [("string", "foo")],
93
+ )
94
+
95
+ test_record = test_record_descriptor(foo="bar")
96
+
97
+ output_key_value = record_to_splunk_kv_line(test_record)
98
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
99
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
100
+
101
+ json_dict = dict(
102
+ {
103
+ "rdtag": None,
104
+ "rdtype": "test/record",
105
+ "foo": "bar",
106
+ },
107
+ **BASE_FIELD_JSON_VALUES,
108
+ )
109
+
110
+ assert output_key_value.startswith(f'rdtype="test/record" rdtag=None foo="bar" {BASE_FIELDS_KV_SUFFIX}')
111
+ assert json.loads(output_http_json) == {"event": json_dict}
112
+ assert json.loads(output_tcp_json) == json_dict
113
+
114
+
115
+ def test_splunkify_source_field():
116
+ test_record_descriptor = RecordDescriptor(
117
+ "test/record",
118
+ [("string", "source")],
119
+ )
120
+
121
+ test_record = test_record_descriptor(source="file_on_target")
122
+ test_record._source = "path_of_target"
123
+
124
+ output_key_value = record_to_splunk_kv_line(test_record)
125
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
126
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
127
+
128
+ base_fields_kv_suffix = BASE_FIELDS_KV_SUFFIX.replace(
129
+ f"{ESCAPE}_source=None",
130
+ f'{ESCAPE}_source="{test_record._source}"',
131
+ )
132
+
133
+ base_field_json_values = BASE_FIELD_JSON_VALUES.copy()
134
+ base_field_json_values[f"{ESCAPE}_source"] = test_record._source
135
+
136
+ json_dict = dict(
137
+ {
138
+ "rdtag": None,
139
+ "rdtype": "test/record",
140
+ f"{ESCAPE}source": "file_on_target",
141
+ },
142
+ **base_field_json_values,
143
+ )
144
+
145
+ assert output_key_value.startswith(
146
+ f'rdtype="test/record" rdtag=None {ESCAPE}source="file_on_target" {base_fields_kv_suffix}'
147
+ )
148
+ assert json.loads(output_http_json) == {"event": json_dict}
149
+ assert json.loads(output_tcp_json) == json_dict
150
+
151
+
152
+ def test_splunkify_rdtag_field():
153
+ test_record_descriptor = RecordDescriptor("test/record", [])
154
+
155
+ test_record = test_record_descriptor()
156
+
157
+ output_key_value = record_to_splunk_kv_line(test_record, tag="bar")
158
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record, tag="bar")
159
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record, tag="bar")
160
+
161
+ json_dict = dict(
162
+ {
163
+ "rdtag": "bar",
164
+ "rdtype": "test/record",
165
+ },
166
+ **BASE_FIELD_JSON_VALUES,
167
+ )
168
+
169
+ assert output_key_value.startswith(f'rdtype="test/record" rdtag="bar" {BASE_FIELDS_KV_SUFFIX}')
170
+ assert json.loads(output_http_json) == {"event": json_dict}
171
+ assert json.loads(output_tcp_json) == json_dict
172
+
173
+
174
+ def test_splunkify_none_field():
175
+ test_record_descriptor = RecordDescriptor(
176
+ "test/record",
177
+ [("string", "foo")],
178
+ )
179
+
180
+ test_record = test_record_descriptor()
181
+
182
+ output_key_value = record_to_splunk_kv_line(test_record)
183
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
184
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
185
+
186
+ json_dict = dict(
187
+ {
188
+ "rdtag": None,
189
+ "rdtype": "test/record",
190
+ "foo": None,
191
+ },
192
+ **BASE_FIELD_JSON_VALUES,
193
+ )
194
+
195
+ assert output_key_value.startswith(f'rdtype="test/record" rdtag=None foo=None {BASE_FIELDS_KV_SUFFIX}')
196
+ assert json.loads(output_http_json) == {"event": json_dict}
197
+ assert json.loads(output_tcp_json) == json_dict
198
+
199
+
200
+ def test_splunkify_byte_field():
201
+ test_record_descriptor = RecordDescriptor(
202
+ "test/record",
203
+ [("bytes", "foo")],
204
+ )
205
+
206
+ test_record = test_record_descriptor(foo=b"bar")
207
+
208
+ output_key_value = record_to_splunk_kv_line(test_record)
209
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
210
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
211
+
212
+ json_dict = dict(
213
+ {
214
+ "rdtag": None,
215
+ "rdtype": "test/record",
216
+ "foo": "YmFy",
217
+ },
218
+ **BASE_FIELD_JSON_VALUES,
219
+ )
220
+
221
+ assert output_key_value.startswith(f'rdtype="test/record" rdtag=None foo="YmFy" {BASE_FIELDS_KV_SUFFIX}')
222
+ assert json.loads(output_http_json) == {"event": json_dict}
223
+ assert json.loads(output_tcp_json) == json_dict
224
+
225
+
226
+ def test_splunkify_backslash_quote_field():
227
+ test_record_descriptor = RecordDescriptor(
228
+ "test/record",
229
+ [("string", "foo")],
230
+ )
231
+
232
+ test_record = test_record_descriptor(foo=b'\\"')
233
+
234
+ output = record_to_splunk_kv_line(test_record)
235
+ output_http_json = record_to_splunk_http_api_json(JSON_PACKER, test_record)
236
+ output_tcp_json = record_to_splunk_tcp_api_json(JSON_PACKER, test_record)
237
+
238
+ json_dict = dict(
239
+ {
240
+ "rdtag": None,
241
+ "rdtype": "test/record",
242
+ "foo": '\\"',
243
+ },
244
+ **BASE_FIELD_JSON_VALUES,
245
+ )
246
+
247
+ assert output.startswith(f'rdtype="test/record" rdtag=None foo="\\\\\\"" {BASE_FIELDS_KV_SUFFIX}')
248
+ assert json.loads(output_http_json) == {"event": json_dict}
249
+ assert json.loads(output_tcp_json) == json_dict
250
+
251
+
252
+ def test_record_to_splunk_http_api_json_special_fields():
253
+ test_record_descriptor = RecordDescriptor(
254
+ "test/record",
255
+ [
256
+ ("datetime", "ts"),
257
+ ("string", "hostname"),
258
+ ("string", "foo"),
259
+ ],
260
+ )
261
+
262
+ # Datetimes should be converted to epoch
263
+ test_record = test_record_descriptor(ts=datetime.datetime(1970, 1, 1, 4, 0), hostname="RECYCLOPS", foo="bar")
264
+
265
+ output = record_to_splunk_http_api_json(JSON_PACKER, test_record)
266
+ assert '"time": 14400.0,' in output
267
+ assert '"host": "RECYCLOPS"' in output
268
+
269
+
270
+ def test_tcp_protocol_records_sourcetype():
271
+ with patch("socket.socket") as mock_socket:
272
+ tcp_writer = SplunkWriter("splunk:1337")
273
+ assert tcp_writer.host == "splunk"
274
+ assert tcp_writer.port == 1337
275
+ assert tcp_writer.protocol == Protocol.TCP
276
+ assert tcp_writer.sourcetype == SourceType.RECORDS
277
+
278
+ mock_socket.assert_called()
279
+ mock_socket.return_value.connect.assert_called_with(("splunk", 1337))
280
+
281
+ test_record_descriptor = RecordDescriptor(
282
+ "test/record",
283
+ [("string", "foo")],
284
+ )
285
+
286
+ test_record = test_record_descriptor(foo="bar")
287
+ tcp_writer.write(test_record)
288
+
289
+ args, _ = mock_socket.return_value.sendall.call_args
290
+ written_to_splunk = args[0]
291
+
292
+ assert written_to_splunk.startswith(
293
+ b'rdtype="test/record" rdtag=None foo="bar" ' + BASE_FIELDS_KV_SUFFIX.encode()
294
+ )
295
+ assert written_to_splunk.endswith(b'"\n')
296
+
297
+
298
+ def test_tcp_protocol_json_sourcetype():
299
+ with patch("socket.socket") as mock_socket:
300
+ tcp_writer = SplunkWriter("splunk:1337", sourcetype="json")
301
+ assert tcp_writer.host == "splunk"
302
+ assert tcp_writer.port == 1337
303
+ assert tcp_writer.protocol == Protocol.TCP
304
+ assert tcp_writer.sourcetype == SourceType.JSON
305
+
306
+ mock_socket.assert_called()
307
+ mock_socket.return_value.connect.assert_called_with(("splunk", 1337))
308
+
309
+ test_record_descriptor = RecordDescriptor(
310
+ "test/record",
311
+ [("string", "foo")],
312
+ )
313
+
314
+ test_record = test_record_descriptor(foo="bar")
315
+ tcp_writer.write(test_record)
316
+
317
+ args, _ = mock_socket.return_value.sendall.call_args
318
+ written_to_splunk = args[0]
319
+
320
+ json_dict = dict(
321
+ {
322
+ "rdtag": None,
323
+ "rdtype": "test/record",
324
+ "foo": "bar",
325
+ },
326
+ **BASE_FIELD_JSON_VALUES,
327
+ )
328
+
329
+ assert json.loads(written_to_splunk) == json_dict
330
+ assert written_to_splunk.endswith(b"\n")
331
+
332
+
333
+ def test_https_protocol_records_sourcetype(mock_httpx_package: MagicMock):
334
+ if "flow.record.adapter.splunk" in sys.modules:
335
+ del sys.modules["flow.record.adapter.splunk"]
336
+
337
+ from flow.record.adapter.splunk import Protocol, SourceType, SplunkWriter
338
+
339
+ with patch.object(
340
+ flow.record.adapter.splunk,
341
+ "HAS_HTTPX",
342
+ True,
343
+ ):
344
+ mock_httpx_package.Client.return_value.post.return_value.status_code = 200
345
+ https_writer = SplunkWriter("https://splunk:8088", token="password123")
346
+
347
+ assert https_writer.host == "splunk"
348
+ assert https_writer.protocol == Protocol.HTTPS
349
+ assert https_writer.sourcetype == SourceType.RECORDS
350
+ assert https_writer.verify is True
351
+ assert https_writer.url == "https://splunk:8088/services/collector/raw?auto_extract_timestamp=true"
352
+
353
+ _, kwargs = mock_httpx_package.Client.call_args
354
+ assert kwargs["verify"] is True
355
+
356
+ given_headers = kwargs["headers"]
357
+ assert given_headers["Authorization"] == "Splunk password123"
358
+ assert "X-Splunk-Request-Channel" in given_headers
359
+
360
+ test_record_descriptor = RecordDescriptor(
361
+ "test/record",
362
+ [("string", "foo")],
363
+ )
364
+
365
+ test_record = test_record_descriptor(foo="bar")
366
+ https_writer.write(test_record)
367
+
368
+ mock_httpx_package.Client.return_value.post.assert_not_called()
369
+
370
+ https_writer.close()
371
+ mock_httpx_package.Client.return_value.post.assert_called_with(
372
+ "https://splunk:8088/services/collector/raw?auto_extract_timestamp=true",
373
+ data=ANY,
374
+ )
375
+ _, kwargs = mock_httpx_package.Client.return_value.post.call_args
376
+ sent_data = kwargs["data"]
377
+ assert sent_data.startswith(b'rdtype="test/record" rdtag=None foo="bar" ' + BASE_FIELDS_KV_SUFFIX.encode())
378
+ assert sent_data.endswith(b'"\n')
379
+
380
+
381
+ def test_https_protocol_json_sourcetype(mock_httpx_package: MagicMock):
382
+ if "flow.record.adapter.splunk" in sys.modules:
383
+ del sys.modules["flow.record.adapter.splunk"]
384
+
385
+ from flow.record.adapter.splunk import SplunkWriter
386
+
387
+ with patch.object(
388
+ flow.record.adapter.splunk,
389
+ "HAS_HTTPX",
390
+ True,
391
+ ):
392
+ mock_httpx_package.Client.return_value.post.return_value.status_code = 200
393
+
394
+ https_writer = SplunkWriter("https://splunk:8088", token="password123", sourcetype="json")
395
+
396
+ test_record_descriptor = RecordDescriptor(
397
+ "test/record",
398
+ [("string", "foo")],
399
+ )
400
+
401
+ https_writer.write(test_record_descriptor(foo="bar"))
402
+ https_writer.write(test_record_descriptor(foo="baz"))
403
+ mock_httpx_package.Client.return_value.post.assert_not_called()
404
+
405
+ https_writer.close()
406
+ mock_httpx_package.Client.return_value.post.assert_called_with(
407
+ "https://splunk:8088/services/collector/event?auto_extract_timestamp=true",
408
+ data=ANY,
409
+ )
410
+
411
+ _, kwargs = mock_httpx_package.Client.return_value.post.call_args
412
+ sent_data = kwargs["data"]
413
+ first_record_json, _, second_record_json = sent_data.partition(b"\n")
414
+ assert json.loads(first_record_json) == {
415
+ "event": dict(
416
+ {
417
+ "rdtag": None,
418
+ "rdtype": "test/record",
419
+ "foo": "bar",
420
+ },
421
+ **BASE_FIELD_JSON_VALUES,
422
+ )
423
+ }
424
+ assert json.loads(second_record_json) == {
425
+ "event": dict(
426
+ {
427
+ "rdtag": None,
428
+ "rdtype": "test/record",
429
+ "foo": "baz",
430
+ },
431
+ **BASE_FIELD_JSON_VALUES,
432
+ )
433
+ }