flow.record 3.16.dev5__tar.gz → 3.16.dev7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {flow_record-3.16.dev5/flow.record.egg-info → flow_record-3.16.dev7}/PKG-INFO +1 -1
  2. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/splunk.py +79 -51
  3. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/__init__.py +2 -1
  4. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/version.py +2 -2
  5. {flow_record-3.16.dev5 → flow_record-3.16.dev7/flow.record.egg-info}/PKG-INFO +1 -1
  6. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_fieldtypes.py +9 -2
  7. flow_record-3.16.dev7/tests/test_splunk_adapter.py +433 -0
  8. flow_record-3.16.dev5/tests/test_splunk_adapter.py +0 -403
  9. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/COPYRIGHT +0 -0
  10. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/LICENSE +0 -0
  11. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/MANIFEST.in +0 -0
  12. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/README.md +0 -0
  13. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/examples/filesystem.py +0 -0
  14. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/examples/passivedns.py +0 -0
  15. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/examples/records.json +0 -0
  16. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/examples/tcpconn.py +0 -0
  17. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/__init__.py +0 -0
  18. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/__init__.py +0 -0
  19. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/archive.py +0 -0
  20. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/avro.py +0 -0
  21. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/broker.py +0 -0
  22. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/csvfile.py +0 -0
  23. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/duckdb.py +0 -0
  24. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/elastic.py +0 -0
  25. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/jsonfile.py +0 -0
  26. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/line.py +0 -0
  27. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/mongo.py +0 -0
  28. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/split.py +0 -0
  29. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/sqlite.py +0 -0
  30. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/stream.py +0 -0
  31. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/text.py +0 -0
  32. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/adapter/xlsx.py +0 -0
  33. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/base.py +0 -0
  34. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/exceptions.py +0 -0
  35. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/credential.py +0 -0
  36. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/net/__init__.py +0 -0
  37. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/net/ip.py +0 -0
  38. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/net/ipv4.py +0 -0
  39. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/net/tcp.py +0 -0
  40. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/fieldtypes/net/udp.py +0 -0
  41. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/jsonpacker.py +0 -0
  42. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/packer.py +0 -0
  43. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/selector.py +0 -0
  44. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/stream.py +0 -0
  45. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/tools/__init__.py +0 -0
  46. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/tools/geoip.py +0 -0
  47. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/tools/rdump.py +0 -0
  48. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/utils.py +0 -0
  49. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow/record/whitelist.py +0 -0
  50. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow.record.egg-info/SOURCES.txt +0 -0
  51. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow.record.egg-info/dependency_links.txt +0 -0
  52. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow.record.egg-info/entry_points.txt +0 -0
  53. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow.record.egg-info/requires.txt +0 -0
  54. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/flow.record.egg-info/top_level.txt +0 -0
  55. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/pyproject.toml +0 -0
  56. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/setup.cfg +0 -0
  57. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/__init__.py +0 -0
  58. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/_utils.py +0 -0
  59. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/docs/Makefile +0 -0
  60. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/docs/conf.py +0 -0
  61. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/docs/index.rst +0 -0
  62. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/selector_explain_example.py +0 -0
  63. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/standalone_test.py +0 -0
  64. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_avro.py +0 -0
  65. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_avro_adapter.py +0 -0
  66. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_compiled_selector.py +0 -0
  67. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_csv_adapter.py +0 -0
  68. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_deprecations.py +0 -0
  69. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_elastic_adapter.py +0 -0
  70. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_fieldtype_ip.py +0 -0
  71. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_json_packer.py +0 -0
  72. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_json_record_adapter.py +0 -0
  73. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_multi_timestamp.py +0 -0
  74. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_packer.py +0 -0
  75. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_rdump.py +0 -0
  76. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_record.py +0 -0
  77. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_record_adapter.py +0 -0
  78. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_record_descriptor.py +0 -0
  79. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_regression.py +0 -0
  80. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_selector.py +0 -0
  81. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/test_sqlite_duckdb_adapter.py +0 -0
  82. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tests/utils_inspect.py +0 -0
  83. {flow_record-3.16.dev5 → flow_record-3.16.dev7}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.16.dev5
3
+ Version: 3.16.dev7
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -28,7 +28,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
28
28
  [TAG]: optional value to add as "rdtag" output field when writing
29
29
  [TOKEN]: Authentication token for sending data over HTTP(S)
30
30
  [SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
31
- [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTP(S). Defaults to True.
31
+ [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
32
32
  """
33
33
 
34
34
  log = logging.getLogger(__package__)
@@ -36,21 +36,38 @@ log = logging.getLogger(__package__)
36
36
  # Amount of records to bundle into a single request when sending data over HTTP(S).
37
37
  RECORD_BUFFER_LIMIT = 20
38
38
 
39
- # https://docs.splunk.com/Documentation/Splunk/7.3.1/Data/Configureindex-timefieldextraction
40
- RESERVED_SPLUNK_FIELDS = [
41
- "_indextime",
42
- "_time",
43
- "index",
44
- "punct",
45
- "source",
46
- "sourcetype",
47
- "tag",
48
- "type",
49
- ]
39
+ # List of reserved splunk fields that do not start with an `_`, as those will be escaped anyway.
40
+ # See: https://docs.splunk.com/Documentation/Splunk/9.2.1/Data/Aboutdefaultfields
41
+ RESERVED_SPLUNK_FIELDS = set(
42
+ [
43
+ "host",
44
+ "index",
45
+ "linecount",
46
+ "punct",
47
+ "source",
48
+ "sourcetype",
49
+ "splunk_server",
50
+ "timestamp",
51
+ ],
52
+ )
53
+
54
+ RESERVED_SPLUNK_APP_FIELDS = set(
55
+ [
56
+ "tag",
57
+ "type",
58
+ ]
59
+ )
60
+
61
+ RESERVED_RDUMP_FIELDS = set(
62
+ [
63
+ "rdtag",
64
+ "rdtype",
65
+ ],
66
+ )
50
67
 
51
- RESERVED_RECORD_FIELDS = ["_classification", "_generated", "_source"]
68
+ RESERVED_FIELDS = RESERVED_SPLUNK_FIELDS.union(RESERVED_SPLUNK_APP_FIELDS.union(RESERVED_RDUMP_FIELDS))
52
69
 
53
- PREFIX_WITH_RD = set(RESERVED_SPLUNK_FIELDS + RESERVED_RECORD_FIELDS)
70
+ ESCAPE = "rd_"
54
71
 
55
72
 
56
73
  class Protocol(Enum):
@@ -64,7 +81,13 @@ class SourceType(Enum):
64
81
  RECORDS = "records"
65
82
 
66
83
 
67
- def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
84
+ def escape_field_name(field: str) -> str:
85
+ if field.startswith(("_", ESCAPE)) or field in RESERVED_FIELDS:
86
+ field = f"{ESCAPE}{field}"
87
+ return field
88
+
89
+
90
+ def record_to_splunk_kv_line(record: Record, tag: Optional[str] = None) -> str:
68
91
  ret = []
69
92
 
70
93
  ret.append(f'rdtype="{record._desc.name}"')
@@ -81,8 +104,7 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
81
104
 
82
105
  val = getattr(record, field)
83
106
 
84
- if field in PREFIX_WITH_RD:
85
- field = f"rd_{field}"
107
+ field = escape_field_name(field)
86
108
 
87
109
  if val is None:
88
110
  ret.append(f"{field}=None")
@@ -94,7 +116,25 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
94
116
  return " ".join(ret)
95
117
 
96
118
 
97
- def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
119
+ def record_to_splunk_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> dict:
120
+ record_as_dict = packer.pack_obj(record)
121
+ json_dict = {}
122
+
123
+ for field, value in record_as_dict.items():
124
+ # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
125
+ if field == "_version":
126
+ continue
127
+ escaped_field = escape_field_name(field)
128
+ json_dict[escaped_field] = value
129
+
130
+ # Add rdump specific fields
131
+ json_dict["rdtag"] = tag
132
+ json_dict["rdtype"] = record._desc.name
133
+
134
+ return json_dict
135
+
136
+
137
+ def record_to_splunk_http_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
98
138
  ret = {}
99
139
 
100
140
  indexer_fields = [
@@ -115,29 +155,13 @@ def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str]
115
155
  continue
116
156
  ret[splunk_name] = to_str(val)
117
157
 
118
- record_as_dict = packer.pack_obj(record)
119
-
120
- # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
121
- del record_as_dict["_version"]
122
-
123
- # These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
124
- # record, we prefix them with 'rd_' (short for record descriptor)
125
- for field in PREFIX_WITH_RD:
126
- if field not in record_as_dict:
127
- continue
128
- new_field = f"rd_{field}"
129
-
130
- record_as_dict[new_field] = record_as_dict[field]
131
- del record_as_dict[field]
132
-
133
- # Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
134
- record_as_dict["rdtag"] = tag
158
+ ret["event"] = record_to_splunk_json(packer, record, tag)
159
+ return json.dumps(ret, default=packer.pack_obj)
135
160
 
136
- # Yes.
137
- record_as_dict["rdtype"] = record._desc.name
138
161
 
139
- ret["event"] = record_as_dict
140
- return json.dumps(ret, default=packer.pack_obj)
162
+ def record_to_splunk_tcp_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
163
+ record_dict = record_to_splunk_json(packer, record, tag)
164
+ return json.dumps(record_dict, default=packer.pack_obj)
141
165
 
142
166
 
143
167
  class SplunkWriter(AbstractWriter):
@@ -159,31 +183,31 @@ class SplunkWriter(AbstractWriter):
159
183
 
160
184
  if sourcetype is None:
161
185
  log.warning("No sourcetype provided, assuming 'records' sourcetype")
162
- sourcetype = SourceType.RECORDS
186
+ self.sourcetype = SourceType.RECORDS
187
+ else:
188
+ self.sourcetype = SourceType(sourcetype)
163
189
 
164
190
  parsed_url = urlparse(uri)
165
191
  url_scheme = parsed_url.scheme.lower()
166
-
167
- self.sourcetype = SourceType(sourcetype)
168
192
  self.protocol = Protocol(url_scheme)
169
-
170
- if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
171
- raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")
172
-
173
193
  self.host = parsed_url.hostname
174
194
  self.port = parsed_url.port
195
+
175
196
  self.tag = tag
176
197
  self.record_buffer = []
177
198
  self._warned = False
178
199
  self.packer = None
179
-
180
- if self.sourcetype == SourceType.JSON:
181
- self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
200
+ self.json_converter = None
182
201
 
183
202
  if self.protocol == Protocol.TCP:
184
203
  self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
185
204
  self.sock.connect((self.host, self.port))
186
205
  self._send = self._send_tcp
206
+
207
+ if self.sourcetype == SourceType.JSON:
208
+ self.packer = JsonRecordPacker(indent=None, pack_descriptors=False)
209
+ self.json_converter = record_to_splunk_tcp_api_json
210
+
187
211
  elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
188
212
  if not HAS_HTTPX:
189
213
  raise ImportError("The httpx library is required for sending data over HTTP(S)")
@@ -214,6 +238,10 @@ class SplunkWriter(AbstractWriter):
214
238
 
215
239
  self._send = self._send_http
216
240
 
241
+ if self.sourcetype == SourceType.JSON:
242
+ self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
243
+ self.json_converter = record_to_splunk_http_api_json
244
+
217
245
  def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
218
246
  # It's possible to call this function without any data, purely to flush. Hence this check.
219
247
  if data:
@@ -252,9 +280,9 @@ class SplunkWriter(AbstractWriter):
252
280
  )
253
281
 
254
282
  if self.sourcetype == SourceType.RECORDS:
255
- rec = splunkify_key_value(record, self.tag)
283
+ rec = record_to_splunk_kv_line(record, self.tag)
256
284
  else:
257
- rec = splunkify_json(self.packer, record, self.tag)
285
+ rec = self.json_converter(self.packer, record, self.tag)
258
286
 
259
287
  # Trail with a newline for line breaking.
260
288
  data = to_bytes(rec) + b"\n"
@@ -767,7 +767,8 @@ class command(FieldType):
767
767
  # an '%' for an environment variable
768
768
  # r'\\' for a UNC path
769
769
  # the strip and check for ":" on the second line is for `<drive_letter>:`
770
- windows = value.startswith((r"\\", "%")) or value.lstrip("\"'")[1] == ":"
770
+ stripped_value = value.lstrip("\"'")
771
+ windows = value.startswith((r"\\", "%")) or (len(stripped_value) >= 2 and stripped_value[1] == ":")
771
772
 
772
773
  if windows:
773
774
  cls = windows_command
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.16.dev5'
16
- __version_tuple__ = version_tuple = (3, 16, 'dev5')
15
+ __version__ = version = '3.16.dev7'
16
+ __version_tuple__ = version_tuple = (3, 16, 'dev7')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.16.dev5
3
+ Version: 3.16.dev7
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -1075,9 +1075,16 @@ def test_command_integration_none(tmp_path: pathlib.Path) -> None:
1075
1075
  # Test a quoted path
1076
1076
  (r"'c:\path to some exe' /d /a", r"c:\path to some exe", [r"/d /a"]),
1077
1077
  # Test an unquoted path
1078
- (r"'c:\Program Files\hello.exe'", r"c:\Program Files\hello.exe", []),
1078
+ (r"\Users\test\hello.exe", r"\Users\test\hello.exe", []),
1079
1079
  # Test an unquoted path with a path as argument
1080
- (r"'c:\Program Files\hello.exe' c:\startmepls.exe", r"c:\Program Files\hello.exe", [r"c:\startmepls.exe"]),
1080
+ (r"\Users\test\hello.exe c:\startmepls.exe", r"\Users\test\hello.exe", [r"c:\startmepls.exe"]),
1081
+ # Test a quoted UNC path
1082
+ (r"'\\192.168.1.2\Program Files\hello.exe'", r"\\192.168.1.2\Program Files\hello.exe", []),
1083
+ # Test an unquoted UNC path
1084
+ (r"\\192.168.1.2\Users\test\hello.exe /d /a", r"\\192.168.1.2\Users\test\hello.exe", [r"/d /a"]),
1085
+ # Test an empty command string
1086
+ (r"''", r"", []),
1087
+ # Test None
1081
1088
  (None, None, None),
1082
1089
  ],
1083
1090
  )