flow.record 3.15.dev16.tar.gz → 3.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {flow_record-3.15.dev16/flow.record.egg-info → flow_record-3.16}/PKG-INFO +2 -1
  2. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/__init__.py +2 -0
  3. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/elastic.py +39 -6
  4. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/splunk.py +79 -51
  5. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/base.py +3 -5
  6. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/__init__.py +29 -9
  7. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/jsonpacker.py +6 -1
  8. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/selector.py +1 -24
  9. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/stream.py +2 -1
  10. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/utils.py +38 -2
  11. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/version.py +2 -2
  12. {flow_record-3.15.dev16 → flow_record-3.16/flow.record.egg-info}/PKG-INFO +2 -1
  13. {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/SOURCES.txt +1 -0
  14. {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/requires.txt +1 -0
  15. {flow_record-3.15.dev16 → flow_record-3.16}/pyproject.toml +1 -1
  16. flow_record-3.16/tests/test_elastic_adapter.py +53 -0
  17. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_fieldtypes.py +126 -7
  18. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_json_packer.py +21 -0
  19. flow_record-3.16/tests/test_splunk_adapter.py +433 -0
  20. {flow_record-3.15.dev16 → flow_record-3.16}/tox.ini +5 -4
  21. flow_record-3.15.dev16/tests/test_splunk_adapter.py +0 -403
  22. {flow_record-3.15.dev16 → flow_record-3.16}/COPYRIGHT +0 -0
  23. {flow_record-3.15.dev16 → flow_record-3.16}/LICENSE +0 -0
  24. {flow_record-3.15.dev16 → flow_record-3.16}/MANIFEST.in +0 -0
  25. {flow_record-3.15.dev16 → flow_record-3.16}/README.md +0 -0
  26. {flow_record-3.15.dev16 → flow_record-3.16}/examples/filesystem.py +0 -0
  27. {flow_record-3.15.dev16 → flow_record-3.16}/examples/passivedns.py +0 -0
  28. {flow_record-3.15.dev16 → flow_record-3.16}/examples/records.json +0 -0
  29. {flow_record-3.15.dev16 → flow_record-3.16}/examples/tcpconn.py +0 -0
  30. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/__init__.py +0 -0
  31. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/archive.py +0 -0
  32. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/avro.py +0 -0
  33. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/broker.py +0 -0
  34. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/csvfile.py +0 -0
  35. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/duckdb.py +0 -0
  36. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/jsonfile.py +0 -0
  37. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/line.py +0 -0
  38. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/mongo.py +0 -0
  39. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/split.py +0 -0
  40. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/sqlite.py +0 -0
  41. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/stream.py +0 -0
  42. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/text.py +0 -0
  43. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/adapter/xlsx.py +0 -0
  44. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/exceptions.py +0 -0
  45. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/credential.py +0 -0
  46. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/__init__.py +0 -0
  47. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/ip.py +0 -0
  48. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/ipv4.py +0 -0
  49. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/tcp.py +0 -0
  50. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/fieldtypes/net/udp.py +0 -0
  51. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/packer.py +0 -0
  52. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/tools/__init__.py +0 -0
  53. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/tools/geoip.py +0 -0
  54. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/tools/rdump.py +0 -0
  55. {flow_record-3.15.dev16 → flow_record-3.16}/flow/record/whitelist.py +0 -0
  56. {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/dependency_links.txt +0 -0
  57. {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/entry_points.txt +0 -0
  58. {flow_record-3.15.dev16 → flow_record-3.16}/flow.record.egg-info/top_level.txt +0 -0
  59. {flow_record-3.15.dev16 → flow_record-3.16}/setup.cfg +0 -0
  60. {flow_record-3.15.dev16 → flow_record-3.16}/tests/__init__.py +0 -0
  61. {flow_record-3.15.dev16 → flow_record-3.16}/tests/_utils.py +0 -0
  62. {flow_record-3.15.dev16 → flow_record-3.16}/tests/docs/Makefile +0 -0
  63. {flow_record-3.15.dev16 → flow_record-3.16}/tests/docs/conf.py +0 -0
  64. {flow_record-3.15.dev16 → flow_record-3.16}/tests/docs/index.rst +0 -0
  65. {flow_record-3.15.dev16 → flow_record-3.16}/tests/selector_explain_example.py +0 -0
  66. {flow_record-3.15.dev16 → flow_record-3.16}/tests/standalone_test.py +0 -0
  67. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_avro.py +0 -0
  68. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_avro_adapter.py +0 -0
  69. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_compiled_selector.py +0 -0
  70. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_csv_adapter.py +0 -0
  71. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_deprecations.py +0 -0
  72. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_fieldtype_ip.py +0 -0
  73. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_json_record_adapter.py +0 -0
  74. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_multi_timestamp.py +0 -0
  75. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_packer.py +0 -0
  76. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_rdump.py +0 -0
  77. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_record.py +0 -0
  78. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_record_adapter.py +0 -0
  79. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_record_descriptor.py +0 -0
  80. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_regression.py +0 -0
  81. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_selector.py +0 -0
  82. {flow_record-3.15.dev16 → flow_record-3.16}/tests/test_sqlite_duckdb_adapter.py +0 -0
  83. {flow_record-3.15.dev16 → flow_record-3.16}/tests/utils_inspect.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.15.dev16
3
+ Version: 3.16
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
40
40
  Provides-Extra: test
41
41
  Requires-Dist: flow.record[compression]; extra == "test"
42
42
  Requires-Dist: flow.record[avro]; extra == "test"
43
+ Requires-Dist: flow.record[elastic]; extra == "test"
43
44
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
44
45
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
46
 
@@ -17,6 +17,7 @@ from flow.record.base import (
17
17
  RecordWriter,
18
18
  dynamic_fieldtype,
19
19
  extend_record,
20
+ ignore_fields_for_comparison,
20
21
  iter_timestamped_records,
21
22
  open_path,
22
23
  open_path_or_stream,
@@ -57,6 +58,7 @@ __all__ = [
57
58
  "open_path_or_stream",
58
59
  "open_path",
59
60
  "open_stream",
61
+ "ignore_fields_for_comparison",
60
62
  "set_ignored_fields_for_comparison",
61
63
  "stream",
62
64
  "dynamic_fieldtype",
@@ -2,7 +2,7 @@ import hashlib
2
2
  import logging
3
3
  import queue
4
4
  import threading
5
- from typing import Iterator, Union
5
+ from typing import Iterator, Optional, Union
6
6
 
7
7
  import elasticsearch
8
8
  import elasticsearch.helpers
@@ -22,9 +22,11 @@ Read usage: rdump elastic+[PROTOCOL]://[IP]:[PORT]?index=[INDEX]
22
22
  [PROTOCOL]: http or https. Defaults to https when "+[PROTOCOL]" is omitted
23
23
 
24
24
  Optional arguments:
25
+ [API_KEY]: base64 encoded api key to authenticate with (default: False)
25
26
  [INDEX]: name of the index to use (default: records)
26
27
  [VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
27
28
  [HASH_RECORD]: make record unique by hashing record [slow] (default: False)
29
+ [_META_*]: record metadata fields (default: None)
28
30
  """
29
31
 
30
32
  log = logging.getLogger(__name__)
@@ -38,6 +40,7 @@ class ElasticWriter(AbstractWriter):
38
40
  verify_certs: Union[str, bool] = True,
39
41
  http_compress: Union[str, bool] = True,
40
42
  hash_record: Union[str, bool] = False,
43
+ api_key: Optional[str] = None,
41
44
  **kwargs,
42
45
  ) -> None:
43
46
  self.index = index
@@ -45,7 +48,17 @@ class ElasticWriter(AbstractWriter):
45
48
  verify_certs = str(verify_certs).lower() in ("1", "true")
46
49
  http_compress = str(http_compress).lower() in ("1", "true")
47
50
  self.hash_record = str(hash_record).lower() in ("1", "true")
48
- self.es = elasticsearch.Elasticsearch(uri, verify_certs=verify_certs, http_compress=http_compress)
51
+
52
+ if not uri.lower().startswith(("http://", "https://")):
53
+ uri = "http://" + uri
54
+
55
+ self.es = elasticsearch.Elasticsearch(
56
+ uri,
57
+ verify_certs=verify_certs,
58
+ http_compress=http_compress,
59
+ api_key=api_key,
60
+ )
61
+
49
62
  self.json_packer = JsonRecordPacker()
50
63
  self.queue: queue.Queue[Union[Record, StopIteration]] = queue.Queue()
51
64
  self.event = threading.Event()
@@ -58,25 +71,34 @@ class ElasticWriter(AbstractWriter):
58
71
 
59
72
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
60
73
 
74
+ self.metadata_fields = {}
75
+ for arg_key, arg_val in kwargs.items():
76
+ if arg_key.startswith("_meta_"):
77
+ self.metadata_fields[arg_key[6:]] = arg_val
78
+
61
79
  def record_to_document(self, record: Record, index: str) -> dict:
62
80
  """Convert a record to a Elasticsearch compatible document dictionary"""
63
81
  rdict = record._asdict()
64
82
 
65
- # Store record metadata under `_record_metadata`
83
+ # Store record metadata under `_record_metadata`.
66
84
  rdict_meta = {
67
85
  "descriptor": {
68
86
  "name": record._desc.name,
69
87
  "hash": record._desc.descriptor_hash,
70
88
  },
71
89
  }
90
+
72
91
  # Move all dunder fields to `_record_metadata` to avoid naming clash with ES.
73
92
  dunder_keys = [key for key in rdict if key.startswith("_")]
74
93
  for key in dunder_keys:
75
94
  rdict_meta[key.lstrip("_")] = rdict.pop(key)
76
- # remove _generated field from metadata to ensure determinstic documents
95
+
96
+ # Remove _generated field from metadata to ensure determinstic documents.
77
97
  if self.hash_record:
78
98
  rdict_meta.pop("generated", None)
79
- rdict["_record_metadata"] = rdict_meta
99
+
100
+ rdict["_record_metadata"] = rdict_meta.copy()
101
+ rdict["_record_metadata"].update(self.metadata_fields)
80
102
 
81
103
  document = {
82
104
  "_index": index,
@@ -106,6 +128,7 @@ class ElasticWriter(AbstractWriter):
106
128
  ):
107
129
  if not ok:
108
130
  log.error("Failed to insert %r", item)
131
+
109
132
  self.event.set()
110
133
 
111
134
  def write(self, record: Record) -> None:
@@ -129,6 +152,7 @@ class ElasticReader(AbstractReader):
129
152
  verify_certs: Union[str, bool] = True,
130
153
  http_compress: Union[str, bool] = True,
131
154
  selector: Union[None, Selector, CompiledSelector] = None,
155
+ api_key: Optional[str] = None,
132
156
  **kwargs,
133
157
  ) -> None:
134
158
  self.index = index
@@ -136,7 +160,16 @@ class ElasticReader(AbstractReader):
136
160
  self.selector = selector
137
161
  verify_certs = str(verify_certs).lower() in ("1", "true")
138
162
  http_compress = str(http_compress).lower() in ("1", "true")
139
- self.es = elasticsearch.Elasticsearch(uri, verify_certs=verify_certs, http_compress=http_compress)
163
+
164
+ if not uri.lower().startswith(("http://", "https://")):
165
+ uri = "http://" + uri
166
+
167
+ self.es = elasticsearch.Elasticsearch(
168
+ uri,
169
+ verify_certs=verify_certs,
170
+ http_compress=http_compress,
171
+ api_key=api_key,
172
+ )
140
173
 
141
174
  if not verify_certs:
142
175
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
@@ -28,7 +28,7 @@ Write usage: rdump -w splunk+[PROTOCOL]://[IP]:[PORT]?tag=[TAG]&token=[TOKEN]&so
28
28
  [TAG]: optional value to add as "rdtag" output field when writing
29
29
  [TOKEN]: Authentication token for sending data over HTTP(S)
30
30
  [SOURCETYPE]: Set sourcetype of data. Defaults to records, but can also be set to JSON.
31
- [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTP(S). Defaults to True.
31
+ [SSL_VERIFY]: Whether to verify the server certificate when sending data over HTTPS. Defaults to True.
32
32
  """
33
33
 
34
34
  log = logging.getLogger(__package__)
@@ -36,21 +36,38 @@ log = logging.getLogger(__package__)
36
36
  # Amount of records to bundle into a single request when sending data over HTTP(S).
37
37
  RECORD_BUFFER_LIMIT = 20
38
38
 
39
- # https://docs.splunk.com/Documentation/Splunk/7.3.1/Data/Configureindex-timefieldextraction
40
- RESERVED_SPLUNK_FIELDS = [
41
- "_indextime",
42
- "_time",
43
- "index",
44
- "punct",
45
- "source",
46
- "sourcetype",
47
- "tag",
48
- "type",
49
- ]
39
+ # List of reserved splunk fields that do not start with an `_`, as those will be escaped anyway.
40
+ # See: https://docs.splunk.com/Documentation/Splunk/9.2.1/Data/Aboutdefaultfields
41
+ RESERVED_SPLUNK_FIELDS = set(
42
+ [
43
+ "host",
44
+ "index",
45
+ "linecount",
46
+ "punct",
47
+ "source",
48
+ "sourcetype",
49
+ "splunk_server",
50
+ "timestamp",
51
+ ],
52
+ )
53
+
54
+ RESERVED_SPLUNK_APP_FIELDS = set(
55
+ [
56
+ "tag",
57
+ "type",
58
+ ]
59
+ )
60
+
61
+ RESERVED_RDUMP_FIELDS = set(
62
+ [
63
+ "rdtag",
64
+ "rdtype",
65
+ ],
66
+ )
50
67
 
51
- RESERVED_RECORD_FIELDS = ["_classification", "_generated", "_source"]
68
+ RESERVED_FIELDS = RESERVED_SPLUNK_FIELDS.union(RESERVED_SPLUNK_APP_FIELDS.union(RESERVED_RDUMP_FIELDS))
52
69
 
53
- PREFIX_WITH_RD = set(RESERVED_SPLUNK_FIELDS + RESERVED_RECORD_FIELDS)
70
+ ESCAPE = "rd_"
54
71
 
55
72
 
56
73
  class Protocol(Enum):
@@ -64,7 +81,13 @@ class SourceType(Enum):
64
81
  RECORDS = "records"
65
82
 
66
83
 
67
- def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
84
+ def escape_field_name(field: str) -> str:
85
+ if field.startswith(("_", ESCAPE)) or field in RESERVED_FIELDS:
86
+ field = f"{ESCAPE}{field}"
87
+ return field
88
+
89
+
90
+ def record_to_splunk_kv_line(record: Record, tag: Optional[str] = None) -> str:
68
91
  ret = []
69
92
 
70
93
  ret.append(f'rdtype="{record._desc.name}"')
@@ -81,8 +104,7 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
81
104
 
82
105
  val = getattr(record, field)
83
106
 
84
- if field in PREFIX_WITH_RD:
85
- field = f"rd_{field}"
107
+ field = escape_field_name(field)
86
108
 
87
109
  if val is None:
88
110
  ret.append(f"{field}=None")
@@ -94,7 +116,25 @@ def splunkify_key_value(record: Record, tag: Optional[str] = None) -> str:
94
116
  return " ".join(ret)
95
117
 
96
118
 
97
- def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
119
+ def record_to_splunk_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> dict:
120
+ record_as_dict = packer.pack_obj(record)
121
+ json_dict = {}
122
+
123
+ for field, value in record_as_dict.items():
124
+ # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
125
+ if field == "_version":
126
+ continue
127
+ escaped_field = escape_field_name(field)
128
+ json_dict[escaped_field] = value
129
+
130
+ # Add rdump specific fields
131
+ json_dict["rdtag"] = tag
132
+ json_dict["rdtype"] = record._desc.name
133
+
134
+ return json_dict
135
+
136
+
137
+ def record_to_splunk_http_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
98
138
  ret = {}
99
139
 
100
140
  indexer_fields = [
@@ -115,29 +155,13 @@ def splunkify_json(packer: JsonRecordPacker, record: Record, tag: Optional[str]
115
155
  continue
116
156
  ret[splunk_name] = to_str(val)
117
157
 
118
- record_as_dict = packer.pack_obj(record)
119
-
120
- # Omit the _version field as the Splunk adapter has no reader support for deserialising records back.
121
- del record_as_dict["_version"]
122
-
123
- # These fields end up in the 'event', but we have a few reserved field names. If those field names are in the
124
- # record, we prefix them with 'rd_' (short for record descriptor)
125
- for field in PREFIX_WITH_RD:
126
- if field not in record_as_dict:
127
- continue
128
- new_field = f"rd_{field}"
129
-
130
- record_as_dict[new_field] = record_as_dict[field]
131
- del record_as_dict[field]
132
-
133
- # Almost done, just have to add the tag and the type (i.e the record descriptor's name) to the event.
134
- record_as_dict["rdtag"] = tag
158
+ ret["event"] = record_to_splunk_json(packer, record, tag)
159
+ return json.dumps(ret, default=packer.pack_obj)
135
160
 
136
- # Yes.
137
- record_as_dict["rdtype"] = record._desc.name
138
161
 
139
- ret["event"] = record_as_dict
140
- return json.dumps(ret, default=packer.pack_obj)
162
+ def record_to_splunk_tcp_api_json(packer: JsonRecordPacker, record: Record, tag: Optional[str] = None) -> str:
163
+ record_dict = record_to_splunk_json(packer, record, tag)
164
+ return json.dumps(record_dict, default=packer.pack_obj)
141
165
 
142
166
 
143
167
  class SplunkWriter(AbstractWriter):
@@ -159,31 +183,31 @@ class SplunkWriter(AbstractWriter):
159
183
 
160
184
  if sourcetype is None:
161
185
  log.warning("No sourcetype provided, assuming 'records' sourcetype")
162
- sourcetype = SourceType.RECORDS
186
+ self.sourcetype = SourceType.RECORDS
187
+ else:
188
+ self.sourcetype = SourceType(sourcetype)
163
189
 
164
190
  parsed_url = urlparse(uri)
165
191
  url_scheme = parsed_url.scheme.lower()
166
-
167
- self.sourcetype = SourceType(sourcetype)
168
192
  self.protocol = Protocol(url_scheme)
169
-
170
- if self.protocol == Protocol.TCP and self.sourcetype != SourceType.RECORDS:
171
- raise ValueError("For sending data to Splunk over TCP, only the 'records' sourcetype is allowed")
172
-
173
193
  self.host = parsed_url.hostname
174
194
  self.port = parsed_url.port
195
+
175
196
  self.tag = tag
176
197
  self.record_buffer = []
177
198
  self._warned = False
178
199
  self.packer = None
179
-
180
- if self.sourcetype == SourceType.JSON:
181
- self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
200
+ self.json_converter = None
182
201
 
183
202
  if self.protocol == Protocol.TCP:
184
203
  self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP)
185
204
  self.sock.connect((self.host, self.port))
186
205
  self._send = self._send_tcp
206
+
207
+ if self.sourcetype == SourceType.JSON:
208
+ self.packer = JsonRecordPacker(indent=None, pack_descriptors=False)
209
+ self.json_converter = record_to_splunk_tcp_api_json
210
+
187
211
  elif self.protocol in (Protocol.HTTP, Protocol.HTTPS):
188
212
  if not HAS_HTTPX:
189
213
  raise ImportError("The httpx library is required for sending data over HTTP(S)")
@@ -214,6 +238,10 @@ class SplunkWriter(AbstractWriter):
214
238
 
215
239
  self._send = self._send_http
216
240
 
241
+ if self.sourcetype == SourceType.JSON:
242
+ self.packer = JsonRecordPacker(indent=4, pack_descriptors=False)
243
+ self.json_converter = record_to_splunk_http_api_json
244
+
217
245
  def _cache_records_for_http(self, data: Optional[bytes] = None, flush: bool = False) -> Optional[bytes]:
218
246
  # It's possible to call this function without any data, purely to flush. Hence this check.
219
247
  if data:
@@ -252,9 +280,9 @@ class SplunkWriter(AbstractWriter):
252
280
  )
253
281
 
254
282
  if self.sourcetype == SourceType.RECORDS:
255
- rec = splunkify_key_value(record, self.tag)
283
+ rec = record_to_splunk_kv_line(record, self.tag)
256
284
  else:
257
- rec = splunkify_json(self.packer, record, self.tag)
285
+ rec = self.json_converter(self.packer, record, self.tag)
258
286
 
259
287
  # Trail with a newline for line breaking.
260
288
  data = to_bytes(rec) + b"\n"
@@ -31,6 +31,7 @@ from urllib.parse import parse_qsl, urlparse
31
31
 
32
32
  from flow.record.adapter import AbstractReader, AbstractWriter
33
33
  from flow.record.exceptions import RecordAdapterNotFound, RecordDescriptorError
34
+ from flow.record.utils import get_stdin, get_stdout
34
35
 
35
36
  try:
36
37
  import lz4.frame as lz4
@@ -812,10 +813,7 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO:
812
813
  # normal file or stdio for reading or writing
813
814
  if not fp:
814
815
  if is_stdio:
815
- if binary:
816
- fp = getattr(sys.stdout, "buffer", sys.stdout) if out else getattr(sys.stdin, "buffer", sys.stdin)
817
- else:
818
- fp = sys.stdout if out else sys.stdin
816
+ fp = get_stdout(binary=binary) if out else get_stdin(binary=binary)
819
817
  else:
820
818
  fp = io.open(path, mode)
821
819
  # check if we are reading a compressed stream
@@ -867,7 +865,7 @@ def RecordAdapter(
867
865
  if url in ("-", "", None) and fileobj is None:
868
866
  # For reading stdin, we cannot rely on an extension to know what sort of stream is incoming. Thus, we will
869
867
  # treat it as a 'fileobj', where we can peek into the stream and try to select the appropriate adapter.
870
- fileobj = getattr(sys.stdin, "buffer", sys.stdin)
868
+ fileobj = get_stdin(binary=True)
871
869
  if fileobj is not None:
872
870
  # This record adapter has received a file-like object for record reading
873
871
  # We just need to find the right adapter by peeking into the first few bytes.
@@ -32,8 +32,8 @@ NATIVE_UNICODE = isinstance("", str)
32
32
 
33
33
  UTC = timezone.utc
34
34
 
35
- PY_311 = sys.version_info >= (3, 11, 0)
36
- PY_312 = sys.version_info >= (3, 12, 0)
35
+ PY_311_OR_HIGHER = sys.version_info >= (3, 11, 0)
36
+ PY_312_OR_HIGHER = sys.version_info >= (3, 12, 0)
37
37
 
38
38
  TYPE_POSIX = 0
39
39
  TYPE_WINDOWS = 1
@@ -288,7 +288,7 @@ class datetime(_dt, FieldType):
288
288
  # - Python 3.10 and older requires "T" between date and time in fromisoformat()
289
289
  #
290
290
  # There are other incompatibilities, but we don't care about those for now.
291
- if not PY_311:
291
+ if not PY_311_OR_HIGHER:
292
292
  # Convert Z to +00:00 so that fromisoformat() works correctly on Python 3.10 and older
293
293
  if arg[-1] == "Z":
294
294
  arg = arg[:-1] + "+00:00"
@@ -633,6 +633,8 @@ def _is_windowslike_path(path: Any):
633
633
 
634
634
 
635
635
  class path(pathlib.PurePath, FieldType):
636
+ _empty_path = False
637
+
636
638
  def __new__(cls, *args):
637
639
  # This is modelled after pathlib.PurePath's __new__(), which means you
638
640
  # will never get an instance of path, only instances of either
@@ -647,7 +649,7 @@ class path(pathlib.PurePath, FieldType):
647
649
  for path_part in args:
648
650
  if isinstance(path_part, pathlib.PureWindowsPath):
649
651
  cls = windows_path
650
- if not PY_312:
652
+ if not PY_312_OR_HIGHER:
651
653
  # For Python < 3.12, the (string) representation of a
652
654
  # pathlib.PureWindowsPath is not round trip equivalent if a path
653
655
  # starts with a \ or / followed by a drive letter, e.g.: \C:\...
@@ -663,15 +665,15 @@ class path(pathlib.PurePath, FieldType):
663
665
  #
664
666
  # This construction works around that by converting all path parts
665
667
  # to strings first.
666
- args = (str(arg) for arg in args)
668
+ args = tuple(str(arg) for arg in args)
667
669
  elif isinstance(path_part, pathlib.PurePosixPath):
668
670
  cls = posix_path
669
671
  elif _is_windowslike_path(path_part):
670
672
  # This handles any custom PurePath based implementations that have a windows
671
673
  # like path separator (\).
672
674
  cls = windows_path
673
- if not PY_312:
674
- args = (str(arg) for arg in args)
675
+ if not PY_312_OR_HIGHER:
676
+ args = tuple(str(arg) for arg in args)
675
677
  elif _is_posixlike_path(path_part):
676
678
  # This handles any custom PurePath based implementations that don't have a
677
679
  # windows like path separator (\).
@@ -680,20 +682,37 @@ class path(pathlib.PurePath, FieldType):
680
682
  continue
681
683
  break
682
684
 
683
- if PY_312:
685
+ if PY_312_OR_HIGHER:
684
686
  obj = super().__new__(cls)
685
687
  else:
686
688
  obj = cls._from_parts(args)
689
+
690
+ obj._empty_path = False
691
+ if not args or args == ("",):
692
+ obj._empty_path = True
687
693
  return obj
688
694
 
689
695
  def __eq__(self, other: Any) -> bool:
690
696
  if isinstance(other, str):
691
697
  return str(self) == other or self == self.__class__(other)
698
+ elif isinstance(other, self.__class__) and (self._empty_path or other._empty_path):
699
+ return self._empty_path == other._empty_path
692
700
  return super().__eq__(other)
693
701
 
702
+ def __str__(self) -> str:
703
+ if self._empty_path:
704
+ return ""
705
+ return super().__str__()
706
+
694
707
  def __repr__(self) -> str:
695
708
  return repr(str(self))
696
709
 
710
+ @property
711
+ def parent(self):
712
+ if self._empty_path:
713
+ return self
714
+ return super().parent
715
+
697
716
  def _pack(self):
698
717
  path_type = TYPE_WINDOWS if isinstance(self, windows_path) else TYPE_POSIX
699
718
  return (str(self), path_type)
@@ -756,7 +775,8 @@ class command(FieldType):
756
775
  # an '%' for an environment variable
757
776
  # r'\\' for a UNC path
758
777
  # the strip and check for ":" on the second line is for `<drive_letter>:`
759
- windows = value.startswith((r"\\", "%")) or value.lstrip("\"'")[1] == ":"
778
+ stripped_value = value.lstrip("\"'")
779
+ windows = value.startswith((r"\\", "%")) or (len(stripped_value) >= 2 and stripped_value[1] == ":")
760
780
 
761
781
  if windows:
762
782
  cls = windows_command
@@ -41,15 +41,20 @@ class JsonRecordPacker:
41
41
  if obj._desc.identifier not in self.descriptors:
42
42
  self.register(obj._desc, True)
43
43
  serial = obj._asdict()
44
+
44
45
  if self.pack_descriptors:
45
46
  serial["_type"] = "record"
46
47
  serial["_recorddescriptor"] = obj._desc.identifier
47
48
 
48
- # PYTHON2: Because "bytes" are also "str" we have to handle this here
49
49
  for field_type, field_name in obj._desc.get_field_tuples():
50
+ # PYTHON2: Because "bytes" are also "str" we have to handle this here
50
51
  if field_type == "bytes" and isinstance(serial[field_name], str):
51
52
  serial[field_name] = base64.b64encode(serial[field_name]).decode()
52
53
 
54
+ # Boolean field types should be cast to a bool instead of staying ints
55
+ elif field_type == "boolean" and isinstance(serial[field_name], int):
56
+ serial[field_name] = bool(serial[field_name])
57
+
53
58
  return serial
54
59
  if isinstance(obj, RecordDescriptor):
55
60
  serial = {
@@ -17,25 +17,6 @@ except ImportError:
17
17
 
18
18
  string_types = (str, type(""))
19
19
 
20
- AST_NODE_S_TYPES = tuple(
21
- filter(
22
- None,
23
- [
24
- getattr(ast, "Str", None),
25
- getattr(ast, "Bytes", None),
26
- ],
27
- ),
28
- )
29
-
30
- AST_NODE_VALUE_TYPES = tuple(
31
- filter(
32
- None,
33
- [
34
- getattr(ast, "NameConstant", None),
35
- getattr(ast, "Constant", None),
36
- ],
37
- ),
38
- )
39
20
 
40
21
  AST_OPERATORS = {
41
22
  ast.Add: operator.add,
@@ -581,11 +562,7 @@ class RecordContextMatcher:
581
562
  return r
582
563
 
583
564
  def _eval(self, node):
584
- if isinstance(node, ast.Num):
585
- return node.n
586
- elif isinstance(node, AST_NODE_S_TYPES):
587
- return node.s
588
- elif isinstance(node, AST_NODE_VALUE_TYPES):
565
+ if isinstance(node, ast.Constant):
589
566
  return node.value
590
567
  elif isinstance(node, ast.List):
591
568
  return list(map(self.eval, node.elts))
@@ -12,6 +12,7 @@ from functools import lru_cache
12
12
  from flow.record import RECORDSTREAM_MAGIC, RecordWriter
13
13
  from flow.record.fieldtypes import fieldtype_for_value
14
14
  from flow.record.selector import make_selector
15
+ from flow.record.utils import is_stdout
15
16
 
16
17
  from .base import RecordDescriptor, RecordReader
17
18
  from .packer import RecordPacker
@@ -70,7 +71,7 @@ class RecordStreamWriter:
70
71
  self.write(descriptor)
71
72
 
72
73
  def close(self):
73
- if self.fp and self.fp != getattr(sys.stdout, "buffer", sys.stdout):
74
+ if self.fp and not is_stdout(self.fp):
74
75
  self.fp.close()
75
76
  self.fp = None
76
77
 
@@ -1,15 +1,51 @@
1
+ from __future__ import annotations
2
+
1
3
  import base64
2
4
  import os
3
5
  import sys
4
6
  from functools import wraps
7
+ from typing import BinaryIO, TextIO
5
8
 
6
9
  _native = str
7
10
  _unicode = type("")
8
11
  _bytes = type(b"")
9
12
 
10
13
 
11
- def is_stdout(fp):
12
- return fp in (sys.stdout, sys.stdout.buffer)
14
+ def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
15
+ """Return the stdout stream as binary or text stream.
16
+
17
+ This function is the preferred way to get the stdout stream in flow.record.
18
+
19
+ Arguments:
20
+ binary: Whether to return the stream as binary stream.
21
+
22
+ Returns:
23
+ The stdout stream.
24
+ """
25
+ fp = getattr(sys.stdout, "buffer", sys.stdout) if binary else sys.stdout
26
+ fp._is_stdout = True
27
+ return fp
28
+
29
+
30
+ def get_stdin(binary: bool = False) -> TextIO | BinaryIO:
31
+ """Return the stdin stream as binary or text stream.
32
+
33
+ This function is the preferred way to get the stdin stream in flow.record.
34
+
35
+ Arguments:
36
+ binary: Whether to return the stream as binary stream.
37
+
38
+ Returns:
39
+ The stdin stream.
40
+ """
41
+ fp = getattr(sys.stdin, "buffer", sys.stdin) if binary else sys.stdin
42
+ fp._is_stdin = True
43
+ return fp
44
+
45
+
46
+ def is_stdout(fp: TextIO | BinaryIO) -> bool:
47
+ """Returns True if ``fp`` is the stdout stream."""
48
+ return fp in (sys.stdout, sys.stdout.buffer) or hasattr(fp, "_is_stdout")
13
49
 
14
50
 
15
51
  def to_bytes(value):
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.15.dev16'
16
- __version_tuple__ = version_tuple = (3, 15, 'dev16')
15
+ __version__ = version = '3.16'
16
+ __version_tuple__ = version_tuple = (3, 16)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.15.dev16
3
+ Version: 3.16
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
40
40
  Provides-Extra: test
41
41
  Requires-Dist: flow.record[compression]; extra == "test"
42
42
  Requires-Dist: flow.record[avro]; extra == "test"
43
+ Requires-Dist: flow.record[elastic]; extra == "test"
43
44
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
44
45
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
46
 
@@ -59,6 +59,7 @@ tests/test_avro_adapter.py
59
59
  tests/test_compiled_selector.py
60
60
  tests/test_csv_adapter.py
61
61
  tests/test_deprecations.py
62
+ tests/test_elastic_adapter.py
62
63
  tests/test_fieldtype_ip.py
63
64
  tests/test_fieldtypes.py
64
65
  tests/test_json_packer.py
@@ -29,6 +29,7 @@ httpx
29
29
  [test]
30
30
  flow.record[compression]
31
31
  flow.record[avro]
32
+ flow.record[elastic]
32
33
 
33
34
  [test:platform_python_implementation != "PyPy" and python_version < "3.12"]
34
35
  duckdb