flow.record 3.15.dev16__tar.gz → 3.16.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.15.dev16/flow.record.egg-info → flow_record-3.16.dev1}/PKG-INFO +2 -1
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/elastic.py +39 -6
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/version.py +2 -2
- {flow_record-3.15.dev16 → flow_record-3.16.dev1/flow.record.egg-info}/PKG-INFO +2 -1
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/SOURCES.txt +1 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/requires.txt +1 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/pyproject.toml +1 -0
- flow_record-3.16.dev1/tests/test_elastic_adapter.py +53 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/COPYRIGHT +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/LICENSE +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/MANIFEST.in +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/README.md +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/filesystem.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/passivedns.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/records.json +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/tcpconn.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/line.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/split.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/splunk.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/sqlite.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/text.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/xlsx.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/base.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/exceptions.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/jsonpacker.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/packer.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/selector.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/stream.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/utils.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/whitelist.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/setup.cfg +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/__init__.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/_utils.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/docs/Makefile +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/docs/conf.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/docs/index.rst +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/selector_explain_example.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/standalone_test.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_avro.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_deprecations.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_fieldtypes.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_json_packer.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_packer.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_rdump.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_record.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_record_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_regression.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_selector.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_splunk_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/utils_inspect.py +0 -0
- {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev16
|
|
3
|
+
Version: 3.16.dev1
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
|
|
|
40
40
|
Provides-Extra: test
|
|
41
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
42
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
43
|
+
Requires-Dist: flow.record[elastic]; extra == "test"
|
|
43
44
|
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
44
45
|
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
45
46
|
|
|
@@ -2,7 +2,7 @@ import hashlib
|
|
|
2
2
|
import logging
|
|
3
3
|
import queue
|
|
4
4
|
import threading
|
|
5
|
-
from typing import Iterator, Union
|
|
5
|
+
from typing import Iterator, Optional, Union
|
|
6
6
|
|
|
7
7
|
import elasticsearch
|
|
8
8
|
import elasticsearch.helpers
|
|
@@ -22,9 +22,11 @@ Read usage: rdump elastic+[PROTOCOL]://[IP]:[PORT]?index=[INDEX]
|
|
|
22
22
|
[PROTOCOL]: http or https. Defaults to https when "+[PROTOCOL]" is omitted
|
|
23
23
|
|
|
24
24
|
Optional arguments:
|
|
25
|
+
[API_KEY]: base64 encoded api key to authenticate with (default: None)
|
|
25
26
|
[INDEX]: name of the index to use (default: records)
|
|
26
27
|
[VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
|
|
27
28
|
[HASH_RECORD]: make record unique by hashing record [slow] (default: False)
|
|
29
|
+
[_META_*]: record metadata fields (default: None)
|
|
28
30
|
"""
|
|
29
31
|
|
|
30
32
|
log = logging.getLogger(__name__)
|
|
@@ -38,6 +40,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
38
40
|
verify_certs: Union[str, bool] = True,
|
|
39
41
|
http_compress: Union[str, bool] = True,
|
|
40
42
|
hash_record: Union[str, bool] = False,
|
|
43
|
+
api_key: Optional[str] = None,
|
|
41
44
|
**kwargs,
|
|
42
45
|
) -> None:
|
|
43
46
|
self.index = index
|
|
@@ -45,7 +48,17 @@ class ElasticWriter(AbstractWriter):
|
|
|
45
48
|
verify_certs = str(verify_certs).lower() in ("1", "true")
|
|
46
49
|
http_compress = str(http_compress).lower() in ("1", "true")
|
|
47
50
|
self.hash_record = str(hash_record).lower() in ("1", "true")
|
|
48
|
-
|
|
51
|
+
|
|
52
|
+
if not uri.lower().startswith(("http://", "https://")):
|
|
53
|
+
uri = "http://" + uri
|
|
54
|
+
|
|
55
|
+
self.es = elasticsearch.Elasticsearch(
|
|
56
|
+
uri,
|
|
57
|
+
verify_certs=verify_certs,
|
|
58
|
+
http_compress=http_compress,
|
|
59
|
+
api_key=api_key,
|
|
60
|
+
)
|
|
61
|
+
|
|
49
62
|
self.json_packer = JsonRecordPacker()
|
|
50
63
|
self.queue: queue.Queue[Union[Record, StopIteration]] = queue.Queue()
|
|
51
64
|
self.event = threading.Event()
|
|
@@ -58,25 +71,34 @@ class ElasticWriter(AbstractWriter):
|
|
|
58
71
|
|
|
59
72
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
60
73
|
|
|
74
|
+
self.metadata_fields = {}
|
|
75
|
+
for arg_key, arg_val in kwargs.items():
|
|
76
|
+
if arg_key.startswith("_meta_"):
|
|
77
|
+
self.metadata_fields[arg_key[6:]] = arg_val
|
|
78
|
+
|
|
61
79
|
def record_to_document(self, record: Record, index: str) -> dict:
|
|
62
80
|
"""Convert a record to an Elasticsearch compatible document dictionary"""
|
|
63
81
|
rdict = record._asdict()
|
|
64
82
|
|
|
65
|
-
# Store record metadata under `_record_metadata
|
|
83
|
+
# Store record metadata under `_record_metadata`.
|
|
66
84
|
rdict_meta = {
|
|
67
85
|
"descriptor": {
|
|
68
86
|
"name": record._desc.name,
|
|
69
87
|
"hash": record._desc.descriptor_hash,
|
|
70
88
|
},
|
|
71
89
|
}
|
|
90
|
+
|
|
72
91
|
# Move all dunder fields to `_record_metadata` to avoid naming clash with ES.
|
|
73
92
|
dunder_keys = [key for key in rdict if key.startswith("_")]
|
|
74
93
|
for key in dunder_keys:
|
|
75
94
|
rdict_meta[key.lstrip("_")] = rdict.pop(key)
|
|
76
|
-
|
|
95
|
+
|
|
96
|
+
# Remove _generated field from metadata to ensure deterministic documents.
|
|
77
97
|
if self.hash_record:
|
|
78
98
|
rdict_meta.pop("generated", None)
|
|
79
|
-
|
|
99
|
+
|
|
100
|
+
rdict["_record_metadata"] = rdict_meta.copy()
|
|
101
|
+
rdict["_record_metadata"].update(self.metadata_fields)
|
|
80
102
|
|
|
81
103
|
document = {
|
|
82
104
|
"_index": index,
|
|
@@ -106,6 +128,7 @@ class ElasticWriter(AbstractWriter):
|
|
|
106
128
|
):
|
|
107
129
|
if not ok:
|
|
108
130
|
log.error("Failed to insert %r", item)
|
|
131
|
+
|
|
109
132
|
self.event.set()
|
|
110
133
|
|
|
111
134
|
def write(self, record: Record) -> None:
|
|
@@ -129,6 +152,7 @@ class ElasticReader(AbstractReader):
|
|
|
129
152
|
verify_certs: Union[str, bool] = True,
|
|
130
153
|
http_compress: Union[str, bool] = True,
|
|
131
154
|
selector: Union[None, Selector, CompiledSelector] = None,
|
|
155
|
+
api_key: Optional[str] = None,
|
|
132
156
|
**kwargs,
|
|
133
157
|
) -> None:
|
|
134
158
|
self.index = index
|
|
@@ -136,7 +160,16 @@ class ElasticReader(AbstractReader):
|
|
|
136
160
|
self.selector = selector
|
|
137
161
|
verify_certs = str(verify_certs).lower() in ("1", "true")
|
|
138
162
|
http_compress = str(http_compress).lower() in ("1", "true")
|
|
139
|
-
|
|
163
|
+
|
|
164
|
+
if not uri.lower().startswith(("http://", "https://")):
|
|
165
|
+
uri = "http://" + uri
|
|
166
|
+
|
|
167
|
+
self.es = elasticsearch.Elasticsearch(
|
|
168
|
+
uri,
|
|
169
|
+
verify_certs=verify_certs,
|
|
170
|
+
http_compress=http_compress,
|
|
171
|
+
api_key=api_key,
|
|
172
|
+
)
|
|
140
173
|
|
|
141
174
|
if not verify_certs:
|
|
142
175
|
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.15.dev16'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 15, 'dev16')
|
|
15
|
+
__version__ = version = '3.16.dev1'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 16, 'dev1')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.15.dev16
|
|
3
|
+
Version: 3.16.dev1
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
|
|
|
40
40
|
Provides-Extra: test
|
|
41
41
|
Requires-Dist: flow.record[compression]; extra == "test"
|
|
42
42
|
Requires-Dist: flow.record[avro]; extra == "test"
|
|
43
|
+
Requires-Dist: flow.record[elastic]; extra == "test"
|
|
43
44
|
Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
44
45
|
Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
|
|
45
46
|
|
|
@@ -59,6 +59,7 @@ splunk = [
|
|
|
59
59
|
test = [
|
|
60
60
|
"flow.record[compression]",
|
|
61
61
|
"flow.record[avro]",
|
|
62
|
+
"flow.record[elastic]",
|
|
62
63
|
"duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
|
|
63
64
|
"pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
|
|
64
65
|
]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from flow.record import RecordDescriptor
|
|
6
|
+
from flow.record.adapter.elastic import ElasticWriter
|
|
7
|
+
|
|
8
|
+
MyRecord = RecordDescriptor(
|
|
9
|
+
"my/record",
|
|
10
|
+
[
|
|
11
|
+
("string", "field_one"),
|
|
12
|
+
("string", "field_two"),
|
|
13
|
+
],
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@pytest.mark.parametrize(
|
|
18
|
+
"record",
|
|
19
|
+
[
|
|
20
|
+
MyRecord("first", "record"),
|
|
21
|
+
MyRecord("second", "record"),
|
|
22
|
+
],
|
|
23
|
+
)
|
|
24
|
+
def test_elastic_writer_metadata(record):
|
|
25
|
+
options = {
|
|
26
|
+
"_meta_foo": "some value",
|
|
27
|
+
"_meta_bar": "another value",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
|
|
31
|
+
assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
|
|
32
|
+
|
|
33
|
+
assert writer.record_to_document(record, "some-index") == {
|
|
34
|
+
"_index": "some-index",
|
|
35
|
+
"_source": json.dumps(
|
|
36
|
+
{
|
|
37
|
+
"field_one": record.field_one,
|
|
38
|
+
"field_two": record.field_two,
|
|
39
|
+
"_record_metadata": {
|
|
40
|
+
"descriptor": {
|
|
41
|
+
"name": "my/record",
|
|
42
|
+
"hash": record._desc.descriptor_hash,
|
|
43
|
+
},
|
|
44
|
+
"source": None,
|
|
45
|
+
"classification": None,
|
|
46
|
+
"generated": record._generated.isoformat(),
|
|
47
|
+
"version": 1,
|
|
48
|
+
"foo": "some value",
|
|
49
|
+
"bar": "another value",
|
|
50
|
+
},
|
|
51
|
+
}
|
|
52
|
+
),
|
|
53
|
+
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|