flow.record 3.19.dev8__tar.gz → 3.19.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/PKG-INFO +1 -1
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/elastic.py +45 -10
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/version.py +2 -2
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/PKG-INFO +1 -1
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/.git-blame-ignore-revs +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/COPYRIGHT +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/LICENSE +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/MANIFEST.in +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/README.md +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/filesystem.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/passivedns.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/records.json +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/tcpconn.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/__init__.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/__init__.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/archive.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/avro.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/broker.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/csvfile.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/duckdb.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/jsonfile.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/line.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/mongo.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/split.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/splunk.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/sqlite.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/stream.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/text.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/xlsx.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/base.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/exceptions.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/__init__.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/credential.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/__init__.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/ip.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/ipv4.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/tcp.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/udp.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/jsonpacker.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/packer.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/selector.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/stream.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/tools/__init__.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/tools/geoip.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/tools/rdump.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/utils.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/whitelist.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/SOURCES.txt +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/dependency_links.txt +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/entry_points.txt +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/requires.txt +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/top_level.txt +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/pyproject.toml +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/setup.cfg +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/__init__.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/_utils.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/docs/Makefile +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/docs/conf.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/docs/index.rst +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/selector_explain_example.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/standalone_test.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_adapter_line.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_adapter_text.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_avro.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_avro_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_compiled_selector.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_csv_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_deprecations.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_elastic_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_fieldtype_ip.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_fieldtypes.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_json_packer.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_json_record_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_multi_timestamp.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_packer.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_rdump.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_record.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_record_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_record_descriptor.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_regression.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_selector.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_splunk_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_sqlite_duckdb_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_xlsx_adapter.py +0 -0
- {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.19.dev8
|
|
3
|
+
Version: 3.19.dev9
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -6,8 +6,14 @@ import queue
|
|
|
6
6
|
import threading
|
|
7
7
|
from typing import TYPE_CHECKING
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
import elasticsearch
|
|
9
|
+
try:
|
|
10
|
+
import elasticsearch
|
|
11
|
+
import elasticsearch.helpers
|
|
12
|
+
|
|
13
|
+
HAS_ELASTIC = True
|
|
14
|
+
|
|
15
|
+
except ImportError:
|
|
16
|
+
HAS_ELASTIC = False
|
|
11
17
|
|
|
12
18
|
from flow.record.adapter import AbstractReader, AbstractWriter
|
|
13
19
|
from flow.record.base import Record, RecordDescriptor
|
|
@@ -33,6 +39,8 @@ Optional arguments:
|
|
|
33
39
|
[INDEX]: name of the index to use (default: records)
|
|
34
40
|
[VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
|
|
35
41
|
[HASH_RECORD]: make record unique by hashing record [slow] (default: False)
|
|
42
|
+
[REQUEST_TIMEOUT]: maximum duration in seconds for a request to Elastic (default: 30)
|
|
43
|
+
[MAX_RETRIES]: maximum retries before a record is marked as failed (default: 3)
|
|
36
44
|
[_META_*]: record metadata fields (default: None)
|
|
37
45
|
"""
|
|
38
46
|
|
|
@@ -49,8 +57,19 @@ class ElasticWriter(AbstractWriter):
|
|
|
49
57
|
hash_record: str | bool = False,
|
|
50
58
|
api_key: str | None = None,
|
|
51
59
|
queue_size: int = 100000,
|
|
60
|
+
request_timeout: int = 30,
|
|
61
|
+
max_retries: int = 3,
|
|
52
62
|
**kwargs,
|
|
53
63
|
) -> None:
|
|
64
|
+
"""Initialize the ElasticWriter.
|
|
65
|
+
|
|
66
|
+
Resources:
|
|
67
|
+
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/api/elasticsearch.html
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
if not HAS_ELASTIC:
|
|
71
|
+
raise RuntimeError("Required dependency 'elasticsearch' missing")
|
|
72
|
+
|
|
54
73
|
self.index = index
|
|
55
74
|
self.uri = uri
|
|
56
75
|
verify_certs = str(verify_certs).lower() in ("1", "true")
|
|
@@ -63,20 +82,23 @@ class ElasticWriter(AbstractWriter):
|
|
|
63
82
|
|
|
64
83
|
self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
|
|
65
84
|
self.event = threading.Event()
|
|
85
|
+
self.exception: Exception | None = None
|
|
86
|
+
threading.excepthook = self.excepthook
|
|
66
87
|
|
|
67
88
|
self.es = elasticsearch.Elasticsearch(
|
|
68
89
|
uri,
|
|
69
90
|
verify_certs=verify_certs,
|
|
70
91
|
http_compress=http_compress,
|
|
71
92
|
api_key=api_key,
|
|
93
|
+
request_timeout=request_timeout,
|
|
94
|
+
retry_on_timeout=True,
|
|
95
|
+
max_retries=max_retries,
|
|
72
96
|
)
|
|
73
97
|
|
|
74
98
|
self.json_packer = JsonRecordPacker()
|
|
75
99
|
|
|
76
100
|
self.thread = threading.Thread(target=self.streaming_bulk_thread)
|
|
77
101
|
self.thread.start()
|
|
78
|
-
self.exception: Exception | None = None
|
|
79
|
-
threading.excepthook = self.excepthook
|
|
80
102
|
|
|
81
103
|
if not verify_certs:
|
|
82
104
|
# Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
|
|
@@ -140,20 +162,28 @@ class ElasticWriter(AbstractWriter):
|
|
|
140
162
|
yield self.record_to_document(record, index=self.index)
|
|
141
163
|
|
|
142
164
|
def streaming_bulk_thread(self) -> None:
|
|
143
|
-
"""Thread that streams the documents to ES via the bulk api
|
|
165
|
+
"""Thread that streams the documents to ES via the bulk api.
|
|
144
166
|
|
|
145
|
-
|
|
167
|
+
Resources:
|
|
168
|
+
- https://elasticsearch-py.readthedocs.io/en/v8.17.1/helpers.html#elasticsearch.helpers.streaming_bulk
|
|
169
|
+
- https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/helpers/actions.py#L362
|
|
170
|
+
"""
|
|
171
|
+
for _ok, _item in elasticsearch.helpers.streaming_bulk(
|
|
146
172
|
self.es,
|
|
147
173
|
self.document_stream(),
|
|
148
|
-
raise_on_error=False,
|
|
149
|
-
raise_on_exception=False,
|
|
174
|
+
raise_on_error=True,
|
|
175
|
+
raise_on_exception=True,
|
|
176
|
+
# Some settings have to be redefined because streaming_bulk does not inherit them from the self.es instance.
|
|
177
|
+
max_retries=3,
|
|
150
178
|
):
|
|
151
|
-
|
|
152
|
-
log.error("Failed to insert %r", item)
|
|
179
|
+
pass
|
|
153
180
|
|
|
154
181
|
self.event.set()
|
|
155
182
|
|
|
156
183
|
def write(self, record: Record) -> None:
|
|
184
|
+
if self.exception:
|
|
185
|
+
raise self.exception
|
|
186
|
+
|
|
157
187
|
self.queue.put(record)
|
|
158
188
|
|
|
159
189
|
def flush(self) -> None:
|
|
@@ -179,6 +209,8 @@ class ElasticReader(AbstractReader):
|
|
|
179
209
|
http_compress: str | bool = True,
|
|
180
210
|
selector: None | Selector | CompiledSelector = None,
|
|
181
211
|
api_key: str | None = None,
|
|
212
|
+
request_timeout: int = 30,
|
|
213
|
+
max_retries: int = 3,
|
|
182
214
|
**kwargs,
|
|
183
215
|
) -> None:
|
|
184
216
|
self.index = index
|
|
@@ -195,6 +227,9 @@ class ElasticReader(AbstractReader):
|
|
|
195
227
|
verify_certs=verify_certs,
|
|
196
228
|
http_compress=http_compress,
|
|
197
229
|
api_key=api_key,
|
|
230
|
+
request_timeout=request_timeout,
|
|
231
|
+
retry_on_timeout=True,
|
|
232
|
+
max_retries=max_retries,
|
|
198
233
|
)
|
|
199
234
|
|
|
200
235
|
if not verify_certs:
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '3.19.dev8'
|
|
16
|
-
__version_tuple__ = version_tuple = (3, 19, 'dev8')
|
|
15
|
+
__version__ = version = '3.19.dev9'
|
|
16
|
+
__version_tuple__ = version_tuple = (3, 19, 'dev9')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: flow.record
|
|
3
|
-
Version: 3.19.dev8
|
|
3
|
+
Version: 3.19.dev9
|
|
4
4
|
Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|