flow.record 3.19.dev8__tar.gz → 3.19.dev9__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/PKG-INFO +1 -1
  2. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/elastic.py +45 -10
  3. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/version.py +2 -2
  4. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/PKG-INFO +1 -1
  5. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/.git-blame-ignore-revs +0 -0
  6. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/COPYRIGHT +0 -0
  7. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/LICENSE +0 -0
  8. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/MANIFEST.in +0 -0
  9. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/README.md +0 -0
  10. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/filesystem.py +0 -0
  11. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/passivedns.py +0 -0
  12. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/records.json +0 -0
  13. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/examples/tcpconn.py +0 -0
  14. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/__init__.py +0 -0
  15. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/__init__.py +0 -0
  16. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/archive.py +0 -0
  17. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/avro.py +0 -0
  18. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/broker.py +0 -0
  19. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/csvfile.py +0 -0
  20. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/duckdb.py +0 -0
  21. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/jsonfile.py +0 -0
  22. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/line.py +0 -0
  23. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/mongo.py +0 -0
  24. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/split.py +0 -0
  25. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/splunk.py +0 -0
  26. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/sqlite.py +0 -0
  27. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/stream.py +0 -0
  28. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/text.py +0 -0
  29. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/adapter/xlsx.py +0 -0
  30. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/base.py +0 -0
  31. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/exceptions.py +0 -0
  32. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/__init__.py +0 -0
  33. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/credential.py +0 -0
  34. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/__init__.py +0 -0
  35. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/ip.py +0 -0
  36. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/ipv4.py +0 -0
  37. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/tcp.py +0 -0
  38. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/fieldtypes/net/udp.py +0 -0
  39. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/jsonpacker.py +0 -0
  40. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/packer.py +0 -0
  41. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/selector.py +0 -0
  42. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/stream.py +0 -0
  43. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/tools/__init__.py +0 -0
  44. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/tools/geoip.py +0 -0
  45. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/tools/rdump.py +0 -0
  46. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/utils.py +0 -0
  47. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow/record/whitelist.py +0 -0
  48. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/SOURCES.txt +0 -0
  49. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/dependency_links.txt +0 -0
  50. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/entry_points.txt +0 -0
  51. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/requires.txt +0 -0
  52. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/flow.record.egg-info/top_level.txt +0 -0
  53. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/pyproject.toml +0 -0
  54. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/setup.cfg +0 -0
  55. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/__init__.py +0 -0
  56. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/_utils.py +0 -0
  57. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/docs/Makefile +0 -0
  58. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/docs/conf.py +0 -0
  59. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/docs/index.rst +0 -0
  60. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/selector_explain_example.py +0 -0
  61. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/standalone_test.py +0 -0
  62. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_adapter_line.py +0 -0
  63. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_adapter_text.py +0 -0
  64. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_avro.py +0 -0
  65. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_avro_adapter.py +0 -0
  66. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_compiled_selector.py +0 -0
  67. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_csv_adapter.py +0 -0
  68. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_deprecations.py +0 -0
  69. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_elastic_adapter.py +0 -0
  70. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_fieldtype_ip.py +0 -0
  71. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_fieldtypes.py +0 -0
  72. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_json_packer.py +0 -0
  73. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_json_record_adapter.py +0 -0
  74. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_multi_timestamp.py +0 -0
  75. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_packer.py +0 -0
  76. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_rdump.py +0 -0
  77. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_record.py +0 -0
  78. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_record_adapter.py +0 -0
  79. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_record_descriptor.py +0 -0
  80. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_regression.py +0 -0
  81. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_selector.py +0 -0
  82. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_splunk_adapter.py +0 -0
  83. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_sqlite_duckdb_adapter.py +0 -0
  84. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tests/test_xlsx_adapter.py +0 -0
  85. {flow_record-3.19.dev8 → flow_record-3.19.dev9}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: flow.record
3
- Version: 3.19.dev8
3
+ Version: 3.19.dev9
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -6,8 +6,14 @@ import queue
6
6
  import threading
7
7
  from typing import TYPE_CHECKING
8
8
 
9
- import elasticsearch
10
- import elasticsearch.helpers
9
+ try:
10
+ import elasticsearch
11
+ import elasticsearch.helpers
12
+
13
+ HAS_ELASTIC = True
14
+
15
+ except ImportError:
16
+ HAS_ELASTIC = False
11
17
 
12
18
  from flow.record.adapter import AbstractReader, AbstractWriter
13
19
  from flow.record.base import Record, RecordDescriptor
@@ -33,6 +39,8 @@ Optional arguments:
33
39
  [INDEX]: name of the index to use (default: records)
34
40
  [VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
35
41
  [HASH_RECORD]: make record unique by hashing record [slow] (default: False)
42
+ [REQUEST_TIMEOUT]: maximum duration in seconds for a request to Elastic (default: 30)
43
+ [MAX_RETRIES]: maximum retries before a record is marked as failed (default: 3)
36
44
  [_META_*]: record metadata fields (default: None)
37
45
  """
38
46
 
@@ -49,8 +57,19 @@ class ElasticWriter(AbstractWriter):
49
57
  hash_record: str | bool = False,
50
58
  api_key: str | None = None,
51
59
  queue_size: int = 100000,
60
+ request_timeout: int = 30,
61
+ max_retries: int = 3,
52
62
  **kwargs,
53
63
  ) -> None:
64
+ """Initialize the ElasticWriter.
65
+
66
+ Resources:
67
+ - https://elasticsearch-py.readthedocs.io/en/v8.17.1/api/elasticsearch.html
68
+ """
69
+
70
+ if not HAS_ELASTIC:
71
+ raise RuntimeError("Required dependency 'elasticsearch' missing")
72
+
54
73
  self.index = index
55
74
  self.uri = uri
56
75
  verify_certs = str(verify_certs).lower() in ("1", "true")
@@ -63,20 +82,23 @@ class ElasticWriter(AbstractWriter):
63
82
 
64
83
  self.queue: queue.Queue[Record | StopIteration] = queue.Queue(maxsize=queue_size)
65
84
  self.event = threading.Event()
85
+ self.exception: Exception | None = None
86
+ threading.excepthook = self.excepthook
66
87
 
67
88
  self.es = elasticsearch.Elasticsearch(
68
89
  uri,
69
90
  verify_certs=verify_certs,
70
91
  http_compress=http_compress,
71
92
  api_key=api_key,
93
+ request_timeout=request_timeout,
94
+ retry_on_timeout=True,
95
+ max_retries=max_retries,
72
96
  )
73
97
 
74
98
  self.json_packer = JsonRecordPacker()
75
99
 
76
100
  self.thread = threading.Thread(target=self.streaming_bulk_thread)
77
101
  self.thread.start()
78
- self.exception: Exception | None = None
79
- threading.excepthook = self.excepthook
80
102
 
81
103
  if not verify_certs:
82
104
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
@@ -140,20 +162,28 @@ class ElasticWriter(AbstractWriter):
140
162
  yield self.record_to_document(record, index=self.index)
141
163
 
142
164
  def streaming_bulk_thread(self) -> None:
143
- """Thread that streams the documents to ES via the bulk api"""
165
+ """Thread that streams the documents to ES via the bulk api.
144
166
 
145
- for ok, item in elasticsearch.helpers.streaming_bulk(
167
+ Resources:
168
+ - https://elasticsearch-py.readthedocs.io/en/v8.17.1/helpers.html#elasticsearch.helpers.streaming_bulk
169
+ - https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/helpers/actions.py#L362
170
+ """
171
+ for _ok, _item in elasticsearch.helpers.streaming_bulk(
146
172
  self.es,
147
173
  self.document_stream(),
148
- raise_on_error=False,
149
- raise_on_exception=False,
174
+ raise_on_error=True,
175
+ raise_on_exception=True,
176
+ # Some settings have to be redefined because streaming_bulk does not inherit them from the self.es instance.
177
+ max_retries=3,
150
178
  ):
151
- if not ok:
152
- log.error("Failed to insert %r", item)
179
+ pass
153
180
 
154
181
  self.event.set()
155
182
 
156
183
  def write(self, record: Record) -> None:
184
+ if self.exception:
185
+ raise self.exception
186
+
157
187
  self.queue.put(record)
158
188
 
159
189
  def flush(self) -> None:
@@ -179,6 +209,8 @@ class ElasticReader(AbstractReader):
179
209
  http_compress: str | bool = True,
180
210
  selector: None | Selector | CompiledSelector = None,
181
211
  api_key: str | None = None,
212
+ request_timeout: int = 30,
213
+ max_retries: int = 3,
182
214
  **kwargs,
183
215
  ) -> None:
184
216
  self.index = index
@@ -195,6 +227,9 @@ class ElasticReader(AbstractReader):
195
227
  verify_certs=verify_certs,
196
228
  http_compress=http_compress,
197
229
  api_key=api_key,
230
+ request_timeout=request_timeout,
231
+ retry_on_timeout=True,
232
+ max_retries=max_retries,
198
233
  )
199
234
 
200
235
  if not verify_certs:
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.19.dev8'
16
- __version_tuple__ = version_tuple = (3, 19, 'dev8')
15
+ __version__ = version = '3.19.dev9'
16
+ __version_tuple__ = version_tuple = (3, 19, 'dev9')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: flow.record
3
- Version: 3.19.dev8
3
+ Version: 3.19.dev9
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
File without changes
File without changes