flow.record 3.15.dev16__tar.gz → 3.16.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {flow_record-3.15.dev16/flow.record.egg-info → flow_record-3.16.dev1}/PKG-INFO +2 -1
  2. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/elastic.py +39 -6
  3. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/version.py +2 -2
  4. {flow_record-3.15.dev16 → flow_record-3.16.dev1/flow.record.egg-info}/PKG-INFO +2 -1
  5. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/SOURCES.txt +1 -0
  6. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/requires.txt +1 -0
  7. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/pyproject.toml +1 -0
  8. flow_record-3.16.dev1/tests/test_elastic_adapter.py +53 -0
  9. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/COPYRIGHT +0 -0
  10. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/LICENSE +0 -0
  11. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/MANIFEST.in +0 -0
  12. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/README.md +0 -0
  13. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/filesystem.py +0 -0
  14. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/passivedns.py +0 -0
  15. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/records.json +0 -0
  16. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/examples/tcpconn.py +0 -0
  17. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/__init__.py +0 -0
  18. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/__init__.py +0 -0
  19. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/archive.py +0 -0
  20. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/avro.py +0 -0
  21. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/broker.py +0 -0
  22. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/csvfile.py +0 -0
  23. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/duckdb.py +0 -0
  24. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/jsonfile.py +0 -0
  25. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/line.py +0 -0
  26. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/mongo.py +0 -0
  27. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/split.py +0 -0
  28. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/splunk.py +0 -0
  29. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/sqlite.py +0 -0
  30. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/stream.py +0 -0
  31. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/text.py +0 -0
  32. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/adapter/xlsx.py +0 -0
  33. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/base.py +0 -0
  34. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/exceptions.py +0 -0
  35. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/__init__.py +0 -0
  36. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/credential.py +0 -0
  37. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/__init__.py +0 -0
  38. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/ip.py +0 -0
  39. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/ipv4.py +0 -0
  40. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/tcp.py +0 -0
  41. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/fieldtypes/net/udp.py +0 -0
  42. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/jsonpacker.py +0 -0
  43. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/packer.py +0 -0
  44. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/selector.py +0 -0
  45. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/stream.py +0 -0
  46. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/tools/__init__.py +0 -0
  47. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/tools/geoip.py +0 -0
  48. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/tools/rdump.py +0 -0
  49. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/utils.py +0 -0
  50. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow/record/whitelist.py +0 -0
  51. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/dependency_links.txt +0 -0
  52. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/entry_points.txt +0 -0
  53. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/flow.record.egg-info/top_level.txt +0 -0
  54. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/setup.cfg +0 -0
  55. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/__init__.py +0 -0
  56. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/_utils.py +0 -0
  57. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/docs/Makefile +0 -0
  58. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/docs/conf.py +0 -0
  59. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/docs/index.rst +0 -0
  60. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/selector_explain_example.py +0 -0
  61. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/standalone_test.py +0 -0
  62. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_avro.py +0 -0
  63. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_avro_adapter.py +0 -0
  64. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_compiled_selector.py +0 -0
  65. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_csv_adapter.py +0 -0
  66. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_deprecations.py +0 -0
  67. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_fieldtype_ip.py +0 -0
  68. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_fieldtypes.py +0 -0
  69. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_json_packer.py +0 -0
  70. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_json_record_adapter.py +0 -0
  71. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_multi_timestamp.py +0 -0
  72. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_packer.py +0 -0
  73. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_rdump.py +0 -0
  74. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_record.py +0 -0
  75. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_record_adapter.py +0 -0
  76. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_record_descriptor.py +0 -0
  77. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_regression.py +0 -0
  78. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_selector.py +0 -0
  79. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_splunk_adapter.py +0 -0
  80. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/test_sqlite_duckdb_adapter.py +0 -0
  81. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tests/utils_inspect.py +0 -0
  82. {flow_record-3.15.dev16 → flow_record-3.16.dev1}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.15.dev16
3
+ Version: 3.16.dev1
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
40
40
  Provides-Extra: test
41
41
  Requires-Dist: flow.record[compression]; extra == "test"
42
42
  Requires-Dist: flow.record[avro]; extra == "test"
43
+ Requires-Dist: flow.record[elastic]; extra == "test"
43
44
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
44
45
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
46
 
@@ -2,7 +2,7 @@ import hashlib
2
2
  import logging
3
3
  import queue
4
4
  import threading
5
- from typing import Iterator, Union
5
+ from typing import Iterator, Optional, Union
6
6
 
7
7
  import elasticsearch
8
8
  import elasticsearch.helpers
@@ -22,9 +22,11 @@ Read usage: rdump elastic+[PROTOCOL]://[IP]:[PORT]?index=[INDEX]
22
22
  [PROTOCOL]: http or https. Defaults to https when "+[PROTOCOL]" is omitted
23
23
 
24
24
  Optional arguments:
25
+ [API_KEY]: base64-encoded API key to authenticate with (default: None)
25
26
  [INDEX]: name of the index to use (default: records)
26
27
  [VERIFY_CERTS]: verify certs of Elasticsearch instance (default: True)
27
28
  [HASH_RECORD]: make record unique by hashing record [slow] (default: False)
29
+ [_META_*]: record metadata fields (default: None)
28
30
  """
29
31
 
30
32
  log = logging.getLogger(__name__)
@@ -38,6 +40,7 @@ class ElasticWriter(AbstractWriter):
38
40
  verify_certs: Union[str, bool] = True,
39
41
  http_compress: Union[str, bool] = True,
40
42
  hash_record: Union[str, bool] = False,
43
+ api_key: Optional[str] = None,
41
44
  **kwargs,
42
45
  ) -> None:
43
46
  self.index = index
@@ -45,7 +48,17 @@ class ElasticWriter(AbstractWriter):
45
48
  verify_certs = str(verify_certs).lower() in ("1", "true")
46
49
  http_compress = str(http_compress).lower() in ("1", "true")
47
50
  self.hash_record = str(hash_record).lower() in ("1", "true")
48
- self.es = elasticsearch.Elasticsearch(uri, verify_certs=verify_certs, http_compress=http_compress)
51
+
52
+ if not uri.lower().startswith(("http://", "https://")):
53
+ uri = "http://" + uri
54
+
55
+ self.es = elasticsearch.Elasticsearch(
56
+ uri,
57
+ verify_certs=verify_certs,
58
+ http_compress=http_compress,
59
+ api_key=api_key,
60
+ )
61
+
49
62
  self.json_packer = JsonRecordPacker()
50
63
  self.queue: queue.Queue[Union[Record, StopIteration]] = queue.Queue()
51
64
  self.event = threading.Event()
@@ -58,25 +71,34 @@ class ElasticWriter(AbstractWriter):
58
71
 
59
72
  urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
60
73
 
74
+ self.metadata_fields = {}
75
+ for arg_key, arg_val in kwargs.items():
76
+ if arg_key.startswith("_meta_"):
77
+ self.metadata_fields[arg_key[6:]] = arg_val
78
+
61
79
  def record_to_document(self, record: Record, index: str) -> dict:
62
80
  """Convert a record to a Elasticsearch compatible document dictionary"""
63
81
  rdict = record._asdict()
64
82
 
65
- # Store record metadata under `_record_metadata`
83
+ # Store record metadata under `_record_metadata`.
66
84
  rdict_meta = {
67
85
  "descriptor": {
68
86
  "name": record._desc.name,
69
87
  "hash": record._desc.descriptor_hash,
70
88
  },
71
89
  }
90
+
72
91
  # Move all dunder fields to `_record_metadata` to avoid naming clash with ES.
73
92
  dunder_keys = [key for key in rdict if key.startswith("_")]
74
93
  for key in dunder_keys:
75
94
  rdict_meta[key.lstrip("_")] = rdict.pop(key)
76
- # remove _generated field from metadata to ensure determinstic documents
95
+
96
+ # Remove _generated field from metadata to ensure deterministic documents.
77
97
  if self.hash_record:
78
98
  rdict_meta.pop("generated", None)
79
- rdict["_record_metadata"] = rdict_meta
99
+
100
+ rdict["_record_metadata"] = rdict_meta.copy()
101
+ rdict["_record_metadata"].update(self.metadata_fields)
80
102
 
81
103
  document = {
82
104
  "_index": index,
@@ -106,6 +128,7 @@ class ElasticWriter(AbstractWriter):
106
128
  ):
107
129
  if not ok:
108
130
  log.error("Failed to insert %r", item)
131
+
109
132
  self.event.set()
110
133
 
111
134
  def write(self, record: Record) -> None:
@@ -129,6 +152,7 @@ class ElasticReader(AbstractReader):
129
152
  verify_certs: Union[str, bool] = True,
130
153
  http_compress: Union[str, bool] = True,
131
154
  selector: Union[None, Selector, CompiledSelector] = None,
155
+ api_key: Optional[str] = None,
132
156
  **kwargs,
133
157
  ) -> None:
134
158
  self.index = index
@@ -136,7 +160,16 @@ class ElasticReader(AbstractReader):
136
160
  self.selector = selector
137
161
  verify_certs = str(verify_certs).lower() in ("1", "true")
138
162
  http_compress = str(http_compress).lower() in ("1", "true")
139
- self.es = elasticsearch.Elasticsearch(uri, verify_certs=verify_certs, http_compress=http_compress)
163
+
164
+ if not uri.lower().startswith(("http://", "https://")):
165
+ uri = "http://" + uri
166
+
167
+ self.es = elasticsearch.Elasticsearch(
168
+ uri,
169
+ verify_certs=verify_certs,
170
+ http_compress=http_compress,
171
+ api_key=api_key,
172
+ )
140
173
 
141
174
  if not verify_certs:
142
175
  # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.15.dev16'
16
- __version_tuple__ = version_tuple = (3, 15, 'dev16')
15
+ __version__ = version = '3.16.dev1'
16
+ __version_tuple__ = version_tuple = (3, 16, 'dev1')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.15.dev16
3
+ Version: 3.16.dev1
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -40,6 +40,7 @@ Requires-Dist: httpx; extra == "splunk"
40
40
  Provides-Extra: test
41
41
  Requires-Dist: flow.record[compression]; extra == "test"
42
42
  Requires-Dist: flow.record[avro]; extra == "test"
43
+ Requires-Dist: flow.record[elastic]; extra == "test"
43
44
  Requires-Dist: duckdb; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
44
45
  Requires-Dist: pytz; (platform_python_implementation != "PyPy" and python_version < "3.12") and extra == "test"
45
46
 
@@ -59,6 +59,7 @@ tests/test_avro_adapter.py
59
59
  tests/test_compiled_selector.py
60
60
  tests/test_csv_adapter.py
61
61
  tests/test_deprecations.py
62
+ tests/test_elastic_adapter.py
62
63
  tests/test_fieldtype_ip.py
63
64
  tests/test_fieldtypes.py
64
65
  tests/test_json_packer.py
@@ -29,6 +29,7 @@ httpx
29
29
  [test]
30
30
  flow.record[compression]
31
31
  flow.record[avro]
32
+ flow.record[elastic]
32
33
 
33
34
  [test:platform_python_implementation != "PyPy" and python_version < "3.12"]
34
35
  duckdb
@@ -59,6 +59,7 @@ splunk = [
59
59
  test = [
60
60
  "flow.record[compression]",
61
61
  "flow.record[avro]",
62
+ "flow.record[elastic]",
62
63
  "duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
63
64
  "pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
64
65
  ]
@@ -0,0 +1,53 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ from flow.record import RecordDescriptor
6
+ from flow.record.adapter.elastic import ElasticWriter
7
+
8
+ MyRecord = RecordDescriptor(
9
+ "my/record",
10
+ [
11
+ ("string", "field_one"),
12
+ ("string", "field_two"),
13
+ ],
14
+ )
15
+
16
+
17
+ @pytest.mark.parametrize(
18
+ "record",
19
+ [
20
+ MyRecord("first", "record"),
21
+ MyRecord("second", "record"),
22
+ ],
23
+ )
24
+ def test_elastic_writer_metadata(record):
25
+ options = {
26
+ "_meta_foo": "some value",
27
+ "_meta_bar": "another value",
28
+ }
29
+
30
+ with ElasticWriter(uri="elasticsearch:9200", **options) as writer:
31
+ assert writer.metadata_fields == {"foo": "some value", "bar": "another value"}
32
+
33
+ assert writer.record_to_document(record, "some-index") == {
34
+ "_index": "some-index",
35
+ "_source": json.dumps(
36
+ {
37
+ "field_one": record.field_one,
38
+ "field_two": record.field_two,
39
+ "_record_metadata": {
40
+ "descriptor": {
41
+ "name": "my/record",
42
+ "hash": record._desc.descriptor_hash,
43
+ },
44
+ "source": None,
45
+ "classification": None,
46
+ "generated": record._generated.isoformat(),
47
+ "version": 1,
48
+ "foo": "some value",
49
+ "bar": "another value",
50
+ },
51
+ }
52
+ ),
53
+ }