flow.record 3.17.dev1__tar.gz → 3.17.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. {flow_record-3.17.dev1/flow.record.egg-info → flow_record-3.17.dev3}/PKG-INFO +2 -1
  2. flow_record-3.17.dev3/flow/record/adapter/xlsx.py +152 -0
  3. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/version.py +2 -2
  4. {flow_record-3.17.dev1 → flow_record-3.17.dev3/flow.record.egg-info}/PKG-INFO +2 -1
  5. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow.record.egg-info/SOURCES.txt +1 -0
  6. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow.record.egg-info/requires.txt +3 -0
  7. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/pyproject.toml +1 -0
  8. flow_record-3.17.dev3/tests/test_xlsx_adapter.py +55 -0
  9. flow_record-3.17.dev1/flow/record/adapter/xlsx.py +0 -73
  10. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/COPYRIGHT +0 -0
  11. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/LICENSE +0 -0
  12. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/MANIFEST.in +0 -0
  13. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/README.md +0 -0
  14. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/examples/filesystem.py +0 -0
  15. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/examples/passivedns.py +0 -0
  16. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/examples/records.json +0 -0
  17. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/examples/tcpconn.py +0 -0
  18. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/__init__.py +0 -0
  19. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/__init__.py +0 -0
  20. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/archive.py +0 -0
  21. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/avro.py +0 -0
  22. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/broker.py +0 -0
  23. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/csvfile.py +0 -0
  24. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/duckdb.py +0 -0
  25. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/elastic.py +0 -0
  26. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/jsonfile.py +0 -0
  27. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/line.py +0 -0
  28. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/mongo.py +0 -0
  29. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/split.py +0 -0
  30. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/splunk.py +0 -0
  31. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/sqlite.py +0 -0
  32. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/stream.py +0 -0
  33. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/adapter/text.py +0 -0
  34. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/base.py +0 -0
  35. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/exceptions.py +0 -0
  36. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/__init__.py +0 -0
  37. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/credential.py +0 -0
  38. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/net/__init__.py +0 -0
  39. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/net/ip.py +0 -0
  40. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/net/ipv4.py +0 -0
  41. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/net/tcp.py +0 -0
  42. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/fieldtypes/net/udp.py +0 -0
  43. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/jsonpacker.py +0 -0
  44. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/packer.py +0 -0
  45. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/selector.py +0 -0
  46. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/stream.py +0 -0
  47. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/tools/__init__.py +0 -0
  48. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/tools/geoip.py +0 -0
  49. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/tools/rdump.py +0 -0
  50. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/utils.py +0 -0
  51. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow/record/whitelist.py +0 -0
  52. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow.record.egg-info/dependency_links.txt +0 -0
  53. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow.record.egg-info/entry_points.txt +0 -0
  54. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/flow.record.egg-info/top_level.txt +0 -0
  55. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/setup.cfg +0 -0
  56. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/__init__.py +0 -0
  57. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/_utils.py +0 -0
  58. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/docs/Makefile +0 -0
  59. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/docs/conf.py +0 -0
  60. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/docs/index.rst +0 -0
  61. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/selector_explain_example.py +0 -0
  62. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/standalone_test.py +0 -0
  63. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_avro.py +0 -0
  64. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_avro_adapter.py +0 -0
  65. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_compiled_selector.py +0 -0
  66. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_csv_adapter.py +0 -0
  67. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_deprecations.py +0 -0
  68. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_elastic_adapter.py +0 -0
  69. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_fieldtype_ip.py +0 -0
  70. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_fieldtypes.py +0 -0
  71. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_json_packer.py +0 -0
  72. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_json_record_adapter.py +0 -0
  73. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_multi_timestamp.py +0 -0
  74. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_packer.py +0 -0
  75. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_rdump.py +0 -0
  76. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_record.py +0 -0
  77. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_record_adapter.py +0 -0
  78. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_record_descriptor.py +0 -0
  79. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_regression.py +0 -0
  80. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_selector.py +0 -0
  81. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_splunk_adapter.py +0 -0
  82. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/test_sqlite_duckdb_adapter.py +0 -0
  83. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tests/utils_inspect.py +0 -0
  84. {flow_record-3.17.dev1 → flow_record-3.17.dev3}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.17.dev1
3
+ Version: 3.17.dev3
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -31,6 +31,7 @@ Requires-Dist: elasticsearch; extra == "elastic"
31
31
  Provides-Extra: geoip
32
32
  Requires-Dist: maxminddb; extra == "geoip"
33
33
  Provides-Extra: avro
34
+ Requires-Dist: cramjam<2.8.4; (platform_python_implementation == "PyPy" and python_version == "3.9") and extra == "avro"
34
35
  Requires-Dist: fastavro[snappy]; extra == "avro"
35
36
  Provides-Extra: duckdb
36
37
  Requires-Dist: duckdb; extra == "duckdb"
@@ -0,0 +1,152 @@
1
+ from base64 import b64decode, b64encode
2
+ from datetime import datetime, timezone
3
+ from typing import Any, Iterator
4
+
5
+ from openpyxl import Workbook, load_workbook
6
+ from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
7
+
8
+ from flow import record
9
+ from flow.record import fieldtypes
10
+ from flow.record.adapter import AbstractReader, AbstractWriter
11
+ from flow.record.fieldtypes.net import ipaddress
12
+ from flow.record.selector import make_selector
13
+ from flow.record.utils import is_stdout
14
+
15
+ __usage__ = """
16
+ Microsoft Excel spreadsheet adapter
17
+ ---
18
+ Write usage: rdump -w xlsx://[PATH]
19
+ Read usage: rdump xlsx://[PATH]
20
+ [PATH]: path to file. Leave empty or "-" to output to stdout
21
+ """
22
+
23
+
24
+ def sanitize_fieldvalues(values: Iterator[Any]) -> Iterator[Any]:
25
+ """Sanitize field values so openpyxl will accept them."""
26
+
27
+ for value in values:
28
+ # openpyxl doesn't support timezone-aware datetime instances,
29
+ # so we convert to UTC and then remove the timezone info.
30
+ if isinstance(value, datetime) and value.tzinfo is not None:
31
+ value = value.astimezone(timezone.utc).replace(tzinfo=None)
32
+
33
+ elif type(value) in [ipaddress, list, fieldtypes.posix_path, fieldtypes.windows_path]:
34
+ value = str(value)
35
+
36
+ elif isinstance(value, bytes):
37
+ base64_encode = False
38
+ try:
39
+ new_value = 'b"' + value.decode() + '"'
40
+ if ILLEGAL_CHARACTERS_RE.search(new_value):
41
+ base64_encode = True
42
+ else:
43
+ value = new_value
44
+ except UnicodeDecodeError:
45
+ base64_encode = True
46
+ if base64_encode:
47
+ value = "base64:" + b64encode(value).decode()
48
+
49
+ yield value
50
+
51
+
52
+ class XlsxWriter(AbstractWriter):
53
+ fp = None
54
+ wb = None
55
+
56
+ def __init__(self, path, **kwargs):
57
+ self.fp = record.open_path_or_stream(path, "wb")
58
+ self.wb = Workbook()
59
+ self.ws = self.wb.active
60
+
61
+ # Remove the active work sheet, every Record Descriptor will have its own sheet.
62
+ self.wb.remove(self.ws)
63
+ self.descs = []
64
+ self._last_dec = None
65
+
66
+ def write(self, r):
67
+ if r._desc not in self.descs:
68
+ self.descs.append(r._desc)
69
+ ws = self.wb.create_sheet(r._desc.name.strip().replace("/", "-"))
70
+ field_types = []
71
+ field_names = []
72
+
73
+ for field_name, field in r._desc.get_all_fields().items():
74
+ field_types.append(field.typename)
75
+ field_names.append(field_name)
76
+
77
+ ws.append(field_types)
78
+ ws.append(field_names)
79
+
80
+ if r._desc != self._last_dec:
81
+ self._last_dec = r._desc
82
+ self.ws = self.wb[r._desc.name.strip().replace("/", "-")]
83
+
84
+ values = list(sanitize_fieldvalues(value for value in r._asdict().values()))
85
+
86
+ try:
87
+ self.ws.append(values)
88
+ except ValueError as e:
89
+ raise ValueError(f"Unable to write values to workbook: {str(e)}")
90
+
91
+ def flush(self):
92
+ if self.wb:
93
+ self.wb.save(self.fp)
94
+
95
+ def close(self):
96
+ if self.wb:
97
+ self.wb.close()
98
+ self.wb = None
99
+
100
+ if self.fp and not is_stdout(self.fp):
101
+ self.fp.close()
102
+ self.fp = None
103
+
104
+
105
+ class XlsxReader(AbstractReader):
106
+ fp = None
107
+
108
+ def __init__(self, path, selector=None, **kwargs):
109
+ self.selector = make_selector(selector)
110
+ self.fp = record.open_path_or_stream(path, "rb")
111
+ self.desc = None
112
+ self.wb = load_workbook(self.fp)
113
+ self.ws = self.wb.active
114
+
115
+ def close(self):
116
+ if self.fp:
117
+ self.fp.close()
118
+ self.fp = None
119
+
120
+ def __iter__(self):
121
+ for worksheet in self.wb.worksheets:
122
+ desc = None
123
+ desc_name = worksheet.title.replace("-", "/")
124
+ field_names = None
125
+ field_types = None
126
+ for row in worksheet:
127
+ if field_types is None:
128
+ field_types = [col.value for col in row if col.value]
129
+ continue
130
+ if field_names is None:
131
+ field_names = [
132
+ col.value.replace(" ", "_").lower()
133
+ for col in row
134
+ if col.value and not col.value.startswith("_")
135
+ ]
136
+ desc = record.RecordDescriptor(desc_name, list(zip(field_types, field_names)))
137
+ continue
138
+
139
+ record_values = []
140
+ for idx, col in enumerate(row):
141
+ value = col.value
142
+ if field_types[idx] == "bytes":
143
+ if value[1] == '"': # If so, we know this is b""
144
+ # Cut of the b" at the start and the trailing "
145
+ value = value[2:-1].encode()
146
+ else:
147
+ # If not, we know it is base64 encoded (so we cut of the starting 'base64:')
148
+ value = b64decode(value[7:])
149
+ record_values.append(value)
150
+ obj = desc(*record_values)
151
+ if not self.selector or self.selector.match(obj):
152
+ yield obj
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.17.dev1'
16
- __version_tuple__ = version_tuple = (3, 17, 'dev1')
15
+ __version__ = version = '3.17.dev3'
16
+ __version_tuple__ = version_tuple = (3, 17, 'dev3')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.17.dev1
3
+ Version: 3.17.dev3
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -31,6 +31,7 @@ Requires-Dist: elasticsearch; extra == "elastic"
31
31
  Provides-Extra: geoip
32
32
  Requires-Dist: maxminddb; extra == "geoip"
33
33
  Provides-Extra: avro
34
+ Requires-Dist: cramjam<2.8.4; (platform_python_implementation == "PyPy" and python_version == "3.9") and extra == "avro"
34
35
  Requires-Dist: fastavro[snappy]; extra == "avro"
35
36
  Provides-Extra: duckdb
36
37
  Requires-Dist: duckdb; extra == "duckdb"
@@ -74,6 +74,7 @@ tests/test_regression.py
74
74
  tests/test_selector.py
75
75
  tests/test_splunk_adapter.py
76
76
  tests/test_sqlite_duckdb_adapter.py
77
+ tests/test_xlsx_adapter.py
77
78
  tests/utils_inspect.py
78
79
  tests/docs/Makefile
79
80
  tests/docs/conf.py
@@ -9,6 +9,9 @@ backports.zoneinfo[tzdata]
9
9
  [avro]
10
10
  fastavro[snappy]
11
11
 
12
+ [avro:platform_python_implementation == "PyPy" and python_version == "3.9"]
13
+ cramjam<2.8.4
14
+
12
15
  [compression]
13
16
  lz4
14
17
  zstandard
@@ -47,6 +47,7 @@ geoip = [
47
47
  "maxminddb",
48
48
  ]
49
49
  avro = [
50
+ "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'",
50
51
  "fastavro[snappy]",
51
52
  ]
52
53
  duckdb = [
@@ -0,0 +1,55 @@
1
+ import re
2
+ import sys
3
+ from datetime import datetime, timedelta, timezone
4
+ from typing import Iterator
5
+ from unittest.mock import MagicMock
6
+
7
+ import pytest
8
+
9
+ from flow.record import fieldtypes
10
+
11
+
12
+ @pytest.fixture
13
+ def mock_openpyxl_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock]:
14
+ with monkeypatch.context() as m:
15
+ mock_openpyxl = MagicMock()
16
+ mock_cell = MagicMock()
17
+ mock_cell.ILLEGAL_CHARACTERS_RE = re.compile(r"[\000-\010]|[\013-\014]|[\016-\037]")
18
+ m.setitem(sys.modules, "openpyxl", mock_openpyxl)
19
+ m.setitem(sys.modules, "openpyxl.cell.cell", mock_cell)
20
+
21
+ yield mock_openpyxl
22
+
23
+
24
+ def test_sanitize_field_values(mock_openpyxl_package):
25
+ from flow.record.adapter.xlsx import sanitize_fieldvalues
26
+
27
+ assert list(
28
+ sanitize_fieldvalues(
29
+ [
30
+ 7,
31
+ datetime(1920, 11, 11, 13, 37, 0, tzinfo=timezone(timedelta(hours=2))),
32
+ "James",
33
+ b"Bond",
34
+ b"\x00\x07",
35
+ fieldtypes.net.ipaddress("13.37.13.37"),
36
+ ["Shaken", "Not", "Stirred"],
37
+ fieldtypes.posix_path("/home/user"),
38
+ fieldtypes.posix_command("/bin/bash -c 'echo hello world'"),
39
+ fieldtypes.windows_path("C:\\Users\\user\\Desktop"),
40
+ fieldtypes.windows_command("C:\\Some.exe /?"),
41
+ ]
42
+ )
43
+ ) == [
44
+ 7,
45
+ datetime(1920, 11, 11, 11, 37, 0), # UTC normalization
46
+ "James",
47
+ 'b"Bond"', # When possible, encode bytes in a printable way
48
+ "base64:AAc=", # If not, base64 encode
49
+ "13.37.13.37", # Stringify an ip address
50
+ "['Shaken', 'Not', 'Stirred']", # Stringify a list
51
+ "/home/user", # Stringify a posix path
52
+ "/bin/bash -c 'echo hello world'", # Stringify a posix command
53
+ "C:\\Users\\user\\Desktop", # Stringify a windows path
54
+ "C:\\Some.exe /?", # Stringify a windows command
55
+ ]
@@ -1,73 +0,0 @@
1
- import openpyxl
2
-
3
- from flow import record
4
- from flow.record.adapter import AbstractReader, AbstractWriter
5
- from flow.record.selector import make_selector
6
- from flow.record.utils import is_stdout
7
-
8
- __usage__ = """
9
- Microsoft Excel spreadsheet adapter
10
- ---
11
- Write usage: rdump -w xlsx://[PATH]
12
- Read usage: rdump xlsx://[PATH]
13
- [PATH]: path to file. Leave empty or "-" to output to stdout
14
- """
15
-
16
-
17
- class XlsxWriter(AbstractWriter):
18
- fp = None
19
- wb = None
20
-
21
- def __init__(self, path, **kwargs):
22
- self.fp = record.open_path_or_stream(path, "wb")
23
- self.wb = openpyxl.Workbook()
24
- self.ws = self.wb.active
25
- self.desc = None
26
- # self.ws.title = "Records"
27
-
28
- def write(self, r):
29
- if not self.desc:
30
- self.desc = r._desc
31
- self.ws.append(r._desc.fields)
32
-
33
- self.ws.append(r._asdict().values())
34
-
35
- def flush(self):
36
- if self.wb:
37
- self.wb.save(self.fp)
38
-
39
- def close(self):
40
- if self.wb:
41
- self.wb.close()
42
- self.wb = None
43
-
44
- if self.fp and not is_stdout(self.fp):
45
- self.fp.close()
46
- self.fp = None
47
-
48
-
49
- class XlsxReader(AbstractReader):
50
- fp = None
51
-
52
- def __init__(self, path, selector=None, **kwargs):
53
- self.selector = make_selector(selector)
54
- self.fp = record.open_path_or_stream(path, "rb")
55
- self.desc = None
56
- self.wb = openpyxl.load_workbook(self.fp)
57
- self.ws = self.wb.active
58
-
59
- def close(self):
60
- if self.fp:
61
- self.fp.close()
62
- self.fp = None
63
-
64
- def __iter__(self):
65
- desc = None
66
- for row in self.ws.rows:
67
- if not desc:
68
- desc = record.RecordDescriptor([col.value.replace(" ", "_").lower() for col in row])
69
- continue
70
-
71
- obj = desc(*[col.value for col in row])
72
- if not self.selector or self.selector.match(obj):
73
- yield obj
File without changes
File without changes