flow.record 3.6.dev15.tar.gz → 3.10.dev3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. flow.record-3.10.dev3/MANIFEST.in +2 -0
  2. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/PKG-INFO +7 -4
  3. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/__init__.py +2 -0
  4. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/__init__.py +1 -0
  5. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/avro.py +9 -4
  6. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/broker.py +2 -1
  7. flow.record-3.10.dev3/flow/record/adapter/csvfile.py +90 -0
  8. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/jsonfile.py +4 -3
  9. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/line.py +2 -2
  10. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/mongo.py +2 -1
  11. flow.record-3.10.dev3/flow/record/adapter/split.py +74 -0
  12. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/splunk.py +2 -2
  13. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/xlsx.py +2 -2
  14. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/base.py +343 -209
  15. flow.record-3.10.dev3/flow/record/exceptions.py +6 -0
  16. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/__init__.py +17 -10
  17. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/__init__.py +2 -1
  18. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/ip.py +1 -0
  19. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/jsonpacker.py +9 -4
  20. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/packer.py +18 -6
  21. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/tools/geoip.py +7 -12
  22. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/tools/rdump.py +41 -24
  23. flow.record-3.10.dev3/flow/record/version.py +4 -0
  24. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow.record.egg-info/PKG-INFO +7 -4
  25. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow.record.egg-info/SOURCES.txt +4 -5
  26. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow.record.egg-info/entry_points.txt +0 -1
  27. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow.record.egg-info/requires.txt +8 -0
  28. flow.record-3.10.dev3/pyproject.toml +67 -0
  29. flow.record-3.10.dev3/setup.cfg +4 -0
  30. flow.record-3.10.dev3/tests/_utils.py +36 -0
  31. flow.record-3.10.dev3/tests/test_avro_adapter.py +51 -0
  32. flow.record-3.10.dev3/tests/test_deprecations.py +60 -0
  33. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_fieldtype_ip.py +4 -4
  34. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_fieldtypes.py +154 -6
  35. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_json_packer.py +24 -2
  36. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_json_record_adapter.py +2 -20
  37. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_multi_timestamp.py +63 -2
  38. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_packer.py +22 -4
  39. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_rdump.py +152 -3
  40. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_record.py +90 -7
  41. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_record_adapter.py +72 -28
  42. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_record_descriptor.py +23 -4
  43. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_regression.py +165 -30
  44. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_splunk_adapter.py +2 -10
  45. flow.record-3.10.dev3/tox.ini +60 -0
  46. flow.record-3.6.dev15/.github/workflows/dissect-ci.yml +0 -7
  47. flow.record-3.6.dev15/MANIFEST.in +0 -2
  48. flow.record-3.6.dev15/flow/record/adapter/csvfile.py +0 -51
  49. flow.record-3.6.dev15/flow/record/tools/multi_timestamp.py +0 -60
  50. flow.record-3.6.dev15/flow/record/version.py +0 -5
  51. flow.record-3.6.dev15/flow.record.egg-info/namespace_packages.txt +0 -1
  52. flow.record-3.6.dev15/pyproject.toml +0 -14
  53. flow.record-3.6.dev15/setup.cfg +0 -19
  54. flow.record-3.6.dev15/setup.py +0 -30
  55. flow.record-3.6.dev15/tests/test_deprecations.py +0 -27
  56. flow.record-3.6.dev15/tox.ini +0 -62
  57. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/COPYRIGHT +0 -0
  58. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/LICENSE +0 -0
  59. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/README.md +0 -0
  60. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/examples/filesystem.py +0 -0
  61. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/examples/passivedns.py +0 -0
  62. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/examples/records.json +0 -0
  63. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/examples/tcpconn.py +0 -0
  64. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/archive.py +0 -0
  65. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/elastic.py +0 -0
  66. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/stream.py +1 -1
  67. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/adapter/text.py +1 -1
  68. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/credential.py +0 -0
  69. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/ipv4.py +1 -1
  70. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/tcp.py +0 -0
  71. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/udp.py +0 -0
  72. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/selector.py +0 -0
  73. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/stream.py +7 -7
  74. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/tools/__init__.py +0 -0
  75. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/utils.py +1 -1
  76. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow/record/whitelist.py +0 -0
  77. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow.record.egg-info/dependency_links.txt +0 -0
  78. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/flow.record.egg-info/top_level.txt +0 -0
  79. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/__init__.py +0 -0
  80. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/selector_explain_example.py +1 -1
  81. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/standalone_test.py +0 -0
  82. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_compiled_selector.py +0 -0
  83. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/test_selector.py +0 -0
  84. {flow.record-3.6.dev15 → flow.record-3.10.dev3}/tests/utils_inspect.py +1 -1
@@ -0,0 +1,2 @@
1
+ exclude .gitignore
2
+ recursive-exclude .github/ *
@@ -1,17 +1,20 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.6.dev15
3
+ Version: 3.10.dev3
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
- Home-page: https://github.com/fox-it/flow.record
6
- Author: Dissect Team
7
- Author-email: dissect@fox-it.com
5
+ Author-email: Dissect Team <dissect@fox-it.com>
8
6
  License: Affero General Public License v3
7
+ Project-URL: homepage, https://dissect.tools
8
+ Project-URL: documentation, https://docs.dissect.tools/en/latest/projects/flow.record
9
+ Project-URL: repository, https://github.com/fox-it/flow.record
9
10
  Classifier: Programming Language :: Python :: 3
10
11
  Requires-Python: ~=3.7
11
12
  Description-Content-Type: text/markdown
12
13
  Provides-Extra: compression
13
14
  Provides-Extra: elastic
14
15
  Provides-Extra: geoip
16
+ Provides-Extra: avro
17
+ Provides-Extra: test
15
18
  License-File: LICENSE
16
19
  License-File: COPYRIGHT
17
20
 
@@ -15,6 +15,7 @@ from flow.record.base import (
15
15
  RecordWriter,
16
16
  dynamic_fieldtype,
17
17
  extend_record,
18
+ iter_timestamped_records,
18
19
  open_path,
19
20
  stream,
20
21
  )
@@ -55,6 +56,7 @@ __all__ = [
55
56
  "RecordDescriptorError",
56
57
  "record_stream",
57
58
  "extend_record",
59
+ "iter_timestamped_records",
58
60
  ]
59
61
 
60
62
 
@@ -4,6 +4,7 @@ import abc
4
4
 
5
5
  def with_metaclass(meta, *bases):
6
6
  """Create a base class with a metaclass. Python 2 and 3 compatible."""
7
+
7
8
  # This requires a bit of explanation: the basic idea is to make a dummy
8
9
  # metaclass for one level of class instantiation that replaces itself with
9
10
  # the actual metaclass.
@@ -71,8 +71,13 @@ class AvroWriter(AbstractWriter):
71
71
  self.writer.write(r._packdict())
72
72
 
73
73
  def flush(self):
74
- if self.writer:
75
- self.writer.flush()
74
+ if not self.writer:
75
+ self.writer = fastavro.write.Writer(
76
+ self.fp,
77
+ fastavro.parse_schema({"type": "record", "name": "empty"}),
78
+ codec=self.codec,
79
+ )
80
+ self.writer.flush()
76
81
 
77
82
  def close(self):
78
83
  if self.fp and not is_stdout(self.fp):
@@ -89,7 +94,7 @@ class AvroReader(AbstractReader):
89
94
  self.selector = make_selector(selector)
90
95
 
91
96
  self.reader = fastavro.reader(self.fp)
92
- self.schema = self.reader.schema
97
+ self.schema = self.reader.writer_schema
93
98
  if not self.schema:
94
99
  raise Exception("Missing Avro schema")
95
100
 
@@ -185,7 +190,7 @@ def avro_type_to_flow_type(ftype):
185
190
  return "{}[]".format(item_type)
186
191
  else:
187
192
  logical_type = t.get("logicalType")
188
- if logical_type and "time" in logical_type or "date" in logical_type:
193
+ if logical_type and ("time" in logical_type or "date" in logical_type):
189
194
  return "datetime"
190
195
 
191
196
  if t == "null":
@@ -1,6 +1,7 @@
1
- from flow.record.adapter import AbstractWriter, AbstractReader
2
1
  from flow.broker import Publisher, Subscriber
3
2
 
3
+ from flow.record.adapter import AbstractReader, AbstractWriter
4
+
4
5
  __usage__ = """
5
6
  PubSub adapter using flow.broker
6
7
  ---
@@ -0,0 +1,90 @@
1
+ from __future__ import absolute_import
2
+
3
+ import csv
4
+ import sys
5
+
6
+ from flow.record import RecordDescriptor
7
+ from flow.record.adapter import AbstractReader, AbstractWriter
8
+ from flow.record.selector import make_selector
9
+ from flow.record.utils import is_stdout
10
+
11
+ __usage__ = """
12
+ Comma-separated values (CSV) adapter
13
+ ---
14
+ Write usage: rdump -w csvfile://[PATH]?lineterminator=[TERMINATOR]
15
+ Read usage: rdump csvfile://[PATH]?fields=[FIELDS]
16
+ [PATH]: path to file. Leave empty or "-" to output to stdout
17
+ [TERMINATOR]: line terminator, default is \\r\\n
18
+ [FIELDS]: comma-separated list of CSV fields (in case of missing CSV header)
19
+ """
20
+
21
+
22
+ class CsvfileWriter(AbstractWriter):
23
+ fp = None
24
+
25
+ def __init__(self, path, fields=None, exclude=None, lineterminator=None, **kwargs):
26
+ if path in (None, "", "-"):
27
+ self.fp = sys.stdout
28
+ else:
29
+ self.fp = open(path, "w", newline="")
30
+ self.lineterminator = lineterminator or "\r\n"
31
+ for r, n in ((r"\r", "\r"), (r"\n", "\n"), (r"\t", "\t")):
32
+ self.lineterminator = self.lineterminator.replace(r, n)
33
+ self.desc = None
34
+ self.writer = None
35
+ self.fields = fields
36
+ self.exclude = exclude
37
+ if isinstance(self.fields, str):
38
+ self.fields = self.fields.split(",")
39
+ if isinstance(self.exclude, str):
40
+ self.exclude = self.exclude.split(",")
41
+
42
+ def write(self, r):
43
+ rdict = r._asdict(fields=self.fields, exclude=self.exclude)
44
+ if not self.desc or self.desc != r._desc:
45
+ self.desc = r._desc
46
+ self.writer = csv.DictWriter(self.fp, rdict, lineterminator=self.lineterminator)
47
+ self.writer.writeheader()
48
+ self.writer.writerow(rdict)
49
+
50
+ def flush(self):
51
+ if self.fp:
52
+ self.fp.flush()
53
+
54
+ def close(self):
55
+ if self.fp and not is_stdout(self.fp):
56
+ self.fp.close()
57
+ self.fp = None
58
+
59
+
60
+ class CsvfileReader(AbstractReader):
61
+ fp = None
62
+
63
+ def __init__(self, path, selector=None, fields=None, **kwargs):
64
+ self.selector = make_selector(selector)
65
+ if path in (None, "", "-"):
66
+ self.fp = sys.stdin
67
+ else:
68
+ self.fp = open(path, "r", newline="")
69
+ self.reader = csv.reader(self.fp)
70
+
71
+ if isinstance(fields, str):
72
+ # parse fields from fields argument (comma-separated string)
73
+ self.fields = fields.split(",")
74
+ else:
75
+ # parse fields from first CSV row
76
+ self.fields = next(self.reader)
77
+
78
+ # Create RecordDescriptor from fields
79
+ self.desc = RecordDescriptor("csv/reader", [("string", col) for col in self.fields])
80
+
81
+ def close(self):
82
+ if self.fp:
83
+ self.fp.close()
84
+ self.fp = None
85
+
86
+ def __iter__(self):
87
+ for row in self.reader:
88
+ record = self.desc(*row)
89
+ if not self.selector or self.selector.match(record):
90
+ yield record
@@ -1,10 +1,11 @@
1
1
  import json
2
+
2
3
  from flow import record
3
4
  from flow.record import JsonRecordPacker
4
- from flow.record.utils import is_stdout
5
- from flow.record.selector import make_selector
6
- from flow.record.adapter import AbstractWriter, AbstractReader
5
+ from flow.record.adapter import AbstractReader, AbstractWriter
7
6
  from flow.record.fieldtypes import fieldtype_for_value
7
+ from flow.record.selector import make_selector
8
+ from flow.record.utils import is_stdout
8
9
 
9
10
  __usage__ = """
10
11
  JSON adapter
@@ -1,5 +1,5 @@
1
- from flow.record.adapter import AbstractWriter
2
1
  from flow.record import open_path
2
+ from flow.record.adapter import AbstractWriter
3
3
  from flow.record.utils import is_stdout
4
4
 
5
5
  __usage__ = """
@@ -31,7 +31,7 @@ class LineWriter(AbstractWriter):
31
31
  self.fp.write("--[ RECORD {} ]--\n".format(self.count).encode())
32
32
  if rdict:
33
33
  fmt = "{{:>{width}}} = {{}}\n".format(width=max(len(k) for k in rdict))
34
- for (key, value) in rdict.items():
34
+ for key, value in rdict.items():
35
35
  self.fp.write(fmt.format(key, value).encode())
36
36
 
37
37
  def flush(self):
@@ -1,8 +1,9 @@
1
1
  import bson
2
+ from pymongo import MongoClient
3
+
2
4
  from flow import record
3
5
  from flow.record.adapter import AbstractReader, AbstractWriter
4
6
  from flow.record.selector import make_selector
5
- from pymongo import MongoClient
6
7
 
7
8
  __usage__ = """
8
9
  MongoDB adapter
@@ -0,0 +1,74 @@
1
+ from pathlib import Path
2
+ from urllib.parse import urlparse
3
+
4
+ from flow.record.adapter import AbstractWriter
5
+ from flow.record.base import RecordWriter
6
+
7
+ DEFAULT_RECORD_COUNT = 1000
8
+ DEFAULT_SUFFIX_LENGTH = 2
9
+
10
+ __usage__ = f"""
11
+ Record split adapter, splits records into multiple destination files (writer only)
12
+ ---
13
+ Write usage: rdump -w split://[PATH]?count=[COUNT]&suffix-length=[SUFFIX-LENGTH]
14
+ [PATH]: output path or uri
15
+ [COUNT]: maximum record count per file (default: {DEFAULT_RECORD_COUNT})
16
+ [SUFFIX-LENGTH]: length of suffix (default: {DEFAULT_SUFFIX_LENGTH})
17
+ """
18
+
19
+
20
+ class SplitWriter(AbstractWriter):
21
+ writer = None
22
+
23
+ def __init__(self, path, **kwargs):
24
+ self.path = str(path)
25
+ self.kwargs = kwargs
26
+
27
+ self.written = 0
28
+ self.count = int(kwargs.get("count", DEFAULT_RECORD_COUNT))
29
+ self.suffix_length = int(kwargs.get("suffix-length", DEFAULT_SUFFIX_LENGTH))
30
+ self.file_count = 0
31
+
32
+ parsed = urlparse(self.path)
33
+ self.is_stdout = parsed.netloc in ("", "-") and parsed.path == ""
34
+
35
+ self.writer = RecordWriter(self._next_path(), **self.kwargs)
36
+
37
+ def _next_path(self):
38
+ if self.is_stdout:
39
+ return self.path
40
+
41
+ path = self.path
42
+ scheme = ""
43
+ sep = ""
44
+ if "://" in path:
45
+ scheme, sep, path = path.partition("://")
46
+
47
+ suffix = str(self.file_count).rjust(self.suffix_length, "0")
48
+ path = Path(path)
49
+ path = path.with_suffix(f".{suffix}{path.suffix}")
50
+
51
+ self.file_count += 1
52
+ return scheme + sep + str(path)
53
+
54
+ def write(self, r):
55
+ self.writer.write(r)
56
+
57
+ if self.is_stdout:
58
+ return
59
+
60
+ self.written += 1
61
+ if self.written >= self.count:
62
+ self.flush()
63
+ self.close()
64
+ self.written = 0
65
+ self.writer = RecordWriter(self._next_path(), **self.kwargs)
66
+
67
+ def flush(self):
68
+ if self.writer:
69
+ self.writer.flush()
70
+
71
+ def close(self):
72
+ if self.writer:
73
+ self.writer.close()
74
+ self.writer = None
@@ -1,8 +1,8 @@
1
- import socket
2
1
  import logging
2
+ import socket
3
3
 
4
4
  from flow.record.adapter import AbstractReader, AbstractWriter
5
- from flow.record.utils import to_str, to_bytes, to_base64
5
+ from flow.record.utils import to_base64, to_bytes, to_str
6
6
 
7
7
  __usage__ = """
8
8
  Splunk output adapter (writer only)
@@ -1,9 +1,9 @@
1
1
  import openpyxl
2
2
 
3
3
  from flow import record
4
- from flow.record.utils import is_stdout
4
+ from flow.record.adapter import AbstractReader, AbstractWriter
5
5
  from flow.record.selector import make_selector
6
- from flow.record.adapter import AbstractWriter, AbstractReader
6
+ from flow.record.utils import is_stdout
7
7
 
8
8
  __usage__ = """
9
9
  Microsoft Excel spreadsheet adapter