flow.record 3.10.dev1__tar.gz → 3.10.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {flow.record-3.10.dev1/flow.record.egg-info → flow.record-3.10.dev3}/PKG-INFO +3 -1
  2. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/avro.py +9 -4
  3. flow.record-3.10.dev3/flow/record/version.py +4 -0
  4. {flow.record-3.10.dev1 → flow.record-3.10.dev3/flow.record.egg-info}/PKG-INFO +3 -1
  5. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow.record.egg-info/SOURCES.txt +2 -0
  6. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow.record.egg-info/requires.txt +8 -0
  7. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/pyproject.toml +8 -0
  8. flow.record-3.10.dev3/tests/_utils.py +36 -0
  9. flow.record-3.10.dev3/tests/test_avro_adapter.py +51 -0
  10. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_json_record_adapter.py +3 -22
  11. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_record_adapter.py +7 -19
  12. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tox.ini +1 -0
  13. flow.record-3.10.dev1/flow/record/version.py +0 -4
  14. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/COPYRIGHT +0 -0
  15. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/LICENSE +0 -0
  16. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/MANIFEST.in +0 -0
  17. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/README.md +0 -0
  18. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/examples/filesystem.py +0 -0
  19. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/examples/passivedns.py +0 -0
  20. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/examples/records.json +0 -0
  21. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/examples/tcpconn.py +0 -0
  22. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/__init__.py +0 -0
  23. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/__init__.py +0 -0
  24. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/archive.py +0 -0
  25. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/broker.py +0 -0
  26. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/csvfile.py +0 -0
  27. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/elastic.py +0 -0
  28. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/jsonfile.py +0 -0
  29. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/line.py +0 -0
  30. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/mongo.py +0 -0
  31. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/split.py +0 -0
  32. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/splunk.py +0 -0
  33. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/stream.py +0 -0
  34. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/text.py +0 -0
  35. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/adapter/xlsx.py +0 -0
  36. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/base.py +0 -0
  37. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/exceptions.py +0 -0
  38. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/__init__.py +0 -0
  39. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/credential.py +0 -0
  40. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/__init__.py +0 -0
  41. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/ip.py +0 -0
  42. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/ipv4.py +0 -0
  43. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/tcp.py +0 -0
  44. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/fieldtypes/net/udp.py +0 -0
  45. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/jsonpacker.py +0 -0
  46. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/packer.py +0 -0
  47. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/selector.py +0 -0
  48. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/stream.py +0 -0
  49. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/tools/__init__.py +0 -0
  50. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/tools/geoip.py +0 -0
  51. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/tools/rdump.py +0 -0
  52. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/utils.py +0 -0
  53. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow/record/whitelist.py +0 -0
  54. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow.record.egg-info/dependency_links.txt +0 -0
  55. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow.record.egg-info/entry_points.txt +0 -0
  56. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/flow.record.egg-info/top_level.txt +0 -0
  57. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/setup.cfg +0 -0
  58. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/__init__.py +0 -0
  59. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/selector_explain_example.py +0 -0
  60. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/standalone_test.py +0 -0
  61. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_compiled_selector.py +0 -0
  62. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_deprecations.py +0 -0
  63. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_fieldtype_ip.py +0 -0
  64. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_fieldtypes.py +0 -0
  65. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_json_packer.py +0 -0
  66. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_multi_timestamp.py +0 -0
  67. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_packer.py +0 -0
  68. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_rdump.py +0 -0
  69. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_record.py +0 -0
  70. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_record_descriptor.py +0 -0
  71. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_regression.py +0 -0
  72. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_selector.py +0 -0
  73. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/test_splunk_adapter.py +0 -0
  74. {flow.record-3.10.dev1 → flow.record-3.10.dev3}/tests/utils_inspect.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.10.dev1
3
+ Version: 3.10.dev3
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -13,6 +13,8 @@ Description-Content-Type: text/markdown
13
13
  Provides-Extra: compression
14
14
  Provides-Extra: elastic
15
15
  Provides-Extra: geoip
16
+ Provides-Extra: avro
17
+ Provides-Extra: test
16
18
  License-File: LICENSE
17
19
  License-File: COPYRIGHT
18
20
 
@@ -71,8 +71,13 @@ class AvroWriter(AbstractWriter):
71
71
  self.writer.write(r._packdict())
72
72
 
73
73
  def flush(self):
74
- if self.writer:
75
- self.writer.flush()
74
+ if not self.writer:
75
+ self.writer = fastavro.write.Writer(
76
+ self.fp,
77
+ fastavro.parse_schema({"type": "record", "name": "empty"}),
78
+ codec=self.codec,
79
+ )
80
+ self.writer.flush()
76
81
 
77
82
  def close(self):
78
83
  if self.fp and not is_stdout(self.fp):
@@ -89,7 +94,7 @@ class AvroReader(AbstractReader):
89
94
  self.selector = make_selector(selector)
90
95
 
91
96
  self.reader = fastavro.reader(self.fp)
92
- self.schema = self.reader.schema
97
+ self.schema = self.reader.writer_schema
93
98
  if not self.schema:
94
99
  raise Exception("Missing Avro schema")
95
100
 
@@ -185,7 +190,7 @@ def avro_type_to_flow_type(ftype):
185
190
  return "{}[]".format(item_type)
186
191
  else:
187
192
  logical_type = t.get("logicalType")
188
- if logical_type and "time" in logical_type or "date" in logical_type:
193
+ if logical_type and ("time" in logical_type or "date" in logical_type):
189
194
  return "datetime"
190
195
 
191
196
  if t == "null":
@@ -0,0 +1,4 @@
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
+ __version__ = version = '3.10.dev3'
4
+ __version_tuple__ = version_tuple = (3, 10, 'dev3')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.10.dev1
3
+ Version: 3.10.dev3
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -13,6 +13,8 @@ Description-Content-Type: text/markdown
13
13
  Provides-Extra: compression
14
14
  Provides-Extra: elastic
15
15
  Provides-Extra: geoip
16
+ Provides-Extra: avro
17
+ Provides-Extra: test
16
18
  License-File: LICENSE
17
19
  License-File: COPYRIGHT
18
20
 
@@ -49,8 +49,10 @@ flow/record/tools/__init__.py
49
49
  flow/record/tools/geoip.py
50
50
  flow/record/tools/rdump.py
51
51
  tests/__init__.py
52
+ tests/_utils.py
52
53
  tests/selector_explain_example.py
53
54
  tests/standalone_test.py
55
+ tests/test_avro_adapter.py
54
56
  tests/test_compiled_selector.py
55
57
  tests/test_deprecations.py
56
58
  tests/test_fieldtype_ip.py
@@ -1,5 +1,8 @@
1
1
  msgpack>=0.5.2
2
2
 
3
+ [avro]
4
+ fastavro[snappy]
5
+
3
6
  [compression]
4
7
  lz4
5
8
  zstandard
@@ -9,3 +12,8 @@ elasticsearch
9
12
 
10
13
  [geoip]
11
14
  maxminddb
15
+
16
+ [test]
17
+ lz4
18
+ zstandard
19
+ fastavro
@@ -36,6 +36,14 @@ elastic = [
36
36
  geoip = [
37
37
  "maxminddb",
38
38
  ]
39
+ avro = [
40
+ "fastavro[snappy]",
41
+ ]
42
+ test = [
43
+ "lz4",
44
+ "zstandard",
45
+ "fastavro",
46
+ ]
39
47
 
40
48
  [project.scripts]
41
49
  rdump = "flow.record.tools.rdump:main"
@@ -0,0 +1,36 @@
1
+ import datetime
2
+
3
+ from flow.record import RecordDescriptor
4
+
5
+
6
+ def generate_records(count=100):
7
+ TestRecordEmbedded = RecordDescriptor(
8
+ "test/embedded_record",
9
+ [
10
+ ("datetime", "dt"),
11
+ ],
12
+ )
13
+ TestRecord = RecordDescriptor(
14
+ "test/adapter",
15
+ [
16
+ ("uint32", "number"),
17
+ ("record", "record"),
18
+ ],
19
+ )
20
+
21
+ for i in range(count):
22
+ embedded = TestRecordEmbedded(datetime.datetime.utcnow())
23
+ yield TestRecord(number=i, record=embedded)
24
+
25
+
26
+ def generate_plain_records(count=100):
27
+ TestRecord = RecordDescriptor(
28
+ "test/adapter/plain",
29
+ [
30
+ ("uint32", "number"),
31
+ ("datetime", "dt"),
32
+ ],
33
+ )
34
+
35
+ for i in range(count):
36
+ yield TestRecord(number=i, dt=datetime.datetime.utcnow())
@@ -0,0 +1,51 @@
1
+ from flow.record import RecordReader, RecordWriter
2
+
3
+ from ._utils import generate_plain_records
4
+
5
+
6
+ def test_avro_adapter(tmpdir):
7
+ json_file = tmpdir.join("records.avro")
8
+ record_adapter_path = "avro://{}".format(json_file)
9
+ writer = RecordWriter(record_adapter_path)
10
+ nr_records = 1337
11
+
12
+ for record in generate_plain_records(nr_records):
13
+ writer.write(record)
14
+ writer.flush()
15
+
16
+ nr_received_records = 0
17
+ reader = RecordReader(record_adapter_path)
18
+ for _ in reader:
19
+ nr_received_records += 1
20
+
21
+ assert nr_records == nr_received_records
22
+
23
+
24
+ def test_avro_adapter_contextmanager(tmpdir):
25
+ json_file = tmpdir.join("records.avro")
26
+ record_adapter_path = "avro://{}".format(json_file)
27
+ with RecordWriter(record_adapter_path) as writer:
28
+ nr_records = 1337
29
+ for record in generate_plain_records(nr_records):
30
+ writer.write(record)
31
+
32
+ nr_received_records = 0
33
+ with RecordReader(record_adapter_path) as reader:
34
+ for _ in reader:
35
+ nr_received_records += 1
36
+
37
+ assert nr_records == nr_received_records
38
+
39
+
40
+ def test_avro_adapter_empty(tmpdir):
41
+ json_file = tmpdir.join("records.avro")
42
+ record_adapter_path = "avro://{}".format(json_file)
43
+ with RecordWriter(record_adapter_path):
44
+ pass
45
+
46
+ nr_received_records = 0
47
+ with RecordReader(record_adapter_path) as reader:
48
+ for _ in reader:
49
+ nr_received_records += 1
50
+
51
+ assert nr_received_records == 0
@@ -1,29 +1,10 @@
1
- import datetime
2
1
  import json
3
2
 
4
3
  import pytest
5
4
 
6
- from flow.record import RecordDescriptor, RecordReader, RecordWriter
7
-
8
-
9
- def generate_records(count=100):
10
- TestRecordEmbedded = RecordDescriptor(
11
- "test/embedded_record",
12
- [
13
- ("datetime", "dt"),
14
- ],
15
- )
16
- TestRecord = RecordDescriptor(
17
- "test/adapter",
18
- [
19
- ("uint32", "number"),
20
- ("record", "record"),
21
- ],
22
- )
23
-
24
- for i in range(count):
25
- embedded = TestRecordEmbedded(datetime.datetime.utcnow())
26
- yield TestRecord(number=i, record=embedded)
5
+ from flow.record import RecordReader, RecordWriter
6
+
7
+ from ._utils import generate_records
27
8
 
28
9
 
29
10
  def test_json_adapter(tmpdir):
@@ -1,4 +1,5 @@
1
1
  import datetime
2
+ import platform
2
3
  import sys
3
4
 
4
5
  import pytest
@@ -27,25 +28,7 @@ from flow.record.base import (
27
28
  )
28
29
  from flow.record.selector import CompiledSelector, Selector
29
30
 
30
-
31
- def generate_records(count=100):
32
- TestRecordEmbedded = RecordDescriptor(
33
- "test/embedded_record",
34
- [
35
- ("datetime", "dt"),
36
- ],
37
- )
38
- TestRecord = RecordDescriptor(
39
- "test/adapter",
40
- [
41
- ("uint32", "number"),
42
- ("record", "record"),
43
- ],
44
- )
45
-
46
- for i in range(count):
47
- embedded = TestRecordEmbedded(datetime.datetime.utcnow())
48
- yield TestRecord(number=i, record=embedded)
31
+ from ._utils import generate_records
49
32
 
50
33
 
51
34
  def test_stream_writer_reader():
@@ -85,6 +68,11 @@ def test_compressed_writer_reader(tmpdir, compression):
85
68
  if compression == "zstd" and not HAS_ZSTD:
86
69
  pytest.skip("zstandard module not installed")
87
70
 
71
+ if compression == "lz4" and platform.python_implementation() == "PyPy":
72
+ pytest.skip("lz4 module not supported on PyPy")
73
+ if compression == "zstd" and platform.python_implementation() == "PyPy":
74
+ pytest.skip("zstandard module not supported on PyPy")
75
+
88
76
  p = tmpdir.mkdir("{}-test".format(compression))
89
77
  path = str(p.join("test.records.{}".format(compression)))
90
78
 
@@ -15,6 +15,7 @@ deps =
15
15
  pytest
16
16
  pytest-cov
17
17
  coverage
18
+ extras = test
18
19
  commands =
19
20
  # Capturing output will fail on pypy, possibly due to this issue: https://github.com/pytest-dev/pytest/issues/5502
20
21
  pytest --basetemp="{envtmpdir}" {posargs:--color=yes --capture=no --cov=flow --cov-report=term-missing -v tests}
@@ -1,4 +0,0 @@
1
- # file generated by setuptools_scm
2
- # don't change, don't track in version control
3
- __version__ = version = '3.10.dev1'
4
- __version_tuple__ = version_tuple = (3, 10, 'dev1')
File without changes