flow.record 3.17.dev3__tar.gz → 3.17.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. {flow_record-3.17.dev3/flow.record.egg-info → flow_record-3.17.dev5}/PKG-INFO +1 -1
  2. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/elastic.py +1 -1
  3. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/line.py +1 -1
  4. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/sqlite.py +1 -1
  5. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/text.py +1 -1
  6. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/xlsx.py +2 -2
  7. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/base.py +48 -37
  8. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/__init__.py +2 -27
  9. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ipv4.py +0 -7
  10. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/jsonpacker.py +1 -5
  11. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/utils.py +18 -22
  12. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/version.py +2 -2
  13. {flow_record-3.17.dev3 → flow_record-3.17.dev5/flow.record.egg-info}/PKG-INFO +1 -1
  14. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/SOURCES.txt +2 -1
  15. flow_record-3.17.dev5/tests/test_adapter_line.py +29 -0
  16. flow_record-3.17.dev5/tests/test_adapter_text.py +28 -0
  17. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_fieldtypes.py +2 -9
  18. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_json_packer.py +20 -0
  19. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_record.py +25 -4
  20. flow_record-3.17.dev3/tests/utils_inspect.py +0 -58
  21. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/COPYRIGHT +0 -0
  22. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/LICENSE +0 -0
  23. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/MANIFEST.in +0 -0
  24. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/README.md +0 -0
  25. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/filesystem.py +0 -0
  26. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/passivedns.py +0 -0
  27. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/records.json +0 -0
  28. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/examples/tcpconn.py +0 -0
  29. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/__init__.py +0 -0
  30. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/__init__.py +0 -0
  31. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/archive.py +0 -0
  32. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/avro.py +0 -0
  33. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/broker.py +0 -0
  34. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/csvfile.py +0 -0
  35. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/duckdb.py +0 -0
  36. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/jsonfile.py +0 -0
  37. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/mongo.py +0 -0
  38. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/split.py +0 -0
  39. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/splunk.py +0 -0
  40. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/adapter/stream.py +0 -0
  41. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/exceptions.py +0 -0
  42. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/credential.py +0 -0
  43. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/__init__.py +0 -0
  44. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ip.py +0 -0
  45. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/tcp.py +0 -0
  46. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/udp.py +0 -0
  47. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/packer.py +0 -0
  48. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/selector.py +0 -0
  49. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/stream.py +0 -0
  50. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/tools/__init__.py +0 -0
  51. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/tools/geoip.py +0 -0
  52. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/tools/rdump.py +0 -0
  53. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow/record/whitelist.py +0 -0
  54. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/dependency_links.txt +0 -0
  55. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/entry_points.txt +0 -0
  56. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/requires.txt +0 -0
  57. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/flow.record.egg-info/top_level.txt +0 -0
  58. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/pyproject.toml +0 -0
  59. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/setup.cfg +0 -0
  60. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/__init__.py +0 -0
  61. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/_utils.py +0 -0
  62. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/docs/Makefile +0 -0
  63. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/docs/conf.py +0 -0
  64. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/docs/index.rst +0 -0
  65. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/selector_explain_example.py +0 -0
  66. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/standalone_test.py +0 -0
  67. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_avro.py +0 -0
  68. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_avro_adapter.py +0 -0
  69. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_compiled_selector.py +0 -0
  70. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_csv_adapter.py +0 -0
  71. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_deprecations.py +0 -0
  72. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_elastic_adapter.py +0 -0
  73. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_fieldtype_ip.py +0 -0
  74. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_json_record_adapter.py +0 -0
  75. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_multi_timestamp.py +0 -0
  76. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_packer.py +0 -0
  77. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_rdump.py +0 -0
  78. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_record_adapter.py +0 -0
  79. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_record_descriptor.py +0 -0
  80. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_regression.py +0 -0
  81. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_selector.py +0 -0
  82. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_splunk_adapter.py +0 -0
  83. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_sqlite_duckdb_adapter.py +0 -0
  84. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tests/test_xlsx_adapter.py +0 -0
  85. {flow_record-3.17.dev3 → flow_record-3.17.dev5}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.17.dev3
3
+ Version: 3.17.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -106,7 +106,7 @@ class ElasticWriter(AbstractWriter):
106
106
  }
107
107
 
108
108
  if self.hash_record:
109
- document["_id"] = hashlib.md5(document["_source"].encode()).hexdigest()
109
+ document["_id"] = hashlib.md5(document["_source"].encode(errors="surrogateescape")).hexdigest()
110
110
 
111
111
  return document
112
112
 
@@ -69,7 +69,7 @@ class LineWriter(AbstractWriter):
69
69
  for key, value in rdict.items():
70
70
  if rdict_types:
71
71
  key = f"{key} ({rdict_types[key]})"
72
- self.fp.write(fmt.format(key, value).encode())
72
+ self.fp.write(fmt.format(key, value).encode(errors="surrogateescape"))
73
73
 
74
74
  def flush(self) -> None:
75
75
  if self.fp:
@@ -187,7 +187,7 @@ class SqliteReader(AbstractReader):
187
187
  if value == 0:
188
188
  row[idx] = None
189
189
  elif isinstance(value, str):
190
- row[idx] = value.encode("utf-8")
190
+ row[idx] = value.encode(errors="surrogateescape")
191
191
  yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
192
192
 
193
193
  def __iter__(self) -> Iterator[Record]:
@@ -41,7 +41,7 @@ class TextWriter(AbstractWriter):
41
41
  buf = self.format_spec.format_map(DefaultMissing(rec._asdict()))
42
42
  else:
43
43
  buf = repr(rec)
44
- self.fp.write(buf.encode() + b"\n")
44
+ self.fp.write(buf.encode(errors="surrogateescape") + b"\n")
45
45
 
46
46
  # because stdout is usually line buffered we force flush here if wanted
47
47
  if self.auto_flush:
@@ -36,7 +36,7 @@ def sanitize_fieldvalues(values: Iterator[Any]) -> Iterator[Any]:
36
36
  elif isinstance(value, bytes):
37
37
  base64_encode = False
38
38
  try:
39
- new_value = 'b"' + value.decode() + '"'
39
+ new_value = 'b"' + value.decode(errors="surrogateescape") + '"'
40
40
  if ILLEGAL_CHARACTERS_RE.search(new_value):
41
41
  base64_encode = True
42
42
  else:
@@ -142,7 +142,7 @@ class XlsxReader(AbstractReader):
142
142
  if field_types[idx] == "bytes":
143
143
  if value[1] == '"': # If so, we know this is b""
144
144
  # Cut off the b" at the start and the trailing "
145
- value = value[2:-1].encode()
145
+ value = value[2:-1].encode(errors="surrogateescape")
146
146
  else:
147
147
  # If not, we know it is base64 encoded (so we cut off the starting 'base64:')
148
148
  value = b64decode(value[7:])
@@ -61,7 +61,7 @@ except ImportError:
61
61
 
62
62
  from collections import OrderedDict
63
63
 
64
- from .utils import to_native_str, to_str
64
+ from .utils import to_str
65
65
  from .whitelist import WHITELIST, WHITELIST_TREE
66
66
 
67
67
  log = logging.getLogger(__package__)
@@ -513,7 +513,7 @@ class RecordDescriptor:
513
513
  name, fields = parse_def(name)
514
514
 
515
515
  self.name = name
516
- self._field_tuples = tuple([(to_native_str(k), to_str(v)) for k, v in fields])
516
+ self._field_tuples = tuple([(to_str(k), to_str(v)) for k, v in fields])
517
517
  self.recordType = _generate_record_class(name, self._field_tuples)
518
518
  self.recordType._desc = self
519
519
 
@@ -523,12 +523,14 @@ class RecordDescriptor:
523
523
  """
524
524
  Get required fields mapping. eg:
525
525
 
526
- {
527
- "_source": RecordField("_source", "string"),
528
- "_classification": RecordField("_classification", "datetime"),
529
- "_generated": RecordField("_generated", "datetime"),
530
- "_version": RecordField("_version", "varint"),
531
- }
526
+ .. code-block:: text
527
+
528
+ {
529
+ "_source": RecordField("_source", "string"),
530
+ "_classification": RecordField("_classification", "datetime"),
531
+ "_generated": RecordField("_generated", "datetime"),
532
+ "_version": RecordField("_version", "varint"),
533
+ }
532
534
 
533
535
  Returns:
534
536
  Mapping of required fields
@@ -540,10 +542,12 @@ class RecordDescriptor:
540
542
  """
541
543
  Get fields mapping (without required fields). eg:
542
544
 
543
- {
544
- "foo": RecordField("foo", "string"),
545
- "bar": RecordField("bar", "varint"),
546
- }
545
+ .. code-block:: text
546
+
547
+ {
548
+ "foo": RecordField("foo", "string"),
549
+ "bar": RecordField("bar", "varint"),
550
+ }
547
551
 
548
552
  Returns:
549
553
  Mapping of Record fields
@@ -556,15 +560,17 @@ class RecordDescriptor:
556
560
  """
557
561
  Get all fields including required meta fields. eg:
558
562
 
559
- {
560
- "ts": RecordField("ts", "datetime"),
561
- "foo": RecordField("foo", "string"),
562
- "bar": RecordField("bar", "varint"),
563
- "_source": RecordField("_source", "string"),
564
- "_classification": RecordField("_classification", "datetime"),
565
- "_generated": RecordField("_generated", "datetime"),
566
- "_version": RecordField("_version", "varint"),
567
- }
563
+ .. code-block:: text
564
+
565
+ {
566
+ "ts": RecordField("ts", "datetime"),
567
+ "foo": RecordField("foo", "string"),
568
+ "bar": RecordField("bar", "varint"),
569
+ "_source": RecordField("_source", "string"),
570
+ "_classification": RecordField("_classification", "datetime"),
571
+ "_generated": RecordField("_generated", "datetime"),
572
+ "_version": RecordField("_version", "varint"),
573
+ }
568
574
 
569
575
  Returns:
570
576
  Mapping of all Record fields
@@ -591,18 +597,18 @@ class RecordDescriptor:
591
597
  return RecordFieldSet(field for field in self.fields.values() if field.typename == name)
592
598
 
593
599
  def __call__(self, *args, **kwargs) -> Record:
594
- """Create a new Record initialized with `args` and `kwargs`."""
600
+ """Create a new Record initialized with ``args`` and ``kwargs``."""
595
601
  return self.recordType(*args, **kwargs)
596
602
 
597
603
  def init_from_dict(self, rdict: dict[str, Any], raise_unknown=False) -> Record:
598
- """Create a new Record initialized with key, value pairs from `rdict`.
604
+ """Create a new Record initialized with key, value pairs from ``rdict``.
599
605
 
600
- If `raise_unknown=True` then fields on `rdict` that are unknown to this
606
+ If ``raise_unknown=True`` then fields on ``rdict`` that are unknown to this
601
607
  RecordDescriptor will raise a TypeError exception due to initializing
602
608
  with unknown keyword arguments. (default: False)
603
609
 
604
610
  Returns:
605
- Record with data from `rdict`
611
+ Record with data from ``rdict``
606
612
  """
607
613
 
608
614
  if not raise_unknown:
@@ -610,14 +616,14 @@ class RecordDescriptor:
610
616
  return self.recordType(**rdict)
611
617
 
612
618
  def init_from_record(self, record: Record, raise_unknown=False) -> Record:
613
- """Create a new Record initialized with data from another `record`.
619
+ """Create a new Record initialized with data from another ``record``.
614
620
 
615
- If `raise_unknown=True` then fields on `record` that are unknown to this
621
+ If ``raise_unknown=True`` then fields on ``record`` that are unknown to this
616
622
  RecordDescriptor will raise a TypeError exception due to initializing
617
623
  with unknown keyword arguments. (default: False)
618
624
 
619
625
  Returns:
620
- Record with data from `record`
626
+ Record with data from ``record``
621
627
  """
622
628
  return self.init_from_dict(record._asdict(), raise_unknown=raise_unknown)
623
629
 
@@ -633,7 +639,9 @@ class RecordDescriptor:
633
639
  def get_field_tuples(self) -> tuple[tuple[str, str]]:
634
640
  """Returns a tuple containing the (typename, name) tuples, eg:
635
641
 
636
- (('boolean', 'foo'), ('string', 'bar'))
642
+ .. code-block:: text
643
+
644
+ (('boolean', 'foo'), ('string', 'bar'))
637
645
 
638
646
  Returns:
639
647
  Tuple of (typename, name) tuples
@@ -676,7 +684,7 @@ class RecordDescriptor:
676
684
  def definition(self, reserved: bool = True) -> str:
677
685
  """Return the RecordDescriptor as Python definition string.
678
686
 
679
- If `reserved` is True it will also return the reserved fields.
687
+ If ``reserved`` is True it will also return the reserved fields.
680
688
 
681
689
  Returns:
682
690
  Descriptor definition string
@@ -769,7 +777,7 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO:
769
777
  Args:
770
778
  path: Filename or path to filename to open
771
779
  mode: Could be "r", "rb" to open file for reading, "w", "wb" for writing
772
- clobber: Overwrite file if it already exists if `clobber=True`, else raises IOError.
780
+ clobber: Overwrite file if it already exists if ``clobber=True``, else raises IOError.
773
781
 
774
782
  """
775
783
  binary = "b" in mode
@@ -1040,6 +1048,8 @@ def normalize_fieldname(field_name: str) -> str:
1040
1048
  This normalizes the name so it can still be used in flow.record.
1041
1049
  Reserved field_names are not normalized.
1042
1050
 
1051
+ .. code-block:: text
1052
+
1043
1053
  >>> normalize_fieldname("my-variable-name-with-dashes")
1044
1054
  'my_variable_name_with_dashes'
1045
1055
  >>> normalize_fieldname("_my_name_starting_with_underscore")
@@ -1100,25 +1110,26 @@ TimestampRecord = RecordDescriptor(
1100
1110
 
1101
1111
 
1102
1112
  def iter_timestamped_records(record: Record) -> Iterator[Record]:
1103
- """Yields timestamped annotated records for each `datetime` fieldtype in `record`.
1104
- If `record` does not have any `datetime` fields the original record is returned.
1113
+ """Yields timestamped annotated records for each ``datetime`` fieldtype in ``record``.
1114
+ If ``record`` does not have any ``datetime`` fields the original record is returned.
1105
1115
 
1106
1116
  Args:
1107
1117
  record: Record to add timestamp fields for.
1108
1118
 
1109
1119
  Yields:
1110
- Record annotated with `ts` and `ts_description` fields for each `datetime` fieldtype.
1120
+ Record annotated with ``ts`` and ``ts_description`` fields for each ``datetime`` fieldtype.
1111
1121
  """
1112
- # get all `datetime` fields. (excluding _generated).
1122
+
1123
+ # get all ``datetime`` fields. (excluding _generated).
1113
1124
  dt_fields = record._desc.getfields("datetime")
1114
1125
  if not dt_fields:
1115
1126
  yield record
1116
1127
  return
1117
1128
 
1118
- # yield a new record for each `datetime` field assigned as `ts`.
1129
+ # yield a new record for each ``datetime`` field assigned as ``ts``.
1119
1130
  record_name = record._desc.name
1120
1131
  for field in dt_fields:
1121
1132
  ts_record = TimestampRecord(getattr(record, field.name), field.name)
1122
- # we extend `ts_record` with original `record` so TSRecord info goes first.
1133
+ # we extend ``ts_record`` with original ``record`` so TSRecord info goes first.
1123
1134
  record = extend_record(ts_record, [record], name=record_name)
1124
1135
  yield record
@@ -28,7 +28,6 @@ except ImportError:
28
28
  from flow.record.base import FieldType
29
29
 
30
30
  RE_NORMALIZE_PATH = re.compile(r"[\\/]+")
31
- NATIVE_UNICODE = isinstance("", str)
32
31
 
33
32
  UTC = timezone.utc
34
33
 
@@ -207,10 +206,7 @@ class stringlist(list, FieldType):
207
206
  class string(string_type, FieldType):
208
207
  def __new__(cls, value):
209
208
  if isinstance(value, bytes_type):
210
- value = cls._decode(value, "utf-8")
211
- if isinstance(value, bytes_type):
212
- # Still bytes, so decoding failed (Python 2)
213
- return bytes(value)
209
+ value = value.decode(errors="surrogateescape")
214
210
  return super().__new__(cls, value)
215
211
 
216
212
  def _pack(self):
@@ -221,27 +217,6 @@ class string(string_type, FieldType):
221
217
  return defang(self)
222
218
  return str.__format__(self, spec)
223
219
 
224
- @classmethod
225
- def _decode(cls, data, encoding):
226
- """Decode a byte-string into a unicode-string.
227
-
228
- Python 3: When `data` contains invalid unicode characters a `UnicodeDecodeError` is raised.
229
- Python 2: When `data` contains invalid unicode characters the original byte-string is returned.
230
- """
231
- if NATIVE_UNICODE:
232
- # Raises exception on decode error
233
- return data.decode(encoding)
234
- try:
235
- return data.decode(encoding)
236
- except UnicodeDecodeError:
237
- # Fallback to bytes (Python 2 only)
238
- preview = data[:16].encode("hex_codec") + (".." if len(data) > 16 else "")
239
- warnings.warn(
240
- "Got binary data in string field (hex: {}). Compatibility is not guaranteed.".format(preview),
241
- RuntimeWarning,
242
- )
243
- return data
244
-
245
220
 
246
221
  # Alias for backwards compatibility
247
222
  wstring = string
@@ -278,7 +253,7 @@ class datetime(_dt, FieldType):
278
253
  if len(args) == 1 and not kwargs:
279
254
  arg = args[0]
280
255
  if isinstance(arg, bytes_type):
281
- arg = arg.decode("utf-8")
256
+ arg = arg.decode(errors="surrogateescape")
282
257
  if isinstance(arg, string_type):
283
258
  # If we are on Python 3.11 or newer, we can use fromisoformat() to parse the string (fast path)
284
259
  #
@@ -3,7 +3,6 @@ import struct
3
3
  import warnings
4
4
 
5
5
  from flow.record import FieldType
6
- from flow.record.utils import to_native_str
7
6
 
8
7
 
9
8
  def addr_long(s):
@@ -45,9 +44,6 @@ class subnet(FieldType):
45
44
  DeprecationWarning,
46
45
  stacklevel=5,
47
46
  )
48
- if isinstance(addr, type("")):
49
- addr = to_native_str(addr)
50
-
51
47
  if not isinstance(addr, str):
52
48
  raise TypeError("Subnet() argument 1 must be string, not {}".format(type(addr).__name__))
53
49
 
@@ -67,9 +63,6 @@ class subnet(FieldType):
67
63
  if addr is None:
68
64
  return False
69
65
 
70
- if isinstance(addr, type("")):
71
- addr = to_native_str(addr)
72
-
73
66
  if isinstance(addr, str):
74
67
  addr = addr_long(addr)
75
68
 
@@ -47,12 +47,8 @@ class JsonRecordPacker:
47
47
  serial["_recorddescriptor"] = obj._desc.identifier
48
48
 
49
49
  for field_type, field_name in obj._desc.get_field_tuples():
50
- # PYTHON2: Because "bytes" are also "str" we have to handle this here
51
- if field_type == "bytes" and isinstance(serial[field_name], str):
52
- serial[field_name] = base64.b64encode(serial[field_name]).decode()
53
-
54
50
  # Boolean field types should be cast to a bool instead of staying ints
55
- elif field_type == "boolean" and isinstance(serial[field_name], int):
51
+ if field_type == "boolean" and isinstance(serial[field_name], int):
56
52
  serial[field_name] = bool(serial[field_name])
57
53
 
58
54
  return serial
@@ -3,13 +3,10 @@ from __future__ import annotations
3
3
  import base64
4
4
  import os
5
5
  import sys
6
+ import warnings
6
7
  from functools import wraps
7
8
  from typing import BinaryIO, TextIO
8
9
 
9
- _native = str
10
- _unicode = type("")
11
- _bytes = type(b"")
12
-
13
10
 
14
11
  def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
15
12
  """Return the stdout stream as binary or text stream.
@@ -50,33 +47,32 @@ def is_stdout(fp: TextIO | BinaryIO) -> bool:
50
47
 
51
48
  def to_bytes(value):
52
49
  """Convert a value to a byte string."""
53
- if value is None or isinstance(value, _bytes):
50
+ if value is None or isinstance(value, bytes):
54
51
  return value
55
- if isinstance(value, _unicode):
56
- return value.encode("utf-8")
57
- return _bytes(value)
52
+ if isinstance(value, str):
53
+ return value.encode(errors="surrogateescape")
54
+ return bytes(value)
58
55
 
59
56
 
60
57
  def to_str(value):
61
58
  """Convert a value to a unicode string."""
62
- if value is None or isinstance(value, _unicode):
59
+ if value is None or isinstance(value, str):
63
60
  return value
64
- if isinstance(value, _bytes):
65
- return value.decode("utf-8")
66
- return _unicode(value)
61
+ if isinstance(value, bytes):
62
+ return value.decode(errors="surrogateescape")
63
+ return str(value)
67
64
 
68
65
 
69
66
  def to_native_str(value):
70
- """Convert a value to a native `str`."""
71
- if value is None or isinstance(value, _native):
72
- return value
73
- if isinstance(value, _unicode):
74
- # Python 2: unicode -> str
75
- return value.encode("utf-8")
76
- if isinstance(value, _bytes):
77
- # Python 3: bytes -> str
78
- return value.decode("utf-8")
79
- return _native(value)
67
+ warnings.warn(
68
+ (
69
+ "The to_native_str() function is deprecated, "
70
+ "this function will be removed in flow.record 3.20, "
71
+ "use to_str() instead"
72
+ ),
73
+ DeprecationWarning,
74
+ )
75
+ return to_str(value)
80
76
 
81
77
 
82
78
  def to_base64(value):
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.17.dev3'
16
- __version_tuple__ = version_tuple = (3, 17, 'dev3')
15
+ __version__ = version = '3.17.dev5'
16
+ __version_tuple__ = version_tuple = (3, 17, 'dev5')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.17.dev3
3
+ Version: 3.17.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -54,6 +54,8 @@ tests/__init__.py
54
54
  tests/_utils.py
55
55
  tests/selector_explain_example.py
56
56
  tests/standalone_test.py
57
+ tests/test_adapter_line.py
58
+ tests/test_adapter_text.py
57
59
  tests/test_avro.py
58
60
  tests/test_avro_adapter.py
59
61
  tests/test_compiled_selector.py
@@ -75,7 +77,6 @@ tests/test_selector.py
75
77
  tests/test_splunk_adapter.py
76
78
  tests/test_sqlite_duckdb_adapter.py
77
79
  tests/test_xlsx_adapter.py
78
- tests/utils_inspect.py
79
80
  tests/docs/Makefile
80
81
  tests/docs/conf.py
81
82
  tests/docs/index.rst
@@ -0,0 +1,29 @@
1
+ from io import BytesIO
2
+
3
+ from flow.record import RecordDescriptor
4
+ from flow.record.adapter.line import LineWriter
5
+
6
+
7
+ def test_line_writer_write_surrogateescape():
8
+ output = BytesIO()
9
+
10
+ lw = LineWriter(
11
+ path=output,
12
+ fields="name",
13
+ )
14
+
15
+ TestRecord = RecordDescriptor(
16
+ "test/string",
17
+ [
18
+ ("string", "name"),
19
+ ],
20
+ )
21
+
22
+ # construct from 'bytes' but with invalid unicode bytes
23
+ record = TestRecord(b"R\xc3\xa9\xeamy")
24
+ lw.write(record)
25
+
26
+ output.seek(0)
27
+ data = output.read()
28
+
29
+ assert data == b"--[ RECORD 1 ]--\nname = R\xc3\xa9\xeamy\n"
@@ -0,0 +1,28 @@
1
+ from io import BytesIO
2
+
3
+ from flow.record import RecordDescriptor
4
+ from flow.record.adapter.text import TextWriter
5
+
6
+
7
+ def test_text_writer_write_surrogateescape():
8
+ output = BytesIO()
9
+
10
+ tw = TextWriter(
11
+ path=output,
12
+ )
13
+
14
+ TestRecord = RecordDescriptor(
15
+ "test/string",
16
+ [
17
+ ("string", "name"),
18
+ ],
19
+ )
20
+
21
+ # construct from 'bytes' but with invalid unicode bytes
22
+ record = TestRecord(b"R\xc3\xa9\xeamy")
23
+ tw.write(record)
24
+
25
+ output.seek(0)
26
+ data = output.read()
27
+
28
+ assert data == b"<test/string name='R\xc3\xa9\\udceamy'>\n"
@@ -213,15 +213,8 @@ def test_string():
213
213
  assert r.name == "Rémy"
214
214
 
215
215
  # construct from 'bytes' but with invalid unicode bytes
216
- if isinstance("", str):
217
- # Python 3
218
- with pytest.raises(UnicodeDecodeError):
219
- TestRecord(b"R\xc3\xa9\xeamy")
220
- else:
221
- # Python 2
222
- with pytest.warns(RuntimeWarning):
223
- r = TestRecord(b"R\xc3\xa9\xeamy")
224
- assert r.name
216
+ r = TestRecord(b"R\xc3\xa9\xeamy")
217
+ assert r.name == "Ré\udceamy"
225
218
 
226
219
 
227
220
  def test_wstring():
@@ -90,3 +90,23 @@ def test_record_pack_bool_regression() -> None:
90
90
 
91
91
  # pack the json string back to a record and make sure it is the same as before
92
92
  assert packer.unpack(data) == record
93
+
94
+
95
+ def test_record_pack_surrogateescape() -> None:
96
+ TestRecord = RecordDescriptor(
97
+ "test/string",
98
+ [
99
+ ("string", "name"),
100
+ ],
101
+ )
102
+
103
+ record = TestRecord(b"R\xc3\xa9\xeamy")
104
+ packer = JsonRecordPacker()
105
+
106
+ data = packer.pack(record)
107
+
108
+ # pack to json string and check if the 3rd and 4th byte are properly surrogate escaped
109
+ assert data.startswith('{"name": "R\\u00e9\\udceamy",')
110
+
111
+ # pack the json string back to a record and make sure it is the same as before
112
+ assert packer.unpack(data) == record
@@ -1,4 +1,5 @@
1
1
  import importlib
2
+ import inspect
2
3
  import os
3
4
  import sys
4
5
  from unittest.mock import patch
@@ -27,8 +28,6 @@ from flow.record.base import (
27
28
  from flow.record.exceptions import RecordDescriptorError
28
29
  from flow.record.stream import RecordFieldRewriter
29
30
 
30
- from . import utils_inspect as inspect
31
-
32
31
 
33
32
  def test_record_creation():
34
33
  TestRecord = RecordDescriptor(
@@ -288,8 +287,30 @@ def test_record_printer_stdout(capsys):
288
287
  writer.write(record)
289
288
 
290
289
  out, err = capsys.readouterr()
291
- modifier = "" if isinstance("", str) else "u"
292
- expected = "<test/a a_string={u}'hello' common={u}'world' a_count=10>\n".format(u=modifier)
290
+ expected = "<test/a a_string='hello' common='world' a_count=10>\n"
291
+ assert out == expected
292
+
293
+
294
+ def test_record_printer_stdout_surrogateescape(capsys):
295
+ Record = RecordDescriptor(
296
+ "test/a",
297
+ [
298
+ ("string", "name"),
299
+ ],
300
+ )
301
+ record = Record(b"R\xc3\xa9\xeamy")
302
+
303
+ # fake capsys to be a tty.
304
+ def isatty():
305
+ return True
306
+
307
+ capsys._capture.out.tmpfile.isatty = isatty
308
+
309
+ writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout))
310
+ writer.write(record)
311
+
312
+ out, err = capsys.readouterr()
313
+ expected = "<test/a name='Ré\\udceamy'>\n"
293
314
  assert out == expected
294
315
 
295
316
 
@@ -1,58 +0,0 @@
1
- """
2
- Backport of `inspect.signature` for Python 2.
3
-
4
- Based on: https://github.com/python/cpython/blob/3.7/Lib/inspect.py
5
- """
6
-
7
- import collections
8
- import inspect
9
-
10
-
11
- class _empty:
12
- pass
13
-
14
-
15
- class Parameter:
16
- POSITIONAL_ONLY = 0
17
- POSITIONAL_OR_KEYWORD = 1
18
- VAR_POSITIONAL = 2
19
- KEYWORD_ONLY = 3
20
- VAR_KEYWORD = 4
21
-
22
- empty = _empty
23
-
24
- def __init__(self, name, kind, default=_empty):
25
- self.name = name
26
- self.kind = kind
27
- self.default = default
28
-
29
-
30
- class Signature:
31
- empty = _empty
32
-
33
- def __init__(self, parameters=None):
34
- self.parameters = parameters
35
-
36
-
37
- def signature(obj):
38
- try:
39
- # Python 3
40
- return inspect.signature(obj)
41
- except AttributeError:
42
- # Python 2
43
- spec = inspect.getargspec(obj)
44
-
45
- # Create parameter objects which are compatible with python 3 objects
46
- parameters = collections.OrderedDict()
47
- for i in range(0, len(spec.args)):
48
- arg = spec.args[i]
49
- default = _empty
50
- if spec.defaults and (len(spec.args) - i <= len(spec.defaults)):
51
- default = spec.defaults[i - len(spec.args)]
52
- parameters[arg] = Parameter(name=arg, default=default, kind=Parameter.POSITIONAL_OR_KEYWORD)
53
- if spec.varargs:
54
- parameters[spec.varargs] = Parameter(name=spec.varargs, kind=Parameter.VAR_POSITIONAL)
55
- if spec.keywords:
56
- parameters[spec.keywords] = Parameter(name=spec.keywords, kind=Parameter.VAR_KEYWORD)
57
-
58
- return Signature(parameters=parameters)
File without changes
File without changes