flow.record 3.17.dev4__tar.gz → 3.17.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85):
  1. {flow_record-3.17.dev4/flow.record.egg-info → flow_record-3.17.dev5}/PKG-INFO +1 -1
  2. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/elastic.py +1 -1
  3. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/line.py +1 -1
  4. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/sqlite.py +1 -1
  5. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/text.py +1 -1
  6. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/xlsx.py +2 -2
  7. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/base.py +2 -2
  8. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/__init__.py +2 -27
  9. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ipv4.py +0 -7
  10. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/jsonpacker.py +1 -5
  11. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/utils.py +18 -22
  12. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/version.py +2 -2
  13. {flow_record-3.17.dev4 → flow_record-3.17.dev5/flow.record.egg-info}/PKG-INFO +1 -1
  14. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/SOURCES.txt +2 -1
  15. flow_record-3.17.dev5/tests/test_adapter_line.py +29 -0
  16. flow_record-3.17.dev5/tests/test_adapter_text.py +28 -0
  17. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_fieldtypes.py +2 -9
  18. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_json_packer.py +20 -0
  19. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_record.py +25 -4
  20. flow_record-3.17.dev4/tests/utils_inspect.py +0 -58
  21. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/COPYRIGHT +0 -0
  22. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/LICENSE +0 -0
  23. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/MANIFEST.in +0 -0
  24. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/README.md +0 -0
  25. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/filesystem.py +0 -0
  26. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/passivedns.py +0 -0
  27. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/records.json +0 -0
  28. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/examples/tcpconn.py +0 -0
  29. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/__init__.py +0 -0
  30. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/__init__.py +0 -0
  31. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/archive.py +0 -0
  32. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/avro.py +0 -0
  33. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/broker.py +0 -0
  34. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/csvfile.py +0 -0
  35. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/duckdb.py +0 -0
  36. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/jsonfile.py +0 -0
  37. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/mongo.py +0 -0
  38. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/split.py +0 -0
  39. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/splunk.py +0 -0
  40. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/adapter/stream.py +0 -0
  41. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/exceptions.py +0 -0
  42. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/credential.py +0 -0
  43. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/__init__.py +0 -0
  44. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/ip.py +0 -0
  45. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/tcp.py +0 -0
  46. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/fieldtypes/net/udp.py +0 -0
  47. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/packer.py +0 -0
  48. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/selector.py +0 -0
  49. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/stream.py +0 -0
  50. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/tools/__init__.py +0 -0
  51. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/tools/geoip.py +0 -0
  52. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/tools/rdump.py +0 -0
  53. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow/record/whitelist.py +0 -0
  54. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/dependency_links.txt +0 -0
  55. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/entry_points.txt +0 -0
  56. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/requires.txt +0 -0
  57. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/flow.record.egg-info/top_level.txt +0 -0
  58. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/pyproject.toml +0 -0
  59. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/setup.cfg +0 -0
  60. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/__init__.py +0 -0
  61. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/_utils.py +0 -0
  62. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/docs/Makefile +0 -0
  63. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/docs/conf.py +0 -0
  64. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/docs/index.rst +0 -0
  65. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/selector_explain_example.py +0 -0
  66. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/standalone_test.py +0 -0
  67. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_avro.py +0 -0
  68. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_avro_adapter.py +0 -0
  69. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_compiled_selector.py +0 -0
  70. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_csv_adapter.py +0 -0
  71. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_deprecations.py +0 -0
  72. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_elastic_adapter.py +0 -0
  73. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_fieldtype_ip.py +0 -0
  74. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_json_record_adapter.py +0 -0
  75. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_multi_timestamp.py +0 -0
  76. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_packer.py +0 -0
  77. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_rdump.py +0 -0
  78. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_record_adapter.py +0 -0
  79. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_record_descriptor.py +0 -0
  80. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_regression.py +0 -0
  81. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_selector.py +0 -0
  82. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_splunk_adapter.py +0 -0
  83. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_sqlite_duckdb_adapter.py +0 -0
  84. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tests/test_xlsx_adapter.py +0 -0
  85. {flow_record-3.17.dev4 → flow_record-3.17.dev5}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.17.dev4
3
+ Version: 3.17.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -106,7 +106,7 @@ class ElasticWriter(AbstractWriter):
106
106
  }
107
107
 
108
108
  if self.hash_record:
109
- document["_id"] = hashlib.md5(document["_source"].encode()).hexdigest()
109
+ document["_id"] = hashlib.md5(document["_source"].encode(errors="surrogateescape")).hexdigest()
110
110
 
111
111
  return document
112
112
 
@@ -69,7 +69,7 @@ class LineWriter(AbstractWriter):
69
69
  for key, value in rdict.items():
70
70
  if rdict_types:
71
71
  key = f"{key} ({rdict_types[key]})"
72
- self.fp.write(fmt.format(key, value).encode())
72
+ self.fp.write(fmt.format(key, value).encode(errors="surrogateescape"))
73
73
 
74
74
  def flush(self) -> None:
75
75
  if self.fp:
@@ -187,7 +187,7 @@ class SqliteReader(AbstractReader):
187
187
  if value == 0:
188
188
  row[idx] = None
189
189
  elif isinstance(value, str):
190
- row[idx] = value.encode("utf-8")
190
+ row[idx] = value.encode(errors="surrogateescape")
191
191
  yield descriptor_cls.init_from_dict(dict(zip(fnames, row)))
192
192
 
193
193
  def __iter__(self) -> Iterator[Record]:
@@ -41,7 +41,7 @@ class TextWriter(AbstractWriter):
41
41
  buf = self.format_spec.format_map(DefaultMissing(rec._asdict()))
42
42
  else:
43
43
  buf = repr(rec)
44
- self.fp.write(buf.encode() + b"\n")
44
+ self.fp.write(buf.encode(errors="surrogateescape") + b"\n")
45
45
 
46
46
  # because stdout is usually line buffered we force flush here if wanted
47
47
  if self.auto_flush:
@@ -36,7 +36,7 @@ def sanitize_fieldvalues(values: Iterator[Any]) -> Iterator[Any]:
36
36
  elif isinstance(value, bytes):
37
37
  base64_encode = False
38
38
  try:
39
- new_value = 'b"' + value.decode() + '"'
39
+ new_value = 'b"' + value.decode(errors="surrogateescape") + '"'
40
40
  if ILLEGAL_CHARACTERS_RE.search(new_value):
41
41
  base64_encode = True
42
42
  else:
@@ -142,7 +142,7 @@ class XlsxReader(AbstractReader):
142
142
  if field_types[idx] == "bytes":
143
143
  if value[1] == '"': # If so, we know this is b""
144
144
  # Cut of the b" at the start and the trailing "
145
- value = value[2:-1].encode()
145
+ value = value[2:-1].encode(errors="surrogateescape")
146
146
  else:
147
147
  # If not, we know it is base64 encoded (so we cut of the starting 'base64:')
148
148
  value = b64decode(value[7:])
@@ -61,7 +61,7 @@ except ImportError:
61
61
 
62
62
  from collections import OrderedDict
63
63
 
64
- from .utils import to_native_str, to_str
64
+ from .utils import to_str
65
65
  from .whitelist import WHITELIST, WHITELIST_TREE
66
66
 
67
67
  log = logging.getLogger(__package__)
@@ -513,7 +513,7 @@ class RecordDescriptor:
513
513
  name, fields = parse_def(name)
514
514
 
515
515
  self.name = name
516
- self._field_tuples = tuple([(to_native_str(k), to_str(v)) for k, v in fields])
516
+ self._field_tuples = tuple([(to_str(k), to_str(v)) for k, v in fields])
517
517
  self.recordType = _generate_record_class(name, self._field_tuples)
518
518
  self.recordType._desc = self
519
519
 
@@ -28,7 +28,6 @@ except ImportError:
28
28
  from flow.record.base import FieldType
29
29
 
30
30
  RE_NORMALIZE_PATH = re.compile(r"[\\/]+")
31
- NATIVE_UNICODE = isinstance("", str)
32
31
 
33
32
  UTC = timezone.utc
34
33
 
@@ -207,10 +206,7 @@ class stringlist(list, FieldType):
207
206
  class string(string_type, FieldType):
208
207
  def __new__(cls, value):
209
208
  if isinstance(value, bytes_type):
210
- value = cls._decode(value, "utf-8")
211
- if isinstance(value, bytes_type):
212
- # Still bytes, so decoding failed (Python 2)
213
- return bytes(value)
209
+ value = value.decode(errors="surrogateescape")
214
210
  return super().__new__(cls, value)
215
211
 
216
212
  def _pack(self):
@@ -221,27 +217,6 @@ class string(string_type, FieldType):
221
217
  return defang(self)
222
218
  return str.__format__(self, spec)
223
219
 
224
- @classmethod
225
- def _decode(cls, data, encoding):
226
- """Decode a byte-string into a unicode-string.
227
-
228
- Python 3: When `data` contains invalid unicode characters a `UnicodeDecodeError` is raised.
229
- Python 2: When `data` contains invalid unicode characters the original byte-string is returned.
230
- """
231
- if NATIVE_UNICODE:
232
- # Raises exception on decode error
233
- return data.decode(encoding)
234
- try:
235
- return data.decode(encoding)
236
- except UnicodeDecodeError:
237
- # Fallback to bytes (Python 2 only)
238
- preview = data[:16].encode("hex_codec") + (".." if len(data) > 16 else "")
239
- warnings.warn(
240
- "Got binary data in string field (hex: {}). Compatibility is not guaranteed.".format(preview),
241
- RuntimeWarning,
242
- )
243
- return data
244
-
245
220
 
246
221
  # Alias for backwards compatibility
247
222
  wstring = string
@@ -278,7 +253,7 @@ class datetime(_dt, FieldType):
278
253
  if len(args) == 1 and not kwargs:
279
254
  arg = args[0]
280
255
  if isinstance(arg, bytes_type):
281
- arg = arg.decode("utf-8")
256
+ arg = arg.decode(errors="surrogateescape")
282
257
  if isinstance(arg, string_type):
283
258
  # If we are on Python 3.11 or newer, we can use fromisoformat() to parse the string (fast path)
284
259
  #
@@ -3,7 +3,6 @@ import struct
3
3
  import warnings
4
4
 
5
5
  from flow.record import FieldType
6
- from flow.record.utils import to_native_str
7
6
 
8
7
 
9
8
  def addr_long(s):
@@ -45,9 +44,6 @@ class subnet(FieldType):
45
44
  DeprecationWarning,
46
45
  stacklevel=5,
47
46
  )
48
- if isinstance(addr, type("")):
49
- addr = to_native_str(addr)
50
-
51
47
  if not isinstance(addr, str):
52
48
  raise TypeError("Subnet() argument 1 must be string, not {}".format(type(addr).__name__))
53
49
 
@@ -67,9 +63,6 @@ class subnet(FieldType):
67
63
  if addr is None:
68
64
  return False
69
65
 
70
- if isinstance(addr, type("")):
71
- addr = to_native_str(addr)
72
-
73
66
  if isinstance(addr, str):
74
67
  addr = addr_long(addr)
75
68
 
@@ -47,12 +47,8 @@ class JsonRecordPacker:
47
47
  serial["_recorddescriptor"] = obj._desc.identifier
48
48
 
49
49
  for field_type, field_name in obj._desc.get_field_tuples():
50
- # PYTHON2: Because "bytes" are also "str" we have to handle this here
51
- if field_type == "bytes" and isinstance(serial[field_name], str):
52
- serial[field_name] = base64.b64encode(serial[field_name]).decode()
53
-
54
50
  # Boolean field types should be cast to a bool instead of staying ints
55
- elif field_type == "boolean" and isinstance(serial[field_name], int):
51
+ if field_type == "boolean" and isinstance(serial[field_name], int):
56
52
  serial[field_name] = bool(serial[field_name])
57
53
 
58
54
  return serial
@@ -3,13 +3,10 @@ from __future__ import annotations
3
3
  import base64
4
4
  import os
5
5
  import sys
6
+ import warnings
6
7
  from functools import wraps
7
8
  from typing import BinaryIO, TextIO
8
9
 
9
- _native = str
10
- _unicode = type("")
11
- _bytes = type(b"")
12
-
13
10
 
14
11
  def get_stdout(binary: bool = False) -> TextIO | BinaryIO:
15
12
  """Return the stdout stream as binary or text stream.
@@ -50,33 +47,32 @@ def is_stdout(fp: TextIO | BinaryIO) -> bool:
50
47
 
51
48
  def to_bytes(value):
52
49
  """Convert a value to a byte string."""
53
- if value is None or isinstance(value, _bytes):
50
+ if value is None or isinstance(value, bytes):
54
51
  return value
55
- if isinstance(value, _unicode):
56
- return value.encode("utf-8")
57
- return _bytes(value)
52
+ if isinstance(value, str):
53
+ return value.encode(errors="surrogateescape")
54
+ return bytes(value)
58
55
 
59
56
 
60
57
  def to_str(value):
61
58
  """Convert a value to a unicode string."""
62
- if value is None or isinstance(value, _unicode):
59
+ if value is None or isinstance(value, str):
63
60
  return value
64
- if isinstance(value, _bytes):
65
- return value.decode("utf-8")
66
- return _unicode(value)
61
+ if isinstance(value, bytes):
62
+ return value.decode(errors="surrogateescape")
63
+ return str(value)
67
64
 
68
65
 
69
66
  def to_native_str(value):
70
- """Convert a value to a native `str`."""
71
- if value is None or isinstance(value, _native):
72
- return value
73
- if isinstance(value, _unicode):
74
- # Python 2: unicode -> str
75
- return value.encode("utf-8")
76
- if isinstance(value, _bytes):
77
- # Python 3: bytes -> str
78
- return value.decode("utf-8")
79
- return _native(value)
67
+ warnings.warn(
68
+ (
69
+ "The to_native_str() function is deprecated, "
70
+ "this function will be removed in flow.record 3.20, "
71
+ "use to_str() instead"
72
+ ),
73
+ DeprecationWarning,
74
+ )
75
+ return to_str(value)
80
76
 
81
77
 
82
78
  def to_base64(value):
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '3.17.dev4'
16
- __version_tuple__ = version_tuple = (3, 17, 'dev4')
15
+ __version__ = version = '3.17.dev5'
16
+ __version_tuple__ = version_tuple = (3, 17, 'dev5')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flow.record
3
- Version: 3.17.dev4
3
+ Version: 3.17.dev5
4
4
  Summary: A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record
5
5
  Author-email: Dissect Team <dissect@fox-it.com>
6
6
  License: Affero General Public License v3
@@ -54,6 +54,8 @@ tests/__init__.py
54
54
  tests/_utils.py
55
55
  tests/selector_explain_example.py
56
56
  tests/standalone_test.py
57
+ tests/test_adapter_line.py
58
+ tests/test_adapter_text.py
57
59
  tests/test_avro.py
58
60
  tests/test_avro_adapter.py
59
61
  tests/test_compiled_selector.py
@@ -75,7 +77,6 @@ tests/test_selector.py
75
77
  tests/test_splunk_adapter.py
76
78
  tests/test_sqlite_duckdb_adapter.py
77
79
  tests/test_xlsx_adapter.py
78
- tests/utils_inspect.py
79
80
  tests/docs/Makefile
80
81
  tests/docs/conf.py
81
82
  tests/docs/index.rst
@@ -0,0 +1,29 @@
1
+ from io import BytesIO
2
+
3
+ from flow.record import RecordDescriptor
4
+ from flow.record.adapter.line import LineWriter
5
+
6
+
7
+ def test_line_writer_write_surrogateescape():
8
+ output = BytesIO()
9
+
10
+ lw = LineWriter(
11
+ path=output,
12
+ fields="name",
13
+ )
14
+
15
+ TestRecord = RecordDescriptor(
16
+ "test/string",
17
+ [
18
+ ("string", "name"),
19
+ ],
20
+ )
21
+
22
+ # construct from 'bytes' but with invalid unicode bytes
23
+ record = TestRecord(b"R\xc3\xa9\xeamy")
24
+ lw.write(record)
25
+
26
+ output.seek(0)
27
+ data = output.read()
28
+
29
+ assert data == b"--[ RECORD 1 ]--\nname = R\xc3\xa9\xeamy\n"
@@ -0,0 +1,28 @@
1
+ from io import BytesIO
2
+
3
+ from flow.record import RecordDescriptor
4
+ from flow.record.adapter.text import TextWriter
5
+
6
+
7
+ def test_text_writer_write_surrogateescape():
8
+ output = BytesIO()
9
+
10
+ tw = TextWriter(
11
+ path=output,
12
+ )
13
+
14
+ TestRecord = RecordDescriptor(
15
+ "test/string",
16
+ [
17
+ ("string", "name"),
18
+ ],
19
+ )
20
+
21
+ # construct from 'bytes' but with invalid unicode bytes
22
+ record = TestRecord(b"R\xc3\xa9\xeamy")
23
+ tw.write(record)
24
+
25
+ output.seek(0)
26
+ data = output.read()
27
+
28
+ assert data == b"<test/string name='R\xc3\xa9\\udceamy'>\n"
@@ -213,15 +213,8 @@ def test_string():
213
213
  assert r.name == "Rémy"
214
214
 
215
215
  # construct from 'bytes' but with invalid unicode bytes
216
- if isinstance("", str):
217
- # Python 3
218
- with pytest.raises(UnicodeDecodeError):
219
- TestRecord(b"R\xc3\xa9\xeamy")
220
- else:
221
- # Python 2
222
- with pytest.warns(RuntimeWarning):
223
- r = TestRecord(b"R\xc3\xa9\xeamy")
224
- assert r.name
216
+ r = TestRecord(b"R\xc3\xa9\xeamy")
217
+ assert r.name == "Ré\udceamy"
225
218
 
226
219
 
227
220
  def test_wstring():
@@ -90,3 +90,23 @@ def test_record_pack_bool_regression() -> None:
90
90
 
91
91
  # pack the json string back to a record and make sure it is the same as before
92
92
  assert packer.unpack(data) == record
93
+
94
+
95
+ def test_record_pack_surrogateescape() -> None:
96
+ TestRecord = RecordDescriptor(
97
+ "test/string",
98
+ [
99
+ ("string", "name"),
100
+ ],
101
+ )
102
+
103
+ record = TestRecord(b"R\xc3\xa9\xeamy")
104
+ packer = JsonRecordPacker()
105
+
106
+ data = packer.pack(record)
107
+
108
+ # pack to json string and check if the 3rd and 4th byte are properly surrogate escaped
109
+ assert data.startswith('{"name": "R\\u00e9\\udceamy",')
110
+
111
+ # pack the json string back to a record and make sure it is the same as before
112
+ assert packer.unpack(data) == record
@@ -1,4 +1,5 @@
1
1
  import importlib
2
+ import inspect
2
3
  import os
3
4
  import sys
4
5
  from unittest.mock import patch
@@ -27,8 +28,6 @@ from flow.record.base import (
27
28
  from flow.record.exceptions import RecordDescriptorError
28
29
  from flow.record.stream import RecordFieldRewriter
29
30
 
30
- from . import utils_inspect as inspect
31
-
32
31
 
33
32
  def test_record_creation():
34
33
  TestRecord = RecordDescriptor(
@@ -288,8 +287,30 @@ def test_record_printer_stdout(capsys):
288
287
  writer.write(record)
289
288
 
290
289
  out, err = capsys.readouterr()
291
- modifier = "" if isinstance("", str) else "u"
292
- expected = "<test/a a_string={u}'hello' common={u}'world' a_count=10>\n".format(u=modifier)
290
+ expected = "<test/a a_string='hello' common='world' a_count=10>\n"
291
+ assert out == expected
292
+
293
+
294
+ def test_record_printer_stdout_surrogateescape(capsys):
295
+ Record = RecordDescriptor(
296
+ "test/a",
297
+ [
298
+ ("string", "name"),
299
+ ],
300
+ )
301
+ record = Record(b"R\xc3\xa9\xeamy")
302
+
303
+ # fake capsys to be a tty.
304
+ def isatty():
305
+ return True
306
+
307
+ capsys._capture.out.tmpfile.isatty = isatty
308
+
309
+ writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout))
310
+ writer.write(record)
311
+
312
+ out, err = capsys.readouterr()
313
+ expected = "<test/a name='Ré\\udceamy'>\n"
293
314
  assert out == expected
294
315
 
295
316
 
@@ -1,58 +0,0 @@
1
- """
2
- Backport of `inspect.signature` for Python 2.
3
-
4
- Based on: https://github.com/python/cpython/blob/3.7/Lib/inspect.py
5
- """
6
-
7
- import collections
8
- import inspect
9
-
10
-
11
- class _empty:
12
- pass
13
-
14
-
15
- class Parameter:
16
- POSITIONAL_ONLY = 0
17
- POSITIONAL_OR_KEYWORD = 1
18
- VAR_POSITIONAL = 2
19
- KEYWORD_ONLY = 3
20
- VAR_KEYWORD = 4
21
-
22
- empty = _empty
23
-
24
- def __init__(self, name, kind, default=_empty):
25
- self.name = name
26
- self.kind = kind
27
- self.default = default
28
-
29
-
30
- class Signature:
31
- empty = _empty
32
-
33
- def __init__(self, parameters=None):
34
- self.parameters = parameters
35
-
36
-
37
- def signature(obj):
38
- try:
39
- # Python 3
40
- return inspect.signature(obj)
41
- except AttributeError:
42
- # Python 2
43
- spec = inspect.getargspec(obj)
44
-
45
- # Create parameter objects which are compatible with python 3 objects
46
- parameters = collections.OrderedDict()
47
- for i in range(0, len(spec.args)):
48
- arg = spec.args[i]
49
- default = _empty
50
- if spec.defaults and (len(spec.args) - i <= len(spec.defaults)):
51
- default = spec.defaults[i - len(spec.args)]
52
- parameters[arg] = Parameter(name=arg, default=default, kind=Parameter.POSITIONAL_OR_KEYWORD)
53
- if spec.varargs:
54
- parameters[spec.varargs] = Parameter(name=spec.varargs, kind=Parameter.VAR_POSITIONAL)
55
- if spec.keywords:
56
- parameters[spec.keywords] = Parameter(name=spec.keywords, kind=Parameter.VAR_KEYWORD)
57
-
58
- return Signature(parameters=parameters)
File without changes
File without changes