csv-detective 0.9.3.dev2400__py3-none-any.whl → 0.9.3.dev2409__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -123,6 +123,7 @@ def detect_formats(
123
123
  "longitude_l93": "float",
124
124
  "longitude_wgs": "float",
125
125
  "longitude_wgs_fr_metropole": "float",
126
+ "binary": "binary",
126
127
  }
127
128
 
128
129
  if not limited_output:
@@ -0,0 +1,26 @@
1
+ import codecs
2
+
3
+ proportion = 1
4
+ tags = ["type"]
5
+ labels = ["bytes", "binary", "image", "encode", "content"]
6
+
7
+
8
def binary_casting(val: str) -> bytes:
    """Convert the text of a bytes literal (e.g. ``"b'\\x01'"``) into bytes.

    The first two characters and the last character of ``val`` are dropped
    (the ``b'`` / ``b"`` prefix and the closing quote when ``val`` has the
    shape checked by ``_is``) and backslash escapes in the remainder are
    decoded. No shape validation is done here.

    Raises whatever ``codecs.escape_decode`` raises on a malformed escape
    sequence.
    """
    # escape_decode returns a (decoded_bytes, consumed_length) pair; only the
    # decoded payload is of interest.
    return codecs.escape_decode(val[2:-1])[0]


def _is(val) -> bool:
    """Tell whether ``val`` is a string shaped like a Python bytes literal.

    Accepted shapes are ``b'...'`` and ``b"..."`` where the closing quote is
    the same character as the opening one and the content can be decoded by
    ``binary_casting``. Anything that is not a ``str`` yields ``False``.
    """
    # At least three characters are needed so that the opening and closing
    # quotes are two distinct characters: the bare prefixes "b'" and 'b"'
    # are not literals and must be rejected.
    if not isinstance(val, str) or len(val) < 3:
        return False
    quote = val[1]
    if val[0] != "b" or quote not in ("'", '"') or val[-1] != quote:
        return False
    try:
        return isinstance(binary_casting(val), bytes)
    except Exception:
        # Detection must never raise: any decoding failure simply means the
        # value is not a usable bytes literal.
        return False
21
+
22
+
23
+ _test_values = {
24
+ True: ["b'\x01\x01'", 'b"\x01\x01\x00\x00\x00;\xb7\xd4\xc5_)J\xc0\xcb\x16>\x9e\xd1\xc4\x13@"'],
25
+ False: ["bytes", 'b"ytes'],
26
+ }
@@ -5,6 +5,7 @@ from typing import Iterator
5
5
 
6
6
  import pandas as pd
7
7
 
8
+ from csv_detective.formats.binary import binary_casting
8
9
  from csv_detective.formats.booleen import bool_casting
9
10
  from csv_detective.formats.date import date_casting
10
11
  from csv_detective.formats.float import float_casting
@@ -12,23 +13,27 @@ from csv_detective.parsing.csv import CHUNK_SIZE
12
13
  from csv_detective.utils import display_logs_depending_process_time
13
14
 
14
15
 
15
- def cast(value: str, _type: str) -> str | float | bool | date | datetime | None:
16
+ def cast(value: str, _type: str) -> str | float | bool | date | datetime | bytes | None:
16
17
  if not isinstance(value, str) or not value:
17
18
  # None is the current default value in hydra, should we keep this?
18
19
  return None
19
- if _type == "float":
20
- return float_casting(value)
21
- if _type == "bool":
22
- return bool_casting(value)
23
- if _type == "json":
24
- # in hydra json are given to postgres as strings, conversion is done by postgres
25
- return json.loads(value)
26
- if _type == "date":
27
- _date = date_casting(value)
28
- return _date.date() if _date else None
29
- if _type == "datetime":
30
- return date_casting(value)
31
- raise ValueError(f"Unknown type `{_type}`")
20
+ match _type:
21
+ case "float":
22
+ return float_casting(value)
23
+ case "bool":
24
+ return bool_casting(value)
25
+ case "json":
26
+ # in hydra json are given to postgres as strings, conversion is done by postgres
27
+ return json.loads(value)
28
+ case "date":
29
+ _date = date_casting(value)
30
+ return _date.date() if _date else None
31
+ case "datetime":
32
+ return date_casting(value)
33
+ case "binary":
34
+ return binary_casting(value)
35
+ case _:
36
+ raise ValueError(f"Unknown type `{_type}`")
32
37
 
33
38
 
34
39
  def cast_df(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: csv-detective
3
- Version: 0.9.3.dev2400
3
+ Version: 0.9.3.dev2409
4
4
  Summary: Detect tabular files column content
5
5
  Keywords: CSV,data processing,encoding,guess,parser,tabular
6
6
  Author: data.gouv.fr
@@ -4,7 +4,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
4
4
  csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
5
5
  csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
6
6
  csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
7
- csv_detective/detection/formats.py,sha256=uxmWz7J3btAwaOONIACxiL9vTZ8Iv7NdTSUqAOPQy0o,5381
7
+ csv_detective/detection/formats.py,sha256=kQEht5lr9hFhYe0Zn1lfj9jOKaqYrXNrM_tkQX24pEk,5410
8
8
  csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6HfzOkU,1051
9
9
  csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
10
10
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
@@ -13,6 +13,7 @@ csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,
13
13
  csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
14
14
  csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
15
15
  csv_detective/formats/adresse.py,sha256=jALDpEDAWyAcgqEfNVRg_W1r6XaYuJKD_jAaP2l-bxk,1943
16
+ csv_detective/formats/binary.py,sha256=OCGRDh5p27sA4yjrpKIp3b2_PfHJYUe5QxIArf-fCxA,676
16
17
  csv_detective/formats/booleen.py,sha256=AnDDKShkSYpWO4POhwY2V7_C4yPWbmqBu8CJPgQ9Gwc,648
17
18
  csv_detective/formats/code_commune_insee.py,sha256=MhwCPVAhwWH-MyaNAIVRNbqKfeNe3oiCpzEGfpHkpJY,504
18
19
  csv_detective/formats/code_csp_insee.py,sha256=_JQ-YbnHMenNnwIg1xBmNVqgCa1tLD2hbPN1soODhDk,656
@@ -71,7 +72,7 @@ csv_detective/formats/username.py,sha256=y38OggfWpEQsGi0JnD9QRM30musa29lO6nz-qyb
71
72
  csv_detective/formats/uuid.py,sha256=ekMEFfzQtz0cLudzmu3AoCM0Yf5pu23qAcFNFgHWJ1A,346
72
73
  csv_detective/formats/year.py,sha256=pkAfYPKZdy0g1ZoHGgJNpgTS5y5weGEKXCVMGaxIX8k,472
73
74
  csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
74
- csv_detective/output/dataframe.py,sha256=TyBc2ObaVUns_ydJWOMKmCYvuj7ddxag0QN3z37g3GE,3219
75
+ csv_detective/output/dataframe.py,sha256=Hnd-AY51U0JMACcpuaK9wwO4oCX9Nd7ZLUTqavgJWRA,3406
75
76
  csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
76
77
  csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf44,4876
77
78
  csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
@@ -85,7 +86,7 @@ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0
85
86
  csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
86
87
  csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
87
88
  csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
88
- csv_detective-0.9.3.dev2400.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
89
- csv_detective-0.9.3.dev2400.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
90
- csv_detective-0.9.3.dev2400.dist-info/METADATA,sha256=XBMZp650BNXuUmMPEw7ffC7tNfMD69JGd0diGhKCIQE,11063
91
- csv_detective-0.9.3.dev2400.dist-info/RECORD,,
89
+ csv_detective-0.9.3.dev2409.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
+ csv_detective-0.9.3.dev2409.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
+ csv_detective-0.9.3.dev2409.dist-info/METADATA,sha256=mcETENWniXaJkJwPuxDTnyUi3DNNq9yhnPKinyGQRpg,11063
92
+ csv_detective-0.9.3.dev2409.dist-info/RECORD,,