csv-detective 0.9.3.dev2400__py3-none-any.whl → 0.9.3.dev2409__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detection/formats.py +1 -0
- csv_detective/formats/binary.py +26 -0
- csv_detective/output/dataframe.py +19 -14
- {csv_detective-0.9.3.dev2400.dist-info → csv_detective-0.9.3.dev2409.dist-info}/METADATA +1 -1
- {csv_detective-0.9.3.dev2400.dist-info → csv_detective-0.9.3.dev2409.dist-info}/RECORD +7 -6
- {csv_detective-0.9.3.dev2400.dist-info → csv_detective-0.9.3.dev2409.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2400.dist-info → csv_detective-0.9.3.dev2409.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import codecs
|
|
2
|
+
|
|
3
|
+
# Module-level metadata describing the "binary" format.
# NOTE(review): how `proportion`, `tags` and `labels` are consumed is not visible
# from this module — presumably read by the format-detection machinery; confirm.
proportion = 1
tags = ["type"]
labels = ["bytes", "binary", "image", "encode", "content"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def binary_casting(val: str) -> bytes:
    """Convert the textual form of a bytes literal into a ``bytes`` object.

    The first two characters (the ``b`` prefix and the opening quote) and the
    last character (the closing quote) are dropped, and backslash escape
    sequences in what remains are decoded.

    No validation is done here: the caller is expected to have checked that
    ``val`` really has the ``b'...'`` / ``b"..."`` shape.
    """
    payload = val[2:-1]
    # escape_decode returns a (decoded_bytes, consumed_length) pair
    decoded, _consumed = codecs.escape_decode(payload)
    return decoded
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _is(val) -> bool:
|
|
13
|
+
if isinstance(val, str) and (
|
|
14
|
+
(val.startswith("b'") and val.endswith("'")) or (val.startswith('b"') and val.endswith('"'))
|
|
15
|
+
):
|
|
16
|
+
try:
|
|
17
|
+
return isinstance(binary_casting(val), bytes)
|
|
18
|
+
except Exception:
|
|
19
|
+
return False
|
|
20
|
+
return False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Sample inputs grouped under a boolean key.
# NOTE(review): presumably the key is the result `_is` is expected to return for
# each listed value, and the mapping is consumed by the test-suite — confirm.
_test_values = {
    True: ["b'\x01\x01'", 'b"\x01\x01\x00\x00\x00;\xb7\xd4\xc5_)J\xc0\xcb\x16>\x9e\xd1\xc4\x13@"'],
    False: ["bytes", 'b"ytes'],
}
|
|
@@ -5,6 +5,7 @@ from typing import Iterator
|
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
+
from csv_detective.formats.binary import binary_casting
|
|
8
9
|
from csv_detective.formats.booleen import bool_casting
|
|
9
10
|
from csv_detective.formats.date import date_casting
|
|
10
11
|
from csv_detective.formats.float import float_casting
|
|
@@ -12,23 +13,27 @@ from csv_detective.parsing.csv import CHUNK_SIZE
|
|
|
12
13
|
from csv_detective.utils import display_logs_depending_process_time
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
def cast(value: str, _type: str) -> str | float | bool | date | datetime | None:
|
|
16
|
+
def cast(value: str, _type: str) -> str | float | bool | date | datetime | bytes | None:
|
|
16
17
|
if not isinstance(value, str) or not value:
|
|
17
18
|
# None is the current default value in hydra, should we keep this?
|
|
18
19
|
return None
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
20
|
+
match _type:
|
|
21
|
+
case "float":
|
|
22
|
+
return float_casting(value)
|
|
23
|
+
case "bool":
|
|
24
|
+
return bool_casting(value)
|
|
25
|
+
case "json":
|
|
26
|
+
# in hydra json are given to postgres as strings, conversion is done by postgres
|
|
27
|
+
return json.loads(value)
|
|
28
|
+
case "date":
|
|
29
|
+
_date = date_casting(value)
|
|
30
|
+
return _date.date() if _date else None
|
|
31
|
+
case "datetime":
|
|
32
|
+
return date_casting(value)
|
|
33
|
+
case "binary":
|
|
34
|
+
return binary_casting(value)
|
|
35
|
+
case _:
|
|
36
|
+
raise ValueError(f"Unknown type `{_type}`")
|
|
32
37
|
|
|
33
38
|
|
|
34
39
|
def cast_df(
|
|
@@ -4,7 +4,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
4
4
|
csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
|
|
5
5
|
csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
|
|
6
6
|
csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
|
|
7
|
-
csv_detective/detection/formats.py,sha256=
|
|
7
|
+
csv_detective/detection/formats.py,sha256=kQEht5lr9hFhYe0Zn1lfj9jOKaqYrXNrM_tkQX24pEk,5410
|
|
8
8
|
csv_detective/detection/headers.py,sha256=95pTL524Sy5PGxyQ03ofFUaamvlmkxTJQe8u6HfzOkU,1051
|
|
9
9
|
csv_detective/detection/rows.py,sha256=quf3ZTTFPOo09H-faZ9cRKibb1QGHEKHlpivFRx2Va4,742
|
|
10
10
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
@@ -13,6 +13,7 @@ csv_detective/explore_csv.py,sha256=-LCHr7vyT0Q0oLtXeOO8pEevJ6-8Ib9JP3D7nVgZM8o,
|
|
|
13
13
|
csv_detective/format.py,sha256=XX_cSTQc0jlsQq3GUqHi7Cz36AiRrpjrwPmeoOTLMvo,2396
|
|
14
14
|
csv_detective/formats/__init__.py,sha256=Egiy29kcG3Oz2eE2maYhD3wP29zOSOWyRlOpGD5LGvU,318
|
|
15
15
|
csv_detective/formats/adresse.py,sha256=jALDpEDAWyAcgqEfNVRg_W1r6XaYuJKD_jAaP2l-bxk,1943
|
|
16
|
+
csv_detective/formats/binary.py,sha256=OCGRDh5p27sA4yjrpKIp3b2_PfHJYUe5QxIArf-fCxA,676
|
|
16
17
|
csv_detective/formats/booleen.py,sha256=AnDDKShkSYpWO4POhwY2V7_C4yPWbmqBu8CJPgQ9Gwc,648
|
|
17
18
|
csv_detective/formats/code_commune_insee.py,sha256=MhwCPVAhwWH-MyaNAIVRNbqKfeNe3oiCpzEGfpHkpJY,504
|
|
18
19
|
csv_detective/formats/code_csp_insee.py,sha256=_JQ-YbnHMenNnwIg1xBmNVqgCa1tLD2hbPN1soODhDk,656
|
|
@@ -71,7 +72,7 @@ csv_detective/formats/username.py,sha256=y38OggfWpEQsGi0JnD9QRM30musa29lO6nz-qyb
|
|
|
71
72
|
csv_detective/formats/uuid.py,sha256=ekMEFfzQtz0cLudzmu3AoCM0Yf5pu23qAcFNFgHWJ1A,346
|
|
72
73
|
csv_detective/formats/year.py,sha256=pkAfYPKZdy0g1ZoHGgJNpgTS5y5weGEKXCVMGaxIX8k,472
|
|
73
74
|
csv_detective/output/__init__.py,sha256=ALSq_tgX7rGyh--7rmbKz8wHkmResN0h7mNujndow3w,2103
|
|
74
|
-
csv_detective/output/dataframe.py,sha256=
|
|
75
|
+
csv_detective/output/dataframe.py,sha256=Hnd-AY51U0JMACcpuaK9wwO4oCX9Nd7ZLUTqavgJWRA,3406
|
|
75
76
|
csv_detective/output/example.py,sha256=8LWheSBYCeDFfarbnmzBrdCbTd8Alh1U4pfXMKfabOw,8630
|
|
76
77
|
csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf44,4876
|
|
77
78
|
csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
|
|
@@ -85,7 +86,7 @@ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0
|
|
|
85
86
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
86
87
|
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
87
88
|
csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
|
|
88
|
-
csv_detective-0.9.3.
|
|
89
|
-
csv_detective-0.9.3.
|
|
90
|
-
csv_detective-0.9.3.
|
|
91
|
-
csv_detective-0.9.3.
|
|
89
|
+
csv_detective-0.9.3.dev2409.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
|
|
90
|
+
csv_detective-0.9.3.dev2409.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
|
|
91
|
+
csv_detective-0.9.3.dev2409.dist-info/METADATA,sha256=mcETENWniXaJkJwPuxDTnyUi3DNNq9yhnPKinyGQRpg,11063
|
|
92
|
+
csv_detective-0.9.3.dev2409.dist-info/RECORD,,
|
|
File without changes
|
{csv_detective-0.9.3.dev2400.dist-info → csv_detective-0.9.3.dev2409.dist-info}/entry_points.txt
RENAMED
|
File without changes
|