csv-detective 0.9.3.dev2409__py3-none-any.whl → 0.9.3.dev2447__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/parsing/columns.py +16 -19
- {csv_detective-0.9.3.dev2409.dist-info → csv_detective-0.9.3.dev2447.dist-info}/METADATA +1 -1
- {csv_detective-0.9.3.dev2409.dist-info → csv_detective-0.9.3.dev2447.dist-info}/RECORD +5 -5
- {csv_detective-0.9.3.dev2409.dist-info → csv_detective-0.9.3.dev2447.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2409.dist-info → csv_detective-0.9.3.dev2447.dist-info}/entry_points.txt +0 -0
csv_detective/parsing/columns.py
CHANGED
|
@@ -38,23 +38,21 @@ def test_col_val(
|
|
|
38
38
|
if ser_len == 0:
|
|
39
39
|
# being here means the whole column is NaN, so if skipna it's a pass
|
|
40
40
|
return 1.0 if skipna else 0.0
|
|
41
|
-
if not limited_output:
|
|
42
|
-
|
|
41
|
+
if not limited_output or format.proportion < 1:
|
|
42
|
+
# we want or have to go through the whole column to have the proportion
|
|
43
|
+
result: float = serie.apply(format.func).sum() / ser_len
|
|
43
44
|
return result if result >= format.proportion else 0.0
|
|
44
45
|
else:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
else:
|
|
56
|
-
result = apply_test_func(serie, format.func, ser_len).sum() / ser_len
|
|
57
|
-
return result if result >= format.proportion else 0.0
|
|
46
|
+
# the whole column has to be valid so we have early stops (1 then 5 rows)
|
|
47
|
+
# to not waste time if directly unsuccessful
|
|
48
|
+
for _range in [
|
|
49
|
+
min(1, ser_len),
|
|
50
|
+
min(5, ser_len),
|
|
51
|
+
ser_len,
|
|
52
|
+
]:
|
|
53
|
+
if not all(apply_test_func(serie, format.func, _range)):
|
|
54
|
+
return 0.0
|
|
55
|
+
return 1.0
|
|
58
56
|
finally:
|
|
59
57
|
if verbose and time() - start > 3:
|
|
60
58
|
display_logs_depending_process_time(
|
|
@@ -80,15 +78,14 @@ def test_col(
|
|
|
80
78
|
logging.info(f"\t- Starting with format '{label}'")
|
|
81
79
|
# improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
|
|
82
80
|
# => the following needs to change, "apply" means all columns are tested for one type at once
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
81
|
+
for col in table.columns:
|
|
82
|
+
return_table.loc[label, col] = test_col_val(
|
|
83
|
+
table[col],
|
|
86
84
|
format,
|
|
87
85
|
skipna=skipna,
|
|
88
86
|
limited_output=limited_output,
|
|
89
87
|
verbose=verbose,
|
|
90
88
|
)
|
|
91
|
-
)
|
|
92
89
|
if verbose:
|
|
93
90
|
display_logs_depending_process_time(
|
|
94
91
|
f'\t> Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
|
|
@@ -78,7 +78,7 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
|
|
|
78
78
|
csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
|
|
79
79
|
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
80
80
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
|
-
csv_detective/parsing/columns.py,sha256=
|
|
81
|
+
csv_detective/parsing/columns.py,sha256=Eo8GUec5ykTuDTR15OC0S_hiMkcpuZnTPCPomW80aSQ,9244
|
|
82
82
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
83
83
|
csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
|
|
84
84
|
csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
|
|
@@ -86,7 +86,7 @@ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0
|
|
|
86
86
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
87
87
|
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
88
88
|
csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
|
|
89
|
-
csv_detective-0.9.3.
|
|
90
|
-
csv_detective-0.9.3.
|
|
91
|
-
csv_detective-0.9.3.
|
|
92
|
-
csv_detective-0.9.3.
|
|
89
|
+
csv_detective-0.9.3.dev2447.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
|
|
90
|
+
csv_detective-0.9.3.dev2447.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
|
|
91
|
+
csv_detective-0.9.3.dev2447.dist-info/METADATA,sha256=Pl4Yw1e2r6GcmmTp405LNmslBeHfsYAHNpc1kvGYz14,11063
|
|
92
|
+
csv_detective-0.9.3.dev2447.dist-info/RECORD,,
|
|
File without changes
|
{csv_detective-0.9.3.dev2409.dist-info → csv_detective-0.9.3.dev2447.dist-info}/entry_points.txt
RENAMED
|
File without changes
|