csv-detective 0.9.3.dev2409__py3-none-any.whl → 0.9.3.dev2447__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,23 +38,21 @@ def test_col_val(
38
38
  if ser_len == 0:
39
39
  # being here means the whole column is NaN, so if skipna it's a pass
40
40
  return 1.0 if skipna else 0.0
41
- if not limited_output:
42
- result = apply_test_func(serie, format.func, ser_len).sum() / ser_len
41
+ if not limited_output or format.proportion < 1:
42
+ # we want or have to go through the whole column to have the proportion
43
+ result: float = serie.apply(format.func).sum() / ser_len
43
44
  return result if result >= format.proportion else 0.0
44
45
  else:
45
- if format.proportion == 1:
46
- # early stops (1 then 5 rows) to not waste time if directly unsuccessful
47
- for _range in [
48
- min(1, ser_len),
49
- min(5, ser_len),
50
- ser_len,
51
- ]:
52
- if not all(apply_test_func(serie, format.func, _range)):
53
- return 0.0
54
- return 1.0
55
- else:
56
- result = apply_test_func(serie, format.func, ser_len).sum() / ser_len
57
- return result if result >= format.proportion else 0.0
46
+ # the whole column has to be valid so we have early stops (1 then 5 rows)
47
+ # to not waste time if directly unsuccessful
48
+ for _range in [
49
+ min(1, ser_len),
50
+ min(5, ser_len),
51
+ ser_len,
52
+ ]:
53
+ if not all(apply_test_func(serie, format.func, _range)):
54
+ return 0.0
55
+ return 1.0
58
56
  finally:
59
57
  if verbose and time() - start > 3:
60
58
  display_logs_depending_process_time(
@@ -80,15 +78,14 @@ def test_col(
80
78
  logging.info(f"\t- Starting with format '{label}'")
81
79
  # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
82
80
  # => the following needs to change, "apply" means all columns are tested for one type at once
83
- return_table.loc[label] = table.apply(
84
- lambda serie: test_col_val(
85
- serie,
81
+ for col in table.columns:
82
+ return_table.loc[label, col] = test_col_val(
83
+ table[col],
86
84
  format,
87
85
  skipna=skipna,
88
86
  limited_output=limited_output,
89
87
  verbose=verbose,
90
88
  )
91
- )
92
89
  if verbose:
93
90
  display_logs_depending_process_time(
94
91
  f'\t> Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: csv-detective
3
- Version: 0.9.3.dev2409
3
+ Version: 0.9.3.dev2447
4
4
  Summary: Detect tabular files column content
5
5
  Keywords: CSV,data processing,encoding,guess,parser,tabular
6
6
  Author: data.gouv.fr
@@ -78,7 +78,7 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
78
78
  csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
79
79
  csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
80
80
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
- csv_detective/parsing/columns.py,sha256=CqtZRZYMYDNMopxnqs4eZLSABi-ms61wqv5M9vWJ7iU,9343
81
+ csv_detective/parsing/columns.py,sha256=Eo8GUec5ykTuDTR15OC0S_hiMkcpuZnTPCPomW80aSQ,9244
82
82
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
83
83
  csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
84
84
  csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
@@ -86,7 +86,7 @@ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0
86
86
  csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
87
87
  csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
88
88
  csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
89
- csv_detective-0.9.3.dev2409.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
- csv_detective-0.9.3.dev2409.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
- csv_detective-0.9.3.dev2409.dist-info/METADATA,sha256=mcETENWniXaJkJwPuxDTnyUi3DNNq9yhnPKinyGQRpg,11063
92
- csv_detective-0.9.3.dev2409.dist-info/RECORD,,
89
+ csv_detective-0.9.3.dev2447.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
+ csv_detective-0.9.3.dev2447.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
+ csv_detective-0.9.3.dev2447.dist-info/METADATA,sha256=Pl4Yw1e2r6GcmmTp405LNmslBeHfsYAHNpc1kvGYz14,11063
92
+ csv_detective-0.9.3.dev2447.dist-info/RECORD,,