csv-detective 0.9.3.dev2486__py3-none-any.whl → 0.9.3.dev2500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,29 @@
1
+ import re
2
+
1
3
  proportion = 1
2
4
  tags = ["type"]
3
5
  labels = ["part", "ratio", "taux"]
4
6
 
7
+ scientific_notation_pattern = r"\d+\.\d+[e|E][+|-]?\d+"
8
+
5
9
 
6
10
  def float_casting(val: str) -> float:
7
11
  return float(val.replace(",", "."))
8
12
 
9
13
 
10
14
  def _is(val):
11
- """Detects floats, assuming that tables will not have scientific
12
- notations (3e6) or "+" in the string. "-" is still accepted."""
15
+ """Detects floats (including scientific notation), unless there is an underscore or a plus sign (bad practice)."""
13
16
  try:
14
17
  if (
15
18
  not isinstance(val, str)
16
- or any([k in val for k in ["_", "+", "e", "E"]])
19
+ or "_" in val
17
20
  or (val.startswith("0") and len(val) > 1 and val[1] not in [".", ","])
18
21
  ):
19
22
  return False
23
+ elif any([k in val for k in ["+", "e", "E"]]) and not re.match(
24
+ scientific_notation_pattern, val
25
+ ):
26
+ return False
20
27
  float_casting(val)
21
28
  return True
22
29
  except ValueError:
@@ -24,6 +31,6 @@ def _is(val):
24
31
 
25
32
 
26
33
  _test_values = {
27
- True: ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"],
34
+ True: ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1", "1.9764E-1", "19.01e-29"],
28
35
  False: ["01053", "01053.89", "1e3", "123_456", "123_456.78", "+35", "+35.9"],
29
36
  }
@@ -48,11 +48,10 @@ def test_col_val(
48
48
  for _range in [
49
49
  min(1, ser_len),
50
50
  min(5, ser_len),
51
- ser_len,
52
51
  ]:
53
52
  if not all(apply_test_func(serie, format.func, _range)):
54
53
  return 0.0
55
- return 1.0
54
+ return float(serie.apply(format.func).sum() == ser_len)
56
55
  finally:
57
56
  if verbose and time() - start > 3:
58
57
  display_logs_depending_process_time(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: csv-detective
3
- Version: 0.9.3.dev2486
3
+ Version: 0.9.3.dev2500
4
4
  Summary: Detect tabular files column content
5
5
  Keywords: CSV,data processing,encoding,guess,parser,tabular
6
6
  Author: data.gouv.fr
@@ -38,7 +38,7 @@ csv_detective/formats/datetime_naive.py,sha256=nvA8qT1fb2RmpXN5_Cw9YZA6pC4BryX_B
38
38
  csv_detective/formats/datetime_rfc822.py,sha256=l-SLb34hSuHxC2JQ-9SD-nG38JqzoozwUZiGtoybb0A,601
39
39
  csv_detective/formats/departement.py,sha256=UP9UF23BFq_-mIS8N10K5XkoCXwPmDeSoa_7lCAkI4w,768
40
40
  csv_detective/formats/email.py,sha256=Qen2EBDYY5TtWXwxrrTGWRrbIybz0ySlVpl4ZRk8pzA,517
41
- csv_detective/formats/float.py,sha256=tWs_tW64OuacNQENu3uk5GOEVQMQls2iiteFOacQRAQ,832
41
+ csv_detective/formats/float.py,sha256=DF8CwBC4Vk-PFRlIawDr6OUPTtZjAiKYguvilDGUcmY,1033
42
42
  csv_detective/formats/geojson.py,sha256=udbBxCBRmb0o6TD8z5ryemfqdinBz6njNJU0XcbfMig,757
43
43
  csv_detective/formats/insee_ape700.py,sha256=cLs3Eersqm4wX6oqsqp0Vb3WGPJb2xY5Za_vh0uLgKc,780
44
44
  csv_detective/formats/insee_canton.py,sha256=Q5jczsOmh1wPP2KtDkcmqZ7Hlv50Zz9YvPIbxy46qs0,531
@@ -78,7 +78,7 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
78
78
  csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
79
79
  csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
80
80
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
- csv_detective/parsing/columns.py,sha256=WwivsR4r-SAkugzVSmYeUkgbNXz3CWXnEl2ZmoX_tcs,9238
81
+ csv_detective/parsing/columns.py,sha256=rb5JywbKnYCT3Jb0ZaG1BnyPVtB3gy5mSD-K7qcOl8I,9257
82
82
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
83
83
  csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
84
84
  csv_detective/parsing/excel.py,sha256=tb65I78tdYlZci_tzvvQt8U6bZSYKjeVdn2CEvsET1o,6972
@@ -86,7 +86,7 @@ csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0
86
86
  csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
87
87
  csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
88
88
  csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
89
- csv_detective-0.9.3.dev2486.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
- csv_detective-0.9.3.dev2486.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
- csv_detective-0.9.3.dev2486.dist-info/METADATA,sha256=XOx2vmkCJGFTCAc0znJ3bcz7V9mAtq7qXqh0GF8ERys,11063
92
- csv_detective-0.9.3.dev2486.dist-info/RECORD,,
89
+ csv_detective-0.9.3.dev2500.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
90
+ csv_detective-0.9.3.dev2500.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
91
+ csv_detective-0.9.3.dev2500.dist-info/METADATA,sha256=Xei7oHRc7gmW58t75DcUOF8Jp43mZ6yWZ_WJCj2RHxo,11063
92
+ csv_detective-0.9.3.dev2500.dist-info/RECORD,,