csv-detective 0.8.1.dev1526__py3-none-any.whl → 0.8.1.dev1535__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ def test_col_val(
16
16
  skipna: bool = True,
17
17
  limited_output: bool = False,
18
18
  verbose: bool = False,
19
- ):
19
+ ) -> float:
20
20
  """Tests values of the serie using test_func.
21
21
  - skipna : if True indicates that NaNs are not counted as False
22
22
  - proportion : indicates the proportion of values that have to pass the test
@@ -81,10 +81,13 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
81
81
  if verbose:
82
82
  start = time()
83
83
  logging.info("Testing columns to get types")
84
- test_funcs = dict()
85
- for test in all_tests:
86
- name = test.__name__.split(".")[-1]
87
- test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
84
+ test_funcs = {
85
+ test.__name__.split(".")[-1]: {
86
+ "func": test._is,
87
+ "prop": test.PROPORTION,
88
+ }
89
+ for test in all_tests
90
+ }
88
91
  return_table = pd.DataFrame(columns=table.columns)
89
92
  for idx, (key, value) in enumerate(test_funcs.items()):
90
93
  if verbose:
csv_detective/validate.py CHANGED
@@ -5,11 +5,15 @@ import pandas as pd
5
5
 
6
6
  from csv_detective.load_tests import return_all_tests
7
7
  from csv_detective.parsing.load import load_file
8
+ from csv_detective.parsing.columns import test_col_val
8
9
 
9
10
  logging.basicConfig(level=logging.INFO)
10
11
 
11
12
  tests = {
12
- t.__name__.split(".")[-1]: t._is
13
+ t.__name__.split(".")[-1]: {
14
+ "func": t._is,
15
+ "prop": t.PROPORTION,
16
+ }
13
17
  for t in return_all_tests("ALL", "detect_fields")
14
18
  }
15
19
 
@@ -56,11 +60,13 @@ def validate(
56
60
  if args["format"] == "string":
57
61
  # no test for columns that have not been recognized as a specific format
58
62
  continue
59
- test_func = tests[args["format"]]
60
- col_data = table[col_name]
61
- if skipna:
62
- col_data = col_data.loc[~col_data.isna()]
63
- if not col_data.apply(test_func).all():
63
+ test_result: float = test_col_val(
64
+ serie=table[col_name],
65
+ test_func=tests[args["format"]]["func"],
66
+ proportion=tests[args["format"]]["prop"],
67
+ skipna=skipna,
68
+ )
69
+ if not bool(test_result):
64
70
  if verbose:
65
71
  logging.warning("> Test failed, proceeding with full analysis")
66
72
  return False, table, analysis
@@ -8,6 +8,7 @@
8
8
  - For big files, analyse on sample then validate on whole file [#124](https://github.com/datagouv/csv-detective/pull/124) [#129](https://github.com/datagouv/csv-detective/pull/129)
9
9
  - Fix imports [#125](https://github.com/datagouv/csv-detective/pull/125) [#126](https://github.com/datagouv/csv-detective/pull/126) [#127](https://github.com/datagouv/csv-detective/pull/127) [#128](https://github.com/datagouv/csv-detective/pull/128)
10
10
  - Split aware and naive datetimes for hydra to cast them separately [#130](https://github.com/datagouv/csv-detective/pull/130)
11
+ - Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
11
12
 
12
13
  ## 0.8.0 (2025-05-20)
13
14
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.8.1.dev1526
3
+ Version: 0.8.1.dev1535
4
4
  Summary: Detect tabular files column content
5
5
  Home-page: https://github.com/datagouv/csv_detective
6
6
  Author: Etalab
@@ -4,7 +4,7 @@ csv_detective/explore_csv.py,sha256=VEeAJaz3FPOmGmQ-Yuf3FuSRRPULM03FrTf3qwZX52s,
4
4
  csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
5
5
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
6
6
  csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
7
- csv_detective/validate.py,sha256=d_4Phmjk6Y0Z0YYVw4vpoZy8E79K370reGgkpzx1mcQ,2644
7
+ csv_detective/validate.py,sha256=5Li_vfvU9wdfoZjNjef-MBUoKcKoJ-c7381QoX9aDXY,2818
8
8
  csv_detective/detect_fields/__init__.py,sha256=HYSy0P_aH6R8Z8Hvd8aMaBAQaZ1QwcsWHT0YPm0iYs0,998
9
9
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -144,16 +144,16 @@ csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz
144
144
  csv_detective/output/schema.py,sha256=WxgajFuLfUTQQtmEdlO8ve2ULDzw2BYfz8QFwUsdDh0,13558
145
145
  csv_detective/output/utils.py,sha256=qFYhxJmkKrTUefdH7Owh-liZijswomCafic4cXYSyCg,2506
146
146
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
- csv_detective/parsing/columns.py,sha256=VzgG9Nwph5C_fLW_TuQC5BZVlPmOyjrH7Plvm_c8kWc,5675
147
+ csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
148
148
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
149
149
  csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
150
150
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
151
151
  csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
152
152
  csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
153
- csv_detective-0.8.1.dev1526.data/data/share/csv_detective/CHANGELOG.md,sha256=QBkuYfCNZtm-waJYz1YEITwR8kCMDKKZH6-ef7oj8tQ,9161
154
- csv_detective-0.8.1.dev1526.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
- csv_detective-0.8.1.dev1526.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
156
- csv_detective-0.8.1.dev1526.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
+ csv_detective-0.8.1.dev1535.data/data/share/csv_detective/CHANGELOG.md,sha256=ECVXB1NkkzReQ4Bt_Bc3GrmGy3aMTSiP6VLxNlpAiN4,9283
154
+ csv_detective-0.8.1.dev1535.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
+ csv_detective-0.8.1.dev1535.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
156
+ csv_detective-0.8.1.dev1535.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
157
157
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
159
159
  tests/test_fields.py,sha256=zeEQbHs0ougLzydmZLZs1l2UdrhKBEtdCCK64B4dhSU,10700
@@ -161,8 +161,8 @@ tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
161
161
  tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
162
162
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
163
163
  tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
164
- csv_detective-0.8.1.dev1526.dist-info/METADATA,sha256=6w8386meaPhTcYjmslsOqjkqvpLPZme5ikCsx7zJizo,10443
165
- csv_detective-0.8.1.dev1526.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
- csv_detective-0.8.1.dev1526.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
- csv_detective-0.8.1.dev1526.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
168
- csv_detective-0.8.1.dev1526.dist-info/RECORD,,
164
+ csv_detective-0.8.1.dev1535.dist-info/METADATA,sha256=MlEo_TETeNCT0LVS92Uu6f0W5BRMT70GqYXh2fhE12U,10443
165
+ csv_detective-0.8.1.dev1535.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
+ csv_detective-0.8.1.dev1535.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
+ csv_detective-0.8.1.dev1535.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
168
+ csv_detective-0.8.1.dev1535.dist-info/RECORD,,