csv-detective 0.8.1.dev1526__py3-none-any.whl → 0.8.1.dev1535__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- csv_detective/parsing/columns.py +8 -5
- csv_detective/validate.py +12 -6
- {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1535.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/METADATA +1 -1
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/RECORD +11 -11
- {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1535.data}/data/share/csv_detective/LICENSE +0 -0
- {csv_detective-0.8.1.dev1526.data → csv_detective-0.8.1.dev1535.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/top_level.txt +0 -0
csv_detective/parsing/columns.py
CHANGED
|
@@ -16,7 +16,7 @@ def test_col_val(
|
|
|
16
16
|
skipna: bool = True,
|
|
17
17
|
limited_output: bool = False,
|
|
18
18
|
verbose: bool = False,
|
|
19
|
-
):
|
|
19
|
+
) -> float:
|
|
20
20
|
"""Tests values of the serie using test_func.
|
|
21
21
|
- skipna : if True indicates that NaNs are not counted as False
|
|
22
22
|
- proportion : indicates the proportion of values that have to pass the test
|
|
@@ -81,10 +81,13 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
|
|
|
81
81
|
if verbose:
|
|
82
82
|
start = time()
|
|
83
83
|
logging.info("Testing columns to get types")
|
|
84
|
-
test_funcs =
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
84
|
+
test_funcs = {
|
|
85
|
+
test.__name__.split(".")[-1]: {
|
|
86
|
+
"func": test._is,
|
|
87
|
+
"prop": test.PROPORTION,
|
|
88
|
+
}
|
|
89
|
+
for test in all_tests
|
|
90
|
+
}
|
|
88
91
|
return_table = pd.DataFrame(columns=table.columns)
|
|
89
92
|
for idx, (key, value) in enumerate(test_funcs.items()):
|
|
90
93
|
if verbose:
|
csv_detective/validate.py
CHANGED
|
@@ -5,11 +5,15 @@ import pandas as pd
|
|
|
5
5
|
|
|
6
6
|
from csv_detective.load_tests import return_all_tests
|
|
7
7
|
from csv_detective.parsing.load import load_file
|
|
8
|
+
from csv_detective.parsing.columns import test_col_val
|
|
8
9
|
|
|
9
10
|
logging.basicConfig(level=logging.INFO)
|
|
10
11
|
|
|
11
12
|
tests = {
|
|
12
|
-
t.__name__.split(".")[-1]:
|
|
13
|
+
t.__name__.split(".")[-1]: {
|
|
14
|
+
"func": t._is,
|
|
15
|
+
"prop": t.PROPORTION,
|
|
16
|
+
}
|
|
13
17
|
for t in return_all_tests("ALL", "detect_fields")
|
|
14
18
|
}
|
|
15
19
|
|
|
@@ -56,11 +60,13 @@ def validate(
|
|
|
56
60
|
if args["format"] == "string":
|
|
57
61
|
# no test for columns that have not been recognized as a specific format
|
|
58
62
|
continue
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
63
|
+
test_result: float = test_col_val(
|
|
64
|
+
serie=table[col_name],
|
|
65
|
+
test_func=tests[args["format"]]["func"],
|
|
66
|
+
proportion=tests[args["format"]]["prop"],
|
|
67
|
+
skipna=skipna,
|
|
68
|
+
)
|
|
69
|
+
if not bool(test_result):
|
|
64
70
|
if verbose:
|
|
65
71
|
logging.warning("> Test failed, proceeding with full analysis")
|
|
66
72
|
return False, table, analysis
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
- For big files, analyse on sample then validate on whole file [#124](https://github.com/datagouv/csv-detective/pull/124) [#129](https://github.com/datagouv/csv-detective/pull/129)
|
|
9
9
|
- Fix imports [#125](https://github.com/datagouv/csv-detective/pull/125) [#126](https://github.com/datagouv/csv-detective/pull/126) [#127](https://github.com/datagouv/csv-detective/pull/127) [#128](https://github.com/datagouv/csv-detective/pull/128)
|
|
10
10
|
- Split aware and naive datetimes for hydra to cast them separately [#130](https://github.com/datagouv/csv-detective/pull/130)
|
|
11
|
+
- Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
|
|
11
12
|
|
|
12
13
|
## 0.8.0 (2025-05-20)
|
|
13
14
|
|
|
@@ -4,7 +4,7 @@ csv_detective/explore_csv.py,sha256=VEeAJaz3FPOmGmQ-Yuf3FuSRRPULM03FrTf3qwZX52s,
|
|
|
4
4
|
csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
|
|
5
5
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
6
6
|
csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
|
|
7
|
-
csv_detective/validate.py,sha256=
|
|
7
|
+
csv_detective/validate.py,sha256=5Li_vfvU9wdfoZjNjef-MBUoKcKoJ-c7381QoX9aDXY,2818
|
|
8
8
|
csv_detective/detect_fields/__init__.py,sha256=HYSy0P_aH6R8Z8Hvd8aMaBAQaZ1QwcsWHT0YPm0iYs0,998
|
|
9
9
|
csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -144,16 +144,16 @@ csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz
|
|
|
144
144
|
csv_detective/output/schema.py,sha256=WxgajFuLfUTQQtmEdlO8ve2ULDzw2BYfz8QFwUsdDh0,13558
|
|
145
145
|
csv_detective/output/utils.py,sha256=qFYhxJmkKrTUefdH7Owh-liZijswomCafic4cXYSyCg,2506
|
|
146
146
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
csv_detective/parsing/columns.py,sha256=
|
|
147
|
+
csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
|
|
148
148
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
149
149
|
csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
|
|
150
150
|
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
151
151
|
csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
|
|
152
152
|
csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
|
|
153
|
-
csv_detective-0.8.1.
|
|
154
|
-
csv_detective-0.8.1.
|
|
155
|
-
csv_detective-0.8.1.
|
|
156
|
-
csv_detective-0.8.1.
|
|
153
|
+
csv_detective-0.8.1.dev1535.data/data/share/csv_detective/CHANGELOG.md,sha256=ECVXB1NkkzReQ4Bt_Bc3GrmGy3aMTSiP6VLxNlpAiN4,9283
|
|
154
|
+
csv_detective-0.8.1.dev1535.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
155
|
+
csv_detective-0.8.1.dev1535.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
|
|
156
|
+
csv_detective-0.8.1.dev1535.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
157
157
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
158
|
tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
|
|
159
159
|
tests/test_fields.py,sha256=zeEQbHs0ougLzydmZLZs1l2UdrhKBEtdCCK64B4dhSU,10700
|
|
@@ -161,8 +161,8 @@ tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
|
|
|
161
161
|
tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
|
|
162
162
|
tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
|
|
163
163
|
tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
166
|
-
csv_detective-0.8.1.
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
164
|
+
csv_detective-0.8.1.dev1535.dist-info/METADATA,sha256=MlEo_TETeNCT0LVS92Uu6f0W5BRMT70GqYXh2fhE12U,10443
|
|
165
|
+
csv_detective-0.8.1.dev1535.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
csv_detective-0.8.1.dev1535.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
167
|
+
csv_detective-0.8.1.dev1535.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
168
|
+
csv_detective-0.8.1.dev1535.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1526.dist-info → csv_detective-0.8.1.dev1535.dist-info}/top_level.txt
RENAMED
|
File without changes
|