csv-detective 0.7.5.dev1307__py3-none-any.whl → 0.7.5.dev1330__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/FR/other/code_import/__init__.py +9 -0
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py +2 -5
- csv_detective/detect_fields/__init__.py +1 -0
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +3 -3
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/commune/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/departement/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/pays/__init__.py +2 -2
- csv_detective/detect_labels/FR/geo/region/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/sexe/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/siren/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/siret/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +2 -2
- csv_detective/detect_labels/FR/other/uai/__init__.py +2 -2
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +2 -2
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +2 -2
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +2 -2
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +2 -2
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +2 -2
- csv_detective/detect_labels/geo/json_geojson/__init__.py +2 -2
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +2 -2
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +2 -2
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +2 -2
- csv_detective/detect_labels/other/booleen/__init__.py +2 -2
- csv_detective/detect_labels/other/email/__init__.py +2 -2
- csv_detective/detect_labels/other/float/__init__.py +2 -2
- csv_detective/detect_labels/other/int/__init__.py +2 -2
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +2 -2
- csv_detective/detect_labels/other/twitter/__init__.py +2 -2
- csv_detective/detect_labels/other/url/__init__.py +2 -2
- csv_detective/detect_labels/other/uuid/__init__.py +2 -2
- csv_detective/detect_labels/temp/date/__init__.py +2 -2
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +2 -2
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +2 -2
- csv_detective/detect_labels/temp/year/__init__.py +2 -2
- csv_detective/explore_csv.py +12 -7
- csv_detective/output/__init__.py +2 -2
- csv_detective/output/example.py +1 -1
- csv_detective/parsing/columns.py +0 -2
- csv_detective/parsing/load.py +4 -4
- csv_detective/utils.py +2 -7
- csv_detective/validate.py +21 -16
- {csv_detective-0.7.5.dev1307.data → csv_detective-0.7.5.dev1330.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/METADATA +1 -1
- {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/RECORD +68 -67
- {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/WHEEL +1 -1
- tests/test_fields.py +6 -1
- tests/test_validation.py +85 -4
- {csv_detective-0.7.5.dev1307.data → csv_detective-0.7.5.dev1330.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1307.data → csv_detective-0.7.5.dev1330.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/top_level.txt +0 -0
tests/test_validation.py
CHANGED
|
@@ -1,18 +1,99 @@
|
|
|
1
1
|
import json
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
|
+
import pytest
|
|
4
5
|
|
|
6
|
+
from csv_detective.explore_csv import validate_then_detect
|
|
5
7
|
from csv_detective.validate import validate
|
|
6
8
|
|
|
7
9
|
|
|
8
|
-
def
|
|
10
|
+
def set_nested_value(source_dict: dict, key_chain: list[str], value) -> None:
    """Set ``value`` in ``source_dict`` at the nested path given by ``key_chain``.

    Intermediate dictionaries are created on the way when a key of the path
    is missing; existing intermediate values are reused as they are.
    ``source_dict`` is modified in place and nothing is returned.

    Args:
        source_dict: The dictionary to update in place.
        key_chain: Successive keys leading to the target entry; the last one
            is the key that receives ``value``.
        value: The object to store at the end of the path.

    Raises:
        IndexError: If ``key_chain`` is empty (there is no key to assign to).
    """
    if not key_chain:
        # Fail before touching source_dict, with a message that names the cause
        # instead of the bare "list index out of range" from key_chain[-1].
        raise IndexError("key_chain must contain at least one key")
    current_dict = source_dict
    for key in key_chain[:-1]:
        # setdefault returns the existing child or inserts a fresh dict,
        # in a single lookup.
        current_dict = current_dict.setdefault(key, {})
    current_dict[key_chain[-1]] = value
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_nested_value(source_dict: dict, key_chain: list[str]):
    """Return the value found in ``source_dict`` by following ``key_chain``.

    Each key of ``key_chain`` is used, in order, to index into the object
    reached so far. With an empty ``key_chain`` the input itself is returned.
    A missing key propagates the lookup error raised by the indexing.
    """
    node = source_dict
    for step in key_chain:
        node = node[step]
    return node
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.mark.parametrize(
    "_params",
    (
        ((True, pd.DataFrame, dict), {}),
        ((False, None, None), {"separator": "|"}),
        ((False, None, None), {"encoding": "unknown"}),
        ((False, None, None), {"header": ["a", "b"]}),
        (
            (False, pd.DataFrame, dict),
            {
                "columns.NUMCOM": {
                    "python_type": "int",
                    "format": "int",
                    "score": 1.0,
                },
            },
        ),
    ),
)
def test_validation(_params):
    """Check the outcome of ``validate`` for several altered previous analyses.

    Each parameter pairs the expected outcome
    ``(should_be_valid, table_type, analysis_type)`` with a mapping of
    dotted keys to the values to overwrite in the stored analysis before
    validating the test file against it. A type of ``None`` means the
    corresponding returned object is expected to be ``None``.
    """
    expected, modif_previous_analysis = _params
    should_be_valid, table_type, analysis_type = expected

    with open("tests/data/a_test_file.json", "r") as f:
        previous_analysis = json.load(f)

    # Apply the overrides; a dotted key addresses a nested entry.
    for dotkey, new_value in modif_previous_analysis.items():
        set_nested_value(previous_analysis, dotkey.split("."), new_value)

    is_valid, table, analysis = validate(
        "tests/data/a_test_file.csv",
        previous_analysis=previous_analysis,
        num_rows=-1,
        sep=previous_analysis.get("separator"),
        encoding=previous_analysis.get("encoding"),
    )

    assert is_valid == should_be_valid

    if table_type is not None:
        assert isinstance(table, table_type)
    else:
        assert table is None

    if analysis_type is not None:
        assert isinstance(analysis, analysis_type)
    else:
        assert analysis is None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@pytest.mark.parametrize(
    "modif_previous_analysis",
    (
        {"separator": "|"},
        {"encoding": "unknown"},
        {"header": ["a", "b"]},
        {
            "columns.NUMCOM": {
                "python_type": "int",
                "format": "int",
                "score": 1.0,
            },
        },
    ),
)
def test_validate_then_detect(modif_previous_analysis):
    """Check that ``validate_then_detect`` recovers the original values.

    The stored analysis is altered at the given dotted keys before being
    passed as ``previous_analysis``. The returned analysis must hold, at
    those same keys, the values the stored analysis had before alteration.
    """
    with open("tests/data/a_test_file.json", "r") as f:
        previous_analysis = json.load(f)

    valid_values = {}
    for dotkey, altered_value in modif_previous_analysis.items():
        path = dotkey.split(".")
        # Record the original value first, then overwrite it.
        valid_values[dotkey] = get_nested_value(previous_analysis, path)
        set_nested_value(previous_analysis, path, altered_value)

    analysis = validate_then_detect(
        "tests/data/a_test_file.csv",
        previous_analysis=previous_analysis,
        num_rows=-1,
        output_profile=True,
        save_results=False,
    )

    # When the previous analysis is not valid, the returned analysis is
    # expected to have retrieved the right (original) values.
    for dotkey, expected_value in valid_values.items():
        assert get_nested_value(analysis, dotkey.split(".")) == expected_value
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/top_level.txt
RENAMED
|
File without changes
|