csv-detective 0.7.5.dev1277__py3-none-any.whl → 0.7.5.dev1298__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -1
- csv_detective/detect_fields/__init__.py +6 -4
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py +7 -7
- csv_detective/detect_fields/other/float/__init__.py +4 -4
- csv_detective/detect_fields/other/money/__init__.py +11 -0
- csv_detective/detect_fields/other/percent/__init__.py +9 -0
- csv_detective/detection/formats.py +145 -0
- csv_detective/explore_csv.py +94 -222
- csv_detective/load_tests.py +62 -0
- csv_detective/output/__init__.py +64 -0
- csv_detective/output/dataframe.py +0 -0
- csv_detective/output/example.py +77 -77
- csv_detective/output/profile.py +0 -0
- csv_detective/output/schema.py +0 -0
- csv_detective/output/utils.py +0 -0
- csv_detective/utils.py +2 -0
- csv_detective/validate.py +70 -0
- {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/CHANGELOG.md +2 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/METADATA +1 -1
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/RECORD +27 -20
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/WHEEL +1 -1
- tests/test_example.py +10 -10
- tests/test_fields.py +270 -415
- tests/test_file.py +19 -9
- tests/test_structure.py +6 -0
- tests/test_validation.py +18 -0
- {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/top_level.txt +0 -0
tests/test_file.py
CHANGED
|
@@ -28,7 +28,7 @@ def test_columns_output_on_file():
|
|
|
28
28
|
"STRUCTURED_INFO",
|
|
29
29
|
"GEO_INFO",
|
|
30
30
|
]
|
|
31
|
-
assert output["total_lines"] ==
|
|
31
|
+
assert output["total_lines"] == 404
|
|
32
32
|
assert output["nb_duplicates"] == 7
|
|
33
33
|
assert output["columns"]["NOMCOM"]["format"] == "commune"
|
|
34
34
|
assert output["columns"]["NOMDEP"]["format"] == "departement"
|
|
@@ -48,7 +48,7 @@ def test_profile_output_on_file():
|
|
|
48
48
|
)
|
|
49
49
|
assert all(
|
|
50
50
|
[
|
|
51
|
-
c in list(output["profile"]["
|
|
51
|
+
c in list(output["profile"]["TXCOUVGLO_COM_2014"].keys())
|
|
52
52
|
for c in [
|
|
53
53
|
"min",
|
|
54
54
|
"max",
|
|
@@ -60,12 +60,22 @@ def test_profile_output_on_file():
|
|
|
60
60
|
]
|
|
61
61
|
]
|
|
62
62
|
)
|
|
63
|
-
assert
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
63
|
+
assert not any(
|
|
64
|
+
[
|
|
65
|
+
c in list(output["profile"]["NUMCOM"].keys())
|
|
66
|
+
for c in [
|
|
67
|
+
"min",
|
|
68
|
+
"max",
|
|
69
|
+
"mean",
|
|
70
|
+
"std",
|
|
71
|
+
]
|
|
72
|
+
]
|
|
73
|
+
)
|
|
74
|
+
assert output["profile"]["TXCOUVGLO_COM_2014"]["min"] == 0.0
|
|
75
|
+
assert output["profile"]["TXCOUVGLO_COM_2014"]["max"] == 200.2
|
|
76
|
+
assert round(output["profile"]["TXCOUVGLO_COM_2014"]["mean"]) == 60
|
|
77
|
+
assert round(output["profile"]["TXCOUVGLO_COM_2014"]["std"]) == 36
|
|
78
|
+
assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_distinct"] == 290
|
|
69
79
|
assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_missing_values"] == 3
|
|
70
80
|
assert output["profile"]["GEO_INFO"]["nb_distinct"] == 1
|
|
71
81
|
|
|
@@ -175,7 +185,7 @@ def mocked_responses():
|
|
|
175
185
|
"params",
|
|
176
186
|
# ideally we'd like to do the same with params_others but pandas.read_excel uses urllib
|
|
177
187
|
# which doesn't support the way we mock the response, TBC
|
|
178
|
-
params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines":
|
|
188
|
+
params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 404})]
|
|
179
189
|
)
|
|
180
190
|
def test_urls(mocked_responses, params):
|
|
181
191
|
file_name, checks = params
|
tests/test_structure.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
# flake8: noqa
|
|
3
3
|
from csv_detective import detect_fields, detect_labels
|
|
4
|
+
from csv_detective.load_tests import return_all_tests
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def tests_conformity():
|
|
@@ -29,3 +30,8 @@ def tests_conformity():
|
|
|
29
30
|
.replace("/", ".")
|
|
30
31
|
)
|
|
31
32
|
assert "_is" in dir(_package)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_all_tests_have_unique_name():
|
|
36
|
+
names = [t.__name__.split(".")[-1] for t in return_all_tests("ALL", "detect_fields")]
|
|
37
|
+
assert len(names) == len(set(names))
|
tests/test_validation.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from csv_detective.validate import validate
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_validation():
|
|
9
|
+
with open("tests/data/a_test_file.json", "r") as f:
|
|
10
|
+
previous_analysis = json.load(f)
|
|
11
|
+
is_valid, table, analysis = validate(
|
|
12
|
+
"tests/data/a_test_file.csv",
|
|
13
|
+
previous_analysis=previous_analysis,
|
|
14
|
+
num_rows=-1,
|
|
15
|
+
)
|
|
16
|
+
assert is_valid is True
|
|
17
|
+
assert isinstance(table, pd.DataFrame)
|
|
18
|
+
assert isinstance(analysis, dict)
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/top_level.txt
RENAMED
|
File without changes
|