csv-detective 0.7.5.dev1277__py3-none-any.whl → 0.7.5.dev1298__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. csv_detective/__init__.py +1 -1
  2. csv_detective/detect_fields/__init__.py +6 -4
  3. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +7 -7
  4. csv_detective/detect_fields/other/float/__init__.py +4 -4
  5. csv_detective/detect_fields/other/money/__init__.py +11 -0
  6. csv_detective/detect_fields/other/percent/__init__.py +9 -0
  7. csv_detective/detection/formats.py +145 -0
  8. csv_detective/explore_csv.py +94 -222
  9. csv_detective/load_tests.py +62 -0
  10. csv_detective/output/__init__.py +64 -0
  11. csv_detective/output/dataframe.py +0 -0
  12. csv_detective/output/example.py +77 -77
  13. csv_detective/output/profile.py +0 -0
  14. csv_detective/output/schema.py +0 -0
  15. csv_detective/output/utils.py +0 -0
  16. csv_detective/utils.py +2 -0
  17. csv_detective/validate.py +70 -0
  18. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/CHANGELOG.md +2 -0
  19. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/METADATA +1 -1
  20. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/RECORD +27 -20
  21. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/WHEEL +1 -1
  22. tests/test_example.py +10 -10
  23. tests/test_fields.py +270 -415
  24. tests/test_file.py +19 -9
  25. tests/test_structure.py +6 -0
  26. tests/test_validation.py +18 -0
  27. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  28. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/README.md +0 -0
  29. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/entry_points.txt +0 -0
  30. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
  31. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/top_level.txt +0 -0
tests/test_file.py CHANGED
@@ -28,7 +28,7 @@ def test_columns_output_on_file():
28
28
  "STRUCTURED_INFO",
29
29
  "GEO_INFO",
30
30
  ]
31
- assert output["total_lines"] == 414
31
+ assert output["total_lines"] == 404
32
32
  assert output["nb_duplicates"] == 7
33
33
  assert output["columns"]["NOMCOM"]["format"] == "commune"
34
34
  assert output["columns"]["NOMDEP"]["format"] == "departement"
@@ -48,7 +48,7 @@ def test_profile_output_on_file():
48
48
  )
49
49
  assert all(
50
50
  [
51
- c in list(output["profile"]["NUMCOM"].keys())
51
+ c in list(output["profile"]["TXCOUVGLO_COM_2014"].keys())
52
52
  for c in [
53
53
  "min",
54
54
  "max",
@@ -60,12 +60,22 @@ def test_profile_output_on_file():
60
60
  ]
61
61
  ]
62
62
  )
63
- assert len(output["profile"]["NOMCOM"].keys()) == 3
64
- assert output["profile"]["NUMCOM"]["min"] == 1001
65
- assert output["profile"]["NUMCOM"]["max"] == 6125
66
- assert round(output["profile"]["NUMCOM"]["mean"]) == 1245
67
- assert round(output["profile"]["NUMCOM"]["std"]) == 363
68
- assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_distinct"] == 296
63
+ assert not any(
64
+ [
65
+ c in list(output["profile"]["NUMCOM"].keys())
66
+ for c in [
67
+ "min",
68
+ "max",
69
+ "mean",
70
+ "std",
71
+ ]
72
+ ]
73
+ )
74
+ assert output["profile"]["TXCOUVGLO_COM_2014"]["min"] == 0.0
75
+ assert output["profile"]["TXCOUVGLO_COM_2014"]["max"] == 200.2
76
+ assert round(output["profile"]["TXCOUVGLO_COM_2014"]["mean"]) == 60
77
+ assert round(output["profile"]["TXCOUVGLO_COM_2014"]["std"]) == 36
78
+ assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_distinct"] == 290
69
79
  assert output["profile"]["TXCOUVGLO_COM_2014"]["nb_missing_values"] == 3
70
80
  assert output["profile"]["GEO_INFO"]["nb_distinct"] == 1
71
81
 
@@ -175,7 +185,7 @@ def mocked_responses():
175
185
  "params",
176
186
  # ideally we'd like to do the same with params_others but pandas.read_excel uses urllib
177
187
  # which doesn't support the way we mock the response, TBC
178
- params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 414})]
188
+ params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 404})]
179
189
  )
180
190
  def test_urls(mocked_responses, params):
181
191
  file_name, checks = params
tests/test_structure.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  # flake8: noqa
3
3
  from csv_detective import detect_fields, detect_labels
4
+ from csv_detective.load_tests import return_all_tests
4
5
 
5
6
 
6
7
  def tests_conformity():
@@ -29,3 +30,8 @@ def tests_conformity():
29
30
  .replace("/", ".")
30
31
  )
31
32
  assert "_is" in dir(_package)
33
+
34
+
35
+ def test_all_tests_have_unique_name():
36
+ names = [t.__name__.split(".")[-1] for t in return_all_tests("ALL", "detect_fields")]
37
+ assert len(names) == len(set(names))
tests/test_validation.py ADDED
@@ -0,0 +1,18 @@
1
+ import json
2
+
3
+ import pandas as pd
4
+
5
+ from csv_detective.validate import validate
6
+
7
+
8
+ def test_validation():
9
+ with open("tests/data/a_test_file.json", "r") as f:
10
+ previous_analysis = json.load(f)
11
+ is_valid, table, analysis = validate(
12
+ "tests/data/a_test_file.csv",
13
+ previous_analysis=previous_analysis,
14
+ num_rows=-1,
15
+ )
16
+ assert is_valid is True
17
+ assert isinstance(table, pd.DataFrame)
18
+ assert isinstance(analysis, dict)