csv-detective 0.7.5.dev1307__py3-none-any.whl → 0.7.5.dev1330__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. csv_detective/detect_fields/FR/other/code_import/__init__.py +9 -0
  2. csv_detective/detect_fields/FR/other/code_waldec/__init__.py +2 -5
  3. csv_detective/detect_fields/__init__.py +1 -0
  4. csv_detective/detect_labels/FR/geo/adresse/__init__.py +2 -2
  5. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +3 -3
  6. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +2 -2
  7. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +2 -2
  8. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +2 -2
  9. csv_detective/detect_labels/FR/geo/code_region/__init__.py +2 -2
  10. csv_detective/detect_labels/FR/geo/commune/__init__.py +2 -2
  11. csv_detective/detect_labels/FR/geo/departement/__init__.py +2 -2
  12. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +2 -2
  13. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +2 -2
  14. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +2 -2
  15. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +2 -2
  16. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +2 -2
  17. csv_detective/detect_labels/FR/geo/pays/__init__.py +2 -2
  18. csv_detective/detect_labels/FR/geo/region/__init__.py +2 -2
  19. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +2 -2
  20. csv_detective/detect_labels/FR/other/code_rna/__init__.py +2 -2
  21. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +2 -2
  22. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +2 -2
  23. csv_detective/detect_labels/FR/other/date_fr/__init__.py +2 -2
  24. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +2 -2
  25. csv_detective/detect_labels/FR/other/sexe/__init__.py +2 -2
  26. csv_detective/detect_labels/FR/other/siren/__init__.py +2 -2
  27. csv_detective/detect_labels/FR/other/siret/__init__.py +2 -2
  28. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +2 -2
  29. csv_detective/detect_labels/FR/other/uai/__init__.py +2 -2
  30. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +2 -2
  31. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +2 -2
  32. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +2 -2
  33. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +2 -2
  34. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +2 -2
  35. csv_detective/detect_labels/geo/json_geojson/__init__.py +2 -2
  36. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +2 -2
  37. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +2 -2
  38. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +2 -2
  39. csv_detective/detect_labels/other/booleen/__init__.py +2 -2
  40. csv_detective/detect_labels/other/email/__init__.py +2 -2
  41. csv_detective/detect_labels/other/float/__init__.py +2 -2
  42. csv_detective/detect_labels/other/int/__init__.py +2 -2
  43. csv_detective/detect_labels/other/mongo_object_id/__init__.py +2 -2
  44. csv_detective/detect_labels/other/twitter/__init__.py +2 -2
  45. csv_detective/detect_labels/other/url/__init__.py +2 -2
  46. csv_detective/detect_labels/other/uuid/__init__.py +2 -2
  47. csv_detective/detect_labels/temp/date/__init__.py +2 -2
  48. csv_detective/detect_labels/temp/datetime_iso/__init__.py +2 -2
  49. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +2 -2
  50. csv_detective/detect_labels/temp/year/__init__.py +2 -2
  51. csv_detective/explore_csv.py +12 -7
  52. csv_detective/output/__init__.py +2 -2
  53. csv_detective/output/example.py +1 -1
  54. csv_detective/parsing/columns.py +0 -2
  55. csv_detective/parsing/load.py +4 -4
  56. csv_detective/utils.py +2 -7
  57. csv_detective/validate.py +21 -16
  58. {csv_detective-0.7.5.dev1307.data → csv_detective-0.7.5.dev1330.data}/data/share/csv_detective/CHANGELOG.md +1 -0
  59. {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/METADATA +1 -1
  60. {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/RECORD +68 -67
  61. {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/WHEEL +1 -1
  62. tests/test_fields.py +6 -1
  63. tests/test_validation.py +85 -4
  64. {csv_detective-0.7.5.dev1307.data → csv_detective-0.7.5.dev1330.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  65. {csv_detective-0.7.5.dev1307.data → csv_detective-0.7.5.dev1330.data}/data/share/csv_detective/README.md +0 -0
  66. {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/entry_points.txt +0 -0
  67. {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
  68. {csv_detective-0.7.5.dev1307.dist-info → csv_detective-0.7.5.dev1330.dist-info}/top_level.txt +0 -0
tests/test_validation.py CHANGED
@@ -1,18 +1,99 @@
1
1
  import json
2
2
 
3
3
  import pandas as pd
4
+ import pytest
4
5
 
6
+ from csv_detective.explore_csv import validate_then_detect
5
7
  from csv_detective.validate import validate
6
8
 
7
9
 
8
- def test_validation():
10
def set_nested_value(source_dict: dict, key_chain: list[str], value) -> None:
    """Store ``value`` in ``source_dict`` at the nested location given by ``key_chain``.

    The dictionary is modified in place. Any intermediate key of the chain
    that is missing is created as an empty dictionary before descending.

    Args:
        source_dict: dictionary to update in place.
        key_chain: successive keys leading to the target; the last key
            receives ``value``.
        value: object stored under the last key of the chain.

    Raises:
        IndexError: if ``key_chain`` is an empty list.
    """
    current_dict = source_dict
    for key in key_chain[:-1]:
        # setdefault creates the missing level and descends in a single lookup
        current_dict = current_dict.setdefault(key, {})
    current_dict[key_chain[-1]] = value
17
+
18
+
19
def get_nested_value(source_dict: dict, key_chain: list[str]):
    """Return the value reached by following ``key_chain`` from ``source_dict``.

    Each key is looked up in the result of the previous lookup; an empty
    chain returns ``source_dict`` itself. A missing key propagates the
    lookup error raised by the container.
    """
    node = source_dict
    for step in key_chain:
        node = node[step]
    return node
24
+
25
+
26
@pytest.mark.parametrize(
    "_params",
    (
        ((True, pd.DataFrame, dict), {}),
        ((False, None, None), {"separator": "|"}),
        ((False, None, None), {"encoding": "unknown"}),
        ((False, None, None), {"header": ["a", "b"]}),
        (
            (False, pd.DataFrame, dict),
            {
                "columns.NUMCOM": {
                    "python_type": "int",
                    "format": "int",
                    "score": 1.0,
                },
            },
        ),
    ),
)
def test_validation(_params):
    """Run ``validate`` on the test CSV against a possibly altered previous analysis.

    Each case is ``((should_be_valid, table_type, analysis_type), overrides)``:
    ``overrides`` maps dotted keys to values written into the stored analysis
    before validating; a ``None`` expected type means the matching output
    must be ``None``.
    """
    expectations, overrides = _params
    should_be_valid, table_type, analysis_type = expectations

    with open("tests/data/a_test_file.json", "r") as f:
        previous_analysis = json.load(f)
    # apply the alterations for this case (no-op for the unmodified case)
    for dotted_key, new_value in overrides.items():
        set_nested_value(previous_analysis, dotted_key.split("."), new_value)

    is_valid, table, analysis = validate(
        "tests/data/a_test_file.csv",
        previous_analysis=previous_analysis,
        num_rows=-1,
        sep=previous_analysis.get("separator"),
        encoding=previous_analysis.get("encoding"),
    )

    assert is_valid == should_be_valid
    # table first, then analysis: either absent or of the expected type
    for produced, expected_type in ((table, table_type), (analysis, analysis_type)):
        if expected_type is None:
            assert produced is None
        else:
            assert isinstance(produced, expected_type)
65
+
66
+
67
@pytest.mark.parametrize(
    "modif_previous_analysis",
    (
        {"separator": "|"},
        {"encoding": "unknown"},
        {"header": ["a", "b"]},
        {
            "columns.NUMCOM": {
                "python_type": "int",
                "format": "int",
                "score": 1.0,
            },
        },
    ),
)
def test_validate_then_detect(modif_previous_analysis):
    """Check ``validate_then_detect`` returns the original values for altered keys.

    ``modif_previous_analysis`` maps dotted keys to values written into the
    stored analysis before the call; the values they replace are remembered
    and compared with the returned analysis.
    """
    with open("tests/data/a_test_file.json", "r") as f:
        previous_analysis = json.load(f)

    original_values = {}
    for dotted_key, altered_value in modif_previous_analysis.items():
        path = dotted_key.split(".")
        # remember the stored value before overwriting it
        original_values[dotted_key] = get_nested_value(previous_analysis, path)
        set_nested_value(previous_analysis, path, altered_value)

    analysis = validate_then_detect(
        "tests/data/a_test_file.csv",
        previous_analysis=previous_analysis,
        num_rows=-1,
        output_profile=True,
        save_results=False,
    )

    # the returned analysis must hold the original values, not the altered ones
    for dotted_key, expected in original_values.items():
        assert get_nested_value(analysis, dotted_key.split(".")) == expected