csv-detective 0.7.5.dev1320__py3-none-any.whl → 0.7.5.dev1335__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. csv_detective/detect_labels/FR/geo/adresse/__init__.py +2 -2
  2. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +3 -3
  3. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +2 -2
  4. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +2 -2
  5. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +2 -2
  6. csv_detective/detect_labels/FR/geo/code_region/__init__.py +2 -2
  7. csv_detective/detect_labels/FR/geo/commune/__init__.py +2 -2
  8. csv_detective/detect_labels/FR/geo/departement/__init__.py +2 -2
  9. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +2 -2
  10. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +2 -2
  11. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +2 -2
  12. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +2 -2
  13. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +2 -2
  14. csv_detective/detect_labels/FR/geo/pays/__init__.py +2 -2
  15. csv_detective/detect_labels/FR/geo/region/__init__.py +2 -2
  16. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +2 -2
  17. csv_detective/detect_labels/FR/other/code_rna/__init__.py +2 -2
  18. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +2 -2
  19. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +2 -2
  20. csv_detective/detect_labels/FR/other/date_fr/__init__.py +2 -2
  21. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +2 -2
  22. csv_detective/detect_labels/FR/other/sexe/__init__.py +2 -2
  23. csv_detective/detect_labels/FR/other/siren/__init__.py +2 -2
  24. csv_detective/detect_labels/FR/other/siret/__init__.py +2 -2
  25. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +2 -2
  26. csv_detective/detect_labels/FR/other/uai/__init__.py +2 -2
  27. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +2 -2
  28. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +2 -2
  29. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +2 -2
  30. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +2 -2
  31. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +2 -2
  32. csv_detective/detect_labels/geo/json_geojson/__init__.py +2 -2
  33. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +2 -2
  34. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +2 -2
  35. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +2 -2
  36. csv_detective/detect_labels/other/booleen/__init__.py +2 -2
  37. csv_detective/detect_labels/other/email/__init__.py +2 -2
  38. csv_detective/detect_labels/other/float/__init__.py +2 -2
  39. csv_detective/detect_labels/other/int/__init__.py +2 -2
  40. csv_detective/detect_labels/other/mongo_object_id/__init__.py +2 -2
  41. csv_detective/detect_labels/other/twitter/__init__.py +2 -2
  42. csv_detective/detect_labels/other/url/__init__.py +2 -2
  43. csv_detective/detect_labels/other/uuid/__init__.py +2 -2
  44. csv_detective/detect_labels/temp/date/__init__.py +2 -2
  45. csv_detective/detect_labels/temp/datetime_iso/__init__.py +2 -2
  46. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +2 -2
  47. csv_detective/detect_labels/temp/year/__init__.py +2 -2
  48. csv_detective/explore_csv.py +12 -7
  49. csv_detective/output/__init__.py +2 -2
  50. csv_detective/output/example.py +1 -1
  51. csv_detective/parsing/columns.py +0 -2
  52. csv_detective/parsing/load.py +4 -4
  53. csv_detective/utils.py +2 -7
  54. csv_detective/validate.py +21 -16
  55. {csv_detective-0.7.5.dev1320.data → csv_detective-0.7.5.dev1335.data}/data/share/csv_detective/CHANGELOG.md +1 -0
  56. {csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/METADATA +1 -1
  57. {csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/RECORD +64 -64
  58. {csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/WHEEL +1 -1
  59. tests/test_validation.py +85 -4
  60. {csv_detective-0.7.5.dev1320.data → csv_detective-0.7.5.dev1335.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  61. {csv_detective-0.7.5.dev1320.data → csv_detective-0.7.5.dev1335.data}/data/share/csv_detective/README.md +0 -0
  62. {csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/entry_points.txt +0 -0
  63. {csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
  64. {csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from csv_detective.utils import full_word_strictly_inside_string
1
+ from csv_detective.utils import is_word_in_string
2
2
  from csv_detective.parsing.text import _process_text
3
3
 
4
4
  PROPORTION = 0.5
@@ -38,7 +38,7 @@ def _is(header):
38
38
  words_combination_in_header = 0.5 * float(
39
39
  any(
40
40
  [
41
- full_word_strictly_inside_string(
41
+ is_word_in_string(
42
42
  words_combination, processed_header
43
43
  ) for words_combination in words_combinations_list
44
44
  ]
@@ -1,4 +1,4 @@
1
- from csv_detective.utils import full_word_strictly_inside_string
1
+ from csv_detective.utils import is_word_in_string
2
2
  from csv_detective.parsing.text import _process_text
3
3
 
4
4
  PROPORTION = 0.5
@@ -23,7 +23,7 @@ def _is(header):
23
23
  words_combination_in_header = 0.5 * float(
24
24
  any(
25
25
  [
26
- full_word_strictly_inside_string(
26
+ is_word_in_string(
27
27
  words_combination, processed_header
28
28
  ) for words_combination in words_combinations_list
29
29
  ]
@@ -1,4 +1,4 @@
1
- from csv_detective.utils import full_word_strictly_inside_string
1
+ from csv_detective.utils import is_word_in_string
2
2
  from csv_detective.parsing.text import _process_text
3
3
 
4
4
  PROPORTION = 0.5
@@ -43,7 +43,7 @@ def _is(header):
43
43
  words_combination_in_header = 0.5 * float(
44
44
  any(
45
45
  [
46
- full_word_strictly_inside_string(
46
+ is_word_in_string(
47
47
  words_combination, processed_header
48
48
  ) for words_combination in words_combinations_list
49
49
  ]
@@ -1,4 +1,4 @@
1
- from csv_detective.utils import full_word_strictly_inside_string
1
+ from csv_detective.utils import is_word_in_string
2
2
  from csv_detective.parsing.text import _process_text
3
3
 
4
4
  PROPORTION = 0.5
@@ -35,7 +35,7 @@ def _is(header):
35
35
  words_combination_in_header = 0.5 * float(
36
36
  any(
37
37
  [
38
- full_word_strictly_inside_string(
38
+ is_word_in_string(
39
39
  words_combination, processed_header
40
40
  ) for words_combination in words_combinations_list
41
41
  ]
@@ -1,4 +1,4 @@
1
- from csv_detective.utils import full_word_strictly_inside_string
1
+ from csv_detective.utils import is_word_in_string
2
2
  from csv_detective.parsing.text import _process_text
3
3
 
4
4
  PROPORTION = 0.5
@@ -34,7 +34,7 @@ def _is(header):
34
34
  words_combination_in_header = 0.5 * float(
35
35
  any(
36
36
  [
37
- full_word_strictly_inside_string(
37
+ is_word_in_string(
38
38
  words_combination, processed_header
39
39
  ) for words_combination in words_combinations_list
40
40
  ]
@@ -1,4 +1,4 @@
1
- from csv_detective.utils import full_word_strictly_inside_string
1
+ from csv_detective.utils import is_word_in_string
2
2
  from csv_detective.parsing.text import _process_text
3
3
 
4
4
  PROPORTION = 0.5
@@ -34,7 +34,7 @@ def _is(header):
34
34
  words_combination_in_header = 0.5 * float(
35
35
  any(
36
36
  [
37
- full_word_strictly_inside_string(
37
+ is_word_in_string(
38
38
  words_combination, processed_header
39
39
  ) for words_combination in words_combinations_list
40
40
  ]
@@ -111,15 +111,12 @@ def validate_then_detect(
111
111
  user_input_tests: Union[str, list[str]] = "ALL",
112
112
  limited_output: bool = True,
113
113
  save_results: Union[bool, str] = True,
114
- encoding: str = None,
115
- sep: str = None,
116
114
  skipna: bool = True,
117
115
  output_profile: bool = False,
118
116
  output_schema: bool = False,
119
117
  output_df: bool = False,
120
118
  cast_json: bool = True,
121
119
  verbose: bool = False,
122
- sheet_name: Union[str, int] = None,
123
120
  ):
124
121
 
125
122
  if verbose:
@@ -131,17 +128,25 @@ def validate_then_detect(
131
128
  file_path=file_path,
132
129
  previous_analysis=previous_analysis,
133
130
  num_rows=num_rows,
134
- encoding=encoding,
135
- sep=sep,
131
+ encoding=previous_analysis.get("encoding"),
132
+ sep=previous_analysis.get("separator"),
133
+ sheet_name=previous_analysis.get("sheet_name"),
136
134
  verbose=verbose,
137
135
  skipna=skipna,
138
- sheet_name=sheet_name,
139
136
  )
140
137
  if is_valid:
141
138
  # skipping formats detection as the validation is successful
142
139
  analysis = previous_analysis
140
+ # profile has to be regenerated, it's independent from analysis
143
141
  del analysis["profile"]
144
142
  else:
143
+ if analysis is None:
144
+ # if loading failed in validate, we load it from scratch
145
+ table, analysis = load_file(
146
+ file_path=file_path,
147
+ num_rows=num_rows,
148
+ verbose=verbose,
149
+ )
145
150
  analysis = detect_formats(
146
151
  table=table,
147
152
  analysis=analysis,
@@ -163,7 +168,7 @@ def validate_then_detect(
163
168
  output_df=output_df,
164
169
  cast_json=cast_json,
165
170
  verbose=verbose,
166
- sheet_name=sheet_name,
171
+ sheet_name=analysis.get("sheet_name"),
167
172
  )
168
173
  finally:
169
174
  if verbose:
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import os
3
- from typing import Union
3
+ from typing import Optional, Union
4
4
 
5
5
  import pandas as pd
6
6
 
@@ -22,7 +22,7 @@ def generate_output(
22
22
  output_df: bool = False,
23
23
  cast_json: bool = True,
24
24
  verbose: bool = False,
25
- sheet_name: Union[str, int] = None,
25
+ sheet_name: Optional[Union[str, int]] = None,
26
26
  ) -> Union[dict, tuple[dict, pd.DataFrame]]:
27
27
 
28
28
  if output_profile:
@@ -70,7 +70,7 @@ def create_example_csv_file(
70
70
  return str(uuid.uuid4())
71
71
 
72
72
  def _date(
73
- date_range: Union[None, list[str]] = None,
73
+ date_range: Optional[list[str]] = None,
74
74
  format: str = "%Y-%m-%d",
75
75
  required: bool = True,
76
76
  ) -> str:
@@ -76,7 +76,6 @@ def test_col_label(label: str, test_func: Callable, proportion: float = 1, limit
76
76
 
77
77
 
78
78
  def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna: bool = True, verbose: bool = False):
79
- # Initialising dict for tests
80
79
  if verbose:
81
80
  start = time()
82
81
  logging.info("Testing columns to get types")
@@ -112,7 +111,6 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
112
111
 
113
112
 
114
113
  def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
115
- # Initialising dict for tests
116
114
  if verbose:
117
115
  start = time()
118
116
  logging.info("Testing labels to get types")
@@ -1,5 +1,5 @@
1
1
  from io import BytesIO, StringIO
2
- from typing import Union
2
+ from typing import Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
  import requests
@@ -25,10 +25,10 @@ from .excel import (
25
25
  def load_file(
26
26
  file_path: str,
27
27
  num_rows: int = 500,
28
- encoding: str = None,
29
- sep: str = None,
28
+ encoding: Optional[str] = None,
29
+ sep: Optional[str] = None,
30
30
  verbose: bool = False,
31
- sheet_name: Union[str, int] = None,
31
+ sheet_name: Optional[Union[str, int]] = None,
32
32
  ) -> tuple[pd.DataFrame, dict]:
33
33
  file_name = file_path.split('/')[-1]
34
34
  engine = None
csv_detective/utils.py CHANGED
@@ -34,10 +34,5 @@ def prevent_nan(value: float) -> Optional[float]:
34
34
  return value
35
35
 
36
36
 
37
- def full_word_strictly_inside_string(word: str, string: str):
38
- return (
39
- word == string
40
- or (" " + word + " " in string)
41
- or (string.startswith(word + " "))
42
- or (string.endswith(" " + word))
43
- )
37
+ def is_word_in_string(word: str, string: str):
38
+ return word in string
csv_detective/validate.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Union
2
+ from typing import Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
@@ -18,33 +18,38 @@ def validate(
18
18
  file_path: str,
19
19
  previous_analysis: dict,
20
20
  num_rows: int = 500,
21
- encoding: str = None,
22
- sep: str = None,
21
+ encoding: Optional[str] = None,
22
+ sep: Optional[str] = None,
23
23
  verbose: bool = False,
24
24
  skipna: bool = True,
25
- sheet_name: Union[str, int] = None,
26
- ) -> tuple[bool, pd.DataFrame, dict]:
25
+ sheet_name: Optional[Union[str, int]] = None,
26
+ ) -> tuple[bool, Optional[pd.DataFrame], Optional[dict]]:
27
27
  """
28
28
  Verify is the given file has the same fields and types as in the previous analysis.
29
29
  """
30
- table, analysis = load_file(
31
- file_path=file_path,
32
- num_rows=num_rows,
33
- encoding=encoding,
34
- sep=sep,
35
- verbose=verbose,
36
- sheet_name=sheet_name,
37
- )
30
+ try:
31
+ table, analysis = load_file(
32
+ file_path=file_path,
33
+ num_rows=num_rows,
34
+ encoding=encoding,
35
+ sep=sep,
36
+ verbose=verbose,
37
+ sheet_name=sheet_name,
38
+ )
39
+ except Exception as e:
40
+ if verbose:
41
+ logging.warning(f"> Could not load the file with previous analysis values: {e}")
42
+ return False, None, None
38
43
  if verbose:
39
44
  logging.info("Comparing table with the previous analysis")
40
45
  logging.info("- Checking if all columns match")
41
46
  if (
42
- any(col_name not in list(table.columns) for col_name in previous_analysis["columns"])
43
- or any(col_name not in list(previous_analysis["columns"].keys()) for col_name in table.columns)
47
+ any(col_name not in analysis["header"] for col_name in previous_analysis["header"])
48
+ or any(col_name not in previous_analysis["header"] for col_name in analysis["header"])
44
49
  ):
45
50
  if verbose:
46
51
  logging.warning("> Columns do not match, proceeding with full analysis")
47
- return False, table, analysis
52
+ return False, None, None
48
53
  for col_name, args in previous_analysis["columns"].items():
49
54
  if verbose:
50
55
  logging.info(f"- Testing {col_name} for {args['format']}")
@@ -17,6 +17,7 @@
17
17
  - Better float detection [#113](https://github.com/datagouv/csv-detective/pull/113)
18
18
  - Refactor fields tests [#114](https://github.com/datagouv/csv-detective/pull/114)
19
19
  - Better code waldec and add code import [#116](https://github.com/datagouv/csv-detective/pull/116)
20
+ - Better validation and refactors [#117](https://github.com/datagouv/csv-detective/pull/117)
20
21
 
21
22
  ## 0.7.4 (2024-11-15)
22
23
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.7.5.dev1320
3
+ Version: 0.7.5.dev1335
4
4
  Summary: Detect CSV column content
5
5
  Home-page: https://github.com/etalab/csv_detective
6
6
  Author: Etalab
@@ -1,10 +1,10 @@
1
1
  csv_detective/__init__.py,sha256=vpK7WMkIQbcJzu6HKOwcn7PpHsNCCaXZ1YLMS5Wq9tM,165
2
2
  csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
3
- csv_detective/explore_csv.py,sha256=ocWlUEtuwZ-6bjDc6gfhC2-6DljMVhvXhHrfICCXGfQ,8986
3
+ csv_detective/explore_csv.py,sha256=K9OM1NGZI1u6E6J_rUbbkpeM5UHQysvu6PKwm4cso6I,9326
4
4
  csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
5
5
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
6
- csv_detective/utils.py,sha256=Bx_1k4Sdpd5PCjuAy4AeayCmmw7TMR_zgtKIHNLi5g0,1157
7
- csv_detective/validate.py,sha256=0wSi5GgKPRW3m66413a-9Uti1vBRam5pQxVA9Dc5jQ8,2368
6
+ csv_detective/utils.py,sha256=8cBKgWifWF7BG_uMfLmxtV45p6PZ4b50NjWXKoAAZ4s,1002
7
+ csv_detective/validate.py,sha256=4e7f8bNXPU9GqNx4QXXiaoINyotozbL52JB6psVAjyY,2631
8
8
  csv_detective/detect_fields/__init__.py,sha256=7Tz0Niaz0BboA3YVsp_6WPA6ywciwDN4-lOy_Ie_0Y8,976
9
9
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -74,60 +74,60 @@ csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRh
74
74
  csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
75
75
  csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=e5ROxhrXNCefLwL5lXTWHO0PEWwLHfqmowm7XoeqZ2I,1063
78
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=D_9QFvAeX5Nwp4qtQ0NEpKR0jpRlDx-rNBSrlYrw4nw,1096
79
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=rpzxUVsZyazVVguOorLadiJv_Vz1n04ijm0RbVmRDts,1025
80
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=VUqv3G-JO-9CJU4-EX5DXs4O22Lqm75vuOy9MngoojA,949
81
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=USIYj7PiULI_WCfDxpzRCW9tv8-FNYKWopsVZ3H79mE,1070
82
- csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=f9WroGVfB5jUzd_Rjs4XocZT2Ma-xZd2On9StUHy3F4,1012
83
- csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=iYD0UPhRVKYFv8DAEfe_RoQlE47igZ_MacsHxVLyYcM,948
84
- csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=fqNziX5ID6mVE5nVNviOsncVqkYyVvj7J_8hxN7_D1w,1229
85
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=EAcQ2FqTKQdxhSYr5VCuEpjc7BdGwTdMkLL_VL6ay7Y,957
86
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=X3vGdh_DHzWZXuV2-L9QhuWTLjHyaPZyS__s9Y5yiNg,1386
87
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
88
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Pf00tBADr7HvJLeW_YqY3QU1EBVJDi365woheAzsNKY,1139
89
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=LfvgcrjVsXmxT6xC3X8eQIiQ_STvPRwjUbUQ4TyfJE0,1144
90
- csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=RsI_QXMJOZ5PpKcoKWy7AmUHFjehHXcUezquZyt1eq4,1169
91
- csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=h9pE3xu2-PFw1jmDenkoKWmFkYmpK9-UgCboPlL7Aeg,1164
77
+ csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=ISgpkhy6KwOmKqCt6w_RpxZ7zm5gx2D3mp2UE9D6Pjw,1033
78
+ csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=_QKJX7Og8cL1AYBLjIbvULsy-XJ017G0ZXk7H_GOqdI,1067
79
+ csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=_lU5bXG8hODduVxVyXegZjRR_mxWM3SXfwb6stJbOrU,995
80
+ csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=qIFLhkj3vr0lfBHtDwYNhGqLgdzN0w7LRFJByt0pEts,919
81
+ csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=TUquZFf6cuTIvjvox8ReIiOqzJnepCZcLX21KNtWwyo,1040
82
+ csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=6I9DpXNMBYJ1bTqAiheFhnMo2vbrz51PdZttrbinGVA,982
83
+ csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=WQl7z3h0428A-4H5ytry0XseAjE7hKLVh2YvCFvqfuM,918
84
+ csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=qnCjAkBGwsKsfLtvW_EgG-9eK_SBgyFrBKE9Q0A7wxI,1199
85
+ csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=dLJPbSuOQETbl1IBeme5H4KXtDlfPBe5lIfczR4ek48,927
86
+ csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=fJrd8pIewZqAkNNfERWD39kK3oxzYy-Paxce66c3UnY,1356
87
+ csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=xRrXcUUlk7XqHuHbTXUToM3n90_kLXQxdSzMkcc9jIc,1351
88
+ csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=1uFQv436tkosABNVU_htAJcggJ6QRlF70-aBgHJHc8A,1109
89
+ csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=dOhUJy_vukt9xFnY2CG4wg1q9vHBUa00mbsu4YSN6xY,1114
90
+ csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=-k5shWSQnLpDvRWKuGFqt5ScbNyBO__vL-4UrL_hRjQ,1139
91
+ csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=uQKqMZvG4bs0eafvRHV2RwtbwFJ9vCFQNE2Ep23eHq0,1134
92
92
  csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=lcLdEdNo4rhLvqzP3C0rmU_1PaQvTdpviXt9xGSaGFc,939
94
- csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=DJykTRguggOlsIuyjYezJ99c8MGCSwwwCLcoQjfN40o,1024
95
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=idLo99rELzs1uc4mOcby9RLZLhhpsOp5AoTudT2jPwM,934
96
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=J5G8pldzBdXRaopYNzGDztRFIsI_7rdaAPQ_kSuz5PU,1043
97
- csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=9EXCmzKSa5PSWrPbVeLscbJCaiwQEXX-1rCr79U8XLA,975
98
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=9bq2171SrmDIHx4A0cAeSHfWyQl40e-dIR9_ur4cEHQ,1124
99
- csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=AEKBGWEKxDoT8k9BF-v9vl1SHc4DffiiFyhip-6tC78,956
100
- csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=9w2VCs8kq-XVRmxxwqZYIynfCPwbFbl-pBPqXtnXx8Y,1103
101
- csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Yqrp7NDEN0WRA_oktMb0wWoLQ99rzIvNvJ8jVhBCRD8,1040
102
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=gdzclIAjhr_k-a04l_FDz9kQywBfSA6vqa0UQxdaqNw,1143
103
- csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=mB0hC2JUKGnhGl6MUDFzSM_-t-Tvt3Vm21Gr_JXkL3k,1316
93
+ csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=OIOih96ohL50BXgkopAV6NTXQsp5hP78YC46g_r-hKs,909
94
+ csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=Nih32b26tuJs2f_x-XZ-cjD4nobgBhXsMALsQDlz2NM,994
95
+ csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=JcvDvLHlxddehJHEJNAAu3ZmjcJ__6qa4t440CFtKq0,904
96
+ csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=XgcgdjcLA1OdPktRPSPzlXePaK8GYR6SF1DCKSoZ6RA,1013
97
+ csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=FoWbiIxDHIcoQmyWMayqmnRedd0I_RuC_0SIhWIXzww,945
98
+ csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=TnZocEWxNwqcX5Y-c45dW9BCEWUMbwFlqM2p0XRTNWU,1094
99
+ csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=rn035P9h8PsZ-Fu-v71DxcA_6HH9vmJ8lH-hSPmsflg,926
100
+ csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=utC1MWILaja5dkNfg3T2-0gXgHxOpIi74L2SaS5Z2PE,1073
101
+ csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=zl45o9AtUgAjsH9WZsdU9nDbEXUEOxuRcAX2JOxUe4U,1010
102
+ csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=BeHgQwLDrFABECzDYfuAKmXhAFGqTK9mrjk2w3aecNY,1113
103
+ csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=XYs7d5CipvJcvL1OEIvqKNg1Ubb9nI2x54KG_jW8Sx8,1286
104
104
  csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=FHXmOIjH4e5n_mahtScgOVYUAi_M4PeHAnsuIm5LxCA,1074
106
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=hX0FPAia4x28GD398WvpeaBQ4_3F5G3xAhySmZBdi5w,934
105
+ csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=Ezpf-7lsk389VKdKMZvZ00rMqq070uSVVb8oko06KGw,1044
106
+ csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=5GytrQmPCmr-vndjcAS5cQWOO4RPvrfQh8KqH9qhrCc,904
107
107
  csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
109
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
110
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
111
- csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=0sYS6bF_xmmhqsJ0Wrx7GC3qBAYjK7uhVud_ZbIQHHQ,1072
112
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
113
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=SwR1NU0vpk8YdHTIk1wk9zQpNoUsoABq-K8GfRMY0fw,1705
114
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=z4rOrkCypI5JodgX9alTrV03IpetgAW4BGJuNvFlU4s,1145
108
+ csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=CUtYIsh08LjNoa-BJkxrYvHuwJBG--u1AK5BN4RDpL4,1035
109
+ csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=CUtYIsh08LjNoa-BJkxrYvHuwJBG--u1AK5BN4RDpL4,1035
110
+ csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=CUtYIsh08LjNoa-BJkxrYvHuwJBG--u1AK5BN4RDpL4,1035
111
+ csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=3scv7fZ5cxu5MR8RR-AF4KmGhkZT--CYcFg22IibhkY,1042
112
+ csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=xRrXcUUlk7XqHuHbTXUToM3n90_kLXQxdSzMkcc9jIc,1351
113
+ csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=yL8Fp4DcwOm0f5_5CbSZwbvGD1p3LOkRS7hxz778O7g,1675
114
+ csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=yHhVPefvqgl8Q1fEdstoxDeGyJNkJ-2b1S5cwdF4HTI,1115
115
115
  csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
- csv_detective/detect_labels/other/booleen/__init__.py,sha256=uvQ7yDVAlEO8AY44OMblh_ZrxPTOmdvFtbcQEanpWSo,987
117
- csv_detective/detect_labels/other/email/__init__.py,sha256=VRUYZXGn-hRqE2sY0JY-Oh_wtT568orDTBxBGYsgqxE,1148
118
- csv_detective/detect_labels/other/float/__init__.py,sha256=jIr1r9FFy8NWvi5fOuIhj52bc7cZmM3OeTo-c6TUWII,926
119
- csv_detective/detect_labels/other/int/__init__.py,sha256=G1GAlKNaOZH_l39Zpw85xkl7JcdnY5PlEEroyU78hlY,933
116
+ csv_detective/detect_labels/other/booleen/__init__.py,sha256=0AvbuPVr7corJLDOu-wNS9BOy6J8XzOPIouS9MyFKHA,957
117
+ csv_detective/detect_labels/other/email/__init__.py,sha256=0VXS8hWILdGRWugx9hEz5yEAnlaoJ6jYX3znkzjlDYE,1118
118
+ csv_detective/detect_labels/other/float/__init__.py,sha256=FD8NlVSZ0TARGKKKCkWYRT9vYwDXpQe7X4V7VPJNUrw,896
119
+ csv_detective/detect_labels/other/int/__init__.py,sha256=I8ff6zX1tsk4JtNWs0V0Vam-BtdiKiGyUkUvIysfbUY,903
120
120
  csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
121
121
  csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
122
- csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=3TW59y4vo4Pkx_fQrmEs1-gZbdJeNiK7ip25cpR829U,927
123
- csv_detective/detect_labels/other/twitter/__init__.py,sha256=x3b522ov_g-kmcq4k4eoZ8FQqrXdnlRJJit5UbnzIrQ,959
124
- csv_detective/detect_labels/other/url/__init__.py,sha256=wVQsWQzOuBY-cD7wn_PXcWLVEkknBA2lBCu8SRWsQG4,1202
125
- csv_detective/detect_labels/other/uuid/__init__.py,sha256=ySxqFvtGHguoiOyD5A1YRFY3SuubkgBAEY_Ud5kZVPM,931
122
+ csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=gyuizUcsQwdwKVmnaGJbauc01SkqhgaXtsq_vWlwsXs,897
123
+ csv_detective/detect_labels/other/twitter/__init__.py,sha256=MGuWhcmZFDcBz16v-g8By_k-RF3UimU7qb8QTAAs8PA,929
124
+ csv_detective/detect_labels/other/url/__init__.py,sha256=NSMvRhtNJgyVr2AQpkI1O-UWdBiovq62WHEmMb3WlOM,1172
125
+ csv_detective/detect_labels/other/uuid/__init__.py,sha256=ePXGCdVfKus67jvdeq5MZA1CA2j47PKjHhWnrsyCAi8,901
126
126
  csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
- csv_detective/detect_labels/temp/date/__init__.py,sha256=CRv-S0figO6MOPdE0Lv5hWdjtIr6EmWzwlcjn5ofIxo,1322
128
- csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=0lFdN5Z43m6Qm-wBqcyM_mceUmI4s3vqgLCM-Jlgoxw,1157
129
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=4N0EGJA_2vXC1iFptvzpU6IN7AIJH5MFUrRY2p7Cjfs,1175
130
- csv_detective/detect_labels/temp/year/__init__.py,sha256=3U9j8Hux432KdGtIyArq_-vScn-5eYFwpn976WM9N4M,1150
127
+ csv_detective/detect_labels/temp/date/__init__.py,sha256=oI77XxATeJLk27r8Cdg1DmSNYtLl5Se4zay3eG12eJ0,1292
128
+ csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=C8ZgzfZWVw6nebMuySpED2HRUho8W4rLxv6qDNpJvas,1127
129
+ csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=WPSWNPgDPAHBMT-Nv0X-6u3yTQfcsaab2NNiG2-8qgk,1145
130
+ csv_detective/detect_labels/temp/year/__init__.py,sha256=AGkHXXvo_oG9di9p9Glae-c8TIPJ0319isnNKOzBCjk,1120
131
131
  csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
132
132
  csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
133
133
  csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
@@ -136,31 +136,31 @@ csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41
136
136
  csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
137
137
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
138
138
  csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
139
- csv_detective/output/__init__.py,sha256=XDS4Dgvv6oloIao9JquHa0m1nnlQ_q2gHuEPGlaETic,1890
139
+ csv_detective/output/__init__.py,sha256=5KTevPfp_4MRxByJyOntQjToNfeG7dPQn-_13wSq7EU,1910
140
140
  csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
141
- csv_detective/output/example.py,sha256=26rY7XNXK47e9xJMl-Js8jJwFIuv7V7B7e256VecKuk,8652
141
+ csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
142
142
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
143
143
  csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
144
144
  csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
145
- csv_detective/parsing/columns.py,sha256=Oj0Ddp2fPZeL70GDWdF7GY2RmhiVdz0IEvoBJFt-wao,5701
145
+ csv_detective/parsing/columns.py,sha256=zY652tZdFpwnA0vA8nfE1I-1X7kw8NVAeRfblCSYAYE,5631
146
146
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
147
147
  csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
148
148
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
149
- csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
149
+ csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
150
150
  csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
151
- csv_detective-0.7.5.dev1320.data/data/share/csv_detective/CHANGELOG.md,sha256=aFDguybPGcPheztzpQNq-YVZZW1n8prG1txK4b32DhM,8084
152
- csv_detective-0.7.5.dev1320.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
153
- csv_detective-0.7.5.dev1320.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
154
- csv_detective-0.7.5.dev1320.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
151
+ csv_detective-0.7.5.dev1335.data/data/share/csv_detective/CHANGELOG.md,sha256=a_xgrE-o1Qk1NkVcuohY3Dp76R4l66cyf3IPHw7mB4E,8177
152
+ csv_detective-0.7.5.dev1335.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
153
+ csv_detective-0.7.5.dev1335.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
154
+ csv_detective-0.7.5.dev1335.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
155
155
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
156
  tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
157
157
  tests/test_fields.py,sha256=E6kEsp6_W56WW6FXWUl7hggsJv-vsKuOaJ9JLoFmrUw,9964
158
158
  tests/test_file.py,sha256=9APE1d43lQ8Dk8lwJFNUK_YekYYsQ0ae2_fgpcPE9mk,8116
159
159
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
160
160
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
161
- tests/test_validation.py,sha256=VwtBcnGAQ_eSFrBibWnMSTDjuy6y2JLlqvc3Zb667NY,479
162
- csv_detective-0.7.5.dev1320.dist-info/METADATA,sha256=lxx-TBya3ciYiOlxVY6YGAd7MVv7D6ChKGtl6gJDkRE,1386
163
- csv_detective-0.7.5.dev1320.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
164
- csv_detective-0.7.5.dev1320.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
165
- csv_detective-0.7.5.dev1320.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
166
- csv_detective-0.7.5.dev1320.dist-info/RECORD,,
161
+ tests/test_validation.py,sha256=x3UZoyx_uyseLtv8yf_OJmRQ27j2eX4_rQUgbq0F6pg,3215
162
+ csv_detective-0.7.5.dev1335.dist-info/METADATA,sha256=WThF7NjIybiB65F4Zn63wtay78anbqOg1dt6pXSHxCk,1386
163
+ csv_detective-0.7.5.dev1335.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
164
+ csv_detective-0.7.5.dev1335.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
165
+ csv_detective-0.7.5.dev1335.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
166
+ csv_detective-0.7.5.dev1335.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.4.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/test_validation.py CHANGED
@@ -1,18 +1,99 @@
1
1
  import json
2
2
 
3
3
  import pandas as pd
4
+ import pytest
4
5
 
6
+ from csv_detective.explore_csv import validate_then_detect
5
7
  from csv_detective.validate import validate
6
8
 
7
9
 
8
- def test_validation():
10
def set_nested_value(source_dict: dict, key_chain: list[str], value):
    """Store ``value`` in ``source_dict`` at the nested path ``key_chain``.

    The dictionary is modified in place and nothing is returned. Any
    intermediate key of the chain that is missing is created as an empty
    dictionary before descending into it.

    Args:
        source_dict: dictionary to update in place.
        key_chain: successive keys to follow; the last one receives ``value``.
        value: object stored at the end of the chain.

    Raises:
        IndexError: if ``key_chain`` is empty (there is no key to assign to).
    """
    if not key_chain:
        # Same exception type as indexing an empty list, but with a message
        # that says what is actually wrong.
        raise IndexError("key_chain must contain at least one key")
    *parent_keys, last_key = key_chain
    current_dict = source_dict
    for key in parent_keys:
        # setdefault inserts {} only when the key is absent, then returns the
        # (possibly pre-existing) child so we can keep descending.
        current_dict = current_dict.setdefault(key, {})
    current_dict[last_key] = value
17
+
18
+
19
def get_nested_value(source_dict: dict, key_chain: list[str]):
    """Return the object reached by following ``key_chain`` in ``source_dict``.

    Each key indexes the object obtained with the previous key, starting from
    ``source_dict`` itself. An empty chain returns ``source_dict`` unchanged.
    """
    node = source_dict
    for step in key_chain:
        node = node[step]
    return node
24
+
25
+
26
@pytest.mark.parametrize(
    "_params",
    (
        ((True, pd.DataFrame, dict), {}),
        ((False, None, None), {"separator": "|"}),
        ((False, None, None), {"encoding": "unknown"}),
        ((False, None, None), {"header": ["a", "b"]}),
        ((False, pd.DataFrame, dict), {
            "columns.NUMCOM": {
                "python_type": "int",
                "format": "int",
                "score": 1.0,
            },
        }),
    ),
)
def test_validation(_params):
    """Check the output of ``validate`` against a possibly altered analysis.

    Each parameter is a pair ``(expectations, modifications)``:

    * ``expectations`` is ``(should_be_valid, table_type, analysis_type)``;
      a ``None`` type means the corresponding output must be ``None``.
    * ``modifications`` maps dotted key paths to the values written into the
      reference analysis before it is handed to ``validate``.
    """
    (should_be_valid, table_type, analysis_type), modif_previous_analysis = _params
    # Explicit encoding so that reading the fixture does not depend on the
    # locale of the machine running the tests.
    with open("tests/data/a_test_file.json", "r", encoding="utf-8") as f:
        previous_analysis = json.load(f)
    for dotkey, new_value in modif_previous_analysis.items():
        set_nested_value(previous_analysis, dotkey.split("."), new_value)
    is_valid, table, analysis = validate(
        "tests/data/a_test_file.csv",
        previous_analysis=previous_analysis,
        num_rows=-1,
        # separator and encoding are taken from the (possibly altered) analysis
        sep=previous_analysis.get("separator"),
        encoding=previous_analysis.get("encoding"),
    )
    assert is_valid == should_be_valid
    if table_type is None:
        assert table is None
    else:
        assert isinstance(table, table_type)
    if analysis_type is None:
        assert analysis is None
    else:
        assert isinstance(analysis, analysis_type)
65
+
66
+
67
@pytest.mark.parametrize(
    "modif_previous_analysis",
    (
        {"separator": "|"},
        {"encoding": "unknown"},
        {"header": ["a", "b"]},
        {
            "columns.NUMCOM": {
                "python_type": "int",
                "format": "int",
                "score": 1.0,
            },
        },
    ),
)
def test_validate_then_detect(modif_previous_analysis):
    """Check that ``validate_then_detect`` returns the reference values.

    ``modif_previous_analysis`` maps dotted key paths to values that overwrite
    the reference analysis before the call. The values originally stored at
    those paths are remembered and must be found again, at the same paths, in
    the analysis returned by ``validate_then_detect``.
    """
    # Explicit encoding so that reading the fixture does not depend on the
    # locale of the machine running the tests.
    with open("tests/data/a_test_file.json", "r", encoding="utf-8") as f:
        previous_analysis = json.load(f)
    valid_values = {}
    for dotkey, new_value in modif_previous_analysis.items():
        keys = dotkey.split(".")
        # Remember the reference value before overwriting it.
        valid_values[dotkey] = get_nested_value(previous_analysis, keys)
        set_nested_value(previous_analysis, keys, new_value)
    analysis = validate_then_detect(
        "tests/data/a_test_file.csv",
        previous_analysis=previous_analysis,
        num_rows=-1,
        output_profile=True,
        save_results=False,
    )
    # checking that if not valid, the analysis has managed to retrieve the right values
    for dotkey, expected in valid_values.items():
        assert get_nested_value(analysis, dotkey.split(".")) == expected