PyPI - csv-detective - Versions diffs - 0.7.5.dev1320__py3-none-any.whl → 0.7.5.dev1335__py3-none-any.whl - Mend

csv-detective 0.7.5.dev1320__py3-none-any.whl → 0.7.5.dev1335__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

csv_detective/detect_labels/other/url/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from csv_detective.utils import full_word_strictly_inside_string
+from csv_detective.utils import is_word_in_string
 from csv_detective.parsing.text import _process_text
 PROPORTION = 0.5
@@ -38,7 +38,7 @@ def _is(header):
     words_combination_in_header = 0.5 * float(
         any(
             [
-                full_word_strictly_inside_string(
+                is_word_in_string(
                     words_combination, processed_header
                 ) for words_combination in words_combinations_list
             ]

csv_detective/detect_labels/other/uuid/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from csv_detective.utils import full_word_strictly_inside_string
+from csv_detective.utils import is_word_in_string
 from csv_detective.parsing.text import _process_text
 PROPORTION = 0.5
@@ -23,7 +23,7 @@ def _is(header):
     words_combination_in_header = 0.5 * float(
         any(
             [
-                full_word_strictly_inside_string(
+                is_word_in_string(
                     words_combination, processed_header
                 ) for words_combination in words_combinations_list
             ]

csv_detective/detect_labels/temp/date/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from csv_detective.utils import full_word_strictly_inside_string
+from csv_detective.utils import is_word_in_string
 from csv_detective.parsing.text import _process_text
 PROPORTION = 0.5
@@ -43,7 +43,7 @@ def _is(header):
     words_combination_in_header = 0.5 * float(
         any(
             [
-                full_word_strictly_inside_string(
+                is_word_in_string(
                     words_combination, processed_header
                 ) for words_combination in words_combinations_list
             ]

csv_detective/detect_labels/temp/datetime_iso/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from csv_detective.utils import full_word_strictly_inside_string
+from csv_detective.utils import is_word_in_string
 from csv_detective.parsing.text import _process_text
 PROPORTION = 0.5
@@ -35,7 +35,7 @@ def _is(header):
     words_combination_in_header = 0.5 * float(
         any(
             [
-                full_word_strictly_inside_string(
+                is_word_in_string(
                     words_combination, processed_header
                 ) for words_combination in words_combinations_list
             ]

csv_detective/detect_labels/temp/datetime_rfc822/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from csv_detective.utils import full_word_strictly_inside_string
+from csv_detective.utils import is_word_in_string
 from csv_detective.parsing.text import _process_text
 PROPORTION = 0.5
@@ -34,7 +34,7 @@ def _is(header):
     words_combination_in_header = 0.5 * float(
         any(
             [
-                full_word_strictly_inside_string(
+                is_word_in_string(
                     words_combination, processed_header
                 ) for words_combination in words_combinations_list
             ]

csv_detective/detect_labels/temp/year/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from csv_detective.utils import full_word_strictly_inside_string
+from csv_detective.utils import is_word_in_string
 from csv_detective.parsing.text import _process_text
 PROPORTION = 0.5
@@ -34,7 +34,7 @@ def _is(header):
     words_combination_in_header = 0.5 * float(
         any(
             [
-                full_word_strictly_inside_string(
+                is_word_in_string(
                     words_combination, processed_header
                 ) for words_combination in words_combinations_list
             ]

csv_detective/explore_csv.py CHANGED Viewed

@@ -111,15 +111,12 @@ def validate_then_detect(
     user_input_tests: Union[str, list[str]] = "ALL",
     limited_output: bool = True,
     save_results: Union[bool, str] = True,
-    encoding: str = None,
-    sep: str = None,
     skipna: bool = True,
     output_profile: bool = False,
     output_schema: bool = False,
     output_df: bool = False,
     cast_json: bool = True,
     verbose: bool = False,
-    sheet_name: Union[str, int] = None,
 ):
     if verbose:
@@ -131,17 +128,25 @@ def validate_then_detect(
         file_path=file_path,
         previous_analysis=previous_analysis,
         num_rows=num_rows,
-        encoding=encoding,
-        sep=sep,
+        encoding=previous_analysis.get("encoding"),
+        sep=previous_analysis.get("separator"),
+        sheet_name=previous_analysis.get("sheet_name"),
         verbose=verbose,
         skipna=skipna,
-        sheet_name=sheet_name,
     )
     if is_valid:
         # skipping formats detection as the validation is successful
         analysis = previous_analysis
+        # profile has to be regenerated, it's independent from analysis
         del analysis["profile"]
     else:
+        if analysis is None:
+            # if loading failed in validate, we load it from scratch
+            table, analysis = load_file(
+                file_path=file_path,
+                num_rows=num_rows,
+                verbose=verbose,
+            )
         analysis = detect_formats(
             table=table,
             analysis=analysis,
@@ -163,7 +168,7 @@ def validate_then_detect(
             output_df=output_df,
             cast_json=cast_json,
             verbose=verbose,
-            sheet_name=sheet_name,
+            sheet_name=analysis.get("sheet_name"),
         )
     finally:
         if verbose:

csv_detective/output/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Union
+from typing import Optional, Union
 import pandas as pd
@@ -22,7 +22,7 @@ def generate_output(
     output_df: bool = False,
     cast_json: bool = True,
     verbose: bool = False,
-    sheet_name: Union[str, int] = None,
+    sheet_name: Optional[Union[str, int]] = None,
 ) -> Union[dict, tuple[dict, pd.DataFrame]]:
     if output_profile:

csv_detective/output/example.py CHANGED Viewed

@@ -70,7 +70,7 @@ def create_example_csv_file(
         return str(uuid.uuid4())
     def _date(
-        date_range: Union[None, list[str]] = None,
+        date_range: Optional[list[str]] = None,
         format: str = "%Y-%m-%d",
         required: bool = True,
     ) -> str:

csv_detective/parsing/columns.py CHANGED Viewed

@@ -76,7 +76,6 @@ def test_col_label(label: str, test_func: Callable, proportion: float = 1, limit
 def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna: bool = True, verbose: bool = False):
-    # Initialising dict for tests
     if verbose:
         start = time()
         logging.info("Testing columns to get types")
@@ -112,7 +111,6 @@ def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna:
 def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
-    # Initialising dict for tests
     if verbose:
         start = time()
         logging.info("Testing labels to get types")

csv_detective/parsing/load.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from io import BytesIO, StringIO
-from typing import Union
+from typing import Optional, Union
 import pandas as pd
 import requests
@@ -25,10 +25,10 @@ from .excel import (
 def load_file(
     file_path: str,
     num_rows: int = 500,
-    encoding: str = None,
-    sep: str = None,
+    encoding: Optional[str] = None,
+    sep: Optional[str] = None,
     verbose: bool = False,
-    sheet_name: Union[str, int] = None,
+    sheet_name: Optional[Union[str, int]] = None,
 ) -> tuple[pd.DataFrame, dict]:
     file_name = file_path.split('/')[-1]
     engine = None

csv_detective/utils.py CHANGED Viewed

@@ -34,10 +34,5 @@ def prevent_nan(value: float) -> Optional[float]:
     return value
-def full_word_strictly_inside_string(word: str, string: str):
-    return (
-        word == string
-        or (" " + word + " " in string)
-        or (string.startswith(word + " "))
-        or (string.endswith(" " + word))
-    )
+def is_word_in_string(word: str, string: str):
+    return word in string

csv_detective/validate.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from typing import Union
+from typing import Optional, Union
 import pandas as pd
@@ -18,33 +18,38 @@ def validate(
     file_path: str,
     previous_analysis: dict,
     num_rows: int = 500,
-    encoding: str = None,
-    sep: str = None,
+    encoding: Optional[str] = None,
+    sep: Optional[str] = None,
     verbose: bool = False,
     skipna: bool = True,
-    sheet_name: Union[str, int] = None,
-) -> tuple[bool, pd.DataFrame, dict]:
+    sheet_name: Optional[Union[str, int]] = None,
+) -> tuple[bool, Optional[pd.DataFrame], Optional[dict]]:
     """
     Verify is the given file has the same fields and types as in the previous analysis.
     """
-    table, analysis = load_file(
-        file_path=file_path,
-        num_rows=num_rows,
-        encoding=encoding,
-        sep=sep,
-        verbose=verbose,
-        sheet_name=sheet_name,
-    )
+    try:
+        table, analysis = load_file(
+            file_path=file_path,
+            num_rows=num_rows,
+            encoding=encoding,
+            sep=sep,
+            verbose=verbose,
+            sheet_name=sheet_name,
+        )
+    except Exception as e:
+        if verbose:
+            logging.warning(f"> Could not load the file with previous analysis values: {e}")
+        return False, None, None
     if verbose:
         logging.info("Comparing table with the previous analysis")
         logging.info("- Checking if all columns match")
     if (
-        any(col_name not in list(table.columns) for col_name in previous_analysis["columns"])
-        or any(col_name not in list(previous_analysis["columns"].keys()) for col_name in table.columns)
+        any(col_name not in analysis["header"] for col_name in previous_analysis["header"])
+        or any(col_name not in previous_analysis["header"] for col_name in analysis["header"])
     ):
         if verbose:
             logging.warning("> Columns do not match, proceeding with full analysis")
-        return False, table, analysis
+        return False, None, None
     for col_name, args in previous_analysis["columns"].items():
         if verbose:
             logging.info(f"- Testing {col_name} for {args['format']}")

{csv_detective-0.7.5.dev1320.data → csv_detective-0.7.5.dev1335.data}/data/share/csv_detective/CHANGELOG.md RENAMED Viewed

@@ -17,6 +17,7 @@
 - Better float detection [#113](https://github.com/datagouv/csv-detective/pull/113)
 - Refactor fields tests [#114](https://github.com/datagouv/csv-detective/pull/114)
 - Better code waldec and add code import [#116](https://github.com/datagouv/csv-detective/pull/116)
+- Better validation and refactors [#117](https://github.com/datagouv/csv-detective/pull/117)
 ## 0.7.4 (2024-11-15)

{csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: csv_detective
-Version: 0.7.5.dev1320
+Version: 0.7.5.dev1335
 Summary: Detect CSV column content
 Home-page: https://github.com/etalab/csv_detective
 Author: Etalab

{csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
 csv_detective/__init__.py,sha256=vpK7WMkIQbcJzu6HKOwcn7PpHsNCCaXZ1YLMS5Wq9tM,165
 csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
-csv_detective/explore_csv.py,sha256=ocWlUEtuwZ-6bjDc6gfhC2-6DljMVhvXhHrfICCXGfQ,8986
+csv_detective/explore_csv.py,sha256=K9OM1NGZI1u6E6J_rUbbkpeM5UHQysvu6PKwm4cso6I,9326
 csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
 csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
-csv_detective/utils.py,sha256=Bx_1k4Sdpd5PCjuAy4AeayCmmw7TMR_zgtKIHNLi5g0,1157
-csv_detective/validate.py,sha256=0wSi5GgKPRW3m66413a-9Uti1vBRam5pQxVA9Dc5jQ8,2368
+csv_detective/utils.py,sha256=8cBKgWifWF7BG_uMfLmxtV45p6PZ4b50NjWXKoAAZ4s,1002
+csv_detective/validate.py,sha256=4e7f8bNXPU9GqNx4QXXiaoINyotozbL52JB6psVAjyY,2631
 csv_detective/detect_fields/__init__.py,sha256=7Tz0Niaz0BboA3YVsp_6WPA6ywciwDN4-lOy_Ie_0Y8,976
 csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -74,60 +74,60 @@ csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRh
 csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
 csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=e5ROxhrXNCefLwL5lXTWHO0PEWwLHfqmowm7XoeqZ2I,1063
-csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=D_9QFvAeX5Nwp4qtQ0NEpKR0jpRlDx-rNBSrlYrw4nw,1096
-csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=rpzxUVsZyazVVguOorLadiJv_Vz1n04ijm0RbVmRDts,1025
-csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=VUqv3G-JO-9CJU4-EX5DXs4O22Lqm75vuOy9MngoojA,949
-csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=USIYj7PiULI_WCfDxpzRCW9tv8-FNYKWopsVZ3H79mE,1070
-csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=f9WroGVfB5jUzd_Rjs4XocZT2Ma-xZd2On9StUHy3F4,1012
-csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=iYD0UPhRVKYFv8DAEfe_RoQlE47igZ_MacsHxVLyYcM,948
-csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=fqNziX5ID6mVE5nVNviOsncVqkYyVvj7J_8hxN7_D1w,1229
-csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=EAcQ2FqTKQdxhSYr5VCuEpjc7BdGwTdMkLL_VL6ay7Y,957
-csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=X3vGdh_DHzWZXuV2-L9QhuWTLjHyaPZyS__s9Y5yiNg,1386
-csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
-csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Pf00tBADr7HvJLeW_YqY3QU1EBVJDi365woheAzsNKY,1139
-csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=LfvgcrjVsXmxT6xC3X8eQIiQ_STvPRwjUbUQ4TyfJE0,1144
-csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=RsI_QXMJOZ5PpKcoKWy7AmUHFjehHXcUezquZyt1eq4,1169
-csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=h9pE3xu2-PFw1jmDenkoKWmFkYmpK9-UgCboPlL7Aeg,1164
+csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=ISgpkhy6KwOmKqCt6w_RpxZ7zm5gx2D3mp2UE9D6Pjw,1033
+csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=_QKJX7Og8cL1AYBLjIbvULsy-XJ017G0ZXk7H_GOqdI,1067
+csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=_lU5bXG8hODduVxVyXegZjRR_mxWM3SXfwb6stJbOrU,995
+csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=qIFLhkj3vr0lfBHtDwYNhGqLgdzN0w7LRFJByt0pEts,919
+csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=TUquZFf6cuTIvjvox8ReIiOqzJnepCZcLX21KNtWwyo,1040
+csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=6I9DpXNMBYJ1bTqAiheFhnMo2vbrz51PdZttrbinGVA,982
+csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=WQl7z3h0428A-4H5ytry0XseAjE7hKLVh2YvCFvqfuM,918
+csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=qnCjAkBGwsKsfLtvW_EgG-9eK_SBgyFrBKE9Q0A7wxI,1199
+csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=dLJPbSuOQETbl1IBeme5H4KXtDlfPBe5lIfczR4ek48,927
+csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=fJrd8pIewZqAkNNfERWD39kK3oxzYy-Paxce66c3UnY,1356
+csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=xRrXcUUlk7XqHuHbTXUToM3n90_kLXQxdSzMkcc9jIc,1351
+csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=1uFQv436tkosABNVU_htAJcggJ6QRlF70-aBgHJHc8A,1109
+csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=dOhUJy_vukt9xFnY2CG4wg1q9vHBUa00mbsu4YSN6xY,1114
+csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=-k5shWSQnLpDvRWKuGFqt5ScbNyBO__vL-4UrL_hRjQ,1139
+csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=uQKqMZvG4bs0eafvRHV2RwtbwFJ9vCFQNE2Ep23eHq0,1134
 csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=lcLdEdNo4rhLvqzP3C0rmU_1PaQvTdpviXt9xGSaGFc,939
-csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=DJykTRguggOlsIuyjYezJ99c8MGCSwwwCLcoQjfN40o,1024
-csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=idLo99rELzs1uc4mOcby9RLZLhhpsOp5AoTudT2jPwM,934
-csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=J5G8pldzBdXRaopYNzGDztRFIsI_7rdaAPQ_kSuz5PU,1043
-csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=9EXCmzKSa5PSWrPbVeLscbJCaiwQEXX-1rCr79U8XLA,975
-csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=9bq2171SrmDIHx4A0cAeSHfWyQl40e-dIR9_ur4cEHQ,1124
-csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=AEKBGWEKxDoT8k9BF-v9vl1SHc4DffiiFyhip-6tC78,956
-csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=9w2VCs8kq-XVRmxxwqZYIynfCPwbFbl-pBPqXtnXx8Y,1103
-csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Yqrp7NDEN0WRA_oktMb0wWoLQ99rzIvNvJ8jVhBCRD8,1040
-csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=gdzclIAjhr_k-a04l_FDz9kQywBfSA6vqa0UQxdaqNw,1143
-csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=mB0hC2JUKGnhGl6MUDFzSM_-t-Tvt3Vm21Gr_JXkL3k,1316
+csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=OIOih96ohL50BXgkopAV6NTXQsp5hP78YC46g_r-hKs,909
+csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=Nih32b26tuJs2f_x-XZ-cjD4nobgBhXsMALsQDlz2NM,994
+csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=JcvDvLHlxddehJHEJNAAu3ZmjcJ__6qa4t440CFtKq0,904
+csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=XgcgdjcLA1OdPktRPSPzlXePaK8GYR6SF1DCKSoZ6RA,1013
+csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=FoWbiIxDHIcoQmyWMayqmnRedd0I_RuC_0SIhWIXzww,945
+csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=TnZocEWxNwqcX5Y-c45dW9BCEWUMbwFlqM2p0XRTNWU,1094
+csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=rn035P9h8PsZ-Fu-v71DxcA_6HH9vmJ8lH-hSPmsflg,926
+csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=utC1MWILaja5dkNfg3T2-0gXgHxOpIi74L2SaS5Z2PE,1073
+csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=zl45o9AtUgAjsH9WZsdU9nDbEXUEOxuRcAX2JOxUe4U,1010
+csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=BeHgQwLDrFABECzDYfuAKmXhAFGqTK9mrjk2w3aecNY,1113
+csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=XYs7d5CipvJcvL1OEIvqKNg1Ubb9nI2x54KG_jW8Sx8,1286
 csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=FHXmOIjH4e5n_mahtScgOVYUAi_M4PeHAnsuIm5LxCA,1074
-csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=hX0FPAia4x28GD398WvpeaBQ4_3F5G3xAhySmZBdi5w,934
+csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=Ezpf-7lsk389VKdKMZvZ00rMqq070uSVVb8oko06KGw,1044
+csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=5GytrQmPCmr-vndjcAS5cQWOO4RPvrfQh8KqH9qhrCc,904
 csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
-csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
-csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
-csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=0sYS6bF_xmmhqsJ0Wrx7GC3qBAYjK7uhVud_ZbIQHHQ,1072
-csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
-csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=SwR1NU0vpk8YdHTIk1wk9zQpNoUsoABq-K8GfRMY0fw,1705
-csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=z4rOrkCypI5JodgX9alTrV03IpetgAW4BGJuNvFlU4s,1145
+csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=CUtYIsh08LjNoa-BJkxrYvHuwJBG--u1AK5BN4RDpL4,1035
+csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=CUtYIsh08LjNoa-BJkxrYvHuwJBG--u1AK5BN4RDpL4,1035
+csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=CUtYIsh08LjNoa-BJkxrYvHuwJBG--u1AK5BN4RDpL4,1035
+csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=3scv7fZ5cxu5MR8RR-AF4KmGhkZT--CYcFg22IibhkY,1042
+csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=xRrXcUUlk7XqHuHbTXUToM3n90_kLXQxdSzMkcc9jIc,1351
+csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=yL8Fp4DcwOm0f5_5CbSZwbvGD1p3LOkRS7hxz778O7g,1675
+csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=yHhVPefvqgl8Q1fEdstoxDeGyJNkJ-2b1S5cwdF4HTI,1115
 csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/other/booleen/__init__.py,sha256=uvQ7yDVAlEO8AY44OMblh_ZrxPTOmdvFtbcQEanpWSo,987
-csv_detective/detect_labels/other/email/__init__.py,sha256=VRUYZXGn-hRqE2sY0JY-Oh_wtT568orDTBxBGYsgqxE,1148
-csv_detective/detect_labels/other/float/__init__.py,sha256=jIr1r9FFy8NWvi5fOuIhj52bc7cZmM3OeTo-c6TUWII,926
-csv_detective/detect_labels/other/int/__init__.py,sha256=G1GAlKNaOZH_l39Zpw85xkl7JcdnY5PlEEroyU78hlY,933
+csv_detective/detect_labels/other/booleen/__init__.py,sha256=0AvbuPVr7corJLDOu-wNS9BOy6J8XzOPIouS9MyFKHA,957
+csv_detective/detect_labels/other/email/__init__.py,sha256=0VXS8hWILdGRWugx9hEz5yEAnlaoJ6jYX3znkzjlDYE,1118
+csv_detective/detect_labels/other/float/__init__.py,sha256=FD8NlVSZ0TARGKKKCkWYRT9vYwDXpQe7X4V7VPJNUrw,896
+csv_detective/detect_labels/other/int/__init__.py,sha256=I8ff6zX1tsk4JtNWs0V0Vam-BtdiKiGyUkUvIysfbUY,903
 csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
 csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
-csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=3TW59y4vo4Pkx_fQrmEs1-gZbdJeNiK7ip25cpR829U,927
-csv_detective/detect_labels/other/twitter/__init__.py,sha256=x3b522ov_g-kmcq4k4eoZ8FQqrXdnlRJJit5UbnzIrQ,959
-csv_detective/detect_labels/other/url/__init__.py,sha256=wVQsWQzOuBY-cD7wn_PXcWLVEkknBA2lBCu8SRWsQG4,1202
-csv_detective/detect_labels/other/uuid/__init__.py,sha256=ySxqFvtGHguoiOyD5A1YRFY3SuubkgBAEY_Ud5kZVPM,931
+csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=gyuizUcsQwdwKVmnaGJbauc01SkqhgaXtsq_vWlwsXs,897
+csv_detective/detect_labels/other/twitter/__init__.py,sha256=MGuWhcmZFDcBz16v-g8By_k-RF3UimU7qb8QTAAs8PA,929
+csv_detective/detect_labels/other/url/__init__.py,sha256=NSMvRhtNJgyVr2AQpkI1O-UWdBiovq62WHEmMb3WlOM,1172
+csv_detective/detect_labels/other/uuid/__init__.py,sha256=ePXGCdVfKus67jvdeq5MZA1CA2j47PKjHhWnrsyCAi8,901
 csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/temp/date/__init__.py,sha256=CRv-S0figO6MOPdE0Lv5hWdjtIr6EmWzwlcjn5ofIxo,1322
-csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=0lFdN5Z43m6Qm-wBqcyM_mceUmI4s3vqgLCM-Jlgoxw,1157
-csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=4N0EGJA_2vXC1iFptvzpU6IN7AIJH5MFUrRY2p7Cjfs,1175
-csv_detective/detect_labels/temp/year/__init__.py,sha256=3U9j8Hux432KdGtIyArq_-vScn-5eYFwpn976WM9N4M,1150
+csv_detective/detect_labels/temp/date/__init__.py,sha256=oI77XxATeJLk27r8Cdg1DmSNYtLl5Se4zay3eG12eJ0,1292
+csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=C8ZgzfZWVw6nebMuySpED2HRUho8W4rLxv6qDNpJvas,1127
+csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=WPSWNPgDPAHBMT-Nv0X-6u3yTQfcsaab2NNiG2-8qgk,1145
+csv_detective/detect_labels/temp/year/__init__.py,sha256=AGkHXXvo_oG9di9p9Glae-c8TIPJ0319isnNKOzBCjk,1120
 csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
 csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
 csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
@@ -136,31 +136,31 @@ csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41
 csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
 csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
 csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
-csv_detective/output/__init__.py,sha256=XDS4Dgvv6oloIao9JquHa0m1nnlQ_q2gHuEPGlaETic,1890
+csv_detective/output/__init__.py,sha256=5KTevPfp_4MRxByJyOntQjToNfeG7dPQn-_13wSq7EU,1910
 csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
-csv_detective/output/example.py,sha256=26rY7XNXK47e9xJMl-Js8jJwFIuv7V7B7e256VecKuk,8652
+csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
 csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
 csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
 csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
-csv_detective/parsing/columns.py,sha256=Oj0Ddp2fPZeL70GDWdF7GY2RmhiVdz0IEvoBJFt-wao,5701
+csv_detective/parsing/columns.py,sha256=zY652tZdFpwnA0vA8nfE1I-1X7kw8NVAeRfblCSYAYE,5631
 csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
 csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
 csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
-csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
+csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
 csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
-csv_detective-0.7.5.dev1320.data/data/share/csv_detective/CHANGELOG.md,sha256=aFDguybPGcPheztzpQNq-YVZZW1n8prG1txK4b32DhM,8084
-csv_detective-0.7.5.dev1320.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
-csv_detective-0.7.5.dev1320.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
-csv_detective-0.7.5.dev1320.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1335.data/data/share/csv_detective/CHANGELOG.md,sha256=a_xgrE-o1Qk1NkVcuohY3Dp76R4l66cyf3IPHw7mB4E,8177
+csv_detective-0.7.5.dev1335.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1335.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+csv_detective-0.7.5.dev1335.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
 tests/test_fields.py,sha256=E6kEsp6_W56WW6FXWUl7hggsJv-vsKuOaJ9JLoFmrUw,9964
 tests/test_file.py,sha256=9APE1d43lQ8Dk8lwJFNUK_YekYYsQ0ae2_fgpcPE9mk,8116
 tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
 tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
-tests/test_validation.py,sha256=VwtBcnGAQ_eSFrBibWnMSTDjuy6y2JLlqvc3Zb667NY,479
-csv_detective-0.7.5.dev1320.dist-info/METADATA,sha256=lxx-TBya3ciYiOlxVY6YGAd7MVv7D6ChKGtl6gJDkRE,1386
-csv_detective-0.7.5.dev1320.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
-csv_detective-0.7.5.dev1320.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.7.5.dev1320.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
-csv_detective-0.7.5.dev1320.dist-info/RECORD,,
+tests/test_validation.py,sha256=x3UZoyx_uyseLtv8yf_OJmRQ27j2eX4_rQUgbq0F6pg,3215
+csv_detective-0.7.5.dev1335.dist-info/METADATA,sha256=WThF7NjIybiB65F4Zn63wtay78anbqOg1dt6pXSHxCk,1386
+csv_detective-0.7.5.dev1335.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+csv_detective-0.7.5.dev1335.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.7.5.dev1335.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+csv_detective-0.7.5.dev1335.dist-info/RECORD,,

{csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.4.0)
+Generator: setuptools (80.7.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

tests/test_validation.py CHANGED Viewed

@@ -1,18 +1,99 @@
 import json
 import pandas as pd
+import pytest
+from csv_detective.explore_csv import validate_then_detect
 from csv_detective.validate import validate
-def test_validation():
+def set_nested_value(source_dict: dict, key_chain: list[str], value):
+    current_dict = source_dict
+    for key in key_chain[:-1]:
+        if key not in current_dict:
+            current_dict[key] = {}
+        current_dict = current_dict[key]
+    current_dict[key_chain[-1]] = value
+def get_nested_value(source_dict: dict, key_chain: list[str]):
+    result = source_dict
+    for k in key_chain:
+        result = result[k]
+    return result
+@pytest.mark.parametrize(
+    "_params",
+    (
+        ((True, pd.DataFrame, dict), {}),
+        ((False, None, None), {"separator": "|"}),
+        ((False, None, None), {"encoding": "unknown"}),
+        ((False, None, None), {"header": ["a", "b"]}),
+        ((False, pd.DataFrame, dict), {
+            "columns.NUMCOM": {
+                "python_type": "int",
+                "format": "int",
+                "score": 1.0,
+            },
+        }),
+    ),
+)
+def test_validation(_params):
+    (should_be_valid, table_type, analysis_type), modif_previous_analysis = _params
     with open("tests/data/a_test_file.json", "r") as f:
         previous_analysis = json.load(f)
+    for dotkey in modif_previous_analysis:
+        keys = dotkey.split(".")
+        set_nested_value(previous_analysis, keys, modif_previous_analysis[dotkey])
     is_valid, table, analysis = validate(
         "tests/data/a_test_file.csv",
         previous_analysis=previous_analysis,
         num_rows=-1,
+        sep=previous_analysis.get("separator"),
+        encoding=previous_analysis.get("encoding"),
+    )
+    assert is_valid == should_be_valid
+    if table_type is None:
+        assert table is None
+    else:
+        assert isinstance(table, table_type)
+    if analysis_type is None:
+        assert analysis is None
+    else:
+        assert isinstance(analysis, analysis_type)
+@pytest.mark.parametrize(
+    "modif_previous_analysis",
+    (
+        {"separator": "|"},
+        {"encoding": "unknown"},
+        {"header": ["a", "b"]},
+        {
+            "columns.NUMCOM": {
+                "python_type": "int",
+                "format": "int",
+                "score": 1.0,
+            },
+        },
+    ),
+)
+def test_validate_then_detect(modif_previous_analysis):
+    with open("tests/data/a_test_file.json", "r") as f:
+        previous_analysis = json.load(f)
+    valid_values = {}
+    for dotkey in modif_previous_analysis:
+        keys = dotkey.split(".")
+        valid_values[dotkey] = get_nested_value(previous_analysis, keys)
+        set_nested_value(previous_analysis, keys, modif_previous_analysis[dotkey])
+    analysis = validate_then_detect(
+        "tests/data/a_test_file.csv",
+        previous_analysis=previous_analysis,
+        num_rows=-1,
+        output_profile=True,
+        save_results=False,
     )
-    assert is_valid is True
-    assert isinstance(table, pd.DataFrame)
-    assert isinstance(analysis, dict)
+    # checking that if not valid, the analysis has managed to retrieve the right values
+    for dotkey in modif_previous_analysis:
+        assert get_nested_value(analysis, dotkey.split(".")) == valid_values[dotkey]

{csv_detective-0.7.5.dev1320.data → csv_detective-0.7.5.dev1335.data}/data/share/csv_detective/LICENSE.AGPL.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1320.data → csv_detective-0.7.5.dev1335.data}/data/share/csv_detective/README.md RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/licenses/LICENSE.AGPL.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1320.dist-info → csv_detective-0.7.5.dev1335.dist-info}/top_level.txt RENAMED Viewed

File without changes

csv-detective 0.7.5.dev1320__py3-none-any.whl → 0.7.5.dev1335__py3-none-any.whl

csv-detective 0.7.5.dev1320__py3-none-any.whl → 0.7.5.dev1335__py3-none-any.whl