PyPI - csv-detective - Versions diffs - 0.7.5.dev1180__py3-none-any.whl → 0.7.5.dev1209__py3-none-any.whl - Mend

csv-detective 0.7.5.dev1180__py3-none-any.whl → 0.7.5.dev1209__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

csv_detective/utils.py CHANGED Viewed

@@ -1,15 +1,6 @@
-from typing import Callable, Optional, Union
-import json
-import pandas as pd
 import logging
-from time import time
-from datetime import date, datetime
-from csv_detective.detect_fields.other.booleen import bool_casting
-from csv_detective.detect_fields.other.float import float_casting
-from csv_detective.detect_fields.temp.date import date_casting
-logging.basicConfig(level=logging.INFO)
+import math
+from typing import Optional
 def display_logs_depending_process_time(prompt: str, duration: float):
@@ -25,193 +16,20 @@ def display_logs_depending_process_time(prompt: str, duration: float):
     if duration < threshold_warn:
         logging.info(prompt)
     elif duration < threshold_critical:
-        logging.warn(prompt)
+        logging.warning(prompt)
     else:
         logging.critical(prompt)
-def test_col_val(
-    serie: pd.Series,
-    test_func: Callable,
-    proportion: float = 0.9,
-    skipna: bool = True,
-    limited_output: bool = False,
-    verbose: bool = False,
-):
-    """Tests values of the serie using test_func.
-         - skipna : if True indicates that NaNs are not counted as False
-         - proportion :  indicates the proportion of values that have to pass the test
-    for the serie to be detected as a certain format
-    """
-    if verbose:
-        start = time()
-    # TODO : change for a cleaner method and only test columns in modules labels
-    def apply_test_func(serie: pd.Series, test_func: Callable, _range: int):
-        return serie.sample(n=_range).apply(test_func)
-    try:
-        if skipna:
-            serie = serie[serie.notnull()]
-        ser_len = len(serie)
-        if ser_len == 0:
-            return 0.0
-        if not limited_output:
-            result = apply_test_func(serie, test_func, ser_len).sum() / ser_len
-            return result if result >= proportion else 0.0
-        else:
-            if proportion == 1:  # Then try first 1 value, then 5, then all
-                for _range in [
-                    min(1, ser_len),
-                    min(5, ser_len),
-                    ser_len,
-                ]:  # Pour ne pas faire d'opérations inutiles, on commence par 1,
-                    # puis 5 valeurs puis la serie complète
-                    if all(apply_test_func(serie, test_func, _range)):
-                        # print(serie.name, ': check OK')
-                        pass
-                    else:
-                        return 0.0
-                return 1.0
-            else:
-                # if we have a proportion, statistically it's OK to analyse up to 10k rows
-                # (arbitrary number) and get a significant result
-                to_analyse = min(ser_len, 10000)
-                result = apply_test_func(serie, test_func, to_analyse).sum() / to_analyse
-                return result if result >= proportion else 0.0
-    finally:
-        if verbose and time() - start > 3:
-            display_logs_depending_process_time(
-                f"\t/!\\ Column '{serie.name}' took too long ({round(time() - start, 3)}s)",
-                time() - start
-            )
-def test_col_label(label: str, test_func: Callable, proportion: float = 1, limited_output: bool = False):
-    """Tests label (from header) using test_func.
-    - proportion :  indicates the minimum score to pass the test for the serie
-    to be detected as a certain format
-    """
-    if not limited_output:
-        return test_func(label)
-    else:
-        result = test_func(label)
-        return result if result >= proportion else 0
-def test_col(table: pd.DataFrame, all_tests: list, limited_output: bool, skipna: bool = True, verbose: bool = False):
-    # Initialising dict for tests
-    if verbose:
-        start = time()
-        logging.info("Testing columns to get types")
-    test_funcs = dict()
-    for test in all_tests:
-        name = test.__name__.split(".")[-1]
-        test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
-    return_table = pd.DataFrame(columns=table.columns)
-    for idx, (key, value) in enumerate(test_funcs.items()):
-        if verbose:
-            start_type = time()
-            logging.info(f"\t- Starting with type '{key}'")
-        # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
-        # => the following needs to change, "apply" means all columns are tested for one type at once
-        return_table.loc[key] = table.apply(
-            lambda serie: test_col_val(
-                serie,
-                value["func"],
-                value["prop"],
-                skipna=skipna,
-                limited_output=limited_output,
-                verbose=verbose,
-            )
-        )
-        if verbose:
-            display_logs_depending_process_time(
-                f'\t> Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
-                time() - start_type
-            )
-    if verbose:
-        display_logs_depending_process_time(f"Done testing columns in {round(time() - start, 3)}s", time() - start)
-    return return_table
-def test_label(table: pd.DataFrame, all_tests: list, limited_output: bool, verbose: bool = False):
-    # Initialising dict for tests
-    if verbose:
-        start = time()
-        logging.info("Testing labels to get types")
-    test_funcs = dict()
-    for test in all_tests:
-        name = test.__name__.split(".")[-1]
-        test_funcs[name] = {"func": test._is, "prop": test.PROPORTION}
-    return_table = pd.DataFrame(columns=table.columns)
-    for idx, (key, value) in enumerate(test_funcs.items()):
-        if verbose:
-            start_type = time()
-        return_table.loc[key] = [
-            test_col_label(
-                col_name, value["func"], value["prop"], limited_output=limited_output
-            )
-            for col_name in table.columns
-        ]
-        if verbose:
-            display_logs_depending_process_time(
-                f'\t- Done with type "{key}" in {round(time() - start_type, 3)}s ({idx+1}/{len(test_funcs)})',
-                time() - start_type
-            )
-    if verbose:
-        display_logs_depending_process_time(f"Done testing labels in {round(time() - start, 3)}s", time() - start)
-    return return_table
+def is_url(file_path: str) -> bool:
+    # could be more sophisticated if needed
+    return file_path.startswith('http')
-def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
-    return_dict_cols = return_table.to_dict("dict")
-    return_dict_cols_intermediary = {}
-    for column_name in return_dict_cols:
-        return_dict_cols_intermediary[column_name] = []
-        for detected_value_type in return_dict_cols[column_name]:
-            if return_dict_cols[column_name][detected_value_type] == 0:
-                continue
-            dict_tmp = {}
-            dict_tmp["format"] = detected_value_type
-            dict_tmp["score"] = return_dict_cols[column_name][detected_value_type]
-            return_dict_cols_intermediary[column_name].append(dict_tmp)
-        # Clean dict using priorities
-        formats_detected = {
-            x["format"] for x in return_dict_cols_intermediary[column_name]
-        }
-        formats_to_remove = set()
-        # Deprioritise float and int detection vs others
-        if len(formats_detected - {"float", "int"}) > 0:
-            formats_to_remove = formats_to_remove.union({"float", "int"})
-        if "int" in formats_detected:
-            formats_to_remove.add("float")
-        if "latitude_wgs_fr_metropole" in formats_detected:
-            formats_to_remove.add("latitude_l93")
-            formats_to_remove.add("latitude_wgs")
-        if "longitude_wgs_fr_metropole" in formats_detected:
-            formats_to_remove.add("longitude_l93")
-            formats_to_remove.add("longitude_wgs")
-        if "longitude_wgs" in formats_detected:
-            formats_to_remove.add("longitude_l93")
-        if "code_region" in formats_detected:
-            formats_to_remove.add("code_departement")
-        formats_to_keep = formats_detected - formats_to_remove
-        detections = return_dict_cols_intermediary[column_name]
-        detections = [x for x in detections if x["format"] in formats_to_keep]
-        if not limited_output:
-            return_dict_cols_intermediary[column_name] = detections
-        else:
-            return_dict_cols_intermediary[column_name] = (
-                max(detections, key=lambda x: x["score"])
-                if len(detections) > 0
-                else {"format": "string", "score": 1.0}
-            )
-    return return_dict_cols_intermediary
+def prevent_nan(value: float) -> Optional[float]:
+    if math.isnan(value):
+        return None
+    return value
 def full_word_strictly_inside_string(word: str, string: str):
@@ -221,47 +39,3 @@ def full_word_strictly_inside_string(word: str, string: str):
         or (string.startswith(word + " "))
         or (string.endswith(" " + word))
     )
-def cast(value: str, _type: str) -> Optional[Union[str, float, bool, date, datetime]]:
-    if not isinstance(value, str) or not value:
-        # None is the current default value in hydra, should we keep this?
-        return None
-    if _type == "float":
-        return float_casting(value)
-    if _type == "bool":
-        return bool_casting(value)
-    if _type == "json":
-        # in hydra json are given to postgres as strings, conversion is done by postgres
-        return json.loads(value)
-    if _type == "date":
-        _date = date_casting(value)
-        return _date.date() if _date else None
-    if _type == "datetime":
-        return date_casting(value)
-    raise ValueError(f"Unknown type `{_type}`")
-def cast_df(df: pd.DataFrame, columns: dict, cast_json: bool = True, verbose: bool = False) -> pd.DataFrame:
-    if verbose:
-        start = time()
-    output_df = pd.DataFrame()
-    for col_name, detection in columns.items():
-        if detection["python_type"] == "string" or (detection["python_type"] == "json" and not cast_json):
-            # no change if detected type is string
-            output_df[col_name] = df[col_name].copy()
-        elif detection["python_type"] == "int":
-            # to allow having ints and NaN in the same column
-            output_df[col_name] = df[col_name].copy().astype(pd.Int64Dtype())
-        else:
-            output_df[col_name] = df[col_name].apply(
-                lambda col: cast(col, _type=detection["python_type"])
-            )
-        # to save RAM
-        del df[col_name]
-    if verbose:
-        display_logs_depending_process_time(
-            f'Casting columns completed in {round(time() - start, 3)}s',
-            time() - start,
-        )
-    return output_df

{csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1209.data}/data/share/csv_detective/CHANGELOG.md RENAMED Viewed

@@ -9,6 +9,9 @@
 - Raise an error if the encoding could not be guessed [#106](https://github.com/datagouv/csv-detective/pull/106)
 - Allow to only specify tests to skip ("all but...") [#108](https://github.com/datagouv/csv-detective/pull/108)
 - Fix bool casting [#109](https://github.com/datagouv/csv-detective/pull/109)
+- Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
+- Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
+- Restructure repo (breaking changes) [#111](https://github.com/datagouv/csv-detective/pull/111)
 ## 0.7.4 (2024-11-15)

{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: csv_detective
-Version: 0.7.5.dev1180
+Version: 0.7.5.dev1209
 Summary: Detect CSV column content
 Home-page: https://github.com/etalab/csv_detective
 Author: Etalab
@@ -37,5 +37,6 @@ Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: keywords
 Dynamic: license
+Dynamic: license-file
 Dynamic: requires-dist
 Dynamic: summary

{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/RECORD RENAMED Viewed

@@ -1,16 +1,12 @@
-csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
+csv_detective/__init__.py,sha256=GCHgu0BhH5ACV7cf-1gDr9nRyvSoeQ1vRw9SjEHeMT4,143
 csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
-csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
-csv_detective/detection.py,sha256=zrP8qvLDvhVXTHi7Ty8G_ga4zfZPjBhuyApqFQkPq2Y,22373
-csv_detective/explore_csv.py,sha256=BY1_X7OH2Lod08DTBwWaGvguc2OhpwOko4nlI8rf0HM,17470
-csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
+csv_detective/explore_csv.py,sha256=aJ2pG7lK4sgY9Pv31zEzFVGByxkfw4wwgrQqfgUtBOo,14903
 csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
-csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
-csv_detective/utils.py,sha256=yO9INaLh-QX-FFL2A153AlMqftE04wb0hpN6HJvsKGg,10581
+csv_detective/utils.py,sha256=KAYfSJXnPuAXnSc38Jm57oQ_JP_0kUkmI1OV6gN5_ys,1116
 csv_detective/detect_fields/__init__.py,sha256=NVfE3BQVExgXb-BPbhDvlkM5-0naEVLpZ4aM_OGHYfE,931
 csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=e5JqMNOPxx0Ivju3zAHCGMopZroCpR4vr3DJKlQhMz4,1675
+csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
 csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py,sha256=tfHdqUnCQ0cv-fBo3Cy--8UNXzgjld4kseI5eQ_sR4E,187
 csv_detective/detect_fields/FR/geo/code_departement/__init__.py,sha256=unr-Y4zquKSM5PVUiQGnOm-zQvaN8qd3v_XHf0W2VH8,378
 csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py,sha256=27bCkZP5w7tpsKUdOIXuiAG90DTdw066CWg3G5HtsKE,160
@@ -26,16 +22,16 @@ csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256
 csv_detective/detect_fields/FR/geo/pays/__init__.py,sha256=2q5T4SmCK6ZFF1mrv7d-q9tOIQKBcROI24y_UYIuvz0,383
 csv_detective/detect_fields/FR/geo/region/__init__.py,sha256=JbFKDd4jAnd9yb7YqP36MoLdO1JFPm1cg60fGXt6ZvI,1074
 csv_detective/detect_fields/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=X0NT6YbBg9PrxIcBwzUCQuBiv_QdDdqb3CJnrlent28,566
+csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py,sha256=SRWJvg3Ikyjmop9iL14igTjxNGpO-QB3fpADI_bLYEY,566
 csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt,sha256=rbcjtMP6qTZ7BTU6ZegkiXKCruqY_m9Ep6ZgRabFS_E,2486
 csv_detective/detect_fields/FR/other/code_rna/__init__.py,sha256=Z0RjMBt1--ZL7Jd1RsHAQCCbTAQk_BnlnTq8VF1o_VA,146
 csv_detective/detect_fields/FR/other/code_waldec/__init__.py,sha256=g9n5sOjRlk4I9YFZjdaTYrXf8ftXRDunGZOUpYhN4fA,295
-csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=XacU_3rwXqtdbw_ULTSnu0OOtx0w_rKlviCrLmNdHjc,496
+csv_detective/detect_fields/FR/other/csp_insee/__init__.py,sha256=lvcaVKgOPrCaZb-Y1-wYCbLYB_CQjCJFNAzfWDwtTVE,496
 csv_detective/detect_fields/FR/other/csp_insee/csp_insee.txt,sha256=kgKaKc-5PHu5U4--ugLjpFyMNtTU9CGdZ9ANU3YAsM4,32879
 csv_detective/detect_fields/FR/other/date_fr/__init__.py,sha256=kMV52djlG0y4o0ELEZuvTv_FvooYOgTnV1aWhycFJDc,284
-csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=4v3CiKogDt-bjSn3AvdnYp-5nAFrs3r3JGGSKC6z_Ag,519
+csv_detective/detect_fields/FR/other/insee_ape700/__init__.py,sha256=g8pOqJPKVpQiMd78zgrjXJWYeWkYhu8r3D4IQX519HQ,519
 csv_detective/detect_fields/FR/other/insee_ape700/insee_ape700.txt,sha256=nKgslakENwgE7sPkVNHqR23iXuxF02p9-v5MC2_ntx8,4398
-csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=hrgZ9Kd_c-cwMmD4w1D5NX10i5f-FzPFbxF4eajYhTU,269
+csv_detective/detect_fields/FR/other/sexe/__init__.py,sha256=iYkLe3MM51GWyBX_4BTq5PWDX_EeYRbEHWKMr8oE1MQ,269
 csv_detective/detect_fields/FR/other/siren/__init__.py,sha256=ohSwUL2rXqTXPG5WDAh2SP-lp1SzFCYgo4IhJ-PXmdk,442
 csv_detective/detect_fields/FR/other/siret/__init__.py,sha256=ThEeT6rXmS0EvHW8y4A_74bILyErDGxLe9v3elHOFs8,707
 csv_detective/detect_fields/FR/other/tel_fr/__init__.py,sha256=BF47aMTe0rUIx66iurIo7fM9Nrk0YorQ7WmFLnkWonI,343
@@ -65,7 +61,7 @@ csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeE
 csv_detective/detect_fields/other/url/__init__.py,sha256=9WaTqCglEsw_lJG_xZsBMdxJXg2yuQ92_fkX6CXWNV0,286
 csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
 csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_fields/temp/date/__init__.py,sha256=aFP1feFWFhCpR6Q9s_4BBwWxFtwFiMXY1iduSeQIjdA,943
+csv_detective/detect_fields/temp/date/__init__.py,sha256=1a_Ra9fmT4wgGMrcknXP7eN7A2QiaMF0Yjy0-BMihtA,987
 csv_detective/detect_fields/temp/datetime/__init__.py,sha256=Ykwhk2ospjY9P0KOG0AitgqN0sld6UmhOlbMz_XGQzQ,597
 csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=DOfli-A7gPlZmiV2J6Ka5_yDUCaOgxis29LET_tfhA4,444
 csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
@@ -73,72 +69,90 @@ csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRh
 csv_detective/detect_labels/__init__.py,sha256=BJjWlwTnnDe9nomABDUreu9EMu6IFG3T47d7YCJZbRc,878
 csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=r14SVoVJiaabyr6lTahI_Qsk0EH3F8UVSi6TRnDQS7o,1063
-csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=qJKvP6g98ceSxaFtd37-bI-9uFhQvdwFSh1n3MrzrOo,1096
-csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=FtPwHudArmsgkjCT_IM-I4_wALOsKjiK0-TEsYe9tw4,1025
-csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=irrBwY_TXAGRhOAcH1Xqi9D7P5Ajk2854ee1qXwuTnA,949
-csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=L74MwxadiT_MVrEEWUlMbhUsE_kk7xz_E2BHrZMUxMs,1070
-csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=Di9j-AKCogKxavnPgGjA_P8hy8g6JyJ0GBOO0k4l-qY,1012
-csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=8Jhx4neUt5iyyK_b1D4WWsdxi3mpz7cNZQ28fFF4xaE,948
-csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=N8MYMhqhspoLAUgD25pIrsqDKRuwTGnXXm8Chr8wih8,1229
-csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=8Tcqzjn-dGGjpxzo-2TqmEYpyfEhcqa1XNcQgMnqq88,957
-csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=nUT7SL4sKP_q9LTbiMBaCzJ029yBMP_phAD_CiOVHfc,1386
-csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=DCyN6-k1FH1kfTy4tFZWIH6lyaKeT-vgWnDh8TB7JhU,1381
-csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Lcqc8Agjxy2dPulu65NRel4uxRLPcQrAGrLsBTYT8EQ,1139
-csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=xd_W-L9pkKpsMT1IZ0fVMdty1dmT75uS8gmekb_InAw,1144
-csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=HJ3hNV3xeAN46YP6c-tqQgHMNvltm-tgApfofR5FraE,1169
-csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=ZPw8LXIuV8OvFVY_DA3MkvpAFzB6Rs749Ppr0Wc4lao,1164
+csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=e5ROxhrXNCefLwL5lXTWHO0PEWwLHfqmowm7XoeqZ2I,1063
+csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py,sha256=D_9QFvAeX5Nwp4qtQ0NEpKR0jpRlDx-rNBSrlYrw4nw,1096
+csv_detective/detect_labels/FR/geo/code_departement/__init__.py,sha256=rpzxUVsZyazVVguOorLadiJv_Vz1n04ijm0RbVmRDts,1025
+csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py,sha256=VUqv3G-JO-9CJU4-EX5DXs4O22Lqm75vuOy9MngoojA,949
+csv_detective/detect_labels/FR/geo/code_postal/__init__.py,sha256=USIYj7PiULI_WCfDxpzRCW9tv8-FNYKWopsVZ3H79mE,1070
+csv_detective/detect_labels/FR/geo/code_region/__init__.py,sha256=f9WroGVfB5jUzd_Rjs4XocZT2Ma-xZd2On9StUHy3F4,1012
+csv_detective/detect_labels/FR/geo/commune/__init__.py,sha256=iYD0UPhRVKYFv8DAEfe_RoQlE47igZ_MacsHxVLyYcM,948
+csv_detective/detect_labels/FR/geo/departement/__init__.py,sha256=fqNziX5ID6mVE5nVNviOsncVqkYyVvj7J_8hxN7_D1w,1229
+csv_detective/detect_labels/FR/geo/insee_canton/__init__.py,sha256=EAcQ2FqTKQdxhSYr5VCuEpjc7BdGwTdMkLL_VL6ay7Y,957
+csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py,sha256=X3vGdh_DHzWZXuV2-L9QhuWTLjHyaPZyS__s9Y5yiNg,1386
+csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
+csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py,sha256=Pf00tBADr7HvJLeW_YqY3QU1EBVJDi365woheAzsNKY,1139
+csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py,sha256=LfvgcrjVsXmxT6xC3X8eQIiQ_STvPRwjUbUQ4TyfJE0,1144
+csv_detective/detect_labels/FR/geo/pays/__init__.py,sha256=RsI_QXMJOZ5PpKcoKWy7AmUHFjehHXcUezquZyt1eq4,1169
+csv_detective/detect_labels/FR/geo/region/__init__.py,sha256=h9pE3xu2-PFw1jmDenkoKWmFkYmpK9-UgCboPlL7Aeg,1164
 csv_detective/detect_labels/FR/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=rk5S6UGL0vND2X2ty1HJJH3qswUbaV6ZJHHKAywRa6o,939
-csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=MBF9qZIGbX1dP3DJBI71pbqrGcgOR1xXAbGdiHX0pc4,1024
-csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=RDU8jDZgutfxnJl5lQkbqymJmGeeGXpR2i4CuGfqU10,934
-csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=-GoB9i83O3_rg81Ry3ZtLOdMhlrMPZ34he4hn9U6qDc,1043
-csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=7eV737iM1X9MTHureWiCpnxAUJ2_YjI14Vs41MGEX2Q,975
-csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=6UDx_6JRG__aSXTKKPq-2oBJR7ZiWg0HhSLDl4pETm4,1124
-csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=87BcCHmofOMqfHfHzmwZzIplcBMAm1AUMxVNvigigTQ,956
-csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=oWkOquzLLbDwBlAs_hoic_UQu7LFOmFZ76570vwRgdc,1103
-csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Xx-oajnzxEe6pEAYafsnZo7S-mLfnB0pP3z5gv3kJy0,1040
-csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=WWglp7xmb_Wz8bxrYYmB46cRyCJKNNqguubziMktZZI,1143
-csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=sVcw6fwQi9ocIEmLEJRi9m4WvTLg_ORwaW0KaJqeMB8,1316
+csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py,sha256=lcLdEdNo4rhLvqzP3C0rmU_1PaQvTdpviXt9xGSaGFc,939
+csv_detective/detect_labels/FR/other/code_rna/__init__.py,sha256=DJykTRguggOlsIuyjYezJ99c8MGCSwwwCLcoQjfN40o,1024
+csv_detective/detect_labels/FR/other/code_waldec/__init__.py,sha256=idLo99rELzs1uc4mOcby9RLZLhhpsOp5AoTudT2jPwM,934
+csv_detective/detect_labels/FR/other/csp_insee/__init__.py,sha256=J5G8pldzBdXRaopYNzGDztRFIsI_7rdaAPQ_kSuz5PU,1043
+csv_detective/detect_labels/FR/other/date_fr/__init__.py,sha256=9EXCmzKSa5PSWrPbVeLscbJCaiwQEXX-1rCr79U8XLA,975
+csv_detective/detect_labels/FR/other/insee_ape700/__init__.py,sha256=9bq2171SrmDIHx4A0cAeSHfWyQl40e-dIR9_ur4cEHQ,1124
+csv_detective/detect_labels/FR/other/sexe/__init__.py,sha256=AEKBGWEKxDoT8k9BF-v9vl1SHc4DffiiFyhip-6tC78,956
+csv_detective/detect_labels/FR/other/siren/__init__.py,sha256=9w2VCs8kq-XVRmxxwqZYIynfCPwbFbl-pBPqXtnXx8Y,1103
+csv_detective/detect_labels/FR/other/siret/__init__.py,sha256=Yqrp7NDEN0WRA_oktMb0wWoLQ99rzIvNvJ8jVhBCRD8,1040
+csv_detective/detect_labels/FR/other/tel_fr/__init__.py,sha256=gdzclIAjhr_k-a04l_FDz9kQywBfSA6vqa0UQxdaqNw,1143
+csv_detective/detect_labels/FR/other/uai/__init__.py,sha256=mB0hC2JUKGnhGl6MUDFzSM_-t-Tvt3Vm21Gr_JXkL3k,1316
 csv_detective/detect_labels/FR/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=a3fKjduxRIMNu7TF124pG--Mb21PIqZYnQwYU4APLBw,1074
-csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=taNWDz1_0KE4cOS4SeJcC8igMSA2LBbv8TvbCg50-TY,934
+csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py,sha256=FHXmOIjH4e5n_mahtScgOVYUAi_M4PeHAnsuIm5LxCA,1074
+csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py,sha256=hX0FPAia4x28GD398WvpeaBQ4_3F5G3xAhySmZBdi5w,934
 csv_detective/detect_labels/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
-csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
-csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=ev8w1hySEoNiMcU1IhJy72IB5OliCvoUy-ytKWPG3oI,1065
-csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=p6mXMb1GMkrs27WmoqRqPE3wCbs3iPL4FWfcc280bGA,1072
-csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=DCyN6-k1FH1kfTy4tFZWIH6lyaKeT-vgWnDh8TB7JhU,1381
-csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=xfzeQ1KXmEZAkpVQT_qAYsC4RnXKl11dTB9PoFExGgQ,1705
-csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=knv3HnIerZ6oUPrzGkW2GJjsiTnCklqZ9_koNJCG91I,1145
+csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
+csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
+csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=xKio-qy6EJbAowTiCo7-7fzMlD7s6z4O6_qJPVmlIDE,1065
+csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=0sYS6bF_xmmhqsJ0Wrx7GC3qBAYjK7uhVud_ZbIQHHQ,1072
+csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=cRYxeGnBkuxKwrDXpeoRhiCf6xkb533-_bNjk9MB818,1381
+csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=SwR1NU0vpk8YdHTIk1wk9zQpNoUsoABq-K8GfRMY0fw,1705
+csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=z4rOrkCypI5JodgX9alTrV03IpetgAW4BGJuNvFlU4s,1145
 csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/other/booleen/__init__.py,sha256=ahxzBUcJYc5f1J8aAeYDhtSc-URJHS6ruGTAbZXaoG8,987
-csv_detective/detect_labels/other/email/__init__.py,sha256=2pf0e8o3L57damyi4BLrqA9Opw0trZl2wWDHY88s41E,1148
-csv_detective/detect_labels/other/float/__init__.py,sha256=9JC0-B-aVqlLe3FeN8uH5HZjIc2V6hZ7JFStkSLsHW0,926
-csv_detective/detect_labels/other/int/__init__.py,sha256=i9xN8TYBy4C5b1vYO1l3Rkvn4uq_tft8Rip_ErSUIt8,933
+csv_detective/detect_labels/other/booleen/__init__.py,sha256=uvQ7yDVAlEO8AY44OMblh_ZrxPTOmdvFtbcQEanpWSo,987
+csv_detective/detect_labels/other/email/__init__.py,sha256=VRUYZXGn-hRqE2sY0JY-Oh_wtT568orDTBxBGYsgqxE,1148
+csv_detective/detect_labels/other/float/__init__.py,sha256=jIr1r9FFy8NWvi5fOuIhj52bc7cZmM3OeTo-c6TUWII,926
+csv_detective/detect_labels/other/int/__init__.py,sha256=G1GAlKNaOZH_l39Zpw85xkl7JcdnY5PlEEroyU78hlY,933
 csv_detective/detect_labels/other/money/__init__.py,sha256=kBEGuUy6kYkOI3vC_a7waBciG2ipyV9bhC330U8WaoI,279
 csv_detective/detect_labels/other/money/check_col_name.py,sha256=zgp5eUnf3XRQuxgdEGfxPfUnniO8Pzw19uK0ICr2pf8,414
-csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=Y-inIGmeH3lZcN9kR6icE3QypS54qJWv8aE4GQUudpc,927
-csv_detective/detect_labels/other/twitter/__init__.py,sha256=D8G4vGsFL9a99OJz-03wp4HbZSvT-y1IxyRJiSsqxFc,959
-csv_detective/detect_labels/other/url/__init__.py,sha256=vqUQvn5o6JZU8iRsSG3AYqggjlhzagozVYWwpuSReV8,1202
-csv_detective/detect_labels/other/uuid/__init__.py,sha256=OdMUxqvqMdGaY5nph7CbIF_Q0LSxljxE72kCMT4m-Zk,931
+csv_detective/detect_labels/other/mongo_object_id/__init__.py,sha256=3TW59y4vo4Pkx_fQrmEs1-gZbdJeNiK7ip25cpR829U,927
+csv_detective/detect_labels/other/twitter/__init__.py,sha256=x3b522ov_g-kmcq4k4eoZ8FQqrXdnlRJJit5UbnzIrQ,959
+csv_detective/detect_labels/other/url/__init__.py,sha256=wVQsWQzOuBY-cD7wn_PXcWLVEkknBA2lBCu8SRWsQG4,1202
+csv_detective/detect_labels/other/uuid/__init__.py,sha256=ySxqFvtGHguoiOyD5A1YRFY3SuubkgBAEY_Ud5kZVPM,931
 csv_detective/detect_labels/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/temp/date/__init__.py,sha256=-R7VqlryozelSn4wH_7w9x6ks77DP1kw2XMBYSLrzXE,1322
-csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
-csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
-csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
-csv_detective-0.7.5.dev1180.data/data/share/csv_detective/CHANGELOG.md,sha256=j5OyYgmRMfYqQoEzY6In6OdivOZR3kpv9AlXkda5vpA,7366
-csv_detective-0.7.5.dev1180.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
-csv_detective-0.7.5.dev1180.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+csv_detective/detect_labels/temp/date/__init__.py,sha256=CRv-S0figO6MOPdE0Lv5hWdjtIr6EmWzwlcjn5ofIxo,1322
+csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=0lFdN5Z43m6Qm-wBqcyM_mceUmI4s3vqgLCM-Jlgoxw,1157
+csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=4N0EGJA_2vXC1iFptvzpU6IN7AIJH5MFUrRY2p7Cjfs,1175
+csv_detective/detect_labels/temp/year/__init__.py,sha256=3U9j8Hux432KdGtIyArq_-vScn-5eYFwpn976WM9N4M,1150
+csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
+csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
+csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
+csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
+csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
+csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
+csv_detective/detection/variables.py,sha256=3qEMtjZ_zyIFXvTnFgK7ZMDx8C12uQXKfFjEj2moyJc,3558
+csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
+csv_detective/output/example.py,sha256=i8PkdXxidF7qR_9aK8vh12JpZdJQryhBgyrMS8iy5rk,8642
+csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
+csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
+csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
+csv_detective/parsing/columns.py,sha256=Oj0Ddp2fPZeL70GDWdF7GY2RmhiVdz0IEvoBJFt-wao,5701
+csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
+csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
+csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
+csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
+csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
+csv_detective-0.7.5.dev1209.data/data/share/csv_detective/CHANGELOG.md,sha256=povo1ufNJvsxJLkzdjYLgkTy9E-MNFWTg6elXe2nyqU,7625
+csv_detective-0.7.5.dev1209.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1209.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+csv_detective-0.7.5.dev1209.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
-tests/test_fields.py,sha256=_96htvTzvM7u-W57RpOBbsacWirIm4R36PP7JhPEaYQ,11123
-tests/test_file.py,sha256=HO-Zqv0ZDFy3d0ZrpjWQPXBrwgUmzesseoEofy8G2UU,7529
+tests/test_fields.py,sha256=fcgycaFxacOcN0WdwuUvxef_ejd6tRHNpkD5pxMjMXE,11141
+tests/test_file.py,sha256=EleTssys5fCP4N0W1eTZN35uijzoF15e3dIcuIlrMsk,7865
 tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
 tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
-csv_detective-0.7.5.dev1180.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
-csv_detective-0.7.5.dev1180.dist-info/METADATA,sha256=MPk6yPPa_Xm0K9edLe_7DfXGCVniEP8cs1tHQqJGzvo,1364
-csv_detective-0.7.5.dev1180.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-csv_detective-0.7.5.dev1180.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.7.5.dev1180.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
-csv_detective-0.7.5.dev1180.dist-info/RECORD,,
+csv_detective-0.7.5.dev1209.dist-info/METADATA,sha256=LwKAMVqoJjZfnrWAJV_nv_V3oprmbmmaNmX9e4Zvruc,1386
+csv_detective-0.7.5.dev1209.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+csv_detective-0.7.5.dev1209.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.7.5.dev1209.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+csv_detective-0.7.5.dev1209.dist-info/RECORD,,

{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1209.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

tests/test_fields.py CHANGED Viewed

@@ -1,7 +1,8 @@
-import pandas as pd
+from datetime import date as _date, datetime as _datetime
 from numpy import random
+import pandas as pd
 import pytest
-from datetime import date as _date, datetime as _datetime
 from csv_detective.detect_fields.FR.geo import (
     adresse,
@@ -43,12 +44,12 @@ from csv_detective.detect_fields.other import (
     float as test_float,
 )
 from csv_detective.detect_fields.temp import date, datetime_iso, datetime_rfc822, year
-from csv_detective.detection import (
+from csv_detective.detection.variables import (
     detect_continuous_variable,
-    detetect_categorical_variable,
+    detect_categorical_variable,
 )
 from csv_detective.explore_csv import return_all_tests
-from csv_detective.utils import cast
+from csv_detective.output.dataframe import cast
 def test_all_tests_return_bool():
@@ -71,7 +72,7 @@ def test_detetect_categorical_variable():
     }
     df = pd.DataFrame(df_dict, dtype="unicode")
-    res, _ = detetect_categorical_variable(df)
+    res, _ = detect_categorical_variable(df)
     assert len(res.values) and all(k in res.values for k in ["cat", "cat2"])

csv-detective 0.7.5.dev1180__py3-none-any.whl → 0.7.5.dev1209__py3-none-any.whl

csv-detective 0.7.5.dev1180__py3-none-any.whl → 0.7.5.dev1209__py3-none-any.whl