PyPI - csv-detective - Versions diffs - 0.10.3.dev7__py3-none-any.whl → 0.10.2549__py3-none-any.whl - Mend

csv-detective 0.10.3.dev7__py3-none-any.whl → 0.10.2549__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

csv_detective/detection/__init__.py +0 -0
csv_detective/detection/columns.py +0 -0
csv_detective/detection/encoding.py +0 -0
csv_detective/detection/engine.py +0 -0
csv_detective/detection/formats.py +38 -11
csv_detective/detection/headers.py +14 -12
csv_detective/detection/rows.py +1 -1
csv_detective/detection/separator.py +0 -0
csv_detective/detection/variables.py +0 -0
csv_detective/explore_csv.py +6 -18
csv_detective/format.py +5 -12
csv_detective/formats/__init__.py +0 -0
csv_detective/formats/adresse.py +9 -9
csv_detective/formats/binary.py +1 -2
csv_detective/formats/booleen.py +2 -3
csv_detective/formats/code_commune_insee.py +10 -12
csv_detective/formats/code_csp_insee.py +1 -1
csv_detective/formats/code_departement.py +7 -8
csv_detective/formats/code_fantoir.py +5 -6
csv_detective/formats/code_import.py +1 -1
csv_detective/formats/code_postal.py +9 -10
csv_detective/formats/code_region.py +6 -7
csv_detective/formats/code_rna.py +6 -7
csv_detective/formats/code_waldec.py +1 -1
csv_detective/formats/commune.py +5 -5
csv_detective/formats/csp_insee.py +5 -6
csv_detective/formats/data/insee_ape700.txt +1 -1
csv_detective/formats/data/iso_country_code_alpha2.txt +397 -153
csv_detective/formats/data/iso_country_code_alpha3.txt +132 -132
csv_detective/formats/data/iso_country_code_numeric.txt +94 -94
csv_detective/formats/date.py +18 -28
csv_detective/formats/date_fr.py +1 -1
csv_detective/formats/datetime_aware.py +2 -7
csv_detective/formats/datetime_naive.py +0 -3
csv_detective/formats/datetime_rfc822.py +0 -1
csv_detective/formats/departement.py +15 -15
csv_detective/formats/email.py +13 -13
csv_detective/formats/float.py +1 -2
csv_detective/formats/geojson.py +10 -10
csv_detective/formats/insee_ape700.py +8 -10
csv_detective/formats/insee_canton.py +6 -6
csv_detective/formats/int.py +1 -2
csv_detective/formats/iso_country_code_alpha2.py +14 -14
csv_detective/formats/iso_country_code_alpha3.py +13 -6
csv_detective/formats/iso_country_code_numeric.py +9 -2
csv_detective/formats/jour_de_la_semaine.py +12 -11
csv_detective/formats/json.py +0 -6
csv_detective/formats/latitude_l93.py +22 -8
csv_detective/formats/latitude_wgs.py +29 -31
csv_detective/formats/latitude_wgs_fr_metropole.py +30 -7
csv_detective/formats/latlon_wgs.py +28 -30
csv_detective/formats/longitude_l93.py +13 -8
csv_detective/formats/longitude_wgs.py +19 -34
csv_detective/formats/longitude_wgs_fr_metropole.py +19 -6
csv_detective/formats/lonlat_wgs.py +11 -12
csv_detective/formats/mois_de_lannee.py +1 -1
csv_detective/formats/money.py +1 -1
csv_detective/formats/mongo_object_id.py +1 -1
csv_detective/formats/pays.py +13 -11
csv_detective/formats/percent.py +1 -1
csv_detective/formats/region.py +13 -13
csv_detective/formats/sexe.py +1 -1
csv_detective/formats/siren.py +10 -9
csv_detective/formats/siret.py +9 -9
csv_detective/formats/tel_fr.py +13 -7
csv_detective/formats/uai.py +18 -17
csv_detective/formats/url.py +16 -16
csv_detective/formats/username.py +1 -1
csv_detective/formats/uuid.py +1 -1
csv_detective/formats/year.py +12 -7
csv_detective/output/__init__.py +0 -0
csv_detective/output/dataframe.py +3 -8
csv_detective/output/example.py +0 -0
csv_detective/output/profile.py +2 -6
csv_detective/output/schema.py +0 -0
csv_detective/output/utils.py +0 -0
csv_detective/parsing/__init__.py +0 -0
csv_detective/parsing/columns.py +1 -1
csv_detective/parsing/compression.py +0 -0
csv_detective/parsing/csv.py +0 -0
csv_detective/parsing/excel.py +1 -1
csv_detective/parsing/load.py +12 -11
csv_detective/parsing/text.py +12 -13
csv_detective/validate.py +36 -71
{csv_detective-0.10.3.dev7.dist-info → csv_detective-0.10.2549.dist-info}/METADATA +18 -15
csv_detective-0.10.2549.dist-info/RECORD +92 -0
csv_detective-0.10.2549.dist-info/WHEEL +4 -0
{csv_detective-0.10.3.dev7.dist-info → csv_detective-0.10.2549.dist-info}/entry_points.txt +1 -0
csv_detective-0.10.3.dev7.dist-info/RECORD +0 -111
csv_detective-0.10.3.dev7.dist-info/WHEEL +0 -5
csv_detective-0.10.3.dev7.dist-info/licenses/LICENSE +0 -21
csv_detective-0.10.3.dev7.dist-info/top_level.txt +0 -3
tests/__init__.py +0 -0
tests/data/a_test_file.csv +0 -407
tests/data/a_test_file.json +0 -394
tests/data/b_test_file.csv +0 -7
tests/data/c_test_file.csv +0 -2
tests/data/csv_file +0 -7
tests/data/file.csv.gz +0 -0
tests/data/file.ods +0 -0
tests/data/file.xls +0 -0
tests/data/file.xlsx +0 -0
tests/data/xlsx_file +0 -0
tests/test_example.py +0 -67
tests/test_fields.py +0 -175
tests/test_file.py +0 -468
tests/test_labels.py +0 -26
tests/test_structure.py +0 -45
tests/test_validation.py +0 -163

tests/data/a_test_file.json DELETED Viewed

@@ -1,394 +0,0 @@
-{
-    "encoding": "ASCII",
-    "separator": ";",
-    "header_row_idx": 2,
-    "header": [
-        "NUMCOM",
-        "NOMCOM",
-        "NUMDEP",
-        "NOMDEP",
-        "NUMEPCI",
-        "NOMEPCI",
-        "TXCOUVGLO_COM_2014",
-        "TXCOUVGLO_DEP_2014",
-        "TXCOUVGLO_EPCI_2014",
-        "STRUCTURED_INFO",
-        "GEO_INFO"
-    ],
-    "total_lines": 404,
-    "nb_duplicates": 7,
-    "heading_columns": 0,
-    "trailing_columns": 0,
-    "continuous": [
-        "TXCOUVGLO_EPCI_2014"
-    ],
-    "categorical": [
-        "NUMDEP",
-        "NOMDEP",
-        "TXCOUVGLO_DEP_2014",
-        "GEO_INFO"
-    ],
-    "columns_fields": {
-        "NUMCOM": {
-            "python_type": "string",
-            "format": "code_commune_insee",
-            "score": 1.0
-        },
-        "NOMCOM": {
-            "python_type": "string",
-            "format": "commune",
-            "score": 1.0
-        },
-        "NUMDEP": {
-            "python_type": "int",
-            "format": "int",
-            "score": 1.0
-        },
-        "NOMDEP": {
-            "python_type": "string",
-            "format": "departement",
-            "score": 1.0
-        },
-        "NUMEPCI": {
-            "python_type": "string",
-            "format": "siren",
-            "score": 1.0
-        },
-        "NOMEPCI": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "TXCOUVGLO_COM_2014": {
-            "python_type": "float",
-            "format": "float",
-            "score": 1.0
-        },
-        "TXCOUVGLO_DEP_2014": {
-            "python_type": "float",
-            "format": "latitude_wgs",
-            "score": 0.9951690821256038
-        },
-        "TXCOUVGLO_EPCI_2014": {
-            "python_type": "float",
-            "format": "longitude_wgs",
-            "score": 0.9444444444444444
-        },
-        "STRUCTURED_INFO": {
-            "python_type": "json",
-            "format": "json",
-            "score": 1.0
-        },
-        "GEO_INFO": {
-            "python_type": "json",
-            "format": "geojson",
-            "score": 1.0
-        }
-    },
-    "columns_labels": {
-        "NUMCOM": {
-            "python_type": "string",
-            "format": "code_commune_insee",
-            "score": 0.5
-        },
-        "NOMCOM": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "NUMDEP": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "NOMDEP": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "NUMEPCI": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "NOMEPCI": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "TXCOUVGLO_COM_2014": {
-            "python_type": "string",
-            "format": "code_commune_insee",
-            "score": 0.5
-        },
-        "TXCOUVGLO_DEP_2014": {
-            "python_type": "string",
-            "format": "code_departement",
-            "score": 0.5
-        },
-        "TXCOUVGLO_EPCI_2014": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "STRUCTURED_INFO": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "GEO_INFO": {
-            "python_type": "string",
-            "format": "latlon_wgs",
-            "score": 0.5
-        }
-    },
-    "columns": {
-        "NUMCOM": {
-            "python_type": "string",
-            "format": "code_commune_insee",
-            "score": 1.125
-        },
-        "NOMCOM": {
-            "python_type": "string",
-            "format": "commune",
-            "score": 1.0
-        },
-        "NUMDEP": {
-            "python_type": "int",
-            "format": "int",
-            "score": 1.0
-        },
-        "NOMDEP": {
-            "python_type": "string",
-            "format": "departement",
-            "score": 1.0
-        },
-        "NUMEPCI": {
-            "python_type": "string",
-            "format": "siren",
-            "score": 1.0
-        },
-        "NOMEPCI": {
-            "python_type": "string",
-            "format": "string",
-            "score": 1.0
-        },
-        "TXCOUVGLO_COM_2014": {
-            "python_type": "float",
-            "format": "float",
-            "score": 1.0
-        },
-        "TXCOUVGLO_DEP_2014": {
-            "python_type": "float",
-            "format": "float",
-            "score": 1.0
-        },
-        "TXCOUVGLO_EPCI_2014": {
-            "python_type": "float",
-            "format": "float",
-            "score": 1.0
-        },
-        "STRUCTURED_INFO": {
-            "python_type": "json",
-            "format": "json",
-            "score": 1.0
-        },
-        "GEO_INFO": {
-            "python_type": "json",
-            "format": "geojson",
-            "score": 1.0
-        }
-    },
-    "formats": {
-        "code_commune_insee": [
-            "NUMCOM"
-        ],
-        "int": [
-            "NUMDEP"
-        ],
-        "commune": [
-            "NOMCOM"
-        ],
-        "departement": [
-            "NOMDEP"
-        ],
-        "siren": [
-            "NUMEPCI"
-        ],
-        "string": [
-            "NOMEPCI"
-        ],
-        "float": [
-            "TXCOUVGLO_COM_2014",
-            "TXCOUVGLO_DEP_2014",
-            "TXCOUVGLO_EPCI_2014"
-        ],
-        "json": [
-            "STRUCTURED_INFO"
-        ],
-        "geojson": [
-            "GEO_INFO"
-        ]
-    },
-    "profile": {
-        "NUMCOM": {
-            "tops": [
-                "01170",
-                "01169",
-                "01167",
-                "01166",
-                "01165",
-                "01163",
-                "01162",
-                "01297",
-                "01304",
-                "01303"
-            ],
-            "nb_distinct": 407,
-            "nb_missing_values": 0
-        },
-        "NOMCOM": {
-            "tops": [
-                "BEARD-GEOVREISSIAT",
-                "GENOUILLEUX",
-                "GARNERANS",
-                "FRANS",
-                "FRANCHELEINS",
-                "FOISSIAT",
-                "FLAXIEU",
-                "PIZAY",
-                "PONT-D'AIN",
-                "PONCIN"
-            ],
-            "nb_distinct": 407,
-            "nb_missing_values": 0
-        },
-        "NUMDEP": {
-            "min": 1,
-            "max": 6,
-            "mean": 1,
-            "std": 0,
-            "tops": [
-                1,
-                6
-            ],
-            "nb_distinct": 2,
-            "nb_missing_values": 0
-        },
-        "NOMDEP": {
-            "tops": [
-                "AIN",
-                "ALPES MARITIMES"
-            ],
-            "nb_distinct": 2,
-            "nb_missing_values": 0
-        },
-        "NUMEPCI": {
-            "tops": [
-                "200040350",
-                "200042935",
-                "240100883",
-                "240100750",
-                "200042497",
-                "200035210",
-                "240100156",
-                "240100370",
-                "200029999",
-                "240100628"
-            ],
-            "nb_distinct": 33,
-            "nb_missing_values": 0
-        },
-        "NOMEPCI": {
-            "tops": [
-                "CC BUGEY SUD",
-                "CC HAUT - BUGEY",
-                "CC DE LA PLAINE DE L'AIN",
-                "CC DU PAYS DE GEX",
-                "CC DOMBES SAONE VALLEE",
-                "CC CHALARONNE CENTRE",
-                "CC DE MONTREVEL - EN - BRESSE",
-                "CC DU VALROMEY",
-                "CC RIVES DE L'AIN - PAYS DU CERDON",
-                "CA BOURG EN BRESSE AGGLOMERATION"
-            ],
-            "nb_distinct": 33,
-            "nb_missing_values": 0
-        },
-        "TXCOUVGLO_COM_2014": {
-            "min": 0.0,
-            "max": 200.2,
-            "mean": 59.35863746958638,
-            "std": 36.453598197621275,
-            "tops": [
-                0.0,
-                68.6,
-                30.5,
-                54.7,
-                82.6,
-                78.4,
-                64.3,
-                78.1,
-                24.9,
-                null
-            ],
-            "nb_distinct": 297,
-            "nb_missing_values": 3
-        },
-        "TXCOUVGLO_DEP_2014": {
-            "min": 47.0,
-            "max": 65.2,
-            "mean": 65.112077294686,
-            "std": 1.263455055322421,
-            "tops": [
-                65.2,
-                47.0
-            ],
-            "nb_distinct": 2,
-            "nb_missing_values": 0
-        },
-        "TXCOUVGLO_EPCI_2014": {
-            "min": 28.3,
-            "max": 93.9,
-            "mean": 64.45772946859903,
-            "std": 12.72227368109601,
-            "tops": [
-                52.4,
-                45.3,
-                75.2,
-                78.4,
-                46.9,
-                77.8,
-                67.9,
-                70.0,
-                72.9,
-                68.7
-            ],
-            "nb_distinct": 30,
-            "nb_missing_values": 0
-        },
-        "STRUCTURED_INFO": {
-            "tops": [
-                "{\"champ_1\": 154, \"champ_2\": 0.0792}",
-                "{\"champ_1\": 153, \"champ_2\": 0.0737}",
-                "{\"champ_1\": 152, \"champ_2\": 0.0681}",
-                "{\"champ_1\": 151, \"champ_2\": 0.0624}",
-                "{\"champ_1\": 150, \"champ_2\": 0.0568}",
-                "{\"champ_1\": 149, \"champ_2\": 0.0511}",
-                "{\"champ_1\": 148, \"champ_2\": 0.0454}",
-                "{\"champ_1\": 268, \"champ_2\": 0.553}",
-                "{\"champ_1\": 275, \"champ_2\": 0.5732}",
-                "{\"champ_1\": 274, \"champ_2\": 0.5704}"
-            ],
-            "nb_distinct": 407,
-            "nb_missing_values": 0
-        },
-        "GEO_INFO": {
-            "tops": [
-                "{\"type\": \"Point\", \"coordinates\": [12.5, 2.8]}"
-            ],
-            "nb_distinct": 1,
-            "nb_missing_values": 0
-        }
-    }
-}

tests/data/b_test_file.csv DELETED Viewed

@@ -1,7 +0,0 @@
-code_departement,code_region,partly_empty
-01,01,10
-23,84,100
-44,32,76
-31,32,
-28,84,
-59,24,

tests/data/c_test_file.csv DELETED Viewed

@@ -1,2 +0,0 @@
- colmmun A;columnB
- row A;row B;row C

tests/data/csv_file DELETED Viewed

@@ -1,7 +0,0 @@
-code_departement,code_region
-01,01
-23,84
-44,32
-31,32
-28,84
-59,24

tests/data/file.csv.gz DELETED Viewed

Binary file

tests/data/file.ods DELETED Viewed

Binary file

tests/data/file.xls DELETED Viewed

Binary file

tests/data/file.xlsx DELETED Viewed

Binary file

tests/data/xlsx_file DELETED Viewed

Binary file

tests/test_example.py DELETED Viewed

@@ -1,67 +0,0 @@
-import re
-from uuid import UUID
-from csv_detective.output.example import create_example_csv_file
-def test_example_creation():
-    fields = [
-        {
-            "name": "id_unique",
-            "type": "id",
-        },
-        {
-            "name": "nom_modele",
-            "type": "str",
-            "args": {"length": 20},
-        },
-        {
-            "name": "siret",
-            "type": "str",
-            "args": {"pattern": "^\\d{14}$"},
-        },
-        {
-            "name": "type_producteur",
-            "type": "str",
-            "args": {"enum": ["privé", "public", "association"]},
-        },
-        {
-            "name": "date_creation",
-            "type": "date",
-            "args": {
-                "date_range": ["1996-02-13", "2000-01-28"],
-                "format": "%Y-%m-%d",
-            },
-        },
-        {
-            "name": "url_produit",
-            "type": "url",
-        },
-        {
-            "name": "nb_produits",
-            "type": "int",
-        },
-        {"name": "note", "type": "float", "args": {"num_range": [1, 20]}},
-    ]
-    df = create_example_csv_file(
-        fields=fields,
-        file_length=5,
-        output_name=None,
-    )
-    assert len(df) == 5
-    assert all(UUID(_) for _ in df["id_unique"])
-    assert all(len(_) == 20 for _ in df["nom_modele"])
-    assert all(re.match("^\\d{14}$", _) for _ in df["siret"])
-    assert all(_ in ["privé", "public", "association"] for _ in df["type_producteur"])
-    assert all(_ >= "1996-02-13" and _ <= "2000-01-28" for _ in df["date_creation"])
-    assert all(_.startswith("http") for _ in df["url_produit"])
-    assert all(isinstance(_, int) for _ in df["nb_produits"])
-    assert all(_ >= 1 and _ <= 20 for _ in df["note"])
-def test_example_from_tableschema():
-    df = create_example_csv_file(
-        schema_path="https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/2.3.1/schema-statique.json",
-        output_name=None,
-    )
-    assert len(df) == 10

tests/test_fields.py DELETED Viewed

@@ -1,175 +0,0 @@
-from datetime import date as _date
-from datetime import datetime as _datetime
-from unittest.mock import patch
-import pandas as pd
-import pytest
-from numpy import random
-from csv_detective.detection.variables import (
-    detect_categorical_variable,
-    detect_continuous_variable,
-)
-from csv_detective.format import FormatsManager
-from csv_detective.output.dataframe import cast
-from csv_detective.output.utils import prepare_output_dict
-from csv_detective.parsing.columns import test_col as col_test  # to prevent pytest from testing it
-fmtm = FormatsManager()
-def test_all_format_funcs_return_bool():
-    for format in fmtm.formats.values():
-        for tmp in ["a", "9", "3.14", "[]", float("nan"), "2021-06-22 10:20:10"]:
-            assert isinstance(format.func(tmp), bool)
-# categorical
-def test_detect_categorical_variable():
-    categorical_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
-    categorical_col2 = [str(k // 20) for k in range(100)]
-    not_categorical_col = [i for i in range(100)]
-    df_dict = {
-        "cat": categorical_col,
-        "cat2": categorical_col2,
-        "not_cat": not_categorical_col,
-    }
-    df = pd.DataFrame(df_dict, dtype=str)
-    res, _ = detect_categorical_variable(df)
-    assert len(res) and all(k in res for k in ["cat", "cat2"])
-# continuous
-def test_detect_continuous_variable():
-    continuous_col = random.random(100)
-    continuous_col_2 = [1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7, 21, 3] * 10
-    not_continuous_col = ["type_a"] * 33 + ["type_b"] * 33 + ["type_c"] * 34
-    df_dict = {"cont": continuous_col, "not_cont": not_continuous_col}
-    df_dict_2 = {"cont": continuous_col_2, "not_cont": not_continuous_col}
-    df = pd.DataFrame(df_dict, dtype=str)
-    df2 = pd.DataFrame(df_dict_2, dtype=str)
-    res = detect_continuous_variable(df)
-    res2 = detect_continuous_variable(df2, continuous_th=0.65)
-    assert res.values and res.values[0] == "cont"
-    assert res2.values and res2.values[0] == "cont"
-# we could also have a function here to add all True values of (almost)
-# each field to the False values of all others (to do when parenthood is added)
-def test_all_fields_have_tests():
-    for format in fmtm.formats.values():
-        valid = format._test_values
-        # checking structure
-        assert all(
-            isinstance(key, bool)
-            and isinstance(vals, list)
-            and all(isinstance(val, str) for val in vals)
-            for key, vals in valid.items()
-        )
-        # checking that we have valid and invalid cases for each
-        assert all(b in valid.keys() for b in [True, False])
-# this is based on the _test_values of each <format>.py file
-@pytest.mark.parametrize(
-    "args",
-    (
-        (format.func, value, valid)
-        for valid in [True, False]
-        for format in fmtm.formats.values()
-        for value in format._test_values[valid]
-    ),
-)
-def test_fields_with_values(args):
-    func, value, valid = args
-    assert func(value) is valid
-@pytest.mark.parametrize(
-    "args",
-    (
-        ("Valeur", "string", str),
-        ("-17", "int", int),
-        ("1.9", "float", float),
-        ("oui", "bool", bool),
-        ("[1, 2]", "json", list),
-        ('{"a": 1}', "json", dict),
-        ("2022-08-01", "date", _date),
-        ("2024-09-23 17:32:07", "datetime", _datetime),
-        ("2024-09-23 17:32:07+02:00", "datetime", _datetime),
-        ("N/A", "int", None),
-        ("nan", "bool", None),
-        ("", "date", None),  # all NaN-like values should be cast as None for all type
-    ),
-)
-def test_cast(args):
-    value, detected_type, cast_type = args
-    if cast_type is None:
-        assert cast(value, detected_type) is None
-    else:
-        assert isinstance(cast(value, detected_type), cast_type)
-@pytest.mark.parametrize(
-    "args",
-    (
-        # there is a specific numerical format => specific wins
-        ({"int": 1, "float": 1, "latitude_wgs": 1}, "latitude_wgs"),
-        # scores are equal for related formats => priority wins
-        ({"int": 1, "float": 1}, "int"),
-        # score is lower for priority format => secondary wins
-        ({"int": 0.5, "float": 1}, "float"),
-        # score is lower for priority format, but is 1 => priority wins
-        ({"int": 1, "float": 1.25}, "int"),
-        # two rounds of priority => highest priority wins
-        ({"latlon_wgs": 1, "lonlat_wgs": 1, "json": 1}, "latlon_wgs"),
-        # no detection => default to string
-        ({}, "string"),
-    ),
-)
-def test_priority(args):
-    detections, expected = args
-    col = "col1"
-    output = prepare_output_dict(pd.DataFrame({col: detections}), limited_output=True)
-    assert output[col]["format"] == expected
-@pytest.mark.parametrize(
-    "args",
-    (
-        ("1996-02-13", fmtm.formats["date"]),
-        ("28/01/2000", fmtm.formats["date"]),
-        ("2025-08-20T14:30:00+02:00", fmtm.formats["datetime_aware"]),
-        ("2025/08/20 14:30:00.2763-12:00", fmtm.formats["datetime_aware"]),
-        ("1925_12_20T14:30:00.2763", fmtm.formats["datetime_naive"]),
-        ("1925 12 20 14:30:00Z", fmtm.formats["datetime_aware"]),
-    ),
-)
-def test_early_detection(args):
-    value, format = args
-    with patch("csv_detective.formats.date.date_casting") as mock_func:
-        res = format.func(value)
-        assert res
-        mock_func.assert_not_called()
-def test_all_proportion_1():
-    # building a table that uses only correct values for these formats, except on one row
-    table = pd.DataFrame(
-        {
-            name: (format._test_values[True] * 100)[:100] + ["not_suitable"]
-            for name, format in fmtm.formats.items()
-            if format.proportion == 1
-        }
-    )
-    # testing columns for all formats
-    returned_table = col_test(table, fmtm.formats, limited_output=True)
-    # the analysis should have found no match on any format
-    assert all(returned_table[col].sum() == 0 for col in table.columns)

csv-detective 0.10.3.dev7__py3-none-any.whl → 0.10.2549__py3-none-any.whl

csv-detective 0.10.3.dev7__py3-none-any.whl → 0.10.2549__py3-none-any.whl