csv-detective 0.8.1.dev1703__py3-none-any.whl → 0.8.1.dev1729__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- csv_detective/cli.py +6 -9
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +78 -78
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -1
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/commune/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/departement/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +2 -2
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/pays/__init__.py +6 -6
- csv_detective/detect_fields/FR/geo/region/__init__.py +6 -4
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +15 -14
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +4 -3
- csv_detective/detect_fields/FR/other/sexe/__init__.py +2 -2
- csv_detective/detect_fields/FR/other/siren/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/siret/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +3 -3
- csv_detective/detect_fields/FR/other/uai/__init__.py +2 -2
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +15 -15
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +27 -27
- csv_detective/detect_fields/__init__.py +94 -43
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +5 -5
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +5 -5
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/other/booleen/__init__.py +1 -1
- csv_detective/detect_fields/other/email/__init__.py +4 -2
- csv_detective/detect_fields/other/int/__init__.py +3 -3
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +2 -2
- csv_detective/detect_fields/other/twitter/__init__.py +2 -2
- csv_detective/detect_fields/other/uuid/__init__.py +4 -5
- csv_detective/detect_fields/temp/date/__init__.py +3 -2
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +6 -6
- csv_detective/detect_fields/temp/year/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -1
- csv_detective/detect_labels/__init__.py +51 -1
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +1 -0
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detection/columns.py +9 -9
- csv_detective/detection/encoding.py +6 -4
- csv_detective/detection/engine.py +6 -5
- csv_detective/detection/formats.py +19 -19
- csv_detective/detection/headers.py +3 -5
- csv_detective/detection/rows.py +1 -1
- csv_detective/detection/variables.py +6 -7
- csv_detective/explore_csv.py +7 -8
- csv_detective/load_tests.py +7 -16
- csv_detective/output/__init__.py +3 -7
- csv_detective/output/dataframe.py +9 -5
- csv_detective/output/example.py +13 -13
- csv_detective/output/profile.py +30 -23
- csv_detective/output/schema.py +20 -23
- csv_detective/output/utils.py +15 -15
- csv_detective/parsing/columns.py +23 -12
- csv_detective/parsing/csv.py +1 -1
- csv_detective/parsing/excel.py +10 -11
- csv_detective/parsing/load.py +11 -8
- csv_detective/parsing/text.py +4 -9
- csv_detective/s3_utils.py +3 -7
- csv_detective/utils.py +4 -2
- csv_detective/validate.py +18 -13
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/METADATA +12 -2
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/RECORD +79 -79
- tests/test_example.py +2 -6
- tests/test_fields.py +16 -10
- tests/test_file.py +10 -9
- tests/test_labels.py +3 -2
- tests/test_structure.py +4 -3
- tests/test_validation.py +9 -6
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1703.dist-info → csv_detective-0.8.1.dev1729.dist-info}/top_level.txt +0 -0
csv_detective/detection/formats.py CHANGED

@@ -1,16 +1,17 @@
-from collections import defaultdict
 import logging
+from collections import defaultdict
 from typing import Union

 import numpy as np
 import pandas as pd
+
 from csv_detective.detection.variables import (
     detect_categorical_variable,
     # detect_continuous_variable,
 )
 from csv_detective.load_tests import return_all_tests
 from csv_detective.output.utils import prepare_output_dict
-from csv_detective.parsing.columns import test_col, test_label
+from csv_detective.parsing.columns import MAX_ROWS_ANALYSIS, test_col, test_label
 from csv_detective.validate import validate


@@ -42,10 +43,12 @@ def detect_formats(
     # detect_continuous_variable(table.iloc[:, ~categorical_mask.values], verbose=verbose)
     # )

-    analysis.update(
-
-
-
+    analysis.update(
+        {
+            "categorical": res_categorical,
+            # "continuous": res_continuous,
+        }
+    )

     # list testing to be performed
     all_tests_fields = return_all_tests(
@@ -60,7 +63,9 @@ def detect_formats(
         return analysis

     # Perform testing on fields
-    scores_table_fields = test_col(
+    scores_table_fields = test_col(
+        table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose
+    )
     analysis["columns_fields"] = prepare_output_dict(scores_table_fields, limited_output)

     # Perform testing on labels
@@ -71,16 +76,14 @@ def detect_formats(
     # This is because the fields are more important than the labels and yields a max
     # of 1.5 for the final score.
     scores_table = scores_table_fields * (
-        1
-        + scores_table_labels.reindex(
-            index=scores_table_fields.index, fill_value=0
-        ).values / 2
+        1 + scores_table_labels.reindex(index=scores_table_fields.index, fill_value=0).values / 2
     )

     # To reduce false positives: ensure these formats are detected only if the label yields
     # a detection (skipping the ones that have been excluded by the users).
     formats_with_mandatory_label = [
-        f
+        f
+        for f in [
             "code_departement",
             "code_commune_insee",
             "code_postal",
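The reformatting above does not change the scoring logic: label scores are reindexed onto the field-score index (missing labels fill with 0) and can boost a field score by at most 50%. A minimal sketch of that mechanism, with toy data rather than the package's internals:

import pandas as pd

# toy score matrices indexed by format name, one column per csv column
scores_table_fields = pd.DataFrame({"col_a": [0.9, 0.4]}, index=["siren", "code_postal"])
scores_table_labels = pd.DataFrame({"col_a": [1.0]}, index=["code_postal"])

# formats with no label score get 0, so their field score keeps factor 1;
# a perfect label score of 1 multiplies the field score by 1.5 at most
combined = scores_table_fields * (
    1 + scores_table_labels.reindex(index=scores_table_fields.index, fill_value=0).values / 2
)
print(combined)  # siren stays at 0.9, code_postal goes from 0.4 to 0.6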
@@ -90,7 +93,8 @@ def detect_formats(
             "longitude_wgs_fr_metropole",
             "latitude_l93",
             "longitude_l93",
-        ]
+        ]
+        if f in scores_table.index
     ]
     scores_table.loc[formats_with_mandatory_label, :] = np.where(
         scores_table_labels.loc[formats_with_mandatory_label, :],
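For the formats listed above, a field-only match is not trusted: the label matrix acts as a mask, and the new `if f in scores_table.index` guard skips formats that the user excluded from testing. A rough illustration of the masking idea with toy scores (not the package's exact call, whose remaining arguments are not visible in this hunk):

import numpy as np
import pandas as pd

scores = pd.DataFrame({"col_a": [0.8], "col_b": [0.7]}, index=["code_postal"])
label_scores = pd.DataFrame({"col_a": [1.0], "col_b": [0.0]}, index=["code_postal"])

# keep the score where the label also matched, zero it otherwise
scores.loc[["code_postal"], :] = np.where(label_scores.loc[["code_postal"], :], scores, 0)
print(scores)  # col_a keeps 0.8, col_b drops to 0.0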
@@ -123,9 +127,7 @@ def detect_formats(
         analysis[detection_method] = {
             col_name: [
                 {
-                    "python_type": metier_to_python_type.get(
-                        detection["format"], "string"
-                    ),
+                    "python_type": metier_to_python_type.get(detection["format"], "string"),
                     **detection,
                 }
                 for detection in detections
@@ -136,9 +138,7 @@ def detect_formats(
     for detection_method in ["columns_fields", "columns_labels", "columns"]:
         analysis[detection_method] = {
             col_name: {
-                "python_type": metier_to_python_type.get(
-                    detection["format"], "string"
-                ),
+                "python_type": metier_to_python_type.get(detection["format"], "string"),
                 **detection,
             }
             for col_name, detection in analysis[detection_method].items()
csv_detective/detection/headers.py CHANGED

@@ -15,18 +15,16 @@ def detect_headers(file: TextIO, sep: str, verbose: bool = False) -> tuple[int,
     header = file.readline()
     position = file.tell()
     chaine = [c for c in header.replace("\n", "").split(sep) if c]
-    if chaine[-1] not in ["", "\n"] and all(
-        [mot not in ["", "\n"] for mot in chaine[1:-1]]
-    ):
+    if chaine[-1] not in ["", "\n"] and all([mot not in ["", "\n"] for mot in chaine[1:-1]]):
         next_row = file.readline()
         file.seek(position)
         if header != next_row:
             if verbose:
                 display_logs_depending_process_time(
-                    f
+                    f"Detected headers in {round(time() - start, 3)}s",
                     time() - start,
                 )
             return i, chaine
     if verbose:
-        logging.info(
+        logging.info("No header detected")
     return 0, None
csv_detective/detection/rows.py CHANGED

@@ -5,7 +5,7 @@ def remove_empty_first_rows(table: pd.DataFrame) -> tuple[pd.DataFrame, int]:
     """Analog process to detect_headers for csv files, determines how many rows to skip
     to end up with the header at the right place"""
     idx = 0
-    if all([str(c).startswith(
+    if all([str(c).startswith("Unnamed:") for c in table.columns]):
         # there is on offset between the index in the file (idx here)
         # and the index in the dataframe, because of the header
         idx = 1
csv_detective/detection/variables.py CHANGED

@@ -1,5 +1,5 @@
-from ast import literal_eval
 import logging
+from ast import literal_eval
 from time import time

 import pandas as pd

@@ -7,7 +7,9 @@ import pandas as pd
 from csv_detective.utils import display_logs_depending_process_time


-def detect_continuous_variable(
+def detect_continuous_variable(
+    table: pd.DataFrame, continuous_th: float = 0.9, verbose: bool = False
+):
     """
     Detects whether a column contains continuous variables. We consider a continuous column
     one that contains a considerable amount of float values.

@@ -34,16 +36,13 @@ def detect_continuous_variable(table: pd.DataFrame, continuous_th: float = 0.9,
             value = value.replace(",", ".")
             value = literal_eval(value)
             return type(value)
-
-        except:
+        except Exception:
             return False

     if verbose:
         start = time()
         logging.info("Detecting continuous columns")
-    res = table.apply(
-        lambda serie: check_threshold(serie.apply(parses_to_integer), continuous_th)
-    )
+    res = table.apply(lambda serie: check_threshold(serie.apply(parses_to_integer), continuous_th))
     if verbose:
         display_logs_depending_process_time(
             f"Detected {sum(res)} continuous columns in {round(time() - start, 3)}s",
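Putting the pieces of this hunk together, a column is flagged as continuous when enough of its cells parse to a number. A hedged, self-contained sketch of that idea; `check_threshold` is not shown in the diff, so its body below is an assumption:

from ast import literal_eval

import pandas as pd


def parses_to_integer(value: str):
    try:
        value = value.replace(",", ".")  # accept French decimal commas
        value = literal_eval(value)
        return type(value)
    except Exception:  # narrower than the bare `except:` removed by this change
        return False


def check_threshold(parsed: pd.Series, threshold: float) -> bool:
    # assumption: enough cells parse to a number for the column to count as continuous
    return parsed.astype(bool).mean() >= threshold


table = pd.DataFrame({"a": ["1,5", "2.0", "x"], "b": ["foo", "bar", "baz"]})
res = table.apply(lambda serie: check_threshold(serie.apply(parses_to_integer), 0.6))
print(res.to_dict())  # {'a': True, 'b': False}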
csv_detective/explore_csv.py CHANGED

@@ -55,7 +55,10 @@ def routine(
         dict: a dict with information about the csv and possible types for each column
     """

-    if not (
+    if not (
+        isinstance(save_results, bool)
+        or (isinstance(save_results, str) and save_results.endswith(".json"))
+    ):
         raise ValueError("`save_results` must be a bool or a valid path to a json file.")

     if verbose:

@@ -100,8 +103,7 @@ def routine(
     finally:
         if verbose:
             display_logs_depending_process_time(
-                f"Routine completed in {round(time() - start_routine, 3)}s",
-                time() - start_routine
+                f"Routine completed in {round(time() - start_routine, 3)}s", time() - start_routine
             )


@@ -119,7 +121,6 @@ def validate_then_detect(
     cast_json: bool = True,
     verbose: bool = False,
 ):
-
     if verbose:
         start_routine = time()
     if is_url(file_path):

@@ -170,8 +171,7 @@ def validate_then_detect(
     finally:
         if verbose:
             display_logs_depending_process_time(
-                f"Process completed in {round(time() - start_routine, 3)}s",
-                time() - start_routine
+                f"Process completed in {round(time() - start_routine, 3)}s", time() - start_routine
             )


@@ -226,8 +226,7 @@ def routine_minio(
     if location_dict is not None:
         if any(
             [
-                (location_key not in location_dict)
-                or (location_dict[location_key] is None)
+                (location_key not in location_dict) or (location_dict[location_key] is None)
                 for location_key in ["netloc", "bucket", "key"]
             ]
         ):
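The expanded condition in the first hunk accepts either a boolean or a `.json` path for `save_results`. A quick check of which values pass it (the condition is copied from the hunk, the rest is illustrative):

def _valid_save_results(save_results) -> bool:
    return isinstance(save_results, bool) or (
        isinstance(save_results, str) and save_results.endswith(".json")
    )

for value in [True, False, "analysis.json", "analysis.csv", 42]:
    print(value, _valid_save_results(value))
# True, False and "analysis.json" pass; "analysis.csv" and 42 make routine() raise ValueError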
csv_detective/load_tests.py CHANGED

@@ -1,8 +1,7 @@
 import os
 from typing import Union

-#
-from csv_detective import detect_fields, detect_labels
+from csv_detective import detect_fields, detect_labels  # noqa


 def get_all_packages(detect_type) -> list:

@@ -12,10 +11,7 @@ def get_all_packages(detect_type) -> list:
         for filename in filenames:
             file = os.path.join(dirpath, filename).replace(root_dir, "")
             if file.endswith("__init__.py"):
-                module = (
-                    file.replace("__init__.py", "")
-                    .replace("/", ".").replace("\\", ".")[:-1]
-                )
+                module = file.replace("__init__.py", "").replace("/", ".").replace("\\", ".")[:-1]
                 if module:
                     modules.append(detect_type + module)
     return modules

@@ -43,20 +39,15 @@ def return_all_tests(
     if "ALL" in user_input_tests or all(x[0] == "-" for x in user_input_tests):
         tests_to_do = [detect_type]
     else:
-        tests_to_do = [
-
-        ]
-        tests_skipped = [
-            f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"
-        ]
+        tests_to_do = [f"{detect_type}.{x}" for x in user_input_tests if x[0] != "-"]
+        tests_skipped = [f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"]
     all_tests = [
         # this is why we need to import detect_fields/labels
-        eval(x)
+        eval(x)
+        for x in all_packages
         if any([y == x[: len(y)] for y in tests_to_do])
         and all([y != x[: len(y)] for y in tests_skipped])
     ]
     # to remove groups of tests
-    all_tests = [
-        test for test in all_tests if "_is" in dir(test)
-    ]
+    all_tests = [test for test in all_tests if "_is" in dir(test)]
     return all_tests
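The rewritten list comprehensions make the include/exclude logic easier to follow: entries keep their `detect_type` prefix, a leading `-` marks a test to skip, and matching is done on module-path prefixes. A toy run of that filter (module names taken from the file list above):

detect_type = "detect_fields"
user_input_tests = ["FR.geo", "-FR.geo.adresse"]

tests_to_do = [f"{detect_type}.{x}" for x in user_input_tests if x[0] != "-"]
tests_skipped = [f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"]

all_packages = [
    "detect_fields.FR.geo.adresse",
    "detect_fields.FR.geo.code_postal",
    "detect_fields.other.email",
]
kept = [
    x
    for x in all_packages
    if any(y == x[: len(y)] for y in tests_to_do)
    and all(y != x[: len(y)] for y in tests_skipped)
]
print(kept)  # ['detect_fields.FR.geo.code_postal']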
csv_detective/output/__init__.py CHANGED

@@ -5,6 +5,7 @@ from typing import Optional, Union
 import pandas as pd

 from csv_detective.utils import is_url
+
 from .dataframe import cast_df
 from .profile import create_profile
 from .schema import generate_table_schema

@@ -24,7 +25,6 @@ def generate_output(
     verbose: bool = False,
     sheet_name: Optional[Union[str, int]] = None,
 ) -> Union[dict, tuple[dict, pd.DataFrame]]:
-
     if output_profile:
         analysis["profile"] = create_profile(
             table=table,

@@ -40,7 +40,7 @@ def generate_output(
     else:
         output_path = os.path.splitext(file_path)[0]
         if is_url(output_path):
-            output_path = output_path.split(
+            output_path = output_path.split("/")[-1]
         if analysis.get("sheet_name"):
             output_path += "_sheet-" + str(sheet_name)
         output_path += ".json"

@@ -48,11 +48,7 @@ def generate_output(
         json.dump(analysis, fp, indent=4, separators=(",", ": "), ensure_ascii=False)

     if output_schema:
-        analysis["schema"] = generate_table_schema(
-            analysis,
-            save_file=False,
-            verbose=verbose
-        )
+        analysis["schema"] = generate_table_schema(analysis, save_file=False, verbose=verbose)

     if output_df:
         return analysis, cast_df(
csv_detective/output/dataframe.py CHANGED

@@ -1,7 +1,7 @@
-from datetime import date, datetime
 import json
-from
+from datetime import date, datetime
 from time import time
+from typing import Optional, Union

 import pandas as pd


@@ -30,12 +30,16 @@ def cast(value: str, _type: str) -> Optional[Union[str, float, bool, date, datet
     raise ValueError(f"Unknown type `{_type}`")


-def cast_df(
+def cast_df(
+    df: pd.DataFrame, columns: dict, cast_json: bool = True, verbose: bool = False
+) -> pd.DataFrame:
     if verbose:
         start = time()
     output_df = pd.DataFrame()
     for col_name, detection in columns.items():
-        if detection["python_type"] == "string" or (
+        if detection["python_type"] == "string" or (
+            detection["python_type"] == "json" and not cast_json
+        ):
             # no change if detected type is string
             output_df[col_name] = df[col_name].copy()
         elif detection["python_type"] == "int":

@@ -49,7 +53,7 @@ def cast_df(df: pd.DataFrame, columns: dict, cast_json: bool = True, verbose: bo
         del df[col_name]
     if verbose:
         display_logs_depending_process_time(
-            f
+            f"Casting columns completed in {round(time() - start, 3)}s",
             time() - start,
         )
     return output_df
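With the signature now spelled out, `cast_df` takes the raw dataframe plus the per-column detections and returns a typed copy. A hypothetical call shape (the column detections below are invented for illustration):

import pandas as pd

from csv_detective.output.dataframe import cast_df

df = pd.DataFrame({"code": ["01", "02"], "population": ["118", "765"]})
columns = {
    "code": {"python_type": "string", "format": "code_departement"},
    "population": {"python_type": "int", "format": "int"},
}
typed = cast_df(df, columns, cast_json=True)
print(typed.dtypes)  # expected: population cast to integers, code left as strings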
csv_detective/output/example.py CHANGED

@@ -1,14 +1,14 @@
-from datetime import datetime
 import json
 import random
 import string
-from typing import Union, Optional, Any, Type
 import uuid
+from datetime import datetime
+from typing import Any, Optional, Type, Union

-from faker import Faker
 import pandas as pd
 import requests
 import rstr
+from faker import Faker

 fake = Faker()


@@ -135,7 +135,7 @@ def create_example_csv_file(
         return random.choice(enum)
     if num_range is None:
         num_range = [0, 1000]
-    if num_type
+    if num_type is int:
         return random.randint(num_range[0], num_range[1])
     else:
         return round(random.uniform(num_range[0], num_range[1]), 1)

@@ -179,7 +179,7 @@ def create_example_csv_file(
         "yearmonth": "date",
         "time": "time",
         "datetime": "datetime",
-        "array": "array"
+        "array": "array",
     }

     if schema_path:

@@ -188,7 +188,7 @@ def create_example_csv_file(
     else:
         with open(schema_path, encoding=encoding) as jsonfile:
             schema = json.load(jsonfile)
-        if
+        if "fields" not in schema.keys():
             raise ValueError("The schema must have a 'fields' key.")
         else:
             fields = [

@@ -198,12 +198,14 @@ def create_example_csv_file(
                     # when frformat is supported in TableSchema, we can build args for French standards
                     # linked to https://github.com/datagouv/fr-format/issues/26
                     "args": (
-                        build_args_from_constraints(f["constraints"])
+                        build_args_from_constraints(f["constraints"])
+                        if "constraints" in f.keys()
                         else build_args_from_constraints(f["arrayItem"]["constraints"])
                         if "arrayItem" in f.keys() and "constraints" in f["arrayItem"].keys()
                         else {}
-                    )
-                }
+                    ),
+                }
+                for f in schema["fields"]
             ]

             for k in range(len(fields)):

@@ -234,10 +236,8 @@ def create_example_csv_file(
     # would it be better to create by column or by row (as for now)?
     output = pd.DataFrame(
         [
-            [
-
-            for f in fields
-            ] for _ in range(file_length)
+            [types_to_func.get(f["type"], "str")(**f["args"]) for f in fields]
+            for _ in range(file_length)
         ],
         columns=[f["name"] for f in fields],
     )
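The final hunk restores the row-building comprehension: one generator per TableSchema type, called once per field and per row. A toy version of that pattern, with invented field specs and generators:

import random

import pandas as pd

types_to_func = {"integer": lambda **kw: random.randint(0, 1000), "string": lambda **kw: "abc"}
fields = [
    {"name": "id", "type": "integer", "args": {}},
    {"name": "label", "type": "string", "args": {}},
]
file_length = 3
output = pd.DataFrame(
    [[types_to_func[f["type"]](**f["args"]) for f in fields] for _ in range(file_length)],
    columns=[f["name"] for f in fields],
)
print(output)  # 3 rows, one random value per field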
csv_detective/output/profile.py CHANGED

@@ -1,5 +1,5 @@
-from collections import defaultdict
 import logging
+from collections import defaultdict
 from time import time

 import pandas as pd

@@ -29,15 +29,12 @@ def create_profile(
     safe_table = table.copy()
     if not limited_output:
         dict_cols_fields = {
-            k: v[0] if v else {
+            k: v[0] if v else {"python_type": "string", "format": "string", "score": 1.0}
             for k, v in dict_cols_fields.items()
         }
-    dtypes = {
-        k: map_python_types.get(v["python_type"], str)
-        for k, v in dict_cols_fields.items()
-    }
+    dtypes = {k: map_python_types.get(v["python_type"], str) for k, v in dict_cols_fields.items()}
     for c in safe_table.columns:
-        if dtypes[c]
+        if dtypes[c] is float:
             safe_table[c] = safe_table[c].apply(
                 lambda s: float_casting(s) if isinstance(s, str) else s
             )

@@ -48,18 +45,26 @@ def create_profile(
             int,
         ]:
             profile[c].update(
-                min=prevent_nan(
-
-
-
-
-
-
-
-
-
-
-
+                min=prevent_nan(
+                    map_python_types.get(dict_cols_fields[c]["python_type"], str)(
+                        safe_table[c].min()
+                    )
+                ),
+                max=prevent_nan(
+                    map_python_types.get(dict_cols_fields[c]["python_type"], str)(
+                        safe_table[c].max()
+                    )
+                ),
+                mean=prevent_nan(
+                    map_python_types.get(dict_cols_fields[c]["python_type"], str)(
+                        safe_table[c].mean()
+                    )
+                ),
+                std=prevent_nan(
+                    map_python_types.get(dict_cols_fields[c]["python_type"], str)(
+                        safe_table[c].std()
+                    )
+                ),
             )
             tops_bruts = (
                 safe_table[safe_table[c].notna()][c]

@@ -70,10 +75,12 @@ def create_profile(
             )
             tops = []
             for tb in tops_bruts:
-                tops.append(
-
-
-
+                tops.append(
+                    {
+                        "count": tb["count"],
+                        "value": tb[c],
+                    }
+                )
             profile[c].update(
                 tops=tops,
                 nb_distinct=safe_table[c].nunique(),
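Each numeric column's min/max/mean/std is cast back to the detected python type and guarded against NaN. A hedged sketch of that pattern; `prevent_nan` and `map_python_types` are not shown in the diff, so their definitions below are assumptions:

import math

import pandas as pd

map_python_types = {"int": int, "float": float}  # assumption


def prevent_nan(x):
    # assumption: NaN aggregates become None so they serialize cleanly to JSON
    return None if isinstance(x, float) and math.isnan(x) else x


col = pd.Series([1.0, 2.0, float("nan")])
python_type = "float"
stats = {
    "min": prevent_nan(map_python_types.get(python_type, str)(col.min())),
    "max": prevent_nan(map_python_types.get(python_type, str)(col.max())),
    "mean": prevent_nan(map_python_types.get(python_type, str)(col.mean())),
    "std": prevent_nan(map_python_types.get(python_type, str)(col.std())),
}
print(stats)  # each aggregate cast back to the detected python type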
csv_detective/output/schema.py CHANGED

@@ -1,14 +1,14 @@
-from datetime import datetime
 import json
 import logging
 import os
 import tempfile
+from datetime import datetime
 from time import time
 from typing import Optional

 from botocore.exceptions import ClientError

-from csv_detective.s3_utils import
+from csv_detective.s3_utils import download_from_minio, get_s3_client, upload_to_minio
 from csv_detective.utils import display_logs_depending_process_time


@@ -26,13 +26,11 @@ def get_description(format: str) -> str:
         "insee_canton": "Le nom du canton",
         "latitude_l93": "La latitude au format Lambert 93",
         "latitude_wgs_fr_metropole": (
-            "La latitude au format WGS. Ne concerne que des latitudes "
-            "de la métropole française"
+            "La latitude au format WGS. Ne concerne que des latitudes de la métropole française"
         ),
         "longitude_l93": "La longitude au format Lambert 93",
         "longitude_wgs_fr_metropole": (
-            "La longitude au format WGS. Ne concerne que des longitudes "
-            "de la métropole française"
+            "La longitude au format WGS. Ne concerne que des longitudes de la métropole française"
         ),
         "pays": "Le nom du pays",
         "region": "Le nom de la région",

@@ -86,13 +84,13 @@ def get_pattern(format: str) -> str:
         ),
         "uai": r"^(0[0-8][0-9]|09[0-5]|9[78][0-9]|[67]20)[0-9]{4}[A-Z]$",
         "email": r"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$",
-        "twitter": r
-        "mongo_object_id": r
-        "uuid": r
+        "twitter": r"^@[A-Za-z0-9_]+$",
+        "mongo_object_id": r"^[0-9a-fA-F]{24}$",
+        "uuid": r"^[{]?[0-9a-fA-F]{8}" + "-?([0-9a-fA-F]{4}-?)" + "{3}[0-9a-fA-F]{12}[}]?$",
         "url": (
-            r
-            r
-        )
+            r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]"
+            r"{1,6}\b(?:[-a-zA-Z0-9()@:%_\+.~#?&\/=]*)$"
+        ),
     }
     if format in format_to_pattern:
         return {"pattern": format_to_pattern[format]}

@@ -210,7 +208,7 @@ def generate_table_schema(
     key: Optional[str] = None,
     minio_user: Optional[str] = None,
     minio_pwd: Optional[str] = None,
-    verbose: bool = False
+    verbose: bool = False,
 ) -> dict:
     """Generates a table schema from the analysis report


@@ -236,7 +234,7 @@ def generate_table_schema(
             "example": get_example(field_report["format"]),
             "type": get_validata_type(field_report["format"]),
             "formatFR": field_report["format"],
-            "constraints": get_constraints(field_report["format"])
+            "constraints": get_constraints(field_report["format"]),
         }
         for header, field_report in analysis_report["columns"].items()
     ]

@@ -255,12 +253,9 @@ def generate_table_schema(
         "sources": [
             {
                 "title": "Spécification Tableschema",
-                "path": "https://specs.frictionlessdata.io/table-schema"
+                "path": "https://specs.frictionlessdata.io/table-schema",
             },
-            {
-                "title": "schema.data.gouv.fr",
-                "path": "https://schema.data.gouv.fr"
-            }
+            {"title": "schema.data.gouv.fr", "path": "https://schema.data.gouv.fr"},
         ],
         "created": datetime.today().strftime("%Y-%m-%d"),
         "lastModified": datetime.today().strftime("%Y-%m-%d"),

@@ -278,7 +273,9 @@ def generate_table_schema(
     }

     if verbose:
-        display_logs_depending_process_time(
+        display_logs_depending_process_time(
+            f"Created schema in {round(time() - start, 3)}s", time() - start
+        )

     if not save_file:
         return schema

@@ -301,9 +298,9 @@ def generate_table_schema(
     if "Contents" in tableschema_objects:
         tableschema_keys = [
             tableschema["Key"]
-            for tableschema in client.list_objects(
-
-
+            for tableschema in client.list_objects(Bucket=bucket, Prefix=key, Delimiter="/")[
+                "Contents"
+            ]
         ]
         tableschema_versions = [
             os.path.splitext(tableschema_key)[0].split("_")[-1]
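The regex patterns are now written inline rather than split across continuation lines; for example the twitter handle pattern from this hunk:

import re

pattern = r"^@[A-Za-z0-9_]+$"
print(bool(re.match(pattern, "@data_gouv")))  # True
print(bool(re.match(pattern, "data_gouv")))   # False, missing the leading @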
csv_detective/output/utils.py CHANGED

@@ -19,14 +19,17 @@ def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
         # no need to specify int and float everywhere, they are deprioritized anyway
         ("int", ("float",)),
         # bool over everything
-        (
-        "
-
-
-
-
-
-
+        (
+            "booleen",
+            (
+                "latitude_l93",
+                "latitude_wgs",
+                "latitude_wgs_fr_metropole",
+                "longitude_l93",
+                "longitude_wgs",
+                "longitude_wgs_fr_metropole",
+            ),
+        ),
         ("geojson", ("json",)),
         # latlon over lonlat if no longitude allows to discriminate
         ("latlon_wgs", ("json", "lonlat_wgs")),

@@ -49,13 +52,10 @@ def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
     for prio_format, secondary_formats in priorities:
         if prio_format in detected_formats:
             for secondary in secondary_formats:
-                if (
-
-
-
-                    >= return_dict_cols[column_name][secondary]
-                    or return_dict_cols[column_name][prio_format] >= 1
-                )
+                if secondary in detected_formats and (
+                    return_dict_cols[column_name][prio_format]
+                    >= return_dict_cols[column_name][secondary]
+                    or return_dict_cols[column_name][prio_format] >= 1
                 ):
                     formats_to_remove.add(secondary)

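The new `secondary in detected_formats` guard avoids touching formats that were never detected for the column; the rest of the priority rule is unchanged. A toy walk-through of the removal loop (scores invented):

priorities = [("booleen", ("latitude_wgs", "longitude_wgs"))]
column_scores = {"booleen": 1.0, "latitude_wgs": 0.7}  # stands in for return_dict_cols[column_name]

detected_formats = set(column_scores)
formats_to_remove = set()
for prio_format, secondary_formats in priorities:
    if prio_format in detected_formats:
        for secondary in secondary_formats:
            if secondary in detected_formats and (
                column_scores[prio_format] >= column_scores[secondary]
                or column_scores[prio_format] >= 1
            ):
                formats_to_remove.add(secondary)
print(formats_to_remove)  # {'latitude_wgs'}; longitude_wgs is skipped since it was never detected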