csv-detective 0.9.2.dev1874__py3-none-any.whl → 0.9.3.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -2
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_fields/other/email/__init__.py +2 -2
- csv_detective/detect_fields/temp/date/__init__.py +1 -2
- csv_detective/detect_fields/temp/datetime_aware/__init__.py +7 -6
- csv_detective/detect_fields/temp/datetime_naive/__init__.py +4 -8
- csv_detective/detection/engine.py +1 -2
- csv_detective/detection/formats.py +14 -8
- csv_detective/detection/headers.py +2 -2
- csv_detective/explore_csv.py +11 -119
- csv_detective/load_tests.py +1 -2
- csv_detective/output/__init__.py +11 -14
- csv_detective/output/dataframe.py +1 -2
- csv_detective/output/example.py +12 -12
- csv_detective/output/profile.py +13 -10
- csv_detective/output/schema.py +7 -86
- csv_detective/parsing/excel.py +2 -3
- csv_detective/parsing/load.py +3 -4
- csv_detective/utils.py +4 -3
- csv_detective/validate.py +4 -5
- {csv_detective-0.9.2.dev1874.dist-info → csv_detective-0.9.3.dev0.dist-info}/METADATA +18 -26
- {csv_detective-0.9.2.dev1874.dist-info → csv_detective-0.9.3.dev0.dist-info}/RECORD +34 -36
- tests/test_fields.py +37 -4
- tests/test_file.py +68 -0
- venv/bin/activate_this.py +1 -1
- csv_detective/s3_utils.py +0 -44
- venv/bin/jp.py +0 -54
- {csv_detective-0.9.2.dev1874.dist-info → csv_detective-0.9.3.dev0.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.2.dev1874.dist-info → csv_detective-0.9.3.dev0.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.9.2.dev1874.dist-info → csv_detective-0.9.3.dev0.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.9.2.dev1874.dist-info → csv_detective-0.9.3.dev0.dist-info}/top_level.txt +0 -0
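Many of the smaller per-module edits below share one theme: typing.Optional and typing.Union imports are dropped and annotations are written as PEP 604 unions. An illustrative fragment (the names here are made up for the example, they are not lines from this diff):

    from typing import Optional, Union

    def detect(sep: Optional[str] = None, tests: Union[str, list[str]] = "ALL") -> Optional[dict]: ...

    # equivalent PEP 604 spelling, as used throughout 0.9.3.dev0 (no typing import needed):
    def detect(sep: str | None = None, tests: str | list[str] = "ALL") -> dict | None: ...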
csv_detective/__init__.py
CHANGED

csv_detective/detect_fields/other/email/__init__.py
CHANGED

@@ -1,10 +1,10 @@
 import re
 
-PROPORTION =
+PROPORTION = 0.9
 
 
 def _is(val):
     """Detects e-mails"""
     return isinstance(val, str) and bool(
-        re.match(r"^[a-
+        re.match(r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$", val, re.IGNORECASE)
     )
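For reference, the e-mail detector as reassembled from the added lines of the hunk above:

    import re

    PROPORTION = 0.9


    def _is(val):
        """Detects e-mails"""
        return isinstance(val, str) and bool(
            re.match(r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$", val, re.IGNORECASE)
        )


    # _is("User@Example.ORG") -> True ; _is("not-an-email") -> False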
csv_detective/detect_fields/temp/date/__init__.py
CHANGED

@@ -1,6 +1,5 @@
 import re
 from datetime import datetime
-from typing import Optional
 
 from dateparser import parse as date_parser
 from dateutil.parser import ParserError

@@ -10,7 +9,7 @@ PROPORTION = 1
 # /!\ this is only for dates, not datetimes which are handled by other utils
 
 
-def date_casting(val: str) ->
+def date_casting(val: str) -> datetime | None:
     """For performance reasons, we try first with dateutil and fallback on dateparser"""
     try:
         return dateutil_parser(val)
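The docstring above describes a two-step parse: dateutil first, dateparser as fallback. A minimal self-contained sketch of that pattern (the exact exception handling in temp/date is not visible in this hunk, so the except clause here is an assumption):

    from datetime import datetime

    from dateparser import parse as date_parser
    from dateutil.parser import ParserError, parse as dateutil_parser


    def date_casting(val: str) -> datetime | None:
        """Try dateutil first (fast), fall back on dateparser (slower, more permissive)."""
        try:
            return dateutil_parser(val)
        except (ParserError, OverflowError):  # assumed failure modes, not taken from the diff
            return date_parser(val)


    # date_casting("2024-06-01") -> datetime(2024, 6, 1, 0, 0)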
csv_detective/detect_fields/temp/datetime_aware/__init__.py
CHANGED

@@ -1,24 +1,25 @@
 import re
-from typing import Any
+from typing import Any
 
 from csv_detective.detect_fields.temp.date import aaaammjj_pattern, date_casting
 
 PROPORTION = 1
 threshold = 0.7
 
-# matches AAAA-MM-JJTHH:MM:SS(.dddddd)±HH:MM with any of the listed separators for the date OR NO SEPARATOR
+# matches AAAA-MM-JJTHH:MM:SS(.dddddd)(±HH:MM|Z) with any of the listed separators for the date OR NO SEPARATOR
 pat = (
     aaaammjj_pattern.replace("$", "")
-    + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})
+    + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})"
+    + r"?(([+-](0\d|1[0-9]|2[0-3]):([0-5][0-9]))|Z)$"
 )
 
 
-def _is(val:
+def _is(val: Any | None) -> bool:
     """Detects timezone-aware datetimes only"""
     # early stops, to cut processing time
-    #
+    # 16 is the minimal length of a datetime format YYMMDDTHH:MM:SSZ
     # 32 is the maximal length of an ISO datetime format YYYY-MM-DDTHH:MM:SS.dddddd+HH:MM, keeping some slack
-    if not isinstance(val, str) or len(val) > 35 or len(val) <
+    if not isinstance(val, str) or len(val) > 35 or len(val) < 16:
         return False
     # if usual format, no need to parse
     if bool(re.match(pat, val)):
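The new pattern accepts either a numeric UTC offset or a literal Z. A quick way to see the effect, using an illustrative stand-in for aaaammjj_pattern (which is defined in temp/date and not shown in this diff):

    import re

    # stand-in for aaaammjj_pattern, an assumption made for the sake of the example
    date_part = r"^(\d{4})[-/\.]?(0[1-9]|1[0-2])[-/\.]?(0[1-9]|[12]\d|3[01])"
    pat = (
        date_part
        + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})"
        + r"?(([+-](0\d|1[0-9]|2[0-3]):([0-5][0-9]))|Z)$"
    )

    for value in ("2024-06-01T12:30:00+02:00", "2024-06-01T12:30:00Z", "2024-06-01T12:30:00"):
        print(value, bool(re.match(pat, value)))
    # the offset-less value does not match here: it is the naive detector's job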
csv_detective/detect_fields/temp/datetime_naive/__init__.py
CHANGED

@@ -1,5 +1,5 @@
 import re
-from typing import Any
+from typing import Any
 
 from csv_detective.detect_fields.temp.date import aaaammjj_pattern, date_casting
 

@@ -9,11 +9,11 @@ threshold = 0.7
 # matches AAAA-MM-JJTHH:MM:SS(.dddddd)Z with any of the listed separators for the date OR NO SEPARATOR
 pat = (
     aaaammjj_pattern.replace("$", "")
-    + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})
+    + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})?$"
 )
 
 
-def _is(val:
+def _is(val: Any | None) -> bool:
     """Detects naive datetimes only"""
     # early stops, to cut processing time
     # 15 is the minimal length of a datetime format YYMMDDTHH:MM:SS

@@ -26,8 +26,4 @@ def _is(val: Optional[Any]) -> bool:
     if sum([char.isdigit() or char in {"-", "/", ":", " "} for char in val]) / len(val) < threshold:
         return False
     res = date_casting(val)
-    return (
-        res is not None
-        and bool(res.hour or res.minute or res.second or res.microsecond)
-        and not bool(res.tzinfo)
-    )
+    return res is not None and not bool(res.tzinfo)
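The last hunk relaxes the final check: a parsed value no longer needs a non-zero time component, it only needs to parse and carry no tzinfo. Isolating just that condition (the real _is also applies the length and character-ratio early exits shown above):

    from dateutil import parser

    res = parser.parse("2024-06-01 00:00:00")
    # old rule: also required bool(res.hour or res.minute or res.second or res.microsecond), False here
    # new rule: parseable and naive is enough
    print(res is not None and not bool(res.tzinfo))  # True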
csv_detective/detection/engine.py
CHANGED

@@ -1,5 +1,4 @@
 from time import time
-from typing import Optional
 
 import magic
 import requests

@@ -16,7 +15,7 @@ engine_to_file = {
 }
 
 
-def detect_engine(file_path: str, verbose=False) ->
+def detect_engine(file_path: str, verbose=False) -> str | None:
     if verbose:
         start = time()
     mapping = {
csv_detective/detection/formats.py
CHANGED

@@ -1,6 +1,5 @@
 import logging
 from collections import defaultdict
-from typing import Union
 
 import numpy as np
 import pandas as pd

@@ -22,7 +21,7 @@ def detect_formats(
     table: pd.DataFrame,
     analysis: dict,
     file_path: str,
-    user_input_tests:
+    user_input_tests: str | list[str] = "ALL",
     limited_output: bool = True,
     skipna: bool = True,
     verbose: bool = False,

@@ -30,7 +29,7 @@ def detect_formats(
     on_sample = len(table) > MAX_ROWS_ANALYSIS
     if on_sample:
         if verbose:
-            logging.warning(f"File is too long, analysing
+            logging.warning(f"File is too long, analysing a sample of {MAX_ROWS_ANALYSIS} rows")
         table = build_sample(table)
 
     if table.empty:

@@ -183,13 +182,15 @@ def build_sample(table: pd.DataFrame) -> pd.DataFrame:
     samples = pd.concat(
         [
             # one row with the minimum of the column
-            table.loc[table[col] ==
+            table.loc[table[col] == val].iloc[[0]]
             for col in table.columns
+            if not pd.isna(val := table[col].dropna().min())
         ]
         + [
             # one row with the maximum of the column
-            table.loc[table[col] ==
+            table.loc[table[col] == val].iloc[[0]]
             for col in table.columns
+            if not pd.isna(val := table[col].dropna().max())
         ]
         + [
             # one row with a NaN value if the column has any

@@ -199,7 +200,12 @@ def build_sample(table: pd.DataFrame) -> pd.DataFrame:
         ],
         ignore_index=True,
     )
-    return
-
-
+    return (
+        pd.concat(
+            [samples, table.sample(n=MAX_ROWS_ANALYSIS - len(samples), random_state=1)],
+            ignore_index=True,
+        )
+        # this is very unlikely but we never know
+        if len(samples) <= MAX_ROWS_ANALYSIS
+        else samples.sample(n=MAX_ROWS_ANALYSIS, random_state=1)
     )
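Condensed, the reworked build_sample logic above looks roughly like this. A sketch: MAX_ROWS_ANALYSIS is given an illustrative value, and the real function also keeps one row per column containing a NaN, which is omitted here.

    import pandas as pd

    MAX_ROWS_ANALYSIS = 5  # illustrative value; the real constant lives elsewhere in csv_detective


    def build_sample_sketch(table: pd.DataFrame) -> pd.DataFrame:
        # one row carrying each column's min and one carrying its max, skipping all-NaN columns
        samples = pd.concat(
            [
                table.loc[table[col] == val].iloc[[0]]
                for col in table.columns
                if not pd.isna(val := table[col].dropna().min())
            ]
            + [
                table.loc[table[col] == val].iloc[[0]]
                for col in table.columns
                if not pd.isna(val := table[col].dropna().max())
            ],
            ignore_index=True,
        )
        if len(samples) <= MAX_ROWS_ANALYSIS:
            # pad with random rows; the caller only samples tables longer than MAX_ROWS_ANALYSIS
            return pd.concat(
                [samples, table.sample(n=MAX_ROWS_ANALYSIS - len(samples), random_state=1)],
                ignore_index=True,
            )
        return samples.sample(n=MAX_ROWS_ANALYSIS, random_state=1)


    df = pd.DataFrame({"a": [5, 1, 3, 2, 9, 7, 4], "b": list("qwertyu")})
    print(build_sample_sketch(df))  # min/max rows of each column, padded to 5 rows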
csv_detective/detection/headers.py
CHANGED

@@ -1,11 +1,11 @@
 import logging
 from time import time
-from typing import
+from typing import TextIO
 
 from csv_detective.utils import display_logs_depending_process_time
 
 
-def detect_headers(file: TextIO, sep: str, verbose: bool = False) -> tuple[int,
+def detect_headers(file: TextIO, sep: str, verbose: bool = False) -> tuple[int, list | None]:
     """Tests 10 first rows for possible header (in case header is not 1st row)"""
     if verbose:
         start = time()
csv_detective/explore_csv.py
CHANGED

@@ -1,16 +1,11 @@
-import json
 import logging
-import os
-import tempfile
 from time import time
-from typing import Optional, Union
 
 import pandas as pd
 
 from csv_detective.detection.formats import detect_formats
-from csv_detective.output import generate_output
+from csv_detective.output import generate_output
 from csv_detective.parsing.load import load_file
-from csv_detective.s3_utils import download_from_minio, upload_to_minio
 from csv_detective.utils import display_logs_depending_process_time, is_url
 from csv_detective.validate import validate
 

@@ -20,24 +15,24 @@ logging.basicConfig(level=logging.INFO)
 def routine(
     file_path: str,
     num_rows: int = 500,
-    user_input_tests:
+    user_input_tests: str | list[str] = "ALL",
     limited_output: bool = True,
-    save_results:
-    encoding:
-    sep:
+    save_results: bool | str = True,
+    encoding: str | None = None,
+    sep: str | None = None,
     skipna: bool = True,
     output_profile: bool = False,
     output_schema: bool = False,
     output_df: bool = False,
     cast_json: bool = True,
     verbose: bool = False,
-    sheet_name:
-) ->
-    """Returns a dict with information about the
+    sheet_name: str | int | None = None,
+) -> dict | tuple[dict, pd.DataFrame]:
+    """Returns a dict with information about the table and possible
     column contents, and if requested the DataFrame with columns cast according to analysis.
 
     Args:
-        file_path: local path to
+        file_path: local path or URL to file
        num_rows: number of rows to sample from the file for analysis ; -1 for analysis
            of the whole file
        user_input_tests: tests to run on the file

@@ -111,9 +106,9 @@ def validate_then_detect(
     file_path: str,
     previous_analysis: dict,
     num_rows: int = 500,
-    user_input_tests:
+    user_input_tests: str | list[str] = "ALL",
     limited_output: bool = True,
-    save_results:
+    save_results: bool | str = True,
     skipna: bool = True,
     output_profile: bool = False,
     output_schema: bool = False,
@@ -173,106 +168,3 @@ def validate_then_detect(
         display_logs_depending_process_time(
             f"Process completed in {round(time() - start_routine, 3)}s", time() - start_routine
         )
-
-
-def routine_minio(
-    csv_minio_location: dict[str, str],
-    output_minio_location: dict[str, str],
-    tableschema_minio_location: dict[str, str],
-    minio_user: str,
-    minio_pwd: str,
-    **kwargs,
-):
-    """Returns a dict with information about the csv table and possible
-    column contents.
-
-    Args:
-        csv_minio_location: dict with Minio URL, bucket and key of the CSV file
-        output_minio_location: Minio URL, bucket and key to store output file. None if
-            not uploading to Minio.
-        tableschema_minio_location: Minio URL, bucket and key to store tableschema file.
-            None if not uploading the tableschema to Minio.
-        minio_user: user name for the minio instance
-        minio_pwd: password for the minio instance
-        kwargs: arguments for routine
-
-    Returns:
-        dict: a dict with information about the csv and possible types for each column
-    """
-
-    if (
-        (
-            any(
-                [
-                    location_dict is not None
-                    for location_dict in [
-                        csv_minio_location,
-                        output_minio_location,
-                        tableschema_minio_location,
-                    ]
-                ]
-            )
-        )
-        and (minio_user is None)
-        or (minio_pwd is None)
-    ):
-        raise ValueError("Minio credentials are required if using Minio")
-
-    for location_dict in [
-        csv_minio_location,
-        output_minio_location,
-        tableschema_minio_location,
-    ]:
-        if location_dict is not None:
-            if any(
-                [
-                    (location_key not in location_dict) or (location_dict[location_key] is None)
-                    for location_key in ["netloc", "bucket", "key"]
-                ]
-            ):
-                raise ValueError("Minio location dict must contain url, bucket and key")
-
-    file_path = tempfile.NamedTemporaryFile(delete=False).name
-    download_from_minio(
-        netloc=csv_minio_location["netloc"],
-        bucket=csv_minio_location["bucket"],
-        key=csv_minio_location["key"],
-        filepath=file_path,
-        minio_user=minio_user,
-        minio_pwd=minio_pwd,
-    )
-
-    analysis = routine(
-        file_path,
-        save_results=True,
-        **kwargs,
-    )
-
-    # Write report JSON file.
-    output_path_to_store_minio_file = os.path.splitext(file_path)[0] + ".json"
-    with open(output_path_to_store_minio_file, "w", encoding="utf8") as fp:
-        json.dump(analysis, fp, indent=4, separators=(",", ": "))
-
-    upload_to_minio(
-        netloc=output_minio_location["netloc"],
-        bucket=output_minio_location["bucket"],
-        key=output_minio_location["key"],
-        filepath=output_path_to_store_minio_file,
-        minio_user=minio_user,
-        minio_pwd=minio_pwd,
-    )
-
-    os.remove(output_path_to_store_minio_file)
-    os.remove(file_path)
-
-    generate_table_schema(
-        analysis_report=analysis,
-        save_file=True,
-        netloc=tableschema_minio_location["netloc"],
-        bucket=tableschema_minio_location["bucket"],
-        key=tableschema_minio_location["key"],
-        minio_user=minio_user,
-        minio_pwd=minio_pwd,
-    )
-
-    return analysis
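With routine_minio removed, calling the library goes through routine. A usage sketch based on the updated signature above: the file name is a placeholder, and the top-level import assumes routine is still re-exported by the package (otherwise import it from csv_detective.explore_csv).

    from csv_detective import routine

    analysis, df = routine(
        "data.csv",            # placeholder path; URLs are also accepted per the updated docstring
        num_rows=-1,           # analyse the whole file (required when building a profile)
        save_results=False,
        output_profile=True,   # adds analysis["profile"]
        output_schema=True,    # adds analysis["schema"]
        output_df=True,        # also return the DataFrame cast according to the detected types
    )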
csv_detective/load_tests.py
CHANGED

@@ -1,5 +1,4 @@
 import os
-from typing import Union
 
 from csv_detective import detect_fields, detect_labels  # noqa
 

@@ -18,7 +17,7 @@ def get_all_packages(detect_type) -> list:
 
 
 def return_all_tests(
-    user_input_tests:
+    user_input_tests: str | list,
     detect_type: str,
 ) -> list:
     """
csv_detective/output/__init__.py
CHANGED

@@ -1,6 +1,5 @@
 import json
 import os
-from typing import Optional, Union
 
 import pandas as pd
 

@@ -17,22 +16,15 @@ def generate_output(
     file_path: str,
     num_rows: int = 500,
     limited_output: bool = True,
-    save_results:
+    save_results: bool | str = True,
     output_profile: bool = False,
     output_schema: bool = False,
     output_df: bool = False,
     cast_json: bool = True,
     verbose: bool = False,
-    sheet_name:
-) ->
-    if output_profile
-        # to create the profile we have to cast columns, so using the dedicated function
-        table = cast_df(
-            df=table,
-            columns=analysis["columns"],
-            cast_json=cast_json,
-            verbose=verbose,
-        )
+    sheet_name: str | int | None = None,
+) -> dict | tuple[dict, pd.DataFrame]:
+    if output_profile:
         analysis["profile"] = create_profile(
             table=table,
             columns=analysis["columns"],

@@ -58,8 +50,13 @@ def generate_output(
         )
 
     if output_schema:
-        analysis["schema"] = generate_table_schema(analysis,
+        analysis["schema"] = generate_table_schema(analysis, save_results=False, verbose=verbose)
 
     if output_df:
-        return analysis,
+        return analysis, cast_df(
+            df=table,
+            columns=analysis["columns"],
+            cast_json=cast_json,
+            verbose=verbose,
+        )
     return analysis
csv_detective/output/dataframe.py
CHANGED

@@ -1,7 +1,6 @@
 import json
 from datetime import date, datetime
 from time import time
-from typing import Optional, Union
 
 import pandas as pd
 

@@ -11,7 +10,7 @@ from csv_detective.detect_fields.temp.date import date_casting
 from csv_detective.utils import display_logs_depending_process_time
 
 
-def cast(value: str, _type: str) ->
+def cast(value: str, _type: str) -> str | float | bool | date | datetime | None:
     if not isinstance(value, str) or not value:
         # None is the current default value in hydra, should we keep this?
         return None
csv_detective/output/example.py
CHANGED

@@ -3,7 +3,7 @@ import random
 import string
 import uuid
 from datetime import datetime
-from typing import Any,
+from typing import Any, Type
 
 import pandas as pd
 import requests

@@ -14,10 +14,10 @@ fake = Faker()
 
 
 def create_example_csv_file(
-    fields:
-    schema_path:
+    fields: dict | None = None,
+    schema_path: str | None = None,
     file_length: int = 10,
-    output_name:
+    output_name: str | None = "example_file.csv",
     output_sep: str = ";",
     encoding: str = "utf-8",
     ignore_required: bool = False,

@@ -49,8 +49,8 @@ create_example_csv_file(
     def _string(
         length: int = 10,
         required: bool = True,
-        pattern:
-        enum:
+        pattern: str | None = None,
+        enum: str | None = None,
     ) -> str:
         if potential_skip(required):
             return ""

@@ -70,7 +70,7 @@ create_example_csv_file(
         return str(uuid.uuid4())
 
     def _date(
-        date_range:
+        date_range: list[str] | None = None,
         format: str = "%Y-%m-%d",
         required: bool = True,
     ) -> str:

@@ -99,7 +99,7 @@ create_example_csv_file(
         return fake.time(format)
 
     def _datetime(
-        datetime_range:
+        datetime_range: list[str] | None = None,
         format: str = "%Y-%m-%d %H-%M-%S",
         required: bool = True,
     ) -> str:

@@ -123,11 +123,11 @@ create_example_csv_file(
         return f"http://{rstr.domainsafe()}.{rstr.letters(3)}/{rstr.urlsafe()}"
 
     def _number(
-        num_type: Type[
-        num_range:
-        enum:
+        num_type: Type[int | float] = int,
+        num_range: list[float] | None = None,
+        enum: list | None = None,
         required: bool = True,
-    ) ->
+    ) -> int | float:
         assert num_range is None or len(num_range) == 2
         if potential_skip(required):
             return ""
csv_detective/output/profile.py
CHANGED

@@ -4,7 +4,8 @@ from time import time
 
 import pandas as pd
 
-from csv_detective.
+from csv_detective.detect_fields.other.float import float_casting
+from csv_detective.utils import cast_prevent_nan, display_logs_depending_process_time
 
 
 def create_profile(

@@ -18,11 +19,6 @@ create_profile(
     if verbose:
         start = time()
         logging.info("Creating profile")
-    map_python_types = {
-        "string": str,
-        "int": float,
-        "float": float,
-    }
 
     if num_rows > 0:
         raise ValueError("To create profiles num_rows has to be set to -1")

@@ -35,12 +31,19 @@ create_profile(
     for c in table.columns:
         # for numerical formats we want min, max, mean, std
         if columns[c]["python_type"] in ["float", "int"]:
+            # we locally cast the column to perform the operations, using the same method as in cast_df
+            cast_col = (
+                table[c].astype(pd.Int64Dtype())
+                if columns[c]["python_type"] == "int"
+                else table[c].apply(lambda x: float_casting(x) if isinstance(x, str) else pd.NA)
+            )
             profile[c].update(
-                min=
-                max=
-                mean=
-                std=
+                min=cast_prevent_nan(cast_col.min(), columns[c]["python_type"]),
+                max=cast_prevent_nan(cast_col.max(), columns[c]["python_type"]),
+                mean=cast_prevent_nan(cast_col.mean(), columns[c]["python_type"]),
+                std=cast_prevent_nan(cast_col.std(), columns[c]["python_type"]),
            )
+            del cast_col
         # for all formats we want most frequent values, nb unique values and nb missing values
         tops_bruts = (
             table.loc[table[c].notna(), c]