PyPI - csv-detective - Versions diffs - 0.7.5.dev1239__py3-none-any.whl → 0.7.5.dev1277__py3-none-any.whl - Mend

csv-detective 0.7.5.dev1239__py3-none-any.whl → 0.7.5.dev1277__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

csv_detective/cli.py CHANGED Viewed

@@ -8,37 +8,52 @@ from .explore_csv import routine
 def run():
-    explorer = argparse.ArgumentParser(description='Get the arguments we want')
+    explorer = argparse.ArgumentParser(description="Analyse a tabular file")
     explorer.add_argument(
-        'file_path',
+        "file_path",
         type=str,
-        help='Enter path of csv file to explore'
+        help="Enter path of tabular file to explore"
     )
     explorer.add_argument(
-        '-n',
-        '--num_rows',
-        dest='num_rows',
+        "-n",
+        "--num_rows",
+        dest="num_rows",
         type=int,
-        nargs='?',
-        help='Number of rows to use for detection'
+        nargs="?",
+        help="Number of rows to use for detection (default 500)"
     )
     explorer.add_argument(
-        '-t',
-        '--select_tests',
-        dest='city',
+        "-s",
+        "--sep",
+        dest="sep",
         type=str,
-        nargs='*',
-        help='List of tests to be performed (use "" if you want to use the dash option to remove tests)'
+        nargs="?",
+        help="Columns separator (detected if not specified)"
+    )
+    explorer.add_argument(
+        "--save",
+        dest="save_results",
+        type=int,
+        nargs="?",
+        help="Whether to save the resulting analysis to json (1 = save, 0 = don't)"
+    )
+    explorer.add_argument(
+        "-v",
+        "--verbose",
+        dest="verbose",
+        type=int,
+        nargs="?",
+        help="Verbose (0 = quiet, 1 = details)"
     )
     opts = explorer.parse_args()
-    num_rows = opts.num_rows or 50
     inspection_results = routine(
-        opts.file_path,
-        num_rows=num_rows,
-        user_input_tests='ALL',
-        output_mode='ALL'
+        csv_file_path=opts.file_path,
+        num_rows=opts.num_rows,
+        sep=opts.sep,
+        save_results=bool(opts.save_results),
+        verbose=bool(opts.verbose),
     )
-    print(json.dumps(inspection_results, indent=4, sort_keys=True, ensure_ascii=False))
+    print(json.dumps(inspection_results, indent=4, ensure_ascii=False))

csv_detective/detect_fields/other/float/__init__.py CHANGED Viewed

@@ -12,7 +12,7 @@ def _is(val):
         if (
             not isinstance(val, str)
             or any([k in val for k in ['_', '+', 'e', 'E']])
-            or (val.startswith('0') and len(val) > 1)
+            or (val.startswith("0") and len(val) > 1 and val[1] not in [".", ","])
         ):
             return False
         float_casting(val)

csv_detective/explore_csv.py CHANGED Viewed

@@ -4,7 +4,7 @@ import logging
 import os
 import tempfile
 from time import time
-from typing import Union
+from typing import Optional, Union
 import numpy as np
 import pandas as pd
@@ -25,7 +25,7 @@ from .s3_utils import download_from_minio, upload_to_minio
 from .utils import display_logs_depending_process_time, is_url
-def get_all_packages(detect_type) -> list:
+def get_all_packages(detect_type: str) -> list:
     root_dir = os.path.dirname(os.path.abspath(__file__)) + "/" + detect_type
     modules = []
     for dirpath, _, filenames in os.walk(root_dir):
@@ -88,15 +88,15 @@ def routine(
     user_input_tests: Union[str, list[str]] = "ALL",
     limited_output: bool = True,
     save_results: Union[bool, str] = True,
-    encoding: str = None,
-    sep: str = None,
+    encoding: Optional[str] = None,
+    sep: Optional[str] = None,
     skipna: bool = True,
     output_profile: bool = False,
     output_schema: bool = False,
     output_df: bool = False,
     cast_json: bool = True,
     verbose: bool = False,
-    sheet_name: Union[str, int] = None,
+    sheet_name: Optional[Union[str, int]] = None,
 ) -> Union[dict, tuple[dict, pd.DataFrame]]:
     """Returns a dict with information about the csv table and possible
     column contents.
@@ -307,10 +307,7 @@ def routine_minio(
     tableschema_minio_location: dict[str, str],
     minio_user: str,
     minio_pwd: str,
-    num_rows: int = 500,
-    user_input_tests: Union[str, list[str]] = "ALL",
-    encoding: str = None,
-    sep: str = None,
+    **kwargs,
 ):
     """Returns a dict with information about the csv table and possible
     column contents.
@@ -323,11 +320,7 @@ def routine_minio(
         None if not uploading the tableschema to Minio.
         minio_user: user name for the minio instance
         minio_pwd: password for the minio instance
-        num_rows: number of rows to sample from the file for analysis ; -1 for analysis of
-        the whole file
-        user_input_tests: tests to run on the file
-        output_mode: LIMITED or ALL, whether or not to return all possible types or only
-        the most likely one for each column
+        kwargs: arguments for routine
     Returns:
         dict: a dict with information about the csv and possible types for each column
@@ -376,14 +369,10 @@ def routine_minio(
         minio_pwd=minio_pwd,
     )
-    analysis = routine(
-        file_path,
+    analysis = routine(file_path,
         num_rows,
-        user_input_tests,
-        output_mode="LIMITED",
         save_results=True,
-        encoding=encoding,
-        sep=sep,
+        **kwargs,
     )
     # Write report JSON file.
@@ -404,8 +393,8 @@ def routine_minio(
     os.remove(file_path)
     generate_table_schema(
-        analysis,
-        True,
+        analysis_report=analysis,
+        save_file=True,
         netloc=tableschema_minio_location["netloc"],
         bucket=tableschema_minio_location["bucket"],
         key=tableschema_minio_location["key"],

{csv_detective-0.7.5.dev1239.data → csv_detective-0.7.5.dev1277.data}/data/share/csv_detective/CHANGELOG.md RENAMED Viewed

@@ -7,11 +7,13 @@
 - Better naming, hint types and minor refactors [#103](https://github.com/datagouv/csv-detective/pull/103)
 - The returned dataframe has its columns properly cast to the detected types [#104](https://github.com/datagouv/csv-detective/pull/104)
 - Raise an error if the encoding could not be guessed [#106](https://github.com/datagouv/csv-detective/pull/106)
+- Fix CLI and minio routine [#107](https://github.com/datagouv/csv-detective/pull/107)
 - Allow to only specify tests to skip ("all but...") [#108](https://github.com/datagouv/csv-detective/pull/108)
 - Fix bool casting [#109](https://github.com/datagouv/csv-detective/pull/109)
 - Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
 - Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
 - Restructure repo (breaking changes) [#111](https://github.com/datagouv/csv-detective/pull/111)
+- Better float detection [#113](https://github.com/datagouv/csv-detective/pull/113)
 ## 0.7.4 (2024-11-15)

{csv_detective-0.7.5.dev1239.dist-info → csv_detective-0.7.5.dev1277.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: csv_detective
-Version: 0.7.5.dev1239
+Version: 0.7.5.dev1277
 Summary: Detect CSV column content
 Home-page: https://github.com/etalab/csv_detective
 Author: Etalab

{csv_detective-0.7.5.dev1239.dist-info → csv_detective-0.7.5.dev1277.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 csv_detective/__init__.py,sha256=GCHgu0BhH5ACV7cf-1gDr9nRyvSoeQ1vRw9SjEHeMT4,143
-csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
-csv_detective/explore_csv.py,sha256=aJ2pG7lK4sgY9Pv31zEzFVGByxkfw4wwgrQqfgUtBOo,14903
+csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
+csv_detective/explore_csv.py,sha256=FmgJ2h1SxV8b_wOWia4xsswyVJTlCCW66e0nhltz-0s,14511
 csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
 csv_detective/utils.py,sha256=KAYfSJXnPuAXnSc38Jm57oQ_JP_0kUkmI1OV6gN5_ys,1116
 csv_detective/detect_fields/__init__.py,sha256=NVfE3BQVExgXb-BPbhDvlkM5-0naEVLpZ4aM_OGHYfE,931
@@ -53,7 +53,7 @@ csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7g
 csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JRpGb0da_E27iGxES9zWCrnsqc,497
 csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqEEDhlsUJjxFZNaNFM4GZaws,178
-csv_detective/detect_fields/other/float/__init__.py,sha256=dpEd5ZijmjQ7gqcTnYRoRoLGGJae0RyGwVC6MPra9go,549
+csv_detective/detect_fields/other/float/__init__.py,sha256=7bXuPAmBuIhKJEhq7d20B60WVol1AUpqRkWhreQpWfU,578
 csv_detective/detect_fields/other/int/__init__.py,sha256=QN3kQJLYqLRBiubUK7g4Xq03PlA5wqVwx2pPPIO9FdI,320
 csv_detective/detect_fields/other/json/__init__.py,sha256=DhzyvT12kOqgum89silIu3uoSYXmC_s_AaxLtXAD4eU,540
 csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
@@ -141,18 +141,18 @@ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,
 csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
 csv_detective/parsing/load.py,sha256=SpP0pfxswOAPPpwbZfoP1blh0EKV5VMs0TpTgQJKzjs,3621
 csv_detective/parsing/text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
-csv_detective-0.7.5.dev1239.data/data/share/csv_detective/CHANGELOG.md,sha256=povo1ufNJvsxJLkzdjYLgkTy9E-MNFWTg6elXe2nyqU,7625
-csv_detective-0.7.5.dev1239.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
-csv_detective-0.7.5.dev1239.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
-csv_detective-0.7.5.dev1239.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1277.data/data/share/csv_detective/CHANGELOG.md,sha256=tgIIm6s4qoP4RGJK1cmqf-Cm5aHmXmBrwi37NVIYedg,7796
+csv_detective-0.7.5.dev1277.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1277.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+csv_detective-0.7.5.dev1277.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
-tests/test_fields.py,sha256=fcgycaFxacOcN0WdwuUvxef_ejd6tRHNpkD5pxMjMXE,11141
+tests/test_fields.py,sha256=LPLx09cX5u9XHAh65XvTgIqzKylToiHZxXzKhpV0wsk,11148
 tests/test_file.py,sha256=EleTssys5fCP4N0W1eTZN35uijzoF15e3dIcuIlrMsk,7865
 tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
 tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
-csv_detective-0.7.5.dev1239.dist-info/METADATA,sha256=81-Ik3akmjcTO7mTqHRWrMLUP-4uZ4ffPyg9L74pImg,1386
-csv_detective-0.7.5.dev1239.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-csv_detective-0.7.5.dev1239.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.7.5.dev1239.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
-csv_detective-0.7.5.dev1239.dist-info/RECORD,,
+csv_detective-0.7.5.dev1277.dist-info/METADATA,sha256=RgcnqpKqQ1us0lmVf6McKYJs38DC1sqvAh10XgnJOY8,1386
+csv_detective-0.7.5.dev1277.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+csv_detective-0.7.5.dev1277.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.7.5.dev1277.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+csv_detective-0.7.5.dev1277.dist-info/RECORD,,

{csv_detective-0.7.5.dev1239.dist-info → csv_detective-0.7.5.dev1277.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (79.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

tests/test_fields.py CHANGED Viewed

@@ -501,7 +501,7 @@ def test_not_match_int():
 # float
 def test_match_float():
-    for val in ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7"]:
+    for val in ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"]:
         assert test_float._is(val)

{csv_detective-0.7.5.dev1239.data → csv_detective-0.7.5.dev1277.data}/data/share/csv_detective/LICENSE.AGPL.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1239.data → csv_detective-0.7.5.dev1277.data}/data/share/csv_detective/README.md RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1239.dist-info → csv_detective-0.7.5.dev1277.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1239.dist-info → csv_detective-0.7.5.dev1277.dist-info}/licenses/LICENSE.AGPL.txt RENAMED Viewed

File without changes

{csv_detective-0.7.5.dev1239.dist-info → csv_detective-0.7.5.dev1277.dist-info}/top_level.txt RENAMED Viewed

File without changes

csv-detective 0.7.5.dev1239__py3-none-any.whl → 0.7.5.dev1277__py3-none-any.whl

csv-detective 0.7.5.dev1239__py3-none-any.whl → 0.7.5.dev1277__py3-none-any.whl