PyPI - csv-detective - Versions diffs - 0.8.1.dev1549__py3-none-any.whl → 0.8.1.dev1599__py3-none-any.whl - Mend

csv-detective 0.8.1.dev1549__py3-none-any.whl → 0.8.1.dev1599__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

csv_detective/detect_fields/__init__.py CHANGED Viewed

@@ -53,6 +53,7 @@ from .geo import (
     latitude_wgs,
     longitude_wgs,
     latlon_wgs,
+    lonlat_wgs,
     json_geojson,
 )

csv_detective/detect_fields/geo/json_geojson/__init__.py CHANGED Viewed

@@ -1,22 +1,18 @@
 import json
-from json import JSONDecodeError
 PROPORTION = 0.9
 def _is(val):
-    '''Renvoie True si val peut etre geojson'''
+    """Renvoie True si val peut etre un geojson"""
     try:
         j = json.loads(val)
-        if 'type' in j and 'coordinates' in j:
-            return True
-        if 'geometry' in j:
-            if 'coordinates' in j['geometry']:
+        if isinstance(j, dict):
+            if "type" in j and "coordinates" in j:
                 return True
-    except JSONDecodeError:
-        pass
-    except TypeError:
+            if "geometry" in j and "coordinates" in j["geometry"]:
+                return True
+    except Exception:
         pass
     return False

csv_detective/detect_fields/geo/latlon_wgs/__init__.py CHANGED Viewed

@@ -5,9 +5,12 @@ PROPORTION = 1
 def _is(val):
-    '''Renvoie True si val peut etre une latitude,longitude'''
+    """Renvoie True si val peut etre une latitude,longitude"""
     if not isinstance(val, str) or val.count(",") != 1:
         return False
     lat, lon = val.split(",")
+    # handling [lat,lon]
+    if lat.startswith("[") and lon.endswith("]"):
+        lat, lon = lat[1:], lon[:-1]
     return is_lat(lat) and is_lon(lon.replace(" ", ""))

csv_detective/detect_fields/geo/lonlat_wgs/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+from ..latitude_wgs import _is as is_lat
+from ..longitude_wgs import _is as is_lon
+PROPORTION = 1
+def _is(val):
+    """Renvoie True si val peut etre une longitude,latitude"""
+    if not isinstance(val, str) or val.count(",") != 1:
+        return False
+    lon, lat = val.split(",")
+    # handling [lon,lat]
+    if lon.startswith("[") and lat.endswith("]"):
+        lon, lat = lon[1:], lat[:-1]
+    return is_lon(lon) and is_lat(lat.replace(" ", ""))

csv_detective/detect_fields/other/json/__init__.py CHANGED Viewed

@@ -5,20 +5,10 @@ PROPORTION = 1
 def _is(val):
-    '''Detects json'''
+    """Detects json"""
     try:
         loaded = json.loads(val)
-        if isinstance(loaded, list) or (
-            isinstance(loaded, dict) and not (
-                any(
-                    [
-                        geo in loaded for geo in ['coordinates', 'geometry']
-                    ]
-                )
-            )
-        ):
-            return True
-        else:
-            return False
+        # we don't want to consider integers for instance
+        return isinstance(loaded, (list, dict))
     except (JSONDecodeError, TypeError):
         return False

csv_detective/detect_labels/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@ from .FR.geo import (
     longitude_l93,
     longitude_wgs_fr_metropole,
     pays,
-    region
+    region,
 )
 from .FR.other import (
     code_csp_insee,
@@ -27,7 +27,7 @@ from .FR.other import (
     siren,
     siret,
     tel_fr,
-    uai
+    uai,
 )
 from .FR.temp import jour_de_la_semaine, mois_de_annee
 from .geo import (
@@ -37,7 +37,8 @@ from .geo import (
     json_geojson,
     latitude_wgs,
     latlon_wgs,
-    longitude_wgs
+    longitude_wgs,
+    lonlat_wgs,
 )
 from .other import booleen, email, float, int, money, mongo_object_id, twitter, url, uuid
 from .temp import date, datetime_rfc822, year

csv_detective/detect_labels/geo/latlon_wgs/__init__.py CHANGED Viewed

@@ -2,42 +2,45 @@ from csv_detective.parsing.text import header_score
 PROPORTION = 0.5
+COMMON_COORDS_LABELS = [
+    "c geo",
+    "code geo",
+    "coord gps",
+    "coordonnees",
+    "coordonnees ban",
+    "coordonnees finales",
+    "coordonnees geo",
+    "coordonnees geographiques",
+    "coordonnees geoloc",
+    "coordonnees geoloc",
+    "coordonnees gps",
+    "coordonnees insee",
+    "coordonnees xy",
+    "geo",
+    "geo coordinates",
+    "geo cp",
+    "geo localisation",
+    "geo point",
+    "geo point 2d",
+    "geolocalisation",
+    "geom x y",
+    "geometry x y",
+    "geopoint",
+    "point geo",
+    "point geo insee",
+    "position",
+    "position geographique",
+    "wgs84",
+    "x y",
+    "xy",
+]
 def _is(header: str) -> float:
     words_combinations_list = [
         "latlon wgs",
         "latlon",
-        "geo point",
-        "geo point 2d",
-        "wgs84",
-        "geolocalisation",
-        "geo",
-        "coordonnees finales",
-        "coordonnees",
-        "coordonnees ban",
-        "xy",
-        "geometry x y",
-        "coordonnees insee",
-        "coordonnees geographiques",
-        "position",
-        "coordonnes gps",
-        "geopoint",
-        "geom x y",
-        "coord gps",
         "latlong",
-        "position geographique",
-        "c geo",
-        "coordonnes geoloc",
         "lat lon",
-        "code geo",
-        "geo localisation",
-        "coordonnes geo",
-        "geo cp",
-        "x y",
-        "geo coordinates",
-        "point geo",
-        "point geo insee",
-        "coordonnees geoloc",
-        "coordonnees xy",
-    ]
+    ] + COMMON_COORDS_LABELS
     return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/lonlat_wgs/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+from csv_detective.parsing.text import header_score
+from ..latlon_wgs import COMMON_COORDS_LABELS
+PROPORTION = 0.5
+def _is(header: str) -> float:
+    words_combinations_list = [
+        "lonlat wgs",
+        "lonlat",
+        "longlat",
+        "lon lat",
+    ] + COMMON_COORDS_LABELS
+    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/booleen/__init__.py CHANGED Viewed

@@ -4,5 +4,5 @@ PROPORTION = 0.5
 def _is(header: str) -> float:
-    words_combinations_list = ["is_", "has_", "est_"]
+    words_combinations_list = ["is ", "has ", "est "]
     return header_score(header, words_combinations_list)

csv_detective/detection/formats.py CHANGED Viewed

@@ -110,11 +110,9 @@ def detect_formats(
         "datetime_naive": "datetime",
         "datetime_rfc822": "datetime",
         "date": "date",
-        "latitude": "float",
         "latitude_l93": "float",
         "latitude_wgs": "float",
         "latitude_wgs_fr_metropole": "float",
-        "longitude": "float",
         "longitude_l93": "float",
         "longitude_wgs": "float",
         "longitude_wgs_fr_metropole": "float",

csv_detective/output/schema.py CHANGED Viewed

@@ -51,6 +51,7 @@ def get_description(format: str) -> str:
         "latitude_wgs": "La latitude au format WGS",
         "longitude_wgs": "La longitude au format WGS",
         "latlon_wgs": "Les coordonnées XY (latitude et longitude)",
+        "lonlat_wgs": "Les coordonnées XY (longitude et latitude)",
         "booleen": "Booléen",
         "email": "L'adresse couriel (email)",
         "float": "Nombre flottant (à virgule)",
@@ -116,6 +117,7 @@ def get_validata_type(format: str) -> str:
         "latitude_wgs": "number",
         "latitude_wgs_fr_metropole": "number",
         "latlon_wgs": "geo_point",
+        "lonlat_wgs": "geo_point",
         "longitude": "number",
         "longitude_l93": "number",
         "longitude_wgs": "number",
@@ -162,6 +164,7 @@ def get_example(format: str) -> str:
         "latitude_wgs": 42.42,
         "latitude_wgs_fr_metropole": 41.3,
         "latlon_wgs": "42.42, 0.0",
+        "lonlat_wgs": "0.0, 42.42",
         "longitude": 0.0,
         "longitude_l93": -357823,
         "longitude_wgs": 0.0,

csv_detective/output/utils.py CHANGED Viewed

@@ -2,52 +2,73 @@ import pandas as pd
 def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
+    # -> dict[str, dict | list[dict]] (to be added when upgrading to python>=3.10)
     return_dict_cols = return_table.to_dict("dict")
-    return_dict_cols_intermediary = {}
+    output_dict = {}
     for column_name in return_dict_cols:
-        return_dict_cols_intermediary[column_name] = []
-        for detected_value_type in return_dict_cols[column_name]:
-            if return_dict_cols[column_name][detected_value_type] == 0:
-                continue
-            dict_tmp = {}
-            dict_tmp["format"] = detected_value_type
-            dict_tmp["score"] = return_dict_cols[column_name][detected_value_type]
-            return_dict_cols_intermediary[column_name].append(dict_tmp)
-        # Clean dict using priorities
-        formats_detected = {
-            x["format"] for x in return_dict_cols_intermediary[column_name]
-        }
+        # keep only formats with a non-zero score
+        output_dict[column_name] = [
+            {
+                "format": detected_value_type,
+                "score": return_dict_cols[column_name][detected_value_type],
+            }
+            for detected_value_type in return_dict_cols[column_name]
+            if return_dict_cols[column_name][detected_value_type] > 0
+        ]
+        priorities = [
+            # no need to specify int and float everywhere, they are deprioritized anyway
+            ("int", ("float",)),
+            # bool over everything
+            ("booleen", (
+                "latitude_l93",
+                "latitude_wgs",
+                "latitude_wgs_fr_metropole",
+                "longitude_l93",
+                "longitude_wgs",
+                "longitude_wgs_fr_metropole",
+            )),
+            ("geojson", ("json",)),
+            # latlon over lonlat if no longitude allows to discriminate
+            ("latlon_wgs", ("json", "lonlat_wgs")),
+            ("lonlat_wgs", ("json",)),
+            ("latitude_wgs_fr_metropole", ("latitude_l93", "latitude_wgs")),
+            ("longitude_wgs_fr_metropole", ("longitude_l93", "longitude_wgs")),
+            ("latitude_wgs", ("latitude_l93",)),
+            ("longitude_wgs", ("longitude_l93",)),
+            ("code_region", ("code_departement",)),
+            ("datetime_rfc822", ("datetime_aware",)),
+        ]
+        detected_formats = set(x["format"] for x in output_dict[column_name])
         formats_to_remove = set()
         # Deprioritise float and int detection vs others
-        if len(formats_detected - {"float", "int"}) > 0:
+        if len(detected_formats - {"float", "int"}) > 0:
             formats_to_remove = formats_to_remove.union({"float", "int"})
-        if "int" in formats_detected:
-            formats_to_remove.add("float")
-        if "latitude_wgs_fr_metropole" in formats_detected:
-            formats_to_remove.add("latitude_l93")
-            formats_to_remove.add("latitude_wgs")
-        if "longitude_wgs_fr_metropole" in formats_detected:
-            formats_to_remove.add("longitude_l93")
-            formats_to_remove.add("longitude_wgs")
-        if "longitude_wgs" in formats_detected:
-            formats_to_remove.add("longitude_l93")
-        if "code_region" in formats_detected:
-            formats_to_remove.add("code_departement")
-        if "datetime_rfc822" in formats_detected:
-            formats_to_remove.add("datetime_aware")
+        # Deprioritize less specific formats if:
+        # secondary score is even or worse
+        # or priority score is at least 1 (max of the field score)
+        for prio_format, secondary_formats in priorities:
+            if prio_format in detected_formats:
+                for secondary in secondary_formats:
+                    if (
+                        secondary in detected_formats
+                        and (
+                            return_dict_cols[column_name][prio_format]
+                            >= return_dict_cols[column_name][secondary]
+                            or return_dict_cols[column_name][prio_format] >= 1
+                        )
+                    ):
+                        formats_to_remove.add(secondary)
-        formats_to_keep = formats_detected - formats_to_remove
+        formats_to_keep = detected_formats - formats_to_remove
-        detections = return_dict_cols_intermediary[column_name]
-        detections = [x for x in detections if x["format"] in formats_to_keep]
+        detections = [x for x in output_dict[column_name] if x["format"] in formats_to_keep]
         if not limited_output:
-            return_dict_cols_intermediary[column_name] = detections
+            output_dict[column_name] = detections
         else:
-            return_dict_cols_intermediary[column_name] = (
+            output_dict[column_name] = (
                 max(detections, key=lambda x: x["score"])
                 if len(detections) > 0
                 else {"format": "string", "score": 1.0}
             )
-    return return_dict_cols_intermediary
+    return output_dict

csv_detective/parsing/columns.py CHANGED Viewed

@@ -46,7 +46,6 @@ def test_col_val(
                 ]:  # Pour ne pas faire d'opérations inutiles, on commence par 1,
                     # puis 5 valeurs puis la serie complète
                     if all(apply_test_func(serie, test_func, _range)):
-                        # print(serie.name, ': check OK')
                         pass
                     else:
                         return 0.0

{csv_detective-0.8.1.dev1549.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/CHANGELOG.md RENAMED Viewed

@@ -10,6 +10,8 @@
 - Split aware and naive datetimes for hydra to cast them separately [#130](https://github.com/datagouv/csv-detective/pull/130)
 - Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
 - Remove `datetime_iso` format due to ambiguous cast in db (can be naive or aware) [#132](https://github.com/datagouv/csv-detective/pull/132)
+- Add `lonlat_wgs` format and handle optional brackets for `latlon_wgs` [#133](https://github.com/datagouv/csv-detective/pull/133)
+- Refactor format prioritizing [#134](https://github.com/datagouv/csv-detective/pull/134)
 ## 0.8.0 (2025-05-20)

{csv_detective-0.8.1.dev1549.dist-info → csv_detective-0.8.1.dev1599.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: csv_detective
-Version: 0.8.1.dev1549
+Version: 0.8.1.dev1599
 Summary: Detect tabular files column content
 Home-page: https://github.com/datagouv/csv_detective
 Author: Etalab

{csv_detective-0.8.1.dev1549.dist-info → csv_detective-0.8.1.dev1599.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2
 csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
 csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
 csv_detective/validate.py,sha256=5Li_vfvU9wdfoZjNjef-MBUoKcKoJ-c7381QoX9aDXY,2818
-csv_detective/detect_fields/__init__.py,sha256=jThGn0_HO8U0mMoSbf38x8l46ABRQcmHcNLvjZqQQdc,984
+csv_detective/detect_fields/__init__.py,sha256=0A5SZTp_IhhJ9z7lWeH4K5_0uwMK_VdMudjPm7oggVg,1000
 csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
@@ -49,16 +49,17 @@ csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=u98rn
 csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
 csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=wJAynAkGZN7jKeI3xOeLXQ_irxQBb_J56pRkLDYVClY,436
 csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
-csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
+csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=6wlwlxQmsVIZ21g-THvH3nBj-I8FuoF2sBlZAoEMGiQ,393
 csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
-csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=7_mnO9uC_kI7e2WR8xIer7Kqw8zi-v-JKaAD4zcoGbE,342
+csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
 csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7gwDNGt1a4B_A8hkCBkIxn3THDCUFk,330
+csv_detective/detect_fields/geo/lonlat_wgs/__init__.py,sha256=CnBMYevfGdhBvureF3oc_zqT-RZjG419iAuUlugQFLc,454
 csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JRpGb0da_E27iGxES9zWCrnsqc,497
 csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqEEDhlsUJjxFZNaNFM4GZaws,178
 csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
 csv_detective/detect_fields/other/int/__init__.py,sha256=QN3kQJLYqLRBiubUK7g4Xq03PlA5wqVwx2pPPIO9FdI,320
-csv_detective/detect_fields/other/json/__init__.py,sha256=DhzyvT12kOqgum89silIu3uoSYXmC_s_AaxLtXAD4eU,540
+csv_detective/detect_fields/other/json/__init__.py,sha256=AkRWZAidEM1dWkVRFThEBI5M7kMUu5Yu12iCViGM8lU,310
 csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
 csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
 csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
@@ -71,7 +72,7 @@ csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRU
 csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
 csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
 csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRhKTDT-FTqGOBpdartuShA,194
-csv_detective/detect_labels/__init__.py,sha256=oVq2fiO6QkaWB0wZImL8YVW7oiwPky8ivmLZAFmK55Q,864
+csv_detective/detect_labels/__init__.py,sha256=8vrFUrMc8a_VOC5gvYNMKL-Do_q9eMTrghJRI9Xotvk,883
 csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=fNWFW-Wo3n6azDBfmi0J0qnzP-p2StLxCc9eNiE9NNE,346
@@ -110,10 +111,11 @@ csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=biUZP
 csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=biUZP8gAsVpjXLTx1WeS19qR4ia0pzpi6R69wJgu4B0,348
 csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6fTQCXH4MYao2xmRu-o7xWcab7Jg,355
 csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
-csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=dbWX1LKpoev7zwWthw9vlwGQp6CSlgYrTBnPpvyNC-A,989
+csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp0Xs0FsPsOPpRWhDe1ockbWw8s,923
 csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
+csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=NNKlFcMsKVqnUKEm_4flGxcNUGS2-iS3m6ihQf2AVTk,345
 csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_labels/other/booleen/__init__.py,sha256=BZwnfR-Zcv8dqscLrBKhttgwm4Dqq16M0PaGirxYWio,214
+csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
 csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
 csv_detective/detect_labels/other/float/__init__.py,sha256=X0axZN2GAfC_y01zRfIyvOfRsOy2KNQcQ-mlQAKxqT4,216
 csv_detective/detect_labels/other/int/__init__.py,sha256=_1AY7thEBCcgSBQQ2YbY4YaPaxGRQ71BtmaFaX088ig,215
@@ -130,7 +132,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
 csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
 csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
 csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
-csv_detective/detection/formats.py,sha256=3vf7VdjxTmdt5KaTqGBwT5GuZhHuw98R-sIemTcOIJg,6345
+csv_detective/detection/formats.py,sha256=c0LFTWbibWbEJSZaPy_86LIMOY3qRxj-I_agwpb4zbI,6284
 csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
 csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
 csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
@@ -139,28 +141,28 @@ csv_detective/output/__init__.py,sha256=5KTevPfp_4MRxByJyOntQjToNfeG7dPQn-_13wSq
 csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
 csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
 csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
-csv_detective/output/schema.py,sha256=Hpav3RgIP7gOb93h154s1wNSlEZtHNJVzFDDwp54UcQ,13669
-csv_detective/output/utils.py,sha256=RcOkFQihwfmEIOD-gwrUKi2r5CwBbs17vkuAf8n7-Wo,2405
+csv_detective/output/schema.py,sha256=yC9K1vw6NUTULNv9a7CaMGns9iXmbzFLbtHI4wegqEc,13812
+csv_detective/output/utils.py,sha256=xPM2KYdqousmjU22-w7HnaF6AR74fj8lhQY77Y9xs7w,3310
 csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
+csv_detective/parsing/columns.py,sha256=aMdG6-G-2Tj_2JdHotAIveQwaG_r8chGcGieFiUaBRk,5634
 csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
 csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
 csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
 csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
 csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
-csv_detective-0.8.1.dev1549.data/data/share/csv_detective/CHANGELOG.md,sha256=1jO_wJx_-DK1TqmdmIu2bmbnvg2iJ2iX78MEb29MZYY,9425
-csv_detective-0.8.1.dev1549.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
-csv_detective-0.8.1.dev1549.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
-csv_detective-0.8.1.dev1549.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
+csv_detective-0.8.1.dev1599.data/data/share/csv_detective/CHANGELOG.md,sha256=WQ8cTB2D5YkAJ9AsS2ziKtZL8m1sPclGPenTD1BxZ_g,9646
+csv_detective-0.8.1.dev1599.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
+csv_detective-0.8.1.dev1599.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
+csv_detective-0.8.1.dev1599.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
-tests/test_fields.py,sha256=tTFOmlb9gewtCwcZV7B6Gc3aH6xXK5kMUFSEBi7iIy4,10638
+tests/test_fields.py,sha256=IwMpjOn8W5kDCvJYp3Cer4m571qomzjupOAvSRFMg_Q,11819
 tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
 tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
 tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
 tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
-csv_detective-0.8.1.dev1549.dist-info/METADATA,sha256=LQkj1jrN7dsdUjGOV3Z8BRKANccZTxdXdoRWtAvOa6w,10443
-csv_detective-0.8.1.dev1549.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-csv_detective-0.8.1.dev1549.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
-csv_detective-0.8.1.dev1549.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
-csv_detective-0.8.1.dev1549.dist-info/RECORD,,
+csv_detective-0.8.1.dev1599.dist-info/METADATA,sha256=NoE1tBjCZxO2uffbH9wSgkuNzOVOgLRA2qkjth7ynyk,10443
+csv_detective-0.8.1.dev1599.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+csv_detective-0.8.1.dev1599.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.8.1.dev1599.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+csv_detective-0.8.1.dev1599.dist-info/RECORD,,

tests/test_fields.py CHANGED Viewed

@@ -44,6 +44,7 @@ from csv_detective.detect_fields.geo import (
     latitude_wgs,
     latlon_wgs,
     longitude_wgs,
+    lonlat_wgs,
 )
 from csv_detective.detect_fields.other import (
     booleen,
@@ -71,6 +72,7 @@ from csv_detective.detection.variables import (
 )
 from csv_detective.load_tests import return_all_tests
 from csv_detective.output.dataframe import cast
+from csv_detective.output.utils import prepare_output_dict
 def test_all_tests_return_bool():
@@ -263,13 +265,17 @@ fields = {
         False: ["100"],
     },
     latlon_wgs: {
-        True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8"],
-        False: ["0.1,192", "-102, 92"],
+        True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8", "[12,-0.28]"],
+        False: ["0.1,192", "-102, 92", "[23.02,4.1", "23.02,4.1]", "160.1,-27"],
     },
     longitude_wgs: {
         True: ["120", "-20.2"],
         False: ["-200"],
     },
+    lonlat_wgs: {
+        True: ["-22.6,43.2", "140,-10.7", "10.8, -40.7", "[-0.28,12]"],
+        False: ["192,0.1", "92, -102", "[4.1,23.02", "4.1,23.02]", "-27,160.1"],
+    },
     booleen: {
         True: ["oui", "0", "1", "yes", "false", "True"],
         False: ["nein", "ja", "2", "-0"],
@@ -280,7 +286,7 @@ fields = {
     },
     json: {
         True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
-        False: ['{"coordinates": [45.783753, 3.049342], "citycode": "63870"}', "{zefib:"],
+        False: ["5", '{"zefib":', '{"a"}'],
     },
     money: {
         True: ["120€", "-20.2$"],
@@ -405,3 +411,27 @@ def test_fields_with_values(args):
 def test_cast(args):
     value, detected_type, cast_type = args
     assert isinstance(cast(value, detected_type), cast_type)
+@pytest.mark.parametrize(
+    "args",
+    (
+        # there is a specific numerical format => specific wins
+        ({"int": 1, "float": 1, "latitude_wgs": 1}, "latitude_wgs"),
+        # scores are equal for related formats => priority wins
+        ({"int": 1, "float": 1}, "int"),
+        # score is lower for priority format => secondary wins
+        ({"int": 0.5, "float": 1}, "float"),
+        # score is lower for priority format, but is 1 => priority wins
+        ({"int": 1, "float": 1.25}, "int"),
+        # two rounds of priority => highest priority wins
+        ({"latlon_wgs": 1, "lonlat_wgs": 1, "json": 1}, "latlon_wgs"),
+        # no detection => default to string
+        ({}, "string"),
+    ),
+)
+def test_priority(args):
+    detections, expected = args
+    col = "col1"
+    output = prepare_output_dict(pd.DataFrame({col: detections}), limited_output=True)
+    assert output[col]["format"] == expected

{csv_detective-0.8.1.dev1549.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/LICENSE RENAMED Viewed

File without changes

{csv_detective-0.8.1.dev1549.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/README.md RENAMED Viewed

File without changes

{csv_detective-0.8.1.dev1549.dist-info → csv_detective-0.8.1.dev1599.dist-info}/WHEEL RENAMED Viewed

File without changes

{csv_detective-0.8.1.dev1549.dist-info → csv_detective-0.8.1.dev1599.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{csv_detective-0.8.1.dev1549.dist-info → csv_detective-0.8.1.dev1599.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{csv_detective-0.8.1.dev1549.dist-info → csv_detective-0.8.1.dev1599.dist-info}/top_level.txt RENAMED Viewed

File without changes

csv-detective 0.8.1.dev1549__py3-none-any.whl → 0.8.1.dev1599__py3-none-any.whl

csv-detective 0.8.1.dev1549__py3-none-any.whl → 0.8.1.dev1599__py3-none-any.whl