PyPI - csv-detective - Versions diffs - 0.10.1.dev2581__py3-none-any.whl → 0.10.1.dev2599__py3-none-any.whl - Mend

csv-detective 0.10.1.dev2581__py3-none-any.whl → 0.10.1.dev2599__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

csv_detective/detection/formats.py +11 -38
csv_detective/format.py +11 -4
csv_detective/formats/adresse.py +9 -9
csv_detective/formats/binary.py +2 -1
csv_detective/formats/booleen.py +3 -2
csv_detective/formats/code_commune_insee.py +12 -10
csv_detective/formats/code_csp_insee.py +1 -1
csv_detective/formats/code_departement.py +8 -7
csv_detective/formats/code_fantoir.py +6 -5
csv_detective/formats/code_import.py +1 -1
csv_detective/formats/code_postal.py +10 -9
csv_detective/formats/code_region.py +7 -6
csv_detective/formats/code_rna.py +7 -6
csv_detective/formats/code_waldec.py +1 -1
csv_detective/formats/commune.py +5 -5
csv_detective/formats/csp_insee.py +6 -5
csv_detective/formats/date.py +18 -17
csv_detective/formats/date_fr.py +1 -1
csv_detective/formats/datetime_aware.py +2 -1
csv_detective/formats/datetime_naive.py +1 -0
csv_detective/formats/datetime_rfc822.py +1 -0
csv_detective/formats/departement.py +15 -15
csv_detective/formats/email.py +13 -13
csv_detective/formats/float.py +2 -1
csv_detective/formats/geojson.py +10 -10
csv_detective/formats/insee_ape700.py +10 -8
csv_detective/formats/insee_canton.py +6 -6
csv_detective/formats/int.py +2 -1
csv_detective/formats/iso_country_code_alpha2.py +10 -9
csv_detective/formats/iso_country_code_alpha3.py +2 -9
csv_detective/formats/iso_country_code_numeric.py +2 -9
csv_detective/formats/jour_de_la_semaine.py +11 -12
csv_detective/formats/json.py +6 -0
csv_detective/formats/latitude_l93.py +8 -22
csv_detective/formats/latitude_wgs.py +21 -26
csv_detective/formats/latitude_wgs_fr_metropole.py +4 -26
csv_detective/formats/latlon_wgs.py +27 -26
csv_detective/formats/longitude_l93.py +8 -13
csv_detective/formats/longitude_wgs.py +24 -16
csv_detective/formats/longitude_wgs_fr_metropole.py +4 -16
csv_detective/formats/lonlat_wgs.py +10 -9
csv_detective/formats/mois_de_lannee.py +1 -1
csv_detective/formats/money.py +1 -1
csv_detective/formats/mongo_object_id.py +1 -1
csv_detective/formats/pays.py +11 -13
csv_detective/formats/percent.py +1 -1
csv_detective/formats/region.py +13 -13
csv_detective/formats/sexe.py +1 -1
csv_detective/formats/siren.py +9 -10
csv_detective/formats/siret.py +9 -9
csv_detective/formats/tel_fr.py +7 -13
csv_detective/formats/uai.py +17 -18
csv_detective/formats/url.py +16 -16
csv_detective/formats/username.py +1 -1
csv_detective/formats/uuid.py +1 -1
csv_detective/formats/year.py +7 -12
csv_detective/parsing/text.py +13 -12
{csv_detective-0.10.1.dev2581.dist-info → csv_detective-0.10.1.dev2599.dist-info}/METADATA +1 -1
csv_detective-0.10.1.dev2599.dist-info/RECORD +92 -0
{csv_detective-0.10.1.dev2581.dist-info → csv_detective-0.10.1.dev2599.dist-info}/WHEEL +1 -1
csv_detective-0.10.1.dev2581.dist-info/RECORD +0 -92
{csv_detective-0.10.1.dev2581.dist-info → csv_detective-0.10.1.dev2599.dist-info}/entry_points.txt +0 -0

csv_detective/formats/iso_country_code_alpha2.py CHANGED Viewed

@@ -3,15 +3,16 @@ from os.path import dirname, join
 proportion = 1
 tags = ["geo"]
-labels = [
-    "iso country code",
-    "code pays",
-    "pays",
-    "country",
-    "nation",
-    "pays code",
-    "code pays (iso)",
-]
+labels = {
+    "iso country code": 1,
+    "code pays": 1,
+    "pays": 1,
+    "country": 1,
+    "nation": 1,
+    "pays code": 1,
+    "code pays (iso)": 1,
+    "code": 0.5,
+}
 with open(join(dirname(__file__), "data", "iso_country_code_alpha2.txt"), "r") as iofile:
     liste_pays = iofile.read().split("\n")

csv_detective/formats/iso_country_code_alpha3.py CHANGED Viewed

@@ -1,17 +1,10 @@
 import re
 from os.path import dirname, join
+from csv_detective.formats.iso_country_code_alpha2 import labels  # noqa
 proportion = 1
 tags = ["geo"]
-labels = [
-    "iso country code",
-    "code pays",
-    "pays",
-    "country",
-    "nation",
-    "pays code",
-    "code pays (iso)",
-]
 with open(join(dirname(__file__), "data", "iso_country_code_alpha3.txt"), "r") as iofile:
     liste_pays = iofile.read().split("\n")

csv_detective/formats/iso_country_code_numeric.py CHANGED Viewed

@@ -1,17 +1,10 @@
 import re
 from os.path import dirname, join
+from csv_detective.formats.iso_country_code_alpha2 import labels  # noqa
 proportion = 1
 tags = ["geo"]
-labels = [
-    "iso country code",
-    "code pays",
-    "pays",
-    "country",
-    "nation",
-    "pays code",
-    "code pays (iso)",
-]
 with open(join(dirname(__file__), "data", "iso_country_code_numeric.txt"), "r") as iofile:
     liste_pays = iofile.read().split("\n")

csv_detective/formats/jour_de_la_semaine.py CHANGED Viewed

@@ -1,14 +1,14 @@
 proportion = 0.8
 tags = ["fr", "temp"]
-labels = [
-    "jour semaine",
-    "type jour",
-    "jour de la semaine",
-    "saufjour",
-    "nomjour",
-    "jour",
-    "jour de fermeture",
-]
+labels = {
+    "jour semaine": 1,
+    "type jour": 1,
+    "jour de la semaine": 1,
+    "saufjour": 1,
+    "nomjour": 1,
+    "jour": 0.75,
+    "jour de fermeture": 1,
+}
 jours = {
     "lundi",
@@ -31,11 +31,10 @@ jours = {
 def _is(val):
     if not isinstance(val, str):
         return False
-    val = val.lower()
-    return val in jours
+    return val.lower() in jours
 _test_values = {
     True: ["lundi"],
-    False: ["jour de la biere"],
+    False: ["jour"],
 }

csv_detective/formats/json.py CHANGED Viewed

@@ -2,7 +2,13 @@ import json
 from json import JSONDecodeError
 proportion = 1
+python_type = "json"
 tags = ["type"]
+labels = {
+    "list": 1,
+    "dict": 1,
+    "complex": 1,
+}
 def _is(val):

csv_detective/formats/latitude_l93.py CHANGED Viewed

@@ -2,31 +2,17 @@ from frformat import LatitudeL93
 from csv_detective.formats.float import _is as is_float
 from csv_detective.formats.float import float_casting
+from csv_detective.formats.latitude_wgs import SHARED_LATITUDE_LABELS
 proportion = 1
 tags = ["fr", "geo"]
-labels = [
-    "latitude",
-    "lat",
-    "y",
-    "yf",
-    "yd",
-    "y l93",
-    "coordonnee y",
-    "latitude lb93",
-    "coord y",
-    "ycoord",
-    "geocodage y gps",
-    "location latitude",
-    "ylatitude",
-    "ylat",
-    "latitude (y)",
-    "latitudeorg",
-    "coordinates.latitude",
-    "googlemap latitude",
-    "latitudelieu",
-    "latitude googlemap",
-]
+mandatory_label = True
+python_type = "float"
+labels = SHARED_LATITUDE_LABELS | {
+    "y l93": 1,
+    "latitude lb93": 1,
+    "lamby": 1,
+}
 _latitudel93 = LatitudeL93()

csv_detective/formats/latitude_wgs.py CHANGED Viewed

@@ -2,37 +2,32 @@ from csv_detective.formats.float import _is as is_float
 proportion = 1
 tags = ["geo"]
-labels = [
-    "latitude",
-    "lat",
-    "y",
-    "yf",
-    "yd",
-    "coordonnee y",
-    "coord y",
-    "ycoord",
-    "geocodage y gps",
-    "location latitude",
-    "ylatitude",
-    "ylat",
-    "latitude (y)",
-    "latitudeorg",
-    "coordinates.latitude",
-    "googlemap latitude",
-    "latitudelieu",
-    "latitude googlemap",
-    "latitude wgs84",
-    "y wgs84",
-    "latitude (wgs84)",
-]
+mandatory_label = True
+python_type = "float"
+SHARED_LATITUDE_LABELS = {
+    "latitude": 1,
+    "lat": 0.75,
+    "y": 0.5,
+    "yf": 0.5,
+    "yd": 0.5,
+    "coordonnee y": 1,
+    "coord y": 1,
+    "ycoord": 1,
+    "ylat": 1,
+}
+labels = SHARED_LATITUDE_LABELS | {
+    "y gps": 1,
+    "latitude wgs84": 1,
+    "y wgs84": 1,
+    "wsg": 0.75,
+    "gps": 0.5,
+}
 def _is(val):
     try:
         return is_float(val) and float(val) >= -90 and float(val) <= 90
-    except ValueError:
-        return False
-    except OverflowError:
+    except Exception:
         return False

csv_detective/formats/latitude_wgs_fr_metropole.py CHANGED Viewed

@@ -1,38 +1,16 @@
 from csv_detective.formats.float import _is as is_float
+from csv_detective.formats.latitude_wgs import labels  # noqa
 proportion = 1
 tags = ["fr", "geo"]
-labels = [
-    "latitude",
-    "lat",
-    "y",
-    "yf",
-    "yd",
-    "coordonnee y",
-    "coord y",
-    "ycoord",
-    "geocodage y gps",
-    "location latitude",
-    "ylatitude",
-    "ylat",
-    "latitude (y)",
-    "latitudeorg",
-    "coordinates.latitude",
-    "googlemap latitude",
-    "latitudelieu",
-    "latitude googlemap",
-    "latitude wgs84",
-    "y wgs84",
-    "latitude (wgs84)",
-]
+mandatory_label = True
+python_type = "float"
 def _is(val):
     try:
         return is_float(val) and float(val) >= 41.3 and float(val) <= 51.3
-    except ValueError:
-        return False
-    except OverflowError:
+    except Exception:
         return False

csv_detective/formats/latlon_wgs.py CHANGED Viewed

@@ -3,37 +3,38 @@ from csv_detective.formats.longitude_wgs import _is as is_lon
 proportion = 1
 tags = ["geo"]
+mandatory_label = True
+SHARED_COORDS_LABELS = {
+    "ban": 1,
+    "coordinates": 1,
+    "coordonnees": 1,
+    "coordonnees insee": 1,
+    "geo": 0.5,
+    "geopoint": 1,
+    "geoloc": 1,
+    "geolocalisation": 1,
+    "geom": 0.75,
+    "geometry": 1,
+    "gps": 1,
+    "localisation": 1,
+    "point": 1,
+    "position": 1,
+    "wgs84": 1,
+}
-SHARED_COORDS_LABELS = [
-    "ban",
-    "coordinates",
-    "coordonnees",
-    "coordonnees insee",
-    "geo",
-    "geopoint",
-    "geoloc",
-    "geolocalisation",
-    "geom",
-    "geometry",
-    "gps",
-    "localisation",
-    "point",
-    "position",
-    "wgs84",
-]
-specific = [
-    "latlon",
-    "lat lon",
-    "x y",
-    "xy",
-]
+specific = {
+    "latlon": 1,
+    "lat lon": 1,
+    "x y": 0.75,
+    "xy": 0.75,
+}
 # we aim wide to catch exact matches if possible for the highest possible score
 labels = (
     SHARED_COORDS_LABELS
-    + specific
-    + [w + sep + suf for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]]
+    | specific
+    | {w + sep + suf: 1 for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]}
 )

csv_detective/formats/longitude_l93.py CHANGED Viewed

@@ -2,22 +2,17 @@ from frformat import LongitudeL93
 from csv_detective.formats.float import _is as is_float
 from csv_detective.formats.float import float_casting
+from csv_detective.formats.longitude_wgs import SHARED_LONGITUDE_LABELS
 proportion = 1
 tags = ["fr", "geo"]
-labels = [
-    "longitude",
-    "lon",
-    "long",
-    "geocodage x gps",
-    "location longitude",
-    "xlongitude",
-    "lng",
-    "xlong",
-    "x",
-    "xf",
-    "xd",
-]
+mandatory_label = True
+python_type = "float"
+labels = SHARED_LONGITUDE_LABELS | {
+    "x l93": 1,
+    "longitude lb93": 1,
+    "lambx": 1,
+}
 _longitudel93 = LongitudeL93()

csv_detective/formats/longitude_wgs.py CHANGED Viewed

@@ -2,27 +2,35 @@ from csv_detective.formats.float import _is as is_float
 proportion = 1
 tags = ["geo"]
-labels = [
-    "longitude",
-    "lon",
-    "long",
-    "geocodage x gps",
-    "location longitude",
-    "xlongitude",
-    "lng",
-    "xlong",
-    "x",
-    "xf",
-    "xd",
-]
+mandatory_label = True
+python_type = "float"
+SHARED_LONGITUDE_LABELS = {
+    "longitude": 1,
+    "long": 0.75,
+    "lon": 0.75,
+    "lng": 0.5,
+    "x": 0.5,
+    "xf": 0.5,
+    "xd": 0.5,
+    "coordonnee x": 1,
+    "coord x": 1,
+    "xcoord": 1,
+    "xlon": 1,
+    "xlong": 1,
+}
+labels = SHARED_LONGITUDE_LABELS | {
+    "x gps": 1,
+    "longitude wgs84": 1,
+    "x wgs84": 1,
+    "wsg": 0.75,
+    "gps": 0.5,
+}
 def _is(val):
     try:
         return is_float(val) and float(val) >= -180 and float(val) <= 180
-    except ValueError:
-        return False
-    except OverflowError:
+    except Exception:
         return False

csv_detective/formats/longitude_wgs_fr_metropole.py CHANGED Viewed

@@ -1,28 +1,16 @@
 from csv_detective.formats.float import _is as is_float
+from csv_detective.formats.longitude_wgs import labels  # noqa
 proportion = 1
 tags = ["fr", "geo"]
-labels = [
-    "longitude",
-    "lon",
-    "long",
-    "geocodage x gps",
-    "location longitude",
-    "xlongitude",
-    "lng",
-    "xlong",
-    "x",
-    "xf",
-    "xd",
-]
+mandatory_label = True
+python_type = "float"
 def _is(val):
     try:
         return is_float(val) and float(val) >= -5.5 and float(val) <= 9.8
-    except ValueError:
-        return False
-    except OverflowError:
+    except Exception:
         return False

csv_detective/formats/lonlat_wgs.py CHANGED Viewed

@@ -4,19 +4,20 @@ from csv_detective.formats.longitude_wgs import _is as is_lon
 proportion = 1
 tags = ["geo"]
+mandatory_label = True
-specific = [
-    "lonlat",
-    "lon lat",
-    "y x",
-    "yx",
-]
+specific = {
+    "lonlat": 1,
+    "lon lat": 1,
+    "y x": 0.75,
+    "yx": 0.75,
+}
 # we aim wide to catch exact matches if possible for the highest possible score
-words = (
+labels = (
     SHARED_COORDS_LABELS
-    + specific
-    + [w + sep + suf for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]]
+    | specific
+    | {w + sep + suf: 1 for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]}
 )

csv_detective/formats/mois_de_lannee.py CHANGED Viewed

@@ -2,7 +2,7 @@ from unidecode import unidecode
 proportion = 1
 tags = ["fr", "temp"]
-labels = ["mois", "month"]
+labels = {"mois": 1, "month": 1}
 mois = {
     "janvier",

csv_detective/formats/money.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from csv_detective.formats.float import _is as is_float
 proportion = 0.8
-labels = ["budget", "salaire", "euro", "euros", "prêt", "montant"]
+labels = {"budget": 1, "salaire": 1, "euro": 1, "euros": 1, "prêt": 1, "montant": 1}
 currencies = {"€", "$", "£", "¥"}

csv_detective/formats/mongo_object_id.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import re
 proportion = 0.8
-labels = ["id", "objectid"]
+labels = {"id": 1, "objectid": 1}
 def _is(val):

csv_detective/formats/pays.py CHANGED Viewed

@@ -2,19 +2,17 @@ from frformat import Millesime, Options, Pays
 proportion = 0.6
 tags = ["fr", "geo"]
-labels = [
-    "pays",
-    "payslieu",
-    "paysorg",
-    "country",
-    "pays lib",
-    "lieupays",
-    "pays beneficiaire",
-    "nom du pays",
-    "journey start country",
-    "libelle pays",
-    "journey end country",
-]
+labels = {
+    "pays": 1,
+    "payslieu": 1,
+    "paysorg": 1,
+    "country": 1,
+    "pays lib": 1,
+    "lieupays": 1,
+    "pays beneficiaire": 1,
+    "nom du pays": 1,
+    "libelle pays": 1,
+}
 _options = Options(
     ignore_case=True,

csv_detective/formats/percent.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from csv_detective.formats.float import _is as is_float
 proportion = 0.8
-labels = []
+labels = {"pourcent": 1, "part": 0.75, "pct": 0.75}
 def _is(val):

csv_detective/formats/region.py CHANGED Viewed

@@ -2,19 +2,19 @@ from frformat import Millesime, Options, Region
 proportion = 1
 tags = ["fr", "geo"]
-labels = [
-    "region",
-    "libelle region",
-    "nom region",
-    "libelle reg",
-    "nom reg",
-    "reg libusage",
-    "nom de la region",
-    "regionorg",
-    "regionlieu",
-    "reg",
-    "nom officiel region",
-]
+labels = {
+    "region": 1,
+    "libelle region": 1,
+    "nom region": 1,
+    "libelle reg": 1,
+    "nom reg": 1,
+    "reg libusage": 1,
+    "nom de la region": 1,
+    "regionorg": 1,
+    "regionlieu": 1,
+    "reg": 0.5,
+    "nom officiel region": 1,
+}
 _extra_valid_values_set = frozenset(
     {

csv_detective/formats/sexe.py CHANGED Viewed

@@ -2,7 +2,7 @@ from csv_detective.parsing.text import _process_text
 proportion = 1
 tags = ["fr"]
-labels = ["sexe", "sex", "civilite", "genre", "id sexe"]
+labels = {"sexe": 1, "sex": 1, "civilite": 1, "genre": 1}
 def _is(val):

csv_detective/formats/siren.py CHANGED Viewed

@@ -2,16 +2,15 @@ import re
 proportion = 0.9
 tags = ["fr"]
-labels = [
-    "siren",
-    "siren organisme designe",
-    "siren organisme designant",
-    "n° siren",
-    "siren organisme",
-    "siren titulaire",
-    "numero siren",
-    "epci",
-]
+mandatory_label = True
+labels = {
+    "siren": 1,
+    "n° siren": 1,
+    "siren organisme": 1,
+    "siren titulaire": 1,
+    "numero siren": 1,
+    "epci": 1,
+}
 def _is(val):

csv_detective/formats/siret.py CHANGED Viewed

@@ -2,15 +2,15 @@ import re
 proportion = 0.8
 tags = ["fr"]
-labels = [
-    "siret",
-    "siret d",
-    "num siret",
-    "siretacheteur",
-    "n° siret",
-    "coll siret",
-    "epci",
-]
+mandatory_label = True
+labels = {
+    "siret": 1,
+    "num siret": 1,
+    "siretacheteur": 1,
+    "n° siret": 1,
+    "coll siret": 1,
+    "epci": 1,
+}
 def _is(val):

csv_detective/formats/tel_fr.py CHANGED Viewed

@@ -2,19 +2,13 @@ import re
 proportion = 0.7
 tags = ["fr"]
-labels = [
-    "telephone",
-    "tel",
-    "tel1",
-    "tel2",
-    "phone",
-    "num tel",
-    "tel mob",
-    "telephone sav",
-    "telephone1",
-    "coordinates.phone",
-    "telephone du lieu",
-]
+labels = {
+    "telephone": 1,
+    "tel": 1,
+    "phone": 1,
+    "num tel": 1,
+    "tel mob": 1,
+}
 def _is(val):

csv-detective 0.10.1.dev2581__py3-none-any.whl → 0.10.1.dev2599__py3-none-any.whl

csv-detective 0.10.1.dev2581__py3-none-any.whl → 0.10.1.dev2599__py3-none-any.whl