PyPI - csv-detective - Versions diffs - 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl - Mend

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

csv_detective/__init__.py +7 -1
csv_detective/cli.py +33 -21
csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
csv_detective/detection/columns.py +89 -0
csv_detective/detection/encoding.py +29 -0
csv_detective/detection/engine.py +46 -0
csv_detective/detection/formats.py +156 -0
csv_detective/detection/headers.py +28 -0
csv_detective/detection/rows.py +18 -0
csv_detective/detection/separator.py +44 -0
csv_detective/detection/variables.py +97 -0
csv_detective/explore_csv.py +151 -377
csv_detective/format.py +67 -0
csv_detective/formats/__init__.py +9 -0
csv_detective/formats/adresse.py +116 -0
csv_detective/formats/binary.py +26 -0
csv_detective/formats/booleen.py +35 -0
csv_detective/formats/code_commune_insee.py +26 -0
csv_detective/formats/code_csp_insee.py +36 -0
csv_detective/formats/code_departement.py +29 -0
csv_detective/formats/code_fantoir.py +21 -0
csv_detective/formats/code_import.py +17 -0
csv_detective/formats/code_postal.py +25 -0
csv_detective/formats/code_region.py +22 -0
csv_detective/formats/code_rna.py +29 -0
csv_detective/formats/code_waldec.py +17 -0
csv_detective/formats/commune.py +27 -0
csv_detective/formats/csp_insee.py +31 -0
csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
csv_detective/formats/date.py +99 -0
csv_detective/formats/date_fr.py +22 -0
csv_detective/formats/datetime_aware.py +45 -0
csv_detective/formats/datetime_naive.py +48 -0
csv_detective/formats/datetime_rfc822.py +24 -0
csv_detective/formats/departement.py +37 -0
csv_detective/formats/email.py +28 -0
csv_detective/formats/float.py +29 -0
csv_detective/formats/geojson.py +36 -0
csv_detective/formats/insee_ape700.py +31 -0
csv_detective/formats/insee_canton.py +28 -0
csv_detective/formats/int.py +23 -0
csv_detective/formats/iso_country_code_alpha2.py +30 -0
csv_detective/formats/iso_country_code_alpha3.py +30 -0
csv_detective/formats/iso_country_code_numeric.py +31 -0
csv_detective/formats/jour_de_la_semaine.py +41 -0
csv_detective/formats/json.py +20 -0
csv_detective/formats/latitude_l93.py +48 -0
csv_detective/formats/latitude_wgs.py +42 -0
csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
csv_detective/formats/latlon_wgs.py +53 -0
csv_detective/formats/longitude_l93.py +39 -0
csv_detective/formats/longitude_wgs.py +32 -0
csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
csv_detective/formats/lonlat_wgs.py +36 -0
csv_detective/formats/mois_de_lannee.py +48 -0
csv_detective/formats/money.py +18 -0
csv_detective/formats/mongo_object_id.py +14 -0
csv_detective/formats/pays.py +35 -0
csv_detective/formats/percent.py +16 -0
csv_detective/formats/region.py +70 -0
csv_detective/formats/sexe.py +17 -0
csv_detective/formats/siren.py +37 -0
csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
csv_detective/formats/tel_fr.py +36 -0
csv_detective/formats/uai.py +36 -0
csv_detective/formats/url.py +46 -0
csv_detective/formats/username.py +14 -0
csv_detective/formats/uuid.py +16 -0
csv_detective/formats/year.py +28 -0
csv_detective/output/__init__.py +65 -0
csv_detective/output/dataframe.py +96 -0
csv_detective/output/example.py +250 -0
csv_detective/output/profile.py +119 -0
csv_detective/{schema_generation.py → output/schema.py} +268 -343
csv_detective/output/utils.py +74 -0
csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
csv_detective/parsing/columns.py +235 -0
csv_detective/parsing/compression.py +11 -0
csv_detective/parsing/csv.py +56 -0
csv_detective/parsing/excel.py +167 -0
csv_detective/parsing/load.py +111 -0
csv_detective/parsing/text.py +56 -0
csv_detective/utils.py +23 -196
csv_detective/validate.py +138 -0
csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
{csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
csv_detective/all_packages.txt +0 -104
csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
csv_detective/detect_fields/FR/other/__init__.py +0 -0
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
csv_detective/detect_fields/FR/temp/__init__.py +0 -0
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
csv_detective/detect_fields/__init__.py +0 -57
csv_detective/detect_fields/geo/__init__.py +0 -0
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
csv_detective/detect_fields/other/__init__.py +0 -0
csv_detective/detect_fields/other/booleen/__init__.py +0 -21
csv_detective/detect_fields/other/email/__init__.py +0 -8
csv_detective/detect_fields/other/float/__init__.py +0 -17
csv_detective/detect_fields/other/int/__init__.py +0 -12
csv_detective/detect_fields/other/json/__init__.py +0 -24
csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
csv_detective/detect_fields/other/twitter/__init__.py +0 -8
csv_detective/detect_fields/other/url/__init__.py +0 -11
csv_detective/detect_fields/other/uuid/__init__.py +0 -11
csv_detective/detect_fields/temp/__init__.py +0 -0
csv_detective/detect_fields/temp/date/__init__.py +0 -62
csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
csv_detective/detect_fields/temp/year/__init__.py +0 -10
csv_detective/detect_labels/FR/__init__.py +0 -0
csv_detective/detect_labels/FR/geo/__init__.py +0 -0
csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
csv_detective/detect_labels/FR/other/__init__.py +0 -0
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
csv_detective/detect_labels/FR/temp/__init__.py +0 -0
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
csv_detective/detect_labels/__init__.py +0 -43
csv_detective/detect_labels/geo/__init__.py +0 -0
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
csv_detective/detect_labels/other/__init__.py +0 -0
csv_detective/detect_labels/other/booleen/__init__.py +0 -34
csv_detective/detect_labels/other/email/__init__.py +0 -45
csv_detective/detect_labels/other/float/__init__.py +0 -33
csv_detective/detect_labels/other/int/__init__.py +0 -33
csv_detective/detect_labels/other/money/__init__.py +0 -11
csv_detective/detect_labels/other/money/check_col_name.py +0 -8
csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
csv_detective/detect_labels/other/twitter/__init__.py +0 -33
csv_detective/detect_labels/other/url/__init__.py +0 -48
csv_detective/detect_labels/other/uuid/__init__.py +0 -33
csv_detective/detect_labels/temp/__init__.py +0 -0
csv_detective/detect_labels/temp/date/__init__.py +0 -51
csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
csv_detective/detect_labels/temp/year/__init__.py +0 -44
csv_detective/detection.py +0 -361
csv_detective/process_text.py +0 -39
csv_detective/s3_utils.py +0 -48
csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
csv_detective-0.6.7.dist-info/METADATA +0 -23
csv_detective-0.6.7.dist-info/RECORD +0 -150
csv_detective-0.6.7.dist-info/WHEEL +0 -5
csv_detective-0.6.7.dist-info/top_level.txt +0 -2
tests/__init__.py +0 -0
tests/test_fields.py +0 -360
tests/test_file.py +0 -116
tests/test_labels.py +0 -7
/csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0

csv_detective/formats/datetime_rfc822.py ADDED Viewed

@@ -0,0 +1,24 @@
+import re
+from csv_detective.formats.datetime_aware import labels  # noqa
+proportion = 1
+tags = ["temp", "type"]
+def _is(val):
+    """Return True if ``val`` is a string shaped like an RFC 822 datetime.
+    Expected layout: ``Day, DD Mon YYYY HH:MM:SS ZONE`` (for instance
+    ``Sun, 06 Nov 1994 08:49:37 GMT``), where ZONE is one of the named zones
+    listed in the pattern or a whole-hour numeric offset between -1300 and
+    +1300. Matching is case-insensitive. Day and month names are only checked
+    to be three letters long, not to be real names.
+    """
+    if not isinstance(val, str):
+        return False
+    return bool(
+        re.match(
+            r"^[A-Za-z]{3}, (0[1-9]|[1-2][0-9]|3[01]) [A-Za-z]{3} \d{4} "
+            # hours are limited to 00-23 (the former [0-2][0-9] let 24-29 through)
+            r"([01][0-9]|2[0-3]):([0-5])([0-9]):([0-5])([0-9]) "
+            r"(ut|gmt|est|edt|cst|cdt|mst|mdt|pst|pdt|[+\-](0[0-9]|1[0-3])00)$",
+            # re.IGNORECASE already makes the match case-insensitive, so the
+            # value does not need to be lowercased beforehand
+            val,
+            re.IGNORECASE,
+        )
+    )
+_test_values = {
+    True: ["Sun, 06 Nov 1994 08:49:37 GMT"],
+    False: ["2021-06-22T10:20:10"],
+}

csv_detective/formats/departement.py ADDED Viewed

@@ -0,0 +1,37 @@
+from frformat import Departement, Millesime, Options
+proportion = 0.9
+tags = ["fr", "geo"]
+labels = [
+    "departement",
+    "libelle du departement",
+    "deplib",
+    "nom dept",
+    "dept",
+    "libdepartement",
+    "nom departement",
+    "libelle dep",
+    "libelle departement",
+    "lb departements",
+    "dep libusage",
+    "lb departement",
+    "nom dep",
+]
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True,
+)
+_departement = Departement(Millesime.LATEST, _options)
+def _is(val):
+    """Return True if ``val`` is a string accepted by the ``_departement`` validator."""
+    if not isinstance(val, str):
+        return False
+    return _departement.is_valid(val)
+_test_values = {
+    True: ["essonne"],
+    False: ["alabama", "auvergne"],
+}

csv_detective/formats/email.py ADDED Viewed

@@ -0,0 +1,28 @@
+import re
+proportion = 0.9
+labels = [
+    "email",
+    "mail",
+    "courriel",
+    "contact",
+    "mel",
+    "lieucourriel",
+    "coordinates.emailcontact",
+    "e mail",
+    "mo mail",
+    "adresse mail",
+    "adresse email",
+]
+def _is(val):
+    """Tell whether ``val`` is a string that looks like an email address.
+    The check is a case-insensitive regular expression: a local part, one "@",
+    a domain, a dot and a final label of at least two letters.
+    """
+    if not isinstance(val, str):
+        return False
+    found = re.match(r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$", val, re.IGNORECASE)
+    return found is not None
+_test_values = {
+    True: ["cdo_intern@data.gouv.fr", "P.NOM@CIE.LONGDOMAIN"],
+    False: ["cdo@@gouv.sfd"],
+}

csv_detective/formats/float.py ADDED Viewed

@@ -0,0 +1,29 @@
+proportion = 1
+tags = ["type"]
+labels = ["part", "ratio", "taux"]
+def float_casting(val: str) -> float:
+    """Convert ``val`` to a float after replacing every "," with a dot.
+    Raises ValueError when the resulting text is not a valid float literal.
+    """
+    with_dot = val.replace(",", ".")
+    return float(with_dot)
+def _is(val):
+    """Return True when ``val`` is a string holding a plain decimal number.
+    Rejected up front: non-strings, anything containing "_", "+", "e" or "E"
+    (so no scientific notation and no explicit plus sign), and values whose
+    leading "0" is not directly followed by "." or ",". "-" is still accepted.
+    Whatever remains must be convertible by ``float_casting``.
+    """
+    if not isinstance(val, str):
+        return False
+    if any(forbidden in val for forbidden in ("_", "+", "e", "E")):
+        return False
+    if val.startswith("0") and len(val) > 1 and val[1] not in (".", ","):
+        return False
+    try:
+        float_casting(val)
+    except ValueError:
+        return False
+    return True
+_test_values = {
+    True: ["1", "0", "1764", "-24", "1.2", "1863.23", "-12.7", "0.1"],
+    False: ["01053", "01053.89", "1e3", "123_456", "123_456.78", "+35", "+35.9"],
+}

csv_detective/formats/geojson.py ADDED Viewed

@@ -0,0 +1,36 @@
+import json
+proportion = 1
+tags = ["geo"]
+# Labels associated with this format (presumably compared with column
+# headers - confirm against the label-detection code).
+# Each label is listed once: "geo shape" used to appear twice.
+labels = [
+    "json geojson",
+    "json",
+    "geojson",
+    "geo shape",
+    "geom",
+    "geometry",
+    "geoshape",
+]
+def _is(val) -> bool:
+    """Return True if ``val`` parses as JSON and has a GeoJSON-like shape.
+    The decoded value must be a dict that either holds both a "type" and a
+    "coordinates" key, or holds a "geometry" dict with a "coordinates" key.
+    Anything that cannot be decoded by ``json.loads`` yields False.
+    """
+    try:
+        parsed = json.loads(val)
+    except Exception:
+        # best-effort detector: whatever json.loads cannot handle is not GeoJSON
+        return False
+    if not isinstance(parsed, dict):
+        return False
+    if "type" in parsed and "coordinates" in parsed:
+        return True
+    geometry = parsed.get("geometry")
+    # the geometry must be a mapping: `in` applied to a str or a list would
+    # match the text "coordinates" itself instead of looking for a key
+    return isinstance(geometry, dict) and "coordinates" in geometry
+_test_values = {
+    True: [
+        '{"coordinates": [45.783753, 3.049342], "type": "63870"}',
+        '{"geometry": {"coordinates": [45.783753, 3.049342]}}',
+    ],
+    False: ['{"pomme": "fruit", "reponse": 42}'],
+}

csv_detective/formats/insee_ape700.py ADDED Viewed

@@ -0,0 +1,31 @@
+from os.path import dirname, join
+from csv_detective.parsing.text import _process_text
+proportion = 0.8
+tags = ["fr"]
+labels = [
+    "code ape",
+    "code activite (ape)",
+    "code naf",
+    "code naf organisme designe",
+    "code naf organisme designant",
+    "base sirene : code ape de l'etablissement siege",
+]
+# Load the reference codes from the data file shipped next to this module,
+# once at import time. The context manager closes the file even if reading fails.
+with open(join(dirname(__file__), "data", "insee_ape700.txt"), "r") as _codes_file:
+    # drop empty entries (such as the one produced by a trailing newline) rather
+    # than blindly deleting the last element, which would lose a real code if
+    # the file did not end with a newline
+    condes_insee_ape = {code for code in _codes_file.read().split("\n") if code}
+def _is(val):
+    """Return True if ``val`` is a string whose ``_process_text`` output,
+    upper-cased, is one of the entries of ``condes_insee_ape``."""
+    return isinstance(val, str) and _process_text(val).upper() in condes_insee_ape
+_test_values = {True: ["0116Z"], False: ["0116A"]}

csv_detective/formats/insee_canton.py ADDED Viewed

@@ -0,0 +1,28 @@
+from frformat import Canton, Millesime, Options
+proportion = 0.9
+tags = ["fr", "geo"]
+labels = [
+    "insee canton",
+    "canton",
+    "cant",
+    "nom canton",
+]
+_options = Options(
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True,
+)
+_canton = Canton(Millesime.LATEST, _options)
+def _is(val):
+    """Return True if ``val`` is a string accepted by the ``_canton`` validator."""
+    if not isinstance(val, str):
+        return False
+    return _canton.is_valid(val)
+_test_values = {
+    True: ["nantua"],
+    False: ["california"],
+}

csv_detective/formats/int.py ADDED Viewed

@@ -0,0 +1,23 @@
+labels = ["nb", "nombre", "nbre"]
+# The sibling format modules expose this list under the name `tags`; it was
+# spelled `tag` here, so code reading `tags` could not see it.
+tags = ["type"]
+# kept as an alias so that existing references to `tag` keep working
+tag = tags
+def _is(val):
+    """Detects integers written as plain strings.
+    Non-strings, values containing ".", "_" or "+", and multi-character values
+    starting with "0" are rejected; the rest must be accepted by ``int()``.
+    """
+    if not isinstance(val, str):
+        return False
+    if "." in val or "_" in val or "+" in val:
+        return False
+    if len(val) > 1 and val.startswith("0"):
+        return False
+    try:
+        int(val)
+    except ValueError:
+        return False
+    return True
+_test_values = {
+    True: ["1", "0", "1764", "-24"],
+    False: ["01053", "1.2", "123_456", "+35"],
+}

csv_detective/formats/iso_country_code_alpha2.py ADDED Viewed

@@ -0,0 +1,30 @@
+import re
+from os.path import dirname, join
+proportion = 1
+tags = ["geo"]
+labels = [
+    "iso country code",
+    "code pays",
+    "pays",
+    "country",
+    "nation",
+    "pays code",
+    "code pays (iso)",
+]
+# Reference values are read once at import from the bundled data file.
+with open(join(dirname(__file__), "data", "iso_country_code_alpha2.txt"), "r") as iofile:
+    liste_pays = set(iofile.read().split("\n"))
+def _is(val):
+    """Return True if ``val`` is a string of two uppercase letters found in ``liste_pays``."""
+    if isinstance(val, str) and re.match(r"[A-Z]{2}$", val):
+        return val in liste_pays
+    return False
+_test_values = {
+    True: ["FR"],
+    False: ["XX", "A", "FRA"],
+}

csv_detective/formats/iso_country_code_alpha3.py ADDED Viewed

@@ -0,0 +1,30 @@
+import re
+from os.path import dirname, join
+proportion = 1
+tags = ["geo"]
+labels = [
+    "iso country code",
+    "code pays",
+    "pays",
+    "country",
+    "nation",
+    "pays code",
+    "code pays (iso)",
+]
+with open(join(dirname(__file__), "data", "iso_country_code_alpha3.txt"), "r") as iofile:
+    # built once at import as a set, as the alpha-2 and numeric modules do,
+    # instead of converting the list again on every call to _is
+    liste_pays = set(iofile.read().split("\n"))
+def _is(val):
+    """Return True if val can be an ISO alpha-3 country code, False otherwise.
+    ``val`` must be a string of three uppercase letters that is present in
+    ``liste_pays``.
+    """
+    if not isinstance(val, str) or not bool(re.match(r"[A-Z]{3}$", val)):
+        return False
+    return val in liste_pays
+_test_values = {
+    True: ["FRA"],
+    False: ["XXX", "FR", "A"],
+}

csv_detective/formats/iso_country_code_numeric.py ADDED Viewed

@@ -0,0 +1,31 @@
+import re
+from os.path import dirname, join
+proportion = 1
+tags = ["geo"]
+labels = [
+    "iso country code",
+    "code pays",
+    "pays",
+    "country",
+    "nation",
+    "pays code",
+    "code pays (iso)",
+]
+# Reference values are read once at import from the bundled data file.
+with open(join(dirname(__file__), "data", "iso_country_code_numeric.txt"), "r") as iofile:
+    liste_pays = set(iofile.read().split("\n"))
+def _is(val):
+    """Return True if val can be an ISO numeric country code, False otherwise."""
+    if isinstance(val, str) and re.match(r"[0-9]{3}$", val):
+        return val in liste_pays
+    return False
+_test_values = {
+    True: ["250"],
+    False: ["003"],
+}

csv_detective/formats/jour_de_la_semaine.py ADDED Viewed

@@ -0,0 +1,41 @@
+proportion = 0.8
+tags = ["fr", "temp"]
+labels = [
+    "jour semaine",
+    "type jour",
+    "jour de la semaine",
+    "saufjour",
+    "nomjour",
+    "jour",
+    "jour de fermeture",
+]
+jours = {
+    "lundi",
+    "mardi",
+    "mercredi",
+    "jeudi",
+    "vendredi",
+    "samedi",
+    "dimanche",
+    "lun",
+    "mar",
+    "mer",
+    "jeu",
+    "ven",
+    "sam",
+    "dim",
+}
+def _is(val):
+    """Return True if ``val`` is a string whose lowercase form is in ``jours``."""
+    return isinstance(val, str) and val.lower() in jours
+_test_values = {
+    True: ["lundi"],
+    False: ["jour de la biere"],
+}

csv_detective/formats/json.py ADDED Viewed

@@ -0,0 +1,20 @@
+import json
+from json import JSONDecodeError
+proportion = 1
+tags = ["type"]
+def _is(val):
+    """Return True if ``val`` decodes as a JSON array or object.
+    Values that ``json.loads`` rejects (JSONDecodeError or TypeError) yield False.
+    """
+    try:
+        decoded = json.loads(val)
+    except (JSONDecodeError, TypeError):
+        return False
+    # scalars such as integers are valid JSON but are deliberately not reported
+    if isinstance(decoded, dict):
+        return True
+    return isinstance(decoded, list)
+_test_values = {
+    True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
+    False: ["5", '{"zefib":', '{"a"}'],
+}

csv_detective/formats/latitude_l93.py ADDED Viewed

@@ -0,0 +1,48 @@
+from frformat import LatitudeL93
+from csv_detective.formats.float import _is as is_float
+from csv_detective.formats.float import float_casting
+proportion = 1
+tags = ["fr", "geo"]
+labels = [
+    "latitude",
+    "lat",
+    "y",
+    "yf",
+    "yd",
+    "y l93",
+    "coordonnee y",
+    "latitude lb93",
+    "coord y",
+    "ycoord",
+    "geocodage y gps",
+    "location latitude",
+    "ylatitude",
+    "ylat",
+    "latitude (y)",
+    "latitudeorg",
+    "coordinates.latitude",
+    "googlemap latitude",
+    "latitudelieu",
+    "latitude googlemap",
+]
+_latitudel93 = LatitudeL93()
+def _is(val):
+    """Return True if ``val`` is a float-like string whose numeric value is
+    accepted by the ``_latitudel93`` validator."""
+    try:
+        if not isinstance(val, str) or not is_float(val):
+            return False
+        as_number = float_casting(val)
+        return _latitudel93.is_valid(as_number)
+    except (ValueError, OverflowError):
+        return False
+_test_values = {
+    True: ["6037008", "7123528.5", "7124528,5"],
+    False: ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"],
+}

csv_detective/formats/latitude_wgs.py ADDED Viewed

@@ -0,0 +1,42 @@
+from csv_detective.formats.float import _is as is_float
+proportion = 1
+tags = ["geo"]
+labels = [
+    "latitude",
+    "lat",
+    "y",
+    "yf",
+    "yd",
+    "coordonnee y",
+    "coord y",
+    "ycoord",
+    "geocodage y gps",
+    "location latitude",
+    "ylatitude",
+    "ylat",
+    "latitude (y)",
+    "latitudeorg",
+    "coordinates.latitude",
+    "googlemap latitude",
+    "latitudelieu",
+    "latitude googlemap",
+    "latitude wgs84",
+    "y wgs84",
+    "latitude (wgs84)",
+]
+def _is(val):
+    """Return True if ``val`` passes the float detector and lies within [-90, 90].
+    Values written with a decimal comma pass ``is_float`` but make ``float()``
+    raise ValueError, so they are reported as False.
+    """
+    try:
+        if not is_float(val):
+            return False
+        return -90 <= float(val) <= 90
+    except (ValueError, OverflowError):
+        return False
+_test_values = {
+    True: ["43.2", "-22"],
+    False: ["100"],
+}

csv_detective/formats/latitude_wgs_fr_metropole.py ADDED Viewed

@@ -0,0 +1,42 @@
+from csv_detective.formats.float import _is as is_float
+proportion = 1
+tags = ["fr", "geo"]
+labels = [
+    "latitude",
+    "lat",
+    "y",
+    "yf",
+    "yd",
+    "coordonnee y",
+    "coord y",
+    "ycoord",
+    "geocodage y gps",
+    "location latitude",
+    "ylatitude",
+    "ylat",
+    "latitude (y)",
+    "latitudeorg",
+    "coordinates.latitude",
+    "googlemap latitude",
+    "latitudelieu",
+    "latitude googlemap",
+    "latitude wgs84",
+    "y wgs84",
+    "latitude (wgs84)",
+]
+def _is(val):
+    """Return True if ``val`` passes the float detector and lies within [41.3, 51.3].
+    A ValueError or OverflowError raised by ``float()`` is reported as False.
+    """
+    try:
+        if is_float(val):
+            return 41.3 <= float(val) <= 51.3
+        return False
+    except (ValueError, OverflowError):
+        return False
+_test_values = {
+    True: ["42.5"],
+    False: ["22.5", "62.5"],
+}

csv_detective/formats/latlon_wgs.py ADDED Viewed

@@ -0,0 +1,53 @@
+from csv_detective.formats.latitude_wgs import _is as is_lat
+from csv_detective.formats.longitude_wgs import _is as is_lon
+proportion = 1
+tags = ["geo"]
+SHARED_COORDS_LABELS = [
+    "ban",
+    "coordinates",
+    "coordonnees",
+    "coordonnees insee",
+    "geo",
+    "geopoint",
+    "geoloc",
+    "geolocalisation",
+    "geom",
+    "geometry",
+    "gps",
+    "localisation",
+    "point",
+    "position",
+    "wgs84",
+]
+specific = [
+    "latlon",
+    "lat lon",
+    "x y",
+    "xy",
+]
+# we aim wide to catch exact matches if possible for the highest possible score
+labels = (
+    SHARED_COORDS_LABELS
+    + specific
+    + [w + sep + suf for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]]
+)
+def _is(val):
+    """Return True if ``val`` is a "lat,lon" string, optionally wrapped in brackets.
+    The string must contain exactly one comma; the left part is checked with
+    ``is_lat`` and the right part, with its spaces removed, with ``is_lon``.
+    """
+    if not isinstance(val, str):
+        return False
+    parts = val.split(",")
+    if len(parts) != 2:
+        return False
+    lat, lon = parts
+    # handling [lat,lon]: brackets are stripped only when both are present
+    if lat.startswith("[") and lon.endswith("]"):
+        lat = lat[1:]
+        lon = lon[:-1]
+    if not is_lat(lat):
+        return False
+    return is_lon(lon.replace(" ", ""))
+_test_values = {
+    True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8", "[12,-0.28]"],
+    False: ["0.1,192", "-102, 92", "[23.02,4.1", "23.02,4.1]", "160.1,-27"],
+}

csv_detective/formats/longitude_l93.py ADDED Viewed

@@ -0,0 +1,39 @@
+from frformat import LongitudeL93
+from csv_detective.formats.float import _is as is_float
+from csv_detective.formats.float import float_casting
+proportion = 1
+tags = ["fr", "geo"]
+labels = [
+    "longitude",
+    "lon",
+    "long",
+    "geocodage x gps",
+    "location longitude",
+    "xlongitude",
+    "lng",
+    "xlong",
+    "x",
+    "xf",
+    "xd",
+]
+_longitudel93 = LongitudeL93()
+def _is(val):
+    """Return True if ``val`` is a float-like string whose numeric value is
+    accepted by the ``_longitudel93`` validator."""
+    try:
+        if not isinstance(val, str) or not is_float(val):
+            return False
+        as_number = float_casting(val)
+        return _longitudel93.is_valid(as_number)
+    except (ValueError, OverflowError):
+        return False
+_test_values = {
+    True: ["0", "-154", "1265783,45", "34723.4"],
+    False: ["1456669.8", "-776225", "346_3214"],
+}

csv_detective/formats/longitude_wgs.py ADDED Viewed

@@ -0,0 +1,32 @@
+from csv_detective.formats.float import _is as is_float
+proportion = 1
+tags = ["geo"]
+labels = [
+    "longitude",
+    "lon",
+    "long",
+    "geocodage x gps",
+    "location longitude",
+    "xlongitude",
+    "lng",
+    "xlong",
+    "x",
+    "xf",
+    "xd",
+]
+def _is(val):
+    """Return True if ``val`` passes the float detector and lies within [-180, 180].
+    A ValueError or OverflowError raised by ``float()`` is reported as False.
+    """
+    try:
+        if not is_float(val):
+            return False
+        return -180 <= float(val) <= 180
+    except (ValueError, OverflowError):
+        return False
+_test_values = {
+    True: ["120", "-20.2"],
+    False: ["-200"],
+}

csv_detective/formats/longitude_wgs_fr_metropole.py ADDED Viewed

@@ -0,0 +1,32 @@
+from csv_detective.formats.float import _is as is_float
+proportion = 1
+tags = ["fr", "geo"]
+labels = [
+    "longitude",
+    "lon",
+    "long",
+    "geocodage x gps",
+    "location longitude",
+    "xlongitude",
+    "lng",
+    "xlong",
+    "x",
+    "xf",
+    "xd",
+]
+def _is(val):
+    """Return True if ``val`` passes the float detector and lies within [-5.5, 9.8].
+    A ValueError or OverflowError raised by ``float()`` is reported as False.
+    """
+    try:
+        if is_float(val):
+            return -5.5 <= float(val) <= 9.8
+        return False
+    except (ValueError, OverflowError):
+        return False
+_test_values = {
+    True: ["-2.5"],
+    False: ["12.8"],
+}

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl