PyPI - csv-detective - Versions diffs - 0.9.3.dev2258__py3-none-any.whl → 0.9.3.dev2348__py3-none-any.whl - Mend

csv-detective 0.9.3.dev2258__py3-none-any.whl → 0.9.3.dev2348__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179) hide show

csv_detective/detect_labels/FR/other/uai/__init__.py DELETED Viewed

@@ -1,25 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "uai",
-        "code etablissement",
-        "code uai",
-        "uai - identifiant",
-        "numero uai",
-        "rne",
-        "numero de l'etablissement",
-        "code rne",
-        "codeetab",
-        "code uai de l'etablissement",
-        "ref uai",
-        "cd rne",
-        "numerouai",
-        "numero d etablissement",
-        "code etablissement",
-        "numero etablissement",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/FR/temp/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py DELETED Viewed

@@ -1,16 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "jour semaine",
-        "type jour",
-        "jour de la semaine",
-        "saufjour",
-        "nomjour",
-        "jour",
-        "jour de fermeture",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["mois de annee", "mois", "month"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/__init__.py DELETED Viewed

@@ -1,94 +0,0 @@
-from .FR.geo import (
-    adresse,
-    code_commune_insee,
-    code_departement,
-    code_fantoir,
-    code_postal,
-    code_region,
-    commune,
-    departement,
-    insee_canton,
-    latitude_l93,
-    latitude_wgs_fr_metropole,
-    longitude_l93,
-    longitude_wgs_fr_metropole,
-    pays,
-    region,
-)
-from .FR.other import (
-    code_csp_insee,
-    code_rna,
-    code_waldec,
-    csp_insee,
-    date_fr,
-    insee_ape700,
-    sexe,
-    siren,
-    siret,
-    tel_fr,
-    uai,
-)
-from .FR.temp import jour_de_la_semaine, mois_de_annee
-from .geo import (
-    iso_country_code_alpha2,
-    iso_country_code_alpha3,
-    iso_country_code_numeric,
-    json_geojson,
-    latitude_wgs,
-    latlon_wgs,
-    longitude_wgs,
-    lonlat_wgs,
-)
-from .other import booleen, email, float, int, money, mongo_object_id, twitter, url, uuid
-from .temp import date, datetime_rfc822, year
-__all__ = [
-    "adresse",
-    "code_commune_insee",
-    "code_departement",
-    "code_fantoir",
-    "code_postal",
-    "code_region",
-    "commune",
-    "departement",
-    "insee_canton",
-    "latitude_l93",
-    "latitude_wgs_fr_metropole",
-    "longitude_l93",
-    "longitude_wgs_fr_metropole",
-    "pays",
-    "region",
-    "code_csp_insee",
-    "code_rna",
-    "code_waldec",
-    "csp_insee",
-    "date_fr",
-    "insee_ape700",
-    "sexe",
-    "siren",
-    "siret",
-    "tel_fr",
-    "uai",
-    "iso_country_code_alpha2",
-    "iso_country_code_alpha3",
-    "iso_country_code_numeric",
-    "json_geojson",
-    "latitude_wgs",
-    "latlon_wgs",
-    "longitude_wgs",
-    "lonlat_wgs",
-    "jour_de_la_semaine",
-    "mois_de_annee",
-    "booleen",
-    "email",
-    "float",
-    "int",
-    "money",
-    "mongo_object_id",
-    "twitter",
-    "url",
-    "uuid",
-    "date",
-    "datetime_rfc822",
-    "year",
-]

csv_detective/detect_labels/geo/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py DELETED Viewed

@@ -1,16 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "iso country code",
-        "code pays",
-        "pays",
-        "country",
-        "nation",
-        "pays code",
-        "code pays (iso)",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py DELETED Viewed

@@ -1,16 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "iso country code",
-        "code pays",
-        "pays",
-        "country",
-        "nation",
-        "pays code",
-        "code pays (iso)",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py DELETED Viewed

@@ -1,16 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "iso country code",
-        "code pays",
-        "pays",
-        "country",
-        "nation",
-        "pays code",
-        "code pays (iso)",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/json_geojson/__init__.py DELETED Viewed

@@ -1,17 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "json geojson",
-        "json",
-        "geojson",
-        "geo shape",
-        "geom",
-        "geometry",
-        "geo shape",
-        "geoshape",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/latitude_wgs/__init__.py DELETED Viewed

@@ -1,30 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "latitude",
-        "lat",
-        "y",
-        "yf",
-        "yd",
-        "coordonnee y",
-        "coord y",
-        "ycoord",
-        "geocodage y gps",
-        "location latitude",
-        "ylatitude",
-        "ylat",
-        "latitude (y)",
-        "latitudeorg",
-        "coordinates.latitude",
-        "googlemap latitude",
-        "latitudelieu",
-        "latitude googlemap",
-        "latitude wgs84",
-        "y wgs84",
-        "latitude (wgs84)",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/latlon_wgs/__init__.py DELETED Viewed

@@ -1,39 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-COMMON_COORDS_LABELS = [
-    "ban",
-    "coordinates",
-    "coordonnees",
-    "coordonnees insee",
-    "geo",
-    "geopoint",
-    "geoloc",
-    "geolocalisation",
-    "geom",
-    "geometry",
-    "gps",
-    "localisation",
-    "point",
-    "position",
-    "wgs84",
-]
-specific = [
-    "latlon",
-    "lat lon",
-    "x y",
-    "xy",
-]
-# we aim wide to catch exact matches if possible for the highest possible score
-words = (
-    COMMON_COORDS_LABELS
-    + specific
-    + [w + sep + suf for suf in specific for w in COMMON_COORDS_LABELS for sep in ["", " "]]
-)
-def _is(header: str) -> float:
-    return header_score(header, words)

csv_detective/detect_labels/geo/longitude_wgs/__init__.py DELETED Viewed

@@ -1,21 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    #  Does not detect CRS
-    words_combinations_list = [
-        "longitude",
-        "lon",
-        "long",
-        "geocodage x gps",
-        "location longitude",
-        "xlongitude",
-        "lng",
-        "xlong",
-        "x",
-        "xf",
-        "xd",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/geo/lonlat_wgs/__init__.py DELETED Viewed

@@ -1,23 +0,0 @@
-from csv_detective.parsing.text import header_score
-from ..latlon_wgs import COMMON_COORDS_LABELS
-PROPORTION = 0.5
-specific = [
-    "lonlat",
-    "lon lat",
-    "y x",
-    "yx",
-]
-# we aim wide to catch exact matches if possible for the highest possible score
-words = (
-    COMMON_COORDS_LABELS
-    + specific
-    + [w + sep + suf for suf in specific for w in COMMON_COORDS_LABELS for sep in ["", " "]]
-)
-def _is(header: str) -> float:
-    return header_score(header, words)

csv_detective/detect_labels/other/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/other/booleen/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["is ", "has ", "est "]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/email/__init__.py DELETED Viewed

@@ -1,20 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "email",
-        "mail",
-        "courriel",
-        "contact",
-        "mel",
-        "lieucourriel",
-        "coordinates.emailcontact",
-        "e mail",
-        "mo mail",
-        "adresse mail",
-        "adresse email",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/float/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["part", "ratio", "taux"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/int/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["nb", "nombre", "nbre"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/money/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["budget", "salaire", "euro", "euros", "prêt", "montant"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/mongo_object_id/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["id", "objectid"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/twitter/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["twitter", "twitter account", "twitter username"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/url/__init__.py DELETED Viewed

@@ -1,23 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "url",
-        "url source",
-        "site web",
-        "source url",
-        "site internet",
-        "remote url",
-        "web",
-        "site",
-        "lien",
-        "site data",
-        "lien url",
-        "lien vers le fichier",
-        "sitweb",
-        "interneturl",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/other/uuid/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = ["id", "uuid", "guid"]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/temp/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/temp/date/__init__.py DELETED Viewed

@@ -1,28 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "date",
-        "jour",
-        "date de mise a jour",
-        "sns date",
-        "date maj",
-        "rem date",
-        "periode",
-        "date de publication",
-        "dpc",
-        "extract date",
-        "date immatriculation",
-        "date jeu donnees",
-        "datemaj",
-        "dateouv",
-        "date der maj",
-        "dmaj",
-        "jour",
-        "yyyymmdd",
-        "aaaammjj",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/temp/datetime_rfc822/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "datetime",
-        "timestamp",
-        "osm_timestamp",
-        "date",
-        "created at",
-        "last update",
-        "date maj",
-        "createdat",
-        "date naissance",
-        "date donnees",
-    ]  # Almost same as IS0, no example in data
-    return header_score(header, words_combinations_list)

csv_detective/detect_labels/temp/year/__init__.py DELETED Viewed

@@ -1,19 +0,0 @@
-from csv_detective.parsing.text import header_score
-PROPORTION = 0.5
-def _is(header: str) -> float:
-    words_combinations_list = [
-        "year",
-        "annee",
-        "annee depot",
-        "an nais",
-        "exercice",
-        "data year",
-        "annee de publication",
-        "exercice comptable",
-        "annee de naissance",
-        "annee ouverture",
-    ]
-    return header_score(header, words_combinations_list)

csv_detective/load_tests.py DELETED Viewed

@@ -1,59 +0,0 @@
-import os
-from csv_detective import detect_fields, detect_labels  # noqa
-def get_all_packages(detect_type) -> list:
-    root_dir = os.path.dirname(os.path.abspath(__file__)) + "/" + detect_type
-    modules = []
-    for dirpath, _, filenames in os.walk(root_dir):
-        for filename in filenames:
-            file = os.path.join(dirpath, filename).replace(root_dir, "")
-            if file.endswith("__init__.py"):
-                module = file.replace("__init__.py", "").replace("/", ".").replace("\\", ".")[:-1]
-                if module:
-                    modules.append(detect_type + module)
-    return modules
-def return_all_tests(
-    user_input_tests: str | list,
-    detect_type: str,
-) -> dict[str, dict]:
-    """
-    returns all tests that have a method _is and are listed in the user_input_tests
-    the function can select a sub_package from csv_detective
-    user_input_tests may look like this:
-        - "ALL": all possible tests are made
-        - "FR.other.siren" (or any other path-like string to one of the tests, or a group of tests, like "FR.geo"):
-        this specifc (group of) test(s) only
-        - ["FR.temp.mois_de_annee", "geo", ...]: only the specified tests will be made ; you may also skip
-        specific (groups of) tests by add "-" at the start (e.g "-temp.date")
-    """
-    assert detect_type in ["detect_fields", "detect_labels"]
-    all_packages = get_all_packages(detect_type=detect_type)
-    if isinstance(user_input_tests, str):
-        user_input_tests = [user_input_tests]
-    if "ALL" in user_input_tests or all(x[0] == "-" for x in user_input_tests):
-        tests_to_do = [detect_type]
-    else:
-        tests_to_do = [f"{detect_type}.{x}" for x in user_input_tests if x[0] != "-"]
-    tests_skipped = [f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"]
-    # removing specified (groups of) tests
-    all_tests = [
-        # this is why we need to import detect_fields/labels
-        eval(x)
-        for x in all_packages
-        if any([y == x[: len(y)] for y in tests_to_do])
-        and all([y != x[: len(y)] for y in tests_skipped])
-    ]
-    return {
-        test.__name__.split(".")[-1]: {
-            "func": test._is,
-            "prop": test.PROPORTION,
-            "module": test,
-        }
-        for test in all_tests
-        if "_is" in dir(test)
-    }

csv-detective 0.9.3.dev2258__py3-none-any.whl → 0.9.3.dev2348__py3-none-any.whl

csv-detective 0.9.3.dev2258__py3-none-any.whl → 0.9.3.dev2348__py3-none-any.whl