PyPI - csv-detective - Versions diffs - 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl - Mend

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

csv_detective/__init__.py +7 -1
csv_detective/cli.py +33 -21
csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
csv_detective/detection/columns.py +89 -0
csv_detective/detection/encoding.py +29 -0
csv_detective/detection/engine.py +46 -0
csv_detective/detection/formats.py +156 -0
csv_detective/detection/headers.py +28 -0
csv_detective/detection/rows.py +18 -0
csv_detective/detection/separator.py +44 -0
csv_detective/detection/variables.py +97 -0
csv_detective/explore_csv.py +151 -377
csv_detective/format.py +67 -0
csv_detective/formats/__init__.py +9 -0
csv_detective/formats/adresse.py +116 -0
csv_detective/formats/binary.py +26 -0
csv_detective/formats/booleen.py +35 -0
csv_detective/formats/code_commune_insee.py +26 -0
csv_detective/formats/code_csp_insee.py +36 -0
csv_detective/formats/code_departement.py +29 -0
csv_detective/formats/code_fantoir.py +21 -0
csv_detective/formats/code_import.py +17 -0
csv_detective/formats/code_postal.py +25 -0
csv_detective/formats/code_region.py +22 -0
csv_detective/formats/code_rna.py +29 -0
csv_detective/formats/code_waldec.py +17 -0
csv_detective/formats/commune.py +27 -0
csv_detective/formats/csp_insee.py +31 -0
csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
csv_detective/formats/date.py +99 -0
csv_detective/formats/date_fr.py +22 -0
csv_detective/formats/datetime_aware.py +45 -0
csv_detective/formats/datetime_naive.py +48 -0
csv_detective/formats/datetime_rfc822.py +24 -0
csv_detective/formats/departement.py +37 -0
csv_detective/formats/email.py +28 -0
csv_detective/formats/float.py +29 -0
csv_detective/formats/geojson.py +36 -0
csv_detective/formats/insee_ape700.py +31 -0
csv_detective/formats/insee_canton.py +28 -0
csv_detective/formats/int.py +23 -0
csv_detective/formats/iso_country_code_alpha2.py +30 -0
csv_detective/formats/iso_country_code_alpha3.py +30 -0
csv_detective/formats/iso_country_code_numeric.py +31 -0
csv_detective/formats/jour_de_la_semaine.py +41 -0
csv_detective/formats/json.py +20 -0
csv_detective/formats/latitude_l93.py +48 -0
csv_detective/formats/latitude_wgs.py +42 -0
csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
csv_detective/formats/latlon_wgs.py +53 -0
csv_detective/formats/longitude_l93.py +39 -0
csv_detective/formats/longitude_wgs.py +32 -0
csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
csv_detective/formats/lonlat_wgs.py +36 -0
csv_detective/formats/mois_de_lannee.py +48 -0
csv_detective/formats/money.py +18 -0
csv_detective/formats/mongo_object_id.py +14 -0
csv_detective/formats/pays.py +35 -0
csv_detective/formats/percent.py +16 -0
csv_detective/formats/region.py +70 -0
csv_detective/formats/sexe.py +17 -0
csv_detective/formats/siren.py +37 -0
csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
csv_detective/formats/tel_fr.py +36 -0
csv_detective/formats/uai.py +36 -0
csv_detective/formats/url.py +46 -0
csv_detective/formats/username.py +14 -0
csv_detective/formats/uuid.py +16 -0
csv_detective/formats/year.py +28 -0
csv_detective/output/__init__.py +65 -0
csv_detective/output/dataframe.py +96 -0
csv_detective/output/example.py +250 -0
csv_detective/output/profile.py +119 -0
csv_detective/{schema_generation.py → output/schema.py} +268 -343
csv_detective/output/utils.py +74 -0
csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
csv_detective/parsing/columns.py +235 -0
csv_detective/parsing/compression.py +11 -0
csv_detective/parsing/csv.py +56 -0
csv_detective/parsing/excel.py +167 -0
csv_detective/parsing/load.py +111 -0
csv_detective/parsing/text.py +56 -0
csv_detective/utils.py +23 -196
csv_detective/validate.py +138 -0
csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
{csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
csv_detective/all_packages.txt +0 -104
csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
csv_detective/detect_fields/FR/other/__init__.py +0 -0
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
csv_detective/detect_fields/FR/temp/__init__.py +0 -0
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
csv_detective/detect_fields/__init__.py +0 -57
csv_detective/detect_fields/geo/__init__.py +0 -0
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
csv_detective/detect_fields/other/__init__.py +0 -0
csv_detective/detect_fields/other/booleen/__init__.py +0 -21
csv_detective/detect_fields/other/email/__init__.py +0 -8
csv_detective/detect_fields/other/float/__init__.py +0 -17
csv_detective/detect_fields/other/int/__init__.py +0 -12
csv_detective/detect_fields/other/json/__init__.py +0 -24
csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
csv_detective/detect_fields/other/twitter/__init__.py +0 -8
csv_detective/detect_fields/other/url/__init__.py +0 -11
csv_detective/detect_fields/other/uuid/__init__.py +0 -11
csv_detective/detect_fields/temp/__init__.py +0 -0
csv_detective/detect_fields/temp/date/__init__.py +0 -62
csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
csv_detective/detect_fields/temp/year/__init__.py +0 -10
csv_detective/detect_labels/FR/__init__.py +0 -0
csv_detective/detect_labels/FR/geo/__init__.py +0 -0
csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
csv_detective/detect_labels/FR/other/__init__.py +0 -0
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
csv_detective/detect_labels/FR/temp/__init__.py +0 -0
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
csv_detective/detect_labels/__init__.py +0 -43
csv_detective/detect_labels/geo/__init__.py +0 -0
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
csv_detective/detect_labels/other/__init__.py +0 -0
csv_detective/detect_labels/other/booleen/__init__.py +0 -34
csv_detective/detect_labels/other/email/__init__.py +0 -45
csv_detective/detect_labels/other/float/__init__.py +0 -33
csv_detective/detect_labels/other/int/__init__.py +0 -33
csv_detective/detect_labels/other/money/__init__.py +0 -11
csv_detective/detect_labels/other/money/check_col_name.py +0 -8
csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
csv_detective/detect_labels/other/twitter/__init__.py +0 -33
csv_detective/detect_labels/other/url/__init__.py +0 -48
csv_detective/detect_labels/other/uuid/__init__.py +0 -33
csv_detective/detect_labels/temp/__init__.py +0 -0
csv_detective/detect_labels/temp/date/__init__.py +0 -51
csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
csv_detective/detect_labels/temp/year/__init__.py +0 -44
csv_detective/detection.py +0 -361
csv_detective/process_text.py +0 -39
csv_detective/s3_utils.py +0 -48
csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
csv_detective-0.6.7.dist-info/METADATA +0 -23
csv_detective-0.6.7.dist-info/RECORD +0 -150
csv_detective-0.6.7.dist-info/WHEEL +0 -5
csv_detective-0.6.7.dist-info/top_level.txt +0 -2
tests/__init__.py +0 -0
tests/test_fields.py +0 -360
tests/test_file.py +0 -116
tests/test_labels.py +0 -7
/csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0

csv_detective/detect_fields/__init__.py DELETED Viewed

@@ -1,57 +0,0 @@
-# flake8: noqa
-from .FR.other import (
-    code_csp_insee,
-    csp_insee,
-    sexe,
-    siren,
-    tel_fr,
-    uai,
-    siret,
-    insee_ape700,
-    date_fr,
-    code_waldec,
-    code_rna
-)
-from .other import (
-    email,
-    url,
-    booleen,
-    mongo_object_id,
-    twitter,
-    float,
-    int,
-    uuid,
-    json
-)
-from .FR.geo import (
-    adresse,
-    code_commune_insee,
-    code_postal,
-    commune,
-    departement,
-    pays,
-    region,
-    code_departement,
-    code_fantoir,
-    longitude_wgs_fr_metropole,
-    latitude_wgs_fr_metropole,
-    code_region,
-    latitude_l93,
-    longitude_l93,
-    insee_canton
-)
-from .geo import (
-    iso_country_code_alpha2,
-    iso_country_code_alpha3,
-    iso_country_code_numeric,
-    latitude_wgs,
-    longitude_wgs,
-    latlon_wgs,
-    json_geojson
-)
-from .FR.temp import jour_de_la_semaine, mois_de_annee
-from .temp import year, date, datetime_iso, datetime_rfc822

csv_detective/detect_fields/geo/__init__.py DELETED Viewed

File without changes

csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py DELETED Viewed

@@ -1,15 +0,0 @@
-from os.path import dirname, join
-import re
-PROPORTION = 1
-with open(join(dirname(__file__), 'iso_country_code_alpha2.txt'), 'r') as iofile:
-    liste_pays = iofile.read().split('\n')
-liste_pays = set(liste_pays)
-def _is(val):
-    '''Renvoie True si val peut etre un code iso pays alpha-2, False sinon'''
-    if not bool(re.match(r'[A-Z]{2}$', val)):
-        return False
-    return val in liste_pays

csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py DELETED Viewed

@@ -1,14 +0,0 @@
-from os.path import dirname, join
-import re
-PROPORTION = 1
-with open(join(dirname(__file__), 'iso_country_code_alpha3.txt'), 'r') as iofile:
-    liste_pays = iofile.read().split('\n')
-def _is(val):
-    '''Renvoie True si val peut etre un code iso pays alpha-3, False sinon'''
-    if not bool(re.match(r'[A-Z]{3}$', val)):
-        return False
-    return val in set(liste_pays)

csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py DELETED Viewed

@@ -1,15 +0,0 @@
-from os.path import dirname, join
-import re
-PROPORTION = 1
-with open(join(dirname(__file__), 'iso_country_code_numeric.txt'), 'r') as iofile:
-    liste_pays = iofile.read().split('\n')
-liste_pays = set(liste_pays)
-def _is(val):
-    '''Renvoie True si val peut etre un code iso pays numerique, False sinon'''
-    if not bool(re.match(r'[0-9]{3}$', val)):
-        return False
-    return val in liste_pays

csv_detective/detect_fields/geo/json_geojson/__init__.py DELETED Viewed

@@ -1,22 +0,0 @@
-import json
-from json import JSONDecodeError
-PROPORTION = 0.9
-def _is(val):
-    '''Renvoie True si val peut etre geojson'''
-    try:
-        j = json.loads(val)
-        if 'type' in j and 'coordinates' in j:
-            return True
-        if 'geometry' in j:
-            if 'coordinates' in j['geometry']:
-                return True
-    except JSONDecodeError:
-        pass
-    except TypeError:
-        pass
-    return False

csv_detective/detect_fields/geo/latitude_wgs/__init__.py DELETED Viewed

@@ -1,13 +0,0 @@
-from csv_detective.detect_fields.other.float import _is as is_float
-PROPORTION = 0.9
-def _is(val):
-    '''Renvoie True si val peut etre une latitude'''
-    try:
-        return is_float(val) and float(val) >= -90 and float(val) <= 90
-    except ValueError:
-        return False
-    except OverflowError:
-        return False

csv_detective/detect_fields/geo/latlon_wgs/__init__.py DELETED Viewed

@@ -1,15 +0,0 @@
-import re
-PROPORTION = 0.9
-def _is(val):
-    '''Renvoie True si val peut etre une latitude,longitude'''
-    a = bool(
-        re.match(
-            r'^\[?[\+\-]?[0-8]?\d\.\d* ?, ?[\+\-]?(1[0-7]\d|\d{1,2})\.\d+\]?$', val
-        )
-    )
-    return a

csv_detective/detect_fields/geo/longitude_wgs/__init__.py DELETED Viewed

@@ -1,13 +0,0 @@
-from csv_detective.detect_fields.other.float import _is as is_float
-PROPORTION = 0.9
-def _is(val):
-    '''Renvoie True si val peut etre une longitude'''
-    try:
-        is_float(val) and float(val) >= -180 and float(val) <= 180
-    except ValueError:
-        return False
-    except OverflowError:
-        return False

csv_detective/detect_fields/other/__init__.py DELETED Viewed

File without changes

csv_detective/detect_fields/other/booleen/__init__.py DELETED Viewed

@@ -1,21 +0,0 @@
-PROPORTION = 1
-liste_bool = {
-    '0',
-    '1',
-    'vrai',
-    'faux',
-    'true',
-    'false',
-    'oui',
-    'non',
-    'yes',
-    'no',
-    'y',
-    'n',
-    'o'
-}
-def _is(val):
-    '''Détection les booléens'''
-    return val.lower() in liste_bool

csv_detective/detect_fields/other/email/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-import re
-PROPORTION = 1
-def _is(val):
-    '''Detects e-mails'''
-    return bool(re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}$', val))

csv_detective/detect_fields/other/float/__init__.py DELETED Viewed

@@ -1,17 +0,0 @@
-PROPORTION = 1
-def float_casting(str2cast):
-    return float(str2cast.replace(',', '.'))
-def _is(val):
-    '''Detects floats, assuming that tables will not have scientific
-    notations (3e6) or "+" in the string. "-" is still accepted.'''
-    try:
-        if any([k in val for k in ['_', '+', 'e', 'E']]):
-            return False
-        float_casting(val)
-        return True
-    except ValueError:
-        return False

csv_detective/detect_fields/other/int/__init__.py DELETED Viewed

@@ -1,12 +0,0 @@
-PROPORTION = 1
-def _is(val):
-    '''Detects integers'''
-    if any([v in val for v in ['.', '_', '+']]):
-        return False
-    try:
-        int(val)
-        return True
-    except ValueError:
-        return False

csv_detective/detect_fields/other/json/__init__.py DELETED Viewed

@@ -1,24 +0,0 @@
-import json
-from json import JSONDecodeError
-PROPORTION = 1
-def _is(val):
-    '''Detects json'''
-    try:
-        loaded = json.loads(val)
-        if isinstance(loaded, list) or (
-            isinstance(loaded, dict) and not (
-                any(
-                    [
-                        geo in loaded for geo in ['coordinates', 'geometry']
-                    ]
-                )
-            )
-        ):
-            return True
-        else:
-            return False
-    except JSONDecodeError:
-        return False

csv_detective/detect_fields/other/mongo_object_id/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-import re
-PROPORTION = 0.8
-def _is(val):
-    '''Detects Mongo ObjectIds'''
-    return bool(re.match(r'^[0-9a-fA-F]{24}$', val))

csv_detective/detect_fields/other/twitter/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-import re
-PROPORTION = 1
-def _is(val):
-    '''Detects twitter accounts'''
-    return bool(re.match(r'^@[A-Za-z0-9_]+$', val))

csv_detective/detect_fields/other/url/__init__.py DELETED Viewed

@@ -1,11 +0,0 @@
-PROPORTION = 1
-def _is(val):
-    '''Detects urls'''
-    a = 'http://' in val
-    b = 'www.' in val
-    c = any([x in val for x in ['.fr', '.com', '.org', '.gouv', '.net']])
-    d = not ('@' in val)
-    return (a or b or c) and d

csv_detective/detect_fields/other/uuid/__init__.py DELETED Viewed

@@ -1,11 +0,0 @@
-import re
-PROPORTION = 0.8
-def _is(val):
-    '''Detects UUIDs'''
-    return bool(re.match(
-        r'^[{]?[0-9a-fA-F]{8}' + '-?([0-9a-fA-F]{4}-?)' + '{3}[0-9a-fA-F]{12}[}]?$',
-        val
-    ))

csv_detective/detect_fields/temp/__init__.py DELETED Viewed

File without changes

csv_detective/detect_fields/temp/date/__init__.py DELETED Viewed

@@ -1,62 +0,0 @@
-import re
-from dateutil.parser import parse, ParserError
-from csv_detective.detect_fields.other.float import _is as is_float
-from unidecode import unidecode
-PROPORTION = 1
-# /!\ this is only for dates, not datetimes which are handled by other utils
-def is_dateutil_date(val: str) -> bool:
-    # we don't want to get datetimes here, so length restriction
-    # longest date string expected here is DD-septembre-YYYY, so 17 characters
-    if len(val) > 17:
-        return False
-    try:
-        res = parse(val, fuzzy=False)
-        if res.hour or res.minute or res.second:
-            return False
-        return True
-    except (ParserError, ValueError, TypeError, OverflowError):
-        return False
-def _is(val):
-    '''Renvoie True si val peut être une date, False sinon
-    On ne garde que les regex pour les cas où parse() ne convient pas'''
-    # matches 02/12 03 and 02_12 2003
-    a = bool(
-        re.match(
-            r'^(0[1-9]|[12][0-9]|3[01])[ -/_](0[1-9]|1[012])[ -/_]'
-            r'([0-9]{2}|(19|20)[0-9]{2}$)',
-            val
-        )
-    )
-    # matches 02052003
-    b = bool(
-        re.match(
-            r'^(0[1-9]|[12][0-9]|3[01])(0[1-9]|1[012])([0-9]{2}|'
-            r'(19|20){2}$)',
-            val
-        )
-    )
-    # matches JJ*MM*AAAA
-    c = bool(
-        re.match(
-            r'^(0[1-9]|[12][0-9]|3[01]).?(0[1-9]|1[012]).?(19|20)?\d\d$', val))
-    # matches JJ-mmm-AAAA and matches JJ-mmm...mm-AAAA
-    d = bool(
-        re.match(
-            r'^(0[1-9]|[12][0-9]|3[01])[ -/_;.:,](jan|fev|feb|mar|avr|apr'
-            r'|mai|may|jun|jui|jul|aou|aug|sep|oct|nov|dec|janvier|fevrier|mars|avril|'
-            r'mai|juin|jullet|aout|septembre|octobre|novembre|decembre)[ -/_;.:,]'
-            r'([0-9]{2}$|(19|20)[0-9]{2}$)',
-            unidecode(val)
-        )
-    )
-    return (is_dateutil_date(val) and not is_float(val)) or a or b or c or d

csv_detective/detect_fields/temp/datetime_iso/__init__.py DELETED Viewed

@@ -1,18 +0,0 @@
-import re
-PROPORTION = 1
-def _is(val):
-    '''Renvoie True si val peut être une date au format iso, False sinon
-    Exemple: 2023-01-15T12:30:45.123456Z'''
-    a = bool(
-        re.match(
-            r'^\d{4}-(0[1-9]|1[012])\-(0[1-9]|[12][0-9]|3[01])[Tt]'
-            r'([0-2])([0-9]):([0-5])([0-9]):([0-5])([0-9])'
-            r'(\.\d+)?([Zz]|[-+](0[0-9]|1[0-2]):[0-5][0-9])?$',
-            val
-        )
-    )
-    return a

csv_detective/detect_fields/temp/datetime_rfc822/__init__.py DELETED Viewed

@@ -1,21 +0,0 @@
-import re
-PROPORTION = 1
-def _is(val):
-    '''Renvoie True si val peut être une date au format rfc822, False sinon
-    Exemple: Tue, 19 Dec 2023 15:30:45 +0000'''
-    val = val.lower()
-    a = bool(
-        re.match(
-            r'^[A-Za-z]{3}, (0[1-9]|[1-2][0-9]|3[01]) [A-Za-z]{3} \d{4} '
-            r'([0-2])([0-9]):([0-5])([0-9]):([0-5])([0-9]) '
-            r'(ut|gmt|est|edt|cst|cdt|mst|mdt|pst|pdt|[+\-](0[0-9]|1[0-3])00)$',
-            val,
-            re.IGNORECASE
-        )
-    )
-    return a

csv_detective/detect_fields/temp/year/__init__.py DELETED Viewed

@@ -1,10 +0,0 @@
-PROPORTION = 1
-def _is(val):
-    '''Returns True if val can be a year'''
-    try:
-        val = int(val)
-    except ValueError:
-        return False
-    return (1800 <= val) and (val <= 2100)

csv_detective/detect_labels/FR/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/FR/geo/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/FR/geo/adresse/__init__.py DELETED Viewed

@@ -1,40 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'adresse',
-        'adresse postale',
-        'adresse geographique',
-        'adr',
-        'adresse complete',
-        'adresse station'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py DELETED Viewed

@@ -1,42 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'code commune insee',
-        'code insee',
-        'codes insee',
-        'code commune',
-        'code insee commune',
-        'insee',
-        'code com',
-        'com'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/geo/code_departement/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    # 'dep': Possible confusion with dep name?
-    words_combinations_list = ['code departement', 'code_departement', 'dep', 'departement', 'dept']
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = ['cadastre1', 'code fantoir', 'fantoir']
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/geo/code_postal/__init__.py DELETED Viewed

@@ -1,41 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'code postal',
-        'postal code',
-        'postcode',
-        'post code',
-        'cp',
-        'codes postaux',
-        'location postcode'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/geo/code_region/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    # 'reg' : possible confusion with region name?
-    words_combinations_list = ['code region', 'reg', 'code insee region', 'region']
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl