PyPI - csv-detective - Versions diffs - 0.8.1.dev1674__py3-none-any.whl → 0.8.1.dev1720__py3-none-any.whl - Mend

csv-detective 0.8.1.dev1674__py3-none-any.whl → 0.8.1.dev1720__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

csv_detective/__init__.py CHANGED Viewed

@@ -1,4 +1,2 @@
 from csv_detective.explore_csv import routine, routine_minio, validate_then_detect  # noqa
 from csv_detective.output.example import create_example_csv_file  # noqa
-__version__ = '0.8.1.dev'

csv_detective/cli.py CHANGED Viewed

@@ -4,23 +4,20 @@ Command line client for csv_detective
 import argparse
 import json
 from csv_detective.explore_csv import routine
 def run():
     explorer = argparse.ArgumentParser(description="Analyse a tabular file")
-    explorer.add_argument(
-        "file_path",
-        type=str,
-        help="Enter path of tabular file to explore"
-    )
+    explorer.add_argument("file_path", type=str, help="Enter path of tabular file to explore")
     explorer.add_argument(
         "-n",
         "--num_rows",
         dest="num_rows",
         type=int,
         nargs="?",
-        help="Number of rows to use for detection (default 500)"
+        help="Number of rows to use for detection (default 500)",
     )
     explorer.add_argument(
         "-s",
@@ -28,14 +25,14 @@ def run():
         dest="sep",
         type=str,
         nargs="?",
-        help="Columns separator (detected if not specified)"
+        help="Columns separator (detected if not specified)",
     )
     explorer.add_argument(
         "--save",
         dest="save_results",
         type=int,
         nargs="?",
-        help="Whether to save the resulting analysis to json (1 = save, 0 = don't)"
+        help="Whether to save the resulting analysis to json (1 = save, 0 = don't)",
     )
     explorer.add_argument(
         "-v",
@@ -43,7 +40,7 @@ def run():
         dest="verbose",
         type=int,
         nargs="?",
-        help="Verbose (0 = quiet, 1 = details)"
+        help="Verbose (0 = quiet, 1 = details)",
     )
     opts = explorer.parse_args()

csv_detective/detect_fields/FR/geo/adresse/__init__.py CHANGED Viewed

@@ -3,97 +3,97 @@ from csv_detective.parsing.text import _process_text
 PROPORTION = 0.55
 # ajouts d'espaces en fin de mots pour s'assurer que le str n'est pas juste une substr d'un mot plus long
 voies = {
-    'aire ',
-    'allee ',
-    'avenue ',
-    'base ',
-    'boulevard ',
-    'cami ',
-    'carrefour ',
-    'chemin ',
-    'cheminement ',
-    'chaussee ',
-    'cite ',
-    'clos ',
-    'coin ',
-    'corniche ',
-    'cote ',
-    'cour ',
-    'cours ',
-    'domaine ',
-    'descente ',
-    'ecart ',
-    'esplanade ',
-    'faubourg ',
-    'gare ',
-    'grande rue',
-    'hameau ',
-    'halle ',
-    'ilot ',
-    'impasse ',
-    'lieu dit',
-    'lotissement ',
-    'marche ',
-    'montee ',
-    'parc ',
-    'passage ',
-    'place ',
-    'plan ',
-    'plaine ',
-    'plateau ',
-    'pont ',
-    'port ',
-    'promenade ',
-    'parvis ',
-    'quartier ',
-    'quai ',
-    'residence ',
-    'ruelle ',
-    'rocade ',
-    'rond point',
-    'route ',
-    'rue ',
+    "aire ",
+    "allee ",
+    "avenue ",
+    "base ",
+    "boulevard ",
+    "cami ",
+    "carrefour ",
+    "chemin ",
+    "cheminement ",
+    "chaussee ",
+    "cite ",
+    "clos ",
+    "coin ",
+    "corniche ",
+    "cote ",
+    "cour ",
+    "cours ",
+    "domaine ",
+    "descente ",
+    "ecart ",
+    "esplanade ",
+    "faubourg ",
+    "gare ",
+    "grande rue",
+    "hameau ",
+    "halle ",
+    "ilot ",
+    "impasse ",
+    "lieu dit",
+    "lotissement ",
+    "marche ",
+    "montee ",
+    "parc ",
+    "passage ",
+    "place ",
+    "plan ",
+    "plaine ",
+    "plateau ",
+    "pont ",
+    "port ",
+    "promenade ",
+    "parvis ",
+    "quartier ",
+    "quai ",
+    "residence ",
+    "ruelle ",
+    "rocade ",
+    "rond point",
+    "route ",
+    "rue ",
     # 'sente - sentier',
-    'square ',
-    'tour ',
+    "square ",
+    "tour ",
     # 'terre-plein',
-    'traverse ',
-    'villa ',
-    'village ',
-    'voie ',
-    'zone artisanale',
-    'zone d’amenagement concerte',
-    'zone d’amenagement differe',
-    'zone industrielle',
-    'zone ',
+    "traverse ",
+    "villa ",
+    "village ",
+    "voie ",
+    "zone artisanale",
+    "zone d’amenagement concerte",
+    "zone d’amenagement differe",
+    "zone industrielle",
+    "zone ",
     # 'r',
-    'av ',
-    'pl ',
-    'bd ',
-    'cami ',
+    "av ",
+    "pl ",
+    "bd ",
+    "cami ",
     # 'che',
-    'chs ',
-    'dom ',
-    'ham ',
-    'ld ',
+    "chs ",
+    "dom ",
+    "ham ",
+    "ld ",
     # 'pro',
     # 'rte',
-    'vlge ',
-    'za ',
-    'zac ',
-    'zad ',
-    'zi ',
+    "vlge ",
+    "za ",
+    "zac ",
+    "zad ",
+    "zi ",
     # 'car',
-    'fg ',
+    "fg ",
     # 'lot',
-    'imp ',
+    "imp ",
     # 'qu',
-    'mte'
+    "mte",
 }
 def _is(val):
-    '''Repere des adresses'''
+    """Repere des adresses"""
     if not isinstance(val, str) or len(val) > 150:
         return False
     val = _process_text(val)

csv_detective/detect_fields/FR/geo/code_departement/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from frformat import NumeroDepartement, Options, Millesime
+from frformat import Millesime, NumeroDepartement, Options
 PROPORTION = 1
@@ -6,7 +6,7 @@ _options = Options(
     ignore_case=True,
     ignore_accents=True,
     replace_non_alphanumeric_with_space=True,
-    ignore_extra_whitespace=True
+    ignore_extra_whitespace=True,
 )
 _numero_departement = NumeroDepartement(Millesime.LATEST, _options)

csv_detective/detect_fields/FR/geo/code_postal/__init__.py CHANGED Viewed

@@ -6,5 +6,4 @@ _code_postal = CodePostal()
 def _is(val):
     return _code_postal.is_valid(val)

csv_detective/detect_fields/FR/geo/code_region/__init__.py CHANGED Viewed

@@ -6,5 +6,5 @@ _code_region = CodeRegion(Millesime.LATEST)
 def _is(val):
-    '''Renvoie True si val peut être un code_région, False sinon'''
+    """Renvoie True si val peut être un code_région, False sinon"""
     return isinstance(val, str) and _code_region.is_valid(val)

csv_detective/detect_fields/FR/geo/commune/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from frformat import Commune, Options, Millesime
+from frformat import Commune, Millesime, Options
 PROPORTION = 0.9
@@ -6,7 +6,7 @@ _options = Options(
     ignore_case=True,
     ignore_accents=True,
     replace_non_alphanumeric_with_space=True,
-    ignore_extra_whitespace=True
+    ignore_extra_whitespace=True,
 )
 _commune = Commune(Millesime.LATEST, _options)

csv_detective/detect_fields/FR/geo/departement/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from frformat import Departement, Options, Millesime
+from frformat import Departement, Millesime, Options
 PROPORTION = 0.9
@@ -6,7 +6,7 @@ _options = Options(
     ignore_case=True,
     ignore_accents=True,
     replace_non_alphanumeric_with_space=True,
-    ignore_extra_whitespace=True
+    ignore_extra_whitespace=True,
 )
 _departement = Departement(Millesime.LATEST, _options)

csv_detective/detect_fields/FR/geo/insee_canton/__init__.py CHANGED Viewed

@@ -1,11 +1,11 @@
-from frformat import Canton, Options, Millesime
+from frformat import Canton, Millesime, Options
 PROPORTION = 0.9
 _options = Options(
     ignore_case=True,
     ignore_accents=True,
     replace_non_alphanumeric_with_space=True,
-    ignore_extra_whitespace=True
+    ignore_extra_whitespace=True,
 )
 _canton = Canton(Millesime.LATEST, _options)

csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py CHANGED Viewed

@@ -1,9 +1,8 @@
 from frformat import LatitudeL93
-from csv_detective.detect_fields.other.float import _is as is_float
+from csv_detective.detect_fields.other.float import _is as is_float
 from csv_detective.detect_fields.other.float import float_casting
 PROPORTION = 0.9
 _latitudel93 = LatitudeL93()

csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ PROPORTION = 0.9
 def _is(val):
-    '''Renvoie True si val peut etre une latitude en métropole'''
+    """Renvoie True si val peut etre une latitude en métropole"""
     try:
         return is_float(val) and float(val) >= 41.3 and float(val) <= 51.3
     except ValueError:

csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py CHANGED Viewed

@@ -1,9 +1,8 @@
 from frformat import LongitudeL93
-from csv_detective.detect_fields.other.float import _is as is_float
+from csv_detective.detect_fields.other.float import _is as is_float
 from csv_detective.detect_fields.other.float import float_casting
 PROPORTION = 0.9
 _longitudel93 = LongitudeL93()

csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ PROPORTION = 0.9
 def _is(val):
-    '''Renvoie True si val peut etre une longitude en métropole'''
+    """Renvoie True si val peut etre une longitude en métropole"""
     try:
         return is_float(val) and float(val) >= -5.5 and float(val) <= 9.8
     except ValueError:

csv_detective/detect_fields/FR/geo/pays/__init__.py CHANGED Viewed

@@ -1,13 +1,13 @@
-from frformat import Pays, Options, Millesime
+from frformat import Millesime, Options, Pays
 PROPORTION = 0.6
 _options = Options(
-        ignore_case=True,
-        ignore_accents=True,
-        replace_non_alphanumeric_with_space=True,
-        ignore_extra_whitespace=True
-    )
+    ignore_case=True,
+    ignore_accents=True,
+    replace_non_alphanumeric_with_space=True,
+    ignore_extra_whitespace=True,
+)
 _pays = Pays(Millesime.LATEST, _options)

csv_detective/detect_fields/FR/geo/region/__init__.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from frformat import Region, Options, Millesime
+from frformat import Millesime, Options, Region
 PROPORTION = 1
-_extra_valid_values_set = frozenset({
+_extra_valid_values_set = frozenset(
+    {
         "alsace",
         "aquitaine",
         "ara",
@@ -30,7 +31,8 @@ _extra_valid_values_set = frozenset({
         "poitou charentes",
         "reunion",
         "rhone alpes",
-        })
+    }
+)
 _options = Options(
@@ -38,7 +40,7 @@ _options = Options(
     ignore_accents=True,
     replace_non_alphanumeric_with_space=True,
     ignore_extra_whitespace=True,
-    extra_valid_values=_extra_valid_values_set
+    extra_valid_values=_extra_valid_values_set,
 )
 _region = Region(Millesime.LATEST, _options)

csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py CHANGED Viewed

@@ -1,28 +1,29 @@
-from csv_detective.parsing.text import _process_text
 import re
+from csv_detective.parsing.text import _process_text
 PROPORTION = 1
 def _is(val):
-    '''Repère les code csp telles que définies par l'INSEE'''
+    """Repère les code csp telles que définies par l'INSEE"""
     if not isinstance(val, str):
         return False
     val = _process_text(val)
     if len(val) != 4:
         return False
-    a = bool(re.match(r'^[123456][0-9]{2}[abcdefghijkl]$', val))
+    a = bool(re.match(r"^[123456][0-9]{2}[abcdefghijkl]$", val))
     b = val in {
-        '7100',
-        '7200',
-        '7400',
-        '7500',
-        '7700',
-        '7800',
-        '8100',
-        '8300',
-        '8400',
-        '8500',
-        '8600'
+        "7100",
+        "7200",
+        "7400",
+        "7500",
+        "7700",
+        "7800",
+        "8100",
+        "8300",
+        "8400",
+        "8500",
+        "8600",
     }
     return a or b

csv_detective/detect_fields/FR/other/csp_insee/__init__.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from os.path import dirname, join
 from csv_detective.parsing.text import _process_text
 PROPORTION = 1
-f = open(join(dirname(__file__), 'csp_insee.txt'), 'r')
-codes_insee = f.read().split('\n')
+f = open(join(dirname(__file__), "csp_insee.txt"), "r")
+codes_insee = f.read().split("\n")
 # removing empty str due to additionnal line in file
 del codes_insee[-1]
 codes_insee = set(codes_insee)
@@ -11,7 +12,7 @@ f.close()
 def _is(val):
-    '''Repère les csp telles que définies par l'INSEE'''
+    """Repère les csp telles que définies par l'INSEE"""
     if not isinstance(val, str):
         return False
     val = _process_text(val)

csv_detective/detect_fields/FR/other/date_fr/__init__.py CHANGED Viewed

@@ -2,11 +2,11 @@ import re
 PROPORTION = 1
 regex = (
-    r'^\d{1,2}[ \-](janvier|fevrier|mars|avril|mai|juin|juillet|aout|septembre'
-    r'|octobre|novembre|decembre)[ \-]\d{4}$'
+    r"^\d{1,2}[ \-](janvier|fevrier|mars|avril|mai|juin|juillet|aout|septembre"
+    r"|octobre|novembre|decembre)[ \-]\d{4}$"
 )
 def _is(val):
-    '''Repere les dates textuelles FR'''
+    """Repere les dates textuelles FR"""
     return isinstance(val, str) and bool(re.match(regex, val))

csv_detective/detect_fields/FR/other/insee_ape700/__init__.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from os.path import dirname, join
 from csv_detective.parsing.text import _process_text
 PROPORTION = 1
-f = open(join(dirname(__file__), 'insee_ape700.txt'), 'r')
-condes_insee_ape = f.read().split('\n')
+f = open(join(dirname(__file__), "insee_ape700.txt"), "r")
+condes_insee_ape = f.read().split("\n")
 # removing empty str due to additionnal line in file
 del condes_insee_ape[-1]
 condes_insee_ape = set(condes_insee_ape)
@@ -11,7 +12,7 @@ f.close()
 def _is(val):
-    '''Repère les codes APE700 de l'INSEE'''
+    """Repère les codes APE700 de l'INSEE"""
     if not isinstance(val, str):
         return False
     val = _process_text(val).upper()

csv_detective/detect_fields/FR/other/sexe/__init__.py CHANGED Viewed

@@ -4,8 +4,8 @@ PROPORTION = 1
 def _is(val):
-    '''Repère le sexe'''
+    """Repère le sexe"""
     if not isinstance(val, str):
         return False
     val = _process_text(val)
-    return val in {'homme', 'femme', 'h', 'f', 'm', 'masculin', 'feminin'}
+    return val in {"homme", "femme", "h", "f", "m", "masculin", "feminin"}

csv_detective/detect_fields/FR/other/siren/__init__.py CHANGED Viewed

@@ -4,11 +4,11 @@ PROPORTION = 0.9
 def _is(val):
-    '''Repere les codes SIREN'''
+    """Repere les codes SIREN"""
     if not isinstance(val, str):
         return False
-    val = val.replace(' ', '')
-    if not bool(re.match(r'^[0-9]{9}$', val)):
+    val = val.replace(" ", "")
+    if not bool(re.match(r"^[0-9]{9}$", val)):
         return False
     # Vérification par clé propre aux codes siren
     cle = 0

csv_detective/detect_fields/FR/other/siret/__init__.py CHANGED Viewed

@@ -4,11 +4,11 @@ PROPORTION = 0.8
 def _is(val):
-    '''Détection des identifiants SIRET (SIRENE)'''
+    """Détection des identifiants SIRET (SIRENE)"""
     if not isinstance(val, str):
         return False
-    val = val.replace(' ', '')
-    if not bool(re.match(r'^[0-9]{14}$', val)):
+    val = val.replace(" ", "")
+    if not bool(re.match(r"^[0-9]{14}$", val)):
         return False
     # Vérification par clé de luhn du SIREN

csv_detective/detect_fields/FR/other/tel_fr/__init__.py CHANGED Viewed

@@ -4,14 +4,14 @@ PROPORTION = 0.7
 def _is(val):
-    '''Repère les numeros de telephone francais'''
+    """Repère les numeros de telephone francais"""
     if not isinstance(val, str):
         return False
     if len(val) < 10:
         return False
-    val = val.replace('.', '').replace('-', '').replace(' ', '')
+    val = val.replace(".", "").replace("-", "").replace(" ", "")
-    match_1 = bool(re.match(r'^(0|\+33|0033)?[0-9]{9}$', val))
+    match_1 = bool(re.match(r"^(0|\+33|0033)?[0-9]{9}$", val))
     return match_1

csv_detective/detect_fields/FR/other/uai/__init__.py CHANGED Viewed

@@ -4,12 +4,12 @@ PROPORTION = 1
 def _is(val):
-    '''Repere les codes UAI de l'éducation nationale'''
+    """Repere les codes UAI de l'éducation nationale"""
     # test sur la longueur
     if not isinstance(val, str) or len(val) != 8:
         return False
-    if not bool(re.match(r'^(0[0-8][0-9]|09[0-5]|9[78][0-9]|[67]20)[0-9]{4}[A-Z]$', val)):
+    if not bool(re.match(r"^(0[0-8][0-9]|09[0-5]|9[78][0-9]|[67]20)[0-9]{4}[A-Z]$", val)):
         return False
     return True

csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py CHANGED Viewed

@@ -1,24 +1,24 @@
 PROPORTION = 1
 jours = {
-    'lundi',
-    'mardi',
-    'mercredi',
-    'jeudi',
-    'vendredi',
-    'samedi',
-    'dimanche',
-    'lun',
-    'mar',
-    'mer',
-    'jeu',
-    'ven',
-    'sam',
-    'dim'
+    "lundi",
+    "mardi",
+    "mercredi",
+    "jeudi",
+    "vendredi",
+    "samedi",
+    "dimanche",
+    "lun",
+    "mar",
+    "mer",
+    "jeu",
+    "ven",
+    "sam",
+    "dim",
 }
 def _is(val):
-    '''Renvoie True si les champs peuvent être des jours de la semaine'''
+    """Renvoie True si les champs peuvent être des jours de la semaine"""
     if not isinstance(val, str):
         return False
     val = val.lower()

csv-detective 0.8.1.dev1674__py3-none-any.whl → 0.8.1.dev1720__py3-none-any.whl

csv-detective 0.8.1.dev1674__py3-none-any.whl → 0.8.1.dev1720__py3-none-any.whl