PyPI - csv-detective - Versions diffs - 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl - Mend

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

csv_detective/__init__.py +7 -1
csv_detective/cli.py +33 -21
csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
csv_detective/detection/columns.py +89 -0
csv_detective/detection/encoding.py +29 -0
csv_detective/detection/engine.py +46 -0
csv_detective/detection/formats.py +156 -0
csv_detective/detection/headers.py +28 -0
csv_detective/detection/rows.py +18 -0
csv_detective/detection/separator.py +44 -0
csv_detective/detection/variables.py +97 -0
csv_detective/explore_csv.py +151 -377
csv_detective/format.py +67 -0
csv_detective/formats/__init__.py +9 -0
csv_detective/formats/adresse.py +116 -0
csv_detective/formats/binary.py +26 -0
csv_detective/formats/booleen.py +35 -0
csv_detective/formats/code_commune_insee.py +26 -0
csv_detective/formats/code_csp_insee.py +36 -0
csv_detective/formats/code_departement.py +29 -0
csv_detective/formats/code_fantoir.py +21 -0
csv_detective/formats/code_import.py +17 -0
csv_detective/formats/code_postal.py +25 -0
csv_detective/formats/code_region.py +22 -0
csv_detective/formats/code_rna.py +29 -0
csv_detective/formats/code_waldec.py +17 -0
csv_detective/formats/commune.py +27 -0
csv_detective/formats/csp_insee.py +31 -0
csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
csv_detective/formats/date.py +99 -0
csv_detective/formats/date_fr.py +22 -0
csv_detective/formats/datetime_aware.py +45 -0
csv_detective/formats/datetime_naive.py +48 -0
csv_detective/formats/datetime_rfc822.py +24 -0
csv_detective/formats/departement.py +37 -0
csv_detective/formats/email.py +28 -0
csv_detective/formats/float.py +29 -0
csv_detective/formats/geojson.py +36 -0
csv_detective/formats/insee_ape700.py +31 -0
csv_detective/formats/insee_canton.py +28 -0
csv_detective/formats/int.py +23 -0
csv_detective/formats/iso_country_code_alpha2.py +30 -0
csv_detective/formats/iso_country_code_alpha3.py +30 -0
csv_detective/formats/iso_country_code_numeric.py +31 -0
csv_detective/formats/jour_de_la_semaine.py +41 -0
csv_detective/formats/json.py +20 -0
csv_detective/formats/latitude_l93.py +48 -0
csv_detective/formats/latitude_wgs.py +42 -0
csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
csv_detective/formats/latlon_wgs.py +53 -0
csv_detective/formats/longitude_l93.py +39 -0
csv_detective/formats/longitude_wgs.py +32 -0
csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
csv_detective/formats/lonlat_wgs.py +36 -0
csv_detective/formats/mois_de_lannee.py +48 -0
csv_detective/formats/money.py +18 -0
csv_detective/formats/mongo_object_id.py +14 -0
csv_detective/formats/pays.py +35 -0
csv_detective/formats/percent.py +16 -0
csv_detective/formats/region.py +70 -0
csv_detective/formats/sexe.py +17 -0
csv_detective/formats/siren.py +37 -0
csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
csv_detective/formats/tel_fr.py +36 -0
csv_detective/formats/uai.py +36 -0
csv_detective/formats/url.py +46 -0
csv_detective/formats/username.py +14 -0
csv_detective/formats/uuid.py +16 -0
csv_detective/formats/year.py +28 -0
csv_detective/output/__init__.py +65 -0
csv_detective/output/dataframe.py +96 -0
csv_detective/output/example.py +250 -0
csv_detective/output/profile.py +119 -0
csv_detective/{schema_generation.py → output/schema.py} +268 -343
csv_detective/output/utils.py +74 -0
csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
csv_detective/parsing/columns.py +235 -0
csv_detective/parsing/compression.py +11 -0
csv_detective/parsing/csv.py +56 -0
csv_detective/parsing/excel.py +167 -0
csv_detective/parsing/load.py +111 -0
csv_detective/parsing/text.py +56 -0
csv_detective/utils.py +23 -196
csv_detective/validate.py +138 -0
csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
{csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
csv_detective/all_packages.txt +0 -104
csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
csv_detective/detect_fields/FR/other/__init__.py +0 -0
csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
csv_detective/detect_fields/FR/temp/__init__.py +0 -0
csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
csv_detective/detect_fields/__init__.py +0 -57
csv_detective/detect_fields/geo/__init__.py +0 -0
csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
csv_detective/detect_fields/other/__init__.py +0 -0
csv_detective/detect_fields/other/booleen/__init__.py +0 -21
csv_detective/detect_fields/other/email/__init__.py +0 -8
csv_detective/detect_fields/other/float/__init__.py +0 -17
csv_detective/detect_fields/other/int/__init__.py +0 -12
csv_detective/detect_fields/other/json/__init__.py +0 -24
csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
csv_detective/detect_fields/other/twitter/__init__.py +0 -8
csv_detective/detect_fields/other/url/__init__.py +0 -11
csv_detective/detect_fields/other/uuid/__init__.py +0 -11
csv_detective/detect_fields/temp/__init__.py +0 -0
csv_detective/detect_fields/temp/date/__init__.py +0 -62
csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
csv_detective/detect_fields/temp/year/__init__.py +0 -10
csv_detective/detect_labels/FR/__init__.py +0 -0
csv_detective/detect_labels/FR/geo/__init__.py +0 -0
csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
csv_detective/detect_labels/FR/other/__init__.py +0 -0
csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
csv_detective/detect_labels/FR/temp/__init__.py +0 -0
csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
csv_detective/detect_labels/__init__.py +0 -43
csv_detective/detect_labels/geo/__init__.py +0 -0
csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
csv_detective/detect_labels/other/__init__.py +0 -0
csv_detective/detect_labels/other/booleen/__init__.py +0 -34
csv_detective/detect_labels/other/email/__init__.py +0 -45
csv_detective/detect_labels/other/float/__init__.py +0 -33
csv_detective/detect_labels/other/int/__init__.py +0 -33
csv_detective/detect_labels/other/money/__init__.py +0 -11
csv_detective/detect_labels/other/money/check_col_name.py +0 -8
csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
csv_detective/detect_labels/other/twitter/__init__.py +0 -33
csv_detective/detect_labels/other/url/__init__.py +0 -48
csv_detective/detect_labels/other/uuid/__init__.py +0 -33
csv_detective/detect_labels/temp/__init__.py +0 -0
csv_detective/detect_labels/temp/date/__init__.py +0 -51
csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
csv_detective/detect_labels/temp/year/__init__.py +0 -44
csv_detective/detection.py +0 -361
csv_detective/process_text.py +0 -39
csv_detective/s3_utils.py +0 -48
csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
csv_detective-0.6.7.dist-info/METADATA +0 -23
csv_detective-0.6.7.dist-info/RECORD +0 -150
csv_detective-0.6.7.dist-info/WHEEL +0 -5
csv_detective-0.6.7.dist-info/top_level.txt +0 -2
tests/__init__.py +0 -0
tests/test_fields.py +0 -360
tests/test_file.py +0 -116
tests/test_labels.py +0 -7
/csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
/csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0

csv_detective/detect_labels/FR/other/siren/__init__.py DELETED Viewed

@@ -1,41 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'siren',
-        'siren organisme designe',
-        'siren organisme designant',
-        'n° siren',
-        'siren organisme',
-        'siren titulaire',
-        'numero siren'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/other/siret/__init__.py DELETED Viewed

@@ -1,40 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'siret',
-        'siret d',
-        'num siret',
-        'siretacheteur',
-        'n° siret',
-        'coll siret'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/other/tel_fr/__init__.py DELETED Viewed

@@ -1,45 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'telephone',
-        'tel',
-        'tel1',
-        'tel2',
-        'phone',
-        'num tel',
-        'tel mob',
-        'telephone sav',
-        'telephone1',
-        'coordinates.phone',
-        'telephone du lieu'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/other/uai/__init__.py DELETED Viewed

@@ -1,50 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'uai',
-        'code etablissement',
-        'code uai',
-        'uai - identifiant',
-        'numero uai',
-        'rne',
-        "numero de l'etablissement",
-        'code rne',
-        'codeetab',
-        "code uai de l'etablissement",
-        'ref uai',
-        'cd rne',
-        'numerouai',
-        'numero d etablissement',
-        'code etablissement',
-        'numero etablissement'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/temp/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py DELETED Viewed

@@ -1,41 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'jour semaine',
-        'type jour',
-        'jour de la semaine',
-        'saufjour',
-        'nomjour',
-        'jour',
-        'jour de fermeture'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = ['mois de annee', 'mois']
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/__init__.py DELETED Viewed

@@ -1,43 +0,0 @@
-# flake8: noqa
-from .FR.geo import (
-    adresse,
-    code_commune_insee,
-    code_departement,
-    code_fantoir,
-    code_postal,
-    code_region,
-    commune,
-    departement,
-    insee_canton,
-    latitude_l93,
-    latitude_wgs_fr_metropole,
-    longitude_l93,
-    longitude_wgs_fr_metropole,
-    pays,
-    region
-)
-from .FR.other import (
-    code_csp_insee,
-    code_rna,
-    code_waldec,
-    csp_insee,
-    date_fr,
-    insee_ape700,
-    sexe,
-    siren,
-    siret,
-    tel_fr,
-    uai
-)
-from .FR.temp import jour_de_la_semaine, mois_de_annee
-from .geo import (
-    iso_country_code_alpha2,
-    iso_country_code_alpha3,
-    iso_country_code_numeric,
-    json_geojson,
-    latitude_wgs,
-    latlon_wgs,
-    longitude_wgs
-)
-from .other import booleen, email, float, int, money, mongo_object_id, twitter, url, uuid
-from .temp import date, datetime_iso, datetime_rfc822, year

csv_detective/detect_labels/geo/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py DELETED Viewed

@@ -1,41 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'iso country code',
-        'code pays',
-        'pays',
-        'country',
-        'nation',
-        'pays code',
-        'code pays (iso)'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py DELETED Viewed

@@ -1,41 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'iso country code',
-        'code pays',
-        'pays',
-        'country',
-        'nation',
-        'pays code',
-        'code pays (iso)'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py DELETED Viewed

@@ -1,41 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'iso country code',
-        'code pays',
-        'pays',
-        'country',
-        'nation',
-        'pays code',
-        'code pays (iso)'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/geo/json_geojson/__init__.py DELETED Viewed

@@ -1,42 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'json geojson',
-        'json',
-        'geojson',
-        'geo shape',
-        'geom',
-        'geometry',
-        'geo shape',
-        'geoshape'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/geo/latitude_wgs/__init__.py DELETED Viewed

@@ -1,55 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'latitude',
-        'lat',
-        'y',
-        'yf',
-        'yd',
-        'coordonnee y',
-        'coord y',
-        'ycoord',
-        'geocodage y gps',
-        'location latitude',
-        'ylatitude',
-        'ylat',
-        'latitude (y)',
-        'latitudeorg',
-        'coordinates.latitude',
-        'googlemap latitude',
-        'latitudelieu',
-        'latitude googlemap',
-        'latitude wgs84',
-        'y wgs84',
-        'latitude (wgs84)'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/geo/latlon_wgs/__init__.py DELETED Viewed

@@ -1,67 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    words_combinations_list = [
-        'latlon wgs',
-        'latlon',
-        'geo point',
-        'geo point 2d',
-        'wgs84',
-        'geolocalisation',
-        'geo',
-        'coordonnees finales',
-        'coordonnees',
-        'coordonnees ban',
-        'xy',
-        'geometry x y',
-        'coordonnees insee',
-        'coordonnees geographiques',
-        'position',
-        'coordonnes gps',
-        'geopoint',
-        'geom x y',
-        'coord gps',
-        'latlong',
-        'position geographique',
-        'c geo',
-        'coordonnes geoloc',
-        'lat lon',
-        'code geo',
-        'geo localisation',
-        'coordonnes geo',
-        'geo cp',
-        'x y',
-        'geo coordinates',
-        'point geo',
-        'point geo insee',
-        'coordonnees geoloc',
-        'coordonnees xy'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/geo/longitude_wgs/__init__.py DELETED Viewed

@@ -1,45 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    #  Does not detect CRS
-    words_combinations_list = [
-        'longitude',
-        'lon',
-        'long',
-        'geocodage x gps',
-        'location longitude',
-        'xlongitude',
-        'lng',
-        'xlong',
-        'x',
-        'xf',
-        'xd'
-    ]
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv_detective/detect_labels/other/__init__.py DELETED Viewed

File without changes

csv_detective/detect_labels/other/booleen/__init__.py DELETED Viewed

@@ -1,34 +0,0 @@
-from csv_detective.utils import full_word_strictly_inside_string
-from csv_detective.process_text import _process_text
-PROPORTION = 0.5
-def _is(header):
-    '''
-    Returns 1 if the (processed) header matches one of the expected words combination,
-    else 0
-    '''
-    #  Not relevant to make it match with specific words (find other rules)
-    words_combinations_list = []
-    processed_header = _process_text(header)
-    header_matches_words_combination = float(
-        any(
-            [
-                words_combination == processed_header for words_combination in words_combinations_list
-            ]
-        )
-    )
-    words_combination_in_header = 0.5 * float(
-        any(
-            [
-                full_word_strictly_inside_string(
-                    words_combination, processed_header
-                ) for words_combination in words_combinations_list
-            ]
-        )
-    )
-    return max(header_matches_words_combination, words_combination_in_header)

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl

csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl