csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +7 -1
- csv_detective/cli.py +33 -21
- csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
- csv_detective/detection/columns.py +89 -0
- csv_detective/detection/encoding.py +29 -0
- csv_detective/detection/engine.py +46 -0
- csv_detective/detection/formats.py +156 -0
- csv_detective/detection/headers.py +28 -0
- csv_detective/detection/rows.py +18 -0
- csv_detective/detection/separator.py +44 -0
- csv_detective/detection/variables.py +97 -0
- csv_detective/explore_csv.py +151 -377
- csv_detective/format.py +67 -0
- csv_detective/formats/__init__.py +9 -0
- csv_detective/formats/adresse.py +116 -0
- csv_detective/formats/binary.py +26 -0
- csv_detective/formats/booleen.py +35 -0
- csv_detective/formats/code_commune_insee.py +26 -0
- csv_detective/formats/code_csp_insee.py +36 -0
- csv_detective/formats/code_departement.py +29 -0
- csv_detective/formats/code_fantoir.py +21 -0
- csv_detective/formats/code_import.py +17 -0
- csv_detective/formats/code_postal.py +25 -0
- csv_detective/formats/code_region.py +22 -0
- csv_detective/formats/code_rna.py +29 -0
- csv_detective/formats/code_waldec.py +17 -0
- csv_detective/formats/commune.py +27 -0
- csv_detective/formats/csp_insee.py +31 -0
- csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
- csv_detective/formats/date.py +99 -0
- csv_detective/formats/date_fr.py +22 -0
- csv_detective/formats/datetime_aware.py +45 -0
- csv_detective/formats/datetime_naive.py +48 -0
- csv_detective/formats/datetime_rfc822.py +24 -0
- csv_detective/formats/departement.py +37 -0
- csv_detective/formats/email.py +28 -0
- csv_detective/formats/float.py +29 -0
- csv_detective/formats/geojson.py +36 -0
- csv_detective/formats/insee_ape700.py +31 -0
- csv_detective/formats/insee_canton.py +28 -0
- csv_detective/formats/int.py +23 -0
- csv_detective/formats/iso_country_code_alpha2.py +30 -0
- csv_detective/formats/iso_country_code_alpha3.py +30 -0
- csv_detective/formats/iso_country_code_numeric.py +31 -0
- csv_detective/formats/jour_de_la_semaine.py +41 -0
- csv_detective/formats/json.py +20 -0
- csv_detective/formats/latitude_l93.py +48 -0
- csv_detective/formats/latitude_wgs.py +42 -0
- csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
- csv_detective/formats/latlon_wgs.py +53 -0
- csv_detective/formats/longitude_l93.py +39 -0
- csv_detective/formats/longitude_wgs.py +32 -0
- csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
- csv_detective/formats/lonlat_wgs.py +36 -0
- csv_detective/formats/mois_de_lannee.py +48 -0
- csv_detective/formats/money.py +18 -0
- csv_detective/formats/mongo_object_id.py +14 -0
- csv_detective/formats/pays.py +35 -0
- csv_detective/formats/percent.py +16 -0
- csv_detective/formats/region.py +70 -0
- csv_detective/formats/sexe.py +17 -0
- csv_detective/formats/siren.py +37 -0
- csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
- csv_detective/formats/tel_fr.py +36 -0
- csv_detective/formats/uai.py +36 -0
- csv_detective/formats/url.py +46 -0
- csv_detective/formats/username.py +14 -0
- csv_detective/formats/uuid.py +16 -0
- csv_detective/formats/year.py +28 -0
- csv_detective/output/__init__.py +65 -0
- csv_detective/output/dataframe.py +96 -0
- csv_detective/output/example.py +250 -0
- csv_detective/output/profile.py +119 -0
- csv_detective/{schema_generation.py → output/schema.py} +268 -343
- csv_detective/output/utils.py +74 -0
- csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
- csv_detective/parsing/columns.py +235 -0
- csv_detective/parsing/compression.py +11 -0
- csv_detective/parsing/csv.py +56 -0
- csv_detective/parsing/excel.py +167 -0
- csv_detective/parsing/load.py +111 -0
- csv_detective/parsing/text.py +56 -0
- csv_detective/utils.py +23 -196
- csv_detective/validate.py +138 -0
- csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
- csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
- csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
- {csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
- csv_detective/all_packages.txt +0 -104
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
- csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
- csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
- csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
- csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
- csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
- csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
- csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
- csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
- csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
- csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
- csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
- csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
- csv_detective/detect_fields/FR/other/__init__.py +0 -0
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
- csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
- csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
- csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
- csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
- csv_detective/detect_fields/FR/temp/__init__.py +0 -0
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
- csv_detective/detect_fields/__init__.py +0 -57
- csv_detective/detect_fields/geo/__init__.py +0 -0
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
- csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/other/__init__.py +0 -0
- csv_detective/detect_fields/other/booleen/__init__.py +0 -21
- csv_detective/detect_fields/other/email/__init__.py +0 -8
- csv_detective/detect_fields/other/float/__init__.py +0 -17
- csv_detective/detect_fields/other/int/__init__.py +0 -12
- csv_detective/detect_fields/other/json/__init__.py +0 -24
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
- csv_detective/detect_fields/other/twitter/__init__.py +0 -8
- csv_detective/detect_fields/other/url/__init__.py +0 -11
- csv_detective/detect_fields/other/uuid/__init__.py +0 -11
- csv_detective/detect_fields/temp/__init__.py +0 -0
- csv_detective/detect_fields/temp/date/__init__.py +0 -62
- csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
- csv_detective/detect_fields/temp/year/__init__.py +0 -10
- csv_detective/detect_labels/FR/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
- csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
- csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
- csv_detective/detect_labels/FR/other/__init__.py +0 -0
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
- csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
- csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
- csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
- csv_detective/detect_labels/FR/temp/__init__.py +0 -0
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
- csv_detective/detect_labels/__init__.py +0 -43
- csv_detective/detect_labels/geo/__init__.py +0 -0
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
- csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
- csv_detective/detect_labels/other/__init__.py +0 -0
- csv_detective/detect_labels/other/booleen/__init__.py +0 -34
- csv_detective/detect_labels/other/email/__init__.py +0 -45
- csv_detective/detect_labels/other/float/__init__.py +0 -33
- csv_detective/detect_labels/other/int/__init__.py +0 -33
- csv_detective/detect_labels/other/money/__init__.py +0 -11
- csv_detective/detect_labels/other/money/check_col_name.py +0 -8
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
- csv_detective/detect_labels/other/twitter/__init__.py +0 -33
- csv_detective/detect_labels/other/url/__init__.py +0 -48
- csv_detective/detect_labels/other/uuid/__init__.py +0 -33
- csv_detective/detect_labels/temp/__init__.py +0 -0
- csv_detective/detect_labels/temp/date/__init__.py +0 -51
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
- csv_detective/detect_labels/temp/year/__init__.py +0 -44
- csv_detective/detection.py +0 -361
- csv_detective/process_text.py +0 -39
- csv_detective/s3_utils.py +0 -48
- csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
- csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
- csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
- csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
- csv_detective-0.6.7.dist-info/METADATA +0 -23
- csv_detective-0.6.7.dist-info/RECORD +0 -150
- csv_detective-0.6.7.dist-info/WHEEL +0 -5
- csv_detective-0.6.7.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/test_fields.py +0 -360
- tests/test_file.py +0 -116
- tests/test_labels.py +0 -7
- /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
PROPORTION = 1
|
|
3
|
-
liste_regions = {
|
|
4
|
-
'01',
|
|
5
|
-
'02',
|
|
6
|
-
'03',
|
|
7
|
-
'04',
|
|
8
|
-
'06',
|
|
9
|
-
'11',
|
|
10
|
-
'24',
|
|
11
|
-
'27',
|
|
12
|
-
'28',
|
|
13
|
-
'32',
|
|
14
|
-
'44',
|
|
15
|
-
'52',
|
|
16
|
-
'53',
|
|
17
|
-
'75',
|
|
18
|
-
'76',
|
|
19
|
-
'84',
|
|
20
|
-
'93',
|
|
21
|
-
'94'
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _is(val):
|
|
26
|
-
'''Renvoie True si val peut être un code_région, False sinon'''
|
|
27
|
-
return val in liste_regions
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from os.path import dirname, join
|
|
2
|
-
from csv_detective.process_text import _process_text
|
|
3
|
-
|
|
4
|
-
PROPORTION = 0.9
|
|
5
|
-
f = open(join(dirname(__file__), 'commune.txt'), 'r')
|
|
6
|
-
communes = f.read().split('\n')
|
|
7
|
-
# removing empty str due to additionnal line in file
|
|
8
|
-
del communes[-1]
|
|
9
|
-
communes = set(communes)
|
|
10
|
-
max_len = max({len(p) for p in communes})
|
|
11
|
-
f.close()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def _is(val):
|
|
15
|
-
'''Match avec le nom des communes'''
|
|
16
|
-
if len(val) > max_len:
|
|
17
|
-
return False
|
|
18
|
-
val = val.lower().replace('-', ' ')
|
|
19
|
-
|
|
20
|
-
val = _process_text(val)
|
|
21
|
-
return val in communes
|