csv-detective 0.6.7__py3-none-any.whl → 0.9.3.dev2438__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +7 -1
- csv_detective/cli.py +33 -21
- csv_detective/{detect_fields/FR → detection}/__init__.py +0 -0
- csv_detective/detection/columns.py +89 -0
- csv_detective/detection/encoding.py +29 -0
- csv_detective/detection/engine.py +46 -0
- csv_detective/detection/formats.py +156 -0
- csv_detective/detection/headers.py +28 -0
- csv_detective/detection/rows.py +18 -0
- csv_detective/detection/separator.py +44 -0
- csv_detective/detection/variables.py +97 -0
- csv_detective/explore_csv.py +151 -377
- csv_detective/format.py +67 -0
- csv_detective/formats/__init__.py +9 -0
- csv_detective/formats/adresse.py +116 -0
- csv_detective/formats/binary.py +26 -0
- csv_detective/formats/booleen.py +35 -0
- csv_detective/formats/code_commune_insee.py +26 -0
- csv_detective/formats/code_csp_insee.py +36 -0
- csv_detective/formats/code_departement.py +29 -0
- csv_detective/formats/code_fantoir.py +21 -0
- csv_detective/formats/code_import.py +17 -0
- csv_detective/formats/code_postal.py +25 -0
- csv_detective/formats/code_region.py +22 -0
- csv_detective/formats/code_rna.py +29 -0
- csv_detective/formats/code_waldec.py +17 -0
- csv_detective/formats/commune.py +27 -0
- csv_detective/formats/csp_insee.py +31 -0
- csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
- csv_detective/formats/date.py +99 -0
- csv_detective/formats/date_fr.py +22 -0
- csv_detective/formats/datetime_aware.py +45 -0
- csv_detective/formats/datetime_naive.py +48 -0
- csv_detective/formats/datetime_rfc822.py +24 -0
- csv_detective/formats/departement.py +37 -0
- csv_detective/formats/email.py +28 -0
- csv_detective/formats/float.py +29 -0
- csv_detective/formats/geojson.py +36 -0
- csv_detective/formats/insee_ape700.py +31 -0
- csv_detective/formats/insee_canton.py +28 -0
- csv_detective/formats/int.py +23 -0
- csv_detective/formats/iso_country_code_alpha2.py +30 -0
- csv_detective/formats/iso_country_code_alpha3.py +30 -0
- csv_detective/formats/iso_country_code_numeric.py +31 -0
- csv_detective/formats/jour_de_la_semaine.py +41 -0
- csv_detective/formats/json.py +20 -0
- csv_detective/formats/latitude_l93.py +48 -0
- csv_detective/formats/latitude_wgs.py +42 -0
- csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
- csv_detective/formats/latlon_wgs.py +53 -0
- csv_detective/formats/longitude_l93.py +39 -0
- csv_detective/formats/longitude_wgs.py +32 -0
- csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
- csv_detective/formats/lonlat_wgs.py +36 -0
- csv_detective/formats/mois_de_lannee.py +48 -0
- csv_detective/formats/money.py +18 -0
- csv_detective/formats/mongo_object_id.py +14 -0
- csv_detective/formats/pays.py +35 -0
- csv_detective/formats/percent.py +16 -0
- csv_detective/formats/region.py +70 -0
- csv_detective/formats/sexe.py +17 -0
- csv_detective/formats/siren.py +37 -0
- csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -29
- csv_detective/formats/tel_fr.py +36 -0
- csv_detective/formats/uai.py +36 -0
- csv_detective/formats/url.py +46 -0
- csv_detective/formats/username.py +14 -0
- csv_detective/formats/uuid.py +16 -0
- csv_detective/formats/year.py +28 -0
- csv_detective/output/__init__.py +65 -0
- csv_detective/output/dataframe.py +96 -0
- csv_detective/output/example.py +250 -0
- csv_detective/output/profile.py +119 -0
- csv_detective/{schema_generation.py → output/schema.py} +268 -343
- csv_detective/output/utils.py +74 -0
- csv_detective/{detect_fields/FR/geo → parsing}/__init__.py +0 -0
- csv_detective/parsing/columns.py +235 -0
- csv_detective/parsing/compression.py +11 -0
- csv_detective/parsing/csv.py +56 -0
- csv_detective/parsing/excel.py +167 -0
- csv_detective/parsing/load.py +111 -0
- csv_detective/parsing/text.py +56 -0
- csv_detective/utils.py +23 -196
- csv_detective/validate.py +138 -0
- csv_detective-0.9.3.dev2438.dist-info/METADATA +267 -0
- csv_detective-0.9.3.dev2438.dist-info/RECORD +92 -0
- csv_detective-0.9.3.dev2438.dist-info/WHEEL +4 -0
- {csv_detective-0.6.7.dist-info → csv_detective-0.9.3.dev2438.dist-info}/entry_points.txt +1 -0
- csv_detective/all_packages.txt +0 -104
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +0 -100
- csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -24
- csv_detective/detect_fields/FR/geo/code_commune_insee/code_commune_insee.txt +0 -37600
- csv_detective/detect_fields/FR/geo/code_departement/__init__.py +0 -11
- csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -15
- csv_detective/detect_fields/FR/geo/code_fantoir/code_fantoir.txt +0 -26122
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/code_postal/code_postal.txt +0 -36822
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -27
- csv_detective/detect_fields/FR/geo/commune/__init__.py +0 -21
- csv_detective/detect_fields/FR/geo/commune/commune.txt +0 -36745
- csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/departement/departement.txt +0 -101
- csv_detective/detect_fields/FR/geo/insee_canton/__init__.py +0 -20
- csv_detective/detect_fields/FR/geo/insee_canton/canton2017.txt +0 -2055
- csv_detective/detect_fields/FR/geo/insee_canton/cantons.txt +0 -2055
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -17
- csv_detective/detect_fields/FR/geo/pays/pays.txt +0 -248
- csv_detective/detect_fields/FR/geo/region/__init__.py +0 -16
- csv_detective/detect_fields/FR/geo/region/region.txt +0 -44
- csv_detective/detect_fields/FR/other/__init__.py +0 -0
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +0 -26
- csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
- csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -8
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -9
- csv_detective/detect_fields/FR/other/siren/__init__.py +0 -18
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -15
- csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
- csv_detective/detect_fields/FR/temp/__init__.py +0 -0
- csv_detective/detect_fields/FR/temp/jour_de_la_semaine/__init__.py +0 -23
- csv_detective/detect_fields/FR/temp/mois_de_annee/__init__.py +0 -37
- csv_detective/detect_fields/__init__.py +0 -57
- csv_detective/detect_fields/geo/__init__.py +0 -0
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
- csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -22
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -15
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/other/__init__.py +0 -0
- csv_detective/detect_fields/other/booleen/__init__.py +0 -21
- csv_detective/detect_fields/other/email/__init__.py +0 -8
- csv_detective/detect_fields/other/float/__init__.py +0 -17
- csv_detective/detect_fields/other/int/__init__.py +0 -12
- csv_detective/detect_fields/other/json/__init__.py +0 -24
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
- csv_detective/detect_fields/other/twitter/__init__.py +0 -8
- csv_detective/detect_fields/other/url/__init__.py +0 -11
- csv_detective/detect_fields/other/uuid/__init__.py +0 -11
- csv_detective/detect_fields/temp/__init__.py +0 -0
- csv_detective/detect_fields/temp/date/__init__.py +0 -62
- csv_detective/detect_fields/temp/datetime_iso/__init__.py +0 -18
- csv_detective/detect_fields/temp/datetime_rfc822/__init__.py +0 -21
- csv_detective/detect_fields/temp/year/__init__.py +0 -10
- csv_detective/detect_labels/FR/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -40
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -42
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -41
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -47
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -33
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -54
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -55
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -44
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -45
- csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -45
- csv_detective/detect_labels/FR/geo/region/__init__.py +0 -45
- csv_detective/detect_labels/FR/other/__init__.py +0 -0
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -38
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -37
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -40
- csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -33
- csv_detective/detect_labels/FR/other/siren/__init__.py +0 -41
- csv_detective/detect_labels/FR/other/siret/__init__.py +0 -40
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -45
- csv_detective/detect_labels/FR/other/uai/__init__.py +0 -50
- csv_detective/detect_labels/FR/temp/__init__.py +0 -0
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -41
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -33
- csv_detective/detect_labels/__init__.py +0 -43
- csv_detective/detect_labels/geo/__init__.py +0 -0
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -41
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -41
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -41
- csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -42
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -55
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -67
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -45
- csv_detective/detect_labels/other/__init__.py +0 -0
- csv_detective/detect_labels/other/booleen/__init__.py +0 -34
- csv_detective/detect_labels/other/email/__init__.py +0 -45
- csv_detective/detect_labels/other/float/__init__.py +0 -33
- csv_detective/detect_labels/other/int/__init__.py +0 -33
- csv_detective/detect_labels/other/money/__init__.py +0 -11
- csv_detective/detect_labels/other/money/check_col_name.py +0 -8
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -33
- csv_detective/detect_labels/other/twitter/__init__.py +0 -33
- csv_detective/detect_labels/other/url/__init__.py +0 -48
- csv_detective/detect_labels/other/uuid/__init__.py +0 -33
- csv_detective/detect_labels/temp/__init__.py +0 -0
- csv_detective/detect_labels/temp/date/__init__.py +0 -51
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +0 -45
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -44
- csv_detective/detect_labels/temp/year/__init__.py +0 -44
- csv_detective/detection.py +0 -361
- csv_detective/process_text.py +0 -39
- csv_detective/s3_utils.py +0 -48
- csv_detective-0.6.7.data/data/share/csv_detective/CHANGELOG.md +0 -118
- csv_detective-0.6.7.data/data/share/csv_detective/LICENSE.AGPL.txt +0 -661
- csv_detective-0.6.7.data/data/share/csv_detective/README.md +0 -247
- csv_detective-0.6.7.dist-info/LICENSE.AGPL.txt +0 -661
- csv_detective-0.6.7.dist-info/METADATA +0 -23
- csv_detective-0.6.7.dist-info/RECORD +0 -150
- csv_detective-0.6.7.dist-info/WHEEL +0 -5
- csv_detective-0.6.7.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/test_fields.py +0 -360
- tests/test_file.py +0 -116
- tests/test_labels.py +0 -7
- /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.detect_fields.other.float import _is as is_float
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.9
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
    '''Return a truthy value if val could be a latitude in Lambert 93.

    val is accepted when is_float recognises it and its float value lies
    within [6037008, 7230728]. Values that float() cannot convert
    (ValueError, OverflowError) yield False.
    '''
    try:
        # The range check must be returned: evaluating it as a bare
        # expression made the function fall through and return None.
        return is_float(val) and 6037008 <= float(val) <= 7230728
    except (ValueError, OverflowError):
        return False
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.detect_fields.other.float import _is as is_float
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.9
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
    '''Return a truthy value if val could be a latitude in metropolitan France.

    val must be recognised by is_float and its float value must lie within
    [41.3, 51.3]. Conversion failures yield False.
    '''
    try:
        return is_float(val) and 41.3 <= float(val) <= 51.3
    except (ValueError, OverflowError):
        return False
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.detect_fields.other.float import _is as is_float
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.9
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
    '''Return a truthy value if val could be a longitude in Lambert 93.

    val is accepted when is_float recognises it and its float value lies
    within [-357823, 1313633]. Values that float() cannot convert
    (ValueError, OverflowError) yield False.

    NOTE(review): the previous docstring described this as a longitude
    "en métropole"; the bounds look like projected coordinates rather than
    degrees, presumably Lambert 93 -- confirm.
    '''
    try:
        # The range check must be returned: evaluating it as a bare
        # expression made the function fall through and return None.
        return is_float(val) and -357823 <= float(val) <= 1313633
    except (ValueError, OverflowError):
        return False
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.detect_fields.other.float import _is as is_float
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.9
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
    '''Return a truthy value if val could be a longitude in metropolitan France.

    val must be recognised by is_float and its float value must lie within
    [-5.5, 9.8]. Conversion failures yield False.
    '''
    try:
        return is_float(val) and -5.5 <= float(val) <= 9.8
    except (ValueError, OverflowError):
        return False
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from os.path import dirname, join
|
|
2
|
-
from csv_detective.process_text import _process_text
|
|
3
|
-
|
|
4
|
-
PROPORTION = 0.6
# Load the known country names once at import time. The context manager
# closes the file even if reading fails, and the explicit encoding keeps the
# accented entries readable regardless of the platform's default encoding.
with open(join(dirname(__file__), 'pays.txt'), 'r', encoding='utf-8') as f:
    pays = set(f.read().split('\n'))
# Length of the longest known name; _is uses it to reject over-long values
# before normalising them.
max_len = max(len(p) for p in pays)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _is(val):
    '''Return True if val, once normalised, is a known country name.

    Values longer than the longest known name are rejected before any
    text processing is done.
    '''
    return len(val) <= max_len and _process_text(val) in pays
|
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
afghanistan
|
|
2
|
-
afrique du sud
|
|
3
|
-
Åland, Îles
|
|
4
|
-
albanie
|
|
5
|
-
algerie
|
|
6
|
-
allemagne
|
|
7
|
-
andorre
|
|
8
|
-
angola
|
|
9
|
-
anguilla
|
|
10
|
-
antarctique
|
|
11
|
-
antigua et barbuda
|
|
12
|
-
arabie saoudite
|
|
13
|
-
argentine
|
|
14
|
-
armenie
|
|
15
|
-
aruba
|
|
16
|
-
australie
|
|
17
|
-
autriche
|
|
18
|
-
azerbaidjan
|
|
19
|
-
bahamas
|
|
20
|
-
bahrein
|
|
21
|
-
bangladesh
|
|
22
|
-
barbade
|
|
23
|
-
belarus
|
|
24
|
-
belgique
|
|
25
|
-
belize
|
|
26
|
-
benin
|
|
27
|
-
bermudes
|
|
28
|
-
bhoutan
|
|
29
|
-
bolivie
|
|
30
|
-
bonaire
|
|
31
|
-
bosnie herzegovine
|
|
32
|
-
botswana
|
|
33
|
-
bouvet
|
|
34
|
-
bresil
|
|
35
|
-
brunei darussalam
|
|
36
|
-
bulgarie
|
|
37
|
-
burkina faso
|
|
38
|
-
burundi
|
|
39
|
-
caimanes
|
|
40
|
-
cambodge
|
|
41
|
-
cameroun
|
|
42
|
-
canada
|
|
43
|
-
cap vert
|
|
44
|
-
centrafricaine, republique
|
|
45
|
-
chili
|
|
46
|
-
chine
|
|
47
|
-
christmas
|
|
48
|
-
chypre
|
|
49
|
-
cocos
|
|
50
|
-
colombie
|
|
51
|
-
comores
|
|
52
|
-
congo
|
|
53
|
-
congo
|
|
54
|
-
cook, iles
|
|
55
|
-
coree
|
|
56
|
-
coree
|
|
57
|
-
costa rica
|
|
58
|
-
cote d'ivoire
|
|
59
|
-
croatie
|
|
60
|
-
cuba
|
|
61
|
-
curacao
|
|
62
|
-
danemark
|
|
63
|
-
djibouti
|
|
64
|
-
dominicaine
|
|
65
|
-
dominique
|
|
66
|
-
egypte
|
|
67
|
-
el salvador
|
|
68
|
-
emirats arabes unis
|
|
69
|
-
equateur
|
|
70
|
-
erythree
|
|
71
|
-
espagne
|
|
72
|
-
estonie
|
|
73
|
-
etats unis
|
|
74
|
-
ethiopie
|
|
75
|
-
falkland
|
|
76
|
-
feroe
|
|
77
|
-
fidji
|
|
78
|
-
finlande
|
|
79
|
-
france
|
|
80
|
-
gabon
|
|
81
|
-
gambie
|
|
82
|
-
georgie
|
|
83
|
-
georgie du sud et les iles sandwich du sud
|
|
84
|
-
ghana
|
|
85
|
-
gibraltar
|
|
86
|
-
grece
|
|
87
|
-
grenade
|
|
88
|
-
groenland
|
|
89
|
-
guadeloupe
|
|
90
|
-
guam
|
|
91
|
-
guatemala
|
|
92
|
-
guernesey
|
|
93
|
-
guinee
|
|
94
|
-
guinee bissau
|
|
95
|
-
guinee equatoriale
|
|
96
|
-
guyana
|
|
97
|
-
guyane francaise
|
|
98
|
-
haiti
|
|
99
|
-
heard, ile et mcdonald, iles
|
|
100
|
-
honduras
|
|
101
|
-
hong kong
|
|
102
|
-
hongrie
|
|
103
|
-
ile de man
|
|
104
|
-
iles mineures eloignees des etats unis
|
|
105
|
-
iles vierges britanniques
|
|
106
|
-
iles vierges des etats unis
|
|
107
|
-
inde
|
|
108
|
-
indonesie
|
|
109
|
-
iran
|
|
110
|
-
iraq
|
|
111
|
-
irlande
|
|
112
|
-
islande
|
|
113
|
-
israel
|
|
114
|
-
italie
|
|
115
|
-
jamaique
|
|
116
|
-
japon
|
|
117
|
-
jersey
|
|
118
|
-
jordanie
|
|
119
|
-
kazakhstan
|
|
120
|
-
kenya
|
|
121
|
-
kirghizistan
|
|
122
|
-
kiribati
|
|
123
|
-
koweit
|
|
124
|
-
laos
|
|
125
|
-
lesotho
|
|
126
|
-
lettonie
|
|
127
|
-
liban
|
|
128
|
-
liberia
|
|
129
|
-
libyenne
|
|
130
|
-
liechtenstein
|
|
131
|
-
lituanie
|
|
132
|
-
luxembourg
|
|
133
|
-
macao
|
|
134
|
-
macedoine
|
|
135
|
-
madagascar
|
|
136
|
-
malaisie
|
|
137
|
-
malawi
|
|
138
|
-
maldives
|
|
139
|
-
mali
|
|
140
|
-
malte
|
|
141
|
-
mariannes du nord
|
|
142
|
-
maroc
|
|
143
|
-
marshall
|
|
144
|
-
martinique
|
|
145
|
-
maurice
|
|
146
|
-
mauritanie
|
|
147
|
-
mayotte
|
|
148
|
-
mexique
|
|
149
|
-
micronesie
|
|
150
|
-
moldova
|
|
151
|
-
monaco
|
|
152
|
-
mongolie
|
|
153
|
-
montenegro
|
|
154
|
-
montserrat
|
|
155
|
-
mozambique
|
|
156
|
-
myanmar
|
|
157
|
-
namibie
|
|
158
|
-
nauru
|
|
159
|
-
nepal
|
|
160
|
-
nicaragua
|
|
161
|
-
niger
|
|
162
|
-
nigeria
|
|
163
|
-
niue
|
|
164
|
-
norfolk
|
|
165
|
-
norvege
|
|
166
|
-
nouvelle caledonie
|
|
167
|
-
nouvelle zelande
|
|
168
|
-
ocean indien
|
|
169
|
-
oman
|
|
170
|
-
ouganda
|
|
171
|
-
ouzbekistan
|
|
172
|
-
pakistan
|
|
173
|
-
palaos
|
|
174
|
-
palestinien occupe
|
|
175
|
-
panama
|
|
176
|
-
papouasie nouvelle guinee
|
|
177
|
-
paraguay
|
|
178
|
-
pays bas
|
|
179
|
-
perou
|
|
180
|
-
philippines
|
|
181
|
-
pitcairn
|
|
182
|
-
pologne
|
|
183
|
-
polynesie francaise
|
|
184
|
-
porto rico
|
|
185
|
-
portugal
|
|
186
|
-
qatar
|
|
187
|
-
reunion
|
|
188
|
-
roumanie
|
|
189
|
-
royaume uni
|
|
190
|
-
russie
|
|
191
|
-
rwanda
|
|
192
|
-
sahara occidental
|
|
193
|
-
saint barthelemy
|
|
194
|
-
sainte helene
|
|
195
|
-
sainte lucie
|
|
196
|
-
saint kitts et nevis
|
|
197
|
-
saint marin
|
|
198
|
-
saint martin
|
|
199
|
-
saint pierre et miquelon
|
|
200
|
-
saint siege
|
|
201
|
-
saint vincent et les grenadines
|
|
202
|
-
salomon
|
|
203
|
-
samoa
|
|
204
|
-
samoa americaines
|
|
205
|
-
sao tome et principe
|
|
206
|
-
senegal
|
|
207
|
-
serbie
|
|
208
|
-
seychelles
|
|
209
|
-
sierra leone
|
|
210
|
-
singapour
|
|
211
|
-
slovaquie
|
|
212
|
-
slovenie
|
|
213
|
-
somalie
|
|
214
|
-
soudan
|
|
215
|
-
sri lanka
|
|
216
|
-
suede
|
|
217
|
-
suisse
|
|
218
|
-
suriname
|
|
219
|
-
svalbard
|
|
220
|
-
swaziland
|
|
221
|
-
syrie
|
|
222
|
-
tadjikistan
|
|
223
|
-
taiwan
|
|
224
|
-
tanzanie
|
|
225
|
-
tchad
|
|
226
|
-
tcheque
|
|
227
|
-
terres australes francaises
|
|
228
|
-
thailande
|
|
229
|
-
timor leste
|
|
230
|
-
togo
|
|
231
|
-
tokelau
|
|
232
|
-
tonga
|
|
233
|
-
trinite et tobago
|
|
234
|
-
tunisie
|
|
235
|
-
turkmenistan
|
|
236
|
-
turks et caiques, iles
|
|
237
|
-
turquie
|
|
238
|
-
tuvalu
|
|
239
|
-
ukraine
|
|
240
|
-
uruguay
|
|
241
|
-
vanuatu
|
|
242
|
-
vatican
|
|
243
|
-
venezuela
|
|
244
|
-
viet nam
|
|
245
|
-
wallis et futuna
|
|
246
|
-
yemen
|
|
247
|
-
zambie
|
|
248
|
-
zimbabwe
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from os.path import dirname, join
|
|
2
|
-
from csv_detective.process_text import _process_text
|
|
3
|
-
|
|
4
|
-
PROPORTION = 1
# Load the known region names once at import time. The context manager
# closes the file even if reading fails.
with open(join(dirname(__file__), 'region.txt'), 'r', encoding='utf-8') as f:
    regions = f.read().split('\n')
# Remove the empty string produced by the file's trailing newline. Only drop
# the last entry when it really is empty, so a file without a trailing
# newline does not lose its last region.
if regions and regions[-1] == '':
    del regions[-1]
regions = set(regions)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def _is(val):
    '''Return True if val, once normalised, is a known region name.'''
    return _process_text(val) in regions
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
alsace
|
|
2
|
-
aquitaine
|
|
3
|
-
auvergne
|
|
4
|
-
basse normandie
|
|
5
|
-
bourgogne
|
|
6
|
-
bretagne
|
|
7
|
-
centre
|
|
8
|
-
centre val de loire
|
|
9
|
-
champagne ardenne
|
|
10
|
-
corse
|
|
11
|
-
franche comte
|
|
12
|
-
guadeloupe
|
|
13
|
-
guyane
|
|
14
|
-
haute normandie
|
|
15
|
-
ile de france
|
|
16
|
-
la reunion
|
|
17
|
-
languedoc roussillon
|
|
18
|
-
limousin
|
|
19
|
-
lorraine
|
|
20
|
-
martinique
|
|
21
|
-
mayotte
|
|
22
|
-
midi pyrenees
|
|
23
|
-
nord pas de calais
|
|
24
|
-
npdc
|
|
25
|
-
pays de la loire
|
|
26
|
-
picardie
|
|
27
|
-
poitou charentes
|
|
28
|
-
provence alpes cote d azur
|
|
29
|
-
paca
|
|
30
|
-
rhone alpes
|
|
31
|
-
hauts de france
|
|
32
|
-
hdf
|
|
33
|
-
normandie
|
|
34
|
-
grand est
|
|
35
|
-
nouvelle aquitaine
|
|
36
|
-
occitanie
|
|
37
|
-
auvergne rhone alpes
|
|
38
|
-
ara
|
|
39
|
-
bourgogne franche comte
|
|
40
|
-
bfc
|
|
41
|
-
auvergne et rhone alpes
|
|
42
|
-
bourgogne et franche comte
|
|
43
|
-
reunion
|
|
44
|
-
la reunion
|
|
File without changes
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from csv_detective.process_text import _process_text
|
|
2
|
-
import re
|
|
3
|
-
|
|
4
|
-
PROPORTION = 1
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def _is(val):
    '''Detect CSP codes as defined by INSEE.

    After normalisation the value must be exactly four characters long and
    either match the digit-digit-digit-letter pattern or be one of the
    listed all-digit codes.
    '''
    code = _process_text(val)
    if len(code) != 4:
        return False
    if re.match(r'^[123456][0-9]{2}[abcdefghijkl]$', code):
        return True
    numeric_codes = {
        '7100', '7200', '7400', '7500', '7700', '7800',
        '8100', '8300', '8400', '8500', '8600',
    }
    return code in numeric_codes
|