csv-detective 0.10.1.dev2581__py3-none-any.whl → 0.10.1.dev2599__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detection/formats.py +11 -38
- csv_detective/format.py +11 -4
- csv_detective/formats/adresse.py +9 -9
- csv_detective/formats/binary.py +2 -1
- csv_detective/formats/booleen.py +3 -2
- csv_detective/formats/code_commune_insee.py +12 -10
- csv_detective/formats/code_csp_insee.py +1 -1
- csv_detective/formats/code_departement.py +8 -7
- csv_detective/formats/code_fantoir.py +6 -5
- csv_detective/formats/code_import.py +1 -1
- csv_detective/formats/code_postal.py +10 -9
- csv_detective/formats/code_region.py +7 -6
- csv_detective/formats/code_rna.py +7 -6
- csv_detective/formats/code_waldec.py +1 -1
- csv_detective/formats/commune.py +5 -5
- csv_detective/formats/csp_insee.py +6 -5
- csv_detective/formats/date.py +18 -17
- csv_detective/formats/date_fr.py +1 -1
- csv_detective/formats/datetime_aware.py +2 -1
- csv_detective/formats/datetime_naive.py +1 -0
- csv_detective/formats/datetime_rfc822.py +1 -0
- csv_detective/formats/departement.py +15 -15
- csv_detective/formats/email.py +13 -13
- csv_detective/formats/float.py +2 -1
- csv_detective/formats/geojson.py +10 -10
- csv_detective/formats/insee_ape700.py +10 -8
- csv_detective/formats/insee_canton.py +6 -6
- csv_detective/formats/int.py +2 -1
- csv_detective/formats/iso_country_code_alpha2.py +10 -9
- csv_detective/formats/iso_country_code_alpha3.py +2 -9
- csv_detective/formats/iso_country_code_numeric.py +2 -9
- csv_detective/formats/jour_de_la_semaine.py +11 -12
- csv_detective/formats/json.py +6 -0
- csv_detective/formats/latitude_l93.py +8 -22
- csv_detective/formats/latitude_wgs.py +21 -26
- csv_detective/formats/latitude_wgs_fr_metropole.py +4 -26
- csv_detective/formats/latlon_wgs.py +27 -26
- csv_detective/formats/longitude_l93.py +8 -13
- csv_detective/formats/longitude_wgs.py +24 -16
- csv_detective/formats/longitude_wgs_fr_metropole.py +4 -16
- csv_detective/formats/lonlat_wgs.py +10 -9
- csv_detective/formats/mois_de_lannee.py +1 -1
- csv_detective/formats/money.py +1 -1
- csv_detective/formats/mongo_object_id.py +1 -1
- csv_detective/formats/pays.py +11 -13
- csv_detective/formats/percent.py +1 -1
- csv_detective/formats/region.py +13 -13
- csv_detective/formats/sexe.py +1 -1
- csv_detective/formats/siren.py +9 -10
- csv_detective/formats/siret.py +9 -9
- csv_detective/formats/tel_fr.py +7 -13
- csv_detective/formats/uai.py +17 -18
- csv_detective/formats/url.py +16 -16
- csv_detective/formats/username.py +1 -1
- csv_detective/formats/uuid.py +1 -1
- csv_detective/formats/year.py +7 -12
- csv_detective/parsing/text.py +13 -12
- {csv_detective-0.10.1.dev2581.dist-info → csv_detective-0.10.1.dev2599.dist-info}/METADATA +1 -1
- csv_detective-0.10.1.dev2599.dist-info/RECORD +92 -0
- {csv_detective-0.10.1.dev2581.dist-info → csv_detective-0.10.1.dev2599.dist-info}/WHEEL +1 -1
- csv_detective-0.10.1.dev2581.dist-info/RECORD +0 -92
- {csv_detective-0.10.1.dev2581.dist-info → csv_detective-0.10.1.dev2599.dist-info}/entry_points.txt +0 -0
|
@@ -3,15 +3,16 @@ from os.path import dirname, join
|
|
|
3
3
|
|
|
4
4
|
proportion = 1
|
|
5
5
|
tags = ["geo"]
|
|
6
|
-
labels =
|
|
7
|
-
"iso country code",
|
|
8
|
-
"code pays",
|
|
9
|
-
"pays",
|
|
10
|
-
"country",
|
|
11
|
-
"nation",
|
|
12
|
-
"pays code",
|
|
13
|
-
"code pays (iso)",
|
|
14
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"iso country code": 1,
|
|
8
|
+
"code pays": 1,
|
|
9
|
+
"pays": 1,
|
|
10
|
+
"country": 1,
|
|
11
|
+
"nation": 1,
|
|
12
|
+
"pays code": 1,
|
|
13
|
+
"code pays (iso)": 1,
|
|
14
|
+
"code": 0.5,
|
|
15
|
+
}
|
|
15
16
|
|
|
16
17
|
with open(join(dirname(__file__), "data", "iso_country_code_alpha2.txt"), "r") as iofile:
|
|
17
18
|
liste_pays = iofile.read().split("\n")
|
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from os.path import dirname, join
|
|
3
3
|
|
|
4
|
+
from csv_detective.formats.iso_country_code_alpha2 import labels # noqa
|
|
5
|
+
|
|
4
6
|
proportion = 1
|
|
5
7
|
tags = ["geo"]
|
|
6
|
-
labels = [
|
|
7
|
-
"iso country code",
|
|
8
|
-
"code pays",
|
|
9
|
-
"pays",
|
|
10
|
-
"country",
|
|
11
|
-
"nation",
|
|
12
|
-
"pays code",
|
|
13
|
-
"code pays (iso)",
|
|
14
|
-
]
|
|
15
8
|
|
|
16
9
|
with open(join(dirname(__file__), "data", "iso_country_code_alpha3.txt"), "r") as iofile:
|
|
17
10
|
liste_pays = iofile.read().split("\n")
|
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from os.path import dirname, join
|
|
3
3
|
|
|
4
|
+
from csv_detective.formats.iso_country_code_alpha2 import labels # noqa
|
|
5
|
+
|
|
4
6
|
proportion = 1
|
|
5
7
|
tags = ["geo"]
|
|
6
|
-
labels = [
|
|
7
|
-
"iso country code",
|
|
8
|
-
"code pays",
|
|
9
|
-
"pays",
|
|
10
|
-
"country",
|
|
11
|
-
"nation",
|
|
12
|
-
"pays code",
|
|
13
|
-
"code pays (iso)",
|
|
14
|
-
]
|
|
15
8
|
|
|
16
9
|
with open(join(dirname(__file__), "data", "iso_country_code_numeric.txt"), "r") as iofile:
|
|
17
10
|
liste_pays = iofile.read().split("\n")
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
proportion = 0.8
|
|
2
2
|
tags = ["fr", "temp"]
|
|
3
|
-
labels =
|
|
4
|
-
"jour semaine",
|
|
5
|
-
"type jour",
|
|
6
|
-
"jour de la semaine",
|
|
7
|
-
"saufjour",
|
|
8
|
-
"nomjour",
|
|
9
|
-
"jour",
|
|
10
|
-
"jour de fermeture",
|
|
11
|
-
|
|
3
|
+
labels = {
|
|
4
|
+
"jour semaine": 1,
|
|
5
|
+
"type jour": 1,
|
|
6
|
+
"jour de la semaine": 1,
|
|
7
|
+
"saufjour": 1,
|
|
8
|
+
"nomjour": 1,
|
|
9
|
+
"jour": 0.75,
|
|
10
|
+
"jour de fermeture": 1,
|
|
11
|
+
}
|
|
12
12
|
|
|
13
13
|
jours = {
|
|
14
14
|
"lundi",
|
|
@@ -31,11 +31,10 @@ jours = {
|
|
|
31
31
|
def _is(val):
|
|
32
32
|
if not isinstance(val, str):
|
|
33
33
|
return False
|
|
34
|
-
|
|
35
|
-
return val in jours
|
|
34
|
+
return val.lower() in jours
|
|
36
35
|
|
|
37
36
|
|
|
38
37
|
_test_values = {
|
|
39
38
|
True: ["lundi"],
|
|
40
|
-
False: ["jour
|
|
39
|
+
False: ["jour"],
|
|
41
40
|
}
|
csv_detective/formats/json.py
CHANGED
|
@@ -2,31 +2,17 @@ from frformat import LatitudeL93
|
|
|
2
2
|
|
|
3
3
|
from csv_detective.formats.float import _is as is_float
|
|
4
4
|
from csv_detective.formats.float import float_casting
|
|
5
|
+
from csv_detective.formats.latitude_wgs import SHARED_LATITUDE_LABELS
|
|
5
6
|
|
|
6
7
|
proportion = 1
|
|
7
8
|
tags = ["fr", "geo"]
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
"y",
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
"coordonnee y",
|
|
16
|
-
"latitude lb93",
|
|
17
|
-
"coord y",
|
|
18
|
-
"ycoord",
|
|
19
|
-
"geocodage y gps",
|
|
20
|
-
"location latitude",
|
|
21
|
-
"ylatitude",
|
|
22
|
-
"ylat",
|
|
23
|
-
"latitude (y)",
|
|
24
|
-
"latitudeorg",
|
|
25
|
-
"coordinates.latitude",
|
|
26
|
-
"googlemap latitude",
|
|
27
|
-
"latitudelieu",
|
|
28
|
-
"latitude googlemap",
|
|
29
|
-
]
|
|
9
|
+
mandatory_label = True
|
|
10
|
+
python_type = "float"
|
|
11
|
+
labels = SHARED_LATITUDE_LABELS | {
|
|
12
|
+
"y l93": 1,
|
|
13
|
+
"latitude lb93": 1,
|
|
14
|
+
"lamby": 1,
|
|
15
|
+
}
|
|
30
16
|
|
|
31
17
|
_latitudel93 = LatitudeL93()
|
|
32
18
|
|
|
@@ -2,37 +2,32 @@ from csv_detective.formats.float import _is as is_float
|
|
|
2
2
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["geo"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
"
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
|
|
25
|
-
"y wgs84",
|
|
26
|
-
"latitude (wgs84)",
|
|
27
|
-
]
|
|
5
|
+
mandatory_label = True
|
|
6
|
+
python_type = "float"
|
|
7
|
+
SHARED_LATITUDE_LABELS = {
|
|
8
|
+
"latitude": 1,
|
|
9
|
+
"lat": 0.75,
|
|
10
|
+
"y": 0.5,
|
|
11
|
+
"yf": 0.5,
|
|
12
|
+
"yd": 0.5,
|
|
13
|
+
"coordonnee y": 1,
|
|
14
|
+
"coord y": 1,
|
|
15
|
+
"ycoord": 1,
|
|
16
|
+
"ylat": 1,
|
|
17
|
+
}
|
|
18
|
+
labels = SHARED_LATITUDE_LABELS | {
|
|
19
|
+
"y gps": 1,
|
|
20
|
+
"latitude wgs84": 1,
|
|
21
|
+
"y wgs84": 1,
|
|
22
|
+
"wsg": 0.75,
|
|
23
|
+
"gps": 0.5,
|
|
24
|
+
}
|
|
28
25
|
|
|
29
26
|
|
|
30
27
|
def _is(val):
|
|
31
28
|
try:
|
|
32
29
|
return is_float(val) and float(val) >= -90 and float(val) <= 90
|
|
33
|
-
except
|
|
34
|
-
return False
|
|
35
|
-
except OverflowError:
|
|
30
|
+
except Exception:
|
|
36
31
|
return False
|
|
37
32
|
|
|
38
33
|
|
|
@@ -1,38 +1,16 @@
|
|
|
1
1
|
from csv_detective.formats.float import _is as is_float
|
|
2
|
+
from csv_detective.formats.latitude_wgs import labels # noqa
|
|
2
3
|
|
|
3
4
|
proportion = 1
|
|
4
5
|
tags = ["fr", "geo"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"lat",
|
|
8
|
-
"y",
|
|
9
|
-
"yf",
|
|
10
|
-
"yd",
|
|
11
|
-
"coordonnee y",
|
|
12
|
-
"coord y",
|
|
13
|
-
"ycoord",
|
|
14
|
-
"geocodage y gps",
|
|
15
|
-
"location latitude",
|
|
16
|
-
"ylatitude",
|
|
17
|
-
"ylat",
|
|
18
|
-
"latitude (y)",
|
|
19
|
-
"latitudeorg",
|
|
20
|
-
"coordinates.latitude",
|
|
21
|
-
"googlemap latitude",
|
|
22
|
-
"latitudelieu",
|
|
23
|
-
"latitude googlemap",
|
|
24
|
-
"latitude wgs84",
|
|
25
|
-
"y wgs84",
|
|
26
|
-
"latitude (wgs84)",
|
|
27
|
-
]
|
|
6
|
+
mandatory_label = True
|
|
7
|
+
python_type = "float"
|
|
28
8
|
|
|
29
9
|
|
|
30
10
|
def _is(val):
|
|
31
11
|
try:
|
|
32
12
|
return is_float(val) and float(val) >= 41.3 and float(val) <= 51.3
|
|
33
|
-
except
|
|
34
|
-
return False
|
|
35
|
-
except OverflowError:
|
|
13
|
+
except Exception:
|
|
36
14
|
return False
|
|
37
15
|
|
|
38
16
|
|
|
@@ -3,37 +3,38 @@ from csv_detective.formats.longitude_wgs import _is as is_lon
|
|
|
3
3
|
|
|
4
4
|
proportion = 1
|
|
5
5
|
tags = ["geo"]
|
|
6
|
+
mandatory_label = True
|
|
7
|
+
|
|
8
|
+
SHARED_COORDS_LABELS = {
|
|
9
|
+
"ban": 1,
|
|
10
|
+
"coordinates": 1,
|
|
11
|
+
"coordonnees": 1,
|
|
12
|
+
"coordonnees insee": 1,
|
|
13
|
+
"geo": 0.5,
|
|
14
|
+
"geopoint": 1,
|
|
15
|
+
"geoloc": 1,
|
|
16
|
+
"geolocalisation": 1,
|
|
17
|
+
"geom": 0.75,
|
|
18
|
+
"geometry": 1,
|
|
19
|
+
"gps": 1,
|
|
20
|
+
"localisation": 1,
|
|
21
|
+
"point": 1,
|
|
22
|
+
"position": 1,
|
|
23
|
+
"wgs84": 1,
|
|
24
|
+
}
|
|
6
25
|
|
|
7
|
-
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
"geopoint",
|
|
14
|
-
"geoloc",
|
|
15
|
-
"geolocalisation",
|
|
16
|
-
"geom",
|
|
17
|
-
"geometry",
|
|
18
|
-
"gps",
|
|
19
|
-
"localisation",
|
|
20
|
-
"point",
|
|
21
|
-
"position",
|
|
22
|
-
"wgs84",
|
|
23
|
-
]
|
|
24
|
-
|
|
25
|
-
specific = [
|
|
26
|
-
"latlon",
|
|
27
|
-
"lat lon",
|
|
28
|
-
"x y",
|
|
29
|
-
"xy",
|
|
30
|
-
]
|
|
26
|
+
specific = {
|
|
27
|
+
"latlon": 1,
|
|
28
|
+
"lat lon": 1,
|
|
29
|
+
"x y": 0.75,
|
|
30
|
+
"xy": 0.75,
|
|
31
|
+
}
|
|
31
32
|
|
|
32
33
|
# we aim wide to catch exact matches if possible for the highest possible score
|
|
33
34
|
labels = (
|
|
34
35
|
SHARED_COORDS_LABELS
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
| specific
|
|
37
|
+
| {w + sep + suf: 1 for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]}
|
|
37
38
|
)
|
|
38
39
|
|
|
39
40
|
|
|
@@ -2,22 +2,17 @@ from frformat import LongitudeL93
|
|
|
2
2
|
|
|
3
3
|
from csv_detective.formats.float import _is as is_float
|
|
4
4
|
from csv_detective.formats.float import float_casting
|
|
5
|
+
from csv_detective.formats.longitude_wgs import SHARED_LONGITUDE_LABELS
|
|
5
6
|
|
|
6
7
|
proportion = 1
|
|
7
8
|
tags = ["fr", "geo"]
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
"lng",
|
|
16
|
-
"xlong",
|
|
17
|
-
"x",
|
|
18
|
-
"xf",
|
|
19
|
-
"xd",
|
|
20
|
-
]
|
|
9
|
+
mandatory_label = True
|
|
10
|
+
python_type = "float"
|
|
11
|
+
labels = SHARED_LONGITUDE_LABELS | {
|
|
12
|
+
"x l93": 1,
|
|
13
|
+
"longitude lb93": 1,
|
|
14
|
+
"lambx": 1,
|
|
15
|
+
}
|
|
21
16
|
|
|
22
17
|
_longitudel93 = LongitudeL93()
|
|
23
18
|
|
|
@@ -2,27 +2,35 @@ from csv_detective.formats.float import _is as is_float
|
|
|
2
2
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["geo"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
"
|
|
17
|
-
|
|
5
|
+
mandatory_label = True
|
|
6
|
+
python_type = "float"
|
|
7
|
+
SHARED_LONGITUDE_LABELS = {
|
|
8
|
+
"longitude": 1,
|
|
9
|
+
"long": 0.75,
|
|
10
|
+
"lon": 0.75,
|
|
11
|
+
"lng": 0.5,
|
|
12
|
+
"x": 0.5,
|
|
13
|
+
"xf": 0.5,
|
|
14
|
+
"xd": 0.5,
|
|
15
|
+
"coordonnee x": 1,
|
|
16
|
+
"coord x": 1,
|
|
17
|
+
"xcoord": 1,
|
|
18
|
+
"xlon": 1,
|
|
19
|
+
"xlong": 1,
|
|
20
|
+
}
|
|
21
|
+
labels = SHARED_LONGITUDE_LABELS | {
|
|
22
|
+
"x gps": 1,
|
|
23
|
+
"longitude wgs84": 1,
|
|
24
|
+
"x wgs84": 1,
|
|
25
|
+
"wsg": 0.75,
|
|
26
|
+
"gps": 0.5,
|
|
27
|
+
}
|
|
18
28
|
|
|
19
29
|
|
|
20
30
|
def _is(val):
|
|
21
31
|
try:
|
|
22
32
|
return is_float(val) and float(val) >= -180 and float(val) <= 180
|
|
23
|
-
except
|
|
24
|
-
return False
|
|
25
|
-
except OverflowError:
|
|
33
|
+
except Exception:
|
|
26
34
|
return False
|
|
27
35
|
|
|
28
36
|
|
|
@@ -1,28 +1,16 @@
|
|
|
1
1
|
from csv_detective.formats.float import _is as is_float
|
|
2
|
+
from csv_detective.formats.longitude_wgs import labels # noqa
|
|
2
3
|
|
|
3
4
|
proportion = 1
|
|
4
5
|
tags = ["fr", "geo"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"lon",
|
|
8
|
-
"long",
|
|
9
|
-
"geocodage x gps",
|
|
10
|
-
"location longitude",
|
|
11
|
-
"xlongitude",
|
|
12
|
-
"lng",
|
|
13
|
-
"xlong",
|
|
14
|
-
"x",
|
|
15
|
-
"xf",
|
|
16
|
-
"xd",
|
|
17
|
-
]
|
|
6
|
+
mandatory_label = True
|
|
7
|
+
python_type = "float"
|
|
18
8
|
|
|
19
9
|
|
|
20
10
|
def _is(val):
|
|
21
11
|
try:
|
|
22
12
|
return is_float(val) and float(val) >= -5.5 and float(val) <= 9.8
|
|
23
|
-
except
|
|
24
|
-
return False
|
|
25
|
-
except OverflowError:
|
|
13
|
+
except Exception:
|
|
26
14
|
return False
|
|
27
15
|
|
|
28
16
|
|
|
@@ -4,19 +4,20 @@ from csv_detective.formats.longitude_wgs import _is as is_lon
|
|
|
4
4
|
|
|
5
5
|
proportion = 1
|
|
6
6
|
tags = ["geo"]
|
|
7
|
+
mandatory_label = True
|
|
7
8
|
|
|
8
|
-
specific =
|
|
9
|
-
"lonlat",
|
|
10
|
-
"lon lat",
|
|
11
|
-
"y x",
|
|
12
|
-
"yx",
|
|
13
|
-
|
|
9
|
+
specific = {
|
|
10
|
+
"lonlat": 1,
|
|
11
|
+
"lon lat": 1,
|
|
12
|
+
"y x": 0.75,
|
|
13
|
+
"yx": 0.75,
|
|
14
|
+
}
|
|
14
15
|
|
|
15
16
|
# we aim wide to catch exact matches if possible for the highest possible score
|
|
16
|
-
|
|
17
|
+
labels = (
|
|
17
18
|
SHARED_COORDS_LABELS
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
| specific
|
|
20
|
+
| {w + sep + suf: 1 for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]}
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
|
csv_detective/formats/money.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from csv_detective.formats.float import _is as is_float
|
|
2
2
|
|
|
3
3
|
proportion = 0.8
|
|
4
|
-
labels =
|
|
4
|
+
labels = {"budget": 1, "salaire": 1, "euro": 1, "euros": 1, "prêt": 1, "montant": 1}
|
|
5
5
|
|
|
6
6
|
currencies = {"€", "$", "£", "¥"}
|
|
7
7
|
|
csv_detective/formats/pays.py
CHANGED
|
@@ -2,19 +2,17 @@ from frformat import Millesime, Options, Pays
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.6
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
|
-
labels =
|
|
6
|
-
"pays",
|
|
7
|
-
"payslieu",
|
|
8
|
-
"paysorg",
|
|
9
|
-
"country",
|
|
10
|
-
"pays lib",
|
|
11
|
-
"lieupays",
|
|
12
|
-
"pays beneficiaire",
|
|
13
|
-
"nom du pays",
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
"journey end country",
|
|
17
|
-
]
|
|
5
|
+
labels = {
|
|
6
|
+
"pays": 1,
|
|
7
|
+
"payslieu": 1,
|
|
8
|
+
"paysorg": 1,
|
|
9
|
+
"country": 1,
|
|
10
|
+
"pays lib": 1,
|
|
11
|
+
"lieupays": 1,
|
|
12
|
+
"pays beneficiaire": 1,
|
|
13
|
+
"nom du pays": 1,
|
|
14
|
+
"libelle pays": 1,
|
|
15
|
+
}
|
|
18
16
|
|
|
19
17
|
_options = Options(
|
|
20
18
|
ignore_case=True,
|
csv_detective/formats/percent.py
CHANGED
csv_detective/formats/region.py
CHANGED
|
@@ -2,19 +2,19 @@ from frformat import Millesime, Options, Region
|
|
|
2
2
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
|
-
labels =
|
|
6
|
-
"region",
|
|
7
|
-
"libelle region",
|
|
8
|
-
"nom region",
|
|
9
|
-
"libelle reg",
|
|
10
|
-
"nom reg",
|
|
11
|
-
"reg libusage",
|
|
12
|
-
"nom de la region",
|
|
13
|
-
"regionorg",
|
|
14
|
-
"regionlieu",
|
|
15
|
-
"reg",
|
|
16
|
-
"nom officiel region",
|
|
17
|
-
|
|
5
|
+
labels = {
|
|
6
|
+
"region": 1,
|
|
7
|
+
"libelle region": 1,
|
|
8
|
+
"nom region": 1,
|
|
9
|
+
"libelle reg": 1,
|
|
10
|
+
"nom reg": 1,
|
|
11
|
+
"reg libusage": 1,
|
|
12
|
+
"nom de la region": 1,
|
|
13
|
+
"regionorg": 1,
|
|
14
|
+
"regionlieu": 1,
|
|
15
|
+
"reg": 0.5,
|
|
16
|
+
"nom officiel region": 1,
|
|
17
|
+
}
|
|
18
18
|
|
|
19
19
|
_extra_valid_values_set = frozenset(
|
|
20
20
|
{
|
csv_detective/formats/sexe.py
CHANGED
csv_detective/formats/siren.py
CHANGED
|
@@ -2,16 +2,15 @@ import re
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.9
|
|
4
4
|
tags = ["fr"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"siren
|
|
8
|
-
"siren
|
|
9
|
-
"
|
|
10
|
-
"siren
|
|
11
|
-
"siren
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
]
|
|
5
|
+
mandatory_label = True
|
|
6
|
+
labels = {
|
|
7
|
+
"siren": 1,
|
|
8
|
+
"n° siren": 1,
|
|
9
|
+
"siren organisme": 1,
|
|
10
|
+
"siren titulaire": 1,
|
|
11
|
+
"numero siren": 1,
|
|
12
|
+
"epci": 1,
|
|
13
|
+
}
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
def _is(val):
|
csv_detective/formats/siret.py
CHANGED
|
@@ -2,15 +2,15 @@ import re
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.8
|
|
4
4
|
tags = ["fr"]
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"siret
|
|
8
|
-
"num siret",
|
|
9
|
-
"siretacheteur",
|
|
10
|
-
"n° siret",
|
|
11
|
-
"coll siret",
|
|
12
|
-
"epci",
|
|
13
|
-
|
|
5
|
+
mandatory_label = True
|
|
6
|
+
labels = {
|
|
7
|
+
"siret": 1,
|
|
8
|
+
"num siret": 1,
|
|
9
|
+
"siretacheteur": 1,
|
|
10
|
+
"n° siret": 1,
|
|
11
|
+
"coll siret": 1,
|
|
12
|
+
"epci": 1,
|
|
13
|
+
}
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def _is(val):
|
csv_detective/formats/tel_fr.py
CHANGED
|
@@ -2,19 +2,13 @@ import re
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.7
|
|
4
4
|
tags = ["fr"]
|
|
5
|
-
labels =
|
|
6
|
-
"telephone",
|
|
7
|
-
"tel",
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
|
|
12
|
-
"tel mob",
|
|
13
|
-
"telephone sav",
|
|
14
|
-
"telephone1",
|
|
15
|
-
"coordinates.phone",
|
|
16
|
-
"telephone du lieu",
|
|
17
|
-
]
|
|
5
|
+
labels = {
|
|
6
|
+
"telephone": 1,
|
|
7
|
+
"tel": 1,
|
|
8
|
+
"phone": 1,
|
|
9
|
+
"num tel": 1,
|
|
10
|
+
"tel mob": 1,
|
|
11
|
+
}
|
|
18
12
|
|
|
19
13
|
|
|
20
14
|
def _is(val):
|