csv-detective 0.10.1.dev2590__py3-none-any.whl → 0.10.1.dev2599__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/format.py +3 -3
- csv_detective/formats/adresse.py +9 -9
- csv_detective/formats/binary.py +1 -1
- csv_detective/formats/booleen.py +2 -2
- csv_detective/formats/code_commune_insee.py +11 -10
- csv_detective/formats/code_csp_insee.py +1 -1
- csv_detective/formats/code_departement.py +7 -7
- csv_detective/formats/code_fantoir.py +5 -5
- csv_detective/formats/code_import.py +1 -1
- csv_detective/formats/code_postal.py +9 -9
- csv_detective/formats/code_region.py +6 -6
- csv_detective/formats/code_rna.py +7 -6
- csv_detective/formats/code_waldec.py +1 -1
- csv_detective/formats/commune.py +5 -5
- csv_detective/formats/csp_insee.py +6 -5
- csv_detective/formats/date.py +17 -17
- csv_detective/formats/date_fr.py +1 -1
- csv_detective/formats/datetime_aware.py +1 -1
- csv_detective/formats/departement.py +15 -15
- csv_detective/formats/email.py +13 -13
- csv_detective/formats/float.py +1 -1
- csv_detective/formats/geojson.py +9 -10
- csv_detective/formats/insee_ape700.py +10 -8
- csv_detective/formats/insee_canton.py +6 -6
- csv_detective/formats/int.py +1 -1
- csv_detective/formats/iso_country_code_alpha2.py +10 -9
- csv_detective/formats/iso_country_code_alpha3.py +2 -9
- csv_detective/formats/iso_country_code_numeric.py +2 -9
- csv_detective/formats/jour_de_la_semaine.py +11 -12
- csv_detective/formats/json.py +5 -0
- csv_detective/formats/latitude_l93.py +6 -22
- csv_detective/formats/latitude_wgs.py +19 -26
- csv_detective/formats/latitude_wgs_fr_metropole.py +2 -26
- csv_detective/formats/latlon_wgs.py +26 -26
- csv_detective/formats/longitude_l93.py +6 -13
- csv_detective/formats/longitude_wgs.py +22 -16
- csv_detective/formats/longitude_wgs_fr_metropole.py +2 -16
- csv_detective/formats/lonlat_wgs.py +9 -9
- csv_detective/formats/mois_de_lannee.py +1 -1
- csv_detective/formats/money.py +1 -1
- csv_detective/formats/mongo_object_id.py +1 -1
- csv_detective/formats/pays.py +11 -13
- csv_detective/formats/percent.py +1 -1
- csv_detective/formats/region.py +13 -13
- csv_detective/formats/sexe.py +1 -1
- csv_detective/formats/siren.py +8 -10
- csv_detective/formats/siret.py +8 -9
- csv_detective/formats/tel_fr.py +7 -13
- csv_detective/formats/uai.py +17 -18
- csv_detective/formats/url.py +16 -16
- csv_detective/formats/username.py +1 -1
- csv_detective/formats/uuid.py +1 -1
- csv_detective/formats/year.py +6 -12
- csv_detective/parsing/text.py +13 -12
- {csv_detective-0.10.1.dev2590.dist-info → csv_detective-0.10.1.dev2599.dist-info}/METADATA +1 -1
- csv_detective-0.10.1.dev2599.dist-info/RECORD +92 -0
- {csv_detective-0.10.1.dev2590.dist-info → csv_detective-0.10.1.dev2599.dist-info}/WHEEL +1 -1
- csv_detective-0.10.1.dev2590.dist-info/RECORD +0 -92
- {csv_detective-0.10.1.dev2590.dist-info → csv_detective-0.10.1.dev2599.dist-info}/entry_points.txt +0 -0
csv_detective/format.py
CHANGED
|
@@ -9,7 +9,7 @@ class Format:
|
|
|
9
9
|
name: str,
|
|
10
10
|
func: Callable[[Any], bool],
|
|
11
11
|
_test_values: dict[bool, list[str]],
|
|
12
|
-
labels:
|
|
12
|
+
labels: dict[str, float] = {},
|
|
13
13
|
proportion: float = 1,
|
|
14
14
|
tags: list[str] = [],
|
|
15
15
|
mandatory_label: bool = False,
|
|
@@ -22,14 +22,14 @@ class Format:
|
|
|
22
22
|
name: the name of the format.
|
|
23
23
|
func: the value test for the format (returns whether a string is valid).
|
|
24
24
|
_test_values: lists of valid and invalid values, used in the tests
|
|
25
|
-
labels: the
|
|
25
|
+
labels: the dict of hint headers and their credibility for the header score (NB: credibility is relative within a single format; it should be used to rank the valid labels)
|
|
26
26
|
proportion: the tolerance (between 0 and 1) to say a column is valid for a format. (1 => 100% of the column has to pass the func check for the column to be considered valid)
|
|
27
27
|
tags: to allow users to submit a file to only a subset of formats
|
|
28
28
|
"""
|
|
29
29
|
self.name: str = name
|
|
30
30
|
self.func: Callable = func
|
|
31
31
|
self._test_values: dict[bool, list[str]] = _test_values
|
|
32
|
-
self.labels:
|
|
32
|
+
self.labels: dict[str, float] = labels
|
|
33
33
|
self.proportion: float = proportion
|
|
34
34
|
self.tags: list[str] = tags
|
|
35
35
|
self.mandatory_label: bool = mandatory_label
|
csv_detective/formats/adresse.py
CHANGED
|
@@ -2,15 +2,15 @@ from csv_detective.parsing.text import _process_text
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.55
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
|
-
labels =
|
|
6
|
-
"adresse",
|
|
7
|
-
"localisation",
|
|
8
|
-
"adresse postale",
|
|
9
|
-
"adresse geographique",
|
|
10
|
-
"adr",
|
|
11
|
-
"adresse complete",
|
|
12
|
-
"adresse station",
|
|
13
|
-
|
|
5
|
+
labels = {
|
|
6
|
+
"adresse": 1,
|
|
7
|
+
"localisation": 1,
|
|
8
|
+
"adresse postale": 1,
|
|
9
|
+
"adresse geographique": 1,
|
|
10
|
+
"adr": 0.5,
|
|
11
|
+
"adresse complete": 1,
|
|
12
|
+
"adresse station": 1,
|
|
13
|
+
}
|
|
14
14
|
|
|
15
15
|
voies = {
|
|
16
16
|
"aire ",
|
csv_detective/formats/binary.py
CHANGED
csv_detective/formats/booleen.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
proportion = 1
|
|
2
2
|
tags = ["type"]
|
|
3
3
|
python_type = "bool"
|
|
4
|
-
labels =
|
|
4
|
+
labels = {"is ": 1, "has ": 1, "est ": 1}
|
|
5
5
|
|
|
6
6
|
bool_mapping = {
|
|
7
7
|
"1": True,
|
|
@@ -22,7 +22,7 @@ bool_mapping = {
|
|
|
22
22
|
liste_bool = set(bool_mapping.keys())
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
def bool_casting(val: str) -> bool:
|
|
25
|
+
def bool_casting(val: str) -> bool | None:
|
|
26
26
|
return bool_mapping.get(val.lower())
|
|
27
27
|
|
|
28
28
|
|
|
@@ -3,16 +3,17 @@ from frformat import CodeCommuneInsee, Millesime
|
|
|
3
3
|
proportion = 0.75
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
5
|
mandatory_label = True
|
|
6
|
-
labels =
|
|
7
|
-
"code commune insee",
|
|
8
|
-
"code insee",
|
|
9
|
-
"codes insee",
|
|
10
|
-
"code commune",
|
|
11
|
-
"code insee commune",
|
|
12
|
-
"insee",
|
|
13
|
-
"code com",
|
|
14
|
-
"com",
|
|
15
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"code commune insee": 1,
|
|
8
|
+
"code insee": 1,
|
|
9
|
+
"codes insee": 1,
|
|
10
|
+
"code commune": 1,
|
|
11
|
+
"code insee commune": 1,
|
|
12
|
+
"insee": 0.75,
|
|
13
|
+
"code com": 1,
|
|
14
|
+
"com": 0.5,
|
|
15
|
+
"code": 0.5,
|
|
16
|
+
}
|
|
16
17
|
|
|
17
18
|
_code_commune_insee = CodeCommuneInsee(Millesime.LATEST)
|
|
18
19
|
|
|
@@ -3,13 +3,13 @@ from frformat import Millesime, NumeroDepartement, Options
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
5
|
mandatory_label = True
|
|
6
|
-
labels =
|
|
7
|
-
"code departement",
|
|
8
|
-
"code_departement",
|
|
9
|
-
"dep",
|
|
10
|
-
"departement",
|
|
11
|
-
"dept",
|
|
12
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"code departement": 1,
|
|
8
|
+
"code_departement": 1,
|
|
9
|
+
"dep": 0.5,
|
|
10
|
+
"departement": 1,
|
|
11
|
+
"dept": 0.75,
|
|
12
|
+
}
|
|
13
13
|
|
|
14
14
|
_options = Options(
|
|
15
15
|
ignore_case=True,
|
|
@@ -3,11 +3,11 @@ from frformat import CodeFantoir
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
5
|
mandatory_label = True
|
|
6
|
-
labels =
|
|
7
|
-
"cadastre1",
|
|
8
|
-
"code fantoir",
|
|
9
|
-
"fantoir",
|
|
10
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"cadastre1": 1,
|
|
8
|
+
"code fantoir": 1,
|
|
9
|
+
"fantoir": 1,
|
|
10
|
+
}
|
|
11
11
|
|
|
12
12
|
_code_fantoir = CodeFantoir()
|
|
13
13
|
|
|
@@ -3,15 +3,15 @@ from frformat import CodePostal
|
|
|
3
3
|
proportion = 0.9
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
5
|
mandatory_label = True
|
|
6
|
-
labels =
|
|
7
|
-
"code postal",
|
|
8
|
-
"postal code",
|
|
9
|
-
"postcode",
|
|
10
|
-
"post code",
|
|
11
|
-
"cp",
|
|
12
|
-
"codes postaux",
|
|
13
|
-
"location postcode",
|
|
14
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"code postal": 1,
|
|
8
|
+
"postal code": 1,
|
|
9
|
+
"postcode": 1,
|
|
10
|
+
"post code": 1,
|
|
11
|
+
"cp": 0.5,
|
|
12
|
+
"codes postaux": 1,
|
|
13
|
+
"location postcode": 1,
|
|
14
|
+
}
|
|
15
15
|
|
|
16
16
|
_code_postal = CodePostal()
|
|
17
17
|
|
|
@@ -3,12 +3,12 @@ from frformat import CodeRegion, Millesime
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
5
|
mandatory_label = True
|
|
6
|
-
labels =
|
|
7
|
-
"code region",
|
|
8
|
-
"reg",
|
|
9
|
-
"code insee region",
|
|
10
|
-
"region",
|
|
11
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"code region": 1,
|
|
8
|
+
"reg": 0.5,
|
|
9
|
+
"code insee region": 1,
|
|
10
|
+
"region": 1,
|
|
11
|
+
}
|
|
12
12
|
|
|
13
13
|
_code_region = CodeRegion(Millesime.LATEST)
|
|
14
14
|
|
|
@@ -2,12 +2,13 @@ from frformat import CodeRNA
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.9
|
|
4
4
|
tags = ["fr"]
|
|
5
|
-
labels =
|
|
6
|
-
"code rna",
|
|
7
|
-
"rna",
|
|
8
|
-
"n° inscription association",
|
|
9
|
-
"identifiant association",
|
|
10
|
-
|
|
5
|
+
labels = {
|
|
6
|
+
"code rna": 1,
|
|
7
|
+
"rna": 1,
|
|
8
|
+
"n° inscription association": 1,
|
|
9
|
+
"identifiant association": 1,
|
|
10
|
+
"asso": 0.75,
|
|
11
|
+
}
|
|
11
12
|
|
|
12
13
|
_code_rna = CodeRNA()
|
|
13
14
|
|
csv_detective/formats/commune.py
CHANGED
|
@@ -2,11 +2,11 @@ from frformat import Commune, Millesime, Options
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.8
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
|
-
labels =
|
|
6
|
-
"commune",
|
|
7
|
-
"ville",
|
|
8
|
-
"libelle commune",
|
|
9
|
-
|
|
5
|
+
labels = {
|
|
6
|
+
"commune": 1,
|
|
7
|
+
"ville": 1,
|
|
8
|
+
"libelle commune": 1,
|
|
9
|
+
}
|
|
10
10
|
|
|
11
11
|
_options = Options(
|
|
12
12
|
ignore_case=True,
|
|
@@ -4,11 +4,12 @@ from csv_detective.parsing.text import _process_text
|
|
|
4
4
|
|
|
5
5
|
proportion = 1
|
|
6
6
|
tags = ["fr"]
|
|
7
|
-
labels =
|
|
8
|
-
"csp insee",
|
|
9
|
-
"csp",
|
|
10
|
-
"categorie socioprofessionnelle",
|
|
11
|
-
|
|
7
|
+
labels = {
|
|
8
|
+
"csp insee": 1,
|
|
9
|
+
"csp": 0.75,
|
|
10
|
+
"categorie socioprofessionnelle": 1,
|
|
11
|
+
"sociopro": 1,
|
|
12
|
+
}
|
|
12
13
|
|
|
13
14
|
f = open(join(dirname(__file__), "data", "csp_insee.txt"), "r")
|
|
14
15
|
codes_insee = f.read().split("\n")
|
csv_detective/formats/date.py
CHANGED
|
@@ -8,23 +8,23 @@ from dateutil.parser import parse as dateutil_parser
|
|
|
8
8
|
proportion = 1
|
|
9
9
|
tags = ["temp", "type"]
|
|
10
10
|
python_type = "date"
|
|
11
|
-
SHARED_DATE_LABELS =
|
|
12
|
-
"date",
|
|
13
|
-
"mise à jour",
|
|
14
|
-
"modifie",
|
|
15
|
-
"maj",
|
|
16
|
-
"datemaj",
|
|
17
|
-
"update",
|
|
18
|
-
"created",
|
|
19
|
-
"modified",
|
|
20
|
-
|
|
21
|
-
labels = SHARED_DATE_LABELS
|
|
22
|
-
"jour",
|
|
23
|
-
"periode",
|
|
24
|
-
"dpc",
|
|
25
|
-
"yyyymmdd",
|
|
26
|
-
"aaaammjj",
|
|
27
|
-
|
|
11
|
+
SHARED_DATE_LABELS = {
|
|
12
|
+
"date": 1,
|
|
13
|
+
"mise à jour": 1,
|
|
14
|
+
"modifie": 1,
|
|
15
|
+
"maj": 0.75,
|
|
16
|
+
"datemaj": 1,
|
|
17
|
+
"update": 1,
|
|
18
|
+
"created": 1,
|
|
19
|
+
"modified": 1,
|
|
20
|
+
}
|
|
21
|
+
labels = SHARED_DATE_LABELS | {
|
|
22
|
+
"jour": 0.75,
|
|
23
|
+
"periode": 0.75,
|
|
24
|
+
"dpc": 0.5,
|
|
25
|
+
"yyyymmdd": 1,
|
|
26
|
+
"aaaammjj": 1,
|
|
27
|
+
}
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def date_casting(val: str) -> datetime | None:
|
csv_detective/formats/date_fr.py
CHANGED
|
@@ -5,7 +5,7 @@ from csv_detective.formats.date import SHARED_DATE_LABELS, aaaammjj_pattern, dat
|
|
|
5
5
|
proportion = 1
|
|
6
6
|
tags = ["temp", "type"]
|
|
7
7
|
python_type = "datetime"
|
|
8
|
-
labels = SHARED_DATE_LABELS
|
|
8
|
+
labels = SHARED_DATE_LABELS | {"datetime": 1, "timestamp": 1}
|
|
9
9
|
|
|
10
10
|
threshold = 0.7
|
|
11
11
|
pat = (
|
|
@@ -2,21 +2,21 @@ from frformat import Departement, Millesime, Options
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.9
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
|
-
labels =
|
|
6
|
-
"departement",
|
|
7
|
-
"libelle du departement",
|
|
8
|
-
"deplib",
|
|
9
|
-
"nom dept",
|
|
10
|
-
"dept",
|
|
11
|
-
"libdepartement",
|
|
12
|
-
"nom departement",
|
|
13
|
-
"libelle dep",
|
|
14
|
-
"libelle departement",
|
|
15
|
-
"lb departements",
|
|
16
|
-
"dep libusage",
|
|
17
|
-
"lb departement",
|
|
18
|
-
"nom dep",
|
|
19
|
-
|
|
5
|
+
labels = {
|
|
6
|
+
"departement": 1,
|
|
7
|
+
"libelle du departement": 1,
|
|
8
|
+
"deplib": 1,
|
|
9
|
+
"nom dept": 1,
|
|
10
|
+
"dept": 0.75,
|
|
11
|
+
"libdepartement": 1,
|
|
12
|
+
"nom departement": 1,
|
|
13
|
+
"libelle dep": 1,
|
|
14
|
+
"libelle departement": 1,
|
|
15
|
+
"lb departements": 1,
|
|
16
|
+
"dep libusage": 1,
|
|
17
|
+
"lb departement": 1,
|
|
18
|
+
"nom dep": 1,
|
|
19
|
+
}
|
|
20
20
|
|
|
21
21
|
_options = Options(
|
|
22
22
|
ignore_case=True,
|
csv_detective/formats/email.py
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
3
|
proportion = 0.9
|
|
4
|
-
labels =
|
|
5
|
-
"email",
|
|
6
|
-
"mail",
|
|
7
|
-
"courriel",
|
|
8
|
-
"contact",
|
|
9
|
-
"mel",
|
|
10
|
-
"lieucourriel",
|
|
11
|
-
"coordinates.emailcontact",
|
|
12
|
-
"e mail",
|
|
13
|
-
"mo mail",
|
|
14
|
-
"adresse mail",
|
|
15
|
-
"adresse email",
|
|
16
|
-
|
|
4
|
+
labels = {
|
|
5
|
+
"email": 1,
|
|
6
|
+
"mail": 1,
|
|
7
|
+
"courriel": 1,
|
|
8
|
+
"contact": 1,
|
|
9
|
+
"mel": 1,
|
|
10
|
+
"lieucourriel": 1,
|
|
11
|
+
"coordinates.emailcontact": 1,
|
|
12
|
+
"e mail": 1,
|
|
13
|
+
"mo mail": 1,
|
|
14
|
+
"adresse mail": 1,
|
|
15
|
+
"adresse email": 1,
|
|
16
|
+
}
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def _is(val):
|
csv_detective/formats/float.py
CHANGED
csv_detective/formats/geojson.py
CHANGED
|
@@ -3,16 +3,15 @@ import json
|
|
|
3
3
|
proportion = 1
|
|
4
4
|
tags = ["geo"]
|
|
5
5
|
python_type = "json"
|
|
6
|
-
labels =
|
|
7
|
-
"json geojson",
|
|
8
|
-
"json",
|
|
9
|
-
"geojson",
|
|
10
|
-
"geo shape",
|
|
11
|
-
"geom",
|
|
12
|
-
"geometry",
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
]
|
|
6
|
+
labels = {
|
|
7
|
+
"json geojson": 1,
|
|
8
|
+
"json": 1,
|
|
9
|
+
"geojson": 1,
|
|
10
|
+
"geo shape": 1,
|
|
11
|
+
"geom": 0.75,
|
|
12
|
+
"geometry": 1,
|
|
13
|
+
"geoshape": 1,
|
|
14
|
+
}
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
def _is(val) -> bool:
|
|
@@ -4,14 +4,16 @@ from csv_detective.parsing.text import _process_text
|
|
|
4
4
|
|
|
5
5
|
proportion = 0.8
|
|
6
6
|
tags = ["fr"]
|
|
7
|
-
labels =
|
|
8
|
-
"code ape",
|
|
9
|
-
"code activite (ape)",
|
|
10
|
-
"code naf",
|
|
11
|
-
"code naf organisme designe",
|
|
12
|
-
"code naf organisme designant",
|
|
13
|
-
"base sirene : code ape de l'etablissement siege",
|
|
14
|
-
|
|
7
|
+
labels = {
|
|
8
|
+
"code ape": 1,
|
|
9
|
+
"code activite (ape)": 1,
|
|
10
|
+
"code naf": 1,
|
|
11
|
+
"code naf organisme designe": 1,
|
|
12
|
+
"code naf organisme designant": 1,
|
|
13
|
+
"base sirene : code ape de l'etablissement siege": 1,
|
|
14
|
+
"naf": 0.75,
|
|
15
|
+
"ape": 0.5,
|
|
16
|
+
}
|
|
15
17
|
|
|
16
18
|
f = open(join(dirname(__file__), "data", "insee_ape700.txt"), "r")
|
|
17
19
|
condes_insee_ape = f.read().split("\n")
|
|
@@ -2,12 +2,12 @@ from frformat import Canton, Millesime, Options
|
|
|
2
2
|
|
|
3
3
|
proportion = 0.9
|
|
4
4
|
tags = ["fr", "geo"]
|
|
5
|
-
labels =
|
|
6
|
-
"insee canton",
|
|
7
|
-
"canton",
|
|
8
|
-
"cant",
|
|
9
|
-
"nom canton",
|
|
10
|
-
|
|
5
|
+
labels = {
|
|
6
|
+
"insee canton": 1,
|
|
7
|
+
"canton": 1,
|
|
8
|
+
"cant": 0.5,
|
|
9
|
+
"nom canton": 1,
|
|
10
|
+
}
|
|
11
11
|
|
|
12
12
|
_options = Options(
|
|
13
13
|
ignore_case=True,
|
csv_detective/formats/int.py
CHANGED
|
@@ -3,15 +3,16 @@ from os.path import dirname, join
|
|
|
3
3
|
|
|
4
4
|
proportion = 1
|
|
5
5
|
tags = ["geo"]
|
|
6
|
-
labels =
|
|
7
|
-
"iso country code",
|
|
8
|
-
"code pays",
|
|
9
|
-
"pays",
|
|
10
|
-
"country",
|
|
11
|
-
"nation",
|
|
12
|
-
"pays code",
|
|
13
|
-
"code pays (iso)",
|
|
14
|
-
|
|
6
|
+
labels = {
|
|
7
|
+
"iso country code": 1,
|
|
8
|
+
"code pays": 1,
|
|
9
|
+
"pays": 1,
|
|
10
|
+
"country": 1,
|
|
11
|
+
"nation": 1,
|
|
12
|
+
"pays code": 1,
|
|
13
|
+
"code pays (iso)": 1,
|
|
14
|
+
"code": 0.5,
|
|
15
|
+
}
|
|
15
16
|
|
|
16
17
|
with open(join(dirname(__file__), "data", "iso_country_code_alpha2.txt"), "r") as iofile:
|
|
17
18
|
liste_pays = iofile.read().split("\n")
|
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from os.path import dirname, join
|
|
3
3
|
|
|
4
|
+
from csv_detective.formats.iso_country_code_alpha2 import labels # noqa
|
|
5
|
+
|
|
4
6
|
proportion = 1
|
|
5
7
|
tags = ["geo"]
|
|
6
|
-
labels = [
|
|
7
|
-
"iso country code",
|
|
8
|
-
"code pays",
|
|
9
|
-
"pays",
|
|
10
|
-
"country",
|
|
11
|
-
"nation",
|
|
12
|
-
"pays code",
|
|
13
|
-
"code pays (iso)",
|
|
14
|
-
]
|
|
15
8
|
|
|
16
9
|
with open(join(dirname(__file__), "data", "iso_country_code_alpha3.txt"), "r") as iofile:
|
|
17
10
|
liste_pays = iofile.read().split("\n")
|
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from os.path import dirname, join
|
|
3
3
|
|
|
4
|
+
from csv_detective.formats.iso_country_code_alpha2 import labels # noqa
|
|
5
|
+
|
|
4
6
|
proportion = 1
|
|
5
7
|
tags = ["geo"]
|
|
6
|
-
labels = [
|
|
7
|
-
"iso country code",
|
|
8
|
-
"code pays",
|
|
9
|
-
"pays",
|
|
10
|
-
"country",
|
|
11
|
-
"nation",
|
|
12
|
-
"pays code",
|
|
13
|
-
"code pays (iso)",
|
|
14
|
-
]
|
|
15
8
|
|
|
16
9
|
with open(join(dirname(__file__), "data", "iso_country_code_numeric.txt"), "r") as iofile:
|
|
17
10
|
liste_pays = iofile.read().split("\n")
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
proportion = 0.8
|
|
2
2
|
tags = ["fr", "temp"]
|
|
3
|
-
labels =
|
|
4
|
-
"jour semaine",
|
|
5
|
-
"type jour",
|
|
6
|
-
"jour de la semaine",
|
|
7
|
-
"saufjour",
|
|
8
|
-
"nomjour",
|
|
9
|
-
"jour",
|
|
10
|
-
"jour de fermeture",
|
|
11
|
-
|
|
3
|
+
labels = {
|
|
4
|
+
"jour semaine": 1,
|
|
5
|
+
"type jour": 1,
|
|
6
|
+
"jour de la semaine": 1,
|
|
7
|
+
"saufjour": 1,
|
|
8
|
+
"nomjour": 1,
|
|
9
|
+
"jour": 0.75,
|
|
10
|
+
"jour de fermeture": 1,
|
|
11
|
+
}
|
|
12
12
|
|
|
13
13
|
jours = {
|
|
14
14
|
"lundi",
|
|
@@ -31,11 +31,10 @@ jours = {
|
|
|
31
31
|
def _is(val):
|
|
32
32
|
if not isinstance(val, str):
|
|
33
33
|
return False
|
|
34
|
-
|
|
35
|
-
return val in jours
|
|
34
|
+
return val.lower() in jours
|
|
36
35
|
|
|
37
36
|
|
|
38
37
|
_test_values = {
|
|
39
38
|
True: ["lundi"],
|
|
40
|
-
False: ["jour
|
|
39
|
+
False: ["jour"],
|
|
41
40
|
}
|