csv-detective 0.9.3.dev2258__py3-none-any.whl → 0.9.3.dev2348__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detection/formats.py +12 -15
- csv_detective/explore_csv.py +28 -9
- csv_detective/format.py +67 -0
- csv_detective/formats/__init__.py +9 -0
- csv_detective/{detect_fields/FR/geo/adresse/__init__.py → formats/adresse.py} +116 -100
- csv_detective/{detect_fields/other/booleen/__init__.py → formats/booleen.py} +35 -27
- csv_detective/formats/code_commune_insee.py +26 -0
- csv_detective/{detect_fields/FR/other/code_csp_insee/__init__.py → formats/code_csp_insee.py} +36 -29
- csv_detective/{detect_fields/FR/geo/code_departement/__init__.py → formats/code_departement.py} +29 -15
- csv_detective/formats/code_fantoir.py +21 -0
- csv_detective/{detect_fields/FR/other/code_import/__init__.py → formats/code_import.py} +17 -9
- csv_detective/formats/code_postal.py +25 -0
- csv_detective/formats/code_region.py +22 -0
- csv_detective/formats/code_rna.py +29 -0
- csv_detective/formats/code_waldec.py +17 -0
- csv_detective/{detect_fields/FR/geo/commune/__init__.py → formats/commune.py} +27 -16
- csv_detective/{detect_fields/FR/other/csp_insee/__init__.py → formats/csp_insee.py} +31 -19
- csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
- csv_detective/{detect_fields/temp/date/__init__.py → formats/date.py} +99 -62
- csv_detective/formats/date_fr.py +22 -0
- csv_detective/{detect_fields/temp/datetime_aware/__init__.py → formats/datetime_aware.py} +18 -7
- csv_detective/{detect_fields/temp/datetime_naive/__init__.py → formats/datetime_naive.py} +21 -2
- csv_detective/{detect_fields/temp/datetime_rfc822/__init__.py → formats/datetime_rfc822.py} +24 -18
- csv_detective/formats/departement.py +37 -0
- csv_detective/formats/email.py +28 -0
- csv_detective/{detect_fields/other/float/__init__.py → formats/float.py} +29 -21
- csv_detective/formats/geojson.py +36 -0
- csv_detective/{detect_fields/FR/other/insee_ape700/__init__.py → formats/insee_ape700.py} +31 -19
- csv_detective/{detect_fields/FR/geo/insee_canton/__init__.py → formats/insee_canton.py} +28 -15
- csv_detective/{detect_fields/other/int/__init__.py → formats/int.py} +23 -16
- csv_detective/formats/iso_country_code_alpha2.py +30 -0
- csv_detective/formats/iso_country_code_alpha3.py +30 -0
- csv_detective/formats/iso_country_code_numeric.py +31 -0
- csv_detective/{detect_fields/FR/temp/jour_de_la_semaine/__init__.py → formats/jour_de_la_semaine.py} +41 -25
- csv_detective/{detect_fields/other/json/__init__.py → formats/json.py} +20 -14
- csv_detective/formats/latitude_l93.py +48 -0
- csv_detective/formats/latitude_wgs.py +42 -0
- csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
- csv_detective/formats/latlon_wgs.py +53 -0
- csv_detective/formats/longitude_l93.py +39 -0
- csv_detective/formats/longitude_wgs.py +32 -0
- csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
- csv_detective/formats/lonlat_wgs.py +36 -0
- csv_detective/{detect_fields/FR/temp/mois_de_annee/__init__.py → formats/mois_de_lannee.py} +48 -39
- csv_detective/formats/money.py +18 -0
- csv_detective/formats/mongo_object_id.py +14 -0
- csv_detective/formats/pays.py +35 -0
- csv_detective/formats/percent.py +16 -0
- csv_detective/{detect_fields/FR/geo/region/__init__.py → formats/region.py} +70 -50
- csv_detective/formats/sexe.py +17 -0
- csv_detective/{detect_fields/FR/other/siren/__init__.py → formats/siren.py} +37 -20
- csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -31
- csv_detective/formats/tel_fr.py +36 -0
- csv_detective/formats/uai.py +36 -0
- csv_detective/formats/url.py +45 -0
- csv_detective/formats/username.py +14 -0
- csv_detective/formats/uuid.py +16 -0
- csv_detective/formats/year.py +28 -0
- csv_detective/output/__init__.py +3 -4
- csv_detective/output/dataframe.py +3 -3
- csv_detective/output/profile.py +2 -3
- csv_detective/output/schema.py +2 -2
- csv_detective/parsing/columns.py +35 -50
- csv_detective/parsing/csv.py +2 -2
- csv_detective/parsing/load.py +4 -5
- csv_detective/validate.py +9 -4
- {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/METADATA +6 -5
- csv_detective-0.9.3.dev2348.dist-info/RECORD +102 -0
- tests/test_fields.py +39 -364
- tests/test_file.py +1 -1
- tests/test_labels.py +5 -3
- tests/test_structure.py +40 -36
- csv_detective/detect_fields/FR/__init__.py +0 -0
- csv_detective/detect_fields/FR/geo/__init__.py +0 -0
- csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -9
- csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -9
- csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -9
- csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -10
- csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -16
- csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -19
- csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
- csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -16
- csv_detective/detect_fields/FR/other/__init__.py +0 -0
- csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
- csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -9
- csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -9
- csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
- csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -11
- csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -17
- csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
- csv_detective/detect_fields/FR/temp/__init__.py +0 -0
- csv_detective/detect_fields/__init__.py +0 -112
- csv_detective/detect_fields/geo/__init__.py +0 -0
- csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
- csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
- csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
- csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -18
- csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -16
- csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
- csv_detective/detect_fields/geo/lonlat_wgs/__init__.py +0 -16
- csv_detective/detect_fields/other/__init__.py +0 -0
- csv_detective/detect_fields/other/email/__init__.py +0 -10
- csv_detective/detect_fields/other/money/__init__.py +0 -11
- csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
- csv_detective/detect_fields/other/percent/__init__.py +0 -9
- csv_detective/detect_fields/other/twitter/__init__.py +0 -8
- csv_detective/detect_fields/other/url/__init__.py +0 -14
- csv_detective/detect_fields/other/uuid/__init__.py +0 -10
- csv_detective/detect_fields/temp/__init__.py +0 -0
- csv_detective/detect_fields/temp/year/__init__.py +0 -10
- csv_detective/detect_labels/FR/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/__init__.py +0 -0
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -15
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -17
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -15
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -12
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -16
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -14
- csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -12
- csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -22
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -13
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -30
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -30
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -21
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -21
- csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -20
- csv_detective/detect_labels/FR/geo/region/__init__.py +0 -20
- csv_detective/detect_labels/FR/other/__init__.py +0 -0
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -8
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -13
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -8
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -13
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -9
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -15
- csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -8
- csv_detective/detect_labels/FR/other/siren/__init__.py +0 -17
- csv_detective/detect_labels/FR/other/siret/__init__.py +0 -16
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -20
- csv_detective/detect_labels/FR/other/uai/__init__.py +0 -25
- csv_detective/detect_labels/FR/temp/__init__.py +0 -0
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -16
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -8
- csv_detective/detect_labels/__init__.py +0 -94
- csv_detective/detect_labels/geo/__init__.py +0 -0
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -16
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -16
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -16
- csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -17
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -30
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -39
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -21
- csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +0 -23
- csv_detective/detect_labels/other/__init__.py +0 -0
- csv_detective/detect_labels/other/booleen/__init__.py +0 -8
- csv_detective/detect_labels/other/email/__init__.py +0 -20
- csv_detective/detect_labels/other/float/__init__.py +0 -8
- csv_detective/detect_labels/other/int/__init__.py +0 -8
- csv_detective/detect_labels/other/money/__init__.py +0 -8
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -8
- csv_detective/detect_labels/other/twitter/__init__.py +0 -8
- csv_detective/detect_labels/other/url/__init__.py +0 -23
- csv_detective/detect_labels/other/uuid/__init__.py +0 -8
- csv_detective/detect_labels/temp/__init__.py +0 -0
- csv_detective/detect_labels/temp/date/__init__.py +0 -28
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -19
- csv_detective/detect_labels/temp/year/__init__.py +0 -19
- csv_detective/load_tests.py +0 -59
- csv_detective-0.9.3.dev2258.dist-info/RECORD +0 -166
- /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
- /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
- {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from os.path import dirname, join
|
|
3
|
-
|
|
4
|
-
PROPORTION = 1
|
|
5
|
-
|
|
6
|
-
with open(join(dirname(__file__), "iso_country_code_alpha3.txt"), "r") as iofile:
|
|
7
|
-
liste_pays = iofile.read().split("\n")
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def _is(val):
|
|
11
|
-
"""Renvoie True si val peut etre un code iso pays alpha-3, False sinon"""
|
|
12
|
-
if not isinstance(val, str) or not bool(re.match(r"[A-Z]{3}$", val)):
|
|
13
|
-
return False
|
|
14
|
-
return val in set(liste_pays)
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from os.path import dirname, join
|
|
3
|
-
|
|
4
|
-
PROPORTION = 1
|
|
5
|
-
|
|
6
|
-
with open(join(dirname(__file__), "iso_country_code_numeric.txt"), "r") as iofile:
|
|
7
|
-
liste_pays = iofile.read().split("\n")
|
|
8
|
-
liste_pays = set(liste_pays)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def _is(val):
|
|
12
|
-
"""Renvoie True si val peut etre un code iso pays numerique, False sinon"""
|
|
13
|
-
if not isinstance(val, str) or not bool(re.match(r"[0-9]{3}$", val)):
|
|
14
|
-
return False
|
|
15
|
-
return val in liste_pays
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.9
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
|
|
7
|
-
"""Renvoie True si val peut etre un geojson"""
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
j = json.loads(val)
|
|
11
|
-
if isinstance(j, dict):
|
|
12
|
-
if "type" in j and "coordinates" in j:
|
|
13
|
-
return True
|
|
14
|
-
if "geometry" in j and "coordinates" in j["geometry"]:
|
|
15
|
-
return True
|
|
16
|
-
except Exception:
|
|
17
|
-
pass
|
|
18
|
-
return False
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.detect_fields.other.float import _is as is_float
|
|
2
|
-
|
|
3
|
-
PROPORTION = 1
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
|
|
7
|
-
"""Renvoie True si val peut etre une latitude"""
|
|
8
|
-
try:
|
|
9
|
-
return is_float(val) and float(val) >= -90 and float(val) <= 90
|
|
10
|
-
except ValueError:
|
|
11
|
-
return False
|
|
12
|
-
except OverflowError:
|
|
13
|
-
return False
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from ..latitude_wgs import _is as is_lat
|
|
2
|
-
from ..longitude_wgs import _is as is_lon
|
|
3
|
-
|
|
4
|
-
PROPORTION = 1
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def _is(val):
|
|
8
|
-
"""Renvoie True si val peut etre une latitude,longitude"""
|
|
9
|
-
|
|
10
|
-
if not isinstance(val, str) or val.count(",") != 1:
|
|
11
|
-
return False
|
|
12
|
-
lat, lon = val.split(",")
|
|
13
|
-
# handling [lat,lon]
|
|
14
|
-
if lat.startswith("[") and lon.endswith("]"):
|
|
15
|
-
lat, lon = lat[1:], lon[:-1]
|
|
16
|
-
return is_lat(lat) and is_lon(lon.replace(" ", ""))
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.detect_fields.other.float import _is as is_float
|
|
2
|
-
|
|
3
|
-
PROPORTION = 1
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(val):
|
|
7
|
-
"""Renvoie True si val peut etre une longitude"""
|
|
8
|
-
try:
|
|
9
|
-
return is_float(val) and float(val) >= -180 and float(val) <= 180
|
|
10
|
-
except ValueError:
|
|
11
|
-
return False
|
|
12
|
-
except OverflowError:
|
|
13
|
-
return False
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from ..latitude_wgs import _is as is_lat
|
|
2
|
-
from ..longitude_wgs import _is as is_lon
|
|
3
|
-
|
|
4
|
-
PROPORTION = 1
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def _is(val):
|
|
8
|
-
"""Renvoie True si val peut etre une longitude,latitude"""
|
|
9
|
-
|
|
10
|
-
if not isinstance(val, str) or val.count(",") != 1:
|
|
11
|
-
return False
|
|
12
|
-
lon, lat = val.split(",")
|
|
13
|
-
# handling [lon,lat]
|
|
14
|
-
if lon.startswith("[") and lat.endswith("]"):
|
|
15
|
-
lon, lat = lon[1:], lat[:-1]
|
|
16
|
-
return is_lon(lon) and is_lat(lat.replace(" ", ""))
|
|
File without changes
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
PROPORTION = 1
|
|
4
|
-
url_pattern = re.compile(
|
|
5
|
-
r"^((https?|ftp)://|www\.)(([A-Za-z0-9-]+\.)+[A-Za-z]{2,6})"
|
|
6
|
-
r"(/[A-Za-z0-9._~:/?#[@!$&'()*+,;=%-]*)?$"
|
|
7
|
-
)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def _is(val):
|
|
11
|
-
"""Detects urls"""
|
|
12
|
-
if not isinstance(val, str):
|
|
13
|
-
return False
|
|
14
|
-
return bool(url_pattern.match(val))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"adresse",
|
|
9
|
-
"adresse postale",
|
|
10
|
-
"adresse geographique",
|
|
11
|
-
"adr",
|
|
12
|
-
"adresse complete",
|
|
13
|
-
"adresse station",
|
|
14
|
-
]
|
|
15
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"code commune insee",
|
|
9
|
-
"code insee",
|
|
10
|
-
"codes insee",
|
|
11
|
-
"code commune",
|
|
12
|
-
"code insee commune",
|
|
13
|
-
"insee",
|
|
14
|
-
"code com",
|
|
15
|
-
"com",
|
|
16
|
-
]
|
|
17
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
# "dep": Possible confusion with dep name?
|
|
8
|
-
words_combinations_list = [
|
|
9
|
-
"code departement",
|
|
10
|
-
"code_departement",
|
|
11
|
-
"dep",
|
|
12
|
-
"departement",
|
|
13
|
-
"dept",
|
|
14
|
-
]
|
|
15
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"code postal",
|
|
9
|
-
"postal code",
|
|
10
|
-
"postcode",
|
|
11
|
-
"post code",
|
|
12
|
-
"cp",
|
|
13
|
-
"codes postaux",
|
|
14
|
-
"location postcode",
|
|
15
|
-
]
|
|
16
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
# "reg" : possible confusion with region name?
|
|
8
|
-
words_combinations_list = [
|
|
9
|
-
"code region",
|
|
10
|
-
"reg",
|
|
11
|
-
"code insee region",
|
|
12
|
-
"region",
|
|
13
|
-
]
|
|
14
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"departement",
|
|
9
|
-
"libelle du departement",
|
|
10
|
-
"deplib",
|
|
11
|
-
"nom dept",
|
|
12
|
-
"dept",
|
|
13
|
-
"libdepartement",
|
|
14
|
-
"nom departement",
|
|
15
|
-
"libelle dep",
|
|
16
|
-
"libelle departement",
|
|
17
|
-
"lb departements",
|
|
18
|
-
"dep libusage",
|
|
19
|
-
"lb departement",
|
|
20
|
-
"nom dep",
|
|
21
|
-
]
|
|
22
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"insee canton",
|
|
9
|
-
"canton",
|
|
10
|
-
"cant",
|
|
11
|
-
"nom canton",
|
|
12
|
-
]
|
|
13
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
# Does not always detect CRS
|
|
8
|
-
words_combinations_list = [
|
|
9
|
-
"latitude",
|
|
10
|
-
"lat",
|
|
11
|
-
"y",
|
|
12
|
-
"yf",
|
|
13
|
-
"yd",
|
|
14
|
-
"y l93",
|
|
15
|
-
"coordonnee y",
|
|
16
|
-
"latitude lb93",
|
|
17
|
-
"coord y",
|
|
18
|
-
"ycoord",
|
|
19
|
-
"geocodage y gps",
|
|
20
|
-
"location latitude",
|
|
21
|
-
"ylatitude",
|
|
22
|
-
"ylat",
|
|
23
|
-
"latitude (y)",
|
|
24
|
-
"latitudeorg",
|
|
25
|
-
"coordinates.latitude",
|
|
26
|
-
"googlemap latitude",
|
|
27
|
-
"latitudelieu",
|
|
28
|
-
"latitude googlemap",
|
|
29
|
-
]
|
|
30
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"latitude",
|
|
9
|
-
"lat",
|
|
10
|
-
"y",
|
|
11
|
-
"yf",
|
|
12
|
-
"yd",
|
|
13
|
-
"coordonnee y",
|
|
14
|
-
"coord y",
|
|
15
|
-
"ycoord",
|
|
16
|
-
"geocodage y gps",
|
|
17
|
-
"location latitude",
|
|
18
|
-
"ylatitude",
|
|
19
|
-
"ylat",
|
|
20
|
-
"latitude (y)",
|
|
21
|
-
"latitudeorg",
|
|
22
|
-
"coordinates.latitude",
|
|
23
|
-
"googlemap latitude",
|
|
24
|
-
"latitudelieu",
|
|
25
|
-
"latitude googlemap",
|
|
26
|
-
"latitude wgs84",
|
|
27
|
-
"y wgs84",
|
|
28
|
-
"latitude (wgs84)",
|
|
29
|
-
]
|
|
30
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
# Does not detect CRS
|
|
8
|
-
words_combinations_list = [
|
|
9
|
-
"longitude",
|
|
10
|
-
"lon",
|
|
11
|
-
"long",
|
|
12
|
-
"geocodage x gps",
|
|
13
|
-
"location longitude",
|
|
14
|
-
"xlongitude",
|
|
15
|
-
"lng",
|
|
16
|
-
"xlong",
|
|
17
|
-
"x",
|
|
18
|
-
"xf",
|
|
19
|
-
"xd",
|
|
20
|
-
]
|
|
21
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
# Does not detect CRS
|
|
8
|
-
words_combinations_list = [
|
|
9
|
-
"longitude",
|
|
10
|
-
"lon",
|
|
11
|
-
"long",
|
|
12
|
-
"geocodage x gps",
|
|
13
|
-
"location longitude",
|
|
14
|
-
"xlongitude",
|
|
15
|
-
"lng",
|
|
16
|
-
"xlong",
|
|
17
|
-
"x",
|
|
18
|
-
"xf",
|
|
19
|
-
"xd",
|
|
20
|
-
]
|
|
21
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"pays",
|
|
9
|
-
"payslieu",
|
|
10
|
-
"paysorg",
|
|
11
|
-
"country",
|
|
12
|
-
"pays lib",
|
|
13
|
-
"lieupays",
|
|
14
|
-
"pays beneficiaire",
|
|
15
|
-
"nom du pays",
|
|
16
|
-
"journey start country",
|
|
17
|
-
"libelle pays",
|
|
18
|
-
"journey end country",
|
|
19
|
-
]
|
|
20
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"region",
|
|
9
|
-
"libelle region",
|
|
10
|
-
"nom region",
|
|
11
|
-
"libelle reg",
|
|
12
|
-
"nom reg",
|
|
13
|
-
"reg libusage",
|
|
14
|
-
"nom de la region",
|
|
15
|
-
"regionorg",
|
|
16
|
-
"regionlieu",
|
|
17
|
-
"reg",
|
|
18
|
-
"nom officiel region",
|
|
19
|
-
]
|
|
20
|
-
return header_score(header, words_combinations_list)
|
|
File without changes
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"code rna",
|
|
9
|
-
"rna",
|
|
10
|
-
"n° inscription association",
|
|
11
|
-
"identifiant association",
|
|
12
|
-
]
|
|
13
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
# To improve? No specific header found in data
|
|
8
|
-
words_combinations_list = [
|
|
9
|
-
"csp insee",
|
|
10
|
-
"csp",
|
|
11
|
-
"categorie socioprofessionnelle",
|
|
12
|
-
]
|
|
13
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"code ape",
|
|
9
|
-
"code activite (ape)",
|
|
10
|
-
"code naf",
|
|
11
|
-
"code naf organisme designe",
|
|
12
|
-
"code naf organisme designant",
|
|
13
|
-
"base sirene : code ape de l'etablissement siege",
|
|
14
|
-
]
|
|
15
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"siren",
|
|
9
|
-
"siren organisme designe",
|
|
10
|
-
"siren organisme designant",
|
|
11
|
-
"n° siren",
|
|
12
|
-
"siren organisme",
|
|
13
|
-
"siren titulaire",
|
|
14
|
-
"numero siren",
|
|
15
|
-
"epci",
|
|
16
|
-
]
|
|
17
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"siret",
|
|
9
|
-
"siret d",
|
|
10
|
-
"num siret",
|
|
11
|
-
"siretacheteur",
|
|
12
|
-
"n° siret",
|
|
13
|
-
"coll siret",
|
|
14
|
-
"epci",
|
|
15
|
-
]
|
|
16
|
-
return header_score(header, words_combinations_list)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from csv_detective.parsing.text import header_score
|
|
2
|
-
|
|
3
|
-
PROPORTION = 0.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def _is(header: str) -> float:
|
|
7
|
-
words_combinations_list = [
|
|
8
|
-
"telephone",
|
|
9
|
-
"tel",
|
|
10
|
-
"tel1",
|
|
11
|
-
"tel2",
|
|
12
|
-
"phone",
|
|
13
|
-
"num tel",
|
|
14
|
-
"tel mob",
|
|
15
|
-
"telephone sav",
|
|
16
|
-
"telephone1",
|
|
17
|
-
"coordinates.phone",
|
|
18
|
-
"telephone du lieu",
|
|
19
|
-
]
|
|
20
|
-
return header_score(header, words_combinations_list)
|