csv-detective 0.10.4.dev1__py3-none-any.whl → 0.10.2549__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. csv_detective/detection/__init__.py +0 -0
  2. csv_detective/detection/columns.py +0 -0
  3. csv_detective/detection/encoding.py +0 -0
  4. csv_detective/detection/engine.py +0 -0
  5. csv_detective/detection/formats.py +38 -13
  6. csv_detective/detection/headers.py +14 -12
  7. csv_detective/detection/rows.py +1 -1
  8. csv_detective/detection/separator.py +0 -0
  9. csv_detective/detection/variables.py +0 -0
  10. csv_detective/explore_csv.py +6 -18
  11. csv_detective/format.py +5 -12
  12. csv_detective/formats/__init__.py +0 -0
  13. csv_detective/formats/adresse.py +9 -9
  14. csv_detective/formats/binary.py +1 -2
  15. csv_detective/formats/booleen.py +2 -3
  16. csv_detective/formats/code_commune_insee.py +10 -12
  17. csv_detective/formats/code_csp_insee.py +1 -1
  18. csv_detective/formats/code_departement.py +7 -8
  19. csv_detective/formats/code_fantoir.py +5 -6
  20. csv_detective/formats/code_import.py +1 -1
  21. csv_detective/formats/code_postal.py +9 -10
  22. csv_detective/formats/code_region.py +6 -7
  23. csv_detective/formats/code_rna.py +6 -7
  24. csv_detective/formats/code_waldec.py +1 -1
  25. csv_detective/formats/commune.py +5 -5
  26. csv_detective/formats/csp_insee.py +5 -6
  27. csv_detective/formats/data/insee_ape700.txt +1 -1
  28. csv_detective/formats/data/iso_country_code_alpha2.txt +397 -153
  29. csv_detective/formats/data/iso_country_code_alpha3.txt +132 -132
  30. csv_detective/formats/data/iso_country_code_numeric.txt +94 -94
  31. csv_detective/formats/date.py +18 -28
  32. csv_detective/formats/date_fr.py +1 -1
  33. csv_detective/formats/datetime_aware.py +2 -7
  34. csv_detective/formats/datetime_naive.py +0 -3
  35. csv_detective/formats/datetime_rfc822.py +0 -1
  36. csv_detective/formats/departement.py +15 -15
  37. csv_detective/formats/email.py +13 -13
  38. csv_detective/formats/float.py +1 -2
  39. csv_detective/formats/geojson.py +10 -10
  40. csv_detective/formats/insee_ape700.py +8 -10
  41. csv_detective/formats/insee_canton.py +6 -6
  42. csv_detective/formats/int.py +1 -2
  43. csv_detective/formats/iso_country_code_alpha2.py +14 -14
  44. csv_detective/formats/iso_country_code_alpha3.py +13 -6
  45. csv_detective/formats/iso_country_code_numeric.py +9 -2
  46. csv_detective/formats/jour_de_la_semaine.py +12 -11
  47. csv_detective/formats/json.py +0 -6
  48. csv_detective/formats/latitude_l93.py +22 -8
  49. csv_detective/formats/latitude_wgs.py +29 -31
  50. csv_detective/formats/latitude_wgs_fr_metropole.py +30 -7
  51. csv_detective/formats/latlon_wgs.py +28 -30
  52. csv_detective/formats/longitude_l93.py +13 -8
  53. csv_detective/formats/longitude_wgs.py +19 -34
  54. csv_detective/formats/longitude_wgs_fr_metropole.py +19 -6
  55. csv_detective/formats/lonlat_wgs.py +11 -12
  56. csv_detective/formats/mois_de_lannee.py +1 -1
  57. csv_detective/formats/money.py +1 -1
  58. csv_detective/formats/mongo_object_id.py +1 -1
  59. csv_detective/formats/pays.py +13 -11
  60. csv_detective/formats/percent.py +1 -1
  61. csv_detective/formats/region.py +13 -13
  62. csv_detective/formats/sexe.py +1 -1
  63. csv_detective/formats/siren.py +10 -9
  64. csv_detective/formats/siret.py +9 -9
  65. csv_detective/formats/tel_fr.py +13 -7
  66. csv_detective/formats/uai.py +18 -17
  67. csv_detective/formats/url.py +16 -16
  68. csv_detective/formats/username.py +1 -1
  69. csv_detective/formats/uuid.py +1 -1
  70. csv_detective/formats/year.py +12 -7
  71. csv_detective/output/__init__.py +0 -0
  72. csv_detective/output/dataframe.py +3 -8
  73. csv_detective/output/example.py +0 -0
  74. csv_detective/output/profile.py +2 -6
  75. csv_detective/output/schema.py +0 -0
  76. csv_detective/output/utils.py +0 -0
  77. csv_detective/parsing/__init__.py +0 -0
  78. csv_detective/parsing/columns.py +5 -9
  79. csv_detective/parsing/compression.py +0 -0
  80. csv_detective/parsing/csv.py +0 -0
  81. csv_detective/parsing/excel.py +1 -1
  82. csv_detective/parsing/load.py +12 -11
  83. csv_detective/parsing/text.py +12 -13
  84. csv_detective/validate.py +36 -71
  85. {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.2549.dist-info}/METADATA +18 -15
  86. csv_detective-0.10.2549.dist-info/RECORD +92 -0
  87. csv_detective-0.10.2549.dist-info/WHEEL +4 -0
  88. {csv_detective-0.10.4.dev1.dist-info → csv_detective-0.10.2549.dist-info}/entry_points.txt +1 -0
  89. csv_detective-0.10.4.dev1.dist-info/RECORD +0 -111
  90. csv_detective-0.10.4.dev1.dist-info/WHEEL +0 -5
  91. csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE +0 -21
  92. csv_detective-0.10.4.dev1.dist-info/top_level.txt +0 -3
  93. tests/__init__.py +0 -0
  94. tests/data/a_test_file.csv +0 -407
  95. tests/data/a_test_file.json +0 -394
  96. tests/data/b_test_file.csv +0 -7
  97. tests/data/c_test_file.csv +0 -2
  98. tests/data/csv_file +0 -7
  99. tests/data/file.csv.gz +0 -0
  100. tests/data/file.ods +0 -0
  101. tests/data/file.xls +0 -0
  102. tests/data/file.xlsx +0 -0
  103. tests/data/xlsx_file +0 -0
  104. tests/test_example.py +0 -67
  105. tests/test_fields.py +0 -175
  106. tests/test_file.py +0 -469
  107. tests/test_labels.py +0 -26
  108. tests/test_structure.py +0 -45
  109. tests/test_validation.py +0 -163
@@ -7,24 +7,23 @@ from dateutil.parser import parse as dateutil_parser
7
7
 
8
8
  proportion = 1
9
9
  tags = ["temp", "type"]
10
- python_type = "date"
11
- SHARED_DATE_LABELS = {
12
- "date": 1,
13
- "mise à jour": 1,
14
- "modifie": 1,
15
- "maj": 0.75,
16
- "datemaj": 1,
17
- "update": 1,
18
- "created": 1,
19
- "modified": 1,
20
- }
21
- labels = SHARED_DATE_LABELS | {
22
- "jour": 0.75,
23
- "periode": 0.75,
24
- "dpc": 0.5,
25
- "yyyymmdd": 1,
26
- "aaaammjj": 1,
27
- }
10
+ SHARED_DATE_LABELS = [
11
+ "date",
12
+ "mise à jour",
13
+ "modifie",
14
+ "maj",
15
+ "datemaj",
16
+ "update",
17
+ "created",
18
+ "modified",
19
+ ]
20
+ labels = SHARED_DATE_LABELS + [
21
+ "jour",
22
+ "periode",
23
+ "dpc",
24
+ "yyyymmdd",
25
+ "aaaammjj",
26
+ ]
28
27
 
29
28
 
30
29
  def date_casting(val: str) -> datetime | None:
@@ -57,9 +56,7 @@ string_month_pattern = (
57
56
 
58
57
 
59
58
  def _is(val):
60
- # many early stops, to cut processing time
61
- # and avoid the costly use of date_casting as much as possible
62
- # /!\ timestamps are considered ints, not dates
59
+ # early stops, to cut processing time
63
60
  if not isinstance(val, str) or len(val) > 20 or len(val) < 8:
64
61
  return False
65
62
  # if it's a usual date pattern
@@ -72,13 +69,8 @@ def _is(val):
72
69
  ]
73
70
  ):
74
71
  return True
75
- if re.match(r"^-?\d+[\.|,]\d+$", val):
76
- # regular floats are excluded
77
- return False
78
- # not enough digits => not a date (slightly arbitrary)
79
72
  if sum([char.isdigit() for char in val]) / len(val) < threshold:
80
73
  return False
81
- # last resort
82
74
  res = date_casting(val)
83
75
  if not res or res.hour or res.minute or res.second:
84
76
  return False
@@ -93,7 +85,6 @@ _test_values = {
93
85
  "15 décembre 1985",
94
86
  "02 05 2003",
95
87
  "20030502",
96
- "2003.05.02",
97
88
  "1993-12/02",
98
89
  ],
99
90
  False: [
@@ -104,6 +95,5 @@ _test_values = {
104
95
  "12152003",
105
96
  "20031512",
106
97
  "02052003",
107
- "6.27367393749392839",
108
98
  ],
109
99
  }
@@ -4,7 +4,7 @@ from csv_detective.parsing.text import _process_text
4
4
 
5
5
  proportion = 1
6
6
  tags = ["fr", "temp"]
7
- labels = {"date": 1}
7
+ labels = ["date"]
8
8
 
9
9
  pattern = (
10
10
  r"^(0?[1-9]|[12][0-9]|3[01])[ \-/](janvier|fevrier|mars|avril|mai|juin|juillet|aout|septembre"
@@ -4,8 +4,7 @@ from csv_detective.formats.date import SHARED_DATE_LABELS, aaaammjj_pattern, dat
4
4
 
5
5
  proportion = 1
6
6
  tags = ["temp", "type"]
7
- python_type = "datetime"
8
- labels = SHARED_DATE_LABELS | {"datetime": 1, "timestamp": 1}
7
+ labels = SHARED_DATE_LABELS + ["datetime", "timestamp"]
9
8
 
10
9
  threshold = 0.7
11
10
  pat = (
@@ -13,9 +12,7 @@ pat = (
13
12
  + r"(T|\s)(0\d|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(.\d{1,6})"
14
13
  + r"?(([+-](0\d|1[0-9]|2[0-3]):([0-5][0-9]))|Z)$"
15
14
  )
16
- # date_casting is very (too?) good at finding date(time)s where there sometimes is just a number
17
- # this prefix check asserts we only consider strings that have a somewhat fine structure trying to cast
18
- prefix = r"^\d{2}[-/:]?\d{2}"
15
+ prefix = r"^\d{4}"
19
16
 
20
17
 
21
18
  def _is(val):
@@ -44,8 +41,6 @@ _test_values = {
44
41
  "2000-12-21 10:20:10.1Z",
45
42
  "2024-12-19T10:53:36.428000+00:00",
46
43
  "1996/06/22 10:20:10 GMT",
47
- "12/31/2022 12:00:00-04:00",
48
- "12:00:00-04:00 12/31/2022",
49
44
  ],
50
45
  False: [
51
46
  "2021-06-22T30:20:10",
@@ -6,7 +6,6 @@ from csv_detective.formats.datetime_aware import labels, prefix # noqa
6
6
 
7
7
  proportion = 1
8
8
  tags = ["temp", "type"]
9
- python_type = "datetime"
10
9
  threshold = 0.7
11
10
 
12
11
  # matches AAAA-MM-JJTHH:MM:SS(.dddddd)Z with any of the listed separators for the date OR NO SEPARATOR
@@ -37,8 +36,6 @@ _test_values = {
37
36
  "2021-06-22 10:20:10",
38
37
  "2030/06-22 00:00:00",
39
38
  "2030/06/22 00:00:00.0028",
40
- "12/31/2022 12:00:00",
41
- "12:00:00 12/31/2022",
42
39
  ],
43
40
  False: [
44
41
  "2021-06-22T30:20:10",
@@ -4,7 +4,6 @@ from csv_detective.formats.datetime_aware import labels # noqa
4
4
 
5
5
  proportion = 1
6
6
  tags = ["temp", "type"]
7
- python_type = "datetime"
8
7
 
9
8
 
10
9
  def _is(val):
@@ -2,21 +2,21 @@ from frformat import Departement, Millesime, Options
2
2
 
3
3
  proportion = 0.9
4
4
  tags = ["fr", "geo"]
5
- labels = {
6
- "departement": 1,
7
- "libelle du departement": 1,
8
- "deplib": 1,
9
- "nom dept": 1,
10
- "dept": 0.75,
11
- "libdepartement": 1,
12
- "nom departement": 1,
13
- "libelle dep": 1,
14
- "libelle departement": 1,
15
- "lb departements": 1,
16
- "dep libusage": 1,
17
- "lb departement": 1,
18
- "nom dep": 1,
19
- }
5
+ labels = [
6
+ "departement",
7
+ "libelle du departement",
8
+ "deplib",
9
+ "nom dept",
10
+ "dept",
11
+ "libdepartement",
12
+ "nom departement",
13
+ "libelle dep",
14
+ "libelle departement",
15
+ "lb departements",
16
+ "dep libusage",
17
+ "lb departement",
18
+ "nom dep",
19
+ ]
20
20
 
21
21
  _options = Options(
22
22
  ignore_case=True,
@@ -1,19 +1,19 @@
1
1
  import re
2
2
 
3
3
  proportion = 0.9
4
- labels = {
5
- "email": 1,
6
- "mail": 1,
7
- "courriel": 1,
8
- "contact": 1,
9
- "mel": 1,
10
- "lieucourriel": 1,
11
- "coordinates.emailcontact": 1,
12
- "e mail": 1,
13
- "mo mail": 1,
14
- "adresse mail": 1,
15
- "adresse email": 1,
16
- }
4
+ labels = [
5
+ "email",
6
+ "mail",
7
+ "courriel",
8
+ "contact",
9
+ "mel",
10
+ "lieucourriel",
11
+ "coordinates.emailcontact",
12
+ "e mail",
13
+ "mo mail",
14
+ "adresse mail",
15
+ "adresse email",
16
+ ]
17
17
 
18
18
 
19
19
  def _is(val):
@@ -2,8 +2,7 @@ import re
2
2
 
3
3
  proportion = 1
4
4
  tags = ["type"]
5
- python_type = "float"
6
- labels = {"part": 1, "ratio": 1, "taux": 1}
5
+ labels = ["part", "ratio", "taux"]
7
6
 
8
7
  scientific_notation_pattern = r"\d+\.\d+[e|E][+|-]?\d+"
9
8
 
@@ -2,16 +2,16 @@ import json
2
2
 
3
3
  proportion = 1
4
4
  tags = ["geo"]
5
- python_type = "json"
6
- labels = {
7
- "json geojson": 1,
8
- "json": 1,
9
- "geojson": 1,
10
- "geo shape": 1,
11
- "geom": 0.75,
12
- "geometry": 1,
13
- "geoshape": 1,
14
- }
5
+ labels = [
6
+ "json geojson",
7
+ "json",
8
+ "geojson",
9
+ "geo shape",
10
+ "geom",
11
+ "geometry",
12
+ "geo shape",
13
+ "geoshape",
14
+ ]
15
15
 
16
16
 
17
17
  def _is(val) -> bool:
@@ -4,16 +4,14 @@ from csv_detective.parsing.text import _process_text
4
4
 
5
5
  proportion = 0.8
6
6
  tags = ["fr"]
7
- labels = {
8
- "code ape": 1,
9
- "code activite (ape)": 1,
10
- "code naf": 1,
11
- "code naf organisme designe": 1,
12
- "code naf organisme designant": 1,
13
- "base sirene : code ape de l'etablissement siege": 1,
14
- "naf": 0.75,
15
- "ape": 0.5,
16
- }
7
+ labels = [
8
+ "code ape",
9
+ "code activite (ape)",
10
+ "code naf",
11
+ "code naf organisme designe",
12
+ "code naf organisme designant",
13
+ "base sirene : code ape de l'etablissement siege",
14
+ ]
17
15
 
18
16
  f = open(join(dirname(__file__), "data", "insee_ape700.txt"), "r")
19
17
  condes_insee_ape = f.read().split("\n")
@@ -2,12 +2,12 @@ from frformat import Canton, Millesime, Options
2
2
 
3
3
  proportion = 0.9
4
4
  tags = ["fr", "geo"]
5
- labels = {
6
- "insee canton": 1,
7
- "canton": 1,
8
- "cant": 0.5,
9
- "nom canton": 1,
10
- }
5
+ labels = [
6
+ "insee canton",
7
+ "canton",
8
+ "cant",
9
+ "nom canton",
10
+ ]
11
11
 
12
12
  _options = Options(
13
13
  ignore_case=True,
@@ -1,6 +1,5 @@
1
+ labels = ["nb", "nombre", "nbre"]
1
2
  tag = ["type"]
2
- python_type = "int"
3
- labels = {"nb": 0.75, "nombre": 1, "nbre": 0.75}
4
3
 
5
4
 
6
5
  def _is(val):
@@ -3,28 +3,28 @@ from os.path import dirname, join
3
3
 
4
4
  proportion = 1
5
5
  tags = ["geo"]
6
- labels = {
7
- "iso country code": 1,
8
- "code pays": 1,
9
- "pays": 1,
10
- "country": 1,
11
- "nation": 1,
12
- "pays code": 1,
13
- "code pays (iso)": 1,
14
- "code": 0.5,
15
- }
6
+ labels = [
7
+ "iso country code",
8
+ "code pays",
9
+ "pays",
10
+ "country",
11
+ "nation",
12
+ "pays code",
13
+ "code pays (iso)",
14
+ ]
16
15
 
17
16
  with open(join(dirname(__file__), "data", "iso_country_code_alpha2.txt"), "r") as iofile:
18
- liste_pays = set(iofile.read().split("\n"))
17
+ liste_pays = iofile.read().split("\n")
18
+ liste_pays = set(liste_pays)
19
19
 
20
20
 
21
21
  def _is(val):
22
- if not isinstance(val, str) or not bool(re.match(r"[a-zA-Z]{2}$", val)):
22
+ if not isinstance(val, str) or not bool(re.match(r"[A-Z]{2}$", val)):
23
23
  return False
24
- return val.upper() in liste_pays
24
+ return val in liste_pays
25
25
 
26
26
 
27
27
  _test_values = {
28
- True: ["FR", "sj"],
28
+ True: ["FR"],
29
29
  False: ["XX", "A", "FRA"],
30
30
  }
@@ -1,23 +1,30 @@
1
1
  import re
2
2
  from os.path import dirname, join
3
3
 
4
- from csv_detective.formats.iso_country_code_alpha2 import labels # noqa
5
-
6
4
  proportion = 1
7
5
  tags = ["geo"]
6
+ labels = [
7
+ "iso country code",
8
+ "code pays",
9
+ "pays",
10
+ "country",
11
+ "nation",
12
+ "pays code",
13
+ "code pays (iso)",
14
+ ]
8
15
 
9
16
  with open(join(dirname(__file__), "data", "iso_country_code_alpha3.txt"), "r") as iofile:
10
- liste_pays = set(iofile.read().split("\n"))
17
+ liste_pays = iofile.read().split("\n")
11
18
 
12
19
 
13
20
  def _is(val):
14
21
  """Renvoie True si val peut etre un code iso pays alpha-3, False sinon"""
15
- if not isinstance(val, str) or not bool(re.match(r"[a-zA-Z]{3}$", val)):
22
+ if not isinstance(val, str) or not bool(re.match(r"[A-Z]{3}$", val)):
16
23
  return False
17
- return val.upper() in liste_pays
24
+ return val in set(liste_pays)
18
25
 
19
26
 
20
27
  _test_values = {
21
- True: ["FRA", "brb"],
28
+ True: ["FRA"],
22
29
  False: ["XXX", "FR", "A"],
23
30
  }
@@ -1,10 +1,17 @@
1
1
  import re
2
2
  from os.path import dirname, join
3
3
 
4
- from csv_detective.formats.iso_country_code_alpha2 import labels # noqa
5
-
6
4
  proportion = 1
7
5
  tags = ["geo"]
6
+ labels = [
7
+ "iso country code",
8
+ "code pays",
9
+ "pays",
10
+ "country",
11
+ "nation",
12
+ "pays code",
13
+ "code pays (iso)",
14
+ ]
8
15
 
9
16
  with open(join(dirname(__file__), "data", "iso_country_code_numeric.txt"), "r") as iofile:
10
17
  liste_pays = iofile.read().split("\n")
@@ -1,14 +1,14 @@
1
1
  proportion = 0.8
2
2
  tags = ["fr", "temp"]
3
- labels = {
4
- "jour semaine": 1,
5
- "type jour": 1,
6
- "jour de la semaine": 1,
7
- "saufjour": 1,
8
- "nomjour": 1,
9
- "jour": 0.75,
10
- "jour de fermeture": 1,
11
- }
3
+ labels = [
4
+ "jour semaine",
5
+ "type jour",
6
+ "jour de la semaine",
7
+ "saufjour",
8
+ "nomjour",
9
+ "jour",
10
+ "jour de fermeture",
11
+ ]
12
12
 
13
13
  jours = {
14
14
  "lundi",
@@ -31,10 +31,11 @@ jours = {
31
31
  def _is(val):
32
32
  if not isinstance(val, str):
33
33
  return False
34
- return val.lower() in jours
34
+ val = val.lower()
35
+ return val in jours
35
36
 
36
37
 
37
38
  _test_values = {
38
39
  True: ["lundi"],
39
- False: ["jour"],
40
+ False: ["jour de la biere"],
40
41
  }
@@ -2,13 +2,7 @@ import json
2
2
  from json import JSONDecodeError
3
3
 
4
4
  proportion = 1
5
- python_type = "json"
6
5
  tags = ["type"]
7
- labels = {
8
- "list": 1,
9
- "dict": 1,
10
- "complex": 1,
11
- }
12
6
 
13
7
 
14
8
  def _is(val):
@@ -2,17 +2,31 @@ from frformat import LatitudeL93
2
2
 
3
3
  from csv_detective.formats.float import _is as is_float
4
4
  from csv_detective.formats.float import float_casting
5
- from csv_detective.formats.latitude_wgs import SHARED_LATITUDE_LABELS
6
5
 
7
6
  proportion = 1
8
7
  tags = ["fr", "geo"]
9
- mandatory_label = True
10
- python_type = "float"
11
- labels = SHARED_LATITUDE_LABELS | {
12
- "y l93": 1,
13
- "latitude lb93": 1,
14
- "lamby": 1,
15
- }
8
+ labels = [
9
+ "latitude",
10
+ "lat",
11
+ "y",
12
+ "yf",
13
+ "yd",
14
+ "y l93",
15
+ "coordonnee y",
16
+ "latitude lb93",
17
+ "coord y",
18
+ "ycoord",
19
+ "geocodage y gps",
20
+ "location latitude",
21
+ "ylatitude",
22
+ "ylat",
23
+ "latitude (y)",
24
+ "latitudeorg",
25
+ "coordinates.latitude",
26
+ "googlemap latitude",
27
+ "latitudelieu",
28
+ "latitude googlemap",
29
+ ]
16
30
 
17
31
  _latitudel93 = LatitudeL93()
18
32
 
@@ -1,44 +1,42 @@
1
1
  from csv_detective.formats.float import _is as is_float
2
- from csv_detective.formats.int import _is as is_int
3
2
 
4
3
  proportion = 1
5
4
  tags = ["geo"]
6
- mandatory_label = True
7
- python_type = "float"
8
- SHARED_LATITUDE_LABELS = {
9
- "latitude": 1,
10
- "lat": 0.75,
11
- "y": 0.5,
12
- "yf": 0.5,
13
- "yd": 0.5,
14
- "coordonnee y": 1,
15
- "coord y": 1,
16
- "ycoord": 1,
17
- "ylat": 1,
18
- }
19
- labels = SHARED_LATITUDE_LABELS | {
20
- "y gps": 1,
21
- "latitude wgs84": 1,
22
- "y wgs84": 1,
23
- "wsg": 0.75,
24
- "gps": 0.5,
25
- }
5
+ labels = [
6
+ "latitude",
7
+ "lat",
8
+ "y",
9
+ "yf",
10
+ "yd",
11
+ "coordonnee y",
12
+ "coord y",
13
+ "ycoord",
14
+ "geocodage y gps",
15
+ "location latitude",
16
+ "ylatitude",
17
+ "ylat",
18
+ "latitude (y)",
19
+ "latitudeorg",
20
+ "coordinates.latitude",
21
+ "googlemap latitude",
22
+ "latitudelieu",
23
+ "latitude googlemap",
24
+ "latitude wgs84",
25
+ "y wgs84",
26
+ "latitude (wgs84)",
27
+ ]
26
28
 
27
29
 
28
30
  def _is(val):
29
31
  try:
30
- return (
31
- is_float(val)
32
- and -90 <= float(val) <= 90
33
- # we ideally would like a certain level of decimal precision
34
- # but 1.200 is saved as 1.2 in csv so we just discriminate ints
35
- and not is_int(val)
36
- )
37
- except Exception:
32
+ return is_float(val) and float(val) >= -90 and float(val) <= 90
33
+ except ValueError:
34
+ return False
35
+ except OverflowError:
38
36
  return False
39
37
 
40
38
 
41
39
  _test_values = {
42
- True: ["43.2872", "-22.61", "-3.0"],
43
- False: ["100.1973", "40"],
40
+ True: ["43.2", "-22"],
41
+ False: ["100"],
44
42
  }
@@ -1,19 +1,42 @@
1
- from csv_detective.formats.latitude_wgs import _is as is_latitude, labels # noqa
1
+ from csv_detective.formats.float import _is as is_float
2
2
 
3
3
  proportion = 1
4
4
  tags = ["fr", "geo"]
5
- mandatory_label = True
6
- python_type = "float"
5
+ labels = [
6
+ "latitude",
7
+ "lat",
8
+ "y",
9
+ "yf",
10
+ "yd",
11
+ "coordonnee y",
12
+ "coord y",
13
+ "ycoord",
14
+ "geocodage y gps",
15
+ "location latitude",
16
+ "ylatitude",
17
+ "ylat",
18
+ "latitude (y)",
19
+ "latitudeorg",
20
+ "coordinates.latitude",
21
+ "googlemap latitude",
22
+ "latitudelieu",
23
+ "latitude googlemap",
24
+ "latitude wgs84",
25
+ "y wgs84",
26
+ "latitude (wgs84)",
27
+ ]
7
28
 
8
29
 
9
30
  def _is(val):
10
31
  try:
11
- return is_latitude(val) and 41.3 <= float(val) <= 51.3
12
- except Exception:
32
+ return is_float(val) and float(val) >= 41.3 and float(val) <= 51.3
33
+ except ValueError:
34
+ return False
35
+ except OverflowError:
13
36
  return False
14
37
 
15
38
 
16
39
  _test_values = {
17
- True: ["42.576", "42.5"],
18
- False: ["22.5"],
40
+ True: ["42.5"],
41
+ False: ["22.5", "62.5"],
19
42
  }