csv-detective 0.9.3.dev2258__py3-none-any.whl → 0.9.3.dev2348__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. csv_detective/detection/formats.py +12 -15
  2. csv_detective/explore_csv.py +28 -9
  3. csv_detective/format.py +67 -0
  4. csv_detective/formats/__init__.py +9 -0
  5. csv_detective/{detect_fields/FR/geo/adresse/__init__.py → formats/adresse.py} +116 -100
  6. csv_detective/{detect_fields/other/booleen/__init__.py → formats/booleen.py} +35 -27
  7. csv_detective/formats/code_commune_insee.py +26 -0
  8. csv_detective/{detect_fields/FR/other/code_csp_insee/__init__.py → formats/code_csp_insee.py} +36 -29
  9. csv_detective/{detect_fields/FR/geo/code_departement/__init__.py → formats/code_departement.py} +29 -15
  10. csv_detective/formats/code_fantoir.py +21 -0
  11. csv_detective/{detect_fields/FR/other/code_import/__init__.py → formats/code_import.py} +17 -9
  12. csv_detective/formats/code_postal.py +25 -0
  13. csv_detective/formats/code_region.py +22 -0
  14. csv_detective/formats/code_rna.py +29 -0
  15. csv_detective/formats/code_waldec.py +17 -0
  16. csv_detective/{detect_fields/FR/geo/commune/__init__.py → formats/commune.py} +27 -16
  17. csv_detective/{detect_fields/FR/other/csp_insee/__init__.py → formats/csp_insee.py} +31 -19
  18. csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
  19. csv_detective/{detect_fields/temp/date/__init__.py → formats/date.py} +99 -62
  20. csv_detective/formats/date_fr.py +22 -0
  21. csv_detective/{detect_fields/temp/datetime_aware/__init__.py → formats/datetime_aware.py} +18 -7
  22. csv_detective/{detect_fields/temp/datetime_naive/__init__.py → formats/datetime_naive.py} +21 -2
  23. csv_detective/{detect_fields/temp/datetime_rfc822/__init__.py → formats/datetime_rfc822.py} +24 -18
  24. csv_detective/formats/departement.py +37 -0
  25. csv_detective/formats/email.py +28 -0
  26. csv_detective/{detect_fields/other/float/__init__.py → formats/float.py} +29 -21
  27. csv_detective/formats/geojson.py +36 -0
  28. csv_detective/{detect_fields/FR/other/insee_ape700/__init__.py → formats/insee_ape700.py} +31 -19
  29. csv_detective/{detect_fields/FR/geo/insee_canton/__init__.py → formats/insee_canton.py} +28 -15
  30. csv_detective/{detect_fields/other/int/__init__.py → formats/int.py} +23 -16
  31. csv_detective/formats/iso_country_code_alpha2.py +30 -0
  32. csv_detective/formats/iso_country_code_alpha3.py +30 -0
  33. csv_detective/formats/iso_country_code_numeric.py +31 -0
  34. csv_detective/{detect_fields/FR/temp/jour_de_la_semaine/__init__.py → formats/jour_de_la_semaine.py} +41 -25
  35. csv_detective/{detect_fields/other/json/__init__.py → formats/json.py} +20 -14
  36. csv_detective/formats/latitude_l93.py +48 -0
  37. csv_detective/formats/latitude_wgs.py +42 -0
  38. csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
  39. csv_detective/formats/latlon_wgs.py +53 -0
  40. csv_detective/formats/longitude_l93.py +39 -0
  41. csv_detective/formats/longitude_wgs.py +32 -0
  42. csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
  43. csv_detective/formats/lonlat_wgs.py +36 -0
  44. csv_detective/{detect_fields/FR/temp/mois_de_annee/__init__.py → formats/mois_de_lannee.py} +48 -39
  45. csv_detective/formats/money.py +18 -0
  46. csv_detective/formats/mongo_object_id.py +14 -0
  47. csv_detective/formats/pays.py +35 -0
  48. csv_detective/formats/percent.py +16 -0
  49. csv_detective/{detect_fields/FR/geo/region/__init__.py → formats/region.py} +70 -50
  50. csv_detective/formats/sexe.py +17 -0
  51. csv_detective/{detect_fields/FR/other/siren/__init__.py → formats/siren.py} +37 -20
  52. csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -31
  53. csv_detective/formats/tel_fr.py +36 -0
  54. csv_detective/formats/uai.py +36 -0
  55. csv_detective/formats/url.py +45 -0
  56. csv_detective/formats/username.py +14 -0
  57. csv_detective/formats/uuid.py +16 -0
  58. csv_detective/formats/year.py +28 -0
  59. csv_detective/output/__init__.py +3 -4
  60. csv_detective/output/dataframe.py +3 -3
  61. csv_detective/output/profile.py +2 -3
  62. csv_detective/output/schema.py +2 -2
  63. csv_detective/parsing/columns.py +35 -50
  64. csv_detective/parsing/csv.py +2 -2
  65. csv_detective/parsing/load.py +4 -5
  66. csv_detective/validate.py +9 -4
  67. {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/METADATA +6 -5
  68. csv_detective-0.9.3.dev2348.dist-info/RECORD +102 -0
  69. tests/test_fields.py +39 -364
  70. tests/test_file.py +1 -1
  71. tests/test_labels.py +5 -3
  72. tests/test_structure.py +40 -36
  73. csv_detective/detect_fields/FR/__init__.py +0 -0
  74. csv_detective/detect_fields/FR/geo/__init__.py +0 -0
  75. csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -9
  76. csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -9
  77. csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -9
  78. csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -10
  79. csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -16
  80. csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -19
  81. csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
  82. csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -19
  83. csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
  84. csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -16
  85. csv_detective/detect_fields/FR/other/__init__.py +0 -0
  86. csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
  87. csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -9
  88. csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -9
  89. csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
  90. csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -11
  91. csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -17
  92. csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
  93. csv_detective/detect_fields/FR/temp/__init__.py +0 -0
  94. csv_detective/detect_fields/__init__.py +0 -112
  95. csv_detective/detect_fields/geo/__init__.py +0 -0
  96. csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
  97. csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
  98. csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
  99. csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -18
  100. csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
  101. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -16
  102. csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
  103. csv_detective/detect_fields/geo/lonlat_wgs/__init__.py +0 -16
  104. csv_detective/detect_fields/other/__init__.py +0 -0
  105. csv_detective/detect_fields/other/email/__init__.py +0 -10
  106. csv_detective/detect_fields/other/money/__init__.py +0 -11
  107. csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
  108. csv_detective/detect_fields/other/percent/__init__.py +0 -9
  109. csv_detective/detect_fields/other/twitter/__init__.py +0 -8
  110. csv_detective/detect_fields/other/url/__init__.py +0 -14
  111. csv_detective/detect_fields/other/uuid/__init__.py +0 -10
  112. csv_detective/detect_fields/temp/__init__.py +0 -0
  113. csv_detective/detect_fields/temp/year/__init__.py +0 -10
  114. csv_detective/detect_labels/FR/__init__.py +0 -0
  115. csv_detective/detect_labels/FR/geo/__init__.py +0 -0
  116. csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -15
  117. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -17
  118. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -15
  119. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -12
  120. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -16
  121. csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -14
  122. csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -12
  123. csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -22
  124. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -13
  125. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -30
  126. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -30
  127. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -21
  128. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -21
  129. csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -20
  130. csv_detective/detect_labels/FR/geo/region/__init__.py +0 -20
  131. csv_detective/detect_labels/FR/other/__init__.py +0 -0
  132. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -8
  133. csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -13
  134. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -8
  135. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -13
  136. csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -9
  137. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -15
  138. csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -8
  139. csv_detective/detect_labels/FR/other/siren/__init__.py +0 -17
  140. csv_detective/detect_labels/FR/other/siret/__init__.py +0 -16
  141. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -20
  142. csv_detective/detect_labels/FR/other/uai/__init__.py +0 -25
  143. csv_detective/detect_labels/FR/temp/__init__.py +0 -0
  144. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -16
  145. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -8
  146. csv_detective/detect_labels/__init__.py +0 -94
  147. csv_detective/detect_labels/geo/__init__.py +0 -0
  148. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -16
  149. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -16
  150. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -16
  151. csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -17
  152. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -30
  153. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -39
  154. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -21
  155. csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +0 -23
  156. csv_detective/detect_labels/other/__init__.py +0 -0
  157. csv_detective/detect_labels/other/booleen/__init__.py +0 -8
  158. csv_detective/detect_labels/other/email/__init__.py +0 -20
  159. csv_detective/detect_labels/other/float/__init__.py +0 -8
  160. csv_detective/detect_labels/other/int/__init__.py +0 -8
  161. csv_detective/detect_labels/other/money/__init__.py +0 -8
  162. csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -8
  163. csv_detective/detect_labels/other/twitter/__init__.py +0 -8
  164. csv_detective/detect_labels/other/url/__init__.py +0 -23
  165. csv_detective/detect_labels/other/uuid/__init__.py +0 -8
  166. csv_detective/detect_labels/temp/__init__.py +0 -0
  167. csv_detective/detect_labels/temp/date/__init__.py +0 -28
  168. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -19
  169. csv_detective/detect_labels/temp/year/__init__.py +0 -19
  170. csv_detective/load_tests.py +0 -59
  171. csv_detective-0.9.3.dev2258.dist-info/RECORD +0 -166
  172. /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
  173. /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
  174. /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
  175. /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
  176. {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/WHEEL +0 -0
  177. {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/entry_points.txt +0 -0
  178. {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/licenses/LICENSE +0 -0
  179. {csv_detective-0.9.3.dev2258.dist-info → csv_detective-0.9.3.dev2348.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,36 @@
1
+ import json
2
+
3
# Detection settings for the GeoJSON format.
proportion = 1
tags = ["geo"]
# Column-name hints for this format; each label is listed only once
# (the original list contained "geo shape" twice).
labels = [
    "json geojson",
    "json",
    "geojson",
    "geo shape",
    "geom",
    "geometry",
    "geoshape",
]
15
+
16
+
17
+ def _is(val) -> bool:
18
+ try:
19
+ j = json.loads(val)
20
+ if isinstance(j, dict):
21
+ if "type" in j and "coordinates" in j:
22
+ return True
23
+ if "geometry" in j and "coordinates" in j["geometry"]:
24
+ return True
25
+ except Exception:
26
+ pass
27
+ return False
28
+
29
+
30
+ _test_values = {
31
+ True: [
32
+ '{"coordinates": [45.783753, 3.049342], "type": "63870"}',
33
+ '{"geometry": {"coordinates": [45.783753, 3.049342]}}',
34
+ ],
35
+ False: ['{"pomme": "fruit", "reponse": 42}'],
36
+ }
@@ -1,19 +1,31 @@
1
- from os.path import dirname, join
2
-
3
- from csv_detective.parsing.text import _process_text
4
-
5
- PROPORTION = 1
6
- f = open(join(dirname(__file__), "insee_ape700.txt"), "r")
7
- condes_insee_ape = f.read().split("\n")
8
- # removing empty str due to additionnal line in file
9
- del condes_insee_ape[-1]
10
- condes_insee_ape = set(condes_insee_ape)
11
- f.close()
12
-
13
-
14
- def _is(val):
15
- """Repère les codes APE700 de l'INSEE"""
16
- if not isinstance(val, str):
17
- return False
18
- val = _process_text(val).upper()
19
- return val in condes_insee_ape
1
+ from os.path import dirname, join
2
+
3
+ from csv_detective.parsing.text import _process_text
4
+
5
+ proportion = 0.8
6
+ tags = ["fr"]
7
+ labels = [
8
+ "code ape",
9
+ "code activite (ape)",
10
+ "code naf",
11
+ "code naf organisme designe",
12
+ "code naf organisme designant",
13
+ "base sirene : code ape de l'etablissement siege",
14
+ ]
15
+
16
# Load the reference APE700 codes once, at import time.
# The context manager closes the file even if reading fails, and empty
# lines (such as the one produced by a trailing newline) are filtered out
# rather than unconditionally deleting the last entry, which would drop a
# real code if the file did not end with a newline.
with open(join(dirname(__file__), "data", "insee_ape700.txt"), "r") as f:
    condes_insee_ape = {code for code in f.read().split("\n") if code}
22
+
23
+
24
+ def _is(val):
25
+ if not isinstance(val, str):
26
+ return False
27
+ val = _process_text(val).upper()
28
+ return val in condes_insee_ape
29
+
30
+
31
+ _test_values = {True: ["0116Z"], False: ["0116A"]}
@@ -1,15 +1,28 @@
1
- from frformat import Canton, Millesime, Options
2
-
3
- PROPORTION = 0.9
4
- _options = Options(
5
- ignore_case=True,
6
- ignore_accents=True,
7
- replace_non_alphanumeric_with_space=True,
8
- ignore_extra_whitespace=True,
9
- )
10
- _canton = Canton(Millesime.LATEST, _options)
11
-
12
-
13
- def _is(val):
14
- """Match avec le nom des cantons"""
15
- return isinstance(val, str) and _canton.is_valid(val)
1
+ from frformat import Canton, Millesime, Options
2
+
3
+ proportion = 0.9
4
+ tags = ["fr", "geo"]
5
+ labels = [
6
+ "insee canton",
7
+ "canton",
8
+ "cant",
9
+ "nom canton",
10
+ ]
11
+
12
+ _options = Options(
13
+ ignore_case=True,
14
+ ignore_accents=True,
15
+ replace_non_alphanumeric_with_space=True,
16
+ ignore_extra_whitespace=True,
17
+ )
18
+ _canton = Canton(Millesime.LATEST, _options)
19
+
20
+
21
+ def _is(val):
22
+ return isinstance(val, str) and _canton.is_valid(val)
23
+
24
+
25
+ _test_values = {
26
+ True: ["nantua"],
27
+ False: ["california"],
28
+ }
@@ -1,16 +1,23 @@
1
- PROPORTION = 1
2
-
3
-
4
- def _is(val):
5
- """Detects integers"""
6
- if (
7
- not isinstance(val, str)
8
- or any([v in val for v in [".", "_", "+"]])
9
- or (val.startswith("0") and len(val) > 1)
10
- ):
11
- return False
12
- try:
13
- int(val)
14
- return True
15
- except ValueError:
16
- return False
1
labels = ["nb", "nombre", "nbre"]
# The other format modules expose this list under the plural name `tags`;
# the singular `tag` is kept as an alias so existing references keep working.
tags = ["type"]
tag = tags
3
+
4
+
5
+ def _is(val):
6
+ """Detects integers"""
7
+ if (
8
+ not isinstance(val, str)
9
+ or any([v in val for v in [".", "_", "+"]])
10
+ or (val.startswith("0") and len(val) > 1)
11
+ ):
12
+ return False
13
+ try:
14
+ int(val)
15
+ return True
16
+ except ValueError:
17
+ return False
18
+
19
+
20
+ _test_values = {
21
+ True: ["1", "0", "1764", "-24"],
22
+ False: ["01053", "1.2", "123_456", "+35"],
23
+ }
@@ -0,0 +1,30 @@
1
+ import re
2
+ from os.path import dirname, join
3
+
4
+ proportion = 1
5
+ tags = ["geo"]
6
+ labels = [
7
+ "iso country code",
8
+ "code pays",
9
+ "pays",
10
+ "country",
11
+ "nation",
12
+ "pays code",
13
+ "code pays (iso)",
14
+ ]
15
+
16
+ with open(join(dirname(__file__), "data", "iso_country_code_alpha2.txt"), "r") as iofile:
17
+ liste_pays = iofile.read().split("\n")
18
+ liste_pays = set(liste_pays)
19
+
20
+
21
+ def _is(val):
22
+ if not isinstance(val, str) or not bool(re.match(r"[A-Z]{2}$", val)):
23
+ return False
24
+ return val in liste_pays
25
+
26
+
27
+ _test_values = {
28
+ True: ["FR"],
29
+ False: ["XX", "A", "FRA"],
30
+ }
@@ -0,0 +1,30 @@
1
+ import re
2
+ from os.path import dirname, join
3
+
4
+ proportion = 1
5
+ tags = ["geo"]
6
+ labels = [
7
+ "iso country code",
8
+ "code pays",
9
+ "pays",
10
+ "country",
11
+ "nation",
12
+ "pays code",
13
+ "code pays (iso)",
14
+ ]
15
+
16
with open(join(dirname(__file__), "data", "iso_country_code_alpha3.txt"), "r") as iofile:
    liste_pays = iofile.read().split("\n")
# Built once at import time so that each call is a constant-time lookup
# instead of rebuilding a set from the whole list on every value tested.
# `liste_pays` itself is left as a list for anything that already reads it.
_codes_pays = frozenset(liste_pays)


def _is(val):
    """Return True if ``val`` can be an ISO alpha-3 country code, False otherwise.

    The value must be a string of exactly three capital letters that is
    present in the reference list loaded from the data file.
    """
    if not isinstance(val, str) or not bool(re.match(r"[A-Z]{3}$", val)):
        return False
    return val in _codes_pays
25
+
26
+
27
+ _test_values = {
28
+ True: ["FRA"],
29
+ False: ["XXX", "FR", "A"],
30
+ }
@@ -0,0 +1,31 @@
1
+ import re
2
+ from os.path import dirname, join
3
+
4
+ proportion = 1
5
+ tags = ["geo"]
6
+ labels = [
7
+ "iso country code",
8
+ "code pays",
9
+ "pays",
10
+ "country",
11
+ "nation",
12
+ "pays code",
13
+ "code pays (iso)",
14
+ ]
15
+
16
+ with open(join(dirname(__file__), "data", "iso_country_code_numeric.txt"), "r") as iofile:
17
+ liste_pays = iofile.read().split("\n")
18
+ liste_pays = set(liste_pays)
19
+
20
+
21
+ def _is(val):
22
+ """Renvoie True si val peut etre un code iso pays numerique, False sinon"""
23
+ if not isinstance(val, str) or not bool(re.match(r"[0-9]{3}$", val)):
24
+ return False
25
+ return val in liste_pays
26
+
27
+
28
+ _test_values = {
29
+ True: ["250"],
30
+ False: ["003"],
31
+ }
@@ -1,25 +1,41 @@
1
- PROPORTION = 1
2
- jours = {
3
- "lundi",
4
- "mardi",
5
- "mercredi",
6
- "jeudi",
7
- "vendredi",
8
- "samedi",
9
- "dimanche",
10
- "lun",
11
- "mar",
12
- "mer",
13
- "jeu",
14
- "ven",
15
- "sam",
16
- "dim",
17
- }
18
-
19
-
20
- def _is(val):
21
- """Renvoie True si les champs peuvent être des jours de la semaine"""
22
- if not isinstance(val, str):
23
- return False
24
- val = val.lower()
25
- return val in jours
1
+ proportion = 0.8
2
+ tags = ["fr", "temp"]
3
+ labels = [
4
+ "jour semaine",
5
+ "type jour",
6
+ "jour de la semaine",
7
+ "saufjour",
8
+ "nomjour",
9
+ "jour",
10
+ "jour de fermeture",
11
+ ]
12
+
13
+ jours = {
14
+ "lundi",
15
+ "mardi",
16
+ "mercredi",
17
+ "jeudi",
18
+ "vendredi",
19
+ "samedi",
20
+ "dimanche",
21
+ "lun",
22
+ "mar",
23
+ "mer",
24
+ "jeu",
25
+ "ven",
26
+ "sam",
27
+ "dim",
28
+ }
29
+
30
+
31
+ def _is(val):
32
+ if not isinstance(val, str):
33
+ return False
34
+ val = val.lower()
35
+ return val in jours
36
+
37
+
38
+ _test_values = {
39
+ True: ["lundi"],
40
+ False: ["jour de la biere"],
41
+ }
@@ -1,14 +1,20 @@
1
- import json
2
- from json import JSONDecodeError
3
-
4
- PROPORTION = 1
5
-
6
-
7
- def _is(val):
8
- """Detects json"""
9
- try:
10
- loaded = json.loads(val)
11
- # we don't want to consider integers for instance
12
- return isinstance(loaded, (list, dict))
13
- except (JSONDecodeError, TypeError):
14
- return False
1
+ import json
2
+ from json import JSONDecodeError
3
+
4
+ proportion = 1
5
+ tags = ["type"]
6
+
7
+
8
+ def _is(val):
9
+ try:
10
+ loaded = json.loads(val)
11
+ # we don't want to consider integers for instance
12
+ return isinstance(loaded, (list, dict))
13
+ except (JSONDecodeError, TypeError):
14
+ return False
15
+
16
+
17
+ _test_values = {
18
+ True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
19
+ False: ["5", '{"zefib":', '{"a"}'],
20
+ }
@@ -0,0 +1,48 @@
1
+ from frformat import LatitudeL93
2
+
3
+ from csv_detective.formats.float import _is as is_float
4
+ from csv_detective.formats.float import float_casting
5
+
6
+ proportion = 1
7
+ tags = ["fr", "geo"]
8
+ labels = [
9
+ "latitude",
10
+ "lat",
11
+ "y",
12
+ "yf",
13
+ "yd",
14
+ "y l93",
15
+ "coordonnee y",
16
+ "latitude lb93",
17
+ "coord y",
18
+ "ycoord",
19
+ "geocodage y gps",
20
+ "location latitude",
21
+ "ylatitude",
22
+ "ylat",
23
+ "latitude (y)",
24
+ "latitudeorg",
25
+ "coordinates.latitude",
26
+ "googlemap latitude",
27
+ "latitudelieu",
28
+ "latitude googlemap",
29
+ ]
30
+
31
+ _latitudel93 = LatitudeL93()
32
+
33
+
34
+ def _is(val):
35
+ try:
36
+ if isinstance(val, str) and is_float(val):
37
+ return _latitudel93.is_valid(float_casting(val))
38
+
39
+ return False
40
+
41
+ except (ValueError, OverflowError):
42
+ return False
43
+
44
+
45
+ _test_values = {
46
+ True: ["6037008", "7123528.5", "7124528,5"],
47
+ False: ["0", "-6734529.6", "7245669.8", "3422674,78", "32_34"],
48
+ }
@@ -0,0 +1,42 @@
1
+ from csv_detective.formats.float import _is as is_float
2
+
3
+ proportion = 1
4
+ tags = ["geo"]
5
+ labels = [
6
+ "latitude",
7
+ "lat",
8
+ "y",
9
+ "yf",
10
+ "yd",
11
+ "coordonnee y",
12
+ "coord y",
13
+ "ycoord",
14
+ "geocodage y gps",
15
+ "location latitude",
16
+ "ylatitude",
17
+ "ylat",
18
+ "latitude (y)",
19
+ "latitudeorg",
20
+ "coordinates.latitude",
21
+ "googlemap latitude",
22
+ "latitudelieu",
23
+ "latitude googlemap",
24
+ "latitude wgs84",
25
+ "y wgs84",
26
+ "latitude (wgs84)",
27
+ ]
28
+
29
+
30
def _is(val):
    """Return whether ``val`` is float-like and lies within [-90, 90].

    Values for which ``float()`` raises ValueError or OverflowError are rejected.
    """
    # NOTE(review): is_float appears to accept comma decimals, which float()
    # rejects, so such values end up in the ValueError branch — confirm intended.
    try:
        return is_float(val) and -90 <= float(val) <= 90
    except (ValueError, OverflowError):
        return False
37
+
38
+
39
+ _test_values = {
40
+ True: ["43.2", "-22"],
41
+ False: ["100"],
42
+ }
@@ -0,0 +1,42 @@
1
+ from csv_detective.formats.float import _is as is_float
2
+
3
+ proportion = 1
4
+ tags = ["fr", "geo"]
5
+ labels = [
6
+ "latitude",
7
+ "lat",
8
+ "y",
9
+ "yf",
10
+ "yd",
11
+ "coordonnee y",
12
+ "coord y",
13
+ "ycoord",
14
+ "geocodage y gps",
15
+ "location latitude",
16
+ "ylatitude",
17
+ "ylat",
18
+ "latitude (y)",
19
+ "latitudeorg",
20
+ "coordinates.latitude",
21
+ "googlemap latitude",
22
+ "latitudelieu",
23
+ "latitude googlemap",
24
+ "latitude wgs84",
25
+ "y wgs84",
26
+ "latitude (wgs84)",
27
+ ]
28
+
29
+
30
def _is(val):
    """Return whether ``val`` is float-like and lies within [41.3, 51.3].

    These are the latitude bounds this module uses for metropolitan France.
    Values for which ``float()`` raises ValueError or OverflowError are rejected.
    """
    try:
        return is_float(val) and 41.3 <= float(val) <= 51.3
    except (ValueError, OverflowError):
        return False
37
+
38
+
39
+ _test_values = {
40
+ True: ["42.5"],
41
+ False: ["22.5", "62.5"],
42
+ }
@@ -0,0 +1,53 @@
1
+ from csv_detective.formats.latitude_wgs import _is as is_lat
2
+ from csv_detective.formats.longitude_wgs import _is as is_lon
3
+
4
+ proportion = 1
5
+ tags = ["geo"]
6
+
7
+ SHARED_COORDS_LABELS = [
8
+ "ban",
9
+ "coordinates",
10
+ "coordonnees",
11
+ "coordonnees insee",
12
+ "geo",
13
+ "geopoint",
14
+ "geoloc",
15
+ "geolocalisation",
16
+ "geom",
17
+ "geometry",
18
+ "gps",
19
+ "localisation",
20
+ "point",
21
+ "position",
22
+ "wgs84",
23
+ ]
24
+
25
+ specific = [
26
+ "latlon",
27
+ "lat lon",
28
+ "x y",
29
+ "xy",
30
+ ]
31
+
32
+ # we aim wide to catch exact matches if possible for the highest possible score
33
+ labels = (
34
+ SHARED_COORDS_LABELS
35
+ + specific
36
+ + [w + sep + suf for suf in specific for w in SHARED_COORDS_LABELS for sep in ["", " "]]
37
+ )
38
+
39
+
40
+ def _is(val):
41
+ if not isinstance(val, str) or val.count(",") != 1:
42
+ return False
43
+ lat, lon = val.split(",")
44
+ # handling [lat,lon]
45
+ if lat.startswith("[") and lon.endswith("]"):
46
+ lat, lon = lat[1:], lon[:-1]
47
+ return is_lat(lat) and is_lon(lon.replace(" ", ""))
48
+
49
+
50
+ _test_values = {
51
+ True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8", "[12,-0.28]"],
52
+ False: ["0.1,192", "-102, 92", "[23.02,4.1", "23.02,4.1]", "160.1,-27"],
53
+ }
@@ -0,0 +1,39 @@
1
+ from frformat import LongitudeL93
2
+
3
+ from csv_detective.formats.float import _is as is_float
4
+ from csv_detective.formats.float import float_casting
5
+
6
+ proportion = 1
7
+ tags = ["fr", "geo"]
8
+ labels = [
9
+ "longitude",
10
+ "lon",
11
+ "long",
12
+ "geocodage x gps",
13
+ "location longitude",
14
+ "xlongitude",
15
+ "lng",
16
+ "xlong",
17
+ "x",
18
+ "xf",
19
+ "xd",
20
+ ]
21
+
22
+ _longitudel93 = LongitudeL93()
23
+
24
+
25
+ def _is(val):
26
+ try:
27
+ if isinstance(val, str) and is_float(val):
28
+ return _longitudel93.is_valid(float_casting(val))
29
+
30
+ return False
31
+
32
+ except (ValueError, OverflowError):
33
+ return False
34
+
35
+
36
+ _test_values = {
37
+ True: ["0", "-154", "1265783,45", "34723.4"],
38
+ False: ["1456669.8", "-776225", "346_3214"],
39
+ }
@@ -0,0 +1,32 @@
1
+ from csv_detective.formats.float import _is as is_float
2
+
3
+ proportion = 1
4
+ tags = ["geo"]
5
+ labels = [
6
+ "longitude",
7
+ "lon",
8
+ "long",
9
+ "geocodage x gps",
10
+ "location longitude",
11
+ "xlongitude",
12
+ "lng",
13
+ "xlong",
14
+ "x",
15
+ "xf",
16
+ "xd",
17
+ ]
18
+
19
+
20
def _is(val):
    """Return whether ``val`` is float-like and lies within [-180, 180].

    Values for which ``float()`` raises ValueError or OverflowError are rejected.
    """
    try:
        return is_float(val) and -180 <= float(val) <= 180
    except (ValueError, OverflowError):
        return False
27
+
28
+
29
+ _test_values = {
30
+ True: ["120", "-20.2"],
31
+ False: ["-200"],
32
+ }
@@ -0,0 +1,32 @@
1
+ from csv_detective.formats.float import _is as is_float
2
+
3
+ proportion = 1
4
+ tags = ["fr", "geo"]
5
+ labels = [
6
+ "longitude",
7
+ "lon",
8
+ "long",
9
+ "geocodage x gps",
10
+ "location longitude",
11
+ "xlongitude",
12
+ "lng",
13
+ "xlong",
14
+ "x",
15
+ "xf",
16
+ "xd",
17
+ ]
18
+
19
+
20
def _is(val):
    """Return whether ``val`` is float-like and lies within [-5.5, 9.8].

    These are the longitude bounds this module uses for metropolitan France.
    Values for which ``float()`` raises ValueError or OverflowError are rejected.
    """
    try:
        return is_float(val) and -5.5 <= float(val) <= 9.8
    except (ValueError, OverflowError):
        return False
27
+
28
+
29
+ _test_values = {
30
+ True: ["-2.5"],
31
+ False: ["12.8"],
32
+ }