csv-detective 0.9.3.dev2241__py3-none-any.whl → 0.9.3.dev2319__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. csv_detective/detection/formats.py +12 -15
  2. csv_detective/detection/headers.py +6 -8
  3. csv_detective/explore_csv.py +28 -9
  4. csv_detective/format.py +67 -0
  5. csv_detective/formats/__init__.py +9 -0
  6. csv_detective/{detect_fields/FR/geo/adresse/__init__.py → formats/adresse.py} +116 -100
  7. csv_detective/{detect_fields/other/booleen/__init__.py → formats/booleen.py} +35 -27
  8. csv_detective/formats/code_commune_insee.py +26 -0
  9. csv_detective/{detect_fields/FR/other/code_csp_insee/__init__.py → formats/code_csp_insee.py} +36 -29
  10. csv_detective/{detect_fields/FR/geo/code_departement/__init__.py → formats/code_departement.py} +29 -15
  11. csv_detective/formats/code_fantoir.py +21 -0
  12. csv_detective/{detect_fields/FR/other/code_import/__init__.py → formats/code_import.py} +17 -9
  13. csv_detective/formats/code_postal.py +25 -0
  14. csv_detective/formats/code_region.py +22 -0
  15. csv_detective/formats/code_rna.py +29 -0
  16. csv_detective/formats/code_waldec.py +17 -0
  17. csv_detective/{detect_fields/FR/geo/commune/__init__.py → formats/commune.py} +27 -16
  18. csv_detective/{detect_fields/FR/other/csp_insee/__init__.py → formats/csp_insee.py} +31 -19
  19. csv_detective/{detect_fields/FR/other/insee_ape700 → formats/data}/insee_ape700.txt +0 -0
  20. csv_detective/{detect_fields/temp/date/__init__.py → formats/date.py} +99 -62
  21. csv_detective/formats/date_fr.py +22 -0
  22. csv_detective/{detect_fields/temp/datetime_aware/__init__.py → formats/datetime_aware.py} +18 -7
  23. csv_detective/{detect_fields/temp/datetime_naive/__init__.py → formats/datetime_naive.py} +21 -2
  24. csv_detective/{detect_fields/temp/datetime_rfc822/__init__.py → formats/datetime_rfc822.py} +24 -18
  25. csv_detective/formats/departement.py +37 -0
  26. csv_detective/formats/email.py +28 -0
  27. csv_detective/{detect_fields/other/float/__init__.py → formats/float.py} +29 -21
  28. csv_detective/formats/geojson.py +36 -0
  29. csv_detective/{detect_fields/FR/other/insee_ape700/__init__.py → formats/insee_ape700.py} +31 -19
  30. csv_detective/{detect_fields/FR/geo/insee_canton/__init__.py → formats/insee_canton.py} +28 -15
  31. csv_detective/{detect_fields/other/int/__init__.py → formats/int.py} +23 -16
  32. csv_detective/formats/iso_country_code_alpha2.py +30 -0
  33. csv_detective/formats/iso_country_code_alpha3.py +30 -0
  34. csv_detective/formats/iso_country_code_numeric.py +31 -0
  35. csv_detective/{detect_fields/FR/temp/jour_de_la_semaine/__init__.py → formats/jour_de_la_semaine.py} +41 -25
  36. csv_detective/{detect_fields/other/json/__init__.py → formats/json.py} +20 -14
  37. csv_detective/formats/latitude_l93.py +48 -0
  38. csv_detective/formats/latitude_wgs.py +42 -0
  39. csv_detective/formats/latitude_wgs_fr_metropole.py +42 -0
  40. csv_detective/formats/latlon_wgs.py +53 -0
  41. csv_detective/formats/longitude_l93.py +39 -0
  42. csv_detective/formats/longitude_wgs.py +32 -0
  43. csv_detective/formats/longitude_wgs_fr_metropole.py +32 -0
  44. csv_detective/formats/lonlat_wgs.py +36 -0
  45. csv_detective/{detect_fields/FR/temp/mois_de_annee/__init__.py → formats/mois_de_lannee.py} +48 -39
  46. csv_detective/formats/money.py +18 -0
  47. csv_detective/formats/mongo_object_id.py +14 -0
  48. csv_detective/formats/pays.py +35 -0
  49. csv_detective/formats/percent.py +16 -0
  50. csv_detective/{detect_fields/FR/geo/region/__init__.py → formats/region.py} +70 -50
  51. csv_detective/formats/sexe.py +17 -0
  52. csv_detective/{detect_fields/FR/other/siren/__init__.py → formats/siren.py} +37 -20
  53. csv_detective/{detect_fields/FR/other/siret/__init__.py → formats/siret.py} +47 -31
  54. csv_detective/formats/tel_fr.py +36 -0
  55. csv_detective/formats/uai.py +36 -0
  56. csv_detective/formats/url.py +45 -0
  57. csv_detective/formats/username.py +14 -0
  58. csv_detective/formats/uuid.py +16 -0
  59. csv_detective/formats/year.py +28 -0
  60. csv_detective/output/__init__.py +3 -4
  61. csv_detective/output/dataframe.py +3 -3
  62. csv_detective/output/profile.py +2 -3
  63. csv_detective/output/schema.py +2 -2
  64. csv_detective/parsing/columns.py +35 -50
  65. csv_detective/parsing/csv.py +2 -2
  66. csv_detective/parsing/load.py +10 -11
  67. csv_detective/validate.py +9 -4
  68. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/METADATA +6 -5
  69. csv_detective-0.9.3.dev2319.dist-info/RECORD +102 -0
  70. tests/test_fields.py +39 -364
  71. tests/test_file.py +1 -1
  72. tests/test_labels.py +5 -3
  73. tests/test_structure.py +40 -36
  74. csv_detective/detect_fields/FR/__init__.py +0 -0
  75. csv_detective/detect_fields/FR/geo/__init__.py +0 -0
  76. csv_detective/detect_fields/FR/geo/code_commune_insee/__init__.py +0 -9
  77. csv_detective/detect_fields/FR/geo/code_fantoir/__init__.py +0 -9
  78. csv_detective/detect_fields/FR/geo/code_postal/__init__.py +0 -9
  79. csv_detective/detect_fields/FR/geo/code_region/__init__.py +0 -10
  80. csv_detective/detect_fields/FR/geo/departement/__init__.py +0 -16
  81. csv_detective/detect_fields/FR/geo/latitude_l93/__init__.py +0 -19
  82. csv_detective/detect_fields/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -13
  83. csv_detective/detect_fields/FR/geo/longitude_l93/__init__.py +0 -19
  84. csv_detective/detect_fields/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -13
  85. csv_detective/detect_fields/FR/geo/pays/__init__.py +0 -16
  86. csv_detective/detect_fields/FR/other/__init__.py +0 -0
  87. csv_detective/detect_fields/FR/other/code_csp_insee/code_csp_insee.txt +0 -498
  88. csv_detective/detect_fields/FR/other/code_rna/__init__.py +0 -9
  89. csv_detective/detect_fields/FR/other/code_waldec/__init__.py +0 -9
  90. csv_detective/detect_fields/FR/other/date_fr/__init__.py +0 -12
  91. csv_detective/detect_fields/FR/other/sexe/__init__.py +0 -11
  92. csv_detective/detect_fields/FR/other/tel_fr/__init__.py +0 -17
  93. csv_detective/detect_fields/FR/other/uai/__init__.py +0 -15
  94. csv_detective/detect_fields/FR/temp/__init__.py +0 -0
  95. csv_detective/detect_fields/__init__.py +0 -112
  96. csv_detective/detect_fields/geo/__init__.py +0 -0
  97. csv_detective/detect_fields/geo/iso_country_code_alpha2/__init__.py +0 -15
  98. csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py +0 -14
  99. csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py +0 -15
  100. csv_detective/detect_fields/geo/json_geojson/__init__.py +0 -18
  101. csv_detective/detect_fields/geo/latitude_wgs/__init__.py +0 -13
  102. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +0 -16
  103. csv_detective/detect_fields/geo/longitude_wgs/__init__.py +0 -13
  104. csv_detective/detect_fields/geo/lonlat_wgs/__init__.py +0 -16
  105. csv_detective/detect_fields/other/__init__.py +0 -0
  106. csv_detective/detect_fields/other/email/__init__.py +0 -10
  107. csv_detective/detect_fields/other/money/__init__.py +0 -11
  108. csv_detective/detect_fields/other/mongo_object_id/__init__.py +0 -8
  109. csv_detective/detect_fields/other/percent/__init__.py +0 -9
  110. csv_detective/detect_fields/other/twitter/__init__.py +0 -8
  111. csv_detective/detect_fields/other/url/__init__.py +0 -14
  112. csv_detective/detect_fields/other/uuid/__init__.py +0 -10
  113. csv_detective/detect_fields/temp/__init__.py +0 -0
  114. csv_detective/detect_fields/temp/year/__init__.py +0 -10
  115. csv_detective/detect_labels/FR/__init__.py +0 -0
  116. csv_detective/detect_labels/FR/geo/__init__.py +0 -0
  117. csv_detective/detect_labels/FR/geo/adresse/__init__.py +0 -15
  118. csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +0 -17
  119. csv_detective/detect_labels/FR/geo/code_departement/__init__.py +0 -15
  120. csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +0 -12
  121. csv_detective/detect_labels/FR/geo/code_postal/__init__.py +0 -16
  122. csv_detective/detect_labels/FR/geo/code_region/__init__.py +0 -14
  123. csv_detective/detect_labels/FR/geo/commune/__init__.py +0 -12
  124. csv_detective/detect_labels/FR/geo/departement/__init__.py +0 -22
  125. csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +0 -13
  126. csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +0 -30
  127. csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +0 -30
  128. csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +0 -21
  129. csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +0 -21
  130. csv_detective/detect_labels/FR/geo/pays/__init__.py +0 -20
  131. csv_detective/detect_labels/FR/geo/region/__init__.py +0 -20
  132. csv_detective/detect_labels/FR/other/__init__.py +0 -0
  133. csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +0 -8
  134. csv_detective/detect_labels/FR/other/code_rna/__init__.py +0 -13
  135. csv_detective/detect_labels/FR/other/code_waldec/__init__.py +0 -8
  136. csv_detective/detect_labels/FR/other/csp_insee/__init__.py +0 -13
  137. csv_detective/detect_labels/FR/other/date_fr/__init__.py +0 -9
  138. csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +0 -15
  139. csv_detective/detect_labels/FR/other/sexe/__init__.py +0 -8
  140. csv_detective/detect_labels/FR/other/siren/__init__.py +0 -17
  141. csv_detective/detect_labels/FR/other/siret/__init__.py +0 -16
  142. csv_detective/detect_labels/FR/other/tel_fr/__init__.py +0 -20
  143. csv_detective/detect_labels/FR/other/uai/__init__.py +0 -25
  144. csv_detective/detect_labels/FR/temp/__init__.py +0 -0
  145. csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +0 -16
  146. csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +0 -8
  147. csv_detective/detect_labels/__init__.py +0 -94
  148. csv_detective/detect_labels/geo/__init__.py +0 -0
  149. csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +0 -16
  150. csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +0 -16
  151. csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +0 -16
  152. csv_detective/detect_labels/geo/json_geojson/__init__.py +0 -17
  153. csv_detective/detect_labels/geo/latitude_wgs/__init__.py +0 -30
  154. csv_detective/detect_labels/geo/latlon_wgs/__init__.py +0 -39
  155. csv_detective/detect_labels/geo/longitude_wgs/__init__.py +0 -21
  156. csv_detective/detect_labels/geo/lonlat_wgs/__init__.py +0 -23
  157. csv_detective/detect_labels/other/__init__.py +0 -0
  158. csv_detective/detect_labels/other/booleen/__init__.py +0 -8
  159. csv_detective/detect_labels/other/email/__init__.py +0 -20
  160. csv_detective/detect_labels/other/float/__init__.py +0 -8
  161. csv_detective/detect_labels/other/int/__init__.py +0 -8
  162. csv_detective/detect_labels/other/money/__init__.py +0 -8
  163. csv_detective/detect_labels/other/mongo_object_id/__init__.py +0 -8
  164. csv_detective/detect_labels/other/twitter/__init__.py +0 -8
  165. csv_detective/detect_labels/other/url/__init__.py +0 -23
  166. csv_detective/detect_labels/other/uuid/__init__.py +0 -8
  167. csv_detective/detect_labels/temp/__init__.py +0 -0
  168. csv_detective/detect_labels/temp/date/__init__.py +0 -28
  169. csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +0 -19
  170. csv_detective/detect_labels/temp/year/__init__.py +0 -19
  171. csv_detective/load_tests.py +0 -59
  172. csv_detective-0.9.3.dev2241.dist-info/RECORD +0 -166
  173. /csv_detective/{detect_fields/FR/other/csp_insee → formats/data}/csp_insee.txt +0 -0
  174. /csv_detective/{detect_fields/geo/iso_country_code_alpha2 → formats/data}/iso_country_code_alpha2.txt +0 -0
  175. /csv_detective/{detect_fields/geo/iso_country_code_alpha3 → formats/data}/iso_country_code_alpha3.txt +0 -0
  176. /csv_detective/{detect_fields/geo/iso_country_code_numeric → formats/data}/iso_country_code_numeric.txt +0 -0
  177. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/WHEEL +0 -0
  178. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/entry_points.txt +0 -0
  179. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/licenses/LICENSE +0 -0
  180. {csv_detective-0.9.3.dev2241.dist-info → csv_detective-0.9.3.dev2319.dist-info}/top_level.txt +0 -0
@@ -1,14 +0,0 @@
1
- import re
2
- from os.path import dirname, join
3
-
4
- PROPORTION = 1
5
-
6
- with open(join(dirname(__file__), "iso_country_code_alpha3.txt"), "r") as iofile:
7
- liste_pays = iofile.read().split("\n")
8
-
9
-
10
- def _is(val):
11
- """Renvoie True si val peut etre un code iso pays alpha-3, False sinon"""
12
- if not isinstance(val, str) or not bool(re.match(r"[A-Z]{3}$", val)):
13
- return False
14
- return val in set(liste_pays)
@@ -1,15 +0,0 @@
1
- import re
2
- from os.path import dirname, join
3
-
4
- PROPORTION = 1
5
-
6
- with open(join(dirname(__file__), "iso_country_code_numeric.txt"), "r") as iofile:
7
- liste_pays = iofile.read().split("\n")
8
- liste_pays = set(liste_pays)
9
-
10
-
11
- def _is(val):
12
- """Renvoie True si val peut etre un code iso pays numerique, False sinon"""
13
- if not isinstance(val, str) or not bool(re.match(r"[0-9]{3}$", val)):
14
- return False
15
- return val in liste_pays
@@ -1,18 +0,0 @@
1
- import json
2
-
3
- PROPORTION = 0.9
4
-
5
-
6
- def _is(val):
7
- """Renvoie True si val peut etre un geojson"""
8
-
9
- try:
10
- j = json.loads(val)
11
- if isinstance(j, dict):
12
- if "type" in j and "coordinates" in j:
13
- return True
14
- if "geometry" in j and "coordinates" in j["geometry"]:
15
- return True
16
- except Exception:
17
- pass
18
- return False
@@ -1,13 +0,0 @@
1
- from csv_detective.detect_fields.other.float import _is as is_float
2
-
3
- PROPORTION = 1
4
-
5
-
6
- def _is(val):
7
- """Renvoie True si val peut etre une latitude"""
8
- try:
9
- return is_float(val) and float(val) >= -90 and float(val) <= 90
10
- except ValueError:
11
- return False
12
- except OverflowError:
13
- return False
@@ -1,16 +0,0 @@
1
- from ..latitude_wgs import _is as is_lat
2
- from ..longitude_wgs import _is as is_lon
3
-
4
- PROPORTION = 1
5
-
6
-
7
- def _is(val):
8
- """Renvoie True si val peut etre une latitude,longitude"""
9
-
10
- if not isinstance(val, str) or val.count(",") != 1:
11
- return False
12
- lat, lon = val.split(",")
13
- # handling [lat,lon]
14
- if lat.startswith("[") and lon.endswith("]"):
15
- lat, lon = lat[1:], lon[:-1]
16
- return is_lat(lat) and is_lon(lon.replace(" ", ""))
@@ -1,13 +0,0 @@
1
- from csv_detective.detect_fields.other.float import _is as is_float
2
-
3
- PROPORTION = 1
4
-
5
-
6
- def _is(val):
7
- """Renvoie True si val peut etre une longitude"""
8
- try:
9
- return is_float(val) and float(val) >= -180 and float(val) <= 180
10
- except ValueError:
11
- return False
12
- except OverflowError:
13
- return False
@@ -1,16 +0,0 @@
1
- from ..latitude_wgs import _is as is_lat
2
- from ..longitude_wgs import _is as is_lon
3
-
4
- PROPORTION = 1
5
-
6
-
7
- def _is(val):
8
- """Renvoie True si val peut etre une longitude,latitude"""
9
-
10
- if not isinstance(val, str) or val.count(",") != 1:
11
- return False
12
- lon, lat = val.split(",")
13
- # handling [lon,lat]
14
- if lon.startswith("[") and lat.endswith("]"):
15
- lon, lat = lon[1:], lat[:-1]
16
- return is_lon(lon) and is_lat(lat.replace(" ", ""))
File without changes
@@ -1,10 +0,0 @@
1
- import re
2
-
3
- PROPORTION = 0.9
4
-
5
-
6
- def _is(val):
7
- """Detects e-mails"""
8
- return isinstance(val, str) and bool(
9
- re.match(r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$", val, re.IGNORECASE)
10
- )
@@ -1,11 +0,0 @@
1
- from ..float import _is as is_float
2
-
3
- currencies = set(["€", "$", "£", "¥"])
4
-
5
- PROPORTION = 0.8
6
-
7
-
8
- def _is(val: str):
9
- if not isinstance(val, str) or val[-1] not in currencies:
10
- return False
11
- return is_float(val[:-1])
@@ -1,8 +0,0 @@
1
- import re
2
-
3
- PROPORTION = 0.8
4
-
5
-
6
- def _is(val):
7
- """Detects Mongo ObjectIds"""
8
- return isinstance(val, str) and bool(re.match(r"^[0-9a-fA-F]{24}$", val))
@@ -1,9 +0,0 @@
1
- from ..float import _is as is_float
2
-
3
- PROPORTION = 0.8
4
-
5
-
6
- def _is(val: str):
7
- if not isinstance(val, str) or val[-1] != "%":
8
- return False
9
- return is_float(val[:-1])
@@ -1,8 +0,0 @@
1
- import re
2
-
3
- PROPORTION = 1
4
-
5
-
6
- def _is(val):
7
- """Detects twitter accounts"""
8
- return isinstance(val, str) and bool(re.match(r"^@[A-Za-z0-9_]+$", val))
@@ -1,14 +0,0 @@
1
- import re
2
-
3
- PROPORTION = 1
4
- url_pattern = re.compile(
5
- r"^((https?|ftp)://|www\.)(([A-Za-z0-9-]+\.)+[A-Za-z]{2,6})"
6
- r"(/[A-Za-z0-9._~:/?#[@!$&'()*+,;=%-]*)?$"
7
- )
8
-
9
-
10
- def _is(val):
11
- """Detects urls"""
12
- if not isinstance(val, str):
13
- return False
14
- return bool(url_pattern.match(val))
@@ -1,10 +0,0 @@
1
- import re
2
-
3
- PROPORTION = 0.8
4
-
5
-
6
- def _is(val):
7
- """Detects UUIDs"""
8
- return isinstance(val, str) and bool(
9
- re.match(r"^[{]?[0-9a-fA-F]{8}" + "-?([0-9a-fA-F]{4}-?)" + "{3}[0-9a-fA-F]{12}[}]?$", val)
10
- )
File without changes
@@ -1,10 +0,0 @@
1
- PROPORTION = 1
2
-
3
-
4
- def _is(val):
5
- """Returns True if val can be a year"""
6
- try:
7
- val = int(val)
8
- except ValueError:
9
- return False
10
- return (1800 <= val) and (val <= 2100)
File without changes
File without changes
@@ -1,15 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "adresse",
9
- "adresse postale",
10
- "adresse geographique",
11
- "adr",
12
- "adresse complete",
13
- "adresse station",
14
- ]
15
- return header_score(header, words_combinations_list)
@@ -1,17 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "code commune insee",
9
- "code insee",
10
- "codes insee",
11
- "code commune",
12
- "code insee commune",
13
- "insee",
14
- "code com",
15
- "com",
16
- ]
17
- return header_score(header, words_combinations_list)
@@ -1,15 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # "dep": Possible confusion with dep name?
8
- words_combinations_list = [
9
- "code departement",
10
- "code_departement",
11
- "dep",
12
- "departement",
13
- "dept",
14
- ]
15
- return header_score(header, words_combinations_list)
@@ -1,12 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "cadastre1",
9
- "code fantoir",
10
- "fantoir",
11
- ]
12
- return header_score(header, words_combinations_list)
@@ -1,16 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "code postal",
9
- "postal code",
10
- "postcode",
11
- "post code",
12
- "cp",
13
- "codes postaux",
14
- "location postcode",
15
- ]
16
- return header_score(header, words_combinations_list)
@@ -1,14 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # "reg" : possible confusion with region name?
8
- words_combinations_list = [
9
- "code region",
10
- "reg",
11
- "code insee region",
12
- "region",
13
- ]
14
- return header_score(header, words_combinations_list)
@@ -1,12 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "commune",
9
- "ville",
10
- "libelle commune",
11
- ]
12
- return header_score(header, words_combinations_list)
@@ -1,22 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "departement",
9
- "libelle du departement",
10
- "deplib",
11
- "nom dept",
12
- "dept",
13
- "libdepartement",
14
- "nom departement",
15
- "libelle dep",
16
- "libelle departement",
17
- "lb departements",
18
- "dep libusage",
19
- "lb departement",
20
- "nom dep",
21
- ]
22
- return header_score(header, words_combinations_list)
@@ -1,13 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "insee canton",
9
- "canton",
10
- "cant",
11
- "nom canton",
12
- ]
13
- return header_score(header, words_combinations_list)
@@ -1,30 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # Does not always detect CRS
8
- words_combinations_list = [
9
- "latitude",
10
- "lat",
11
- "y",
12
- "yf",
13
- "yd",
14
- "y l93",
15
- "coordonnee y",
16
- "latitude lb93",
17
- "coord y",
18
- "ycoord",
19
- "geocodage y gps",
20
- "location latitude",
21
- "ylatitude",
22
- "ylat",
23
- "latitude (y)",
24
- "latitudeorg",
25
- "coordinates.latitude",
26
- "googlemap latitude",
27
- "latitudelieu",
28
- "latitude googlemap",
29
- ]
30
- return header_score(header, words_combinations_list)
@@ -1,30 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "latitude",
9
- "lat",
10
- "y",
11
- "yf",
12
- "yd",
13
- "coordonnee y",
14
- "coord y",
15
- "ycoord",
16
- "geocodage y gps",
17
- "location latitude",
18
- "ylatitude",
19
- "ylat",
20
- "latitude (y)",
21
- "latitudeorg",
22
- "coordinates.latitude",
23
- "googlemap latitude",
24
- "latitudelieu",
25
- "latitude googlemap",
26
- "latitude wgs84",
27
- "y wgs84",
28
- "latitude (wgs84)",
29
- ]
30
- return header_score(header, words_combinations_list)
@@ -1,21 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # Does not detect CRS
8
- words_combinations_list = [
9
- "longitude",
10
- "lon",
11
- "long",
12
- "geocodage x gps",
13
- "location longitude",
14
- "xlongitude",
15
- "lng",
16
- "xlong",
17
- "x",
18
- "xf",
19
- "xd",
20
- ]
21
- return header_score(header, words_combinations_list)
@@ -1,21 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # Does not detect CRS
8
- words_combinations_list = [
9
- "longitude",
10
- "lon",
11
- "long",
12
- "geocodage x gps",
13
- "location longitude",
14
- "xlongitude",
15
- "lng",
16
- "xlong",
17
- "x",
18
- "xf",
19
- "xd",
20
- ]
21
- return header_score(header, words_combinations_list)
@@ -1,20 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "pays",
9
- "payslieu",
10
- "paysorg",
11
- "country",
12
- "pays lib",
13
- "lieupays",
14
- "pays beneficiaire",
15
- "nom du pays",
16
- "journey start country",
17
- "libelle pays",
18
- "journey end country",
19
- ]
20
- return header_score(header, words_combinations_list)
@@ -1,20 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "region",
9
- "libelle region",
10
- "nom region",
11
- "libelle reg",
12
- "nom reg",
13
- "reg libusage",
14
- "nom de la region",
15
- "regionorg",
16
- "regionlieu",
17
- "reg",
18
- "nom officiel region",
19
- ]
20
- return header_score(header, words_combinations_list)
File without changes
@@ -1,8 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = ["code csp insee", "code csp"]
8
- return header_score(header, words_combinations_list)
@@ -1,13 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "code rna",
9
- "rna",
10
- "n° inscription association",
11
- "identifiant association",
12
- ]
13
- return header_score(header, words_combinations_list)
@@ -1,8 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = ["code waldec", "waldec"]
8
- return header_score(header, words_combinations_list)
@@ -1,13 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # To improve? No specific header found in data
8
- words_combinations_list = [
9
- "csp insee",
10
- "csp",
11
- "categorie socioprofessionnelle",
12
- ]
13
- return header_score(header, words_combinations_list)
@@ -1,9 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- # To improve: no header specific to "fr" found in data
8
- words_combinations_list = ["date"]
9
- return header_score(header, words_combinations_list)
@@ -1,15 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "code ape",
9
- "code activite (ape)",
10
- "code naf",
11
- "code naf organisme designe",
12
- "code naf organisme designant",
13
- "base sirene : code ape de l'etablissement siege",
14
- ]
15
- return header_score(header, words_combinations_list)
@@ -1,8 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = ["sexe", "sex", "civilite", "genre", "id sexe"]
8
- return header_score(header, words_combinations_list)
@@ -1,17 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "siren",
9
- "siren organisme designe",
10
- "siren organisme designant",
11
- "n° siren",
12
- "siren organisme",
13
- "siren titulaire",
14
- "numero siren",
15
- "epci",
16
- ]
17
- return header_score(header, words_combinations_list)
@@ -1,16 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "siret",
9
- "siret d",
10
- "num siret",
11
- "siretacheteur",
12
- "n° siret",
13
- "coll siret",
14
- "epci",
15
- ]
16
- return header_score(header, words_combinations_list)
@@ -1,20 +0,0 @@
1
- from csv_detective.parsing.text import header_score
2
-
3
- PROPORTION = 0.5
4
-
5
-
6
- def _is(header: str) -> float:
7
- words_combinations_list = [
8
- "telephone",
9
- "tel",
10
- "tel1",
11
- "tel2",
12
- "phone",
13
- "num tel",
14
- "tel mob",
15
- "telephone sav",
16
- "telephone1",
17
- "coordinates.phone",
18
- "telephone du lieu",
19
- ]
20
- return header_score(header, words_combinations_list)