csv-detective 0.8.1.dev1549__py3-none-any.whl → 0.8.1.dev1578__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,6 +53,7 @@ from .geo import (
53
53
  latitude_wgs,
54
54
  longitude_wgs,
55
55
  latlon_wgs,
56
+ lonlat_wgs,
56
57
  json_geojson,
57
58
  )
58
59
 
@@ -5,9 +5,12 @@ PROPORTION = 1
5
5
 
6
6
 
7
7
  def _is(val):
8
- '''Renvoie True si val peut etre une latitude,longitude'''
8
+ """Renvoie True si val peut etre une latitude,longitude"""
9
9
 
10
10
  if not isinstance(val, str) or val.count(",") != 1:
11
11
  return False
12
12
  lat, lon = val.split(",")
13
+ # handling [lat,lon]
14
+ if lat.startswith("[") and lon.endswith("]"):
15
+ lat, lon = lat[1:], lon[:-1]
13
16
  return is_lat(lat) and is_lon(lon.replace(" ", ""))
@@ -0,0 +1,16 @@
1
+ from ..latitude_wgs import _is as is_lat
2
+ from ..longitude_wgs import _is as is_lon
3
+
4
+ PROPORTION = 1
5
+
6
+
7
+ def _is(val):
8
+ """Renvoie True si val peut etre une longitude,latitude"""
9
+
10
+ if not isinstance(val, str) or val.count(",") != 1:
11
+ return False
12
+ lon, lat = val.split(",")
13
+ # handling [lon,lat]
14
+ if lon.startswith("[") and lat.endswith("]"):
15
+ lon, lat = lon[1:], lat[:-1]
16
+ return is_lon(lon) and is_lat(lat.replace(" ", ""))
@@ -14,7 +14,7 @@ from .FR.geo import (
14
14
  longitude_l93,
15
15
  longitude_wgs_fr_metropole,
16
16
  pays,
17
- region
17
+ region,
18
18
  )
19
19
  from .FR.other import (
20
20
  code_csp_insee,
@@ -27,7 +27,7 @@ from .FR.other import (
27
27
  siren,
28
28
  siret,
29
29
  tel_fr,
30
- uai
30
+ uai,
31
31
  )
32
32
  from .FR.temp import jour_de_la_semaine, mois_de_annee
33
33
  from .geo import (
@@ -37,7 +37,8 @@ from .geo import (
37
37
  json_geojson,
38
38
  latitude_wgs,
39
39
  latlon_wgs,
40
- longitude_wgs
40
+ longitude_wgs,
41
+ lonlat_wgs,
41
42
  )
42
43
  from .other import booleen, email, float, int, money, mongo_object_id, twitter, url, uuid
43
44
  from .temp import date, datetime_rfc822, year
@@ -2,42 +2,45 @@ from csv_detective.parsing.text import header_score
2
2
 
3
3
  PROPORTION = 0.5
4
4
 
5
+ COMMON_COORDS_LABELS = [
6
+ "c geo",
7
+ "code geo",
8
+ "coord gps",
9
+ "coordonnees",
10
+ "coordonnees ban",
11
+ "coordonnees finales",
12
+ "coordonnees geo",
13
+ "coordonnees geographiques",
14
+ "coordonnees geoloc",
15
+ "coordonnees geoloc",
16
+ "coordonnees gps",
17
+ "coordonnees insee",
18
+ "coordonnees xy",
19
+ "geo",
20
+ "geo coordinates",
21
+ "geo cp",
22
+ "geo localisation",
23
+ "geo point",
24
+ "geo point 2d",
25
+ "geolocalisation",
26
+ "geom x y",
27
+ "geometry x y",
28
+ "geopoint",
29
+ "point geo",
30
+ "point geo insee",
31
+ "position",
32
+ "position geographique",
33
+ "wgs84",
34
+ "x y",
35
+ "xy",
36
+ ]
37
+
5
38
 
6
39
  def _is(header: str) -> float:
7
40
  words_combinations_list = [
8
41
  "latlon wgs",
9
42
  "latlon",
10
- "geo point",
11
- "geo point 2d",
12
- "wgs84",
13
- "geolocalisation",
14
- "geo",
15
- "coordonnees finales",
16
- "coordonnees",
17
- "coordonnees ban",
18
- "xy",
19
- "geometry x y",
20
- "coordonnees insee",
21
- "coordonnees geographiques",
22
- "position",
23
- "coordonnes gps",
24
- "geopoint",
25
- "geom x y",
26
- "coord gps",
27
43
  "latlong",
28
- "position geographique",
29
- "c geo",
30
- "coordonnes geoloc",
31
44
  "lat lon",
32
- "code geo",
33
- "geo localisation",
34
- "coordonnes geo",
35
- "geo cp",
36
- "x y",
37
- "geo coordinates",
38
- "point geo",
39
- "point geo insee",
40
- "coordonnees geoloc",
41
- "coordonnees xy",
42
- ]
45
+ ] + COMMON_COORDS_LABELS
43
46
  return header_score(header, words_combinations_list)
@@ -0,0 +1,14 @@
1
+ from csv_detective.parsing.text import header_score
2
+ from ..latlon_wgs import COMMON_COORDS_LABELS
3
+
4
+ PROPORTION = 0.5
5
+
6
+
7
+ def _is(header: str) -> float:
8
+ words_combinations_list = [
9
+ "lonlat wgs",
10
+ "lonlat",
11
+ "longlat",
12
+ "lon lat",
13
+ ] + COMMON_COORDS_LABELS
14
+ return header_score(header, words_combinations_list)
@@ -51,6 +51,7 @@ def get_description(format: str) -> str:
51
51
  "latitude_wgs": "La latitude au format WGS",
52
52
  "longitude_wgs": "La longitude au format WGS",
53
53
  "latlon_wgs": "Les coordonnées XY (latitude et longitude)",
54
+ "lonlat_wgs": "Les coordonnées XY (longitude et latitude)",
54
55
  "booleen": "Booléen",
55
56
  "email": "L'adresse couriel (email)",
56
57
  "float": "Nombre flottant (à virgule)",
@@ -116,6 +117,7 @@ def get_validata_type(format: str) -> str:
116
117
  "latitude_wgs": "number",
117
118
  "latitude_wgs_fr_metropole": "number",
118
119
  "latlon_wgs": "geo_point",
120
+ "lonlat_wgs": "geo_point",
119
121
  "longitude": "number",
120
122
  "longitude_l93": "number",
121
123
  "longitude_wgs": "number",
@@ -162,6 +164,7 @@ def get_example(format: str) -> str:
162
164
  "latitude_wgs": 42.42,
163
165
  "latitude_wgs_fr_metropole": 41.3,
164
166
  "latlon_wgs": "42.42, 0.0",
167
+ "lonlat_wgs": "0.0, 42.42",
165
168
  "longitude": 0.0,
166
169
  "longitude_l93": -357823,
167
170
  "longitude_wgs": 0.0,
@@ -36,6 +36,9 @@ def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
36
36
  formats_to_remove.add("code_departement")
37
37
  if "datetime_rfc822" in formats_detected:
38
38
  formats_to_remove.add("datetime_aware")
39
+ # if there is no way to discriminate the case, default to latlon
40
+ if "latlon_wgs" in formats_detected:
41
+ formats_to_remove.add("lonlat_wgs")
39
42
 
40
43
  formats_to_keep = formats_detected - formats_to_remove
41
44
 
@@ -10,6 +10,7 @@
10
10
  - Split aware and naive datetimes for hydra to cast them separately [#130](https://github.com/datagouv/csv-detective/pull/130)
11
11
  - Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
12
12
  - Remove `datetime_iso` format due to ambiguous cast in db (can be naive or aware) [#132](https://github.com/datagouv/csv-detective/pull/132)
13
+ - Add `lonlat_wgs` format and handle optional brackets for `latlon_wgs` [#133](https://github.com/datagouv/csv-detective/pull/133)
13
14
 
14
15
  ## 0.8.0 (2025-05-20)
15
16
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.8.1.dev1549
3
+ Version: 0.8.1.dev1578
4
4
  Summary: Detect tabular files column content
5
5
  Home-page: https://github.com/datagouv/csv_detective
6
6
  Author: Etalab
@@ -5,7 +5,7 @@ csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2
5
5
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
6
6
  csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
7
7
  csv_detective/validate.py,sha256=5Li_vfvU9wdfoZjNjef-MBUoKcKoJ-c7381QoX9aDXY,2818
8
- csv_detective/detect_fields/__init__.py,sha256=jThGn0_HO8U0mMoSbf38x8l46ABRQcmHcNLvjZqQQdc,984
8
+ csv_detective/detect_fields/__init__.py,sha256=0A5SZTp_IhhJ9z7lWeH4K5_0uwMK_VdMudjPm7oggVg,1000
9
9
  csv_detective/detect_fields/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  csv_detective/detect_fields/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  csv_detective/detect_fields/FR/geo/adresse/__init__.py,sha256=NqV8ULf9gY9iFnA1deKR-1Yobr96WwCsn5JfbP_MjiY,1675
@@ -51,8 +51,9 @@ csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=wJAy
51
51
  csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
52
52
  csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
53
53
  csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
54
- csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=7_mnO9uC_kI7e2WR8xIer7Kqw8zi-v-JKaAD4zcoGbE,342
54
+ csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
55
55
  csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7gwDNGt1a4B_A8hkCBkIxn3THDCUFk,330
56
+ csv_detective/detect_fields/geo/lonlat_wgs/__init__.py,sha256=CnBMYevfGdhBvureF3oc_zqT-RZjG419iAuUlugQFLc,454
56
57
  csv_detective/detect_fields/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
58
  csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JRpGb0da_E27iGxES9zWCrnsqc,497
58
59
  csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqEEDhlsUJjxFZNaNFM4GZaws,178
@@ -71,7 +72,7 @@ csv_detective/detect_fields/temp/datetime_aware/__init__.py,sha256=bEfWvXx_GNCRU
71
72
  csv_detective/detect_fields/temp/datetime_naive/__init__.py,sha256=GtQo55SrrXfoT-L7ZXW63jrlAYvNT5m56wMfhuY3pyI,836
72
73
  csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
73
74
  csv_detective/detect_fields/temp/year/__init__.py,sha256=RjsiIHoplnI4Odi5587TzRhKTDT-FTqGOBpdartuShA,194
74
- csv_detective/detect_labels/__init__.py,sha256=oVq2fiO6QkaWB0wZImL8YVW7oiwPky8ivmLZAFmK55Q,864
75
+ csv_detective/detect_labels/__init__.py,sha256=8vrFUrMc8a_VOC5gvYNMKL-Do_q9eMTrghJRI9Xotvk,883
75
76
  csv_detective/detect_labels/FR/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
77
  csv_detective/detect_labels/FR/geo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
78
  csv_detective/detect_labels/FR/geo/adresse/__init__.py,sha256=fNWFW-Wo3n6azDBfmi0J0qnzP-p2StLxCc9eNiE9NNE,346
@@ -110,8 +111,9 @@ csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py,sha256=biUZP
110
111
  csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py,sha256=biUZP8gAsVpjXLTx1WeS19qR4ia0pzpi6R69wJgu4B0,348
111
112
  csv_detective/detect_labels/geo/json_geojson/__init__.py,sha256=On8VOCDD0EspZra6fTQCXH4MYao2xmRu-o7xWcab7Jg,355
112
113
  csv_detective/detect_labels/geo/latitude_wgs/__init__.py,sha256=ME_KjniqDSdAwXP7XnKXyr5IA75KrGSLIhvPNfsux6E,664
113
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=dbWX1LKpoev7zwWthw9vlwGQp6CSlgYrTBnPpvyNC-A,989
114
+ csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp0Xs0FsPsOPpRWhDe1ockbWw8s,923
114
115
  csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
116
+ csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=NNKlFcMsKVqnUKEm_4flGxcNUGS2-iS3m6ihQf2AVTk,345
115
117
  csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
118
  csv_detective/detect_labels/other/booleen/__init__.py,sha256=BZwnfR-Zcv8dqscLrBKhttgwm4Dqq16M0PaGirxYWio,214
117
119
  csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
@@ -139,8 +141,8 @@ csv_detective/output/__init__.py,sha256=5KTevPfp_4MRxByJyOntQjToNfeG7dPQn-_13wSq
139
141
  csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZE-wE,2183
140
142
  csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
141
143
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
142
- csv_detective/output/schema.py,sha256=Hpav3RgIP7gOb93h154s1wNSlEZtHNJVzFDDwp54UcQ,13669
143
- csv_detective/output/utils.py,sha256=RcOkFQihwfmEIOD-gwrUKi2r5CwBbs17vkuAf8n7-Wo,2405
144
+ csv_detective/output/schema.py,sha256=yC9K1vw6NUTULNv9a7CaMGns9iXmbzFLbtHI4wegqEc,13812
145
+ csv_detective/output/utils.py,sha256=sAFc-oviDn3iR3kr8MdXQa14nCMWgAITsRYBW-f3WM0,2574
144
146
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
147
  csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
146
148
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
@@ -148,19 +150,19 @@ csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,
148
150
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
149
151
  csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
150
152
  csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
151
- csv_detective-0.8.1.dev1549.data/data/share/csv_detective/CHANGELOG.md,sha256=1jO_wJx_-DK1TqmdmIu2bmbnvg2iJ2iX78MEb29MZYY,9425
152
- csv_detective-0.8.1.dev1549.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
- csv_detective-0.8.1.dev1549.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
154
- csv_detective-0.8.1.dev1549.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
+ csv_detective-0.8.1.dev1578.data/data/share/csv_detective/CHANGELOG.md,sha256=arQzNTnW3gZp5MyTCoDkb4fMBM7BaMhGnxVF_AhYPp4,9556
154
+ csv_detective-0.8.1.dev1578.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
+ csv_detective-0.8.1.dev1578.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
156
+ csv_detective-0.8.1.dev1578.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
157
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
158
  tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
157
- tests/test_fields.py,sha256=tTFOmlb9gewtCwcZV7B6Gc3aH6xXK5kMUFSEBi7iIy4,10638
159
+ tests/test_fields.py,sha256=DzDtYXZDj2YJXNuKROHmoqA__PkE39sqETmnS3a01qQ,10887
158
160
  tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
159
161
  tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
160
162
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
161
163
  tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
162
- csv_detective-0.8.1.dev1549.dist-info/METADATA,sha256=LQkj1jrN7dsdUjGOV3Z8BRKANccZTxdXdoRWtAvOa6w,10443
163
- csv_detective-0.8.1.dev1549.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
164
- csv_detective-0.8.1.dev1549.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
165
- csv_detective-0.8.1.dev1549.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
166
- csv_detective-0.8.1.dev1549.dist-info/RECORD,,
164
+ csv_detective-0.8.1.dev1578.dist-info/METADATA,sha256=EcvSGV_PUHKAxG2HlebEXE05CuYh9ZND4erSbWCoIqo,10443
165
+ csv_detective-0.8.1.dev1578.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
+ csv_detective-0.8.1.dev1578.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
+ csv_detective-0.8.1.dev1578.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
168
+ csv_detective-0.8.1.dev1578.dist-info/RECORD,,
tests/test_fields.py CHANGED
@@ -44,6 +44,7 @@ from csv_detective.detect_fields.geo import (
44
44
  latitude_wgs,
45
45
  latlon_wgs,
46
46
  longitude_wgs,
47
+ lonlat_wgs,
47
48
  )
48
49
  from csv_detective.detect_fields.other import (
49
50
  booleen,
@@ -263,13 +264,17 @@ fields = {
263
264
  False: ["100"],
264
265
  },
265
266
  latlon_wgs: {
266
- True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8"],
267
- False: ["0.1,192", "-102, 92"],
267
+ True: ["43.2,-22.6", "-10.7,140", "-40.7, 10.8", "[12,-0.28]"],
268
+ False: ["0.1,192", "-102, 92", "[23.02,4.1", "23.02,4.1]", "160.1,-27"],
268
269
  },
269
270
  longitude_wgs: {
270
271
  True: ["120", "-20.2"],
271
272
  False: ["-200"],
272
273
  },
274
+ lonlat_wgs: {
275
+ True: ["-22.6,43.2", "140,-10.7", "10.8, -40.7", "[-0.28,12]"],
276
+ False: ["192,0.1", "92, -102", "[4.1,23.02", "4.1,23.02]", "-27,160.1"],
277
+ },
273
278
  booleen: {
274
279
  True: ["oui", "0", "1", "yes", "false", "True"],
275
280
  False: ["nein", "ja", "2", "-0"],