csv-detective 0.8.1.dev1578__py3-none-any.whl → 0.8.1.dev1599__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/geo/json_geojson/__init__.py +6 -10
- csv_detective/detect_fields/other/json/__init__.py +3 -13
- csv_detective/detect_labels/other/booleen/__init__.py +1 -1
- csv_detective/detection/formats.py +0 -2
- csv_detective/output/utils.py +56 -38
- csv_detective/parsing/columns.py +0 -1
- {csv_detective-0.8.1.dev1578.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/METADATA +1 -1
- {csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/RECORD +16 -16
- tests/test_fields.py +26 -1
- {csv_detective-0.8.1.dev1578.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/LICENSE +0 -0
- {csv_detective-0.8.1.dev1578.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/top_level.txt +0 -0
|
@@ -1,22 +1,18 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from json import JSONDecodeError
|
|
3
2
|
|
|
4
3
|
PROPORTION = 0.9
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
def _is(val):
|
|
8
|
-
|
|
7
|
+
"""Renvoie True si val peut etre un geojson"""
|
|
9
8
|
|
|
10
9
|
try:
|
|
11
10
|
j = json.loads(val)
|
|
12
|
-
if
|
|
13
|
-
|
|
14
|
-
if 'geometry' in j:
|
|
15
|
-
if 'coordinates' in j['geometry']:
|
|
11
|
+
if isinstance(j, dict):
|
|
12
|
+
if "type" in j and "coordinates" in j:
|
|
16
13
|
return True
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
except
|
|
14
|
+
if "geometry" in j and "coordinates" in j["geometry"]:
|
|
15
|
+
return True
|
|
16
|
+
except Exception:
|
|
20
17
|
pass
|
|
21
|
-
|
|
22
18
|
return False
|
|
@@ -5,20 +5,10 @@ PROPORTION = 1
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def _is(val):
|
|
8
|
-
|
|
8
|
+
"""Detects json"""
|
|
9
9
|
try:
|
|
10
10
|
loaded = json.loads(val)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
any(
|
|
14
|
-
[
|
|
15
|
-
geo in loaded for geo in ['coordinates', 'geometry']
|
|
16
|
-
]
|
|
17
|
-
)
|
|
18
|
-
)
|
|
19
|
-
):
|
|
20
|
-
return True
|
|
21
|
-
else:
|
|
22
|
-
return False
|
|
11
|
+
# we don't want to consider integers for instance
|
|
12
|
+
return isinstance(loaded, (list, dict))
|
|
23
13
|
except (JSONDecodeError, TypeError):
|
|
24
14
|
return False
|
|
@@ -110,11 +110,9 @@ def detect_formats(
|
|
|
110
110
|
"datetime_naive": "datetime",
|
|
111
111
|
"datetime_rfc822": "datetime",
|
|
112
112
|
"date": "date",
|
|
113
|
-
"latitude": "float",
|
|
114
113
|
"latitude_l93": "float",
|
|
115
114
|
"latitude_wgs": "float",
|
|
116
115
|
"latitude_wgs_fr_metropole": "float",
|
|
117
|
-
"longitude": "float",
|
|
118
116
|
"longitude_l93": "float",
|
|
119
117
|
"longitude_wgs": "float",
|
|
120
118
|
"longitude_wgs_fr_metropole": "float",
|
csv_detective/output/utils.py
CHANGED
|
@@ -2,55 +2,73 @@ import pandas as pd
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
|
|
5
|
+
# -> dict[str, dict | list[dict]] (to be added when upgrading to python>=3.10)
|
|
5
6
|
return_dict_cols = return_table.to_dict("dict")
|
|
6
|
-
|
|
7
|
+
output_dict = {}
|
|
7
8
|
for column_name in return_dict_cols:
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
9
|
+
# keep only formats with a non-zero score
|
|
10
|
+
output_dict[column_name] = [
|
|
11
|
+
{
|
|
12
|
+
"format": detected_value_type,
|
|
13
|
+
"score": return_dict_cols[column_name][detected_value_type],
|
|
14
|
+
}
|
|
15
|
+
for detected_value_type in return_dict_cols[column_name]
|
|
16
|
+
if return_dict_cols[column_name][detected_value_type] > 0
|
|
17
|
+
]
|
|
18
|
+
priorities = [
|
|
19
|
+
# no need to specify int and float everywhere, they are deprioritized anyway
|
|
20
|
+
("int", ("float",)),
|
|
21
|
+
# bool over everything
|
|
22
|
+
("booleen", (
|
|
23
|
+
"latitude_l93",
|
|
24
|
+
"latitude_wgs",
|
|
25
|
+
"latitude_wgs_fr_metropole",
|
|
26
|
+
"longitude_l93",
|
|
27
|
+
"longitude_wgs",
|
|
28
|
+
"longitude_wgs_fr_metropole",
|
|
29
|
+
)),
|
|
30
|
+
("geojson", ("json",)),
|
|
31
|
+
# latlon over lonlat if no longitude allows to discriminate
|
|
32
|
+
("latlon_wgs", ("json", "lonlat_wgs")),
|
|
33
|
+
("lonlat_wgs", ("json",)),
|
|
34
|
+
("latitude_wgs_fr_metropole", ("latitude_l93", "latitude_wgs")),
|
|
35
|
+
("longitude_wgs_fr_metropole", ("longitude_l93", "longitude_wgs")),
|
|
36
|
+
("latitude_wgs", ("latitude_l93",)),
|
|
37
|
+
("longitude_wgs", ("longitude_l93",)),
|
|
38
|
+
("code_region", ("code_departement",)),
|
|
39
|
+
("datetime_rfc822", ("datetime_aware",)),
|
|
40
|
+
]
|
|
41
|
+
detected_formats = set(x["format"] for x in output_dict[column_name])
|
|
21
42
|
formats_to_remove = set()
|
|
22
43
|
# Deprioritise float and int detection vs others
|
|
23
|
-
if len(
|
|
44
|
+
if len(detected_formats - {"float", "int"}) > 0:
|
|
24
45
|
formats_to_remove = formats_to_remove.union({"float", "int"})
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if "latlon_wgs" in formats_detected:
|
|
41
|
-
formats_to_remove.add("lonlat_wgs")
|
|
46
|
+
# Deprioritize less specific formats if:
|
|
47
|
+
# secondary score is even or worse
|
|
48
|
+
# or priority score is at least 1 (max of the field score)
|
|
49
|
+
for prio_format, secondary_formats in priorities:
|
|
50
|
+
if prio_format in detected_formats:
|
|
51
|
+
for secondary in secondary_formats:
|
|
52
|
+
if (
|
|
53
|
+
secondary in detected_formats
|
|
54
|
+
and (
|
|
55
|
+
return_dict_cols[column_name][prio_format]
|
|
56
|
+
>= return_dict_cols[column_name][secondary]
|
|
57
|
+
or return_dict_cols[column_name][prio_format] >= 1
|
|
58
|
+
)
|
|
59
|
+
):
|
|
60
|
+
formats_to_remove.add(secondary)
|
|
42
61
|
|
|
43
|
-
formats_to_keep =
|
|
62
|
+
formats_to_keep = detected_formats - formats_to_remove
|
|
44
63
|
|
|
45
|
-
detections =
|
|
46
|
-
detections = [x for x in detections if x["format"] in formats_to_keep]
|
|
64
|
+
detections = [x for x in output_dict[column_name] if x["format"] in formats_to_keep]
|
|
47
65
|
if not limited_output:
|
|
48
|
-
|
|
66
|
+
output_dict[column_name] = detections
|
|
49
67
|
else:
|
|
50
|
-
|
|
68
|
+
output_dict[column_name] = (
|
|
51
69
|
max(detections, key=lambda x: x["score"])
|
|
52
70
|
if len(detections) > 0
|
|
53
71
|
else {"format": "string", "score": 1.0}
|
|
54
72
|
)
|
|
55
73
|
|
|
56
|
-
return
|
|
74
|
+
return output_dict
|
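csv_detective/parsing/columns.py
CHANGED
|
{csv_detective-0.8.1.dev1578.data → csv_detective-0.8.1.dev1599.data}/data/share/csv_detective/CHANGELOG.md
CHANGED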
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
- Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
|
|
12
12
|
- Remove `datetime_iso` format due to ambiguous cast in db (can be naive or aware) [#132](https://github.com/datagouv/csv-detective/pull/132)
|
|
13
13
|
- Add `lonlat_wgs` format and handle optional brackets for `latlon_wgs` [#133](https://github.com/datagouv/csv-detective/pull/133)
|
|
14
|
+
- Refactor format prioritizing [#134](https://github.com/datagouv/csv-detective/pull/134)
|
|
14
15
|
|
|
15
16
|
## 0.8.0 (2025-05-20)
|
|
16
17
|
|
|
@@ -49,7 +49,7 @@ csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=u98rn
|
|
|
49
49
|
csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
|
|
50
50
|
csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=wJAynAkGZN7jKeI3xOeLXQ_irxQBb_J56pRkLDYVClY,436
|
|
51
51
|
csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
|
|
52
|
-
csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=
|
|
52
|
+
csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=6wlwlxQmsVIZ21g-THvH3nBj-I8FuoF2sBlZAoEMGiQ,393
|
|
53
53
|
csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
|
|
54
54
|
csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
|
|
55
55
|
csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7gwDNGt1a4B_A8hkCBkIxn3THDCUFk,330
|
|
@@ -59,7 +59,7 @@ csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JR
|
|
|
59
59
|
csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqEEDhlsUJjxFZNaNFM4GZaws,178
|
|
60
60
|
csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
|
|
61
61
|
csv_detective/detect_fields/other/int/__init__.py,sha256=QN3kQJLYqLRBiubUK7g4Xq03PlA5wqVwx2pPPIO9FdI,320
|
|
62
|
-
csv_detective/detect_fields/other/json/__init__.py,sha256=
|
|
62
|
+
csv_detective/detect_fields/other/json/__init__.py,sha256=AkRWZAidEM1dWkVRFThEBI5M7kMUu5Yu12iCViGM8lU,310
|
|
63
63
|
csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
|
|
64
64
|
csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
|
|
65
65
|
csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
|
|
@@ -115,7 +115,7 @@ csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp
|
|
|
115
115
|
csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
|
|
116
116
|
csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=NNKlFcMsKVqnUKEm_4flGxcNUGS2-iS3m6ihQf2AVTk,345
|
|
117
117
|
csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
-
csv_detective/detect_labels/other/booleen/__init__.py,sha256=
|
|
118
|
+
csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
|
|
119
119
|
csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
|
|
120
120
|
csv_detective/detect_labels/other/float/__init__.py,sha256=X0axZN2GAfC_y01zRfIyvOfRsOy2KNQcQ-mlQAKxqT4,216
|
|
121
121
|
csv_detective/detect_labels/other/int/__init__.py,sha256=_1AY7thEBCcgSBQQ2YbY4YaPaxGRQ71BtmaFaX088ig,215
|
|
@@ -132,7 +132,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
132
132
|
csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
|
|
133
133
|
csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
|
|
134
134
|
csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
|
|
135
|
-
csv_detective/detection/formats.py,sha256=
|
|
135
|
+
csv_detective/detection/formats.py,sha256=c0LFTWbibWbEJSZaPy_86LIMOY3qRxj-I_agwpb4zbI,6284
|
|
136
136
|
csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
|
|
137
137
|
csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
|
|
138
138
|
csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
|
|
@@ -142,27 +142,27 @@ csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZ
|
|
|
142
142
|
csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
|
|
143
143
|
csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
|
|
144
144
|
csv_detective/output/schema.py,sha256=yC9K1vw6NUTULNv9a7CaMGns9iXmbzFLbtHI4wegqEc,13812
|
|
145
|
-
csv_detective/output/utils.py,sha256=
|
|
145
|
+
csv_detective/output/utils.py,sha256=xPM2KYdqousmjU22-w7HnaF6AR74fj8lhQY77Y9xs7w,3310
|
|
146
146
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
csv_detective/parsing/columns.py,sha256=
|
|
147
|
+
csv_detective/parsing/columns.py,sha256=aMdG6-G-2Tj_2JdHotAIveQwaG_r8chGcGieFiUaBRk,5634
|
|
148
148
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
149
149
|
csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
|
|
150
150
|
csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
|
|
151
151
|
csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
|
|
152
152
|
csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
|
|
153
|
-
csv_detective-0.8.1.
|
|
154
|
-
csv_detective-0.8.1.
|
|
155
|
-
csv_detective-0.8.1.
|
|
156
|
-
csv_detective-0.8.1.
|
|
153
|
+
csv_detective-0.8.1.dev1599.data/data/share/csv_detective/CHANGELOG.md,sha256=WQ8cTB2D5YkAJ9AsS2ziKtZL8m1sPclGPenTD1BxZ_g,9646
|
|
154
|
+
csv_detective-0.8.1.dev1599.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
155
|
+
csv_detective-0.8.1.dev1599.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
|
|
156
|
+
csv_detective-0.8.1.dev1599.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
157
157
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
158
|
tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
|
|
159
|
-
tests/test_fields.py,sha256=
|
|
159
|
+
tests/test_fields.py,sha256=IwMpjOn8W5kDCvJYp3Cer4m571qomzjupOAvSRFMg_Q,11819
|
|
160
160
|
tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
|
|
161
161
|
tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
|
|
162
162
|
tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
|
|
163
163
|
tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
|
|
164
|
-
csv_detective-0.8.1.
|
|
165
|
-
csv_detective-0.8.1.
|
|
166
|
-
csv_detective-0.8.1.
|
|
167
|
-
csv_detective-0.8.1.
|
|
168
|
-
csv_detective-0.8.1.
|
|
164
|
+
csv_detective-0.8.1.dev1599.dist-info/METADATA,sha256=NoE1tBjCZxO2uffbH9wSgkuNzOVOgLRA2qkjth7ynyk,10443
|
|
165
|
+
csv_detective-0.8.1.dev1599.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
csv_detective-0.8.1.dev1599.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
167
|
+
csv_detective-0.8.1.dev1599.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
168
|
+
csv_detective-0.8.1.dev1599.dist-info/RECORD,,
|
tests/test_fields.py
CHANGED
|
@@ -72,6 +72,7 @@ from csv_detective.detection.variables import (
|
|
|
72
72
|
)
|
|
73
73
|
from csv_detective.load_tests import return_all_tests
|
|
74
74
|
from csv_detective.output.dataframe import cast
|
|
75
|
+
from csv_detective.output.utils import prepare_output_dict
|
|
75
76
|
|
|
76
77
|
|
|
77
78
|
def test_all_tests_return_bool():
|
|
@@ -285,7 +286,7 @@ fields = {
|
|
|
285
286
|
},
|
|
286
287
|
json: {
|
|
287
288
|
True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
|
|
288
|
-
False: [
|
|
289
|
+
False: ["5", '{"zefib":', '{"a"}'],
|
|
289
290
|
},
|
|
290
291
|
money: {
|
|
291
292
|
True: ["120€", "-20.2$"],
|
|
@@ -410,3 +411,27 @@ def test_fields_with_values(args):
|
|
|
410
411
|
def test_cast(args):
|
|
411
412
|
value, detected_type, cast_type = args
|
|
412
413
|
assert isinstance(cast(value, detected_type), cast_type)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@pytest.mark.parametrize(
|
|
417
|
+
"args",
|
|
418
|
+
(
|
|
419
|
+
# there is a specific numerical format => specific wins
|
|
420
|
+
({"int": 1, "float": 1, "latitude_wgs": 1}, "latitude_wgs"),
|
|
421
|
+
# scores are equal for related formats => priority wins
|
|
422
|
+
({"int": 1, "float": 1}, "int"),
|
|
423
|
+
# score is lower for priority format => secondary wins
|
|
424
|
+
({"int": 0.5, "float": 1}, "float"),
|
|
425
|
+
# score is lower for priority format, but is 1 => priority wins
|
|
426
|
+
({"int": 1, "float": 1.25}, "int"),
|
|
427
|
+
# two rounds of priority => highest priority wins
|
|
428
|
+
({"latlon_wgs": 1, "lonlat_wgs": 1, "json": 1}, "latlon_wgs"),
|
|
429
|
+
# no detection => default to string
|
|
430
|
+
({}, "string"),
|
|
431
|
+
),
|
|
432
|
+
)
|
|
433
|
+
def test_priority(args):
|
|
434
|
+
detections, expected = args
|
|
435
|
+
col = "col1"
|
|
436
|
+
output = prepare_output_dict(pd.DataFrame({col: detections}), limited_output=True)
|
|
437
|
+
assert output[col]["format"] == expected
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1578.dist-info → csv_detective-0.8.1.dev1599.dist-info}/top_level.txt
RENAMED
|
File without changes
|