csv-detective 0.8.1.dev1578__py3-none-any.whl → 0.8.1.dev1599__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,18 @@
1
1
  import json
2
- from json import JSONDecodeError
3
2
 
4
3
  PROPORTION = 0.9
5
4
 
6
5
 
7
6
  def _is(val):
8
- '''Renvoie True si val peut etre geojson'''
7
+ """Renvoie True si val peut etre un geojson"""
9
8
 
10
9
  try:
11
10
  j = json.loads(val)
12
- if 'type' in j and 'coordinates' in j:
13
- return True
14
- if 'geometry' in j:
15
- if 'coordinates' in j['geometry']:
11
+ if isinstance(j, dict):
12
+ if "type" in j and "coordinates" in j:
16
13
  return True
17
- except JSONDecodeError:
18
- pass
19
- except TypeError:
14
+ if "geometry" in j and "coordinates" in j["geometry"]:
15
+ return True
16
+ except Exception:
20
17
  pass
21
-
22
18
  return False
@@ -5,20 +5,10 @@ PROPORTION = 1
5
5
 
6
6
 
7
7
  def _is(val):
8
- '''Detects json'''
8
+ """Detects json"""
9
9
  try:
10
10
  loaded = json.loads(val)
11
- if isinstance(loaded, list) or (
12
- isinstance(loaded, dict) and not (
13
- any(
14
- [
15
- geo in loaded for geo in ['coordinates', 'geometry']
16
- ]
17
- )
18
- )
19
- ):
20
- return True
21
- else:
22
- return False
11
+ # we don't want to consider integers for instance
12
+ return isinstance(loaded, (list, dict))
23
13
  except (JSONDecodeError, TypeError):
24
14
  return False
@@ -4,5 +4,5 @@ PROPORTION = 0.5
4
4
 
5
5
 
6
6
  def _is(header: str) -> float:
7
- words_combinations_list = ["is_", "has_", "est_"]
7
+ words_combinations_list = ["is ", "has ", "est "]
8
8
  return header_score(header, words_combinations_list)
@@ -110,11 +110,9 @@ def detect_formats(
110
110
  "datetime_naive": "datetime",
111
111
  "datetime_rfc822": "datetime",
112
112
  "date": "date",
113
- "latitude": "float",
114
113
  "latitude_l93": "float",
115
114
  "latitude_wgs": "float",
116
115
  "latitude_wgs_fr_metropole": "float",
117
- "longitude": "float",
118
116
  "longitude_l93": "float",
119
117
  "longitude_wgs": "float",
120
118
  "longitude_wgs_fr_metropole": "float",
@@ -2,55 +2,73 @@ import pandas as pd
2
2
 
3
3
 
4
4
  def prepare_output_dict(return_table: pd.DataFrame, limited_output: bool):
5
+ # -> dict[str, dict | list[dict]] (to be added when upgrading to python>=3.10)
5
6
  return_dict_cols = return_table.to_dict("dict")
6
- return_dict_cols_intermediary = {}
7
+ output_dict = {}
7
8
  for column_name in return_dict_cols:
8
- return_dict_cols_intermediary[column_name] = []
9
- for detected_value_type in return_dict_cols[column_name]:
10
- if return_dict_cols[column_name][detected_value_type] == 0:
11
- continue
12
- dict_tmp = {}
13
- dict_tmp["format"] = detected_value_type
14
- dict_tmp["score"] = return_dict_cols[column_name][detected_value_type]
15
- return_dict_cols_intermediary[column_name].append(dict_tmp)
16
-
17
- # Clean dict using priorities
18
- formats_detected = {
19
- x["format"] for x in return_dict_cols_intermediary[column_name]
20
- }
9
+ # keep only formats with a non-zero score
10
+ output_dict[column_name] = [
11
+ {
12
+ "format": detected_value_type,
13
+ "score": return_dict_cols[column_name][detected_value_type],
14
+ }
15
+ for detected_value_type in return_dict_cols[column_name]
16
+ if return_dict_cols[column_name][detected_value_type] > 0
17
+ ]
18
+ priorities = [
19
+ # no need to specify int and float everywhere, they are deprioritized anyway
20
+ ("int", ("float",)),
21
+ # bool over everything
22
+ ("booleen", (
23
+ "latitude_l93",
24
+ "latitude_wgs",
25
+ "latitude_wgs_fr_metropole",
26
+ "longitude_l93",
27
+ "longitude_wgs",
28
+ "longitude_wgs_fr_metropole",
29
+ )),
30
+ ("geojson", ("json",)),
31
+ # latlon over lonlat if no longitude allows to discriminate
32
+ ("latlon_wgs", ("json", "lonlat_wgs")),
33
+ ("lonlat_wgs", ("json",)),
34
+ ("latitude_wgs_fr_metropole", ("latitude_l93", "latitude_wgs")),
35
+ ("longitude_wgs_fr_metropole", ("longitude_l93", "longitude_wgs")),
36
+ ("latitude_wgs", ("latitude_l93",)),
37
+ ("longitude_wgs", ("longitude_l93",)),
38
+ ("code_region", ("code_departement",)),
39
+ ("datetime_rfc822", ("datetime_aware",)),
40
+ ]
41
+ detected_formats = set(x["format"] for x in output_dict[column_name])
21
42
  formats_to_remove = set()
22
43
  # Deprioritise float and int detection vs others
23
- if len(formats_detected - {"float", "int"}) > 0:
44
+ if len(detected_formats - {"float", "int"}) > 0:
24
45
  formats_to_remove = formats_to_remove.union({"float", "int"})
25
- if "int" in formats_detected:
26
- formats_to_remove.add("float")
27
- if "latitude_wgs_fr_metropole" in formats_detected:
28
- formats_to_remove.add("latitude_l93")
29
- formats_to_remove.add("latitude_wgs")
30
- if "longitude_wgs_fr_metropole" in formats_detected:
31
- formats_to_remove.add("longitude_l93")
32
- formats_to_remove.add("longitude_wgs")
33
- if "longitude_wgs" in formats_detected:
34
- formats_to_remove.add("longitude_l93")
35
- if "code_region" in formats_detected:
36
- formats_to_remove.add("code_departement")
37
- if "datetime_rfc822" in formats_detected:
38
- formats_to_remove.add("datetime_aware")
39
- # if there is no way to discriminate the case, default to latlon
40
- if "latlon_wgs" in formats_detected:
41
- formats_to_remove.add("lonlat_wgs")
46
+ # Deprioritize less specific formats if:
47
+ # secondary score is even or worse
48
+ # or priority score is at least 1 (max of the field score)
49
+ for prio_format, secondary_formats in priorities:
50
+ if prio_format in detected_formats:
51
+ for secondary in secondary_formats:
52
+ if (
53
+ secondary in detected_formats
54
+ and (
55
+ return_dict_cols[column_name][prio_format]
56
+ >= return_dict_cols[column_name][secondary]
57
+ or return_dict_cols[column_name][prio_format] >= 1
58
+ )
59
+ ):
60
+ formats_to_remove.add(secondary)
42
61
 
43
- formats_to_keep = formats_detected - formats_to_remove
62
+ formats_to_keep = detected_formats - formats_to_remove
44
63
 
45
- detections = return_dict_cols_intermediary[column_name]
46
- detections = [x for x in detections if x["format"] in formats_to_keep]
64
+ detections = [x for x in output_dict[column_name] if x["format"] in formats_to_keep]
47
65
  if not limited_output:
48
- return_dict_cols_intermediary[column_name] = detections
66
+ output_dict[column_name] = detections
49
67
  else:
50
- return_dict_cols_intermediary[column_name] = (
68
+ output_dict[column_name] = (
51
69
  max(detections, key=lambda x: x["score"])
52
70
  if len(detections) > 0
53
71
  else {"format": "string", "score": 1.0}
54
72
  )
55
73
 
56
- return return_dict_cols_intermediary
74
+ return output_dict
@@ -46,7 +46,6 @@ def test_col_val(
46
46
  ]: # Pour ne pas faire d'opérations inutiles, on commence par 1,
47
47
  # puis 5 valeurs puis la serie complète
48
48
  if all(apply_test_func(serie, test_func, _range)):
49
- # print(serie.name, ': check OK')
50
49
  pass
51
50
  else:
52
51
  return 0.0
@@ -11,6 +11,7 @@
11
11
  - Validate using the testing function, to consider PROPORTIONS [#131](https://github.com/datagouv/csv-detective/pull/131)
12
12
  - Remove `datetime_iso` format due to ambiguous cast in db (can be naive or aware) [#132](https://github.com/datagouv/csv-detective/pull/132)
13
13
  - Add `lonlat_wgs` format and handle optional brackets for `latlon_wgs` [#133](https://github.com/datagouv/csv-detective/pull/133)
14
+ - Refactor format prioritizing [#134](https://github.com/datagouv/csv-detective/pull/134)
14
15
 
15
16
  ## 0.8.0 (2025-05-20)
16
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.8.1.dev1578
3
+ Version: 0.8.1.dev1599
4
4
  Summary: Detect tabular files column content
5
5
  Home-page: https://github.com/datagouv/csv_detective
6
6
  Author: Etalab
@@ -49,7 +49,7 @@ csv_detective/detect_fields/geo/iso_country_code_alpha3/__init__.py,sha256=u98rn
49
49
  csv_detective/detect_fields/geo/iso_country_code_alpha3/iso_country_code_alpha3.txt,sha256=aYqKSohgXuBtcIBfF52f8JWYDdxL_HV_Ol1srGnWBp4,1003
50
50
  csv_detective/detect_fields/geo/iso_country_code_numeric/__init__.py,sha256=wJAynAkGZN7jKeI3xOeLXQ_irxQBb_J56pRkLDYVClY,436
51
51
  csv_detective/detect_fields/geo/iso_country_code_numeric/iso_country_code_numeric.txt,sha256=2GtEhuporsHYV-pU4q9kfXU5iOtfW5C0GYBTTKQtnnA,1004
52
- csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=FPHOfTrfXJs62-NgeOcNGOvwPd7I1fEVp8lTdMNfj3w,433
52
+ csv_detective/detect_fields/geo/json_geojson/__init__.py,sha256=6wlwlxQmsVIZ21g-THvH3nBj-I8FuoF2sBlZAoEMGiQ,393
53
53
  csv_detective/detect_fields/geo/latitude_wgs/__init__.py,sha256=ArS6PuYEd0atZwSqNDZhXZz1TwzdiwdV8ovRYTOacpg,327
54
54
  csv_detective/detect_fields/geo/latlon_wgs/__init__.py,sha256=IXDTqD4YFUJYI1FYZ5ZfkqXY6KvNY7sgBVFRAvgTHtI,454
55
55
  csv_detective/detect_fields/geo/longitude_wgs/__init__.py,sha256=G7afWOKiGh_Tv7gwDNGt1a4B_A8hkCBkIxn3THDCUFk,330
@@ -59,7 +59,7 @@ csv_detective/detect_fields/other/booleen/__init__.py,sha256=wn_yyTAmGxqo0l0b7JR
59
59
  csv_detective/detect_fields/other/email/__init__.py,sha256=O9tgJmq0O8Q-8iin63NqEEDhlsUJjxFZNaNFM4GZaws,178
60
60
  csv_detective/detect_fields/other/float/__init__.py,sha256=AT4Kpgwoz5PuAoLx00u0SL8DjjXZxsE8zSRbN18uAv4,578
61
61
  csv_detective/detect_fields/other/int/__init__.py,sha256=QN3kQJLYqLRBiubUK7g4Xq03PlA5wqVwx2pPPIO9FdI,320
62
- csv_detective/detect_fields/other/json/__init__.py,sha256=DhzyvT12kOqgum89silIu3uoSYXmC_s_AaxLtXAD4eU,540
62
+ csv_detective/detect_fields/other/json/__init__.py,sha256=AkRWZAidEM1dWkVRFThEBI5M7kMUu5Yu12iCViGM8lU,310
63
63
  csv_detective/detect_fields/other/money/__init__.py,sha256=g_ZwBZXl9LhldwFYQotC5WqLiE8qQCZHtoI9eJvl_9M,232
64
64
  csv_detective/detect_fields/other/mongo_object_id/__init__.py,sha256=7fcrHsOZAqXp2_N0IjPskYJ_qi4xRlo9iyNNDQVLzsU,156
65
65
  csv_detective/detect_fields/other/percent/__init__.py,sha256=vgpekNOPBRuunoVBXMi81rwHv4uSOhe78pbVtQ5SBO8,177
@@ -115,7 +115,7 @@ csv_detective/detect_labels/geo/latlon_wgs/__init__.py,sha256=tDndlFyEM7qKS3ATxp
115
115
  csv_detective/detect_labels/geo/longitude_wgs/__init__.py,sha256=_8IV2FLtrOjzhQNsk-fsgc9-jbAgzKDVMr4tXu2P-s4,429
116
116
  csv_detective/detect_labels/geo/lonlat_wgs/__init__.py,sha256=NNKlFcMsKVqnUKEm_4flGxcNUGS2-iS3m6ihQf2AVTk,345
117
117
  csv_detective/detect_labels/other/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
- csv_detective/detect_labels/other/booleen/__init__.py,sha256=BZwnfR-Zcv8dqscLrBKhttgwm4Dqq16M0PaGirxYWio,214
118
+ csv_detective/detect_labels/other/booleen/__init__.py,sha256=zEkarex7L4T3vmYjR5hdhtnhugTVDsvkgG_it6nN0aA,214
119
119
  csv_detective/detect_labels/other/email/__init__.py,sha256=Poagn45-eC2a_Wdk5Qs6d2BgYdncCQKZp2yEB50IuNw,431
120
120
  csv_detective/detect_labels/other/float/__init__.py,sha256=X0axZN2GAfC_y01zRfIyvOfRsOy2KNQcQ-mlQAKxqT4,216
121
121
  csv_detective/detect_labels/other/int/__init__.py,sha256=_1AY7thEBCcgSBQQ2YbY4YaPaxGRQ71BtmaFaX088ig,215
@@ -132,7 +132,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
132
132
  csv_detective/detection/columns.py,sha256=vfE-DKESA6J9Rfsl-a8tjgZfE21VmzArO5TrbzL0KmE,2905
133
133
  csv_detective/detection/encoding.py,sha256=tpjJEMNM_2TcLXDzn1lNQPnSRnsWYjs83tQ8jNwTj4E,973
134
134
  csv_detective/detection/engine.py,sha256=HiIrU-l9EO5Fbc2Vh8W_Uy5-dpKcQQzlxCqMuWc09LY,1530
135
- csv_detective/detection/formats.py,sha256=3vf7VdjxTmdt5KaTqGBwT5GuZhHuw98R-sIemTcOIJg,6345
135
+ csv_detective/detection/formats.py,sha256=c0LFTWbibWbEJSZaPy_86LIMOY3qRxj-I_agwpb4zbI,6284
136
136
  csv_detective/detection/headers.py,sha256=wrVII2RQpsVmHhrO1DHf3dmiu8kbtOjBlskf41cnQmc,1172
137
137
  csv_detective/detection/rows.py,sha256=3qvsbsBcMxiqqfSYYkOgsRpX777rk22tnRHDwUA97kU,742
138
138
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
@@ -142,27 +142,27 @@ csv_detective/output/dataframe.py,sha256=89iQRE59cHQyQQEsujQVIKP2YAUYpPklWkdDOqZ
142
142
  csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5fA,8649
143
143
  csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
144
144
  csv_detective/output/schema.py,sha256=yC9K1vw6NUTULNv9a7CaMGns9iXmbzFLbtHI4wegqEc,13812
145
- csv_detective/output/utils.py,sha256=sAFc-oviDn3iR3kr8MdXQa14nCMWgAITsRYBW-f3WM0,2574
145
+ csv_detective/output/utils.py,sha256=xPM2KYdqousmjU22-w7HnaF6AR74fj8lhQY77Y9xs7w,3310
146
146
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
- csv_detective/parsing/columns.py,sha256=rLzAU36cHMpVynEPhj8uMdr3IRO3_Yq58Yw7Z6oLPiQ,5693
147
+ csv_detective/parsing/columns.py,sha256=aMdG6-G-2Tj_2JdHotAIveQwaG_r8chGcGieFiUaBRk,5634
148
148
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
149
149
  csv_detective/parsing/csv.py,sha256=11mibDnJhIjykXLGZvA5ZEU5U7KgxIrbyO6BNv6jlro,1626
150
150
  csv_detective/parsing/excel.py,sha256=AslE2S1e67o8yTIAIhp-lAnJ6-XqeBBRz1-VMFqhZBM,7055
151
151
  csv_detective/parsing/load.py,sha256=u6fbGFZsL2GwPQRzhAXgt32JpUur7vbQdErREHxNJ-w,3661
152
152
  csv_detective/parsing/text.py,sha256=_TprGi0gHZlRsafizI3dqQhBehZW4BazqxmypMcAZ-o,1824
153
- csv_detective-0.8.1.dev1578.data/data/share/csv_detective/CHANGELOG.md,sha256=arQzNTnW3gZp5MyTCoDkb4fMBM7BaMhGnxVF_AhYPp4,9556
154
- csv_detective-0.8.1.dev1578.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
- csv_detective-0.8.1.dev1578.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
156
- csv_detective-0.8.1.dev1578.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
+ csv_detective-0.8.1.dev1599.data/data/share/csv_detective/CHANGELOG.md,sha256=WQ8cTB2D5YkAJ9AsS2ziKtZL8m1sPclGPenTD1BxZ_g,9646
154
+ csv_detective-0.8.1.dev1599.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
155
+ csv_detective-0.8.1.dev1599.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
156
+ csv_detective-0.8.1.dev1599.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
157
157
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
159
- tests/test_fields.py,sha256=DzDtYXZDj2YJXNuKROHmoqA__PkE39sqETmnS3a01qQ,10887
159
+ tests/test_fields.py,sha256=IwMpjOn8W5kDCvJYp3Cer4m571qomzjupOAvSRFMg_Q,11819
160
160
  tests/test_file.py,sha256=0bHV9wx9mSRoav_DVF19g694yohb1p0bw7rtcBeKG-8,8451
161
161
  tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
162
162
  tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
163
163
  tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
164
- csv_detective-0.8.1.dev1578.dist-info/METADATA,sha256=EcvSGV_PUHKAxG2HlebEXE05CuYh9ZND4erSbWCoIqo,10443
165
- csv_detective-0.8.1.dev1578.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
- csv_detective-0.8.1.dev1578.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
- csv_detective-0.8.1.dev1578.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
168
- csv_detective-0.8.1.dev1578.dist-info/RECORD,,
164
+ csv_detective-0.8.1.dev1599.dist-info/METADATA,sha256=NoE1tBjCZxO2uffbH9wSgkuNzOVOgLRA2qkjth7ynyk,10443
165
+ csv_detective-0.8.1.dev1599.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
+ csv_detective-0.8.1.dev1599.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
+ csv_detective-0.8.1.dev1599.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
168
+ csv_detective-0.8.1.dev1599.dist-info/RECORD,,
tests/test_fields.py CHANGED
@@ -72,6 +72,7 @@ from csv_detective.detection.variables import (
72
72
  )
73
73
  from csv_detective.load_tests import return_all_tests
74
74
  from csv_detective.output.dataframe import cast
75
+ from csv_detective.output.utils import prepare_output_dict
75
76
 
76
77
 
77
78
  def test_all_tests_return_bool():
@@ -285,7 +286,7 @@ fields = {
285
286
  },
286
287
  json: {
287
288
  True: ['{"pomme": "fruit", "reponse": 42}', "[1,2,3,4]"],
288
- False: ['{"coordinates": [45.783753, 3.049342], "citycode": "63870"}', "{zefib:"],
289
+ False: ["5", '{"zefib":', '{"a"}'],
289
290
  },
290
291
  money: {
291
292
  True: ["120€", "-20.2$"],
@@ -410,3 +411,27 @@ def test_fields_with_values(args):
410
411
  def test_cast(args):
411
412
  value, detected_type, cast_type = args
412
413
  assert isinstance(cast(value, detected_type), cast_type)
414
+
415
+
416
+ @pytest.mark.parametrize(
417
+ "args",
418
+ (
419
+ # there is a specific numerical format => specific wins
420
+ ({"int": 1, "float": 1, "latitude_wgs": 1}, "latitude_wgs"),
421
+ # scores are equal for related formats => priority wins
422
+ ({"int": 1, "float": 1}, "int"),
423
+ # score is lower for priority format => secondary wins
424
+ ({"int": 0.5, "float": 1}, "float"),
425
+ # score is lower for priority format, but is 1 => priority wins
426
+ ({"int": 1, "float": 1.25}, "int"),
427
+ # two rounds of priority => highest priority wins
428
+ ({"latlon_wgs": 1, "lonlat_wgs": 1, "json": 1}, "latlon_wgs"),
429
+ # no detection => default to string
430
+ ({}, "string"),
431
+ ),
432
+ )
433
+ def test_priority(args):
434
+ detections, expected = args
435
+ col = "col1"
436
+ output = prepare_output_dict(pd.DataFrame({col: detections}), limited_output=True)
437
+ assert output[col]["format"] == expected