csv-detective 0.10.3.dev7__py3-none-any.whl → 0.10.4.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ from csv_detective.format import Format, FormatsManager
11
11
  from csv_detective.output.utils import prepare_output_dict
12
12
  from csv_detective.parsing.columns import (
13
13
  MAX_NUMBER_CATEGORICAL_VALUES,
14
+ handle_empty_columns,
14
15
  test_col,
15
16
  test_col_chunks,
16
17
  test_label,
@@ -49,6 +50,7 @@ def detect_formats(
49
50
  skipna=skipna,
50
51
  verbose=verbose,
51
52
  )
53
+ handle_empty_columns(scores_table_fields)
52
54
  res_categorical, _ = detect_categorical_variable(
53
55
  table,
54
56
  max_number_categorical_values=MAX_NUMBER_CATEGORICAL_VALUES,
@@ -13,6 +13,13 @@ from csv_detective.utils import display_logs_depending_process_time
13
13
  MAX_NUMBER_CATEGORICAL_VALUES = 25
14
14
 
15
15
 
16
+ def handle_empty_columns(return_table: pd.DataFrame):
17
+ # handling that empty columns score 1 everywhere
18
+ for col in return_table.columns:
19
+ if sum(return_table[col]) == len(return_table):
20
+ return_table[col] = 0
21
+
22
+
16
23
  def test_col_val(
17
24
  serie: pd.Series,
18
25
  format: Format,
@@ -222,10 +229,7 @@ def test_col_chunks(
222
229
  analysis["categorical"] = [
223
230
  col for col, values in col_values.items() if len(values) <= MAX_NUMBER_CATEGORICAL_VALUES
224
231
  ]
225
- # handling that empty columns score 1 everywhere
226
- for col in return_table.columns:
227
- if sum(return_table[col]) == len(return_table):
228
- return_table[col] = 0
232
+ handle_empty_columns(return_table)
229
233
  if verbose:
230
234
  display_logs_depending_process_time(
231
235
  f"Done testing chunks in {round(time() - start, 3)}s", time() - start
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv-detective
3
- Version: 0.10.3.dev7
3
+ Version: 0.10.4.dev1
4
4
  Summary: Detect tabular files column content
5
5
  Author-email: "data.gouv.fr" <opendatateam@data.gouv.fr>
6
6
  License: MIT
@@ -8,7 +8,7 @@ csv_detective/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
8
8
  csv_detective/detection/columns.py,sha256=_JtZHBr3aoEmSWh2xVe2ISnt-G7hpnA9vqlvcaGd0Go,2887
9
9
  csv_detective/detection/encoding.py,sha256=KZ8W8BPfZAq9UiP5wgaeupYa5INU8KPz98E2L3XpX2Y,999
10
10
  csv_detective/detection/engine.py,sha256=wQeDKpp2DKF-HcS1R8H6GgQyaUgQme4szPtEHgAjBII,1552
11
- csv_detective/detection/formats.py,sha256=9aIE4gwTN8c8pa-kofeJ7zalo8NqjGZabYD-G79kV5I,4734
11
+ csv_detective/detection/formats.py,sha256=cgECpxRaygwnedPhOteG1P_697qCoceeDrKK9G_O-u8,4812
12
12
  csv_detective/detection/headers.py,sha256=lnbWRxkI6rdyoWGtmxSfsPkqNjS0Nlpgw-pVevtmBP0,899
13
13
  csv_detective/detection/rows.py,sha256=JQsmKP8-i8wzcZIWI_13LUer5mpYRIqaKg6qW01ZO3A,750
14
14
  csv_detective/detection/separator.py,sha256=XjeDBqhiBxVfkCPJKem9BAgJqs_hOgQltc_pxrH_-Tg,1547
@@ -80,17 +80,17 @@ csv_detective/output/profile.py,sha256=R9YMl-dANde69RXkFlZpvMDBsX7e1SyMAnlW8p1XN
80
80
  csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
81
81
  csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
82
82
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
- csv_detective/parsing/columns.py,sha256=H_dKHhSgQMIiOfxibnGl6HwTW9bRwGtIeUcYBN13-3A,9245
83
+ csv_detective/parsing/columns.py,sha256=MFtEJFLsFdlKdM5AXtgXbf5p6HRW6DuOC4XnxhFMpIY,9344
84
84
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
85
85
  csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
86
86
  csv_detective/parsing/excel.py,sha256=pX6dbhAdAdbRpoGcrGsL1lSaF-fbzEb4WcvwcCGEgFw,6978
87
87
  csv_detective/parsing/load.py,sha256=1Fk43ikIOJwtWJUY-e8oNeNOk4MMtpmZV7s-VbQBS1k,4345
88
88
  csv_detective/parsing/text.py,sha256=yDAcop5xJQc25UtbZcV0guHXAZQfm-H8WuJORTy8Rr8,1734
89
- csv_detective-0.10.3.dev7.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
89
+ csv_detective-0.10.4.dev1.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
90
90
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
91
  tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
92
92
  tests/test_fields.py,sha256=DSI-ZXDcRt69iZArYZZAr_3OEb-qvwgOVBZxmYAKIkI,5918
93
- tests/test_file.py,sha256=9Zne9ULDqkr-ajgc03lEMEod4d71Y-UDY4ckT6FFw_I,15205
93
+ tests/test_file.py,sha256=Ov9NGvZQxeoehxTpfcsnwEybebM0tnbmcRsFwe46cjg,15277
94
94
  tests/test_labels.py,sha256=lgxRbLrGV1C-MkASf3KIQ120BG-UHzFQ4pqDWaeBvaw,539
95
95
  tests/test_structure.py,sha256=XDbviuuvk-0Mu9Y9PI6He2e5hry2dXVJ6yBVwEqF_2o,1043
96
96
  tests/test_validation.py,sha256=309k3Axgbp-1Wh6qvCj2BpeMBp3HXzLi5j9UKm1bRQs,5384
@@ -104,8 +104,8 @@ tests/data/file.ods,sha256=4dR7zWptz5djALIBVeWHQ20GaZNfA63fevIJGFIk1_U,11832
104
104
  tests/data/file.xls,sha256=QYmNX3FF0QfcQSzYQMtaMJaepJf5EZpDa1miKc4wMdQ,21495
105
105
  tests/data/file.xlsx,sha256=naWzL02PK4pdIjMzfEyfSW9GQhkYYd_e7bpJvB8Pb2w,8314
106
106
  tests/data/xlsx_file,sha256=NyOyN_rIe7ryJuHQLqjxVdKCc8V4s5pxyHl6wWFykCM,8305
107
- csv_detective-0.10.3.dev7.dist-info/METADATA,sha256=Kj-Nd01oXyqZpNwaVNJ9CwRcYYX80YCKtIhsijoO89A,10925
108
- csv_detective-0.10.3.dev7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
109
- csv_detective-0.10.3.dev7.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
110
- csv_detective-0.10.3.dev7.dist-info/top_level.txt,sha256=KDI4gyOpkmormGgUvSWrE3jen2e0unIsxR2b96DRvcw,25
111
- csv_detective-0.10.3.dev7.dist-info/RECORD,,
107
+ csv_detective-0.10.4.dev1.dist-info/METADATA,sha256=le1Rn1JIh8MoIf_RTc3Fi9DOOlvug4eR-Mwpw4AK0To,10925
108
+ csv_detective-0.10.4.dev1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
109
+ csv_detective-0.10.4.dev1.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
110
+ csv_detective-0.10.4.dev1.dist-info/top_level.txt,sha256=KDI4gyOpkmormGgUvSWrE3jen2e0unIsxR2b96DRvcw,25
111
+ csv_detective-0.10.4.dev1.dist-info/RECORD,,
tests/test_file.py CHANGED
@@ -388,10 +388,11 @@ def test_almost_uniform_column(mocked_responses):
388
388
  assert analysis["columns"][col_name]["format"] == "int"
389
389
 
390
390
 
391
- def test_full_nan_column(mocked_responses):
391
+ @pytest.mark.parametrize("nb_rows", (CHUNK_SIZE // 10, CHUNK_SIZE + 1))
392
+ def test_full_nan_column(mocked_responses, nb_rows):
392
393
  # we want a file that needs sampling
393
394
  col_name = "only_nan"
394
- expected_content = f"{col_name},second_col\n" + ",1\n" * (CHUNK_SIZE + 1)
395
+ expected_content = f"{col_name},second_col\n" + ",1\n" * nb_rows
395
396
  mocked_responses.get(
396
397
  "http://example.com/test.csv",
397
398
  body=expected_content,