csv-detective 0.7.5.dev1171__py3-none-any.whl → 0.7.5.dev1197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,11 @@ def date_casting(val: str) -> Optional[datetime]:
14
14
  return dateutil_parser(val)
15
15
  except ParserError:
16
16
  return date_parser(val)
17
+ except OverflowError:
18
+ return None
19
+
20
+
21
+ threshold = 0.3
17
22
 
18
23
 
19
24
  def _is(val):
@@ -21,7 +26,6 @@ def _is(val):
21
26
  # early stops, to cut processing time
22
27
  if not isinstance(val, str) or len(val) > 20 or len(val) < 8:
23
28
  return False
24
- threshold = 0.3
25
29
  if sum([char.isdigit() for char in val]) / len(val) < threshold:
26
30
  return False
27
31
  res = date_casting(val)
@@ -1,10 +1,11 @@
1
- from typing import TextIO, Optional, Union
1
+ from typing import TextIO, Optional
2
2
  from collections import defaultdict
3
3
  import pandas as pd
4
4
  import math
5
5
  import csv
6
6
  from cchardet import detect
7
7
  from ast import literal_eval
8
+ import gzip
8
9
  import logging
9
10
  from time import time
10
11
  import openpyxl
@@ -21,10 +22,13 @@ NEW_EXCEL_EXT = [".xlsx", ".xlsm", ".xltx", ".xltm"]
21
22
  OLD_EXCEL_EXT = [".xls"]
22
23
  OPEN_OFFICE_EXT = [".odf", ".ods", ".odt"]
23
24
  XLS_LIKE_EXT = NEW_EXCEL_EXT + OLD_EXCEL_EXT + OPEN_OFFICE_EXT
25
+ EXCEL_ENGINES = ["openpyxl", "xlrd", "odf"]
26
+ COMPRESSION_ENGINES = ["gzip"]
24
27
  engine_to_file = {
25
28
  "openpyxl": "Excel",
26
29
  "xlrd": "old Excel",
27
- "odf": "OpenOffice"
30
+ "odf": "OpenOffice",
31
+ "gzip": "csv.gz",
28
32
  }
29
33
 
30
34
 
@@ -128,6 +132,8 @@ def detect_engine(csv_file_path: str, verbose=False) -> Optional[str]:
128
132
  if verbose:
129
133
  start = time()
130
134
  mapping = {
135
+ "application/gzip": "gzip",
136
+ "application/x-gzip": "gzip",
131
137
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'openpyxl',
132
138
  'application/vnd.ms-excel': 'xlrd',
133
139
  'application/vnd.oasis.opendocument.spreadsheet': 'odf',
@@ -141,8 +147,12 @@ def detect_engine(csv_file_path: str, verbose=False) -> Optional[str]:
141
147
  else:
142
148
  engine = mapping.get(magic.from_file(csv_file_path, mime=True))
143
149
  if verbose:
150
+ message = (
151
+ f"File is not csv, detected {engine_to_file.get(engine, 'csv')}"
152
+ if engine else "Processing the file as a csv"
153
+ )
144
154
  display_logs_depending_process_time(
145
- f'File has no extension, detected {engine_to_file.get(engine, "csv")}',
155
+ message,
146
156
  time() - start,
147
157
  )
148
158
  return engine
@@ -174,7 +184,9 @@ def detect_separator(file: TextIO, verbose: bool = False) -> str:
174
184
  break
175
185
  rows_lengths.add(len(row))
176
186
  if len(rows_lengths) > 1:
177
- raise ValueError('Number of columns is not even across the first 10 rows.')
187
+ raise ValueError(
188
+ f"Number of columns is not even across the first 10 rows (detected separator: {sep})."
189
+ )
178
190
 
179
191
  if verbose:
180
192
  display_logs_depending_process_time(
@@ -184,19 +196,22 @@ def detect_separator(file: TextIO, verbose: bool = False) -> str:
184
196
  return sep
185
197
 
186
198
 
187
- def detect_encoding(csv_file_path: str, verbose: bool = False) -> str:
199
+ def unzip(binary_file: BytesIO, engine: str) -> BytesIO:
200
+ if engine == "gzip":
201
+ with gzip.open(binary_file, mode="rb") as binary_file:
202
+ file_content = binary_file.read()
203
+ else:
204
+ raise NotImplementedError(f"{engine} is not yet supported")
205
+ return BytesIO(file_content)
206
+
207
+
208
+ def detect_encoding(binary_file: BytesIO, verbose: bool = False) -> str:
188
209
  """
189
210
  Detects file encoding using faust-cchardet (forked from the original cchardet)
190
211
  """
191
212
  if verbose:
192
213
  start = time()
193
214
  logging.info("Detecting encoding")
194
- if is_url(csv_file_path):
195
- r = requests.get(csv_file_path)
196
- r.raise_for_status()
197
- binary_file = BytesIO(r.content)
198
- else:
199
- binary_file = open(csv_file_path, mode="rb")
200
215
  encoding_dict = detect(binary_file.read())
201
216
  if not encoding_dict["encoding"]:
202
217
  raise ValueError("Could not detect the file's encoding. Consider specifying it in the routine call.")
@@ -12,7 +12,7 @@ import tempfile
12
12
  import logging
13
13
  from time import time
14
14
  import requests
15
- from io import StringIO
15
+ from io import BytesIO, StringIO
16
16
  import pandas as pd
17
17
 
18
18
  # flake8: noqa
@@ -39,7 +39,10 @@ from .detection import (
39
39
  detetect_categorical_variable,
40
40
  # detect_continuous_variable,
41
41
  is_url,
42
+ unzip,
42
43
  XLS_LIKE_EXT,
44
+ EXCEL_ENGINES,
45
+ COMPRESSION_ENGINES,
43
46
  )
44
47
 
45
48
 
@@ -81,16 +84,14 @@ def return_all_tests(
81
84
 
82
85
  if isinstance(user_input_tests, str):
83
86
  user_input_tests = [user_input_tests]
84
- if "ALL" in user_input_tests:
87
+ if "ALL" in user_input_tests or all(x[0] == "-" for x in user_input_tests):
85
88
  tests_to_do = [detect_type]
86
89
  else:
87
- # can't require to only skip tests
88
- assert not all(x[0] == "-" for x in user_input_tests)
89
90
  tests_to_do = [
90
- detect_type + "." + x for x in user_input_tests if x[0] != "-"
91
+ f"{detect_type}.{x}" for x in user_input_tests if x[0] != "-"
91
92
  ]
92
93
  tests_skipped = [
93
- detect_type + "." + x[1:] for x in user_input_tests if x[0] == "-"
94
+ f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"
94
95
  ]
95
96
  all_tests = [
96
97
  # this is why we need to import detect_fields/labels
@@ -155,12 +156,12 @@ def routine(
155
156
 
156
157
  file_name = csv_file_path.split('/')[-1]
157
158
  engine = None
158
- if '.' not in file_name:
159
+ if '.' not in file_name or not file_name.endswith("csv"):
159
160
  # file has no extension or its name does not end with "csv", we'll investigate how to read it
160
161
  engine = detect_engine(csv_file_path, verbose=verbose)
161
162
 
162
163
  is_xls_like = False
163
- if engine or any([csv_file_path.endswith(k) for k in XLS_LIKE_EXT]):
164
+ if engine in EXCEL_ENGINES or any([csv_file_path.endswith(k) for k in XLS_LIKE_EXT]):
164
165
  is_xls_like = True
165
166
  encoding, sep, heading_columns, trailing_columns = None, None, None, None
166
167
  table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx = parse_excel(
@@ -172,12 +173,23 @@ def routine(
172
173
  )
173
174
  header = table.columns.to_list()
174
175
  else:
175
- if encoding is None:
176
- encoding = detect_encoding(csv_file_path, verbose=verbose)
176
+ # fetching or reading file as binary
177
177
  if is_url(csv_file_path):
178
178
  r = requests.get(csv_file_path, allow_redirects=True)
179
179
  r.raise_for_status()
180
- str_file = StringIO(r.content.decode(encoding=encoding))
180
+ binary_file = BytesIO(r.content)
181
+ else:
182
+ binary_file = open(csv_file_path, "rb")
183
+ # handling compression
184
+ if engine in COMPRESSION_ENGINES:
185
+ binary_file: BytesIO = unzip(binary_file=binary_file, engine=engine)
186
+ # detecting encoding if not specified
187
+ if encoding is None:
188
+ encoding: str = detect_encoding(binary_file, verbose=verbose)
189
+ binary_file.seek(0)
190
+ # decoding and reading file
191
+ if is_url(csv_file_path) or engine in COMPRESSION_ENGINES:
192
+ str_file = StringIO(binary_file.read().decode(encoding=encoding))
181
193
  else:
182
194
  str_file = open(csv_file_path, "r", encoding=encoding)
183
195
  if sep is None:
@@ -257,17 +269,19 @@ def routine(
257
269
  )
258
270
 
259
271
  # To reduce false positives: ensure these formats are detected only if the label yields
260
- # a detection.
272
+ # a detection (skipping the ones that have been excluded by the users).
261
273
  formats_with_mandatory_label = [
262
- "code_departement",
263
- "code_commune_insee",
264
- "code_postal",
265
- "latitude_wgs",
266
- "longitude_wgs",
267
- "latitude_wgs_fr_metropole",
268
- "longitude_wgs_fr_metropole",
269
- "latitude_l93",
270
- "longitude_l93",
274
+ f for f in [
275
+ "code_departement",
276
+ "code_commune_insee",
277
+ "code_postal",
278
+ "latitude_wgs",
279
+ "longitude_wgs",
280
+ "latitude_wgs_fr_metropole",
281
+ "longitude_wgs_fr_metropole",
282
+ "latitude_l93",
283
+ "longitude_l93",
284
+ ] if f in scores_table.index
271
285
  ]
272
286
  scores_table.loc[formats_with_mandatory_label, :] = np.where(
273
287
  scores_table_labels.loc[formats_with_mandatory_label, :],
@@ -7,7 +7,10 @@
7
7
  - Better naming, hint types and minor refactors [#103](https://github.com/datagouv/csv-detective/pull/103)
8
8
  - The returned dataframe has its columns properly cast to the detected types [#104](https://github.com/datagouv/csv-detective/pull/104)
9
9
  - Raise an error if the encoding could not be guessed [#106](https://github.com/datagouv/csv-detective/pull/106)
10
+ - Allow specifying only the tests to skip ("all but...") [#108](https://github.com/datagouv/csv-detective/pull/108)
10
11
  - Fix bool casting [#109](https://github.com/datagouv/csv-detective/pull/109)
12
+ - Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
13
+ - Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
11
14
 
12
15
  ## 0.7.4 (2024-11-15)
13
16
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: csv_detective
3
- Version: 0.7.5.dev1171
3
+ Version: 0.7.5.dev1197
4
4
  Summary: Detect CSV column content
5
5
  Home-page: https://github.com/etalab/csv_detective
6
6
  Author: Etalab
@@ -37,5 +37,6 @@ Dynamic: description-content-type
37
37
  Dynamic: home-page
38
38
  Dynamic: keywords
39
39
  Dynamic: license
40
+ Dynamic: license-file
40
41
  Dynamic: requires-dist
41
42
  Dynamic: summary
@@ -1,8 +1,8 @@
1
1
  csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
2
2
  csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
3
3
  csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
4
- csv_detective/detection.py,sha256=zrP8qvLDvhVXTHi7Ty8G_ga4zfZPjBhuyApqFQkPq2Y,22373
5
- csv_detective/explore_csv.py,sha256=-AxnM0hGlhrbI4w1wdZwC_w-DYgoOCFpMQ94agIaD5U,17380
4
+ csv_detective/detection.py,sha256=dqjAKR-h7QC2pbl7FEUleS15bvGHBiTleu9CtVKp_Vo,22806
5
+ csv_detective/explore_csv.py,sha256=HM4RlNV2eWfP9wTDvhrow-_yDMbGuE3JDvFCfmMNWyY,18087
6
6
  csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
7
7
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
8
8
  csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
@@ -65,7 +65,7 @@ csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeE
65
65
  csv_detective/detect_fields/other/url/__init__.py,sha256=9WaTqCglEsw_lJG_xZsBMdxJXg2yuQ92_fkX6CXWNV0,286
66
66
  csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
67
67
  csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- csv_detective/detect_fields/temp/date/__init__.py,sha256=aFP1feFWFhCpR6Q9s_4BBwWxFtwFiMXY1iduSeQIjdA,943
68
+ csv_detective/detect_fields/temp/date/__init__.py,sha256=1a_Ra9fmT4wgGMrcknXP7eN7A2QiaMF0Yjy0-BMihtA,987
69
69
  csv_detective/detect_fields/temp/datetime/__init__.py,sha256=Ykwhk2ospjY9P0KOG0AitgqN0sld6UmhOlbMz_XGQzQ,597
70
70
  csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=DOfli-A7gPlZmiV2J6Ka5_yDUCaOgxis29LET_tfhA4,444
71
71
  csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
@@ -127,18 +127,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=-R7VqlryozelSn4wH_7w9x6
127
127
  csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
128
128
  csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
129
129
  csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
130
- csv_detective-0.7.5.dev1171.data/data/share/csv_detective/CHANGELOG.md,sha256=MU0DrzId6qDxIPeAp9nAazYlEYwh1A8mlqnvkyFK55c,7254
131
- csv_detective-0.7.5.dev1171.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
132
- csv_detective-0.7.5.dev1171.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
130
+ csv_detective-0.7.5.dev1197.data/data/share/csv_detective/CHANGELOG.md,sha256=YlXiPqHlJv23g6HfqEXzic6y14IfPWUoz5ADOis0YeY,7528
131
+ csv_detective-0.7.5.dev1197.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
132
+ csv_detective-0.7.5.dev1197.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
133
+ csv_detective-0.7.5.dev1197.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
133
134
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
135
  tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
135
136
  tests/test_fields.py,sha256=_96htvTzvM7u-W57RpOBbsacWirIm4R36PP7JhPEaYQ,11123
136
- tests/test_file.py,sha256=HO-Zqv0ZDFy3d0ZrpjWQPXBrwgUmzesseoEofy8G2UU,7529
137
+ tests/test_file.py,sha256=w-nKXnm8A5l5_MAtA6E99ouTefOkU38B6jMJVyBHr50,7858
137
138
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
138
139
  tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
139
- csv_detective-0.7.5.dev1171.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
140
- csv_detective-0.7.5.dev1171.dist-info/METADATA,sha256=MhOtq7Bv7pJMRUoavz8f0VcKXpY1z-n5NLSbAJkyRLg,1364
141
- csv_detective-0.7.5.dev1171.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
142
- csv_detective-0.7.5.dev1171.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
143
- csv_detective-0.7.5.dev1171.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
144
- csv_detective-0.7.5.dev1171.dist-info/RECORD,,
140
+ csv_detective-0.7.5.dev1197.dist-info/METADATA,sha256=2B1bE17lCw02QHnXyk_2Rt9M-fcN8J_RlUlkKzNL4tM,1386
141
+ csv_detective-0.7.5.dev1197.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
142
+ csv_detective-0.7.5.dev1197.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
143
+ csv_detective-0.7.5.dev1197.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
144
+ csv_detective-0.7.5.dev1197.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/test_file.py CHANGED
@@ -69,7 +69,7 @@ def test_profile_output_on_file():
69
69
  assert output["profile"]["GEO_INFO"]["nb_distinct"] == 1
70
70
 
71
71
 
72
- def test_exception():
72
+ def test_profile_with_num_rows():
73
73
  with pytest.raises(ValueError):
74
74
  routine(
75
75
  csv_file_path="tests/a_test_file.csv",
@@ -131,52 +131,37 @@ def test_schema_on_file():
131
131
  assert is_column_reg
132
132
 
133
133
 
134
- def test_non_csv_files():
135
- _ = routine(
136
- csv_file_path="tests/file.ods",
137
- num_rows=-1,
138
- output_profile=False,
139
- save_results=False,
140
- )
141
- assert _['engine'] == 'odf'
142
-
134
+ params_csv = [
135
+ ("csv_file", {"engine": None, "sheet_name": None}),
136
+ ("file.csv.gz", {"engine": None, "sheet_name": None, "separator": ",", "columns.len": 3}),
137
+ ]
138
+ params_others = [
139
+ ("file.ods", {"engine": "odf"}),
143
140
  # this is a "tricked" xls file that is actually read as odf
144
- _ = routine(
145
- csv_file_path="tests/file.xls",
146
- num_rows=-1,
147
- output_profile=False,
148
- save_results=False,
149
- )
150
- assert _['engine'] == 'odf'
141
+ ("file.xls", {"engine": "odf"}),
142
+ # this file has an empty first row; check if the sheet we consider is the largest
143
+ ("file.xlsx", {"engine": "openpyxl", "header_row_idx": 1, "sheet_name": "REI_1987"}),
144
+ ("xlsx_file", {"engine": "openpyxl"}),
145
+ ]
151
146
 
152
- _ = routine(
153
- csv_file_path="tests/file.xlsx",
154
- num_rows=-1,
155
- output_profile=False,
156
- save_results=False,
157
- )
158
- assert _['engine'] == 'openpyxl'
159
- # this file has an empty first row
160
- assert _['header_row_idx'] == 1
161
- # check if the sheet we consider is the largest
162
- assert _['sheet_name'] == 'REI_1987'
163
147
 
148
+ @pytest.mark.parametrize("params", params_csv + params_others)
149
+ def test_non_csv_files(params):
150
+ file_name, checks = params
164
151
  _ = routine(
165
- csv_file_path="tests/csv_file",
152
+ csv_file_path=f"tests/{file_name}",
166
153
  num_rows=-1,
167
154
  output_profile=False,
168
155
  save_results=False,
169
156
  )
170
- assert not _.get('engine')
171
- assert not _.get('sheet_name')
172
-
173
- _ = routine(
174
- csv_file_path="tests/xlsx_file",
175
- num_rows=-1,
176
- output_profile=False,
177
- save_results=False,
178
- )
179
- assert _['engine'] == 'openpyxl'
157
+ for k, v in checks.items():
158
+ if v is None:
159
+ assert not _.get(k)
160
+ elif "." in k:
161
+ key, func = k.split(".")
162
+ assert eval(func)(_[key]) == v
163
+ else:
164
+ assert _[k] == v
180
165
 
181
166
 
182
167
  @pytest.fixture
@@ -185,21 +170,34 @@ def mocked_responses():
185
170
  yield rsps
186
171
 
187
172
 
188
- def test_urls(mocked_responses):
189
- url = 'http://example.com/test.csv'
190
- expected_content = 'id,name,first_name\n1,John,Smith\n2,Jane,Doe\n3,Bob,Johnson'
173
+ @pytest.mark.parametrize(
174
+ "params",
175
+ # ideally we'd like to do the same with params_others but pandas.read_excel uses urllib
176
+ # which doesn't support the way we mock the response, TBC
177
+ params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 414})]
178
+ )
179
+ def test_urls(mocked_responses, params):
180
+ file_name, checks = params
181
+ url = f"http://example.com/{file_name}"
191
182
  mocked_responses.get(
192
183
  url,
193
- body=expected_content,
184
+ body=open(f"tests/{file_name}", "rb").read(),
194
185
  status=200,
195
186
  )
196
- output = routine(
187
+ _ = routine(
197
188
  csv_file_path=url,
198
189
  num_rows=-1,
199
190
  output_profile=False,
200
191
  save_results=False,
201
192
  )
202
- assert output['header'] == ["id", "name", "first_name"]
193
+ for k, v in checks.items():
194
+ if v is None:
195
+ assert not _.get(k)
196
+ elif "." in k:
197
+ key, func = k.split(".")
198
+ assert eval(func)(_[key]) == v
199
+ else:
200
+ assert _[k] == v
203
201
 
204
202
 
205
203
  @pytest.mark.parametrize(