csv-detective 0.7.5.dev1180__py3-none-any.whl → 0.7.5.dev1197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/detect_fields/temp/date/__init__.py +5 -1
- csv_detective/detection.py +26 -11
- csv_detective/explore_csv.py +20 -6
- {csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1197.data}/data/share/csv_detective/CHANGELOG.md +2 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/METADATA +3 -2
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/RECORD +13 -13
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/WHEEL +1 -1
- tests/test_file.py +43 -45
- {csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1197.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1197.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info/licenses}/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/top_level.txt +0 -0
csv_detective/detect_fields/temp/date/__init__.py
CHANGED

```diff
@@ -14,6 +14,11 @@ def date_casting(val: str) -> Optional[datetime]:
         return dateutil_parser(val)
     except ParserError:
         return date_parser(val)
+    except OverflowError:
+        return None
+
+
+threshold = 0.3
 
 
 def _is(val):
@@ -21,7 +26,6 @@ def _is(val):
     # early stops, to cut processing time
     if not isinstance(val, str) or len(val) > 20 or len(val) < 8:
         return False
-    threshold = 0.3
     if sum([char.isdigit() for char in val]) / len(val) < threshold:
         return False
     res = date_casting(val)
```
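Two fixes in one hunk: `dateutil`'s parser can raise `OverflowError` (not only `ParserError`) when a digit run parses into a value beyond the largest valid C integer, and that exception previously escaped `date_casting` and aborted the whole column scan; `threshold` also moves from the `_is` body to module level, so it is bound once instead of on every call. A minimal sketch of the hardened casting, with imports assumed from the aliases used in the diff:

```python
# Sketch only: dateutil_parser / date_parser are aliases in the real module;
# the fallback parser is collapsed into a comment to keep the example standalone.
from typing import Optional
from datetime import datetime
from dateutil.parser import parse as dateutil_parser, ParserError

def date_casting(val: str) -> Optional[datetime]:
    try:
        return dateutil_parser(val)
    except ParserError:
        return None  # the real code retries with date_parser(val) here
    except OverflowError:
        # dateutil documents OverflowError "if the parsed date exceeds the
        # largest valid C integer", e.g. on some long digit-only strings
        return None
```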
csv_detective/detection.py
CHANGED
```diff
@@ -1,10 +1,11 @@
-from typing import TextIO, Optional
+from typing import TextIO, Optional
 from collections import defaultdict
 import pandas as pd
 import math
 import csv
 from cchardet import detect
 from ast import literal_eval
+import gzip
 import logging
 from time import time
 import openpyxl
@@ -21,10 +22,13 @@ NEW_EXCEL_EXT = [".xlsx", ".xlsm", ".xltx", ".xltm"]
 OLD_EXCEL_EXT = [".xls"]
 OPEN_OFFICE_EXT = [".odf", ".ods", ".odt"]
 XLS_LIKE_EXT = NEW_EXCEL_EXT + OLD_EXCEL_EXT + OPEN_OFFICE_EXT
+EXCEL_ENGINES = ["openpyxl", "xlrd", "odf"]
+COMPRESSION_ENGINES = ["gzip"]
 engine_to_file = {
     "openpyxl": "Excel",
     "xlrd": "old Excel",
-    "odf": "OpenOffice"
+    "odf": "OpenOffice",
+    "gzip": "csv.gz",
 }
 
 
```
```diff
@@ -128,6 +132,8 @@ def detect_engine(csv_file_path: str, verbose=False) -> Optional[str]:
     if verbose:
         start = time()
     mapping = {
+        "application/gzip": "gzip",
+        "application/x-gzip": "gzip",
         'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'openpyxl',
         'application/vnd.ms-excel': 'xlrd',
         'application/vnd.oasis.opendocument.spreadsheet': 'odf',
@@ -141,8 +147,12 @@ def detect_engine(csv_file_path: str, verbose=False) -> Optional[str]:
     else:
         engine = mapping.get(magic.from_file(csv_file_path, mime=True))
     if verbose:
+        message = (
+            f"File is not csv, detected {engine_to_file.get(engine, 'csv')}"
+            if engine else "Processing the file as a csv"
+        )
         display_logs_depending_process_time(
-
+            message,
             time() - start,
         )
     return engine
```
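The probe stays a thin wrapper around libmagic MIME sniffing; gzip payloads now land on a "gzip" engine next to the spreadsheet ones. A standalone sketch of the same idea, assuming the `python-magic` package (the mapping keys mirror the diff):

```python
import magic  # python-magic, a wrapper around libmagic

MAPPING = {
    "application/gzip": "gzip",
    "application/x-gzip": "gzip",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "openpyxl",
    "application/vnd.ms-excel": "xlrd",
    "application/vnd.oasis.opendocument.spreadsheet": "odf",
}

def probe_engine(path: str):
    # None for anything unmapped, which the caller then treats as plain csv
    return MAPPING.get(magic.from_file(path, mime=True))

# e.g. probe_engine("table.csv.gz") -> "gzip"   (hypothetical path)
```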
```diff
@@ -174,7 +184,9 @@ def detect_separator(file: TextIO, verbose: bool = False) -> str:
             break
         rows_lengths.add(len(row))
     if len(rows_lengths) > 1:
-        raise ValueError(
+        raise ValueError(
+            f"Number of columns is not even across the first 10 rows (detected separator: {sep})."
+        )
 
     if verbose:
         display_logs_depending_process_time(
```
```diff
@@ -184,19 +196,22 @@ def detect_separator(file: TextIO, verbose: bool = False) -> str:
     return sep
 
 
-def
+def unzip(binary_file: BytesIO, engine: str) -> BytesIO:
+    if engine == "gzip":
+        with gzip.open(binary_file, mode="rb") as binary_file:
+            file_content = binary_file.read()
+    else:
+        raise NotImplementedError(f"{engine} is not yet supported")
+    return BytesIO(file_content)
+
+
+def detect_encoding(binary_file: BytesIO, verbose: bool = False) -> str:
     """
     Detects file encoding using faust-cchardet (forked from the original cchardet)
     """
     if verbose:
         start = time()
         logging.info("Detecting encoding")
-    if is_url(csv_file_path):
-        r = requests.get(csv_file_path)
-        r.raise_for_status()
-        binary_file = BytesIO(r.content)
-    else:
-        binary_file = open(csv_file_path, mode="rb")
     encoding_dict = detect(binary_file.read())
     if not encoding_dict["encoding"]:
         raise ValueError("Could not detect the file's encoding. Consider specifying it in the routine call.")
```
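A quick round-trip showing what the new `unzip` helper does for the one engine it supports so far; `gzip.compress` stands in for a real `.csv.gz` payload:

```python
import gzip
from io import BytesIO

raw = b"id;nom;date\n1;a;2024-01-01\n"
compressed = BytesIO(gzip.compress(raw))

# Equivalent of unzip(binary_file=compressed, engine="gzip"): inflate fully
# in memory and hand back a fresh, seekable BytesIO.
with gzip.open(compressed, mode="rb") as f:
    inflated = BytesIO(f.read())

assert inflated.read() == raw
```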
csv_detective/explore_csv.py
CHANGED
```diff
@@ -12,7 +12,7 @@ import tempfile
 import logging
 from time import time
 import requests
-from io import StringIO
+from io import BytesIO, StringIO
 import pandas as pd
 
 # flake8: noqa
@@ -39,7 +39,10 @@ from .detection import (
     detetect_categorical_variable,
     # detect_continuous_variable,
     is_url,
+    unzip,
     XLS_LIKE_EXT,
+    EXCEL_ENGINES,
+    COMPRESSION_ENGINES,
 )
 
 
@@ -153,12 +156,12 @@ def routine(
 
     file_name = csv_file_path.split('/')[-1]
     engine = None
-    if '.' not in file_name:
+    if '.' not in file_name or not file_name.endswith("csv"):
         # file has no extension, we'll investigate how to read it
         engine = detect_engine(csv_file_path, verbose=verbose)
 
     is_xls_like = False
-    if engine or any([csv_file_path.endswith(k) for k in XLS_LIKE_EXT]):
+    if engine in EXCEL_ENGINES or any([csv_file_path.endswith(k) for k in XLS_LIKE_EXT]):
         is_xls_like = True
         encoding, sep, heading_columns, trailing_columns = None, None, None, None
         table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx = parse_excel(
```
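Note the widened guard: previously only extension-less names triggered the engine probe; now any name not ending in `csv` does, which is what routes `.csv.gz` (and spreadsheet extensions) through `detect_engine`. The predicate on a few hypothetical names:

```python
# How the reworked condition classifies file names (illustrative only):
for name in ["data", "data.csv", "data.csv.gz", "data.xlsx"]:
    probe = '.' not in name or not name.endswith("csv")
    print(f"{name}: {'detect_engine' if probe else 'read directly as csv'}")
# data: detect_engine
# data.csv: read directly as csv
# data.csv.gz: detect_engine
# data.xlsx: detect_engine
```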
```diff
@@ -170,12 +173,23 @@
         )
         header = table.columns.to_list()
     else:
-
-        encoding = detect_encoding(csv_file_path, verbose=verbose)
+        # fetching or reading file as binary
         if is_url(csv_file_path):
             r = requests.get(csv_file_path, allow_redirects=True)
             r.raise_for_status()
-
+            binary_file = BytesIO(r.content)
+        else:
+            binary_file = open(csv_file_path, "rb")
+        # handling compression
+        if engine in COMPRESSION_ENGINES:
+            binary_file: BytesIO = unzip(binary_file=binary_file, engine=engine)
+        # detecting encoding if not specified
+        if encoding is None:
+            encoding: str = detect_encoding(binary_file, verbose=verbose)
+            binary_file.seek(0)
+        # decoding and reading file
+        if is_url(csv_file_path) or engine in COMPRESSION_ENGINES:
+            str_file = StringIO(binary_file.read().decode(encoding=encoding))
         else:
             str_file = open(csv_file_path, "r", encoding=encoding)
     if sep is None:
```
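Taken together, the non-spreadsheet branch now runs: fetch or open the file as bytes, inflate it if `detect_engine` flagged compression, sniff the encoding from the binary stream (rewinding afterwards), then decode into a `StringIO` for separator detection. A hedged usage sketch; the keyword arguments mirror the test suite below, and `tests/file.csv.gz` is one of its fixtures:

```python
from csv_detective.explore_csv import routine

# A gzipped csv now goes through the same entry point as a plain one.
analysis = routine(
    csv_file_path="tests/file.csv.gz",
    num_rows=-1,
    output_profile=False,
    save_results=False,
)
# Per the tests, the report includes "separator" and "columns" keys.
print(analysis["separator"], len(analysis["columns"]))
```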
{csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1197.data}/data/share/csv_detective/CHANGELOG.md
CHANGED

```diff
@@ -9,6 +9,8 @@
 - Raise an error if the encoding could not be guessed [#106](https://github.com/datagouv/csv-detective/pull/106)
 - Allow to only specify tests to skip ("all but...") [#108](https://github.com/datagouv/csv-detective/pull/108)
 - Fix bool casting [#109](https://github.com/datagouv/csv-detective/pull/109)
+- Handle csv.gz files [#110](https://github.com/datagouv/csv-detective/pull/110)
+- Refactor file tests [#110](https://github.com/datagouv/csv-detective/pull/110)
 
 ## 0.7.4 (2024-11-15)
 
```
{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: csv_detective
-Version: 0.7.5.
+Version: 0.7.5.dev1197
 Summary: Detect CSV column content
 Home-page: https://github.com/etalab/csv_detective
 Author: Etalab
@@ -37,5 +37,6 @@ Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: keywords
 Dynamic: license
+Dynamic: license-file
 Dynamic: requires-dist
 Dynamic: summary
```
{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/RECORD
CHANGED

```diff
@@ -1,8 +1,8 @@
 csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
 csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
 csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
-csv_detective/detection.py,sha256=
-csv_detective/explore_csv.py,sha256=
+csv_detective/detection.py,sha256=dqjAKR-h7QC2pbl7FEUleS15bvGHBiTleu9CtVKp_Vo,22806
+csv_detective/explore_csv.py,sha256=HM4RlNV2eWfP9wTDvhrow-_yDMbGuE3JDvFCfmMNWyY,18087
 csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
 csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
 csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
@@ -65,7 +65,7 @@ csv_detective/detect_fields/other/twitter/__init__.py,sha256=qbwLKsTBRFQ4PyTNVeE
 csv_detective/detect_fields/other/url/__init__.py,sha256=9WaTqCglEsw_lJG_xZsBMdxJXg2yuQ92_fkX6CXWNV0,286
 csv_detective/detect_fields/other/uuid/__init__.py,sha256=3-z0fDax29SJc57zPjNGR6DPICJu6gfuNGC5L3jh4d0,223
 csv_detective/detect_fields/temp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/detect_fields/temp/date/__init__.py,sha256=
+csv_detective/detect_fields/temp/date/__init__.py,sha256=1a_Ra9fmT4wgGMrcknXP7eN7A2QiaMF0Yjy0-BMihtA,987
 csv_detective/detect_fields/temp/datetime/__init__.py,sha256=Ykwhk2ospjY9P0KOG0AitgqN0sld6UmhOlbMz_XGQzQ,597
 csv_detective/detect_fields/temp/datetime_iso/__init__.py,sha256=DOfli-A7gPlZmiV2J6Ka5_yDUCaOgxis29LET_tfhA4,444
 csv_detective/detect_fields/temp/datetime_rfc822/__init__.py,sha256=JtUzg3BXYd-XJMLGxQ0P1OAJGOQ7DlYMD4fCU9yndg0,511
@@ -127,18 +127,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=-R7VqlryozelSn4wH_7w9x6
 csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
 csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
 csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
-csv_detective-0.7.5.
-csv_detective-0.7.5.
-csv_detective-0.7.5.
+csv_detective-0.7.5.dev1197.data/data/share/csv_detective/CHANGELOG.md,sha256=YlXiPqHlJv23g6HfqEXzic6y14IfPWUoz5ADOis0YeY,7528
+csv_detective-0.7.5.dev1197.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+csv_detective-0.7.5.dev1197.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+csv_detective-0.7.5.dev1197.dist-info/licenses/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
 tests/test_fields.py,sha256=_96htvTzvM7u-W57RpOBbsacWirIm4R36PP7JhPEaYQ,11123
-tests/test_file.py,sha256=
+tests/test_file.py,sha256=w-nKXnm8A5l5_MAtA6E99ouTefOkU38B6jMJVyBHr50,7858
 tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
 tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
-csv_detective-0.7.5.
-csv_detective-0.7.5.
-csv_detective-0.7.5.
-csv_detective-0.7.5.
-csv_detective-0.7.5.
-csv_detective-0.7.5.dev1180.dist-info/RECORD,,
+csv_detective-0.7.5.dev1197.dist-info/METADATA,sha256=2B1bE17lCw02QHnXyk_2Rt9M-fcN8J_RlUlkKzNL4tM,1386
+csv_detective-0.7.5.dev1197.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+csv_detective-0.7.5.dev1197.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+csv_detective-0.7.5.dev1197.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+csv_detective-0.7.5.dev1197.dist-info/RECORD,,
```
tests/test_file.py
CHANGED
```diff
@@ -69,7 +69,7 @@ def test_profile_output_on_file():
     assert output["profile"]["GEO_INFO"]["nb_distinct"] == 1
 
 
-def
+def test_profile_with_num_rows():
     with pytest.raises(ValueError):
         routine(
             csv_file_path="tests/a_test_file.csv",
```
```diff
@@ -131,52 +131,37 @@ def test_schema_on_file():
     assert is_column_reg
 
 
-
-
-
-
-
-
-    )
-    assert _['engine'] == 'odf'
-
+params_csv = [
+    ("csv_file", {"engine": None, "sheet_name": None}),
+    ("file.csv.gz", {"engine": None, "sheet_name": None, "separator": ",", "columns.len": 3}),
+]
+params_others = [
+    ("file.ods", {"engine": "odf"}),
     # this is a "tricked" xls file that is actually read as odf
-
-
-
-
-
-    )
-    assert _['engine'] == 'odf'
+    ("file.xls", {"engine": "odf"}),
+    # this file has an empty first row; check if the sheet we consider is the largest
+    ("file.xlsx", {"engine": "openpyxl", "header_row_idx": 1, "sheet_name": "REI_1987"}),
+    ("xlsx_file", {"engine": "openpyxl"}),
+]
 
-    _ = routine(
-        csv_file_path="tests/file.xlsx",
-        num_rows=-1,
-        output_profile=False,
-        save_results=False,
-    )
-    assert _['engine'] == 'openpyxl'
-    # this file has an empty first row
-    assert _['header_row_idx'] == 1
-    # check if the sheet we consider is the largest
-    assert _['sheet_name'] == 'REI_1987'
 
+@pytest.mark.parametrize("params", params_csv + params_others)
+def test_non_csv_files(params):
+    file_name, checks = params
     _ = routine(
-        csv_file_path="tests/
+        csv_file_path=f"tests/{file_name}",
         num_rows=-1,
         output_profile=False,
         save_results=False,
     )
-
-
-
-
-
-
-
-
-    )
-    assert _['engine'] == 'openpyxl'
+    for k, v in checks.items():
+        if v is None:
+            assert not _.get(k)
+        elif "." in k:
+            key, func = k.split(".")
+            assert eval(func)(_[key]) == v
+        else:
+            assert _[k] == v
 
 
 @pytest.fixture
```
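The parametrized checks use a small convention: a value of `None` asserts the key is absent or falsy, and a dotted key applies a builtin (resolved via `eval`) to the result before comparing, so `"columns.len": 3` means `len(_["columns"]) == 3`. The same loop on hypothetical data:

```python
# Standalone illustration of the dotted-key check convention used above.
result = {"columns": {"a": {}, "b": {}, "c": {}}, "engine": None}
checks = {"engine": None, "columns.len": 3}

for k, v in checks.items():
    if v is None:
        assert not result.get(k)             # key missing or falsy
    elif "." in k:
        key, func = k.split(".")             # "columns.len" -> ("columns", "len")
        assert eval(func)(result[key]) == v  # eval("len") is the builtin len
    else:
        assert result[k] == v
print("all checks passed")
```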
```diff
@@ -185,21 +170,34 @@ def mocked_responses():
         yield rsps
 
 
-
-
-
+@pytest.mark.parametrize(
+    "params",
+    # ideally we'd like to do the same with params_others but pandas.read_excel uses urllib
+    # which doesn't support the way we mock the response, TBC
+    params_csv + [("a_test_file.csv", {"separator": ";", "header_row_idx": 2, "total_lines": 414})]
+)
+def test_urls(mocked_responses, params):
+    file_name, checks = params
+    url = f"http://example.com/{file_name}"
     mocked_responses.get(
         url,
-        body=
+        body=open(f"tests/{file_name}", "rb").read(),
         status=200,
     )
-
+    _ = routine(
         csv_file_path=url,
         num_rows=-1,
         output_profile=False,
         save_results=False,
     )
-
+    for k, v in checks.items():
+        if v is None:
+            assert not _.get(k)
+        elif "." in k:
+            key, func = k.split(".")
+            assert eval(func)(_[key]) == v
+        else:
+            assert _[k] == v
 
 
 @pytest.mark.parametrize(
```
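The `mocked_responses` fixture body is not shown in this diff; a plausible shape, assuming the `responses` library. Its `RequestsMock` patches `requests`, which is also why the comment above notes that `pandas.read_excel` (which goes through urllib) cannot be mocked the same way:

```python
import pytest
import responses

# Assumed fixture shape; the diff only shows its "yield rsps" line.
@pytest.fixture
def mocked_responses():
    with responses.RequestsMock() as rsps:
        yield rsps
```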
{csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1197.data}/data/share/csv_detective/LICENSE.AGPL.txt
RENAMED
File without changes

{csv_detective-0.7.5.dev1180.data → csv_detective-0.7.5.dev1197.data}/data/share/csv_detective/README.md
RENAMED
File without changes

{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/entry_points.txt
RENAMED
File without changes

{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info/licenses}/LICENSE.AGPL.txt
RENAMED
File without changes

{csv_detective-0.7.5.dev1180.dist-info → csv_detective-0.7.5.dev1197.dist-info}/top_level.txt
RENAMED
File without changes