csv-detective 0.8.1.dev1440__py3-none-any.whl → 0.8.1.dev1460__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/explore_csv.py +2 -0
- {csv_detective-0.8.1.dev1440.data → csv_detective-0.8.1.dev1460.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/METADATA +1 -1
- {csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/RECORD +11 -11
- tests/test_file.py +39 -32
- {csv_detective-0.8.1.dev1440.data → csv_detective-0.8.1.dev1460.data}/data/share/csv_detective/LICENSE +0 -0
- {csv_detective-0.8.1.dev1440.data → csv_detective-0.8.1.dev1460.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/WHEEL +0 -0
- {csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/licenses/LICENSE +0 -0
- {csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/top_level.txt +0 -0
csv_detective/explore_csv.py
CHANGED
|
@@ -75,6 +75,7 @@ def routine(
|
|
|
75
75
|
analysis = detect_formats(
|
|
76
76
|
table=table,
|
|
77
77
|
analysis=analysis,
|
|
78
|
+
file_path=file_path,
|
|
78
79
|
user_input_tests=user_input_tests,
|
|
79
80
|
limited_output=limited_output,
|
|
80
81
|
skipna=skipna,
|
|
@@ -145,6 +146,7 @@ def validate_then_detect(
|
|
|
145
146
|
analysis = detect_formats(
|
|
146
147
|
table=table,
|
|
147
148
|
analysis=analysis,
|
|
149
|
+
file_path=file_path,
|
|
148
150
|
user_input_tests=user_input_tests,
|
|
149
151
|
limited_output=limited_output,
|
|
150
152
|
skipna=skipna,
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
- Refactor label testing [#119](https://github.com/datagouv/csv-detective/pull/119)
|
|
6
6
|
- Refactor repo metadata and requirements [#120](https://github.com/datagouv/csv-detective/pull/120) [#122](https://github.com/datagouv/csv-detective/pull/122)
|
|
7
7
|
- Better URL detection [#121](https://github.com/datagouv/csv-detective/pull/121)
|
|
8
|
+
- For big files, analyse on sample then validate on whole file [#124](https://github.com/datagouv/csv-detective/pull/124)
|
|
8
9
|
|
|
9
10
|
## 0.8.0 (2025-05-20)
|
|
10
11
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
csv_detective/__init__.py,sha256=fxctDlEyUexNk_ePriWu6V05xZEeirMV0v_StnEZ8vQ,165
|
|
2
2
|
csv_detective/cli.py,sha256=itooHtpyfC6DUsL_DchPKe1xo7m0MYJIp1L4R8eqoTk,1401
|
|
3
|
-
csv_detective/explore_csv.py,sha256=
|
|
3
|
+
csv_detective/explore_csv.py,sha256=YxXgaUqUNdAGsU8bC-cs_TVvSza4wc4aMJQjWRkRT5s,9144
|
|
4
4
|
csv_detective/load_tests.py,sha256=GILvfkd4OVI-72mA4nzbPlZqgcXZ4wznOhGfZ1ucWkM,2385
|
|
5
5
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
6
6
|
csv_detective/utils.py,sha256=-tIs9yV7RJPGj65lQ7LjRGch6Iws9UeuIPQsd2uUUJM,1025
|
|
@@ -127,19 +127,19 @@ csv_detective/output/example.py,sha256=EdPX1iqHhIG4DsiHuYdy-J7JxOkjgUh_o2D5nrfM5
|
|
|
127
127
|
csv_detective/output/profile.py,sha256=B8YU541T_YPDezJGh4dkHckOShiwHSrZd9GS8jbmz7A,2919
|
|
128
128
|
csv_detective/output/schema.py,sha256=ZDBWDOD8IYp7rcB0_n8l9JXGIhOQ6bTZHFWfTmnNNEQ,13480
|
|
129
129
|
csv_detective/output/utils.py,sha256=HbmvCCCmFo7NJxhD_UsJIveuw-rrfhrvYckv1CJn_10,2301
|
|
130
|
-
csv_detective-0.8.1.
|
|
131
|
-
csv_detective-0.8.1.
|
|
132
|
-
csv_detective-0.8.1.
|
|
133
|
-
csv_detective-0.8.1.
|
|
130
|
+
csv_detective-0.8.1.dev1460.data/data/share/csv_detective/CHANGELOG.md,sha256=BsmO9YQAMi31co_c0I8aYRsm2m5Q5--vORWoJArdhOM,8725
|
|
131
|
+
csv_detective-0.8.1.dev1460.data/data/share/csv_detective/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
132
|
+
csv_detective-0.8.1.dev1460.data/data/share/csv_detective/README.md,sha256=gKLFmC8kuCCywS9eAhMak_JNriUWWNOsBKleAu5TIEY,8501
|
|
133
|
+
csv_detective-0.8.1.dev1460.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
|
|
134
134
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
135
135
|
tests/test_example.py,sha256=JeHxSK0IVDcSrOhSZlNGSQv4JAc_r6mzvJM8PfmLTMw,2018
|
|
136
136
|
tests/test_fields.py,sha256=d2tNvjtal6ZbO646x1GDbp_CGgp-EIcdg2SgMG72J6E,10270
|
|
137
|
-
tests/test_file.py,sha256=
|
|
137
|
+
tests/test_file.py,sha256=FWVtYHlD5uU7tPeYsqlQg6O4lpU8Ct35vddkbzhvvjA,8508
|
|
138
138
|
tests/test_labels.py,sha256=Nkr645bUewrj8hjNDKr67FQ6Sy_TID6f3E5Kfkl231M,464
|
|
139
139
|
tests/test_structure.py,sha256=bv-tjgXohvQAxwmxzH0BynFpK2TyPjcxvtIAmIRlZmA,1393
|
|
140
140
|
tests/test_validation.py,sha256=CTGonR6htxcWF9WH8MxumDD8cF45Y-G4hm94SM4lFjU,3246
|
|
141
|
-
csv_detective-0.8.1.
|
|
142
|
-
csv_detective-0.8.1.
|
|
143
|
-
csv_detective-0.8.1.
|
|
144
|
-
csv_detective-0.8.1.
|
|
145
|
-
csv_detective-0.8.1.
|
|
141
|
+
csv_detective-0.8.1.dev1460.dist-info/METADATA,sha256=Rhi872uRXV2PcYpcI64GJ9vw12TsYIEQJxf8H1srLic,10443
|
|
142
|
+
csv_detective-0.8.1.dev1460.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
143
|
+
csv_detective-0.8.1.dev1460.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
144
|
+
csv_detective-0.8.1.dev1460.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
145
|
+
csv_detective-0.8.1.dev1460.dist-info/RECORD,,
|
tests/test_file.py
CHANGED
|
@@ -1,42 +1,49 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
import pytest
|
|
3
3
|
import responses
|
|
4
|
+
from unittest.mock import patch
|
|
4
5
|
|
|
5
6
|
from csv_detective import routine
|
|
6
7
|
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
9
|
+
@pytest.mark.parametrize(
|
|
10
|
+
"reduce_max_rows_analysis",
|
|
11
|
+
(True, False),
|
|
12
|
+
)
|
|
13
|
+
def test_columns_output_on_file(reduce_max_rows_analysis):
|
|
14
|
+
patched = 100 if reduce_max_rows_analysis else 1e5
|
|
15
|
+
with patch("csv_detective.detection.formats.MAX_ROWS_ANALYSIS", patched):
|
|
16
|
+
output = routine(
|
|
17
|
+
file_path="tests/data/a_test_file.csv",
|
|
18
|
+
num_rows=-1,
|
|
19
|
+
output_profile=False,
|
|
20
|
+
save_results=False,
|
|
21
|
+
)
|
|
22
|
+
assert isinstance(output, dict)
|
|
23
|
+
assert output["separator"] == ";"
|
|
24
|
+
assert output["header_row_idx"] == 2
|
|
25
|
+
assert output["header"] == [
|
|
26
|
+
"NUMCOM",
|
|
27
|
+
"NOMCOM",
|
|
28
|
+
"NUMDEP",
|
|
29
|
+
"NOMDEP",
|
|
30
|
+
"NUMEPCI",
|
|
31
|
+
"NOMEPCI",
|
|
32
|
+
"TXCOUVGLO_COM_2014",
|
|
33
|
+
"TXCOUVGLO_DEP_2014",
|
|
34
|
+
"TXCOUVGLO_EPCI_2014",
|
|
35
|
+
"STRUCTURED_INFO",
|
|
36
|
+
"GEO_INFO",
|
|
37
|
+
]
|
|
38
|
+
assert output["total_lines"] == 404
|
|
39
|
+
assert output["nb_duplicates"] == 7
|
|
40
|
+
assert output["columns"]["NOMCOM"]["format"] == "commune"
|
|
41
|
+
assert output["columns"]["NOMDEP"]["format"] == "departement"
|
|
42
|
+
assert output["columns"]["NUMEPCI"]["format"] == "siren"
|
|
43
|
+
assert output["columns"]["STRUCTURED_INFO"]["python_type"] == "json"
|
|
44
|
+
assert output["columns"]["STRUCTURED_INFO"]["format"] == "json"
|
|
45
|
+
assert output["columns"]["GEO_INFO"]["python_type"] == "json"
|
|
46
|
+
assert output["columns"]["GEO_INFO"]["format"] == "json_geojson"
|
|
40
47
|
|
|
41
48
|
|
|
42
49
|
def test_profile_output_on_file():
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{csv_detective-0.8.1.dev1440.dist-info → csv_detective-0.8.1.dev1460.dist-info}/top_level.txt
RENAMED
|
File without changes
|