csv-detective 0.7.5.dev1056__py3-none-any.whl → 0.7.5.dev1069__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/explore_csv.py +6 -1
- {csv_detective-0.7.5.dev1056.data → csv_detective-0.7.5.dev1069.data}/data/share/csv_detective/CHANGELOG.md +1 -0
- {csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/METADATA +1 -2
- {csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/RECORD +11 -11
- {csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/WHEEL +1 -1
- tests/test_file.py +14 -0
- {csv_detective-0.7.5.dev1056.data → csv_detective-0.7.5.dev1069.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1056.data → csv_detective-0.7.5.dev1069.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/top_level.txt +0 -0
csv_detective/explore_csv.py
CHANGED
|
@@ -12,6 +12,7 @@ import logging
|
|
|
12
12
|
from time import time
|
|
13
13
|
import requests
|
|
14
14
|
from io import StringIO
|
|
15
|
+
import pandas as pd
|
|
15
16
|
|
|
16
17
|
# flake8: noqa
|
|
17
18
|
from csv_detective import detect_fields, detect_labels
|
|
@@ -108,9 +109,10 @@ def routine(
|
|
|
108
109
|
skipna: bool = True,
|
|
109
110
|
output_profile: bool = False,
|
|
110
111
|
output_schema: bool = False,
|
|
112
|
+
output_df: bool = False,
|
|
111
113
|
verbose: bool = False,
|
|
112
114
|
sheet_name: Union[str, int] = None,
|
|
113
|
-
):
|
|
115
|
+
) -> Union[dict, tuple[dict, pd.DataFrame]]:
|
|
114
116
|
"""Returns a dict with information about the csv table and possible
|
|
115
117
|
column contents.
|
|
116
118
|
|
|
@@ -123,6 +125,7 @@ def routine(
|
|
|
123
125
|
save_results: whether or not to save the results in a json file, or the path where to dump the output
|
|
124
126
|
output_profile: whether or not to add the 'profile' field to the output
|
|
125
127
|
output_schema: whether or not to add the 'schema' field to the output (tableschema)
|
|
128
|
+
output_df: whether or not to return the loaded DataFrame along with the analysis report
|
|
126
129
|
verbose: whether or not to print process logs in console
|
|
127
130
|
sheet_name: if reading multi-sheet file (xls-like), which sheet to consider
|
|
128
131
|
skipna: whether to keep NaN (empty cells) for tests
|
|
@@ -353,6 +356,8 @@ def routine(
|
|
|
353
356
|
f'Routine completed in {round(time() - start_routine, 3)}s',
|
|
354
357
|
time() - start_routine
|
|
355
358
|
)
|
|
359
|
+
if output_df:
|
|
360
|
+
return return_dict, table
|
|
356
361
|
return return_dict
|
|
357
362
|
|
|
358
363
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
## Current (in progress)
|
|
4
4
|
|
|
5
5
|
- New function that creates a csv from a list of fields and constraints, or from a TableSchema [#101](https://github.com/datagouv/csv-detective/pull/101)
|
|
6
|
+
- Enable outputting loaded dataframe [#102](https://github.com/datagouv/csv-detective/pull/102)
|
|
6
7
|
|
|
7
8
|
## 0.7.4 (2024-11-15)
|
|
8
9
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: csv_detective
|
|
3
|
-
Version: 0.7.5.dev1056
|
|
3
|
+
Version: 0.7.5.dev1069
|
|
4
4
|
Summary: Detect CSV column content
|
|
5
5
|
Home-page: https://github.com/etalab/csv_detective
|
|
6
6
|
Author: Etalab
|
|
@@ -29,4 +29,3 @@ Requires-Dist: python-magic==0.4.27
|
|
|
29
29
|
Requires-Dist: frformat==0.4.0
|
|
30
30
|
Requires-Dist: faker==33.0.0
|
|
31
31
|
Requires-Dist: rstr==3.2.2
|
|
32
|
-
|
|
@@ -2,7 +2,7 @@ csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
|
|
|
2
2
|
csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
|
|
3
3
|
csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
|
|
4
4
|
csv_detective/detection.py,sha256=AuXlPOZfzqznZY2ybAAgaXIq6qVITYd3MXf2CoigI3I,22097
|
|
5
|
-
csv_detective/explore_csv.py,sha256=
|
|
5
|
+
csv_detective/explore_csv.py,sha256=6kGl1E061_CefAdei-wgwafZT1g8oKWg0eE1D5zWTOk,17216
|
|
6
6
|
csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
|
|
7
7
|
csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
|
|
8
8
|
csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
|
|
@@ -126,18 +126,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKU
|
|
|
126
126
|
csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
|
|
127
127
|
csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
|
|
128
128
|
csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
|
|
129
|
-
csv_detective-0.7.5.
|
|
130
|
-
csv_detective-0.7.5.
|
|
131
|
-
csv_detective-0.7.5.
|
|
129
|
+
csv_detective-0.7.5.dev1069.data/data/share/csv_detective/CHANGELOG.md,sha256=QbZKEEWbkt7a-TMHB6CpzzliDqv3BLECa_zkJgZOFkY,6820
|
|
130
|
+
csv_detective-0.7.5.dev1069.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
131
|
+
csv_detective-0.7.5.dev1069.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
|
|
132
132
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
133
133
|
tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
|
|
134
134
|
tests/test_fields.py,sha256=kXel-hiyQYrJ3OLmwUMg1K3DKbbwBLvUplxZWxpp18I,10605
|
|
135
|
-
tests/test_file.py,sha256=
|
|
135
|
+
tests/test_file.py,sha256=oQITvAxdcrqDby2wWSh_X9TCwFqdFaP34XNy92ibXyg,6725
|
|
136
136
|
tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
|
|
137
137
|
tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
|
|
138
|
-
csv_detective-0.7.5.
|
|
139
|
-
csv_detective-0.7.5.
|
|
140
|
-
csv_detective-0.7.5.
|
|
141
|
-
csv_detective-0.7.5.
|
|
142
|
-
csv_detective-0.7.5.
|
|
143
|
-
csv_detective-0.7.5.
|
|
138
|
+
csv_detective-0.7.5.dev1069.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
|
|
139
|
+
csv_detective-0.7.5.dev1069.dist-info/METADATA,sha256=sqa9hWFoiOj9-MpBX1uuwOl5qyPCSoca3wo0RrglmNY,1145
|
|
140
|
+
csv_detective-0.7.5.dev1069.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
141
|
+
csv_detective-0.7.5.dev1069.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
|
|
142
|
+
csv_detective-0.7.5.dev1069.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
|
|
143
|
+
csv_detective-0.7.5.dev1069.dist-info/RECORD,,
|
tests/test_file.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from csv_detective import routine
|
|
2
2
|
import pytest
|
|
3
3
|
import responses
|
|
4
|
+
import pandas as pd
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def test_columns_output_on_file():
|
|
@@ -218,3 +219,16 @@ def test_nan_values(expected_type):
|
|
|
218
219
|
skipna=skipna,
|
|
219
220
|
)
|
|
220
221
|
assert output["columns"]["partly_empty"]["python_type"] == expected_type
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def test_output_df():
|
|
225
|
+
output, df = routine(
|
|
226
|
+
csv_file_path="tests/b_test_file.csv",
|
|
227
|
+
num_rows=-1,
|
|
228
|
+
output_profile=False,
|
|
229
|
+
save_results=False,
|
|
230
|
+
output_df=True,
|
|
231
|
+
)
|
|
232
|
+
assert isinstance(output, dict)
|
|
233
|
+
assert isinstance(df, pd.DataFrame)
|
|
234
|
+
assert len(df) == 6
|
|
File without changes
|
|
File without changes
|
{csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/LICENSE.AGPL.txt
RENAMED
|
File without changes
|
{csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{csv_detective-0.7.5.dev1056.dist-info → csv_detective-0.7.5.dev1069.dist-info}/top_level.txt
RENAMED
|
File without changes
|