csv-detective 0.7.5.dev1197__py3-none-any.whl → 0.7.5.dev1228__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_detective/__init__.py +1 -1
- csv_detective/detect_fields/FR/geo/adresse/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/code_csp_insee/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/csp_insee/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/insee_ape700/__init__.py +1 -1
- csv_detective/detect_fields/FR/other/sexe/__init__.py +1 -1
- csv_detective/detect_fields/other/float/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/adresse/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_departement/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_fantoir/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_postal/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/code_region/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/commune/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/departement/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/insee_canton/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/latitude_l93/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/latitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/longitude_l93/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/longitude_wgs_fr_metropole/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/pays/__init__.py +1 -1
- csv_detective/detect_labels/FR/geo/region/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_csp_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_rna/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/code_waldec/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/csp_insee/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/date_fr/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/insee_ape700/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/sexe/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/siren/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/siret/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/tel_fr/__init__.py +1 -1
- csv_detective/detect_labels/FR/other/uai/__init__.py +1 -1
- csv_detective/detect_labels/FR/temp/jour_de_la_semaine/__init__.py +1 -1
- csv_detective/detect_labels/FR/temp/mois_de_annee/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_alpha2/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_alpha3/__init__.py +1 -1
- csv_detective/detect_labels/geo/iso_country_code_numeric/__init__.py +1 -1
- csv_detective/detect_labels/geo/json_geojson/__init__.py +1 -1
- csv_detective/detect_labels/geo/latitude_wgs/__init__.py +1 -1
- csv_detective/detect_labels/geo/latlon_wgs/__init__.py +1 -1
- csv_detective/detect_labels/geo/longitude_wgs/__init__.py +1 -1
- csv_detective/detect_labels/other/booleen/__init__.py +1 -1
- csv_detective/detect_labels/other/email/__init__.py +1 -1
- csv_detective/detect_labels/other/float/__init__.py +1 -1
- csv_detective/detect_labels/other/int/__init__.py +1 -1
- csv_detective/detect_labels/other/mongo_object_id/__init__.py +1 -1
- csv_detective/detect_labels/other/twitter/__init__.py +1 -1
- csv_detective/detect_labels/other/url/__init__.py +1 -1
- csv_detective/detect_labels/other/uuid/__init__.py +1 -1
- csv_detective/detect_labels/temp/date/__init__.py +1 -1
- csv_detective/detect_labels/temp/datetime_iso/__init__.py +1 -1
- csv_detective/detect_labels/temp/datetime_rfc822/__init__.py +1 -1
- csv_detective/detect_labels/temp/year/__init__.py +1 -1
- csv_detective/detection/columns.py +89 -0
- csv_detective/detection/encoding.py +27 -0
- csv_detective/detection/engine.py +46 -0
- csv_detective/detection/headers.py +32 -0
- csv_detective/detection/rows.py +18 -0
- csv_detective/detection/separator.py +44 -0
- csv_detective/detection/variables.py +98 -0
- csv_detective/explore_csv.py +40 -124
- csv_detective/output/dataframe.py +55 -0
- csv_detective/{create_example.py → output/example.py} +10 -9
- csv_detective/output/profile.py +87 -0
- csv_detective/{schema_generation.py → output/schema.py} +344 -343
- csv_detective/output/utils.py +51 -0
- csv_detective/parsing/columns.py +141 -0
- csv_detective/parsing/compression.py +11 -0
- csv_detective/parsing/csv.py +55 -0
- csv_detective/parsing/excel.py +169 -0
- csv_detective/parsing/load.py +97 -0
- csv_detective/utils.py +10 -236
- {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1228.data}/data/share/csv_detective/CHANGELOG.md +3 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/METADATA +1 -1
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/RECORD +85 -71
- tests/test_fields.py +8 -7
- tests/test_file.py +15 -14
- csv_detective/detection.py +0 -633
- csv_detective/{process_text.py → parsing/text.py} +0 -0
- {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1228.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1197.data → csv_detective-0.7.5.dev1228.data}/data/share/csv_detective/README.md +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/WHEEL +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/entry_points.txt +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
- {csv_detective-0.7.5.dev1197.dist-info → csv_detective-0.7.5.dev1228.dist-info}/top_level.txt +0 -0
tests/test_file.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
import pandas as pd
|
|
2
2
|
import pytest
|
|
3
3
|
import responses
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
from csv_detective import routine
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def test_columns_output_on_file():
|
|
8
9
|
output = routine(
|
|
9
|
-
|
|
10
|
+
file_path="tests/data/a_test_file.csv",
|
|
10
11
|
num_rows=-1,
|
|
11
12
|
output_profile=False,
|
|
12
13
|
save_results=False,
|
|
@@ -40,7 +41,7 @@ def test_columns_output_on_file():
|
|
|
40
41
|
|
|
41
42
|
def test_profile_output_on_file():
|
|
42
43
|
output = routine(
|
|
43
|
-
|
|
44
|
+
file_path="tests/data/a_test_file.csv",
|
|
44
45
|
num_rows=-1,
|
|
45
46
|
output_profile=True,
|
|
46
47
|
save_results=False,
|
|
@@ -72,7 +73,7 @@ def test_profile_output_on_file():
|
|
|
72
73
|
def test_profile_with_num_rows():
|
|
73
74
|
with pytest.raises(ValueError):
|
|
74
75
|
routine(
|
|
75
|
-
|
|
76
|
+
file_path="tests/data/a_test_file.csv",
|
|
76
77
|
num_rows=50,
|
|
77
78
|
output_profile=True,
|
|
78
79
|
save_results=False,
|
|
@@ -85,7 +86,7 @@ def test_exception_different_number_of_columns():
|
|
|
85
86
|
"""
|
|
86
87
|
with pytest.raises(ValueError):
|
|
87
88
|
routine(
|
|
88
|
-
|
|
89
|
+
file_path="tests/data/c_test_file.csv",
|
|
89
90
|
num_rows=-1,
|
|
90
91
|
output_profile=True,
|
|
91
92
|
save_results=False,
|
|
@@ -94,7 +95,7 @@ def test_exception_different_number_of_columns():
|
|
|
94
95
|
|
|
95
96
|
def test_code_dep_reg_on_file():
|
|
96
97
|
output = routine(
|
|
97
|
-
|
|
98
|
+
file_path="tests/data/b_test_file.csv",
|
|
98
99
|
num_rows=-1,
|
|
99
100
|
output_profile=False,
|
|
100
101
|
save_results=False,
|
|
@@ -106,7 +107,7 @@ def test_code_dep_reg_on_file():
|
|
|
106
107
|
|
|
107
108
|
def test_schema_on_file():
|
|
108
109
|
output = routine(
|
|
109
|
-
|
|
110
|
+
file_path="tests/data/b_test_file.csv",
|
|
110
111
|
num_rows=-1,
|
|
111
112
|
output_schema=True,
|
|
112
113
|
save_results=False,
|
|
@@ -149,7 +150,7 @@ params_others = [
|
|
|
149
150
|
def test_non_csv_files(params):
|
|
150
151
|
file_name, checks = params
|
|
151
152
|
_ = routine(
|
|
152
|
-
|
|
153
|
+
file_path=f"tests/data/{file_name}",
|
|
153
154
|
num_rows=-1,
|
|
154
155
|
output_profile=False,
|
|
155
156
|
save_results=False,
|
|
@@ -181,11 +182,11 @@ def test_urls(mocked_responses, params):
|
|
|
181
182
|
url = f"http://example.com/{file_name}"
|
|
182
183
|
mocked_responses.get(
|
|
183
184
|
url,
|
|
184
|
-
body=open(f"tests/{file_name}", "rb").read(),
|
|
185
|
+
body=open(f"tests/data/{file_name}", "rb").read(),
|
|
185
186
|
status=200,
|
|
186
187
|
)
|
|
187
188
|
_ = routine(
|
|
188
|
-
|
|
189
|
+
file_path=url,
|
|
189
190
|
num_rows=-1,
|
|
190
191
|
output_profile=False,
|
|
191
192
|
save_results=False,
|
|
@@ -211,7 +212,7 @@ def test_nan_values(expected_type):
|
|
|
211
212
|
# if skipping NaN, the column contains only ints
|
|
212
213
|
skipna, expected_type = expected_type
|
|
213
214
|
output = routine(
|
|
214
|
-
|
|
215
|
+
file_path="tests/data/b_test_file.csv",
|
|
215
216
|
num_rows=-1,
|
|
216
217
|
save_results=False,
|
|
217
218
|
skipna=skipna,
|
|
@@ -221,7 +222,7 @@ def test_nan_values(expected_type):
|
|
|
221
222
|
|
|
222
223
|
def test_output_df():
|
|
223
224
|
output, df = routine(
|
|
224
|
-
|
|
225
|
+
file_path="tests/data/b_test_file.csv",
|
|
225
226
|
num_rows=-1,
|
|
226
227
|
output_profile=False,
|
|
227
228
|
save_results=False,
|
|
@@ -249,7 +250,7 @@ def test_cast_json(mocked_responses, cast_json):
|
|
|
249
250
|
status=200,
|
|
250
251
|
)
|
|
251
252
|
analysis, df = routine(
|
|
252
|
-
|
|
253
|
+
file_path='http://example.com/test.csv',
|
|
253
254
|
num_rows=-1,
|
|
254
255
|
output_profile=False,
|
|
255
256
|
save_results=False,
|