csv-detective 0.7.5.dev1277__py3-none-any.whl → 0.7.5.dev1298__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. csv_detective/__init__.py +1 -1
  2. csv_detective/detect_fields/__init__.py +6 -4
  3. csv_detective/detect_fields/geo/latlon_wgs/__init__.py +7 -7
  4. csv_detective/detect_fields/other/float/__init__.py +4 -4
  5. csv_detective/detect_fields/other/money/__init__.py +11 -0
  6. csv_detective/detect_fields/other/percent/__init__.py +9 -0
  7. csv_detective/detection/formats.py +145 -0
  8. csv_detective/explore_csv.py +94 -222
  9. csv_detective/load_tests.py +62 -0
  10. csv_detective/output/__init__.py +64 -0
  11. csv_detective/output/dataframe.py +0 -0
  12. csv_detective/output/example.py +77 -77
  13. csv_detective/output/profile.py +0 -0
  14. csv_detective/output/schema.py +0 -0
  15. csv_detective/output/utils.py +0 -0
  16. csv_detective/utils.py +2 -0
  17. csv_detective/validate.py +70 -0
  18. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/CHANGELOG.md +2 -0
  19. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/METADATA +1 -1
  20. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/RECORD +27 -20
  21. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/WHEEL +1 -1
  22. tests/test_example.py +10 -10
  23. tests/test_fields.py +270 -415
  24. tests/test_file.py +19 -9
  25. tests/test_structure.py +6 -0
  26. tests/test_validation.py +18 -0
  27. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/LICENSE.AGPL.txt +0 -0
  28. {csv_detective-0.7.5.dev1277.data → csv_detective-0.7.5.dev1298.data}/data/share/csv_detective/README.md +0 -0
  29. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/entry_points.txt +0 -0
  30. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/licenses/LICENSE.AGPL.txt +0 -0
  31. {csv_detective-0.7.5.dev1277.dist-info → csv_detective-0.7.5.dev1298.dist-info}/top_level.txt +0 -0
csv_detective/__init__.py CHANGED
@@ -1,4 +1,4 @@
- from .explore_csv import routine, routine_minio # noqa
+ from .explore_csv import routine, routine_minio, validate_then_detect # noqa
  from .output.example import create_example_csv_file # noqa

  __version__ = '0.7.5.dev'
csv_detective/detect_fields/__init__.py CHANGED
@@ -10,19 +10,21 @@ from .FR.other import (
      insee_ape700,
      date_fr,
      code_waldec,
-     code_rna
+     code_rna,
  )

  from .other import (
      email,
      url,
      booleen,
+     money,
      mongo_object_id,
+     percent,
      twitter,
      float,
      int,
      uuid,
-     json
+     json,
  )

  from .FR.geo import (
@@ -40,7 +42,7 @@ from .FR.geo import (
      code_region,
      latitude_l93,
      longitude_l93,
-     insee_canton
+     insee_canton,
  )

  from .geo import (
@@ -50,7 +52,7 @@ from .geo import (
      latitude_wgs,
      longitude_wgs,
      latlon_wgs,
-     json_geojson
+     json_geojson,
  )

  from .FR.temp import jour_de_la_semaine, mois_de_annee
csv_detective/detect_fields/geo/latlon_wgs/__init__.py CHANGED
@@ -1,13 +1,13 @@
- import re
+ from ..latitude_wgs import _is as is_lat
+ from ..longitude_wgs import _is as is_lon

- PROPORTION = 0.9
+ PROPORTION = 1


  def _is(val):
      '''Renvoie True si val peut etre une latitude,longitude'''

-     return isinstance(val, str) and bool(
-         re.match(
-             r'^\[?[\+\-]?[0-8]?\d\.\d* ?, ?[\+\-]?(1[0-7]\d|\d{1,2})\.\d+\]?$', val
-         )
-     )
+     if not isinstance(val, str) or val.count(",") != 1:
+         return False
+     lat, lon = val.split(",")
+     return is_lat(lat) and is_lon(lon.replace(" ", ""))
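The rewrite drops the single regex (the French docstring reads "returns True if val can be a latitude,longitude") and instead delegates to the sibling `latitude_wgs` and `longitude_wgs` detectors: exactly one comma is required, and spaces are stripped from the longitude half only. Note that the old pattern optionally accepted surrounding brackets (`\[? ... \]?`), which the new code no longer handles itself. A sketch of the expected outcomes (illustrative values; the sibling detectors' exact tolerance is assumed, not verified here):

    from csv_detective.detect_fields.geo.latlon_wgs import _is

    print(_is("48.8566,2.3522"))   # expected True: one comma, valid lat and lon
    print(_is("48.8566, 2.3522"))  # expected True: the space is stripped from the lon half
    print(_is("48.8566"))          # False: no comma
    print(_is("48.85,2.35,0"))     # False: val.count(",") != 1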
csv_detective/detect_fields/other/float/__init__.py CHANGED
@@ -2,16 +2,16 @@ PROPORTION = 1


  def float_casting(val: str) -> float:
-     return float(val.replace(',', '.'))
+     return float(val.replace(",", "."))


  def _is(val):
-     '''Detects floats, assuming that tables will not have scientific
-     notations (3e6) or "+" in the string. "-" is still accepted.'''
+     """Detects floats, assuming that tables will not have scientific
+     notations (3e6) or "+" in the string. "-" is still accepted."""
      try:
          if (
              not isinstance(val, str)
-             or any([k in val for k in ['_', '+', 'e', 'E']])
+             or any([k in val for k in ["_", "+", "e", "E"]])
              or (val.startswith("0") and len(val) > 1 and val[1] not in [".", ","])
          ):
              return False
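Only quote style changes here, but the visible checks make the rules easy to read off: strings containing `_`, `+`, `e` or `E` are rejected (so no scientific notation), a leading zero is only allowed directly before a decimal separator, and `float_casting` maps a comma decimal separator to a dot. Expected behaviour, for illustration (the truthy cases assume the unshown remainder of `_is` casts successfully):

    from csv_detective.detect_fields.other.float import _is, float_casting

    print(float_casting("3,14"))  # 3.14: comma treated as decimal separator
    print(_is("3,14"))            # expected True
    print(_is("-2.5"))            # expected True: "-" is still accepted
    print(_is("3e6"))             # False: "e" is rejected
    print(_is("007"))             # False: leading zero not followed by "." or ","
    print(_is("0.5"))             # expected True: the allowed leading-zero case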
csv_detective/detect_fields/other/money/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from ..float import _is as is_float
+
+ currencies = set(["€", "$", "£", "¥"])
+
+ PROPORTION = 0.8
+
+
+ def _is(val: str):
+     if not isinstance(val, str) or val[-1] not in currencies:
+         return False
+     return is_float(val[:-1])
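The new detector accepts a float immediately followed by one of four currency symbols. One caveat worth noting: `val[-1]` is read before any length check, so an empty string would raise IndexError rather than return False; presumably empty cells are filtered upstream (e.g. with `skipna=True`). Illustrative outcomes:

    from csv_detective.detect_fields.other.money import _is

    print(_is("12,50€"))   # expected True: float part + trailing currency symbol
    print(_is("$12.50"))   # False: the symbol must be the last character
    print(_is("12.50"))    # False: "0" is not a known currency symbol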
csv_detective/detect_fields/other/percent/__init__.py ADDED
@@ -0,0 +1,9 @@
+ from ..float import _is as is_float
+
+ PROPORTION = 0.8
+
+
+ def _is(val: str):
+     if not isinstance(val, str) or val[-1] != "%":
+         return False
+     return is_float(val[:-1])
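Same pattern as the money detector, with `%` as the required final character (and the same empty-string caveat). Illustrative outcomes:

    from csv_detective.detect_fields.other.percent import _is

    print(_is("42%"))     # expected True
    print(_is("42,5%"))   # expected True: comma decimals pass the float test
    print(_is("%42"))     # False: "%" must be the final character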
csv_detective/detection/formats.py ADDED
@@ -0,0 +1,145 @@
+ from collections import defaultdict
+ from typing import Union
+
+ import numpy as np
+ import pandas as pd
+ from csv_detective.detection.variables import (
+     detect_categorical_variable,
+     # detect_continuous_variable,
+ )
+ from csv_detective.load_tests import return_all_tests
+ from csv_detective.output.utils import prepare_output_dict
+ from csv_detective.parsing.columns import test_col, test_label
+
+
+ def detect_formats(
+     table: pd.DataFrame,
+     analysis: dict,
+     user_input_tests: Union[str, list[str]] = "ALL",
+     limited_output: bool = True,
+     skipna: bool = True,
+     verbose: bool = False,
+ ):
+
+     if table.empty:
+         res_categorical = []
+         # res_continuous = []
+     else:
+         # Detects columns that are categorical
+         res_categorical, categorical_mask = detect_categorical_variable(table, verbose=verbose)
+         res_categorical = list(res_categorical)
+         # Detect columns that are continuous (we already know the categorical) :
+         # we don't need this for now, cuts processing time
+         # res_continuous = list(
+         #     detect_continuous_variable(table.iloc[:, ~categorical_mask.values], verbose=verbose)
+         # )
+
+     analysis.update({
+         "categorical": res_categorical,
+         # "continuous": res_continuous,
+     })
+
+     # list testing to be performed
+     all_tests_fields = return_all_tests(
+         user_input_tests, detect_type="detect_fields"
+     ) # list all tests for the fields
+     all_tests_labels = return_all_tests(
+         user_input_tests, detect_type="detect_labels"
+     ) # list all tests for the labels
+
+     # if no testing then return
+     if not all_tests_fields and not all_tests_labels:
+         return analysis
+
+     # Perform testing on fields
+     scores_table_fields = test_col(table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose)
+     analysis["columns_fields"] = prepare_output_dict(scores_table_fields, limited_output)
+
+     # Perform testing on labels
+     scores_table_labels = test_label(table, all_tests_labels, limited_output, verbose=verbose)
+     analysis["columns_labels"] = prepare_output_dict(scores_table_labels, limited_output)
+
+     # Multiply the results of the fields by 1 + 0.5 * the results of the labels.
+     # This is because the fields are more important than the labels and yields a max
+     # of 1.5 for the final score.
+     scores_table = scores_table_fields * (
+         1
+         + scores_table_labels.reindex(
+             index=scores_table_fields.index, fill_value=0
+         ).values / 2
+     )
+
+     # To reduce false positives: ensure these formats are detected only if the label yields
+     # a detection (skipping the ones that have been excluded by the users).
+     formats_with_mandatory_label = [
+         f for f in [
+             "code_departement",
+             "code_commune_insee",
+             "code_postal",
+             "latitude_wgs",
+             "longitude_wgs",
+             "latitude_wgs_fr_metropole",
+             "longitude_wgs_fr_metropole",
+             "latitude_l93",
+             "longitude_l93",
+         ] if f in scores_table.index
+     ]
+     scores_table.loc[formats_with_mandatory_label, :] = np.where(
+         scores_table_labels.loc[formats_with_mandatory_label, :],
+         scores_table.loc[formats_with_mandatory_label, :],
+         0,
+     )
+     analysis["columns"] = prepare_output_dict(scores_table, limited_output)
+
+     metier_to_python_type = {
+         "booleen": "bool",
+         "int": "int",
+         "float": "float",
+         "string": "string",
+         "json": "json",
+         "json_geojson": "json",
+         "datetime": "datetime",
+         "datetime_iso": "datetime",
+         "datetime_rfc822": "datetime",
+         "date": "date",
+         "latitude": "float",
+         "latitude_l93": "float",
+         "latitude_wgs": "float",
+         "latitude_wgs_fr_metropole": "float",
+         "longitude": "float",
+         "longitude_l93": "float",
+         "longitude_wgs": "float",
+         "longitude_wgs_fr_metropole": "float",
+     }
+
+     if not limited_output:
+         for detection_method in ["columns_fields", "columns_labels", "columns"]:
+             analysis[detection_method] = {
+                 col_name: [
+                     {
+                         "python_type": metier_to_python_type.get(
+                             detection["format"], "string"
+                         ),
+                         **detection,
+                     }
+                     for detection in detections
+                 ]
+                 for col_name, detections in analysis[detection_method].items()
+             }
+     else:
+         for detection_method in ["columns_fields", "columns_labels", "columns"]:
+             analysis[detection_method] = {
+                 col_name: {
+                     "python_type": metier_to_python_type.get(
+                         detection["format"], "string"
+                     ),
+                     **detection,
+                 }
+                 for col_name, detection in analysis[detection_method].items()
+             }
+
+     # Add detection with formats as keys
+     analysis["formats"] = defaultdict(list)
+     for header, col_metadata in analysis["columns"].items():
+         analysis["formats"][col_metadata["format"]].append(header)
+     return analysis
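The scoring step is worth spelling out: each field score is multiplied by `1 + label_score / 2`, so a column whose values and label both match a format tops out at 1.5, while a field-only match stays at 1.0; for the formats listed in `formats_with_mandatory_label`, a zero label score then zeroes the combined score entirely. A small numeric illustration with hypothetical scores for one column:

    import pandas as pd

    # hypothetical per-format scores for a single column
    fields = pd.Series({"code_postal": 1.0, "int": 1.0})
    labels = pd.Series({"code_postal": 1.0, "int": 0.0})

    combined = fields * (1 + labels / 2)
    print(combined["code_postal"])  # 1.5: values and label agree
    print(combined["int"])          # 1.0: values only
    # "code_postal" has a mandatory label, so if its label score were 0,
    # the np.where step would force its combined score down to 0.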
csv_detective/explore_csv.py CHANGED
@@ -1,4 +1,3 @@
- from collections import defaultdict
  import json
  import logging
  import os
@@ -6,80 +5,16 @@ import tempfile
  from time import time
  from typing import Optional, Union

- import numpy as np
  import pandas as pd

- # flake8: noqa
- from csv_detective import detect_fields, detect_labels
- from .detection.variables import (
-     detect_categorical_variable,
-     # detect_continuous_variable,
- )
- from .output.dataframe import cast_df
- from .output.profile import create_profile
- from .output.schema import generate_table_schema
- from .output.utils import prepare_output_dict
+ from .detection.formats import detect_formats
+ from .output import generate_output, generate_table_schema
  from .parsing.load import load_file
- from .parsing.columns import test_col, test_label
  from .s3_utils import download_from_minio, upload_to_minio
  from .utils import display_logs_depending_process_time, is_url
+ from .validate import validate

-
- def get_all_packages(detect_type: str) -> list:
-     root_dir = os.path.dirname(os.path.abspath(__file__)) + "/" + detect_type
-     modules = []
-     for dirpath, _, filenames in os.walk(root_dir):
-         for filename in filenames:
-             file = os.path.join(dirpath, filename).replace(root_dir, "")
-             if file.endswith("__init__.py"):
-                 module = (
-                     file.replace("__init__.py", "")
-                     .replace("/", ".").replace("\\", ".")[:-1]
-                 )
-                 if module:
-                     modules.append(detect_type + module)
-     return modules
-
-
- def return_all_tests(
-     user_input_tests: Union[str, list],
-     detect_type: str,
- ) -> list:
-     """
-     returns all tests that have a method _is and are listed in the user_input_tests
-     the function can select a sub_package from csv_detective
-     user_input_tests may look like this:
-     - "ALL": all possible tests are made
-     - "FR.other.siren" (or any other path-like string to one of the tests, or a group of tests, like "FR.geo"):
-       this specific (group of) test(s) only
-     - ["FR.temp.mois_de_annee", "geo", ...]: only the specified tests will be made; you may also skip
-       specific (groups of) tests by adding "-" at the start (e.g. "-temp.date")
-     """
-     assert detect_type in ["detect_fields", "detect_labels"]
-     all_packages = get_all_packages(detect_type=detect_type)
-
-     if isinstance(user_input_tests, str):
-         user_input_tests = [user_input_tests]
-     if "ALL" in user_input_tests or all(x[0] == "-" for x in user_input_tests):
-         tests_to_do = [detect_type]
-     else:
-         tests_to_do = [
-             f"{detect_type}.{x}" for x in user_input_tests if x[0] != "-"
-         ]
-     tests_skipped = [
-         f"{detect_type}.{x[1:]}" for x in user_input_tests if x[0] == "-"
-     ]
-     all_tests = [
-         # this is why we need to import detect_fields/labels
-         eval(x) for x in all_packages
-         if any([y == x[: len(y)] for y in tests_to_do])
-         and all([y != x[: len(y)] for y in tests_skipped])
-     ]
-     # to remove groups of tests
-     all_tests = [
-         test for test in all_tests if "_is" in dir(test)
-     ]
-     return all_tests
+ logging.basicConfig(level=logging.INFO)


  def routine(
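`get_all_packages` and `return_all_tests` leave this module rather than disappear: per the file list above they move to the new `csv_detective/load_tests.py` (+62), from which `detection/formats.py` now imports `return_all_tests`. The `user_input_tests` selection semantics documented in the removed docstring are unchanged; for example (illustrative calls, hypothetical file name):

    from csv_detective import routine

    routine("some_file.csv", user_input_tests="ALL")                  # run every field/label test
    routine("some_file.csv", user_input_tests="FR.geo")               # a single group of tests
    routine("some_file.csv", user_input_tests=["geo", "-temp.date"])  # "-" skips a (group of) test(s)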
@@ -99,7 +34,7 @@
      sheet_name: Optional[Union[str, int]] = None,
  ) -> Union[dict, tuple[dict, pd.DataFrame]]:
      """Returns a dict with information about the csv table and possible
-     column contents.
+     column contents, and if requested the DataFrame with columns cast according to analysis.

      Args:
          file_path: local path to CSV file if not using Minio
@@ -112,14 +47,14 @@
          output_schema: whether or not to add the 'schema' field to the output (tableschema)
          output_df: whether or not to return the loaded DataFrame along with the analysis report
          cast_json: whether or not to cast json columns into objects (otherwise they are returned as strings)
-         verbose: whether or not to print process logs in console 
+         verbose: whether or not to print process logs in console
          sheet_name: if reading multi-sheet file (xls-like), which sheet to consider
          skipna: whether to keep NaN (empty cells) for tests

      Returns:
          dict: a dict with information about the csv and possible types for each column
      """
- 
+
      if not (isinstance(save_results, bool) or (isinstance(save_results, str) and save_results.endswith(".json"))):
          raise ValueError("`save_results` must be a bool or a valid path to a json file.")

@@ -137,168 +72,105 @@
          sheet_name=sheet_name,
      )

-     if table.empty:
-         res_categorical = []
-         # res_continuous = []
-     else:
-         # Detects columns that are categorical
-         res_categorical, categorical_mask = detect_categorical_variable(table, verbose=verbose)
-         res_categorical = list(res_categorical)
-         # Detect columns that are continuous (we already know the categorical) : we don't need this for now, cuts processing time
-         # res_continuous = list(
-         #     detect_continuous_variable(table.iloc[:, ~categorical_mask.values], verbose=verbose)
-         # )
-
-     analysis.update({
-         "categorical": res_categorical,
-         # "continuous": res_continuous,
-     })
-
-     # list testing to be performed
-     all_tests_fields = return_all_tests(
-         user_input_tests, detect_type="detect_fields"
-     ) # list all tests for the fields
-     all_tests_labels = return_all_tests(
-         user_input_tests, detect_type="detect_labels"
-     ) # list all tests for the labels
-
-     # if no testing then return
-     if not all_tests_fields and not all_tests_labels:
-         return analysis
-
-     # Perform testing on fields
-     scores_table_fields = test_col(table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose)
-     analysis["columns_fields"] = prepare_output_dict(scores_table_fields, limited_output)
-
-     # Perform testing on labels
-     scores_table_labels = test_label(table, all_tests_labels, limited_output, verbose=verbose)
-     analysis["columns_labels"] = prepare_output_dict(scores_table_labels, limited_output)
-
-     # Multiply the results of the fields by 1 + 0.5 * the results of the labels.
-     # This is because the fields are more important than the labels and yields a max
-     # of 1.5 for the final score.
-     scores_table = scores_table_fields * (
-         1
-         + scores_table_labels.reindex(
-             index=scores_table_fields.index, fill_value=0
-         ).values / 2
-     )
-
-     # To reduce false positives: ensure these formats are detected only if the label yields
-     # a detection (skipping the ones that have been excluded by the users).
-     formats_with_mandatory_label = [
-         f for f in [
-             "code_departement",
-             "code_commune_insee",
-             "code_postal",
-             "latitude_wgs",
-             "longitude_wgs",
-             "latitude_wgs_fr_metropole",
-             "longitude_wgs_fr_metropole",
-             "latitude_l93",
-             "longitude_l93",
-         ] if f in scores_table.index
-     ]
-     scores_table.loc[formats_with_mandatory_label, :] = np.where(
-         scores_table_labels.loc[formats_with_mandatory_label, :],
-         scores_table.loc[formats_with_mandatory_label, :],
-         0,
+     analysis = detect_formats(
+         table=table,
+         analysis=analysis,
+         user_input_tests=user_input_tests,
+         limited_output=limited_output,
+         skipna=skipna,
+         verbose=verbose,
      )
-     analysis["columns"] = prepare_output_dict(scores_table, limited_output)
-
-     metier_to_python_type = {
-         "booleen": "bool",
-         "int": "int",
-         "float": "float",
-         "string": "string",
-         "json": "json",
-         "json_geojson": "json",
-         "datetime": "datetime",
-         "datetime_iso": "datetime",
-         "datetime_rfc822": "datetime",
-         "date": "date",
-         "latitude": "float",
-         "latitude_l93": "float",
-         "latitude_wgs": "float",
-         "latitude_wgs_fr_metropole": "float",
-         "longitude": "float",
-         "longitude_l93": "float",
-         "longitude_wgs": "float",
-         "longitude_wgs_fr_metropole": "float",
-     }
-
-     if not limited_output:
-         for detection_method in ["columns_fields", "columns_labels", "columns"]:
-             analysis[detection_method] = {
-                 col_name: [
-                     {
-                         "python_type": metier_to_python_type.get(
-                             detection["format"], "string"
-                         ),
-                         **detection,
-                     }
-                     for detection in detections
-                 ]
-                 for col_name, detections in analysis[detection_method].items()
-             }
-     else:
-         for detection_method in ["columns_fields", "columns_labels", "columns"]:
-             analysis[detection_method] = {
-                 col_name: {
-                     "python_type": metier_to_python_type.get(
-                         detection["format"], "string"
-                     ),
-                     **detection,
-                 }
-                 for col_name, detection in analysis[detection_method].items()
-             }

-     # Add detection with formats as keys
-     analysis["formats"] = defaultdict(list)
-     for header, col_metadata in analysis["columns"].items():
-         analysis["formats"][col_metadata["format"]].append(header)
-
-     if output_profile:
-         analysis["profile"] = create_profile(
+     try:
+         return generate_output(
              table=table,
-             dict_cols_fields=analysis["columns"],
+             analysis=analysis,
+             file_path=file_path,
              num_rows=num_rows,
              limited_output=limited_output,
+             save_results=save_results,
+             output_profile=output_profile,
+             output_schema=output_schema,
+             output_df=output_df,
+             cast_json=cast_json,
              verbose=verbose,
+             sheet_name=sheet_name,
          )
+     finally:
+         if verbose:
+             display_logs_depending_process_time(
+                 f"Routine completed in {round(time() - start_routine, 3)}s",
+                 time() - start_routine
+             )

-     if save_results:
-         if isinstance(save_results, str):
-             output_path = save_results
-         else:
-             output_path = os.path.splitext(file_path)[0]
-             if is_url(output_path):
-                 output_path = output_path.split('/')[-1]
-             if analysis.get("sheet_name"):
-                 output_path += "_sheet-" + str(sheet_name)
-             output_path += ".json"
-         with open(output_path, "w", encoding="utf8") as fp:
-             json.dump(analysis, fp, indent=4, separators=(",", ": "), ensure_ascii=False)

-     if output_schema:
-         analysis["schema"] = generate_table_schema(
-             analysis,
-             save_file=False,
-             verbose=verbose
-         )
+ def validate_then_detect(
+     file_path: str,
+     previous_analysis: dict,
+     num_rows: int = 500,
+     user_input_tests: Union[str, list[str]] = "ALL",
+     limited_output: bool = True,
+     save_results: Union[bool, str] = True,
+     encoding: str = None,
+     sep: str = None,
+     skipna: bool = True,
+     output_profile: bool = False,
+     output_schema: bool = False,
+     output_df: bool = False,
+     cast_json: bool = True,
+     verbose: bool = False,
+     sheet_name: Union[str, int] = None,
+ ):
+
      if verbose:
-         display_logs_depending_process_time(
-             f'Routine completed in {round(time() - start_routine, 3)}s',
-             time() - start_routine
+         start_routine = time()
+     if is_url(file_path):
+         logging.info("Path recognized as a URL")
+
+     is_valid, table, analysis = validate(
+         file_path=file_path,
+         previous_analysis=previous_analysis,
+         num_rows=num_rows,
+         encoding=encoding,
+         sep=sep,
+         verbose=verbose,
+         skipna=skipna,
+         sheet_name=sheet_name,
+     )
+     if is_valid:
+         # skipping formats detection as the validation is successful
+         analysis = previous_analysis
+         del analysis["profile"]
+     else:
+         analysis = detect_formats(
+             table=table,
+             analysis=analysis,
+             user_input_tests=user_input_tests,
+             limited_output=limited_output,
+             skipna=skipna,
+             verbose=verbose,
          )
-     if output_df:
-         return analysis, cast_df(
-             df=table,
-             columns=analysis["columns"],
+     try:
+         return generate_output(
+             table=table,
+             analysis=analysis,
+             file_path=file_path,
+             num_rows=num_rows,
+             limited_output=limited_output,
+             save_results=save_results,
+             output_profile=output_profile,
+             output_schema=output_schema,
+             output_df=output_df,
              cast_json=cast_json,
              verbose=verbose,
+             sheet_name=sheet_name,
          )
-     return analysis
+     finally:
+         if verbose:
+             display_logs_depending_process_time(
+                 f"Process completed in {round(time() - start_routine, 3)}s",
+                 time() - start_routine
+             )


  def routine_minio(
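`validate_then_detect` is the new entry point exported from `csv_detective/__init__.py`: it revalidates a file against a previously saved analysis and only falls back to full format detection when validation fails, then goes through the same `generate_output` path as `routine`. A minimal usage sketch (file names are hypothetical):

    from csv_detective import routine, validate_then_detect

    # first pass: full detection, keep the report
    report = routine("data.csv", save_results=False)

    # later pass: revalidate the (possibly updated) file against the stored
    # report, re-running detection only if it no longer conforms
    new_report = validate_then_detect(
        file_path="data_v2.csv",
        previous_analysis=report,
        save_results=False,
    )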
@@ -369,8 +241,8 @@ def routine_minio(
          minio_pwd=minio_pwd,
      )

-     analysis = routine(file_path,
-         num_rows,
+     analysis = routine(
+         file_path,
          save_results=True,
          **kwargs,
      )