csv-detective 0.7.5.dev1056__py3-none-any.whl → 0.7.5.dev1069__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import logging
12
12
  from time import time
13
13
  import requests
14
14
  from io import StringIO
15
+ import pandas as pd
15
16
 
16
17
  # flake8: noqa
17
18
  from csv_detective import detect_fields, detect_labels
@@ -108,9 +109,10 @@ def routine(
108
109
  skipna: bool = True,
109
110
  output_profile: bool = False,
110
111
  output_schema: bool = False,
112
+ output_df: bool = False,
111
113
  verbose: bool = False,
112
114
  sheet_name: Union[str, int] = None,
113
- ):
115
+ ) -> Union[dict, tuple[dict, pd.DataFrame]]:
114
116
  """Returns a dict with information about the csv table and possible
115
117
  column contents.
116
118
 
@@ -123,6 +125,7 @@ def routine(
123
125
  save_results: whether or not to save the results in a json file, or the path where to dump the output
124
126
  output_profile: whether or not to add the 'profile' field to the output
125
127
  output_schema: whether or not to add the 'schema' field to the output (tableschema)
128
+ output_df: whether or not to return the loaded DataFrame along with the analysis report
126
129
  verbose: whether or not to print process logs in console
127
130
  sheet_name: if reading multi-sheet file (xls-like), which sheet to consider
128
131
  skipna: whether to keep NaN (empty cells) for tests
@@ -353,6 +356,8 @@ def routine(
353
356
  f'Routine completed in {round(time() - start_routine, 3)}s',
354
357
  time() - start_routine
355
358
  )
359
+ if output_df:
360
+ return return_dict, table
356
361
  return return_dict
357
362
 
358
363
 
@@ -3,6 +3,7 @@
3
3
  ## Current (in progress)
4
4
 
5
5
  - New function that creates a csv from a list of fields and constraints, or from a TableSchema [#101](https://github.com/datagouv/csv-detective/pull/101)
6
+ - Enable outputting loaded dataframe [#102](https://github.com/datagouv/csv-detective/pull/102)
6
7
 
7
8
  ## 0.7.4 (2024-11-15)
8
9
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: csv_detective
3
- Version: 0.7.5.dev1056
3
+ Version: 0.7.5.dev1069
4
4
  Summary: Detect CSV column content
5
5
  Home-page: https://github.com/etalab/csv_detective
6
6
  Author: Etalab
@@ -29,4 +29,3 @@ Requires-Dist: python-magic==0.4.27
29
29
  Requires-Dist: frformat==0.4.0
30
30
  Requires-Dist: faker==33.0.0
31
31
  Requires-Dist: rstr==3.2.2
32
-
@@ -2,7 +2,7 @@ csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
2
2
  csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
3
3
  csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
4
4
  csv_detective/detection.py,sha256=AuXlPOZfzqznZY2ybAAgaXIq6qVITYd3MXf2CoigI3I,22097
5
- csv_detective/explore_csv.py,sha256=X5yZS3WCUsafUMcs5tOnDTeMGzMnfr0iB9vEDx7xiqg,16977
5
+ csv_detective/explore_csv.py,sha256=6kGl1E061_CefAdei-wgwafZT1g8oKWg0eE1D5zWTOk,17216
6
6
  csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
7
7
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
8
8
  csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
@@ -126,18 +126,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKU
126
126
  csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
127
127
  csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
128
128
  csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
129
- csv_detective-0.7.5.dev1056.data/data/share/csv_detective/CHANGELOG.md,sha256=clfZIuAoLOi34VOevzdE4_bR__6SVWiNNmWOFOqpm-k,6725
130
- csv_detective-0.7.5.dev1056.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
131
- csv_detective-0.7.5.dev1056.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
129
+ csv_detective-0.7.5.dev1069.data/data/share/csv_detective/CHANGELOG.md,sha256=QbZKEEWbkt7a-TMHB6CpzzliDqv3BLECa_zkJgZOFkY,6820
130
+ csv_detective-0.7.5.dev1069.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
131
+ csv_detective-0.7.5.dev1069.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
132
132
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
133
133
  tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
134
134
  tests/test_fields.py,sha256=kXel-hiyQYrJ3OLmwUMg1K3DKbbwBLvUplxZWxpp18I,10605
135
- tests/test_file.py,sha256=1fEOu3bArGBaarRKAoTXAF3cSIGJfFN3UIwOW6esWRs,6399
135
+ tests/test_file.py,sha256=oQITvAxdcrqDby2wWSh_X9TCwFqdFaP34XNy92ibXyg,6725
136
136
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
137
137
  tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
138
- csv_detective-0.7.5.dev1056.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
139
- csv_detective-0.7.5.dev1056.dist-info/METADATA,sha256=oNvlYf9ZGV54EfE_2tGMTHzh1jOTPaMwGt1uYyjU0BM,1146
140
- csv_detective-0.7.5.dev1056.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
141
- csv_detective-0.7.5.dev1056.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
142
- csv_detective-0.7.5.dev1056.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
143
- csv_detective-0.7.5.dev1056.dist-info/RECORD,,
138
+ csv_detective-0.7.5.dev1069.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
139
+ csv_detective-0.7.5.dev1069.dist-info/METADATA,sha256=sqa9hWFoiOj9-MpBX1uuwOl5qyPCSoca3wo0RrglmNY,1145
140
+ csv_detective-0.7.5.dev1069.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
141
+ csv_detective-0.7.5.dev1069.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
142
+ csv_detective-0.7.5.dev1069.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
143
+ csv_detective-0.7.5.dev1069.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.5.0)
2
+ Generator: setuptools (75.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/test_file.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from csv_detective import routine
2
2
  import pytest
3
3
  import responses
4
+ import pandas as pd
4
5
 
5
6
 
6
7
  def test_columns_output_on_file():
@@ -218,3 +219,16 @@ def test_nan_values(expected_type):
218
219
  skipna=skipna,
219
220
  )
220
221
  assert output["columns"]["partly_empty"]["python_type"] == expected_type
222
+
223
+
224
+ def test_output_df():
225
+ output, df = routine(
226
+ csv_file_path="tests/b_test_file.csv",
227
+ num_rows=-1,
228
+ output_profile=False,
229
+ save_results=False,
230
+ output_df=True,
231
+ )
232
+ assert isinstance(output, dict)
233
+ assert isinstance(df, pd.DataFrame)
234
+ assert len(df) == 6