csv-detective 0.7.5.dev1069__py3-none-any.whl → 0.7.5.dev1078__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csv_detective/detection.py

@@ -1,4 +1,5 @@
- from typing import TextIO, Optional
+ from typing import TextIO, Optional, Union
+ from collections import defaultdict
  import pandas as pd
  import math
  import csv
@@ -27,7 +28,7 @@ engine_to_file = {
  }


- def is_url(csv_file_path: str):
+ def is_url(csv_file_path: str) -> bool:
  # could be more sophisticated if needed
  return csv_file_path.startswith('http')

@@ -35,17 +36,14 @@ def is_url(csv_file_path: str):
  def detect_continuous_variable(table: pd.DataFrame, continuous_th: float = 0.9, verbose: bool = False):
  """
  Detects whether a column contains continuous variables. We consider a continuous column
- one that contains
- a considerable amount of float values.
+ one that contains a considerable amount of float values.
  We removed the integers as we then end up with postal codes, insee codes, and all sort
  of codes and types.
  This is not optimal but it will do for now.
- :param table:
- :return:
  """
  # if we need this again in the future, could be first based on columns detected as int/float to cut time

- def check_threshold(serie: pd.Series, continuous_th: float):
+ def check_threshold(serie: pd.Series, continuous_th: float) -> bool:
  count = serie.value_counts().to_dict()
  total_nb = len(serie)
  if float in count:
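The check_threshold helper shown above flags a column as continuous when the share of float values crosses continuous_th. A minimal, self-contained sketch of the idea described in the docstring (illustrative names, not the package's implementation; it assumes only non-integer floats count, per the note about postal and INSEE codes):

import pandas as pd

def looks_continuous(serie: pd.Series, continuous_th: float = 0.9) -> bool:
    # a value counts as "float" only if it parses and is not a whole number,
    # which filters out postal codes, INSEE codes and other integer-like codes
    def is_float_like(value) -> bool:
        try:
            parsed = float(str(value).replace(",", "."))
        except ValueError:
            return False
        return not parsed.is_integer()

    share = serie.dropna().map(is_float_like).mean()
    return bool(share >= continuous_th)

print(looks_continuous(pd.Series(["1.2", "3.5", "75011", "2.8"])))  # False: only 3 of 4 values are true floats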
@@ -75,7 +73,7 @@ def detect_continuous_variable(table: pd.DataFrame, continuous_th: float = 0.9,
  if verbose:
  display_logs_depending_process_time(
  f"Detected {sum(res)} continuous columns in {round(time() - start, 3)}s",
- time() - start
+ time() - start,
  )
  return res.index[res]

@@ -121,12 +119,12 @@ def detetect_categorical_variable(
  if verbose:
  display_logs_depending_process_time(
  f"Detected {sum(res)} categorical columns out of {len(table.columns)} in {round(time() - start, 3)}s",
- time() - start
+ time() - start,
  )
  return res.index[res], res


- def detect_engine(csv_file_path: str, verbose=False):
+ def detect_engine(csv_file_path: str, verbose=False) -> Optional[str]:
  if verbose:
  start = time()
  mapping = {
@@ -145,12 +143,12 @@ def detect_engine(csv_file_path: str, verbose=False):
  if verbose:
  display_logs_depending_process_time(
  f'File has no extension, detected {engine_to_file.get(engine, "csv")}',
- time() - start
+ time() - start,
  )
  return engine


- def detect_separator(file: TextIO, verbose: bool = False):
+ def detect_separator(file: TextIO, verbose: bool = False) -> str:
  """Detects csv separator"""
  # TODO: add a robust detection:
  # if we have a semicolon as text and \t as separator, we return
@@ -181,12 +179,12 @@ def detect_separator(file: TextIO, verbose: bool = False):
  if verbose:
  display_logs_depending_process_time(
  f'Detected separator: "{sep}" in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return sep


- def detect_encoding(csv_file_path: str, verbose: bool = False):
+ def detect_encoding(csv_file_path: str, verbose: bool = False) -> str:
  """
  Detects file encoding using faust-cchardet (forked from the original cchardet)
  """
@@ -205,7 +203,7 @@ def detect_encoding(csv_file_path: str, verbose: bool = False):
  message += f' in {round(time() - start, 3)}s (confidence: {round(encoding_dict["confidence"]*100)}%)'
  display_logs_depending_process_time(
  message,
- time() - start
+ time() - start,
  )
  return encoding_dict['encoding']

@@ -218,8 +216,7 @@ def parse_table(
  skiprows: int,
  random_state: int = 42,
  verbose : bool = False,
- ):
- # Takes care of some problems
+ ) -> tuple[pd.DataFrame, int, int]:
  if verbose:
  start = time()
  logging.info("Parsing table")
@@ -230,7 +227,6 @@ def parse_table(

  total_lines = None
  for encoding in [encoding, "ISO-8859-1", "utf-8"]:
- # TODO : modification systematique
  if encoding is None:
  continue

@@ -251,17 +247,16 @@ def parse_table(
  print("Trying encoding : {encoding}".format(encoding=encoding))

  if table is None:
- logging.error(" >> encoding not found")
- return table, "NA", "NA"
+ raise ValueError("Could not load file")
  if verbose:
  display_logs_depending_process_time(
  f'Table parsed successfully in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return table, total_lines, nb_duplicates


- def remove_empty_first_rows(table: pd.DataFrame):
+ def remove_empty_first_rows(table: pd.DataFrame) -> tuple[pd.DataFrame, int]:
  """Analog process to detect_headers for csv files, determines how many rows to skip
  to end up with the header at the right place"""
  idx = 0
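With this hunk, parse_table raises a ValueError instead of returning "NA" placeholders when no candidate encoding loads the file. A minimal, self-contained sketch of that fallback-then-raise pattern (an illustrative helper, not the package's code; the encoding list and error message are taken from the hunks above):

from typing import Optional
import logging
import pandas as pd

def read_with_fallback(path: str, encoding: Optional[str]) -> pd.DataFrame:
    table = None
    for enc in [encoding, "ISO-8859-1", "utf-8"]:  # same candidate list as in parse_table
        if enc is None:
            continue
        try:
            table = pd.read_csv(path, encoding=enc, dtype=str)
            break
        except (UnicodeDecodeError, ValueError):
            logging.info("Encoding %s failed, trying the next one", enc)
            continue
    if table is None:
        # previously the caller got "NA" placeholders back; now it has to handle an exception
        raise ValueError("Could not load file")
    return table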
@@ -274,7 +269,7 @@ def remove_empty_first_rows(table: pd.DataFrame):
  cols = table.iloc[idx - 1]
  table = table.iloc[idx:]
  table.columns = cols.to_list()
- # +1 here because the columns should count as a row
+ # +1 here because the headers should count as a row
  return table, idx


@@ -285,7 +280,7 @@ def parse_excel(
  sheet_name: Optional[str] = None,
  random_state: int = 42,
  verbose : bool = False,
- ):
+ ) -> tuple[pd.DataFrame, int, int, str, str, int]:
  """"Excel-like parsing is really slow, could be a good improvement for future development"""
  if verbose:
  start = time()
@@ -309,7 +304,7 @@ def parse_excel(
  if verbose:
  display_logs_depending_process_time(
  f'Detected {engine_to_file[engine]} file, no sheet specified, reading the largest one',
- time() - start
+ time() - start,
  )
  try:
  if engine == "openpyxl":
@@ -341,7 +336,7 @@ def parse_excel(
  if verbose:
  display_logs_depending_process_time(
  'Could not read file with classic xls reader, trying with ODS',
- time() - start
+ time() - start,
  )
  engine = "odf"

@@ -354,33 +349,33 @@ def parse_excel(
  if verbose:
  display_logs_depending_process_time(
  f'Detected {engine_to_file[engine]} file, no sheet specified, reading the largest one',
- time() - start
+ time() - start,
  )
  tables = pd.read_excel(
  csv_file_path,
  engine="odf",
  sheet_name=None,
- dtype="unicode"
+ dtype="unicode",
  )
  sizes = {sheet_name: table.size for sheet_name, table in tables.items()}
  sheet_name = max(sizes, key=sizes.get)
  if verbose:
  display_logs_depending_process_time(
  f'Going forwards with sheet "{sheet_name}"',
- time() - start
+ time() - start,
  )
  table = tables[sheet_name]
  else:
  if verbose:
  display_logs_depending_process_time(
  f'Detected {engine_to_file[engine]} file, reading sheet "{sheet_name}"',
- time() - start
+ time() - start,
  )
  table = pd.read_excel(
  csv_file_path,
  engine="odf",
  sheet_name=sheet_name,
- dtype="unicode"
+ dtype="unicode",
  )
  table, header_row_idx = remove_empty_first_rows(table)
  total_lines = len(table)
@@ -391,7 +386,7 @@ def parse_excel(
  if verbose:
  display_logs_depending_process_time(
  f'Table parsed successfully in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx

@@ -400,18 +395,18 @@ def parse_excel(
  if no_sheet_specified:
  display_logs_depending_process_time(
  f'Going forwards with sheet "{sheet_name}"',
- time() - start
+ time() - start,
  )
  else:
  display_logs_depending_process_time(
  f'Detected {engine_to_file[engine]} file, reading sheet "{sheet_name}"',
- time() - start
+ time() - start,
  )
  table = pd.read_excel(
  csv_file_path,
  engine=engine,
  sheet_name=sheet_name,
- dtype="unicode"
+ dtype="unicode",
  )
  table, header_row_idx = remove_empty_first_rows(table)
  total_lines = len(table)
@@ -422,12 +417,12 @@ def parse_excel(
  if verbose:
  display_logs_depending_process_time(
  f'Table parsed successfully in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return table, total_lines, nb_duplicates, sheet_name, engine, header_row_idx


- def prevent_nan(value: float):
+ def prevent_nan(value: float) -> Optional[float]:
  if math.isnan(value):
  return None
  return value
@@ -439,7 +434,7 @@ def create_profile(
  num_rows: int,
  limited_output: bool = True,
  verbose: bool = False,
- ):
+ ) -> dict:
  if verbose:
  start = time()
  logging.info("Creating profile")
@@ -466,9 +461,8 @@ def create_profile(
  safe_table[c] = safe_table[c].apply(
  lambda s: float_casting(s) if isinstance(s, str) else s
  )
- profile = {}
+ profile = defaultdict(dict)
  for c in safe_table.columns:
- profile[c] = {}
  if map_python_types.get(dict_cols_fields[c]["python_type"], str) in [
  float,
  int,
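Replacing the plain dict with defaultdict(dict) drops the explicit profile[c] = {} initialisation: the first profile[c].update(...) creates the inner dict on demand. A toy illustration (the column names and profile fields are made up):

from collections import defaultdict

profile = defaultdict(dict)
for col in ["price", "city"]:
    # no explicit `profile[col] = {}` needed: the inner dict is created on first access
    profile[col].update(nb_distinct=3, tops=[])

print(dict(profile))  # {'price': {'nb_distinct': 3, 'tops': []}, 'city': {'nb_distinct': 3, 'tops': []}}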
@@ -494,10 +488,10 @@ def create_profile(
  .to_dict(orient="records")
  tops = []
  for tb in tops_bruts:
- top = {}
- top["count"] = tb["count"]
- top["value"] = tb[c]
- tops.append(top)
+ tops.append({
+ "count": tb["count"],
+ "value": tb[c],
+ })
  profile[c].update(
  tops=tops,
  nb_distinct=safe_table[c].nunique(),
@@ -506,7 +500,7 @@ def create_profile(
  if verbose:
  display_logs_depending_process_time(
  f"Created profile in {round(time() - start, 3)}s",
- time() - start
+ time() - start,
  )
  return profile

@@ -540,7 +534,7 @@ def detect_extra_columns(file: TextIO, sep: str):
  return nb_useless_col, retour


- def detect_headers(file: TextIO, sep: str, verbose: bool = False):
+ def detect_headers(file: TextIO, sep: str, verbose: bool = False) -> tuple[int, Optional[list]]:
  """Tests 10 first rows for possible header (header not in 1st line)"""
  if verbose:
  start = time()
@@ -559,7 +553,7 @@ def detect_headers(file: TextIO, sep: str, verbose: bool = False):
  if verbose:
  display_logs_depending_process_time(
  f'Detected headers in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return i, chaine
  if verbose:
@@ -567,7 +561,7 @@ def detect_headers(file: TextIO, sep: str, verbose: bool = False):
  return 0, None


- def detect_heading_columns(file: TextIO, sep: str, verbose : bool = False):
+ def detect_heading_columns(file: TextIO, sep: str, verbose : bool = False) -> int:
  """Tests first 10 lines to see if there are empty heading columns"""
  if verbose:
  start = time()
@@ -581,18 +575,18 @@ def detect_heading_columns(file: TextIO, sep: str, verbose : bool = False):
  if verbose:
  display_logs_depending_process_time(
  f'No heading column detected in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return 0
  if verbose:
  display_logs_depending_process_time(
  f'{return_int} heading columns detected in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return return_int


- def detect_trailing_columns(file: TextIO, sep: str, heading_columns: int, verbose : bool = False):
+ def detect_trailing_columns(file: TextIO, sep: str, heading_columns: int, verbose : bool = False) -> int:
  """Tests first 10 lines to see if there are empty trailing columns"""
  if verbose:
  start = time()
@@ -611,12 +605,12 @@ def detect_trailing_columns(file: TextIO, sep: str, heading_columns: int, verbos
  if verbose:
  display_logs_depending_process_time(
  f'No trailing column detected in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return 0
  if verbose:
  display_logs_depending_process_time(
  f'{return_int} trailing columns detected in {round(time() - start, 3)}s',
- time() - start
+ time() - start,
  )
  return return_int
csv_detective/explore_csv.py

@@ -4,6 +4,7 @@ contenu possible des champs
  """

  from typing import Dict, List, Union
+ from collections import defaultdict
  import json
  import numpy as np
  import os
@@ -39,7 +40,7 @@ from .detection import (
  logging.basicConfig(level=logging.INFO)


- def get_all_packages(detect_type):
+ def get_all_packages(detect_type) -> list:
  root_dir = os.path.dirname(os.path.abspath(__file__)) + "/" + detect_type
  modules = []
  for dirpath, _, filenames in os.walk(root_dir):
@@ -58,7 +59,7 @@ def get_all_packages(detect_type):
  def return_all_tests(
  user_input_tests: Union[str, list],
  detect_type: str,
- ):
+ ) -> list:
  """
  returns all tests that have a method _is and are listed in the user_input_tests
  the function can select a sub_package from csv_detective
@@ -175,12 +176,10 @@ def routine(
  sep = detect_separator(str_file, verbose=verbose)
  header_row_idx, header = detect_headers(str_file, sep, verbose=verbose)
  if header is None:
- return_dict = {"error": True}
- return return_dict
+ return {"error": True}
  elif isinstance(header, list):
  if any([x is None for x in header]):
- return_dict = {"error": True}
- return return_dict
+ return {"error": True}
  heading_columns = detect_heading_columns(str_file, sep, verbose=verbose)
  trailing_columns = detect_trailing_columns(str_file, sep, heading_columns, verbose=verbose)
  table, total_lines, nb_duplicates = parse_table(
@@ -200,7 +199,7 @@ def routine(
  # )

  # Creating return dictionary
- return_dict = {
+ analysis = {
  "header_row_idx": header_row_idx,
  "header": header,
  "total_lines": total_lines,
@@ -212,12 +211,12 @@ def routine(
  }
  # this is only relevant for xls-like
  if is_xls_like:
- return_dict["engine"] = engine
- return_dict["sheet_name"] = sheet_name
+ analysis["engine"] = engine
+ analysis["sheet_name"] = sheet_name
  # this is only relevant for csv
  else:
- return_dict["encoding"] = encoding
- return_dict["separator"] = sep
+ analysis["encoding"] = encoding
+ analysis["separator"] = sep

  # list testing to be performed
  all_tests_fields = return_all_tests(
@@ -229,25 +228,24 @@ def routine(

  # if no testing then return
  if not all_tests_fields and not all_tests_labels:
- return return_dict
+ return analysis

  # Perform testing on fields
- return_table_fields = test_col(table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose)
- return_dict["columns_fields"] = prepare_output_dict(return_table_fields, limited_output)
+ scores_table_fields = test_col(table, all_tests_fields, limited_output, skipna=skipna, verbose=verbose)
+ analysis["columns_fields"] = prepare_output_dict(scores_table_fields, limited_output)

  # Perform testing on labels
- return_table_labels = test_label(table, all_tests_labels, limited_output, verbose=verbose)
- return_dict["columns_labels"] = prepare_output_dict(return_table_labels, limited_output)
+ scores_table_labels = test_label(table, all_tests_labels, limited_output, verbose=verbose)
+ analysis["columns_labels"] = prepare_output_dict(scores_table_labels, limited_output)

  # Multiply the results of the fields by 1 + 0.5 * the results of the labels.
  # This is because the fields are more important than the labels and yields a max
  # of 1.5 for the final score.
- return_table = return_table_fields * (
+ scores_table = scores_table_fields * (
  1
- + return_table_labels.reindex(
- index=return_table_fields.index, fill_value=0
- ).values
- / 2
+ + scores_table_labels.reindex(
+ index=scores_table_fields.index, fill_value=0
+ ).values / 2
  )

  # To reduce false positives: ensure these formats are detected only if the label yields
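The weighting described in the comments above can be sanity-checked by hand: a field score of 0.8 with a fully matching label (1.0) becomes 0.8 * (1 + 1.0 / 2) = 1.2, and a perfect 1.0/1.0 pair tops out at 1.5. A small pandas sketch of the same reindex-and-multiply step, with made-up format and column names:

import pandas as pd

# rows = candidate formats, columns = table columns (toy values)
scores_fields = pd.DataFrame({"col_a": [1.0, 0.8]}, index=["siren", "code_postal"])
scores_labels = pd.DataFrame({"col_a": [1.0, 0.0]}, index=["siren", "code_postal"])

combined = scores_fields * (
    1 + scores_labels.reindex(index=scores_fields.index, fill_value=0).values / 2
)
print(combined)  # siren -> 1.5 (field and label agree), code_postal -> 0.8 (field only)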
@@ -263,12 +261,12 @@ def routine(
  "latitude_l93",
  "longitude_l93",
  ]
- return_table.loc[formats_with_mandatory_label, :] = np.where(
- return_table_labels.loc[formats_with_mandatory_label, :],
- return_table.loc[formats_with_mandatory_label, :],
+ scores_table.loc[formats_with_mandatory_label, :] = np.where(
+ scores_table_labels.loc[formats_with_mandatory_label, :],
+ scores_table.loc[formats_with_mandatory_label, :],
  0,
  )
- return_dict["columns"] = prepare_output_dict(return_table, limited_output)
+ analysis["columns"] = prepare_output_dict(scores_table, limited_output)

  metier_to_python_type = {
  "booleen": "bool",
@@ -291,7 +289,7 @@ def routine(

  if not limited_output:
  for detection_method in ["columns_fields", "columns_labels", "columns"]:
- return_dict[detection_method] = {
+ analysis[detection_method] = {
  col_name: [
  {
  "python_type": metier_to_python_type.get(
@@ -301,32 +299,29 @@ def routine(
  }
  for detection in detections
  ]
- for col_name, detections in return_dict[detection_method].items()
+ for col_name, detections in analysis[detection_method].items()
  }
  else:
  for detection_method in ["columns_fields", "columns_labels", "columns"]:
- return_dict[detection_method] = {
+ analysis[detection_method] = {
  col_name: {
  "python_type": metier_to_python_type.get(
  detection["format"], "string"
  ),
  **detection,
  }
- for col_name, detection in return_dict[detection_method].items()
+ for col_name, detection in analysis[detection_method].items()
  }

  # Add detection with formats as keys
- return_dict["formats"] = {
- column_metadata["format"]: []
- for column_metadata in return_dict["columns"].values()
- }
- for header, col_metadata in return_dict["columns"].items():
- return_dict["formats"][col_metadata["format"]].append(header)
+ analysis["formats"] = defaultdict(list)
+ for header, col_metadata in analysis["columns"].items():
+ analysis["formats"][col_metadata["format"]].append(header)

  if output_profile:
- return_dict["profile"] = create_profile(
+ analysis["profile"] = create_profile(
  table=table,
- dict_cols_fields=return_dict["columns"],
+ dict_cols_fields=analysis["columns"],
  num_rows=num_rows,
  limited_output=limited_output,
  verbose=verbose,
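Likewise, defaultdict(list) replaces the old two-pass construction (pre-seed empty lists, then append) with a single loop that groups column headers by detected format. A toy sketch with made-up column and format names:

from collections import defaultdict

columns = {
    "code_insee": {"format": "code_commune_insee"},
    "lat": {"format": "latitude_wgs"},
    "lon": {"format": "longitude_wgs"},
}

formats = defaultdict(list)
for header, col_metadata in columns.items():
    formats[col_metadata["format"]].append(header)

print(dict(formats))  # {'code_commune_insee': ['code_insee'], 'latitude_wgs': ['lat'], 'longitude_wgs': ['lon']}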
@@ -343,11 +338,11 @@ def routine(
  output_path += "_sheet-" + str(sheet_name)
  output_path += ".json"
  with open(output_path, "w", encoding="utf8") as fp:
- json.dump(return_dict, fp, indent=4, separators=(",", ": "), ensure_ascii=False)
+ json.dump(analysis, fp, indent=4, separators=(",", ": "), ensure_ascii=False)

  if output_schema:
- return_dict["schema"] = generate_table_schema(
- return_dict,
+ analysis["schema"] = generate_table_schema(
+ analysis,
  save_file=False,
  verbose=verbose
  )
@@ -357,8 +352,8 @@ def routine(
  time() - start_routine
  )
  if output_df:
- return return_dict, table
- return return_dict
+ return analysis, table
+ return analysis


  def routine_minio(
  def routine_minio(
@@ -436,7 +431,7 @@ def routine_minio(
436
431
  minio_pwd=minio_pwd,
437
432
  )
438
433
 
439
- return_dict = routine(
434
+ analysis = routine(
440
435
  csv_file_path,
441
436
  num_rows,
442
437
  user_input_tests,
@@ -449,7 +444,7 @@ def routine_minio(
  # Write report JSON file.
  output_path_to_store_minio_file = os.path.splitext(csv_file_path)[0] + ".json"
  with open(output_path_to_store_minio_file, "w", encoding="utf8") as fp:
- json.dump(return_dict, fp, indent=4, separators=(",", ": "))
+ json.dump(analysis, fp, indent=4, separators=(",", ": "))

  upload_to_minio(
  netloc=output_minio_location["netloc"],
@@ -464,7 +459,7 @@ def routine_minio(
  os.remove(csv_file_path)

  generate_table_schema(
- return_dict,
+ analysis,
  True,
  netloc=tableschema_minio_location["netloc"],
  bucket=tableschema_minio_location["bucket"],
@@ -473,4 +468,4 @@ def routine_minio(
  minio_pwd=minio_pwd,
  )

- return return_dict
+ return analysis
CHANGELOG.md

@@ -4,6 +4,7 @@

  - New function that creates a csv from a list of fields and constraints, or from a TableSchema [#101](https://github.com/datagouv/csv-detective/pull/101)
  - Enable outputting loaded dataframe [#102](https://github.com/datagouv/csv-detective/pull/102)
+ - Better naming, type hints and minor refactors [#103](https://github.com/datagouv/csv-detective/pull/103)

  ## 0.7.4 (2024-11-15)

METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: csv_detective
- Version: 0.7.5.dev1069
+ Version: 0.7.5.dev1078
  Summary: Detect CSV column content
  Home-page: https://github.com/etalab/csv_detective
  Author: Etalab
RECORD

@@ -1,8 +1,8 @@
  csv_detective/__init__.py,sha256=Au4bNJ_Gi6P6o0uO4R56nYdshG7M6-7Rg_xX4whLmLI,143
  csv_detective/cli.py,sha256=Ua7SE1wMH2uFUsTmfumh4nJk7O06okpMd2gvjUDO1II,1048
  csv_detective/create_example.py,sha256=358e7Q7RWMrY_eEo3pUteJWmg2smFb5edJ_AzcQPrqA,8646
- csv_detective/detection.py,sha256=AuXlPOZfzqznZY2ybAAgaXIq6qVITYd3MXf2CoigI3I,22097
- csv_detective/explore_csv.py,sha256=6kGl1E061_CefAdei-wgwafZT1g8oKWg0eE1D5zWTOk,17216
+ csv_detective/detection.py,sha256=SUNGMvvuM_bj3gKYw-x6-CjjkirqCPoeAm0NCPkijrM,22225
+ csv_detective/explore_csv.py,sha256=i1m1JmnMSILlGnPhXlXsUbDVcgXaJ1E2nKE7_6D2xEE,16996
  csv_detective/process_text.py,sha256=rsfk66BCmdpsCOd0kDJ8tmqMsEWd-OeBkEisWc4Ej9k,1246
  csv_detective/s3_utils.py,sha256=1cIVdQUYY2ovErbMwp72Gqtqx2bkB8nfVhn-QaOFTT0,1451
  csv_detective/schema_generation.py,sha256=D1Cq4QRajsKtY8EJSwbRTIB-T_Cb2ZpcmYtCrJ6DvJQ,13135
@@ -126,18 +126,18 @@ csv_detective/detect_labels/temp/date/__init__.py,sha256=GrIbo64WVM3hi7ShBRKKyKU
  csv_detective/detect_labels/temp/datetime_iso/__init__.py,sha256=Ih9l56nBcdmGLyWDavVUWuUUuVZBz9QUDE1hHzADvVg,1157
  csv_detective/detect_labels/temp/datetime_rfc822/__init__.py,sha256=DQ_h4uDW1e6qu2rATEhgGKw6O-vVi7HbDhbEDDCT9uY,1175
  csv_detective/detect_labels/temp/year/__init__.py,sha256=zPF_mvhzhXMAlHPAskS8mhuxjLj2AlKpV4ss8Q4tDms,1150
- csv_detective-0.7.5.dev1069.data/data/share/csv_detective/CHANGELOG.md,sha256=QbZKEEWbkt7a-TMHB6CpzzliDqv3BLECa_zkJgZOFkY,6820
- csv_detective-0.7.5.dev1069.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
- csv_detective-0.7.5.dev1069.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
+ csv_detective-0.7.5.dev1078.data/data/share/csv_detective/CHANGELOG.md,sha256=5M95hTftsY9Ic2q_jexDNp-MgAFAXuPZyWGyFABi3l4,6927
+ csv_detective-0.7.5.dev1078.data/data/share/csv_detective/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+ csv_detective-0.7.5.dev1078.data/data/share/csv_detective/README.md,sha256=Qr8xRXc-dxQ-tdXCpCTCKp1Uliqq84r0UOlPRNuGCpI,9506
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  tests/test_example.py,sha256=0NfChooJQlFxTo2nY5FOQIcsK4zzWA_SBmt2LwVQovY,2014
  tests/test_fields.py,sha256=kXel-hiyQYrJ3OLmwUMg1K3DKbbwBLvUplxZWxpp18I,10605
  tests/test_file.py,sha256=oQITvAxdcrqDby2wWSh_X9TCwFqdFaP34XNy92ibXyg,6725
  tests/test_labels.py,sha256=6MOKrGznkwU5fjZ_3oiB6Scmb480Eu-9geBJs0UDLds,159
  tests/test_structure.py,sha256=SVsnluVoIIprYw_67I1_gB3cp9m1wlO8C7SpdsLW8cM,1161
- csv_detective-0.7.5.dev1069.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
- csv_detective-0.7.5.dev1069.dist-info/METADATA,sha256=sqa9hWFoiOj9-MpBX1uuwOl5qyPCSoca3wo0RrglmNY,1145
- csv_detective-0.7.5.dev1069.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- csv_detective-0.7.5.dev1069.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
- csv_detective-0.7.5.dev1069.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
- csv_detective-0.7.5.dev1069.dist-info/RECORD,,
+ csv_detective-0.7.5.dev1078.dist-info/LICENSE.AGPL.txt,sha256=2N5ReRelkdqkR9a-KP-y-shmcD5P62XoYiG-miLTAzo,34519
+ csv_detective-0.7.5.dev1078.dist-info/METADATA,sha256=NSxmqCJpApiSavZ59QEMfRuzeB_pmOk2Wm_zTy-o2eQ,1145
+ csv_detective-0.7.5.dev1078.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ csv_detective-0.7.5.dev1078.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
+ csv_detective-0.7.5.dev1078.dist-info/top_level.txt,sha256=M0Nv646VHo-49zWjPkwo2C48UmtfddV8_9mEZeIxy8Q,20
+ csv_detective-0.7.5.dev1078.dist-info/RECORD,,