csv-detective 0.9.3.dev2438__py3-none-any.whl → 0.9.3.dev2456__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
csv_detective/parsing/columns.py CHANGED
@@ -78,15 +78,14 @@ def test_col(
         logging.info(f"\t- Starting with format '{label}'")
         # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
         # => the following needs to change, "apply" means all columns are tested for one type at once
-        return_table.loc[label] = table.apply(
-            lambda serie: test_col_val(
-                serie,
+        for col in table.columns:
+            return_table.loc[label, col] = test_col_val(
+                table[col],
                 format,
                 skipna=skipna,
                 limited_output=limited_output,
                 verbose=verbose,
             )
-        )
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
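The change above replaces a whole-row assignment via DataFrame.apply with an explicit per-column loop, writing each cell of return_table individually; the in-code comment flags the apply form as the obstacle, since it forces every column through every type test at once. A minimal sketch of the two forms, assuming return_table is indexed by format label with one column per input column (test_col_val here is a hypothetical stand-in for the real test):

    import pandas as pd

    def test_col_val(serie: pd.Series, fmt: str, **kwargs) -> float:
        # hypothetical stand-in: share of values matching the format's regex
        return serie.astype(str).str.fullmatch(fmt).mean()

    table = pd.DataFrame({"a": ["1", "2"], "b": ["x", "2"]})
    return_table = pd.DataFrame(index=["int"], columns=table.columns, dtype=float)

    # old form: table.apply runs the test on every column, assigns the row at once
    # return_table.loc["int"] = table.apply(lambda serie: test_col_val(serie, r"\d+"))

    # new form: one cell at a time, so a column can later be skipped independently
    for col in table.columns:
        return_table.loc["int", col] = test_col_val(table[col], r"\d+")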
@@ -152,7 +151,7 @@ def test_col_chunks(
     remaining_tests_per_col = build_remaining_tests_per_col(return_table)

     # hashing rows to get nb_duplicates
-    row_hashes_count = table.apply(lambda row: hash(tuple(row)), axis=1).value_counts()
+    row_hashes_count = pd.util.hash_pandas_object(table, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: table[col].value_counts(dropna=False) for col in table.columns}

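Both files swap a Python-level row hash (hash(tuple(row)) applied row by row) for pd.util.hash_pandas_object, which hashes every row to a uint64 in vectorized code; index=False keeps the row labels out of the hash, so identical rows collide regardless of position. A small sketch of counting duplicates this way (the DataFrame and the final formula are illustrative, not taken from the package):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": ["x", "x", "y"]})

    # one uint64 hash per row; index=False ignores the row labels
    row_hashes_count = pd.util.hash_pandas_object(df, index=False).value_counts()

    # one illustrative way to derive a duplicate count: extra copies beyond the
    # first occurrence of each row (equals df.duplicated().sum(), barring hash
    # collisions)
    dup_counts = row_hashes_count[row_hashes_count > 1]
    nb_duplicates = int((dup_counts - 1).sum())  # -> 1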
@@ -190,7 +189,7 @@ def test_col_chunks(
         batch = pd.concat(batch, ignore_index=True)
         analysis["total_lines"] += len(batch)
         row_hashes_count = row_hashes_count.add(
-            batch.apply(lambda row: hash(tuple(row)), axis=1).value_counts(),
+            pd.util.hash_pandas_object(batch, index=False).value_counts(),
             fill_value=0,
         )
         for col in batch.columns:
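When the data arrives in batches, the per-hash counts are merged with Series.add and fill_value=0; a plain + would produce NaN for any hash present in only one of the two operands. A minimal illustration:

    import pandas as pd

    first = pd.Series({10: 2, 20: 1})  # hash -> count from the first batch
    later = pd.Series({20: 1, 30: 3})  # counts from a later batch

    merged = first.add(later, fill_value=0)  # 10 -> 2.0, 20 -> 2.0, 30 -> 3.0
    # with `first + later`, hashes 10 and 30 would come out as NaN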
csv_detective/validate.py CHANGED
@@ -82,7 +82,7 @@ def validate(
     )

     # hashing rows to get nb_duplicates
-    row_hashes_count = first_chunk.apply(lambda row: hash(tuple(row)), axis=1).value_counts()
+    row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
     analysis["total_lines"] = 0
@@ -91,7 +91,7 @@ def validate(
         logging.info(f"> Testing chunk number {idx}")
         analysis["total_lines"] += len(chunk)
         row_hashes_count = row_hashes_count.add(
-            chunk.apply(lambda row: hash(tuple(row)), axis=1).value_counts(),
+            pd.util.hash_pandas_object(chunk, index=False).value_counts(),
             fill_value=0,
         )
         for col in chunk.columns:
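Putting the pieces together, validate() appears to fold every chunk into one running hash count before deriving the duplicate total. A sketch of that flow under the same assumptions (the function and variable names are illustrative, not the package's API):

    from typing import Iterable

    import pandas as pd

    def count_duplicate_rows(chunks: Iterable[pd.DataFrame]) -> tuple[int, int]:
        # illustrative reconstruction of the chunked flow shown in the diff
        chunks = iter(chunks)
        first_chunk = next(chunks)
        row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
        total_lines = len(first_chunk)
        for chunk in chunks:
            total_lines += len(chunk)
            row_hashes_count = row_hashes_count.add(
                pd.util.hash_pandas_object(chunk, index=False).value_counts(),
                fill_value=0,
            )
        dup_counts = row_hashes_count[row_hashes_count > 1]
        return total_lines, int((dup_counts - 1).sum())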
csv_detective-0.9.3.dev2438.dist-info/METADATA → csv_detective-0.9.3.dev2456.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: csv-detective
-Version: 0.9.3.dev2438
+Version: 0.9.3.dev2456
 Summary: Detect tabular files column content
 Keywords: CSV,data processing,encoding,guess,parser,tabular
 Author: data.gouv.fr
csv_detective-0.9.3.dev2438.dist-info/RECORD → csv_detective-0.9.3.dev2456.dist-info/RECORD RENAMED
@@ -78,15 +78,15 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
 csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
 csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
 csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/parsing/columns.py,sha256=nihmB7Cv5BUNPh2EhMRPLdAxvcjrGZF-QFbJDd6rR2M,9246
+csv_detective/parsing/columns.py,sha256=WwivsR4r-SAkugzVSmYeUkgbNXz3CWXnEl2ZmoX_tcs,9238
 csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
 csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
 csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
 csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
 csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
 csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
-csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
-csv_detective-0.9.3.dev2438.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
-csv_detective-0.9.3.dev2438.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
-csv_detective-0.9.3.dev2438.dist-info/METADATA,sha256=FHlHT6UPBByKisTmFR5TImWGG8rXSKiTP_lc7-yHRDU,11063
-csv_detective-0.9.3.dev2438.dist-info/RECORD,,
+csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
+csv_detective-0.9.3.dev2456.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+csv_detective-0.9.3.dev2456.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
+csv_detective-0.9.3.dev2456.dist-info/METADATA,sha256=164BvQ11YE3f07bbQNEK_0DrdLyO5SPXWSNt5lwYIYk,11063
+csv_detective-0.9.3.dev2456.dist-info/RECORD,,