csv-detective 0.9.3.dev2438__py3-none-any.whl → 0.9.3.dev2456__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- csv_detective/parsing/columns.py +5 -6
- csv_detective/validate.py +2 -2
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/METADATA +1 -1
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/RECORD +6 -6
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/entry_points.txt +0 -0
csv_detective/parsing/columns.py
CHANGED
@@ -78,15 +78,14 @@ def test_col(
         logging.info(f"\t- Starting with format '{label}'")
         # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
         # => the following needs to change, "apply" means all columns are tested for one type at once
-
-
-
+        for col in table.columns:
+            return_table.loc[label, col] = test_col_val(
+                table[col],
                 format,
                 skipna=skipna,
                 limited_output=limited_output,
                 verbose=verbose,
             )
-        )
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
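The hunk above replaces an apply-based pass (where one candidate format was tested against all columns at once) with an explicit per-column loop that fills one cell of return_table per (format, column) pair. A minimal sketch of that pattern, where check(), the regex and the toy data are illustrative stand-ins for csv-detective's actual test_col_val and inputs:

import pandas as pd

# Illustrative stand-in for test_col_val: the share of non-empty values
# in a column that a simple pattern accepts.
def check(series: pd.Series, pattern: str) -> float:
    values = series.dropna().astype(str)
    if values.empty:
        return 0.0
    return values.str.fullmatch(pattern).mean()

table = pd.DataFrame({
    "date": ["2024-01-01", "2024-02-15", "not a date"],
    "name": ["a", "b", "c"],
})
formats = {"date_iso": r"\d{4}-\d{2}-\d{2}"}

# One row per candidate format label, one column per CSV column,
# filled by an explicit loop instead of a DataFrame-wide apply.
return_table = pd.DataFrame(index=list(formats), columns=table.columns, dtype=float)
for label, pattern in formats.items():
    for col in table.columns:
        return_table.loc[label, col] = check(table[col], pattern)

print(return_table)  # "date" scores ~0.67 for date_iso, "name" scores 0.0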
@@ -152,7 +151,7 @@ def test_col_chunks(
     remaining_tests_per_col = build_remaining_tests_per_col(return_table)
 
     # hashing rows to get nb_duplicates
-    row_hashes_count =
+    row_hashes_count = pd.util.hash_pandas_object(table, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: table[col].value_counts(dropna=False) for col in table.columns}
 
@@ -190,7 +189,7 @@ def test_col_chunks(
         batch = pd.concat(batch, ignore_index=True)
         analysis["total_lines"] += len(batch)
         row_hashes_count = row_hashes_count.add(
-
+            pd.util.hash_pandas_object(batch, index=False).value_counts(),
             fill_value=0,
         )
         for col in batch.columns:
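Both new lines in test_col_chunks rely on the same idea: pd.util.hash_pandas_object(frame, index=False) returns one 64-bit hash per row based only on the row's values, so value_counts() on those hashes tells how many times each distinct row occurs. A small sketch of why that yields a duplicate count (the toy frame and the final (counts - 1).sum() formula are illustrative; the diff does not show how nb_duplicates is derived downstream):

import pandas as pd

df = pd.DataFrame({
    "city": ["Paris", "Lyon", "Paris", "Paris"],
    "code": ["75", "69", "75", "75"],
})

# One uint64 hash per row; index=False so only the row's values matter,
# not its position in the frame.
row_hashes = pd.util.hash_pandas_object(df, index=False)

# How many times each distinct row occurs.
row_hashes_count = row_hashes.value_counts()

# Rows beyond the first occurrence of each distinct row are duplicates.
nb_duplicates = int((row_hashes_count - 1).sum())
print(nb_duplicates)  # 2: the "Paris"/"75" row appears three times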
csv_detective/validate.py
CHANGED
@@ -82,7 +82,7 @@ def validate(
     )
 
     # hashing rows to get nb_duplicates
-    row_hashes_count =
+    row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
     analysis["total_lines"] = 0
@@ -91,7 +91,7 @@ def validate(
         logging.info(f"> Testing chunk number {idx}")
         analysis["total_lines"] += len(chunk)
         row_hashes_count = row_hashes_count.add(
-
+            pd.util.hash_pandas_object(chunk, index=False).value_counts(),
             fill_value=0,
         )
         for col in chunk.columns:
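validate() applies the same pattern as test_col_chunks: hash counts from the first chunk, then Series.add(..., fill_value=0) for every following chunk, so hashes present in only one operand are kept instead of becoming NaN. A self-contained sketch of that streaming accumulation, where the file name, separator and chunk size are illustrative rather than csv-detective's actual loader settings:

import pandas as pd

def row_hash_counts(frame: pd.DataFrame) -> pd.Series:
    return pd.util.hash_pandas_object(frame, index=False).value_counts()

# Illustrative: stream a CSV in chunks, as validate() does with its reader.
chunks = pd.read_csv("data.csv", sep=";", dtype=str, chunksize=10_000)

first_chunk = next(chunks)
row_hashes_count = row_hash_counts(first_chunk)
total_lines = len(first_chunk)

for chunk in chunks:
    total_lines += len(chunk)
    # fill_value=0 keeps hashes seen in only one operand;
    # a plain + would turn them into NaN.
    row_hashes_count = row_hashes_count.add(row_hash_counts(chunk), fill_value=0)

# row_hashes_count now holds, for every distinct row in the whole file,
# how many times it occurs (see the duplicate-count sketch above).
print(total_lines, len(row_hashes_count))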
{csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/RECORD
RENAMED
@@ -78,15 +78,15 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
 csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
 csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
 csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/parsing/columns.py,sha256=
+csv_detective/parsing/columns.py,sha256=WwivsR4r-SAkugzVSmYeUkgbNXz3CWXnEl2ZmoX_tcs,9238
 csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
 csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
 csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
 csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
 csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
 csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
-csv_detective/validate.py,sha256=
-csv_detective-0.9.3.
-csv_detective-0.9.3.
-csv_detective-0.9.3.
-csv_detective-0.9.3.
+csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
+csv_detective-0.9.3.dev2456.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+csv_detective-0.9.3.dev2456.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
+csv_detective-0.9.3.dev2456.dist-info/METADATA,sha256=164BvQ11YE3f07bbQNEK_0DrdLyO5SPXWSNt5lwYIYk,11063
+csv_detective-0.9.3.dev2456.dist-info/RECORD,,
{csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/WHEEL
RENAMED
File without changes

{csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/entry_points.txt
RENAMED
File without changes