csv-detective 0.9.3.dev2447__py3-none-any.whl → 0.9.3.dev2473__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csv_detective/parsing/columns.py CHANGED
@@ -151,7 +151,7 @@ def test_col_chunks(
     remaining_tests_per_col = build_remaining_tests_per_col(return_table)
 
     # hashing rows to get nb_duplicates
-    row_hashes_count = table.apply(lambda row: hash(tuple(row)), axis=1).value_counts()
+    row_hashes_count = pd.util.hash_pandas_object(table, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: table[col].value_counts(dropna=False) for col in table.columns}
 
@@ -189,7 +189,7 @@ def test_col_chunks(
         batch = pd.concat(batch, ignore_index=True)
         analysis["total_lines"] += len(batch)
         row_hashes_count = row_hashes_count.add(
-            batch.apply(lambda row: hash(tuple(row)), axis=1).value_counts(),
+            pd.util.hash_pandas_object(batch, index=False).value_counts(),
            fill_value=0,
        )
        for col in batch.columns:
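The two hunks above swap a per-row Python lambda, `table.apply(lambda row: hash(tuple(row)), axis=1)`, for pandas' vectorised row hasher. A minimal sketch of the resulting chunked duplicate count, written for illustration only (the function name and the `chunks` iterable are assumptions, not identifiers from the package):

    import pandas as pd

    def count_duplicate_rows(chunks):
        # `chunks` is any iterable of DataFrames, e.g. pd.read_csv(path, chunksize=50_000)
        row_hashes_count = None
        for chunk in chunks:
            # one uint64 hash per row, computed column-wise without a Python-level loop
            counts = pd.util.hash_pandas_object(chunk, index=False).value_counts()
            row_hashes_count = (
                counts if row_hashes_count is None
                else row_hashes_count.add(counts, fill_value=0)
            )
        if row_hashes_count is None:
            return 0
        # every occurrence of a hash beyond the first counts as a duplicate row
        return int((row_hashes_count - 1).sum())

The point of the change is that the hashing now stays in vectorised pandas/NumPy code instead of building a Python tuple for every row.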
csv_detective/parsing/csv.py CHANGED
@@ -42,7 +42,7 @@ def parse_csv(
         logging.warning(f"File is too long, analysing in chunks of {CHUNK_SIZE} rows")
         total_lines, nb_duplicates = None, None
     else:
-        nb_duplicates = len(table.loc[table.duplicated()])
+        nb_duplicates = table.duplicated().sum()
     if num_rows > 0:
         num_rows = min(num_rows, total_lines or len(table))
         table = table.sample(num_rows, random_state=random_state)
csv_detective/parsing/excel.py CHANGED
@@ -124,7 +124,7 @@ def parse_excel(
     )
     table, header_row_idx = remove_empty_first_rows(table)
     total_lines = len(table)
-    nb_duplicates = len(table.loc[table.duplicated()])
+    nb_duplicates = table.duplicated().sum()
     if num_rows > 0:
         num_rows = min(num_rows - 1, total_lines)
         table = table.sample(num_rows, random_state=random_state)
@@ -155,7 +155,7 @@ def parse_excel(
     )
     table, header_row_idx = remove_empty_first_rows(table)
     total_lines = len(table)
-    nb_duplicates = len(table.loc[table.duplicated()])
+    nb_duplicates = table.duplicated().sum()
     if num_rows > 0:
         num_rows = min(num_rows - 1, total_lines)
         table = table.sample(num_rows, random_state=random_state)
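Both parse_csv and parse_excel above switch from `len(table.loc[table.duplicated()])` to `table.duplicated().sum()`. The two expressions count the same thing, rows that repeat an earlier row, but the new form sums a boolean mask instead of materialising an intermediate filtered DataFrame. A tiny check on made-up data (the frame below is mine, not from the package's tests):

    import pandas as pd

    table = pd.DataFrame({"a": [1, 1, 2, 2, 2], "b": ["x", "x", "y", "y", "z"]})
    mask = table.duplicated()          # True for every repeat of an earlier row
    assert len(table.loc[mask]) == mask.sum() == 2
    nb_duplicates = int(mask.sum())    # the form used after this change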
csv_detective/validate.py CHANGED
@@ -82,7 +82,7 @@ def validate(
     )
 
     # hashing rows to get nb_duplicates
-    row_hashes_count = first_chunk.apply(lambda row: hash(tuple(row)), axis=1).value_counts()
+    row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
     analysis["total_lines"] = 0
@@ -91,7 +91,7 @@ def validate(
         logging.info(f"> Testing chunk number {idx}")
         analysis["total_lines"] += len(chunk)
         row_hashes_count = row_hashes_count.add(
-            chunk.apply(lambda row: hash(tuple(row)), axis=1).value_counts(),
+            pd.util.hash_pandas_object(chunk, index=False).value_counts(),
            fill_value=0,
        )
        for col in chunk.columns:
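validate accumulates `row_hashes_count` across chunks with `.add(..., fill_value=0)`, so the change only works if a given row hashes to the same value no matter which chunk it arrives in. Passing `index=False` gives exactly that, since the hash then depends on cell values only. A small self-contained check (the example frame is mine):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 1], "b": ["x", "y", "x"]})
    whole = pd.util.hash_pandas_object(df, index=False)
    parts = pd.concat([
        pd.util.hash_pandas_object(df.iloc[:2], index=False),  # "first chunk"
        pd.util.hash_pandas_object(df.iloc[2:], index=False),  # "later chunk"
    ])
    assert (whole.values == parts.values).all()  # same rows hash identically across chunks
    assert whole.iloc[0] == whole.iloc[2]        # identical rows share one hash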
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: csv-detective
-Version: 0.9.3.dev2447
+Version: 0.9.3.dev2473
 Summary: Detect tabular files column content
 Keywords: CSV,data processing,encoding,guess,parser,tabular
 Author: data.gouv.fr
@@ -78,15 +78,15 @@
 csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
 csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
 csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/parsing/columns.py,sha256=Eo8GUec5ykTuDTR15OC0S_hiMkcpuZnTPCPomW80aSQ,9244
+csv_detective/parsing/columns.py,sha256=WwivsR4r-SAkugzVSmYeUkgbNXz3CWXnEl2ZmoX_tcs,9238
 csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
-csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
-csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
+csv_detective/parsing/csv.py,sha256=5rw6gXZFQC1T4NT9CnW0AumidrYOkF8kjrfWGmk949I,1716
+csv_detective/parsing/excel.py,sha256=tb65I78tdYlZci_tzvvQt8U6bZSYKjeVdn2CEvsET1o,6972
 csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
 csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
 csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
-csv_detective/validate.py,sha256=XldlbGkUlPaIh0y4z9iaWlmmahwCrD1900s5Cxlq5wI,5430
-csv_detective-0.9.3.dev2447.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
-csv_detective-0.9.3.dev2447.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
-csv_detective-0.9.3.dev2447.dist-info/METADATA,sha256=Pl4Yw1e2r6GcmmTp405LNmslBeHfsYAHNpc1kvGYz14,11063
-csv_detective-0.9.3.dev2447.dist-info/RECORD,,
+csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
+csv_detective-0.9.3.dev2473.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+csv_detective-0.9.3.dev2473.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
+csv_detective-0.9.3.dev2473.dist-info/METADATA,sha256=qf-8rc4HEOh9ZdHD07eoWRcvFxcMrUYprp0hU73rzJ0,11063
+csv_detective-0.9.3.dev2473.dist-info/RECORD,,