csv-detective 0.9.3.dev2447__py3-none-any.whl → 0.9.3.dev2456__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- csv_detective/parsing/columns.py +2 -2
- csv_detective/validate.py +2 -2
- {csv_detective-0.9.3.dev2447.dist-info → csv_detective-0.9.3.dev2456.dist-info}/METADATA +1 -1
- {csv_detective-0.9.3.dev2447.dist-info → csv_detective-0.9.3.dev2456.dist-info}/RECORD +6 -6
- {csv_detective-0.9.3.dev2447.dist-info → csv_detective-0.9.3.dev2456.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2447.dist-info → csv_detective-0.9.3.dev2456.dist-info}/entry_points.txt +0 -0
csv_detective/parsing/columns.py
CHANGED
|
@@ -151,7 +151,7 @@ def test_col_chunks(
|
|
|
151
151
|
remaining_tests_per_col = build_remaining_tests_per_col(return_table)
|
|
152
152
|
|
|
153
153
|
# hashing rows to get nb_duplicates
|
|
154
|
-
row_hashes_count =
|
|
154
|
+
row_hashes_count = pd.util.hash_pandas_object(table, index=False).value_counts()
|
|
155
155
|
# getting values for profile to read the file only once
|
|
156
156
|
col_values = {col: table[col].value_counts(dropna=False) for col in table.columns}
|
|
157
157
|
|
|
@@ -189,7 +189,7 @@ def test_col_chunks(
|
|
|
189
189
|
batch = pd.concat(batch, ignore_index=True)
|
|
190
190
|
analysis["total_lines"] += len(batch)
|
|
191
191
|
row_hashes_count = row_hashes_count.add(
|
|
192
|
-
|
|
192
|
+
pd.util.hash_pandas_object(batch, index=False).value_counts(),
|
|
193
193
|
fill_value=0,
|
|
194
194
|
)
|
|
195
195
|
for col in batch.columns:
|
csv_detective/validate.py
CHANGED
|
@@ -82,7 +82,7 @@ def validate(
|
|
|
82
82
|
)
|
|
83
83
|
|
|
84
84
|
# hashing rows to get nb_duplicates
|
|
85
|
-
row_hashes_count =
|
|
85
|
+
row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
|
|
86
86
|
# getting values for profile to read the file only once
|
|
87
87
|
col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
|
|
88
88
|
analysis["total_lines"] = 0
|
|
@@ -91,7 +91,7 @@ def validate(
|
|
|
91
91
|
logging.info(f"> Testing chunk number {idx}")
|
|
92
92
|
analysis["total_lines"] += len(chunk)
|
|
93
93
|
row_hashes_count = row_hashes_count.add(
|
|
94
|
-
|
|
94
|
+
pd.util.hash_pandas_object(chunk, index=False).value_counts(),
|
|
95
95
|
fill_value=0,
|
|
96
96
|
)
|
|
97
97
|
for col in chunk.columns:
|
|
@@ -78,15 +78,15 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
|
|
|
78
78
|
csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
|
|
79
79
|
csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
|
|
80
80
|
csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
|
-
csv_detective/parsing/columns.py,sha256=
|
|
81
|
+
csv_detective/parsing/columns.py,sha256=WwivsR4r-SAkugzVSmYeUkgbNXz3CWXnEl2ZmoX_tcs,9238
|
|
82
82
|
csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
|
|
83
83
|
csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
|
|
84
84
|
csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
|
|
85
85
|
csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
|
|
86
86
|
csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
|
|
87
87
|
csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
|
|
88
|
-
csv_detective/validate.py,sha256=
|
|
89
|
-
csv_detective-0.9.3.
|
|
90
|
-
csv_detective-0.9.3.
|
|
91
|
-
csv_detective-0.9.3.
|
|
92
|
-
csv_detective-0.9.3.
|
|
88
|
+
csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
|
|
89
|
+
csv_detective-0.9.3.dev2456.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
|
|
90
|
+
csv_detective-0.9.3.dev2456.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
|
|
91
|
+
csv_detective-0.9.3.dev2456.dist-info/METADATA,sha256=164BvQ11YE3f07bbQNEK_0DrdLyO5SPXWSNt5lwYIYk,11063
|
|
92
|
+
csv_detective-0.9.3.dev2456.dist-info/RECORD,,
|
|
File without changes
|
{csv_detective-0.9.3.dev2447.dist-info → csv_detective-0.9.3.dev2456.dist-info}/entry_points.txt
RENAMED
|
File without changes
|