csv-detective 0.9.3.dev2438__py3-none-any.whl → 0.9.3.dev2456__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- csv_detective/parsing/columns.py +5 -6
- csv_detective/validate.py +2 -2
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/METADATA +1 -1
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/RECORD +6 -6
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/WHEEL +0 -0
- {csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/entry_points.txt +0 -0
csv_detective/parsing/columns.py
CHANGED
@@ -78,15 +78,14 @@ def test_col(
         logging.info(f"\t- Starting with format '{label}'")
         # improvement lead : put the longest tests behind and make them only if previous tests not satisfactory
         # => the following needs to change, "apply" means all columns are tested for one type at once
-
-
-
+        for col in table.columns:
+            return_table.loc[label, col] = test_col_val(
+                table[col],
                 format,
                 skipna=skipna,
                 limited_output=limited_output,
                 verbose=verbose,
             )
-        )
         if verbose:
             display_logs_depending_process_time(
                 f'\t> Done with type "{label}" in {round(time() - start_type, 3)}s ({idx + 1}/{len(formats)})',
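The hunk above replaces an apply-based pass (where one candidate format was tested against all columns at once) with an explicit per-column loop that fills one cell of return_table per (format, column) pair. A minimal sketch of that pattern, where check(), the regex and the toy data are illustrative stand-ins for csv-detective's actual test_col_val and inputs:

import pandas as pd

# Illustrative stand-in for test_col_val: the share of non-empty values
# in a column that a simple pattern accepts.
def check(series: pd.Series, pattern: str) -> float:
    values = series.dropna().astype(str)
    if values.empty:
        return 0.0
    return values.str.fullmatch(pattern).mean()

table = pd.DataFrame({
    "date": ["2024-01-01", "2024-02-15", "not a date"],
    "name": ["a", "b", "c"],
})
formats = {"date_iso": r"\d{4}-\d{2}-\d{2}"}

# One row per candidate format label, one column per CSV column,
# filled by an explicit loop instead of a DataFrame-wide apply.
return_table = pd.DataFrame(index=list(formats), columns=table.columns, dtype=float)
for label, pattern in formats.items():
    for col in table.columns:
        return_table.loc[label, col] = check(table[col], pattern)

print(return_table)  # "date" scores ~0.67 for date_iso, "name" scores 0.0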
@@ -152,7 +151,7 @@ def test_col_chunks(
     remaining_tests_per_col = build_remaining_tests_per_col(return_table)
 
     # hashing rows to get nb_duplicates
-    row_hashes_count =
+    row_hashes_count = pd.util.hash_pandas_object(table, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: table[col].value_counts(dropna=False) for col in table.columns}
 
@@ -190,7 +189,7 @@ def test_col_chunks(
         batch = pd.concat(batch, ignore_index=True)
         analysis["total_lines"] += len(batch)
         row_hashes_count = row_hashes_count.add(
-
+            pd.util.hash_pandas_object(batch, index=False).value_counts(),
             fill_value=0,
         )
         for col in batch.columns:
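Both new lines in test_col_chunks rely on the same idea: pd.util.hash_pandas_object(frame, index=False) returns one 64-bit hash per row based only on the row's values, so value_counts() on those hashes tells how many times each distinct row occurs. A small sketch of why that yields a duplicate count (the toy frame and the final (counts - 1).sum() formula are illustrative; the diff does not show how nb_duplicates is derived downstream):

import pandas as pd

df = pd.DataFrame({
    "city": ["Paris", "Lyon", "Paris", "Paris"],
    "code": ["75", "69", "75", "75"],
})

# One uint64 hash per row; index=False so only the row's values matter,
# not its position in the frame.
row_hashes = pd.util.hash_pandas_object(df, index=False)

# How many times each distinct row occurs.
row_hashes_count = row_hashes.value_counts()

# Rows beyond the first occurrence of each distinct row are duplicates.
nb_duplicates = int((row_hashes_count - 1).sum())
print(nb_duplicates)  # 2: the "Paris"/"75" row appears three times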
csv_detective/validate.py
CHANGED
@@ -82,7 +82,7 @@ def validate(
     )
 
     # hashing rows to get nb_duplicates
-    row_hashes_count =
+    row_hashes_count = pd.util.hash_pandas_object(first_chunk, index=False).value_counts()
     # getting values for profile to read the file only once
     col_values = {col: first_chunk[col].value_counts(dropna=False) for col in first_chunk.columns}
     analysis["total_lines"] = 0
@@ -91,7 +91,7 @@ def validate(
         logging.info(f"> Testing chunk number {idx}")
         analysis["total_lines"] += len(chunk)
         row_hashes_count = row_hashes_count.add(
-
+            pd.util.hash_pandas_object(chunk, index=False).value_counts(),
             fill_value=0,
         )
         for col in chunk.columns:
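validate() applies the same pattern as test_col_chunks: hash counts from the first chunk, then Series.add(..., fill_value=0) for every following chunk, so hashes present in only one operand are kept instead of becoming NaN. A self-contained sketch of that streaming accumulation, where the file name, separator and chunk size are illustrative rather than csv-detective's actual loader settings:

import pandas as pd

def row_hash_counts(frame: pd.DataFrame) -> pd.Series:
    return pd.util.hash_pandas_object(frame, index=False).value_counts()

# Illustrative: stream a CSV in chunks, as validate() does with its reader.
chunks = pd.read_csv("data.csv", sep=";", dtype=str, chunksize=10_000)

first_chunk = next(chunks)
row_hashes_count = row_hash_counts(first_chunk)
total_lines = len(first_chunk)

for chunk in chunks:
    total_lines += len(chunk)
    # fill_value=0 keeps hashes seen in only one operand;
    # a plain + would turn them into NaN.
    row_hashes_count = row_hashes_count.add(row_hash_counts(chunk), fill_value=0)

# row_hashes_count now holds, for every distinct row in the whole file,
# how many times it occurs (see the duplicate-count sketch above).
print(total_lines, len(row_hashes_count))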
{csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/RECORD
RENAMED
@@ -78,15 +78,15 @@ csv_detective/output/profile.py,sha256=VUQp0VJ22dfY4R5TybTpuQW_TOX_rLEp98cOzu-Jf
 csv_detective/output/schema.py,sha256=XoKljXPXP00DfqPCiz1ydwTHYGAFsvNxnaPCNBuuBIo,10443
 csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8,3297
 csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-csv_detective/parsing/columns.py,sha256=
+csv_detective/parsing/columns.py,sha256=WwivsR4r-SAkugzVSmYeUkgbNXz3CWXnEl2ZmoX_tcs,9238
 csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
 csv_detective/parsing/csv.py,sha256=0T0gpaXzwJo-sq41IoLQD704GiMUYeDVVASVbat-zWg,1726
 csv_detective/parsing/excel.py,sha256=oAVTuoDccJc4-kVjHXiIPLQx3lq3aZRRZQxkG1c06JQ,6992
 csv_detective/parsing/load.py,sha256=f-8aKiNpy_47qg4Lq-UZUR4NNrbJ_-KEGvcUQZ8cmb0,4317
 csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
 csv_detective/utils.py,sha256=RJ_zFOJ1DRY8HtDrKPiCdNk5gU6-KwOrOKOyfSkBZZY,1118
-csv_detective/validate.py,sha256=
-csv_detective-0.9.3.
-csv_detective-0.9.3.
-csv_detective-0.9.3.
-csv_detective-0.9.3.
+csv_detective/validate.py,sha256=CjZXhhDP-n6wGgEqbwrGRqebU8L5bidwnvQp-TbnvFA,5424
+csv_detective-0.9.3.dev2456.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+csv_detective-0.9.3.dev2456.dist-info/entry_points.txt,sha256=1J86TQNCanjsLMboAufdEUla03qEQaC9QmVGYgt2FCQ,57
+csv_detective-0.9.3.dev2456.dist-info/METADATA,sha256=164BvQ11YE3f07bbQNEK_0DrdLyO5SPXWSNt5lwYIYk,11063
+csv_detective-0.9.3.dev2456.dist-info/RECORD,,
{csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/WHEEL
RENAMED
File without changes

{csv_detective-0.9.3.dev2438.dist-info → csv_detective-0.9.3.dev2456.dist-info}/entry_points.txt
RENAMED
File without changes