csv-detective 0.9.1.dev1830__py3-none-any.whl → 0.9.1.dev1847__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -32,9 +32,7 @@ def parse_csv(
32
32
  if "ISO-8859" in encoding:
33
33
  encoding = "ISO-8859-1"
34
34
  try:
35
- table = pd.read_csv(
36
- the_file, sep=sep, dtype="unicode", encoding=encoding, skiprows=skiprows
37
- )
35
+ table = pd.read_csv(the_file, sep=sep, dtype=str, encoding=encoding, skiprows=skiprows)
38
36
  total_lines = len(table)
39
37
  nb_duplicates = len(table.loc[table.duplicated()])
40
38
  if num_rows > 0:
@@ -101,7 +101,7 @@ def parse_excel(
101
101
  file_path,
102
102
  engine="odf",
103
103
  sheet_name=None,
104
- dtype="unicode",
104
+ dtype=str,
105
105
  )
106
106
  sizes = {sheet_name: table.size for sheet_name, table in tables.items()}
107
107
  sheet_name = max(sizes, key=sizes.get)
@@ -121,7 +121,7 @@ def parse_excel(
121
121
  file_path,
122
122
  engine="odf",
123
123
  sheet_name=sheet_name,
124
- dtype="unicode",
124
+ dtype=str,
125
125
  )
126
126
  table, header_row_idx = remove_empty_first_rows(table)
127
127
  total_lines = len(table)
@@ -152,7 +152,7 @@ def parse_excel(
152
152
  file_path,
153
153
  engine=engine,
154
154
  sheet_name=sheet_name,
155
- dtype="unicode",
155
+ dtype=str,
156
156
  )
157
157
  table, header_row_idx = remove_empty_first_rows(table)
158
158
  total_lines = len(table)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: csv-detective
3
- Version: 0.9.1.dev1830
3
+ Version: 0.9.1.dev1847
4
4
  Summary: Detect tabular files column content
5
5
  Author-email: Etalab <opendatateam@data.gouv.fr>
6
6
  License: MIT
@@ -146,14 +146,14 @@ csv_detective/output/utils.py,sha256=tbji3dEH7bDc6gLCeVSVquqU3xaHA1CQOMuaJT4Hub8
146
146
  csv_detective/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
147
  csv_detective/parsing/columns.py,sha256=fbvQMu12gAmz4TnNCL7pLnMFB-mWN_O-zEoj8jEGj0A,5696
148
148
  csv_detective/parsing/compression.py,sha256=Fnw5tj-PpBNI8NYsWj5gD-DUoWcVLnsVpiKm9MpxmIA,350
149
- csv_detective/parsing/csv.py,sha256=qZFLOT3YCPoHF0svfVfQBnS8eHtucjDZ7dFITAPgLhc,1626
150
- csv_detective/parsing/excel.py,sha256=ULUDw76z6hs1Xm2yL9KBM0EOvIsfBLkxwqTZfDEx6aE,7045
149
+ csv_detective/parsing/csv.py,sha256=fJkjKvyk7InkNnYKtmivyi48mmcwvrha7gvZ5J4-86A,1588
150
+ csv_detective/parsing/excel.py,sha256=sKD5PRN1TlzPPOKFnZ3VRb0r1yIjPLlpxVWmZQeLYFk,7027
151
151
  csv_detective/parsing/load.py,sha256=C3M8nvgWenOb8aDFi5dpDGCoAw9EBqr4EB63zbz2M14,3699
152
152
  csv_detective/parsing/text.py,sha256=uz8wfmNTQnOd_4fjrIZ_5rxmFmgrg343hJh2szB73Hc,1770
153
- csv_detective-0.9.1.dev1830.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
153
+ csv_detective-0.9.1.dev1847.dist-info/licenses/LICENSE,sha256=A1dQrzxyxRHRih02KwibWj1khQyF7GeA6SqdOU87Gk4,1088
154
154
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
155
  tests/test_example.py,sha256=uTWswvUzBWEADGXZmMAdZvKhKvIjvT5zWOVVABgCDN4,1987
156
- tests/test_fields.py,sha256=VhhQny2Jqy_Z6SplpnN_qAXqBRQCuA42IgSNu37R2cc,12560
156
+ tests/test_fields.py,sha256=5901OxKDReGMPQm3ZJ36oDjtJ8H3El5jPxf1YNu5wVg,12542
157
157
  tests/test_file.py,sha256=YuVbSfeo_ASPiLT8CyxXqJENcDpj4wAFXzLwu_GzsOA,8437
158
158
  tests/test_labels.py,sha256=Y0XlOpztCyV65pk7iAS_nMMfdysoBujlBmz10vHul9A,469
159
159
  tests/test_structure.py,sha256=GRDYKy0UcdqlN4qglzsRC0puFj5cb-SVvONjvcPvtAA,1400
@@ -161,8 +161,8 @@ tests/test_validation.py,sha256=ie-Xf0vk6-M6GQq-x7kY5yse1EmXfxQkbaV7fR3fvYo,3308
161
161
  venv/bin/activate_this.py,sha256=NRy3waFmwW1pOaNUp33wNN0vD1Kzkd-zXX-Sgl4EiVI,1286
162
162
  venv/bin/jp.py,sha256=7z7dvRg0M7HzpZG4ssQID7nScjvQx7bcYTxJWDOrS6E,1717
163
163
  venv/bin/runxlrd.py,sha256=YlZMuycM_V_hzNt2yt3FyXPuwouMCmMhvj1oZaBeeuw,16092
164
- csv_detective-0.9.1.dev1830.dist-info/METADATA,sha256=eYNe6QPycRGL5VnIyx_kj0e79azipmi7qu5jh766OD0,9767
165
- csv_detective-0.9.1.dev1830.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
- csv_detective-0.9.1.dev1830.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
- csv_detective-0.9.1.dev1830.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
168
- csv_detective-0.9.1.dev1830.dist-info/RECORD,,
164
+ csv_detective-0.9.1.dev1847.dist-info/METADATA,sha256=4GPrJUwsDAkxwVV9fnFv4pVHmelYX1C1H4QCh_zG8wc,9767
165
+ csv_detective-0.9.1.dev1847.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
+ csv_detective-0.9.1.dev1847.dist-info/entry_points.txt,sha256=JjweTReFqKJmuvkegzlew2j3D5pZzfxvbEGOtGVGmaY,56
167
+ csv_detective-0.9.1.dev1847.dist-info/top_level.txt,sha256=cYKb4Ok3XgYA7rMDOYtxysjSJp_iUA9lJjynhVzue8g,30
168
+ csv_detective-0.9.1.dev1847.dist-info/RECORD,,
tests/test_fields.py CHANGED
@@ -99,7 +99,7 @@ def test_detetect_categorical_variable():
99
99
  "cat2": categorical_col2,
100
100
  "not_cat": not_categorical_col,
101
101
  }
102
- df = pd.DataFrame(df_dict, dtype="unicode")
102
+ df = pd.DataFrame(df_dict, dtype=str)
103
103
 
104
104
  res, _ = detect_categorical_variable(df)
105
105
  assert len(res.values) and all(k in res.values for k in ["cat", "cat2"])
@@ -114,8 +114,8 @@ def test_detect_continuous_variable():
114
114
  df_dict = {"cont": continuous_col, "not_cont": not_continuous_col}
115
115
  df_dict_2 = {"cont": continuous_col_2, "not_cont": not_continuous_col}
116
116
 
117
- df = pd.DataFrame(df_dict, dtype="unicode")
118
- df2 = pd.DataFrame(df_dict_2, dtype="unicode")
117
+ df = pd.DataFrame(df_dict, dtype=str)
118
+ df2 = pd.DataFrame(df_dict_2, dtype=str)
119
119
 
120
120
  res = detect_continuous_variable(df)
121
121
  res2 = detect_continuous_variable(df2, continuous_th=0.65)