deepcsv 0.6.3__tar.gz → 0.6.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. deepcsv-0.6.4/CHANGELOG.md +16 -0
  2. {deepcsv-0.6.3/deepcsv.egg-info → deepcsv-0.6.4}/PKG-INFO +12 -10
  3. {deepcsv-0.6.3 → deepcsv-0.6.4}/README.md +7 -0
  4. deepcsv-0.6.4/deepcsv/__init__.py +31 -0
  5. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv/deepcsv.py +18 -5
  6. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv/utils.py +64 -61
  7. {deepcsv-0.6.3 → deepcsv-0.6.4/deepcsv.egg-info}/PKG-INFO +12 -10
  8. {deepcsv-0.6.3 → deepcsv-0.6.4}/setup.py +1 -1
  9. deepcsv-0.6.3/CHANGELOG.md +0 -21
  10. deepcsv-0.6.3/deepcsv/__init__.py +0 -17
  11. {deepcsv-0.6.3 → deepcsv-0.6.4}/LICENSE +0 -0
  12. {deepcsv-0.6.3 → deepcsv-0.6.4}/MANIFEST.in +0 -0
  13. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/SOURCES.txt +0 -0
  14. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dependency_links.txt +0 -0
  15. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.5.0-py3-none-any.whl +0 -0
  16. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.5.0.tar.gz +0 -0
  17. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.5.0b1-py3-none-any.whl +0 -0
  18. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.5.0b1.tar.gz +0 -0
  19. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.0-py3-none-any.whl +0 -0
  20. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.0.tar.gz +0 -0
  21. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.1-py3-none-any.whl +0 -0
  22. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.1.tar.gz +0 -0
  23. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.2-py3-none-any.whl +0 -0
  24. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.2.tar.gz +0 -0
  25. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.2b1-py3-none-any.whl +0 -0
  26. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.2b1.tar.gz +0 -0
  27. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.2b2-py3-none-any.whl +0 -0
  28. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.2b2.tar.gz +0 -0
  29. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.3b1-py3-none-any.whl +0 -0
  30. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/dist/deepcsv-0.6.3b1.tar.gz +0 -0
  31. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/requires.txt +0 -0
  32. {deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv.egg-info/top_level.txt +0 -0
  33. {deepcsv-0.6.3 → deepcsv-0.6.4}/setup.cfg +0 -0
@@ -0,0 +1,16 @@
1
+ # Changelog
2
+
3
+ ---
4
+
5
+ ### Added
6
+
7
+ - `process_file()` — Converts columns that store numbers as strings to numeric values (float)
8
+ - `save_as` — Added function to save dataset file
9
+
10
+ ---
11
+
12
+ ### Fixes
13
+
14
+ - `process_file()` — Fixed some bugs in `file_format` parameter
15
+
16
+ ---
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepcsv
3
- Version: 0.6.3
3
+ Version: 0.6.4
4
4
  Summary: Automatically processes data files in directories, converts array-like strings to NumPy arrays, detects and fixes data type issues, and saves results as optimized Parquet files and MORE!
5
5
  Home-page: https://github.com/abdubakr77/deepcsv
6
6
  Author: Abdullah Bakr
@@ -35,6 +35,13 @@ Dynamic: requires-python
35
35
  Dynamic: summary
36
36
 
37
37
  # deepcsv
38
+
39
+ ![PyPI Downloads](https://img.shields.io/pypi/dm/deepcsv?color=blue&label=Downloads)
40
+ ![PyPI Version](https://img.shields.io/pypi/v/deepcsv?color=green&label=Version)
41
+ ![Python](https://img.shields.io/pypi/pyversions/deepcsv?color=yellow&label=Python)
42
+ ![License](https://img.shields.io/github/license/abdubakr77/deepcsv?color=orange)
43
+ ![GitHub Stars](https://img.shields.io/github/stars/abdubakr77/deepcsv?color=yellow)
44
+
38
45
  > *"You think you saved a list. You open it tomorrow — and it's a string."*
39
46
 
40
47
  `deepcsv` was built to solve exactly this problem.
@@ -245,18 +252,13 @@ auto_fix(data_input: Union[str, pd.DataFrame])
245
252
 
246
253
  ### Added
247
254
 
248
- - `process_file` — Added Doc & Examples for new params in function
249
- - `process_all_files` — Added Doc & Examples for new params in function
250
- - `process_file` & `process_all_file` — Added New parameter `to_list` to be real python list if you don't need array
251
- - Added `auto_fix` function for automatic data type correction in DataFrames for mixed Dtypes.
252
- - Added logg to `auto_fix` to track changes made to columns.
253
- - Added Documentation for `auto_fix` function to understand more about function.
255
+ - `process_file()` — Converts columns that store numbers as strings to numeric values (float)
256
+ - `save_as` — Added function to save dataset file
254
257
 
255
258
  ---
256
259
 
257
- ### Changed
260
+ ### Fixes
258
261
 
259
- - `process_file()` — Changed `save_file_extension` parameter to `file_format`
260
- - `process_all_files()` — Changed `file_extension` parameter to `file_format`
262
+ - `process_file()` — Fixed some bugs in `file_format` parameter
261
263
 
262
264
  ---
@@ -1,4 +1,11 @@
1
1
  # deepcsv
2
+
3
+ ![PyPI Downloads](https://img.shields.io/pypi/dm/deepcsv?color=blue&label=Downloads)
4
+ ![PyPI Version](https://img.shields.io/pypi/v/deepcsv?color=green&label=Version)
5
+ ![Python](https://img.shields.io/pypi/pyversions/deepcsv?color=yellow&label=Python)
6
+ ![License](https://img.shields.io/github/license/abdubakr77/deepcsv?color=orange)
7
+ ![GitHub Stars](https://img.shields.io/github/stars/abdubakr77/deepcsv?color=yellow)
8
+
2
9
  > *"You think you saved a list. You open it tomorrow — and it's a string."*
3
10
 
4
11
  `deepcsv` was built to solve exactly this problem.
@@ -0,0 +1,31 @@
1
+ from .deepcsv import process_all_files, process_file
2
+ from .utils import read_any, clean_values, auto_fix, save_as ,_validate_cols , _validate_index , _parse_operator , _validate_condition , _val_dtype
3
+ from importlib.metadata import PackageNotFoundError, version as _version
4
+ import requests as _requests
5
+
6
+ __all__ = [
7
+ "process_file",
8
+ "process_all_files",
9
+ "read_any",
10
+ "clean_values",
11
+ "auto_fix",
12
+ "save_as"
13
+ ]
14
+
15
+
16
+ def _check_for_updates():
17
+ try:
18
+ response = _requests.get("https://pypi.org/pypi/deepcsv/json", timeout=5)
19
+ latest = response.json()["info"]["version"]
20
+ current = _version("deepcsv")
21
+ if latest != current:
22
+ print(
23
+ f"DeepCSV: update available — run 'pip install -U deepcsv' ({latest})"
24
+ )
25
+ except PackageNotFoundError:
26
+ pass
27
+ except Exception:
28
+ pass
29
+
30
+
31
+ _check_for_updates()
@@ -1,6 +1,6 @@
1
1
  import pyarrow
2
2
  import pandas as pd
3
- from .utils import read_any, clean_values, _validate_cols, _validate_index,_parse_operator,_validate_condition,_save_as
3
+ from deepcsv import read_any, save_as
4
4
  from typing import Union
5
5
  from ast import literal_eval
6
6
  from numpy import nan,array
@@ -9,7 +9,7 @@ from os.path import join,relpath,dirname,isfile,isdir
9
9
  from warnings import filterwarnings
10
10
  filterwarnings("ignore")
11
11
 
12
- def process_file(data_input: Union[str, pd.DataFrame] , file_format= str, to_list = False) -> pd.DataFrame:
12
+ def process_file(data_input: Union[str, pd.DataFrame] , file_format: None, to_list = False) -> pd.DataFrame:
13
13
  """
14
14
  Parses string representations of lists in DataFrame columns to actual NumPy arrays.
15
15
 
@@ -58,6 +58,19 @@ def process_file(data_input: Union[str, pd.DataFrame] , file_format= str, to_lis
58
58
 
59
59
  data[ColName] = pd.to_numeric(data[ColName], errors='coerce')
60
60
  print("System : Done!")
61
+ print("-" * 50)
62
+
63
+
64
+
65
+ elif len(data[data[ColName].apply(str).str.isnumeric()]) >= len(data[data[ColName].apply(str).str.isnumeric() == False]):
66
+
67
+ print(f"WARNING:\nThis Dataset Name ({data_input.split('\\')[-1]}) Found numbers as {len(data[ColName].apply(type).unique())} in a column called ({ColName})\nPath : {data_input}")
68
+
69
+ print(f"System : Trying to fix and converting the column as a Numerical Values...")
70
+ data[ColName] = pd.to_numeric(data[ColName], errors='coerce')
71
+ print("System : Done!")
72
+ print("-" * 50)
73
+
61
74
 
62
75
  elif isinstance(First_Value , str) and First_Value.strip().startswith("["):
63
76
  if to_list:
@@ -66,8 +79,8 @@ def process_file(data_input: Union[str, pd.DataFrame] , file_format= str, to_lis
66
79
  data[f"{ColName.capitalize()}List"] = data[ColName].apply(lambda x : array(literal_eval(x)) if pd.notna(x) else nan)
67
80
  data.drop(ColName,inplace=True,axis=1)
68
81
 
69
- if file_format.strip().lower() in ['csv','txt','tsv','xls','xlsx','json','parquet','pkl','feather','db','sqlite']:
70
- _save_as(data=data,ext=file_format)
82
+ if file_format != None and file_format.strip().lower() in ['csv','txt','tsv','xls','xlsx','json','parquet','pkl','feather','db','sqlite']:
83
+ save_as(data=data,ext=file_format)
71
84
 
72
85
 
73
86
  return data
@@ -125,7 +138,7 @@ def process_all_files(directory_path: str, output_dir="All CSV Files is Converte
125
138
  if "List" in df_converted.columns[-1]:
126
139
  print(Sub_Item_Path)
127
140
  makedirs(dirname(output),exist_ok=True)
128
- _save_as(data=df_converted,
141
+ save_as(data=df_converted,
129
142
  current_dir=output.replace(f".{Sub_Item_Path.split(".")[-1].strip().lower()}", f".{file_format}"),
130
143
  ext=file_format,to_list=to_list)
131
144
 
@@ -3,6 +3,8 @@ import pandas as pd
3
3
  from typing import Optional, Union
4
4
  from pathlib import Path
5
5
 
6
+ __all__ = ['read_any', 'clean_values', 'auto_fix',"save_as"]
7
+
6
8
  # ──────────────────────────────────────────────
7
9
  # PRIVATE HELPERS
8
10
  # ──────────────────────────────────────────────
@@ -82,66 +84,6 @@ def _validate_condition(condition):
82
84
 
83
85
  return op_func, cond_val
84
86
 
85
- def _save_as(data: pd.DataFrame, current_dir = str(Path.cwd()), ext= str) -> None:
86
- """
87
- Saves a DataFrame to a file with the specified format.
88
-
89
- Parameters
90
- ----------
91
- data : pd.DataFrame
92
- The DataFrame to save.
93
- file_path : str
94
- Path without extension. Example: "data/myfile"
95
- ext : str
96
- File extension. Supported:
97
- - .csv → pd.to_csv()
98
- - .xlsx → pd.to_excel()
99
- - .json → pd.to_json()
100
- - .parquet → pd.to_parquet()
101
- - .pkl → pd.to_pickle()
102
- - .feather → pd.to_feather()
103
- - .tsv → pd.to_csv(sep='\\t')
104
- - .html → pd.to_html()
105
- - .xml → pd.to_xml()
106
-
107
- Returns
108
- -------
109
- None
110
-
111
- Examples
112
- --------
113
- >>> _save_as(df, "data/myfile", ".parquet")
114
- >>> _save_as(df, "data/myfile", ".csv")
115
- """
116
- ext = ext.strip().lower()
117
- if not ext.startswith("."):
118
- ext = f".{ext}"
119
-
120
- full_path = f"{current_dir}{ext}"
121
-
122
- writers = {
123
- ".csv": lambda: data.to_csv(full_path, index=False),
124
- ".tsv": lambda: data.to_csv(full_path, sep='\t', index=False),
125
- ".xlsx": lambda: data.to_excel(full_path, index=False),
126
- ".json": lambda: data.to_json(full_path, orient="records", indent=2),
127
- ".parquet": lambda: data.to_parquet(full_path, index=False),
128
- ".pkl": lambda: data.to_pickle(full_path),
129
- ".feather": lambda: data.to_feather(full_path),
130
- ".html": lambda: data.to_html(full_path, index=False),
131
- ".xml": lambda: data.to_xml(full_path, index=False),
132
- }
133
-
134
- writer = writers.get(ext)
135
- if writer is None:
136
- raise ValueError(
137
- f"Unsupported extension: {ext!r}\n"
138
- f"Supported: {list(writers.keys())}"
139
- )
140
-
141
- writer()
142
- print(f"Saved: {full_path}")
143
- print("-"*50)
144
-
145
87
 
146
88
  def _val_dtype(x,dtype):
147
89
  if dtype == str:
@@ -380,4 +322,65 @@ def auto_fix(data_input: Union[str, pd.DataFrame]):
380
322
  df[ColName] = df[ColName].apply(lambda x: _val_dtype(x,dtype))
381
323
  print("Done!")
382
324
  print("—"*35)
383
- return df
325
+ return df
326
+
327
+
328
+ def save_as(data: pd.DataFrame, current_dir = str(Path.cwd()), ext= str) -> None:
329
+ """
330
+ Saves a DataFrame to a file with the specified format.
331
+
332
+ Parameters
333
+ ----------
334
+ data : pd.DataFrame
335
+ The DataFrame to save.
336
+ file_path : str
337
+ Path without extension. Example: "data/myfile"
338
+ ext : str
339
+ File extension. Supported:
340
+ - .csv → pd.to_csv()
341
+ - .xlsx → pd.to_excel()
342
+ - .json → pd.to_json()
343
+ - .parquet → pd.to_parquet()
344
+ - .pkl → pd.to_pickle()
345
+ - .feather → pd.to_feather()
346
+ - .tsv → pd.to_csv(sep='\\t')
347
+ - .html → pd.to_html()
348
+ - .xml → pd.to_xml()
349
+
350
+ Returns
351
+ -------
352
+ None
353
+
354
+ Examples
355
+ --------
356
+ >>> _save_as(df, "data/myfile", ".parquet")
357
+ >>> _save_as(df, "data/myfile", ".csv")
358
+ """
359
+ ext = ext.strip().lower()
360
+ if not ext.startswith("."):
361
+ ext = f".{ext}"
362
+
363
+ full_path = f"{current_dir}{ext}"
364
+
365
+ writers = {
366
+ ".csv": lambda: data.to_csv(full_path, index=False),
367
+ ".tsv": lambda: data.to_csv(full_path, sep='\t', index=False),
368
+ ".xlsx": lambda: data.to_excel(full_path, index=False),
369
+ ".json": lambda: data.to_json(full_path, orient="records", indent=2),
370
+ ".parquet": lambda: data.to_parquet(full_path, index=False),
371
+ ".pkl": lambda: data.to_pickle(full_path),
372
+ ".feather": lambda: data.to_feather(full_path),
373
+ ".html": lambda: data.to_html(full_path, index=False),
374
+ ".xml": lambda: data.to_xml(full_path, index=False),
375
+ }
376
+
377
+ writer = writers.get(ext)
378
+ if writer is None:
379
+ raise ValueError(
380
+ f"Unsupported extension: {ext!r}\n"
381
+ f"Supported: {list(writers.keys())}"
382
+ )
383
+
384
+ writer()
385
+ print(f"Saved: {full_path}")
386
+ print("-"*50)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepcsv
3
- Version: 0.6.3
3
+ Version: 0.6.4
4
4
  Summary: Automatically processes data files in directories, converts array-like strings to NumPy arrays, detects and fixes data type issues, and saves results as optimized Parquet files and MORE!
5
5
  Home-page: https://github.com/abdubakr77/deepcsv
6
6
  Author: Abdullah Bakr
@@ -35,6 +35,13 @@ Dynamic: requires-python
35
35
  Dynamic: summary
36
36
 
37
37
  # deepcsv
38
+
39
+ ![PyPI Downloads](https://img.shields.io/pypi/dm/deepcsv?color=blue&label=Downloads)
40
+ ![PyPI Version](https://img.shields.io/pypi/v/deepcsv?color=green&label=Version)
41
+ ![Python](https://img.shields.io/pypi/pyversions/deepcsv?color=yellow&label=Python)
42
+ ![License](https://img.shields.io/github/license/abdubakr77/deepcsv?color=orange)
43
+ ![GitHub Stars](https://img.shields.io/github/stars/abdubakr77/deepcsv?color=yellow)
44
+
38
45
  > *"You think you saved a list. You open it tomorrow — and it's a string."*
39
46
 
40
47
  `deepcsv` was built to solve exactly this problem.
@@ -245,18 +252,13 @@ auto_fix(data_input: Union[str, pd.DataFrame])
245
252
 
246
253
  ### Added
247
254
 
248
- - `process_file` — Added Doc & Examples for new params in function
249
- - `process_all_files` — Added Doc & Examples for new params in function
250
- - `process_file` & `process_all_file` — Added New parameter `to_list` to be real python list if you don't need array
251
- - Added `auto_fix` function for automatic data type correction in DataFrames for mixed Dtypes.
252
- - Added logg to `auto_fix` to track changes made to columns.
253
- - Added Documentation for `auto_fix` function to understand more about function.
255
+ - `process_file()` — Converts columns that store numbers as strings to numeric values (float)
256
+ - `save_as` — Added function to save dataset file
254
257
 
255
258
  ---
256
259
 
257
- ### Changed
260
+ ### Fixes
258
261
 
259
- - `process_file()` — Changed `save_file_extension` parameter to `file_format`
260
- - `process_all_files()` — Changed `file_extension` parameter to `file_format`
262
+ - `process_file()` — Fixed some bugs in `file_format` parameter
261
263
 
262
264
  ---
@@ -8,7 +8,7 @@ changelog = (this_directory / "CHANGELOG.md").read_text(encoding="utf-8")
8
8
 
9
9
  setup(
10
10
  name="deepcsv",
11
- version="0.6.3",
11
+ version="0.6.4",
12
12
  author="Abdullah Bakr",
13
13
  author_email="abdubakora1232@gmail.com",
14
14
  description="Automatically processes data files in directories, converts array-like strings to NumPy arrays, detects and fixes data type issues, and saves results as optimized Parquet files and MORE!",
@@ -1,21 +0,0 @@
1
- # Changelog
2
-
3
- ---
4
-
5
- ### Added
6
-
7
- - `process_file` — Added Doc & Examples for new params in function
8
- - `process_all_files` — Added Doc & Examples for new params in function
9
- - `process_file` & `process_all_file` — Added New parameter `to_list` to be real python list if you don't need array
10
- - Added `auto_fix` function for automatic data type correction in DataFrames for mixed Dtypes.
11
- - Added logg to `auto_fix` to track changes made to columns.
12
- - Added Documentation for `auto_fix` function to understand more about function.
13
-
14
- ---
15
-
16
- ### Changed
17
-
18
- - `process_file()` — Changed `save_file_extension` parameter to `file_format`
19
- - `process_all_files()` — Changed `file_extension` parameter to `file_format`
20
-
21
- ---
@@ -1,17 +0,0 @@
1
- from .deepcsv import process_all_files, process_file
2
- from .utils import read_any, clean_values
3
- from importlib.metadata import version
4
- import requests
5
-
6
- def _check_for_updates():
7
- try:
8
- response = requests.get("https://pypi.org/pypi/deepcsv/json")
9
- latest = response.json()["info"]["version"]
10
- current = version("deepcsv")
11
- if latest != current:
12
- print(f"DeepCSV: New version {latest} available! — run 'pip install -U deepcsv'")
13
- except Exception:
14
- pass
15
-
16
-
17
- _check_for_updates()
File without changes
File without changes
File without changes