PyPI - deepcsv - Versions diffs - 0.6.3__tar.gz → 0.6.4__tar.gz - Mend

deepcsv 0.6.3.tar.gz → 0.6.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

deepcsv-0.6.4/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,16 @@
+# Changelog
+---
+### Added
+- `process_file()` — Converts columns whose values are numbers stored as strings to numeric values (float)
+- `save_as` — Added function to save dataset file
+---
+### Fixes
+- `process_file()` — Fixed some bugs in `file_format` parameter
+---

{deepcsv-0.6.3/deepcsv.egg-info → deepcsv-0.6.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepcsv
-Version: 0.6.3
+Version: 0.6.4
 Summary: Automatically processes data files in directories, converts array-like strings to NumPy arrays, detects and fixes data type issues, and saves results as optimized Parquet files and MORE!
 Home-page: https://github.com/abdubakr77/deepcsv
 Author: Abdullah Bakr
@@ -35,6 +35,13 @@ Dynamic: requires-python
 Dynamic: summary
 # deepcsv
+![PyPI Downloads](https://img.shields.io/pypi/dm/deepcsv?color=blue&label=Downloads)
+![PyPI Version](https://img.shields.io/pypi/v/deepcsv?color=green&label=Version)
+![Python](https://img.shields.io/pypi/pyversions/deepcsv?color=yellow&label=Python)
+![License](https://img.shields.io/github/license/abdubakr77/deepcsv?color=orange)
+![GitHub Stars](https://img.shields.io/github/stars/abdubakr77/deepcsv?color=yellow)
 > *"You think you saved a list. You open it tomorrow — and it's a string."*
 `deepcsv` was built to solve exactly this problem.
@@ -245,18 +252,13 @@ auto_fix(data_input: Union[str, pd.DataFrame])
 ### Added
-- `process_file` — Added Doc & Examples for new params in function
-- `process_all_files` — Added Doc & Examples for new params in function
-- `process_file` & `process_all_file` — Added New parameter `to_list` to be real python list if you don't need array
-- Added `auto_fix` function for automatic data type correction in DataFrames for mixed Dtypes.
-- Added logg to `auto_fix` to track changes made to columns.
-- Added Documentation for `auto_fix` function to understand more about function.
+- `process_file()` — Converts columns whose values are numbers stored as strings to numeric values (float)
+- `save_as` — Added function to save dataset file
 ---
-### Changed
+### Fixes
-- `process_file()` — Changed `save_file_extension` parameter to `file_format`
-- `process_all_files()` — Changed `file_extension` parameter to `file_format`
+- `process_file()` — Fixed some bugs in `file_format` parameter
 ---

{deepcsv-0.6.3 → deepcsv-0.6.4}/README.md RENAMED Viewed

@@ -1,4 +1,11 @@
 # deepcsv
+![PyPI Downloads](https://img.shields.io/pypi/dm/deepcsv?color=blue&label=Downloads)
+![PyPI Version](https://img.shields.io/pypi/v/deepcsv?color=green&label=Version)
+![Python](https://img.shields.io/pypi/pyversions/deepcsv?color=yellow&label=Python)
+![License](https://img.shields.io/github/license/abdubakr77/deepcsv?color=orange)
+![GitHub Stars](https://img.shields.io/github/stars/abdubakr77/deepcsv?color=yellow)
 > *"You think you saved a list. You open it tomorrow — and it's a string."*
 `deepcsv` was built to solve exactly this problem.

deepcsv-0.6.4/deepcsv/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+from .deepcsv import process_all_files, process_file
+from .utils import read_any, clean_values, auto_fix, save_as ,_validate_cols , _validate_index , _parse_operator , _validate_condition , _val_dtype
+from importlib.metadata import PackageNotFoundError, version as _version
+import requests as _requests
+__all__ = [
+    "process_file",
+    "process_all_files",
+    "read_any",
+    "clean_values",
+    "auto_fix",
+    "save_as"
+]
+def _check_for_updates():
+    try:
+        response = _requests.get("https://pypi.org/pypi/deepcsv/json", timeout=5)
+        latest = response.json()["info"]["version"]
+        current = _version("deepcsv")
+        if latest != current:
+            print(
+                f"DeepCSV: update available — run 'pip install -U deepcsv' ({latest})"
+            )
+    except PackageNotFoundError:
+        pass
+    except Exception:
+        pass
+_check_for_updates()

{deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv/deepcsv.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import pyarrow
 import pandas as pd
-from .utils import read_any, clean_values, _validate_cols, _validate_index,_parse_operator,_validate_condition,_save_as
+from deepcsv import read_any, save_as
 from typing import Union
 from ast import literal_eval
 from numpy import nan,array
@@ -9,7 +9,7 @@ from os.path import join,relpath,dirname,isfile,isdir
 from warnings import filterwarnings
 filterwarnings("ignore")
-def process_file(data_input: Union[str, pd.DataFrame] , file_format= str, to_list = False) -> pd.DataFrame:
+def process_file(data_input: Union[str, pd.DataFrame] , file_format: None, to_list = False) -> pd.DataFrame:
     """
     Parses string representations of lists in DataFrame columns to actual NumPy arrays.
@@ -58,6 +58,19 @@ def process_file(data_input: Union[str, pd.DataFrame] , file_format= str, to_lis
                 data[ColName] = pd.to_numeric(data[ColName], errors='coerce')
                 print("System : Done!")
+                print("-" * 50)
+        elif len(data[data[ColName].apply(str).str.isnumeric()]) >= len(data[data[ColName].apply(str).str.isnumeric() == False]):
+            print(f"WARNING:\nThis Dataset Name ({data_input.split('\\')[-1]}) Found numbers as {len(data[ColName].apply(type).unique())} in a column called ({ColName})\nPath : {data_input}")
+            print(f"System : Trying to fix and converting the column as a Numerical Values...")
+            data[ColName] = pd.to_numeric(data[ColName], errors='coerce')
+            print("System : Done!")
+            print("-" * 50)
         elif isinstance(First_Value , str) and First_Value.strip().startswith("["):
             if to_list:
@@ -66,8 +79,8 @@ def process_file(data_input: Union[str, pd.DataFrame] , file_format= str, to_lis
                 data[f"{ColName.capitalize()}List"] = data[ColName].apply(lambda x : array(literal_eval(x)) if pd.notna(x) else nan)
             data.drop(ColName,inplace=True,axis=1)
-    if file_format.strip().lower() in ['csv','txt','tsv','xls','xlsx','json','parquet','pkl','feather','db','sqlite']:
-        _save_as(data=data,ext=file_format)
+    if file_format != None and file_format.strip().lower() in ['csv','txt','tsv','xls','xlsx','json','parquet','pkl','feather','db','sqlite']:
+        save_as(data=data,ext=file_format)
     return data
@@ -125,7 +138,7 @@ def process_all_files(directory_path: str, output_dir="All CSV Files is Converte
                     if "List" in df_converted.columns[-1]:
                         print(Sub_Item_Path)
                         makedirs(dirname(output),exist_ok=True)
-                        _save_as(data=df_converted,
+                        save_as(data=df_converted,
                                 current_dir=output.replace(f".{Sub_Item_Path.split(".")[-1].strip().lower()}", f".{file_format}"),
                                 ext=file_format,to_list=to_list)

{deepcsv-0.6.3 → deepcsv-0.6.4}/deepcsv/utils.py RENAMED Viewed

@@ -3,6 +3,8 @@ import pandas as pd
 from typing import Optional, Union
 from pathlib import Path
+__all__ = ['read_any', 'clean_values', 'auto_fix',"save_as"]
 # ──────────────────────────────────────────────
 #               PRIVATE HELPERS
 # ──────────────────────────────────────────────
@@ -82,66 +84,6 @@ def _validate_condition(condition):
     return op_func, cond_val
-def _save_as(data: pd.DataFrame, current_dir = str(Path.cwd()), ext= str) -> None:
-    """
-    Saves a DataFrame to a file with the specified format.
-    Parameters
-    ----------
-    data : pd.DataFrame
-        The DataFrame to save.
-    file_path : str
-        Path without extension. Example: "data/myfile"
-    ext : str
-        File extension. Supported:
-        - .csv      → pd.to_csv()
-        - .xlsx     → pd.to_excel()
-        - .json     → pd.to_json()
-        - .parquet  → pd.to_parquet()
-        - .pkl      → pd.to_pickle()
-        - .feather  → pd.to_feather()
-        - .tsv      → pd.to_csv(sep='\\t')
-        - .html     → pd.to_html()
-        - .xml      → pd.to_xml()
-    Returns
-    -------
-    None
-    Examples
-    --------
-    >>> _save_as(df, "data/myfile", ".parquet")
-    >>> _save_as(df, "data/myfile", ".csv")
-    """
-    ext = ext.strip().lower()
-    if not ext.startswith("."):
-        ext = f".{ext}"
-    full_path = f"{current_dir}{ext}"
-    writers = {
-        ".csv":     lambda: data.to_csv(full_path, index=False),
-        ".tsv":     lambda: data.to_csv(full_path, sep='\t', index=False),
-        ".xlsx":    lambda: data.to_excel(full_path, index=False),
-        ".json":    lambda: data.to_json(full_path, orient="records", indent=2),
-        ".parquet": lambda: data.to_parquet(full_path, index=False),
-        ".pkl":     lambda: data.to_pickle(full_path),
-        ".feather": lambda: data.to_feather(full_path),
-        ".html":    lambda: data.to_html(full_path, index=False),
-        ".xml":     lambda: data.to_xml(full_path, index=False),
-    }
-    writer = writers.get(ext)
-    if writer is None:
-        raise ValueError(
-            f"Unsupported extension: {ext!r}\n"
-            f"Supported: {list(writers.keys())}"
-        )
-    writer()
-    print(f"Saved: {full_path}")
-    print("-"*50)
 def _val_dtype(x,dtype):
     if dtype == str:
@@ -380,4 +322,65 @@ def auto_fix(data_input: Union[str, pd.DataFrame]):
                 df[ColName] = df[ColName].apply(lambda x: _val_dtype(x,dtype))
             print("Done!")
             print("—"*35)
-    return df
+    return df
+def save_as(data: pd.DataFrame, current_dir = str(Path.cwd()), ext= str) -> None:
+    """
+    Saves a DataFrame to a file with the specified format.
+    Parameters
+    ----------
+    data : pd.DataFrame
+        The DataFrame to save.
+    current_dir : str
+        Path without extension. Example: "data/myfile"
+    ext : str
+        File extension. Supported:
+        - .csv      → pd.to_csv()
+        - .xlsx     → pd.to_excel()
+        - .json     → pd.to_json()
+        - .parquet  → pd.to_parquet()
+        - .pkl      → pd.to_pickle()
+        - .feather  → pd.to_feather()
+        - .tsv      → pd.to_csv(sep='\\t')
+        - .html     → pd.to_html()
+        - .xml      → pd.to_xml()
+    Returns
+    -------
+    None
+    Examples
+    --------
+    >>> save_as(df, "data/myfile", ".parquet")
+    >>> save_as(df, "data/myfile", ".csv")
+    """
+    ext = ext.strip().lower()
+    if not ext.startswith("."):
+        ext = f".{ext}"
+    full_path = f"{current_dir}{ext}"
+    writers = {
+        ".csv":     lambda: data.to_csv(full_path, index=False),
+        ".tsv":     lambda: data.to_csv(full_path, sep='\t', index=False),
+        ".xlsx":    lambda: data.to_excel(full_path, index=False),
+        ".json":    lambda: data.to_json(full_path, orient="records", indent=2),
+        ".parquet": lambda: data.to_parquet(full_path, index=False),
+        ".pkl":     lambda: data.to_pickle(full_path),
+        ".feather": lambda: data.to_feather(full_path),
+        ".html":    lambda: data.to_html(full_path, index=False),
+        ".xml":     lambda: data.to_xml(full_path, index=False),
+    }
+    writer = writers.get(ext)
+    if writer is None:
+        raise ValueError(
+            f"Unsupported extension: {ext!r}\n"
+            f"Supported: {list(writers.keys())}"
+        )
+    writer()
+    print(f"Saved: {full_path}")
+    print("-"*50)

{deepcsv-0.6.3 → deepcsv-0.6.4/deepcsv.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepcsv
-Version: 0.6.3
+Version: 0.6.4
 Summary: Automatically processes data files in directories, converts array-like strings to NumPy arrays, detects and fixes data type issues, and saves results as optimized Parquet files and MORE!
 Home-page: https://github.com/abdubakr77/deepcsv
 Author: Abdullah Bakr
@@ -35,6 +35,13 @@ Dynamic: requires-python
 Dynamic: summary
 # deepcsv
+![PyPI Downloads](https://img.shields.io/pypi/dm/deepcsv?color=blue&label=Downloads)
+![PyPI Version](https://img.shields.io/pypi/v/deepcsv?color=green&label=Version)
+![Python](https://img.shields.io/pypi/pyversions/deepcsv?color=yellow&label=Python)
+![License](https://img.shields.io/github/license/abdubakr77/deepcsv?color=orange)
+![GitHub Stars](https://img.shields.io/github/stars/abdubakr77/deepcsv?color=yellow)
 > *"You think you saved a list. You open it tomorrow — and it's a string."*
 `deepcsv` was built to solve exactly this problem.
@@ -245,18 +252,13 @@ auto_fix(data_input: Union[str, pd.DataFrame])
 ### Added
-- `process_file` — Added Doc & Examples for new params in function
-- `process_all_files` — Added Doc & Examples for new params in function
-- `process_file` & `process_all_file` — Added New parameter `to_list` to be real python list if you don't need array
-- Added `auto_fix` function for automatic data type correction in DataFrames for mixed Dtypes.
-- Added logg to `auto_fix` to track changes made to columns.
-- Added Documentation for `auto_fix` function to understand more about function.
+- `process_file()` — Converts columns whose values are numbers stored as strings to numeric values (float)
+- `save_as` — Added function to save dataset file
 ---
-### Changed
+### Fixes
-- `process_file()` — Changed `save_file_extension` parameter to `file_format`
-- `process_all_files()` — Changed `file_extension` parameter to `file_format`
+- `process_file()` — Fixed some bugs in `file_format` parameter
 ---

{deepcsv-0.6.3 → deepcsv-0.6.4}/setup.py RENAMED Viewed

@@ -8,7 +8,7 @@ changelog = (this_directory / "CHANGELOG.md").read_text(encoding="utf-8")
 setup(
     name="deepcsv",
-    version="0.6.3",
+    version="0.6.4",
     author="Abdullah Bakr",
     author_email="abdubakora1232@gmail.com",
     description="Automatically processes data files in directories, converts array-like strings to NumPy arrays, detects and fixes data type issues, and saves results as optimized Parquet files and MORE!",

deepcsv-0.6.3/CHANGELOG.md DELETED Viewed

@@ -1,21 +0,0 @@
-# Changelog
----
-### Added
-- `process_file` — Added Doc & Examples for new params in function
-- `process_all_files` — Added Doc & Examples for new params in function
-- `process_file` & `process_all_file` — Added New parameter `to_list` to be real python list if you don't need array
-- Added `auto_fix` function for automatic data type correction in DataFrames for mixed Dtypes.
-- Added logg to `auto_fix` to track changes made to columns.
-- Added Documentation for `auto_fix` function to understand more about function.
----
-### Changed
-- `process_file()` — Changed `save_file_extension` parameter to `file_format`
-- `process_all_files()` — Changed `file_extension` parameter to `file_format`
----

deepcsv-0.6.3/deepcsv/__init__.py DELETED Viewed

@@ -1,17 +0,0 @@
-from .deepcsv import process_all_files, process_file
-from .utils import read_any, clean_values
-from importlib.metadata import version
-import requests
-def _check_for_updates():
-    try:
-        response = requests.get("https://pypi.org/pypi/deepcsv/json")
-        latest = response.json()["info"]["version"]
-        current = version("deepcsv")
-        if latest != current:
-            print(f"DeepCSV: New version {latest} available! — run 'pip install -U deepcsv'")
-    except Exception:
-        pass
-_check_for_updates()