PyPI - junshan-kit - Versions diffs - 2.4.7__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl - Mend

junshan-kit 2.4.7__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of junshan-kit might be problematic. Click here for more details.

Files changed (19)

junshan_kit/DataHub.py +114 -0
junshan_kit/DataProcessor.py +114 -24
junshan_kit/DataSets.py +186 -37
junshan_kit/{Models.py → ModelsHub.py} +5 -0
junshan_kit/ParametersHub.py +404 -0
junshan_kit/Print_Info.py +6 -2
junshan_kit/TrainingHub.py +75 -0
junshan_kit/kit.py +94 -30
{junshan_kit-2.4.7.dist-info → junshan_kit-2.4.9.dist-info}/METADATA +2 -2
junshan_kit-2.4.9.dist-info/RECORD +12 -0
junshan_kit/ComOptimizers.py +0 -126
junshan_kit/ExperimentHub.py +0 -338
junshan_kit/SPBM.py +0 -350
junshan_kit/SPBM_func.py +0 -601
junshan_kit/TrainingParas.py +0 -470
junshan_kit/check_args.py +0 -116
junshan_kit/datahub.py +0 -281
junshan_kit-2.4.7.dist-info/RECORD +0 -16
{junshan_kit-2.4.7.dist-info → junshan_kit-2.4.9.dist-info}/WHEEL +0 -0

junshan_kit/kit.py CHANGED Viewed

@@ -6,7 +6,7 @@
 """
 import zipfile
-import os, time
+import os, time, openml
 from selenium import webdriver
 from selenium.webdriver.common.by import By
@@ -35,7 +35,7 @@ def unzip_file(zip_path: str, unzip_folder: str):
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         zip_ref.extractall(unzip_folder)
-    print(f"✅ Extracted '{zip_path}' to '{os.path.abspath(unzip_folder)}'")
+    print(f"- Extracted '{zip_path}' to '{os.path.abspath(unzip_folder)}'")
 # =============================================================
@@ -73,13 +73,13 @@ class JianguoyunDownloaderChrome:
         self.driver = webdriver.Chrome(options=self.chrome_options)
     def open_page(self):
-        print(f"🌐 Opening page: {self.url}")
+        print(f">>> Opening page: {self.url}")
         self.driver.get(self.url)
-        print(f"✅ Page loaded: {self.driver.title}")
+        print(f">>> Page loaded: {self.driver.title}")
     def click_download_button(self):
         """Find and click the 'Download' button (supports English and Chinese)."""
-        print("🔍 Searching for the download button...")
+        print(">>> Searching for the download button...")
         wait = WebDriverWait(self.driver, 30)
         try:
@@ -97,33 +97,33 @@ class JianguoyunDownloaderChrome:
             # Click using JavaScript to avoid overlay or interaction issues
             self.driver.execute_script("arguments[0].click();", button)
-            print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")
+            print(f">>> Download button clicked. Files will be saved to: {self.download_path}")
             # If the cloud service opens a new tab, switch to it
             time.sleep(3)
             if len(self.driver.window_handles) > 1:
                 self.driver.switch_to.window(self.driver.window_handles[-1])
-                print("📂 Switched to the new download tab.")
+                print(">>> Switched to the new download tab.")
         except Exception as e:
-            print("❌ Failed to find or click the download button:", e)
+            print(">>> Failed to find or click the download button:", e)
             raise
     def wait_for_downloads(self, timeout=3600):
-        print("⏳ Waiting for downloads to finish...")
+        print(">>> Waiting for downloads to finish...")
         start_time = time.time()
         while time.time() - start_time < timeout:
             downloading = [f for f in os.listdir(self.download_path) if f.endswith(".crdownload")]
             if not downloading:
-                print("✅ Download completed!")
+                print(">>> Download completed!")
                 return
             time.sleep(2)
-        print("⚠️ Timeout: download not completed within 1 hour")
+        print(">>> Timeout: download not completed within 1 hour")
     def close(self):
         self.driver.quit()
-        print("🚪 Browser closed.")
+        print(">>> Browser closed.")
     def run(self):
         print('*' * 60)
@@ -132,7 +132,7 @@ class JianguoyunDownloaderChrome:
             self.click_download_button()
             self.wait_for_downloads()
         except Exception as e:
-            print("❌ Error:", e)
+            print(">>> Error:", e)
         finally:
             self.close()
         print('*' * 60)
@@ -169,13 +169,13 @@ class JianguoyunDownloaderFirefox:
         self.driver = webdriver.Firefox(service=service, options=options)
     def open_page(self):
-        print(f"🌐 Opening page: {self.url}")
+        print(f">>> Opening page: {self.url}")
         self.driver.get(self.url)
-        print(f"✅ Page loaded: {self.driver.title}")
+        print(f">>> Page loaded: {self.driver.title}")
     def click_download_button(self):
         """Find and click the 'Download' button (supports English and Chinese)."""
-        print("🔍 Searching for the download button...")
+        print(">>> Searching for the download button...")
         wait = WebDriverWait(self.driver, 30)
         try:
@@ -193,21 +193,21 @@ class JianguoyunDownloaderFirefox:
             # Click using JavaScript to avoid overlay or interaction issues
             self.driver.execute_script("arguments[0].click();", button)
-            print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")
+            print(f">>> Download button clicked. Files will be saved to: {self.download_path}")
             # If the cloud service opens a new tab, switch to it
             time.sleep(3)
             if len(self.driver.window_handles) > 1:
                 self.driver.switch_to.window(self.driver.window_handles[-1])
-                print("📂 Switched to the new download tab.")
+                print(">>> Switched to the new download tab.")
         except Exception as e:
-            print("❌ Failed to find or click the download button:", e)
+            print(">>> Failed to find or click the download button:", e)
             raise
     def wait_for_download(self, timeout=3600):
         """Wait until all downloads are finished (auto-detects browser type)."""
-        print("⏳ Waiting for downloads to finish...")
+        print(">>> Waiting for downloads to finish...")
         start_time = time.time()
         # Determine the temporary file extension based on the browser type
@@ -216,13 +216,13 @@ class JianguoyunDownloaderFirefox:
         while time.time() - start_time < timeout:
             downloading = [f for f in os.listdir(self.download_path) if f.endswith(temp_ext)]
             if not downloading:
-                print("✅ Download completed!")
+                print(">>> Download completed!")
                 return True
             time.sleep(2)
     def close(self):
-        print("🛑 Closing browser...")
+        print(">>> Closing browser...")
         self.driver.quit()
     def run(self):
@@ -232,17 +232,81 @@ class JianguoyunDownloaderFirefox:
             self.click_download_button()
             self.wait_for_download(timeout=3600)
         except Exception as e:
-            print("❌ Error:", e)
+            print(">>> Error:", e)
         finally:
             self.close()
         print('*' * 60)
-def set_seed(seed=42):
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    np.random.seed(seed)
-    random.seed(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
+def download_openml_data(data_name):
+    """
+    Returns
+    -------
+    X : ndarray, dataframe, or sparse matrix, shape (n_samples, n_columns)
+        Dataset
+    y : ndarray or pd.Series, shape (n_samples, ) or None
+        Target column
+    categorical_indicator : boolean ndarray
+        Mask that indicate categorical features.
+    attribute_names : List[str]
+        List of attribute names.
+    """
+    openml.config.set_root_cache_directory(f"./exp_data/{data_name}")
+    dataset = openml.datasets.get_dataset(f'{data_name}', download_data=True)
+    X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe")
+    return X, y, categorical_indicator, attribute_names
+def import_data_path_to_ignore():
+    def get_folder_size(folder_path):
+        """Recursively calculate the total size of a folder (in bytes)."""
+        total_size = 0
+        for root, dirs, files in os.walk(folder_path):
+            for f in files:
+                try:
+                    total_size += os.path.getsize(os.path.join(root, f))
+                except Exception:
+                    pass
+        return total_size
+    def list_and_ignore_large_folders(folder_path, limit_mb=99):
+        """List folder sizes and append large ones (> limit_mb) to .gitignore."""
+        gitignore_path = os.path.join(os.getcwd(), ".gitignore")
+        ignore_list = []
+        # Read existing .gitignore entries to avoid duplicates
+        if os.path.exists(gitignore_path):
+            with open(gitignore_path, "r", encoding="utf-8") as f:
+                existing_ignores = set(line.strip() for line in f if line.strip())
+        else:
+            existing_ignores = set()
+        for entry in os.scandir(folder_path):
+            if entry.is_dir():
+                folder_size_mb = get_folder_size(entry.path) / (1024 * 1024)
+                print(f"{entry.path}/ - {folder_size_mb:.2f} MB")
+                if folder_size_mb > limit_mb:
+                    rel_path = os.path.relpath(entry.path, start=os.getcwd())
+                    if rel_path not in existing_ignores:
+                        ignore_list.append(rel_path)
+            elif entry.is_file():
+                file_size_mb = os.path.getsize(entry.path) / (1024 * 1024)
+                print(f"{entry.path} - {file_size_mb:.2f} MB")
+        # Append new paths to .gitignore
+        if ignore_list:
+            with open(gitignore_path, "a", encoding="utf-8") as f:
+                for p in ignore_list:
+                    f.write(p + "\n")
+            print(f"\n✅ The following paths have been added to .gitignore:\n" + "\n".join(ignore_list))
+        else:
+            print("\nNo folders exceed the size limit (99 MB).")
+    folder_path = "./exp_data"
+    list_and_ignore_large_folders(folder_path, limit_mb=99)

{junshan_kit-2.4.7.dist-info → junshan_kit-2.4.9.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: junshan_kit
-Version: 2.4.7
+Version: 2.4.9
 Summary: This is an optimization tool.
 Author-email: Junshan Yin <junshanyin@163.com>
 Requires-Dist: kaggle==1.7.4.5
 Requires-Dist: kagglehub==0.3.13
-Requires-Dist: pandas==2.3.3
+Requires-Dist: openml==0.15.1
 Requires-Dist: selenium==4.36.0

junshan_kit-2.4.9.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+junshan_kit/DataHub.py,sha256=D9G2vjCNvDLer4qoKdowgWJChLMQQn7cVhAPZLvRrbE,3332
+junshan_kit/DataProcessor.py,sha256=-6qjG52NDYq746vBPpc0uW2cfbc4syqSWZIzTxJt6fE,11806
+junshan_kit/DataSets.py,sha256=hwGnJsb-Lj90lk6VBwmsDBb3-IA_WgUWzAKayHyq2AI,13391
+junshan_kit/ModelsHub.py,sha256=z9NyC4PTxo3wCxa2XxOfcjrw9NcDs0LCjBGCp6Z-90s,7084
+junshan_kit/ParametersHub.py,sha256=zG3Dus5PZ57j0ZC-Xq4PGQSMt0MYzelhe-g2ZQMyL90,11244
+junshan_kit/Print_Info.py,sha256=yiGc6Qlprj0ds6w2DP7ScAgTBZwswxXqxuIrQ3_liL8,3111
+junshan_kit/TrainingHub.py,sha256=YDs6u__5RVxlY6f5jOILhAi-DRAe2FXkRtc7dvdrENY,2457
+junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+junshan_kit/kit.py,sha256=F9f5qqn9ve-UVoYtXlFmNGl4YJ3eEy6T1yRrC0s-Wpw,12367
+junshan_kit-2.4.9.dist-info/METADATA,sha256=msRObtJFPgLAjRSCFhpEkL4RS_Vh0n5fNC1xRp-0gYs,267
+junshan_kit-2.4.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+junshan_kit-2.4.9.dist-info/RECORD,,

junshan_kit/ComOptimizers.py DELETED Viewed

@@ -1,126 +0,0 @@
-import torch, os, time
-from torch.optim.optimizer import Optimizer
-from torch.nn.utils import parameters_to_vector, vector_to_parameters
-import junshan_kit.SPBM_func as SPBM_func
-class SPSmax(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.c = hyperparams['c']
-        self.gamma = hyperparams['gamma']
-        if 'f_star' not in Paras or Paras['f_star'] is None:
-            self.f_star = 0
-        else:
-            self.f_star = Paras['f_star']
-        self.step_size = []
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for SPSmax")
-        # Reset the gradient and perform forward computation
-        loss = closure()
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-            # Step-size
-            step_size = (loss - self.f_star) / ((self.c * torch.norm(g_k, p=2) ** 2) + 1e-8)
-            step_size = min(step_size, self.gamma)
-            self.step_size.append(step_size)
-            # Update
-            xk = xk - step_size * g_k
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-        # emporarily return loss (tensor type)
-        return loss
-class ALR_SMAG(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.c = hyperparams['c']
-        self.eta_max = hyperparams['eta_max']
-        self.beta = hyperparams['beta']
-        if 'f_star' not in Paras or Paras['f_star'] is None:
-            self.f_star = 0
-        else:
-            self.f_star = Paras['f_star']
-        self.step_size = []
-        self.d_k = torch.zeros_like(parameters_to_vector(self.model.parameters()))
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for SPSmax")
-        # Reset the gradient and perform forward computation
-        loss = closure()
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-            self.d_k = self.beta * self.d_k + g_k
-            # Step-size
-            step_size = (loss - self.f_star) / ((self.c * torch.norm(self.d_k, p=2) ** 2) + 1e-8)
-            step_size = min(step_size, self.eta_max)
-            self.step_size.append(step_size)
-            # Update
-            xk = xk - step_size * g_k
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-        # emporarily return loss (tensor type)
-        return loss
-# ------------ Bundle Method --------------------
-class Bundle(Optimizer):
-    def __init__(self, params, model, hyperparams, Paras):
-        defaults = dict()
-        super().__init__(params, defaults)
-        self.model = model
-        self.cutting_num = hyperparams['cutting_number']
-        self.delta = hyperparams['delta']
-        self.Paras = Paras
-        self.x_his, self.g_his, self.f_his = [], [], []
-    def step(self, closure=None):
-        if closure is None:
-            raise RuntimeError("Closure required for CuttingPlaneOptimizer")
-        # Reset the gradient and perform forward computation
-        loss = closure()
-        with torch.no_grad():
-            xk = parameters_to_vector(self.model.parameters())
-            # print(torch.norm(xk))
-            g_k = parameters_to_vector([p.grad if p.grad is not None else torch.zeros_like(p) for p in self.model.parameters()])
-            # Add cutting plane
-            x_his, f_his, g_his = SPBM_func.add_cutting(self.x_his, self.f_his, self.g_his,xk.detach().clone(), g_k.detach().clone(), loss.detach().clone(), self.cutting_num)
-            # the coefficient of dual problem
-            Gk, rk, ek = SPBM_func.get_var(x_his, f_his, g_his, self.delta)
-            # SOVER (dual)
-            xk = SPBM_func.bundle(Gk, ek, xk, self.delta, self.Paras)
-            # print(len(self.f_his))
-            vector_to_parameters(xk, self.model.parameters())
-        # loss（tensor）
-        return loss

junshan-kit 2.4.7__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl

Potentially problematic release.

junshan-kit 2.4.7__py2.py3-none-any.whl → 2.4.9__py2.py3-none-any.whl