junshan-kit 2.1.7__py2.py3-none-any.whl → 2.1.8__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,11 +91,11 @@ class CSVToPandas:
91
91
  print('='*70)
92
92
  print(f"{'Original size:':<35} {m_before} rows x {n_before} cols")
93
93
  print(f"{'Size after dropping NaNs:':<35} {m_after} rows x {n_after} cols")
94
- print(f"{'Export size (after encoding & scaling):':<35} {m_export} rows x {n_export} cols")
95
94
  print(f"{'Positive samples (+1):':<35} {pos_count}")
96
95
  print(f"{'Negative samples (-1):':<35} {neg_count}")
96
+ print(f"{'Export size (after encoding & scaling):':<35} {m_export} rows x {n_export} cols")
97
97
  print('-'*70)
98
- print(f"{'Note: categorical columns one-hot encoded, numerical standardized.':^70}")
98
+ print(f"{'Note: categorical columns one-hot encoded, numerical standardized.'}")
99
99
  print(f"More details: https://www.jianguoyun.com/p/DU6Lr9oQqdHDDRj5sI0GIAA")
100
100
  print('='*70 + '\n')
101
101
 
junshan_kit/datahub.py CHANGED
@@ -1,8 +1,13 @@
1
1
  import kagglehub
2
- import os
2
+ import os, time
3
3
  import warnings
4
4
  import shutil
5
5
  from kaggle.api.kaggle_api_extended import KaggleApi
6
+ from selenium import webdriver
7
+ from selenium.webdriver.common.by import By
8
+ from selenium.webdriver.chrome.options import Options
9
+ from selenium.webdriver.support.ui import WebDriverWait
10
+ from selenium.webdriver.support import expected_conditions as EC
6
11
 
7
12
  class kaggle_data:
8
13
  def list_datasets(self):
@@ -42,20 +47,91 @@ class kaggle_data:
42
47
  # example: read_data(copy_path='./exp_data')
43
48
 
44
49
 
50
class JianguoDownloader:
    """Download a file from a Jianguoyun share page through a Chrome session.

    The constructor creates the download directory, configures Chrome to
    save into it without prompting, and starts the browser immediately.
    Call :meth:`run` for the whole flow, or the individual steps
    (:meth:`open_page`, :meth:`click_download_button`,
    :meth:`wait_for_downloads`, :meth:`get_latest_file`, :meth:`close`).
    The instance can also be used as a context manager, which closes the
    browser on exit.
    """

    # Suffix Chrome gives a file while it is still being written.
    _PARTIAL_SUFFIX = ".crdownload"

    def __init__(self, url: str, download_path: str = "./downloads"):
        """Prepare the download directory and start Chrome.

        Args:
            url: Address of the share page to open.
            download_path: Directory that receives the downloaded files;
                created if missing and stored as an absolute path.
        """
        self.url = url
        self.download_path = os.path.abspath(download_path)
        os.makedirs(self.download_path, exist_ok=True)

        # Directory listing taken right before the download click; lets
        # wait_for_downloads() tell "not started yet" from "finished".
        self._files_before = None
        self._closed = False

        # Configure Chrome options
        self.chrome_options = Options()
        prefs = {
            "download.default_directory": self.download_path,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        self.chrome_options.add_experimental_option("prefs", prefs)

        # Optional stability flags
        self.chrome_options.add_argument("--disable-gpu")
        self.chrome_options.add_argument("--no-sandbox")
        self.chrome_options.add_argument("--disable-dev-shm-usage")
        self.chrome_options.add_argument("--enable-features=NetworkService,NetworkServiceInProcess")

        # Start Chrome
        self.driver = webdriver.Chrome(options=self.chrome_options)

    def __enter__(self):
        """Return the downloader itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser when the ``with`` block ends."""
        self.close()

    @staticmethod
    def _with_scheme(url):
        """Return *url* with ``https://`` prepended when it has no scheme.

        WebDriver navigation needs an absolute URL, so a bare host such as
        ``www.example.com`` would otherwise be rejected.
        """
        if "://" in url or url.startswith(("about:", "data:")):
            return url
        return f"https://{url}"

    def open_page(self):
        """Open the Jianguoyun share page."""
        target = self._with_scheme(self.url)
        print(f"🌐 Opening link: {target}")
        self.driver.get(target)

    def click_download_button(self):
        """Find and click the download button.

        Waits up to 30 seconds for a ``<span>`` whose text contains
        '下载' ("download"), then clicks its parent element via JavaScript.
        If a second window/tab exists three seconds later, the driver
        switches to the most recently opened one.
        """
        print("🔍 Looking for the download button...")
        wait = WebDriverWait(self.driver, 30)
        span = wait.until(
            EC.presence_of_element_located((By.XPATH, "//span[contains(text(),'下载')]"))
        )
        parent = span.find_element(By.XPATH, "./..")

        # Snapshot before clicking so a download that has not created any
        # file yet is not mistaken for a finished one.
        self._files_before = set(os.listdir(self.download_path))

        self.driver.execute_script("arguments[0].click();", parent)
        print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")

        # If Jianguoyun opens a new tab, switch to it
        time.sleep(3)
        if len(self.driver.window_handles) > 1:
            self.driver.switch_to.window(self.driver.window_handles[-1])
            print("📂 Switched to download tab.")

    def wait_for_downloads(self, timeout=300, poll_interval=2):
        """Wait until all downloads are finished.

        A download counts as finished when no ``.crdownload`` file is left
        in the download directory and, if :meth:`click_download_button`
        recorded a directory snapshot, at least one entry has appeared
        since that snapshot. Without a snapshot only the first condition
        is checked.

        Args:
            timeout: Maximum number of seconds to wait.
            poll_interval: Seconds to sleep between directory checks.

        Returns:
            True when the download finished, False on timeout.
        """
        print("⏳ Waiting for downloads to finish...")
        baseline = getattr(self, "_files_before", None)
        deadline = time.monotonic() + timeout
        while True:
            names = os.listdir(self.download_path)
            in_progress = [n for n in names if n.endswith(self._PARTIAL_SUFFIX)]
            started = baseline is None or any(n not in baseline for n in names)
            if started and not in_progress:
                print("✅ Download completed!")
                return True
            if time.monotonic() >= deadline:
                print("⏰ Timeout: downloads may not have finished.")
                return False
            time.sleep(poll_interval)

    def get_latest_file(self):
        """Return the most recently downloaded file (if any).

        Only regular files are considered; sub-directories and partial
        ``.crdownload`` files are ignored.

        Returns:
            Absolute path of the file with the newest ``os.path.getctime``
            value, or None when the directory holds no finished file.
        """
        candidates = [
            path
            for path in (
                os.path.join(self.download_path, name)
                for name in os.listdir(self.download_path)
                if not name.endswith(self._PARTIAL_SUFFIX)
            )
            if os.path.isfile(path)
        ]
        return max(candidates, key=os.path.getctime, default=None)

    def close(self):
        """Close the browser. Calling it again is a no-op."""
        if getattr(self, "_closed", False):
            return
        self.driver.quit()
        self._closed = True
        print("🚪 Browser closed.")

    def run(self):
        """Run the complete download process.

        Opens the page, clicks the download button, waits for the download
        and reports the newest file. Errors are printed rather than raised
        (best-effort), and the browser is always closed.

        Returns:
            Path of the downloaded file, or None when the download timed
            out, failed, or produced no file.
        """
        print('*'*50)
        latest = None
        try:
            self.open_page()
            self.click_download_button()
            # Only report a file when the wait actually succeeded; after a
            # timeout the newest file may be unrelated to this download.
            if self.wait_for_downloads():
                latest = self.get_latest_file()
                if latest:
                    print(f"📄 Latest downloaded file: {latest}")
        except Exception as e:
            print("❌ Error occurred:", e)
        finally:
            self.close()
        print('*'*50)
        return latest
61
137
 
junshan_kit/test.py CHANGED
@@ -1,5 +1,3 @@
1
- import DataProcessor
1
"""Manual smoke test for JianguoDownloader; starts a real Chrome session."""
from datahub import JianguoDownloader


if __name__ == "__main__":
    # Guarded so that importing this module never launches a browser.
    data2 = JianguoDownloader('www.lka.com', './expspe')
    # The constructor already started Chrome; quit it so the smoke test
    # does not leave a browser process behind.
    data2.close()
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: junshan_kit
3
+ Version: 2.1.8
4
+ Summary: This is an optimization tool.
5
+ Author-email: Junshan Yin <junshanyin@163.com>
6
+ Requires-Dist: kaggle==1.7.4.5
7
+ Requires-Dist: kagglehub==0.3.13
8
+ Requires-Dist: numpy==2.2.6
9
+ Requires-Dist: pandas==2.3.3
10
+ Requires-Dist: scikit-learn==1.7.1
11
+ Requires-Dist: selenium==4.36.0
@@ -0,0 +1,7 @@
1
+ junshan_kit/DataProcessor.py,sha256=AW_1jROexC3s41-RgzqzYVwPI0sOf3tzjiph4qa_Vcw,3882
2
+ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ junshan_kit/datahub.py,sha256=mofbkp8ry6_LM_vW1LcZolp5tfkqOp_cUiwjfDFbRqI,5153
4
+ junshan_kit/test.py,sha256=uSckjcr_Wgj__YPTwD6x0GY8Hfn5GBEXIpRf9vIYBbU,91
5
+ junshan_kit-2.1.8.dist-info/METADATA,sha256=eFQmrVEUORZRhZqBCOlctfSU3vwCQ2RB4Jpyj1coAmE,329
6
+ junshan_kit-2.1.8.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
+ junshan_kit-2.1.8.dist-info/RECORD,,
@@ -1,31 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: junshan_kit
3
- Version: 2.1.7
4
- Summary: This is an optimization tool.
5
- Author-email: Junshan Yin <junshanyin@163.com>
6
- Requires-Dist: kaggle==1.7.4.5
7
- Requires-Dist: kagglehub==0.3.13
8
- Requires-Dist: numpy==2.2.6
9
- Requires-Dist: pandas==2.3.3
10
- Requires-Dist: scikit-learn==1.7.1
11
- Description-Content-Type: text/markdown
12
-
13
- - For class kaggle_data in datahub
14
- - We need to set API of kaggle.
15
-
16
- ```python
17
- import junshan_kit.datahub
18
- data = junshan_kit.datahub.kaggle_data()
19
- data.list_user_datasets()
20
- data.read_data(data_name='letter-libsvm', copy_path='./exp_data/Letter')
21
- ```
22
-
23
-
24
-
25
-
26
-
27
-
28
-
29
-
30
-
31
-
@@ -1,7 +0,0 @@
1
- junshan_kit/DataProcessor.py,sha256=rp_w325h8EvKcLMSa12w5B-UA8G75O1qP0ogE6GDSE0,3886
2
- junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- junshan_kit/datahub.py,sha256=BWcG_TPW1xf_y_GzxRXanuOAB01WugBiO5r53EDbr8s,1815
4
- junshan_kit/test.py,sha256=jyZQPgX40HlLM23vGMbuZFwFBk7YiFqzzh9xuOTzbw8,91
5
- junshan_kit-2.1.7.dist-info/METADATA,sha256=ePQG7bT7y7yVU7iSI3CxnfNacwJLyAlSB7nEmAG3_NM,599
6
- junshan_kit-2.1.7.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
- junshan_kit-2.1.7.dist-info/RECORD,,