junshan-kit 2.1.8__py2.py3-none-any.whl → 2.1.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/datahub.py CHANGED
@@ -47,7 +47,7 @@ class kaggle_data:
47
47
  # example: read_data(copy_path='./exp_data')
48
48
 
49
49
 
50
- class JianguoDownloader:
50
+ class JianguoDownloaderChrome:
51
51
  def __init__(self, url: str, download_path: str = "./downloads"):
52
52
  self.url = url
53
53
  self.download_path = os.path.abspath(download_path)
@@ -95,7 +95,7 @@ class JianguoDownloader:
95
95
  self.driver.switch_to.window(self.driver.window_handles[-1])
96
96
  print("📂 Switched to download tab.")
97
97
 
98
- def wait_for_downloads(self, timeout=300):
98
+ def wait_for_downloads(self, timeout=30000):
99
99
  """Wait until all downloads are finished."""
100
100
  print("⏳ Waiting for downloads to finish...")
101
101
  start_time = time.time()
@@ -135,3 +135,5 @@ class JianguoDownloader:
135
135
  self.close()
136
136
  print('*'*50)
137
137
 
138
+
139
+
junshan_kit/meta.py ADDED
@@ -0,0 +1,248 @@
1
+ import os
2
+ import time
3
+ import shutil
4
+ import warnings
5
+ import kagglehub
6
+ from kaggle.api.kaggle_api_extended import KaggleApi
7
+
8
+ from selenium import webdriver
9
+ from selenium.webdriver.common.by import By
10
+ from selenium.webdriver.support.ui import WebDriverWait
11
+ from selenium.webdriver.support import expected_conditions as EC
12
+
13
+ # =============================================================
14
+ # KAGGLE DATA MANAGEMENT
15
+ # =============================================================
16
+
17
class KaggleData:
    """List and download Kaggle datasets through the Kaggle API and kagglehub.

    Uses the module-level ``KaggleApi`` and ``kagglehub`` imports. Every
    method defaults to the ``junshan888`` account, so existing calls keep
    working unchanged.
    """

    def list_datasets(self, user='junshan888'):
        """Print the titles of the datasets returned for ``user``.

        Args:
            user: Account name forwarded to ``KaggleApi.dataset_list``.

        Returns:
            The list of titles that were printed (empty when the API
            returned nothing).
        """
        api = KaggleApi()
        api.authenticate()
        datasets = api.dataset_list(user=user)

        # ``datasets`` may be falsy (nothing returned); treat that as empty.
        titles = [ds.title for ds in (datasets or [])]

        print('Available datasets:')
        print('*' * 60)
        for title in titles:
            print(title)
        print('*' * 60)
        return titles

    def list_user_datasets(self):
        """Deprecated alias of :meth:`list_datasets`.

        Emits a ``DeprecationWarning`` attributed to the caller
        (``stacklevel=2``) and returns whatever ``list_datasets()`` returns.
        """
        warnings.warn(
            "list_user_datasets() is deprecated. Use list_datasets() instead.",
            DeprecationWarning,
            stacklevel=2
        )
        return self.list_datasets()

    def download_data(self, data_name='letter-libsvm', copy_path=None,
                      user='junshan888'):
        """Download a Kaggle dataset and optionally copy it to a target folder.

        Args:
            data_name: Dataset name; combined with ``user`` into the
                ``"<user>/<data_name>"`` handle given to
                ``kagglehub.dataset_download``.
            copy_path: Optional directory that receives a copy of the
                downloaded files. It is created when missing, and existing
                contents are merged (``dirs_exist_ok=True``).
            user: Account that owns the dataset.

        Returns:
            ``copy_path`` when a copy was made, otherwise the path returned
            by ``kagglehub.dataset_download``. Returning it lets the caller
            locate the data even when no copy was requested.
        """
        path = kagglehub.dataset_download(f'{user}/{data_name}')
        if not copy_path:
            return path

        os.makedirs(copy_path, exist_ok=True)
        shutil.copytree(path, copy_path, dirs_exist_ok=True)
        print(f"✅ Dataset copied to: {copy_path}")
        return copy_path
45
+
46
+
47
+ # =============================================================
48
+ # JIANGUOYUN (NUTSTORE) CHROME VERSION
49
+ # =============================================================
50
+
51
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
52
+
53
class JianguoyunDownloaderChrome:
    """Download a file from a Jianguoyun (Nutstore) share page using Chrome.

    The constructor starts a Selenium-controlled Chrome configured to save
    downloads into ``download_path``. ``run()`` opens the page, clicks the
    download button, waits for the download and always closes the browser.
    """

    # Suffix this class treats as "download still in progress".
    _PARTIAL_SUFFIX = ".crdownload"

    def __init__(self, url, download_path="./downloads"):
        """Create the download directory and launch Chrome.

        Args:
            url: Address of the share page to open.
            download_path: Directory for downloaded files; stored as an
                absolute path and created when missing.
        """
        self.url = url
        self.download_path = os.path.abspath(download_path)
        os.makedirs(self.download_path, exist_ok=True)

        # Baseline of what is already in the directory, so that waiting for
        # a download only considers files that appear afterwards.
        self._files_before = set(os.listdir(self.download_path))

        self.chrome_options = ChromeOptions()
        prefs = {
            "download.default_directory": self.download_path,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        self.chrome_options.add_experimental_option("prefs", prefs)
        self.chrome_options.add_argument("--disable-gpu")
        self.chrome_options.add_argument("--no-sandbox")
        self.chrome_options.add_argument("--disable-dev-shm-usage")
        self.chrome_options.add_argument("--enable-features=NetworkService,NetworkServiceInProcess")
        # Uncomment for headless mode:
        # self.chrome_options.add_argument("--headless")

        self.driver = webdriver.Chrome(options=self.chrome_options)

    def open_page(self):
        """Load ``self.url`` in the browser and print the page title."""
        print(f"🌐 Opening page: {self.url}")
        self.driver.get(self.url)
        print(f"✅ Page loaded: {self.driver.title}")

    def click_download_button(self):
        """Find and click the 'Download' button (supports English and Chinese).

        Raises:
            Exception: Whatever was raised while locating or clicking the
                button; it is printed and then re-raised.
        """
        print("🔍 Searching for the download button...")
        wait = WebDriverWait(self.driver, 30)

        try:
            # Match both English 'Download' (case-insensitive) and Chinese '下载'
            xpath = (
                "//span[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
                " | //button[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
                " | //a[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
                " | //span[contains(text(),'下载')]"
                " | //button[contains(text(),'下载')]"
                " | //a[contains(text(),'下载')]"
            )

            button = wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))

            # Refresh the baseline right before triggering the download so
            # only files created by this click count as "new".
            self._files_before = set(os.listdir(self.download_path))

            # Click using JavaScript to avoid overlay or interaction issues
            self.driver.execute_script("arguments[0].click();", button)
            print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")

            # If the cloud service opens a new tab, switch to it
            time.sleep(3)
            if len(self.driver.window_handles) > 1:
                self.driver.switch_to.window(self.driver.window_handles[-1])
                print("📂 Switched to the new download tab.")

        except Exception as e:
            print("❌ Failed to find or click the download button:", e)
            raise

    def wait_for_downloads(self, timeout=3600, poll_interval=2, start_timeout=60):
        """Wait until a new download has appeared and finished.

        A download counts as finished once the directory contains at least
        one file that was not there at the baseline and none of the new
        files still carries the ``.crdownload`` suffix. Requiring a new
        file prevents reporting success when no download ever started.

        Args:
            timeout: Maximum total number of seconds to wait.
            poll_interval: Seconds to sleep between directory checks.
            start_timeout: Seconds to wait for the first new file before
                giving up on the download ever starting.

        Returns:
            ``True`` when the download finished, ``False`` on timeout or
            when no download started.
        """
        print("⏳ Waiting for downloads to finish...")
        began = time.monotonic()
        while True:
            elapsed = time.monotonic() - began
            if elapsed >= timeout:
                break

            new_files = set(os.listdir(self.download_path)) - self._files_before
            pending = [name for name in new_files
                       if name.endswith(self._PARTIAL_SUFFIX)]

            if new_files and not pending:
                print("✅ Download completed!")
                return True
            if not new_files and elapsed >= start_timeout:
                print(f"⚠️ No download started within {start_timeout} seconds")
                return False
            time.sleep(poll_interval)

        # Report the timeout that was actually used, not a fixed "1 hour".
        print(f"⚠️ Timeout: download not completed within {timeout} seconds")
        return False

    def close(self):
        """Quit the browser."""
        self.driver.quit()
        print("🚪 Browser closed.")

    def run(self):
        """Open the page, click download, wait, and always close the browser.

        Errors are printed rather than propagated (best-effort behaviour).

        Returns:
            ``True`` when the download finished, ``False`` otherwise.
        """
        print('*' * 60)
        finished = False
        try:
            self.open_page()
            self.click_download_button()
            finished = self.wait_for_downloads()
        except Exception as e:
            print("❌ Error:", e)
        finally:
            self.close()
            print('*' * 60)
        return finished
141
+
142
+
143
+ # =============================================================
144
+ # JIANGUOYUN (NUTSTORE) FIREFOX VERSION
145
+ # =============================================================
146
+
147
+ from selenium.webdriver.firefox.options import Options as FirefoxOptions
148
+ from selenium.webdriver.firefox.service import Service
149
+
150
class JianguoyunDownloaderFirefox:
    """Download a file from a Jianguoyun (Nutstore) share page using Firefox.

    The constructor starts a Selenium-controlled Firefox configured to save
    downloads into ``download_path``. ``run()`` opens the page, clicks the
    download button, waits for the download and always closes the browser.
    """

    # Suffix this class treats as "download still in progress".
    _PARTIAL_SUFFIX = ".part"

    def __init__(self, url, download_path="./downloads",
                 geckodriver_path="/snap/bin/geckodriver", headless=True):
        """Create the download directory and launch Firefox.

        Args:
            url: Address of the share page to open.
            download_path: Directory for downloaded files; stored as an
                absolute path and created when missing.
            geckodriver_path: Location of the geckodriver executable handed
                to the Selenium ``Service``. Defaults to the snap location
                that was previously hard-coded.
            headless: Whether to pass ``--headless`` to Firefox.
        """
        self.url = url
        self.download_path = os.path.abspath(download_path)
        os.makedirs(self.download_path, exist_ok=True)

        # Baseline of what is already in the directory, so that waiting for
        # a download only considers files that appear afterwards.
        self._files_before = set(os.listdir(self.download_path))

        options = FirefoxOptions()
        if headless:
            options.add_argument("--headless")
        options.set_preference("browser.download.folderList", 2)
        options.set_preference("browser.download.manager.showWhenStarting", False)
        options.set_preference("browser.download.dir", self.download_path)
        options.set_preference("browser.helperApps.neverAsk.saveToDisk",
                               "application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip")
        options.set_preference("pdfjs.disabled", True)

        service = Service(geckodriver_path)
        self.driver = webdriver.Firefox(service=service, options=options)

    def open_page(self):
        """Load ``self.url`` in the browser and print the page title."""
        print(f"🌐 Opening page: {self.url}")
        self.driver.get(self.url)
        print(f"✅ Page loaded: {self.driver.title}")

    def click_download_button(self):
        """Find and click the 'Download' button (supports English and Chinese).

        Raises:
            Exception: Whatever was raised while locating or clicking the
                button; it is printed and then re-raised.
        """
        print("🔍 Searching for the download button...")
        wait = WebDriverWait(self.driver, 30)

        try:
            # Match both English 'Download' (case-insensitive) and Chinese '下载'
            xpath = (
                "//span[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
                " | //button[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
                " | //a[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
                " | //span[contains(text(),'下载')]"
                " | //button[contains(text(),'下载')]"
                " | //a[contains(text(),'下载')]"
            )

            button = wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))

            # Refresh the baseline right before triggering the download so
            # only files created by this click count as "new".
            self._files_before = set(os.listdir(self.download_path))

            # Click using JavaScript to avoid overlay or interaction issues
            self.driver.execute_script("arguments[0].click();", button)
            print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")

            # If the cloud service opens a new tab, switch to it
            time.sleep(3)
            if len(self.driver.window_handles) > 1:
                self.driver.switch_to.window(self.driver.window_handles[-1])
                print("📂 Switched to the new download tab.")

        except Exception as e:
            print("❌ Failed to find or click the download button:", e)
            raise

    def wait_for_download(self, timeout=3600, poll_interval=5, start_timeout=60):
        """Wait until a new download has appeared and finished.

        A download counts as finished once the directory contains at least
        one file that was not there at the baseline and none of the new
        files still carries the ``.part`` suffix. Checking that *no* new
        ``.part`` file remains (rather than that *some* non-``.part`` file
        exists) keeps an unrelated or pre-existing file from being mistaken
        for a completed download.

        Args:
            timeout: Maximum total number of seconds to wait.
            poll_interval: Seconds to sleep between directory checks.
            start_timeout: Seconds to wait for the first new file before
                giving up on the download ever starting.

        Returns:
            ``True`` when the download finished, ``False`` on timeout or
            when no download started.
        """
        print("⏳ Waiting for downloads to finish...")
        began = time.monotonic()
        while True:
            elapsed = time.monotonic() - began
            if elapsed >= timeout:
                break

            new_files = set(os.listdir(self.download_path)) - self._files_before
            pending = [name for name in new_files
                       if name.endswith(self._PARTIAL_SUFFIX)]

            if new_files and not pending:
                print("✅ Download completed!")
                return True
            if not new_files and elapsed >= start_timeout:
                print(f"⚠️ No download started within {start_timeout} seconds")
                return False
            time.sleep(poll_interval)

        # Report the timeout that was actually used, not a fixed "1 hour".
        print(f"⚠️ Timeout: download not completed within {timeout} seconds")
        return False

    # Same method under the plural name used by the Chrome downloader, so
    # both classes can be driven through one interface.
    wait_for_downloads = wait_for_download

    def close(self):
        """Quit the browser."""
        print("🛑 Closing browser...")
        self.driver.quit()

    def run(self):
        """Open the page, click download, wait, and always close the browser.

        Errors are printed rather than propagated (best-effort behaviour).

        Returns:
            ``True`` when the download finished, ``False`` otherwise.
        """
        print('*' * 60)
        finished = False
        try:
            self.open_page()
            self.click_download_button()
            finished = self.wait_for_download(timeout=3600)
        except Exception as e:
            print("❌ Error:", e)
        finally:
            self.close()
            print('*' * 60)
        return finished
232
+
233
+
234
+ # =============================================================
235
+ # MAIN
236
+ # =============================================================
237
+
238
+ # if __name__ == "__main__":
239
+ # url = "https://www.jianguoyun.com/p/DdyHJxUQqdHDDRjvtI0GIAA"
240
+
241
+ # use_chrome = True # Set True to use Chrome, False for Firefox
242
+
243
+ # if use_chrome:
244
+ # downloader = JianguoyunDownloaderChrome(url)
245
+ # else:
246
+ # downloader = JianguoyunDownloaderFirefox(url)
247
+
248
+ # downloader.run()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.1.8
3
+ Version: 2.1.9
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: kaggle==1.7.4.5
@@ -0,0 +1,8 @@
1
+ junshan_kit/DataProcessor.py,sha256=AW_1jROexC3s41-RgzqzYVwPI0sOf3tzjiph4qa_Vcw,3882
2
+ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ junshan_kit/datahub.py,sha256=I34e26psFS8WK4X6SNucKPLtdBm0Ujzqa0VDIRACah4,5163
4
+ junshan_kit/meta.py,sha256=wzctYAckD3Gex1MMK5J6diV5z66E_rDUb814NzBQ5VY,9816
5
+ junshan_kit/test.py,sha256=uSckjcr_Wgj__YPTwD6x0GY8Hfn5GBEXIpRf9vIYBbU,91
6
+ junshan_kit-2.1.9.dist-info/METADATA,sha256=AjGMtbds79-V-BRJoU-vx0_PYNy9tJ380zOKlI8bOPs,329
7
+ junshan_kit-2.1.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
8
+ junshan_kit-2.1.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- junshan_kit/DataProcessor.py,sha256=AW_1jROexC3s41-RgzqzYVwPI0sOf3tzjiph4qa_Vcw,3882
2
- junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- junshan_kit/datahub.py,sha256=mofbkp8ry6_LM_vW1LcZolp5tfkqOp_cUiwjfDFbRqI,5153
4
- junshan_kit/test.py,sha256=uSckjcr_Wgj__YPTwD6x0GY8Hfn5GBEXIpRf9vIYBbU,91
5
- junshan_kit-2.1.8.dist-info/METADATA,sha256=eFQmrVEUORZRhZqBCOlctfSU3vwCQ2RB4Jpyj1coAmE,329
6
- junshan_kit-2.1.8.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
- junshan_kit-2.1.8.dist-info/RECORD,,