junshan-kit 2.2.3__py2.py3-none-any.whl → 2.5.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/meta.py DELETED
@@ -1,256 +0,0 @@
1
- """
2
- ----------------------------------------------------------------------
3
- >>> Author : Junshan Yin
4
- >>> Last Updated : 2025-10-12
5
- ----------------------------------------------------------------------
6
- """
7
-
8
- import os
9
- import time
10
- import shutil
11
- import warnings
12
- import kagglehub
13
- from kaggle.api.kaggle_api_extended import KaggleApi
14
-
15
- from selenium import webdriver
16
- from selenium.webdriver.common.by import By
17
- from selenium.webdriver.support.ui import WebDriverWait
18
- from selenium.webdriver.support import expected_conditions as EC
19
-
20
-
21
- # =============================================================
22
- # KAGGLE DATA MANAGEMENT
23
- # =============================================================
24
-
25
class KaggleData:
    """Helpers for listing and downloading a Kaggle user's datasets.

    Uses the ``KaggleApi`` client and the ``kagglehub`` module imported at the
    top of this file.
    """

    def list_datasets(self, user='junshan888'):
        """Print the title of every dataset the Kaggle API returns for *user*.

        Args:
            user: Kaggle account name whose datasets are listed.
        """
        api = KaggleApi()
        api.authenticate()
        datasets = api.dataset_list(user=user)
        print('Available datasets:')
        print('*' * 60)
        # The API result may be empty/None; `or ()` keeps the loop a no-op then.
        for ds in datasets or ():
            print(ds.title)  # type: ignore
        print('*' * 60)

    def list_user_datasets(self):
        """Deprecated alias of :meth:`list_datasets`."""
        warnings.warn(
            "list_user_datasets() is deprecated. Use list_datasets() instead.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller's line
        )
        return self.list_datasets()

    def download_data(self, data_name='letter-libsvm', copy_path=None,
                      user='junshan888'):
        """Download a Kaggle dataset and optionally copy it to a target folder.

        Args:
            data_name: Dataset slug under *user*.
            copy_path: Optional directory that receives a copy of the
                downloaded files. It is created if missing, and existing
                contents are merged into rather than rejected.
            user: Kaggle account name that owns the dataset.

        Returns:
            ``copy_path`` when a copy was requested, otherwise the path
            returned by ``kagglehub.dataset_download``.
        """
        path = kagglehub.dataset_download(f'{user}/{data_name}')
        if not copy_path:
            # Without a copy target the kagglehub path is the only handle the
            # caller has on the data, so hand it back instead of dropping it.
            return path

        os.makedirs(copy_path, exist_ok=True)
        shutil.copytree(path, copy_path, dirs_exist_ok=True)
        print(f"✅ Dataset copied to: {copy_path}")
        return copy_path
55
-
56
-
57
- # =============================================================
58
- # JIANGUOYUN (NUTSTORE) CHROME VERSION
59
- # =============================================================
60
-
61
- from selenium.webdriver.chrome.options import Options as ChromeOptions
62
-
63
class JianguoyunDownloaderChrome:
    """Download a Jianguoyun (Nutstore) shared file by driving Chrome.

    Example:
        >>> url = "https://www.jianguoyun.com/p/DdyHJxUQqdHDDRjvtI0GIAA"
        >>> downloader = JianguoyunDownloaderChrome(url)
        >>> downloader.run()
    """

    # Matches <span>/<button>/<a> elements whose text contains "download"
    # (translate() lower-cases the letters of DOWNLOAD) or the Chinese '下载'.
    _DOWNLOAD_XPATH = (
        "//span[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
        " | //button[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
        " | //a[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
        " | //span[contains(text(),'下载')]"
        " | //button[contains(text(),'下载')]"
        " | //a[contains(text(),'下载')]"
    )

    # Suffix looked for on files that are still being downloaded.
    _TEMP_SUFFIX = ".crdownload"

    def __init__(self, url, download_path="./downloads"):
        """Create the download directory and start a Chrome session.

        Args:
            url: Share-page URL to open.
            download_path: Directory for downloaded files; stored as an
                absolute path and created if it does not exist.
        """
        self.url = url
        self.download_path = os.path.abspath(download_path)
        os.makedirs(self.download_path, exist_ok=True)

        self.chrome_options = ChromeOptions()
        prefs = {
            "download.default_directory": self.download_path,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        self.chrome_options.add_experimental_option("prefs", prefs)
        for flag in (
            "--disable-gpu",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--enable-features=NetworkService,NetworkServiceInProcess",
        ):
            self.chrome_options.add_argument(flag)
        # Headless mode is intentionally off here; add "--headless" to
        # self.chrome_options before the driver is created to enable it.

        self.driver = webdriver.Chrome(options=self.chrome_options)

    def open_page(self):
        """Navigate the browser to ``self.url`` and print the page title."""
        print(f"🌐 Opening page: {self.url}")
        self.driver.get(self.url)
        print(f"✅ Page loaded: {self.driver.title}")

    def click_download_button(self):
        """Find and click the 'Download' button (supports English and Chinese).

        Waits up to 30 seconds for a clickable match. Any failure is printed
        and then re-raised to the caller.
        """
        print("🔍 Searching for the download button...")
        wait = WebDriverWait(self.driver, 30)

        try:
            button = wait.until(
                EC.element_to_be_clickable((By.XPATH, self._DOWNLOAD_XPATH))
            )

            # Click using JavaScript to avoid overlay or interaction issues
            self.driver.execute_script("arguments[0].click();", button)
            print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")

            # If the cloud service opens a new tab, switch to it
            time.sleep(3)
            handles = self.driver.window_handles
            if len(handles) > 1:
                self.driver.switch_to.window(handles[-1])
                print("📂 Switched to the new download tab.")

        except Exception as e:
            print("❌ Failed to find or click the download button:", e)
            raise

    def wait_for_downloads(self, timeout=3600, poll_interval=2):
        """Poll the download directory until no temporary files remain.

        Args:
            timeout: Maximum number of seconds to wait.
            poll_interval: Seconds to sleep between directory scans.

        Returns:
            True once no ``.crdownload`` file is present, False if the
            timeout elapses first.

        Note:
            "No temporary file" is the only completion signal. A call made
            before the browser has created its temporary file therefore
            reports completion immediately.
        """
        print("⏳ Waiting for downloads to finish...")
        # Monotonic clock: wall-clock adjustments cannot shorten or extend the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            still_downloading = any(
                name.endswith(self._TEMP_SUFFIX)
                for name in os.listdir(self.download_path)
            )
            if not still_downloading:
                print("✅ Download completed!")
                return True
            time.sleep(poll_interval)
        # Report the timeout that was actually used, not a fixed "1 hour".
        print(f"⚠️ Timeout: download not completed within {timeout} seconds")
        return False

    # Alias matching the method name used by the Firefox downloader.
    wait_for_download = wait_for_downloads

    def close(self):
        """Quit the browser session."""
        self.driver.quit()
        print("🚪 Browser closed.")

    def run(self):
        """Open the page, click download, wait for completion, then close.

        Errors are printed rather than propagated, and the browser is always
        closed.

        Returns:
            The result of :meth:`wait_for_downloads` (True on completion),
            or False when an error or timeout occurred.
        """
        print('*' * 60)
        finished = False
        try:
            self.open_page()
            self.click_download_button()
            finished = self.wait_for_downloads()
        except Exception as e:
            print("❌ Error:", e)
        finally:
            self.close()
            print('*' * 60)
        return finished
156
-
157
-
158
- # =============================================================
159
- # JIANGUOYUN (NUTSTORE) FIREFOX VERSION
160
- # =============================================================
161
-
162
- from selenium.webdriver.firefox.options import Options as FirefoxOptions
163
- from selenium.webdriver.firefox.service import Service
164
-
165
class JianguoyunDownloaderFirefox:
    """Download a Jianguoyun (Nutstore) shared file by driving headless Firefox.

    Example:
        >>> url = "https://www.jianguoyun.com/p/DdyHJxUQqdHDDRjvtI0GIAA"
        >>> downloader = JianguoyunDownloaderFirefox(url)
        >>> downloader.run()
    """

    # Driver location used when no explicit path is given and this file exists.
    _SNAP_GECKODRIVER = "/snap/bin/geckodriver"

    # Matches <span>/<button>/<a> elements whose text contains "download"
    # (translate() lower-cases the letters of DOWNLOAD) or the Chinese '下载'.
    _DOWNLOAD_XPATH = (
        "//span[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
        " | //button[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
        " | //a[contains(translate(text(),'DOWNLOAD下载','download下载'),'download')]"
        " | //span[contains(text(),'下载')]"
        " | //button[contains(text(),'下载')]"
        " | //a[contains(text(),'下载')]"
    )

    def __init__(self, url, download_path="./downloads", geckodriver_path=None):
        """Create the download directory and start a headless Firefox session.

        Args:
            url: Share-page URL to open.
            download_path: Directory for downloaded files; stored as an
                absolute path and created if it does not exist.
            geckodriver_path: Optional explicit path to the geckodriver
                binary. When omitted, ``/snap/bin/geckodriver`` is used if it
                exists; otherwise Selenium's default driver lookup applies.
        """
        self.url = url
        self.download_path = os.path.abspath(download_path)
        os.makedirs(self.download_path, exist_ok=True)

        options = FirefoxOptions()
        options.add_argument("--headless")
        options.set_preference("browser.download.folderList", 2)
        options.set_preference("browser.download.manager.showWhenStarting", False)
        options.set_preference("browser.download.dir", self.download_path)
        options.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "application/zip,application/octet-stream,application/x-zip-compressed,multipart/x-zip",
        )
        options.set_preference("pdfjs.disabled", True)

        # Only pin the snap path when it is really there, so non-snap installs
        # no longer fail on a missing binary.
        if geckodriver_path is None and os.path.exists(self._SNAP_GECKODRIVER):
            geckodriver_path = self._SNAP_GECKODRIVER
        service = Service(geckodriver_path) if geckodriver_path else Service()
        self.driver = webdriver.Firefox(service=service, options=options)

    def open_page(self):
        """Navigate the browser to ``self.url`` and print the page title."""
        print(f"🌐 Opening page: {self.url}")
        self.driver.get(self.url)
        print(f"✅ Page loaded: {self.driver.title}")

    def click_download_button(self):
        """Find and click the 'Download' button (supports English and Chinese).

        Waits up to 30 seconds for a clickable match. Any failure is printed
        and then re-raised to the caller.
        """
        print("🔍 Searching for the download button...")
        wait = WebDriverWait(self.driver, 30)

        try:
            button = wait.until(
                EC.element_to_be_clickable((By.XPATH, self._DOWNLOAD_XPATH))
            )

            # Click using JavaScript to avoid overlay or interaction issues
            self.driver.execute_script("arguments[0].click();", button)
            print(f"✅ Download button clicked. Files will be saved to: {self.download_path}")

            # If the cloud service opens a new tab, switch to it
            time.sleep(3)
            handles = self.driver.window_handles
            if len(handles) > 1:
                self.driver.switch_to.window(handles[-1])
                print("📂 Switched to the new download tab.")

        except Exception as e:
            print("❌ Failed to find or click the download button:", e)
            raise

    def wait_for_download(self, timeout=3600, poll_interval=2):
        """Wait until all downloads are finished (auto-detects browser type).

        Args:
            timeout: Maximum number of seconds to wait.
            poll_interval: Seconds to sleep between directory scans.

        Returns:
            True once no temporary download file is present, False if the
            timeout elapses first.

        Note:
            "No temporary file" is the only completion signal. A call made
            before the browser has created its temporary file therefore
            reports completion immediately.
        """
        print("⏳ Waiting for downloads to finish...")

        # Determine the temporary file extension based on the browser type
        browser_name = self.driver.capabilities["browserName"].lower()
        temp_ext = ".crdownload" if "chrome" in browser_name else ".part"

        # Monotonic clock: wall-clock adjustments cannot shorten or extend the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            still_downloading = any(
                name.endswith(temp_ext)
                for name in os.listdir(self.download_path)
            )
            if not still_downloading:
                print("✅ Download completed!")
                return True
            time.sleep(poll_interval)

        # Previously the loop ended silently here and returned None.
        print(f"⚠️ Timeout: download not completed within {timeout} seconds")
        return False

    # Alias matching the method name used by the Chrome downloader.
    wait_for_downloads = wait_for_download

    def close(self):
        """Quit the browser session."""
        print("🛑 Closing browser...")
        self.driver.quit()

    def run(self):
        """Open the page, click download, wait for completion, then close.

        Errors are printed rather than propagated, and the browser is always
        closed.

        Returns:
            The result of :meth:`wait_for_download` (True on completion),
            or False when an error or timeout occurred.
        """
        print('*' * 60)
        finished = False
        try:
            self.open_page()
            self.click_download_button()
            finished = self.wait_for_download(timeout=3600)
        except Exception as e:
            print("❌ Error:", e)
        finally:
            self.close()
            print('*' * 60)
        return finished
256
-
junshan_kit/test.py DELETED
@@ -1,8 +0,0 @@
1
"""Ad-hoc manual check: run ``CSV_TO_Pandas.clean_data`` on a local CSV file."""


def main():
    """Instantiate ``CSV_TO_Pandas`` and clean the sample CSV.

    The three trailing arguments to ``clean_data`` are passed empty; their
    meaning is defined in ``DataProcessor`` and is not visible from this file.
    """
    # Imported here so that merely importing this module has no side effects
    # and does not require ``DataProcessor`` to be on ``sys.path``.
    from DataProcessor import CSV_TO_Pandas

    data_ = CSV_TO_Pandas()
    data_.clean_data('data_csv/Electric Vehicle Population Data/Electric_Vehicle_Population_Data.csv', [], [], {})


# Only run the check when executed as a script, not on import.
if __name__ == "__main__":
    main()
@@ -1,9 +0,0 @@
1
- junshan_kit/DataProcessor.py,sha256=rDL3NLD-WlT3x6x74XkB_542_sk3BrnIk5p4rYlVn5o,7212
2
- junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- junshan_kit/datahub.py,sha256=_Q_3AlZ8vk1Ma6h9I44SxWBA8w9m1CQNvYztMcsxzUo,5377
4
- junshan_kit/kit.py,sha256=h4Q_87hEJbXH4A9ryaGMu_nle5RlM8OR_PaW_hWCVBY,1040
5
- junshan_kit/meta.py,sha256=SiY9P93aABrksNE6G3ft5gzcuP2cUgc4Vx6LH7ZFmzg,10113
6
- junshan_kit/test.py,sha256=FgzG4oG7kkq6rWasxdBSY1qx_B0navRI5Ei-wJ1Dvo0,180
7
- junshan_kit-2.2.3.dist-info/METADATA,sha256=h4_Z0LMIigJgrkt2hD5TcYJwOCkArMRySh-OopgZ9Xo,329
8
- junshan_kit-2.2.3.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
9
- junshan_kit-2.2.3.dist-info/RECORD,,