PgsFile 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +3 -3
- {PgsFile-0.2.6.dist-info → PgsFile-0.2.7.dist-info}/METADATA +1 -1
- {PgsFile-0.2.6.dist-info → PgsFile-0.2.7.dist-info}/RECORD +6 -6
- {PgsFile-0.2.6.dist-info → PgsFile-0.2.7.dist-info}/LICENSE +0 -0
- {PgsFile-0.2.6.dist-info → PgsFile-0.2.7.dist-info}/WHEEL +0 -0
- {PgsFile-0.2.6.dist-info → PgsFile-0.2.7.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -2139,8 +2139,8 @@ def find_table_with_most_rows(tables):
|
|
|
2139
2139
|
max_rows=0
|
|
2140
2140
|
max_table_index=-1
|
|
2141
2141
|
for i, table in enumerate(tables):
|
|
2142
|
-
if isinstance(table, pd.DataFrame) and table.shape[0] > max_rows:
|
|
2143
|
-
max_rows=table.shape[0]
|
|
2142
|
+
if isinstance(table, pd.DataFrame) and len(str(table.shape[0])) > max_rows:
|
|
2143
|
+
max_rows=len(str(table.shape[0]))
|
|
2144
2144
|
max_table_index=i
|
|
2145
2145
|
return max_table_index, max_rows if max_table_index!= -1 else None
|
|
2146
2146
|
|
|
@@ -2173,7 +2173,7 @@ def get_data_table_html_string(html_string, output_file, most_rows=True):
|
|
|
2173
2173
|
df=tables[0]
|
|
2174
2174
|
else:
|
|
2175
2175
|
# 2. get the table with most rows
|
|
2176
|
-
target_table=find_table_with_most_rows(tables)[0]
|
|
2176
|
+
target_table=find_table_with_most_rows(tables)[1] # (1, 32)
|
|
2177
2177
|
df=tables[target_table]
|
|
2178
2178
|
|
|
2179
2179
|
df.to_excel(output_file, index=False)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: This module streamlines Python package management, script execution, file handling, web scraping, multimedia downloads, data cleaning, and NLP tasks such as word tokenization and POS tagging. It also assists with generating word lists and plotting data, making these tasks more accessible and convenient for literary students. Whether you need to scrape data from websites, clean text, or analyze language, this module provides user-friendly tools to simplify your workflow.
|
|
5
5
|
Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
|
|
6
6
|
Author: Pan Guisheng
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=707ODGrvDtytkVqWg1Wvn-o-UE5rFn2Og2gAvB_X6r0,104833
|
|
2
2
|
PgsFile/__init__.py,sha256=eduFbjUeZk2f4nUU3B-_2SQzHLNMtHrwDEcOveIu_-0,2903
|
|
3
3
|
PgsFile/Corpora/Corpora/Parallel/HK Financial and Legal EC Parallel Corpora/HK-Press releases of the Financial Secretary Office (2007-2019).tsv,sha256=IpLGQQY5cXbFWmUPFEdzEPz8CXuCdR2DdZOhBxA7FWw,2035252
|
|
4
4
|
PgsFile/Corpora/Corpora/Parallel/HK Financial and Legal EC Parallel Corpora/Hong Kong bilingual court decisions (1997-2017).tsv,sha256=BMmPr5eYBIv06Wnfb8nOBrfIzpAl-LLoRk3R60dLxe0,5928126
|
|
@@ -2669,8 +2669,8 @@ PgsFile/models/fonts/全新硬笔楷书简.ttf,sha256=mPemGYMpgQxvFL1pFjjnyUMIpr
|
|
|
2669
2669
|
PgsFile/models/fonts/全新硬笔行书简.ttf,sha256=bUtbl71eK_ellp1z0tCmmR_P-JhqVFIpzeuRlrEBo9g,2611516
|
|
2670
2670
|
PgsFile/models/fonts/博洋行书3500.TTF,sha256=VrgeHr8cgOL6JD05QyuD9ZSyw4J2aIVxKxW8zSajq6Q,4410732
|
|
2671
2671
|
PgsFile/models/fonts/陆柬之行书字体.ttf,sha256=Zpd4Z7E9w-Qy74yklXHk4vM7HOtHuQgllvygxZZ1Hvs,1247288
|
|
2672
|
-
PgsFile-0.2.
|
|
2673
|
-
PgsFile-0.2.
|
|
2674
|
-
PgsFile-0.2.
|
|
2675
|
-
PgsFile-0.2.
|
|
2676
|
-
PgsFile-0.2.
|
|
2672
|
+
PgsFile-0.2.7.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2673
|
+
PgsFile-0.2.7.dist-info/METADATA,sha256=JSeZDARZBj4qQJvODtLoI13k5B1BJR7ujrF6ugE_Drk,2711
|
|
2674
|
+
PgsFile-0.2.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2675
|
+
PgsFile-0.2.7.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2676
|
+
PgsFile-0.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|