PgsFile-0.2.5-py3-none-any.whl → PgsFile-0.2.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +10 -8
- PgsFile/__init__.py +1 -1
- {PgsFile-0.2.5.dist-info → PgsFile-0.2.7.dist-info}/METADATA +1 -1
- {PgsFile-0.2.5.dist-info → PgsFile-0.2.7.dist-info}/RECORD +7 -7
- {PgsFile-0.2.5.dist-info → PgsFile-0.2.7.dist-info}/LICENSE +0 -0
- {PgsFile-0.2.5.dist-info → PgsFile-0.2.7.dist-info}/WHEEL +0 -0
- {PgsFile-0.2.5.dist-info → PgsFile-0.2.7.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -2139,8 +2139,8 @@ def find_table_with_most_rows(tables):
|
|
|
2139
2139
|
max_rows=0
|
|
2140
2140
|
max_table_index=-1
|
|
2141
2141
|
for i, table in enumerate(tables):
|
|
2142
|
-
if isinstance(table, pd.DataFrame) and table.shape[0] > max_rows:
|
|
2143
|
-
max_rows=table.shape[0]
|
|
2142
|
+
if isinstance(table, pd.DataFrame) and len(str(table.shape[0])) > max_rows:
|
|
2143
|
+
max_rows=len(str(table.shape[0]))
|
|
2144
2144
|
max_table_index=i
|
|
2145
2145
|
return max_table_index, max_rows if max_table_index!= -1 else None
|
|
2146
2146
|
|
|
@@ -2173,7 +2173,7 @@ def get_data_table_html_string(html_string, output_file, most_rows=True):
|
|
|
2173
2173
|
df=tables[0]
|
|
2174
2174
|
else:
|
|
2175
2175
|
# 2. get the table with most rows
|
|
2176
|
-
target_table=find_table_with_most_rows(tables)[
|
|
2176
|
+
target_table=find_table_with_most_rows(tables)[1] # (1, 32)
|
|
2177
2177
|
df=tables[target_table]
|
|
2178
2178
|
|
|
2179
2179
|
df.to_excel(output_file, index=False)
|
|
@@ -2265,12 +2265,14 @@ simhei_default_font_path_MacOS_Windows=["/System/Library/Fonts/STHeiti Medium.tt
|
|
|
2265
2265
|
def get_env_variable(variable_name):
|
|
2266
2266
|
# Get the value of the specified environment variable
|
|
2267
2267
|
value = os.getenv(variable_name)
|
|
2268
|
+
return value
|
|
2268
2269
|
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2270
|
+
def get_all_env_variables():
|
|
2271
|
+
# Get all environment variables
|
|
2272
|
+
env_vars = os.environ
|
|
2273
|
+
|
|
2274
|
+
# Print all user environment variables
|
|
2275
|
+
return dict(env_vars)
|
|
2274
2276
|
|
|
2275
2277
|
import subprocess
|
|
2276
2278
|
def set_permanent_environment_variable(variable_name, variable_value, system_wide=False):
|
PgsFile/__init__.py
CHANGED
|
@@ -28,7 +28,7 @@ from .PgsFile import remove_empty_folders, remove_empty_txts, remove_empty_lines
|
|
|
28
28
|
from .PgsFile import concatenate_excel_files
|
|
29
29
|
from .PgsFile import set_permanent_environment_variable
|
|
30
30
|
from .PgsFile import delete_permanent_environment_variable
|
|
31
|
-
from .PgsFile import get_env_variable
|
|
31
|
+
from .PgsFile import get_env_variable, get_all_env_variables
|
|
32
32
|
|
|
33
33
|
# 6. Data cleaning
|
|
34
34
|
from .PgsFile import BigPunctuation, StopTags, Special, yhd
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: This module streamlines Python package management, script execution, file handling, web scraping, multimedia downloads, data cleaning, and NLP tasks such as word tokenization and POS tagging. It also assists with generating word lists and plotting data, making these tasks more accessible and convenient for literary students. Whether you need to scrape data from websites, clean text, or analyze language, this module provides user-friendly tools to simplify your workflow.
|
|
5
5
|
Home-page: https://mp.weixin.qq.com/s/12-KVLfaPszoZkCxuRd-nQ?token=1589547443&lang=zh_CN
|
|
6
6
|
Author: Pan Guisheng
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
2
|
-
PgsFile/__init__.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=707ODGrvDtytkVqWg1Wvn-o-UE5rFn2Og2gAvB_X6r0,104833
|
|
2
|
+
PgsFile/__init__.py,sha256=eduFbjUeZk2f4nUU3B-_2SQzHLNMtHrwDEcOveIu_-0,2903
|
|
3
3
|
PgsFile/Corpora/Corpora/Parallel/HK Financial and Legal EC Parallel Corpora/HK-Press releases of the Financial Secretary Office (2007-2019).tsv,sha256=IpLGQQY5cXbFWmUPFEdzEPz8CXuCdR2DdZOhBxA7FWw,2035252
|
|
4
4
|
PgsFile/Corpora/Corpora/Parallel/HK Financial and Legal EC Parallel Corpora/Hong Kong bilingual court decisions (1997-2017).tsv,sha256=BMmPr5eYBIv06Wnfb8nOBrfIzpAl-LLoRk3R60dLxe0,5928126
|
|
5
5
|
PgsFile/Corpora/Corpora/Parallel/HK Financial and Legal EC Parallel Corpora/HongKong-Legislation.tsv,sha256=PJjiJIKV9aEzE0tAcqRNRCrunyWGiuD3sbkwkD9hoqo,4460018
|
|
@@ -2669,8 +2669,8 @@ PgsFile/models/fonts/全新硬笔楷书简.ttf,sha256=mPemGYMpgQxvFL1pFjjnyUMIpr
|
|
|
2669
2669
|
PgsFile/models/fonts/全新硬笔行书简.ttf,sha256=bUtbl71eK_ellp1z0tCmmR_P-JhqVFIpzeuRlrEBo9g,2611516
|
|
2670
2670
|
PgsFile/models/fonts/博洋行书3500.TTF,sha256=VrgeHr8cgOL6JD05QyuD9ZSyw4J2aIVxKxW8zSajq6Q,4410732
|
|
2671
2671
|
PgsFile/models/fonts/陆柬之行书字体.ttf,sha256=Zpd4Z7E9w-Qy74yklXHk4vM7HOtHuQgllvygxZZ1Hvs,1247288
|
|
2672
|
-
PgsFile-0.2.5.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2673
|
-
PgsFile-0.2.5.dist-info/METADATA,sha256=
|
|
2674
|
-
PgsFile-0.2.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2675
|
-
PgsFile-0.2.5.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2676
|
-
PgsFile-0.2.5.dist-info/RECORD,,
|
|
2672
|
+
PgsFile-0.2.7.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2673
|
+
PgsFile-0.2.7.dist-info/METADATA,sha256=JSeZDARZBj4qQJvODtLoI13k5B1BJR7ujrF6ugE_Drk,2711
|
|
2674
|
+
PgsFile-0.2.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2675
|
+
PgsFile-0.2.7.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2676
|
+
PgsFile-0.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|