PgsFile 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PgsFile might be problematic. Click here for more details.
- PgsFile/PgsFile.py +75 -10
- {PgsFile-0.5.0.dist-info → PgsFile-0.5.2.dist-info}/METADATA +3 -2
- {PgsFile-0.5.0.dist-info → PgsFile-0.5.2.dist-info}/RECORD +6 -6
- {PgsFile-0.5.0.dist-info → PgsFile-0.5.2.dist-info}/LICENSE +0 -0
- {PgsFile-0.5.0.dist-info → PgsFile-0.5.2.dist-info}/WHEEL +0 -0
- {PgsFile-0.5.0.dist-info → PgsFile-0.5.2.dist-info}/top_level.txt +0 -0
PgsFile/PgsFile.py
CHANGED
|
@@ -4144,9 +4144,21 @@ def tfidf_keyword_extraction(documents, top_percent=(0.0, 0.10)):
|
|
|
4144
4144
|
|
|
4145
4145
|
|
|
4146
4146
|
from xml.dom.minidom import Document
|
|
4147
|
-
|
|
4147
|
+
from datetime import datetime
|
|
4148
|
+
def maketmx(
|
|
4149
|
+
tmx_path,
|
|
4150
|
+
source_list,
|
|
4151
|
+
target_list,
|
|
4152
|
+
source_lang="zh-CN",
|
|
4153
|
+
target_lang="en-US",
|
|
4154
|
+
author="Petercusin",
|
|
4155
|
+
client_name=None,
|
|
4156
|
+
project_id=None,
|
|
4157
|
+
domain=None,
|
|
4158
|
+
status="Final"
|
|
4159
|
+
):
|
|
4148
4160
|
"""
|
|
4149
|
-
Generate a TMX (Translation Memory eXchange) file for any two language pairs.
|
|
4161
|
+
Generate a TMX (Translation Memory eXchange) file for any two language pairs, with optional metadata.
|
|
4150
4162
|
|
|
4151
4163
|
Parameters
|
|
4152
4164
|
----------
|
|
@@ -4161,34 +4173,86 @@ def maketmx(tmx_path, source_list, target_list, source_lang="zh-CN", target_lang
|
|
|
4161
4173
|
Source language code, e.g., "zh-CN" (default), "fr-FR", "de-DE".
|
|
4162
4174
|
target_lang : str, optional
|
|
4163
4175
|
Target language code, e.g., "en-US" (default), "es-ES", "ja-JP".
|
|
4176
|
+
author : str, optional
|
|
4177
|
+
Author of the TMX file, e.g., "Petercusin" (default).
|
|
4178
|
+
client_name : str, optional
|
|
4179
|
+
Name of the client or company, e.g., "Acme Corp".
|
|
4180
|
+
project_id : str, optional
|
|
4181
|
+
Project identifier, e.g., "Project_XYZ_2025".
|
|
4182
|
+
domain : str, optional
|
|
4183
|
+
Domain or subject field, e.g., "Medical", "Legal", "Technical".
|
|
4184
|
+
status : str, optional
|
|
4185
|
+
Translation status, e.g., "Draft", "Reviewed", "Final" (default).
|
|
4164
4186
|
|
|
4165
4187
|
Returns
|
|
4166
4188
|
-------
|
|
4167
4189
|
None
|
|
4168
4190
|
Writes the TMX file to the specified path.
|
|
4169
4191
|
|
|
4192
|
+
Raises
|
|
4193
|
+
------
|
|
4194
|
+
ValueError
|
|
4195
|
+
If `source_list` and `target_list` have different lengths.
|
|
4196
|
+
|
|
4170
4197
|
Example
|
|
4171
4198
|
-------
|
|
4172
|
-
# Chinese to English
|
|
4173
|
-
maketmx(
|
|
4174
|
-
|
|
4175
|
-
|
|
4176
|
-
|
|
4199
|
+
# Chinese to English, with metadata
|
|
4200
|
+
maketmx(
|
|
4201
|
+
"zh_en.tmx",
|
|
4202
|
+
["你好", "再见"],
|
|
4203
|
+
["Hello", "Goodbye"],
|
|
4204
|
+
"zh-CN",
|
|
4205
|
+
"en-US",
|
|
4206
|
+
author="Dr. Guisheng PAN",
|
|
4207
|
+
client_name="Acme Corp",
|
|
4208
|
+
project_id="Project_XYZ_2025",
|
|
4209
|
+
domain="Technical",
|
|
4210
|
+
status="Final"
|
|
4211
|
+
)
|
|
4177
4212
|
"""
|
|
4213
|
+
if len(source_list) != len(target_list):
|
|
4214
|
+
raise ValueError("source_list and target_list must have the same number of elements.")
|
|
4215
|
+
|
|
4178
4216
|
doc = Document()
|
|
4179
4217
|
tmx = doc.createElement("tmx")
|
|
4180
|
-
tmx.setAttribute("version", "1.
|
|
4218
|
+
tmx.setAttribute("version", "1.4")
|
|
4181
4219
|
doc.appendChild(tmx)
|
|
4182
4220
|
|
|
4183
4221
|
header = doc.createElement("header")
|
|
4184
4222
|
header.setAttribute("creationtool", "PgsFile")
|
|
4185
|
-
header.setAttribute("creationtoolversion", "
|
|
4186
|
-
header.setAttribute("creationtooldeveloper",
|
|
4223
|
+
header.setAttribute("creationtoolversion", "0.5.1")
|
|
4224
|
+
header.setAttribute("creationtooldeveloper", author)
|
|
4225
|
+
header.setAttribute("creationdate", datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
|
|
4187
4226
|
header.setAttribute("srclang", source_lang)
|
|
4188
4227
|
header.setAttribute("trtlang", target_lang)
|
|
4189
4228
|
header.setAttribute("datatype", "plaintext")
|
|
4229
|
+
header.setAttribute("segtype", "sentence")
|
|
4230
|
+
header.setAttribute("adminlang", "en-US")
|
|
4231
|
+
header.setAttribute("o-tmf", "PgsFile_TM")
|
|
4190
4232
|
tmx.appendChild(header)
|
|
4191
4233
|
|
|
4234
|
+
# Add optional metadata as <prop> elements
|
|
4235
|
+
if client_name:
|
|
4236
|
+
prop = doc.createElement("prop")
|
|
4237
|
+
prop.setAttribute("type", "x-Client")
|
|
4238
|
+
prop.appendChild(doc.createTextNode(client_name))
|
|
4239
|
+
header.appendChild(prop)
|
|
4240
|
+
if project_id:
|
|
4241
|
+
prop = doc.createElement("prop")
|
|
4242
|
+
prop.setAttribute("type", "x-Project")
|
|
4243
|
+
prop.appendChild(doc.createTextNode(project_id))
|
|
4244
|
+
header.appendChild(prop)
|
|
4245
|
+
if domain:
|
|
4246
|
+
prop = doc.createElement("prop")
|
|
4247
|
+
prop.setAttribute("type", "x-Domain")
|
|
4248
|
+
prop.appendChild(doc.createTextNode(domain))
|
|
4249
|
+
header.appendChild(prop)
|
|
4250
|
+
if status:
|
|
4251
|
+
prop = doc.createElement("prop")
|
|
4252
|
+
prop.setAttribute("type", "x-Status")
|
|
4253
|
+
prop.appendChild(doc.createTextNode(status))
|
|
4254
|
+
header.appendChild(prop)
|
|
4255
|
+
|
|
4192
4256
|
body = doc.createElement("body")
|
|
4193
4257
|
tmx.appendChild(body)
|
|
4194
4258
|
|
|
@@ -4216,3 +4280,4 @@ def maketmx(tmx_path, source_list, target_list, source_lang="zh-CN", target_lang
|
|
|
4216
4280
|
|
|
4217
4281
|
with open(tmx_path, 'w', encoding='utf-8') as f:
|
|
4218
4282
|
doc.writexml(f, indent='\t', newl='\n', addindent='\t')
|
|
4283
|
+
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: PgsFile
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: This module simplifies Python package management, script execution, file handling, web scraping, and multimedia downloads. The module supports (LLM-based) NLP tasks such as OCR, tokenization, lemmatization, POS tagging, NER, ATE, dependency parsing, MDD, WSD, LIWC, MIP analysis and Chinese-English sentence alignment. It also generates word lists, and plots data, aiding literary students. Ideal for scraping data, cleaning text, and analyzing language, it offers user-friendly tools to streamline workflows.
|
|
5
|
-
Home-page: https://
|
|
5
|
+
Home-page: https://github.com/Petercusin/PgsFile
|
|
6
6
|
Author: Pan Guisheng
|
|
7
7
|
Author-email: panguisheng@sufe.edu.cn
|
|
8
8
|
License: Educational free
|
|
@@ -40,3 +40,4 @@ Key Features:
|
|
|
40
40
|
|
|
41
41
|
Author: Pan Guisheng, a PhD student at the Graduate Institute of Interpretation and Translation of Shanghai International Studies University
|
|
42
42
|
Email: 895284504@qq.com
|
|
43
|
+
Homepage: https://mp.weixin.qq.com/s/lWMkYDWQMjBJNKY2vMYTpw
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
PgsFile/PgsFile.py,sha256=
|
|
1
|
+
PgsFile/PgsFile.py,sha256=YzdCEPFvT1jisKcihRUNyi2hqAWhStkP-yIcj_xqiLI,174646
|
|
2
2
|
PgsFile/__init__.py,sha256=YkDTLWtveSeN4I5ZXSmdp7YZFOHb4yEzxQB82DEHk9s,3704
|
|
3
3
|
PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
|
|
4
4
|
PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
|
|
@@ -2593,8 +2593,8 @@ PgsFile/models/prompts/5. ATE prompt.txt,sha256=5wu0gGlsV7DI0LruYM3-uAC6brppyYD0
|
|
|
2593
2593
|
PgsFile/models/prompts/6. ATE3 prompt.txt,sha256=VnaXpPa6BgZHUcm8PxmP_qgU-8xEoTB3XcBqjwCUy_g,1254
|
|
2594
2594
|
PgsFile/models/prompts/7. SentAlign prompt.txt,sha256=hXpqqC-CAgo8EytkJ0MaLhevLefALazWriY-ew39jxs,1537
|
|
2595
2595
|
PgsFile/models/prompts/8. TitleCase prompt.txt,sha256=4p-LfGy0xAj2uPi9amyMm41T6Z17VNpFFsGZOgWhROs,1136
|
|
2596
|
-
PgsFile-0.5.
|
|
2597
|
-
PgsFile-0.5.
|
|
2598
|
-
PgsFile-0.5.
|
|
2599
|
-
PgsFile-0.5.
|
|
2600
|
-
PgsFile-0.5.
|
|
2596
|
+
PgsFile-0.5.2.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
|
|
2597
|
+
PgsFile-0.5.2.dist-info/METADATA,sha256=fEpBEg0EhJCZOZdSTzLdD5Yn1eRlvdv1YFmWKmM4F2E,3119
|
|
2598
|
+
PgsFile-0.5.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
2599
|
+
PgsFile-0.5.2.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
|
|
2600
|
+
PgsFile-0.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|