PgsFile 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of PgsFile might be problematic. Click here for more details.

PgsFile/PgsFile.py CHANGED
@@ -4144,9 +4144,21 @@ def tfidf_keyword_extraction(documents, top_percent=(0.0, 0.10)):
4144
4144
 
4145
4145
 
4146
4146
  from xml.dom.minidom import Document
4147
- def maketmx(tmx_path, source_list, target_list, source_lang="zh-CN", target_lang="en-US"):
4147
+ from datetime import datetime
4148
+ def maketmx(
4149
+ tmx_path,
4150
+ source_list,
4151
+ target_list,
4152
+ source_lang="zh-CN",
4153
+ target_lang="en-US",
4154
+ author="Petercusin",
4155
+ client_name=None,
4156
+ project_id=None,
4157
+ domain=None,
4158
+ status="Final"
4159
+ ):
4148
4160
  """
4149
- Generate a TMX (Translation Memory eXchange) file for any two language pairs.
4161
+ Generate a TMX (Translation Memory eXchange) file for any two language pairs, with optional metadata.
4150
4162
 
4151
4163
  Parameters
4152
4164
  ----------
@@ -4161,34 +4173,86 @@ def maketmx(tmx_path, source_list, target_list, source_lang="zh-CN", target_lang
4161
4173
  Source language code, e.g., "zh-CN" (default), "fr-FR", "de-DE".
4162
4174
  target_lang : str, optional
4163
4175
  Target language code, e.g., "en-US" (default), "es-ES", "ja-JP".
4176
+ author : str, optional
4177
+ Author of the TMX file, e.g., "Petercusin" (default).
4178
+ client_name : str, optional
4179
+ Name of the client or company, e.g., "Acme Corp".
4180
+ project_id : str, optional
4181
+ Project identifier, e.g., "Project_XYZ_2025".
4182
+ domain : str, optional
4183
+ Domain or subject field, e.g., "Medical", "Legal", "Technical".
4184
+ status : str, optional
4185
+ Translation status, e.g., "Draft", "Reviewed", "Final" (default).
4164
4186
 
4165
4187
  Returns
4166
4188
  -------
4167
4189
  None
4168
4190
  Writes the TMX file to the specified path.
4169
4191
 
4192
+ Raises
4193
+ ------
4194
+ ValueError
4195
+ If `source_list` and `target_list` have different lengths.
4196
+
4170
4197
  Example
4171
4198
  -------
4172
- # Chinese to English
4173
- maketmx("zh_en.tmx", ["你好", "再见"], ["Hello", "Goodbye"], "zh-CN", "en-US")
4174
-
4175
- # French to German
4176
- maketmx("fr_de.tmx", ["Bonjour", "Au revoir"], ["Hallo", "Auf Wiedersehen"], "fr-FR", "de-DE")
4199
+ # Chinese to English, with metadata
4200
+ maketmx(
4201
+ "zh_en.tmx",
4202
+ ["你好", "再见"],
4203
+ ["Hello", "Goodbye"],
4204
+ "zh-CN",
4205
+ "en-US",
4206
+ author="Dr. Guisheng PAN",
4207
+ client_name="Acme Corp",
4208
+ project_id="Project_XYZ_2025",
4209
+ domain="Technical",
4210
+ status="Final"
4211
+ )
4177
4212
  """
4213
+ if len(source_list) != len(target_list):
4214
+ raise ValueError("source_list and target_list must have the same number of elements.")
4215
+
4178
4216
  doc = Document()
4179
4217
  tmx = doc.createElement("tmx")
4180
- tmx.setAttribute("version", "1.0")
4218
+ tmx.setAttribute("version", "1.4")
4181
4219
  doc.appendChild(tmx)
4182
4220
 
4183
4221
  header = doc.createElement("header")
4184
4222
  header.setAttribute("creationtool", "PgsFile")
4185
- header.setAttribute("creationtoolversion", "1.1.0")
4186
- header.setAttribute("creationtooldeveloper", "Dr. Guisheng PAN, panguisheng@sufe.edu.cn")
4223
+ header.setAttribute("creationtoolversion", "0.5.1")
4224
+ header.setAttribute("creationtooldeveloper", author)
4225
+ header.setAttribute("creationdate", datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"))
4187
4226
  header.setAttribute("srclang", source_lang)
4188
4227
  header.setAttribute("trtlang", target_lang)
4189
4228
  header.setAttribute("datatype", "plaintext")
4229
+ header.setAttribute("segtype", "sentence")
4230
+ header.setAttribute("adminlang", "en-US")
4231
+ header.setAttribute("o-tmf", "PgsFile_TM")
4190
4232
  tmx.appendChild(header)
4191
4233
 
4234
+ # Add optional metadata as <prop> elements
4235
+ if client_name:
4236
+ prop = doc.createElement("prop")
4237
+ prop.setAttribute("type", "x-Client")
4238
+ prop.appendChild(doc.createTextNode(client_name))
4239
+ header.appendChild(prop)
4240
+ if project_id:
4241
+ prop = doc.createElement("prop")
4242
+ prop.setAttribute("type", "x-Project")
4243
+ prop.appendChild(doc.createTextNode(project_id))
4244
+ header.appendChild(prop)
4245
+ if domain:
4246
+ prop = doc.createElement("prop")
4247
+ prop.setAttribute("type", "x-Domain")
4248
+ prop.appendChild(doc.createTextNode(domain))
4249
+ header.appendChild(prop)
4250
+ if status:
4251
+ prop = doc.createElement("prop")
4252
+ prop.setAttribute("type", "x-Status")
4253
+ prop.appendChild(doc.createTextNode(status))
4254
+ header.appendChild(prop)
4255
+
4192
4256
  body = doc.createElement("body")
4193
4257
  tmx.appendChild(body)
4194
4258
 
@@ -4216,3 +4280,4 @@ def maketmx(tmx_path, source_list, target_list, source_lang="zh-CN", target_lang
4216
4280
 
4217
4281
  with open(tmx_path, 'w', encoding='utf-8') as f:
4218
4282
  doc.writexml(f, indent='\t', newl='\n', addindent='\t')
4283
+
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PgsFile
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: This module simplifies Python package management, script execution, file handling, web scraping, and multimedia downloads. The module supports (LLM-based) NLP tasks such as OCR, tokenization, lemmatization, POS tagging, NER, ATE, dependency parsing, MDD, WSD, LIWC, MIP analysis and Chinese-English sentence alignment. It also generates word lists, and plots data, aiding literary students. Ideal for scraping data, cleaning text, and analyzing language, it offers user-friendly tools to streamline workflows.
5
- Home-page: https://mp.weixin.qq.com/s/lWMkYDWQMjBJNKY2vMYTpw
5
+ Home-page: https://github.com/Petercusin/PgsFile
6
6
  Author: Pan Guisheng
7
7
  Author-email: panguisheng@sufe.edu.cn
8
8
  License: Educational free
@@ -40,3 +40,4 @@ Key Features:
40
40
 
41
41
  Author: Pan Guisheng, a PhD student at the Graduate Institute of Interpretation and Translation of Shanghai International Studies University
42
42
  Email: 895284504@qq.com
43
+ Homepage: https://mp.weixin.qq.com/s/lWMkYDWQMjBJNKY2vMYTpw
@@ -1,4 +1,4 @@
1
- PgsFile/PgsFile.py,sha256=szt4jGqE_q2FYN399mZzdtsWUNK678Sy7y41xEvdL3M,172583
1
+ PgsFile/PgsFile.py,sha256=YzdCEPFvT1jisKcihRUNyi2hqAWhStkP-yIcj_xqiLI,174646
2
2
  PgsFile/__init__.py,sha256=YkDTLWtveSeN4I5ZXSmdp7YZFOHb4yEzxQB82DEHk9s,3704
3
3
  PgsFile/Corpora/Idioms/English_Idioms_8774.txt,sha256=qlsP0yI_XGECBRiPZuLkGZpdasc77sWSKexANu7v8_M,175905
4
4
  PgsFile/Corpora/Monolingual/Chinese/People's Daily 20130605/Raw/00000000.txt,sha256=SLGGSMSb7Ff1RoBstsTW3yX2wNZpqEUchFNpcI-mrR4,1513
@@ -2593,8 +2593,8 @@ PgsFile/models/prompts/5. ATE prompt.txt,sha256=5wu0gGlsV7DI0LruYM3-uAC6brppyYD0
2593
2593
  PgsFile/models/prompts/6. ATE3 prompt.txt,sha256=VnaXpPa6BgZHUcm8PxmP_qgU-8xEoTB3XcBqjwCUy_g,1254
2594
2594
  PgsFile/models/prompts/7. SentAlign prompt.txt,sha256=hXpqqC-CAgo8EytkJ0MaLhevLefALazWriY-ew39jxs,1537
2595
2595
  PgsFile/models/prompts/8. TitleCase prompt.txt,sha256=4p-LfGy0xAj2uPi9amyMm41T6Z17VNpFFsGZOgWhROs,1136
2596
- PgsFile-0.5.0.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2597
- PgsFile-0.5.0.dist-info/METADATA,sha256=PFoIaOZCwuxKpZtCcvblbO2Xs-i6DQnUvn9s9uIuW_A,3070
2598
- PgsFile-0.5.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2599
- PgsFile-0.5.0.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2600
- PgsFile-0.5.0.dist-info/RECORD,,
2596
+ PgsFile-0.5.2.dist-info/LICENSE,sha256=cE5c-QToSkG1KTUsU8drQXz1vG0EbJWuU4ybHTRb5SE,1138
2597
+ PgsFile-0.5.2.dist-info/METADATA,sha256=fEpBEg0EhJCZOZdSTzLdD5Yn1eRlvdv1YFmWKmM4F2E,3119
2598
+ PgsFile-0.5.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
2599
+ PgsFile-0.5.2.dist-info/top_level.txt,sha256=028hCfwhF3UpfD6X0rwtWpXI1RKSTeZ1ALwagWaSmX8,8
2600
+ PgsFile-0.5.2.dist-info/RECORD,,