complex-text-tools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Your Name
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ include README.md
2
+ include LICENSE
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.1
2
+ Name: complex-text-tools
3
+ Version: 0.1.0
4
+ Summary: A package for processing complex text with mixed Chinese and English characters
5
+ Home-page: https://github.com/yourusername/complex-text-tools
6
+ Author: Your Name
7
+ Author-email: Your Name <your.email@example.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/yourusername/complex-text-tools
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.6
16
+ Classifier: Programming Language :: Python :: 3.7
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Requires-Python: >=3.6
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+
24
+ # Complex Text Tools
25
+
26
+ A Python package for processing complex text containing mixed Chinese and English characters, removing extra spaces and standardizing punctuation.
27
+
28
+ ## Features
29
+
30
+ - Remove extra spaces between Chinese characters
31
+ - Remove extra spaces between Chinese and English characters
32
+ - Handle spacing around punctuation marks correctly
33
+ - Process mixed language texts efficiently
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install complex-text-tools
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ```python
44
+ from complex_text_tools import remove_extra_spaces
45
+
46
+ text = "这 是 中文 测试 文本 , mixed English text here , 还 有 symbols : ; ! "
47
+ clean_text = remove_extra_spaces(text)
48
+ print(clean_text)
49
+ # Output: "这是中文测试文本,mixed English text here,还有 symbols:;!"
50
+ ```
51
+
52
+ ## License
53
+
54
+ This project is licensed under the MIT License.
@@ -0,0 +1,31 @@
1
+ # Complex Text Tools
2
+
3
+ A Python package for processing complex text containing mixed Chinese and English characters, removing extra spaces and standardizing punctuation.
4
+
5
+ ## Features
6
+
7
+ - Remove extra spaces between Chinese characters
8
+ - Remove extra spaces between Chinese and English characters
9
+ - Handle spacing around punctuation marks correctly
10
+ - Process mixed language texts efficiently
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ pip install complex-text-tools
16
+ ```
17
+
18
+ ## Usage
19
+
20
+ ```python
21
+ from complex_text_tools import remove_extra_spaces
22
+
23
+ text = "这 是 中文 测试 文本 , mixed English text here , 还 有 symbols : ; ! "
24
+ clean_text = remove_extra_spaces(text)
25
+ print(clean_text)
26
+ # Output: "这是中文测试文本,mixed English text here,还有 symbols:;!"
27
+ ```
28
+
29
+ ## License
30
+
31
+ This project is licensed under the MIT License.
@@ -0,0 +1,3 @@
1
+ from .text_processor import remove_extra_spaces
2
+
3
+ __all__ = ['remove_extra_spaces']
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ from .text_processor import remove_extra_spaces
5
+
6
+
7
def test_remove_extra_spaces():
    """Run remove_extra_spaces on a mixed Chinese/English sample and report it.

    Prints the input text, the processed text, the expected text and whether
    the last two are equal.

    Returns:
        True when the processed text equals the expected text, else False.
    """
    # Sample exercising CJK/CJK, CJK/English and punctuation spacing at once.
    sample = "这 是 中文 测试 文本 , mixed English text here , 还 有 symbols : ; ! "
    wanted = "这是中文测试文本,mixed English text here,还有 symbols:;!"
    actual = remove_extra_spaces(sample)
    passed = actual == wanted

    report = (
        ("原始文本:", sample),
        ("处理结果:", actual),
        ("预期结果:", wanted),
        ("测试通过:", passed),
    )
    for label, value in report:
        print(label, value)

    return passed


if __name__ == "__main__":
    test_remove_extra_spaces()
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+
5
+
6
def remove_extra_spaces(text: str) -> str:
    """Remove superfluous whitespace from mixed Chinese/English text.

    The following rules are applied one after another, in this order:

    1. Whitespace between a CJK character and an ASCII letter is removed.
    2. Whitespace between two CJK characters is removed.
    3. Whitespace between a CJK character and an ASCII punctuation mark
       (brackets, quotes, ``: ; ? ! , . ` ~``) is removed.
    4. Whitespace between a word character and a following symbol is removed.
    5. Whitespace between a symbol and a following word character is
       collapsed to exactly one space.
    6. Runs of two or more whitespace characters between two word characters
       are collapsed to exactly one space.
    7. Whitespace between two symbols is removed.

    "CJK character" means a code point in U+4E00-U+9FFF, U+3000-U+303F or
    U+FF00-U+FFEF (ideographs, CJK punctuation, full-width forms). "Word
    character" is the regex word class, which is Unicode-aware for ``str``
    patterns; "symbol" is anything that is neither a word character nor
    whitespace.

    Every rule needs a character on both sides of the gap, so leading and
    trailing whitespace of ``text`` is left untouched.

    Args:
        text: The string to clean up.

    Returns:
        A new string with the whitespace rules above applied.
    """
    cjk = r"[\u4e00-\u9fff\u3000-\u303f\uff00-\uffef]"
    ascii_letter = r"[a-zA-Z]"
    ascii_punct = r"[\[\]\(\)\{\}\"\'\:\;\?\!\,\.\`\~]"
    word = r"\w"
    symbol = r"[^\w\s]"

    def between(left: str, right: str, gap: str = r"\s+") -> str:
        # Match only the gap itself. The neighbours are checked with
        # lookbehind/lookahead so they are not consumed; re.sub matches never
        # overlap, and consuming them made every second gap in sequences such
        # as "a  b  c" or ": ; !" be skipped.
        return "(?<=" + left + ")" + gap + "(?=" + right + ")"

    # (pattern, replacement) pairs; the order is significant.
    rules = (
        (between(cjk, ascii_letter) + "|" + between(ascii_letter, cjk), ""),
        (between(cjk, cjk), ""),
        (between(cjk, ascii_punct) + "|" + between(ascii_punct, cjk), ""),
        (between(word, symbol), ""),
        (between(symbol, word), " "),
        (between(word, word, r"\s{2,}"), " "),
        (between(symbol, symbol), ""),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)

    return text
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.1
2
+ Name: complex-text-tools
3
+ Version: 0.1.0
4
+ Summary: A package for processing complex text with mixed Chinese and English characters
5
+ Home-page: https://github.com/yourusername/complex-text-tools
6
+ Author: Your Name
7
+ Author-email: Your Name <your.email@example.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/yourusername/complex-text-tools
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.6
16
+ Classifier: Programming Language :: Python :: 3.7
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Requires-Python: >=3.6
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+
24
+ # Complex Text Tools
25
+
26
+ A Python package for processing complex text containing mixed Chinese and English characters, removing extra spaces and standardizing punctuation.
27
+
28
+ ## Features
29
+
30
+ - Remove extra spaces between Chinese characters
31
+ - Remove extra spaces between Chinese and English characters
32
+ - Handle spacing around punctuation marks correctly
33
+ - Process mixed language texts efficiently
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install complex-text-tools
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ```python
44
+ from complex_text_tools import remove_extra_spaces
45
+
46
+ text = "这 是 中文 测试 文本 , mixed English text here , 还 有 symbols : ; ! "
47
+ clean_text = remove_extra_spaces(text)
48
+ print(clean_text)
49
+ # Output: "这是中文测试文本,mixed English text here,还有 symbols:;!"
50
+ ```
51
+
52
+ ## License
53
+
54
+ This project is licensed under the MIT License.
@@ -0,0 +1,12 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ setup.py
6
+ complex_text_tools/__init__.py
7
+ complex_text_tools/test_processor.py
8
+ complex_text_tools/text_processor.py
9
+ complex_text_tools.egg-info/PKG-INFO
10
+ complex_text_tools.egg-info/SOURCES.txt
11
+ complex_text_tools.egg-info/dependency_links.txt
12
+ complex_text_tools.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ complex_text_tools
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "complex-text-tools"
7
+ version = "0.1.0"
8
+ description = "A package for processing complex text with mixed Chinese and English characters"
9
+ readme = "README.md"
10
+ authors = [
11
+ {name = "Your Name", email = "your.email@example.com"}
12
+ ]
13
+ license = {text = "MIT"}
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.6",
21
+ "Programming Language :: Python :: 3.7",
22
+ "Programming Language :: Python :: 3.8",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ ]
26
+ requires-python = ">=3.6"
27
+ dependencies = []
28
+
29
+ [project.urls]
30
+ Homepage = "https://github.com/yourusername/complex-text-tools"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,32 @@
1
+ from setuptools import setup, find_packages
2
+
3
# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

# Trove classifiers advertised for this release.
_CLASSIFIERS = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.6",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
]

# Keyword arguments for setuptools.setup(), collected in one place.
_METADATA = dict(
    name="complex-text-tools",
    version="0.1.0",
    author="Your Name",
    author_email="your.email@example.com",
    description="A package for processing complex text with mixed Chinese and English characters",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/yourusername/complex-text-tools",
    packages=find_packages(),
    classifiers=_CLASSIFIERS,
    python_requires=">=3.6",
    # No additional runtime dependencies.
    install_requires=[],
)

setup(**_METADATA)