maze-moderation-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ All notable changes to `maze-moderation-sdk` will be documented in this file.
4
+
5
+ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) for public releases.
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2026-06-17
10
+
11
+ ### Added
12
+
13
+ - Public SDK surface: `normalize`, `scan_local`, `review_cloud`, `collect_signals`, `review_text`.
14
+ - Frozen dataclass contracts (`Hit`, `CloudVerdict`, `Signals`, `Verdict`) and `RiskStrategy` / `CloudProvider` protocols.
15
+ - Local review: variant normalization, AC automaton lexicon scan, privacy regex, bundled default lexicon snapshot.
16
+ - Cloud review: `AliyunProvider` for Alibaba Cloud text moderation APIs.
17
+ - `DefaultRiskStrategy` with L0–L3 verdict mapping, fail-secure and degraded-mode handling.
18
+ - PyPI publish pipeline via GitHub tag and Trusted Publishing.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tada
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,7 @@
1
+ include README.md
2
+ include LICENSE
3
+ include CHANGELOG.md
4
+ recursive-exclude * __pycache__
5
+ recursive-exclude * *.py[co]
6
+ prune .ruff_cache
7
+ global-exclude .ruff_cache/*
@@ -0,0 +1,158 @@
1
+ Metadata-Version: 2.4
2
+ Name: maze-moderation-sdk
3
+ Version: 0.1.0
4
+ Summary: Maze 内容审核 SDK:文本审核先行,后续扩展图片与视频审核。
5
+ Author: Tada
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/MazeAI-pro/content_security
8
+ Project-URL: Repository, https://github.com/MazeAI-pro/content_security
9
+ Project-URL: Documentation, https://github.com/MazeAI-pro/content_security/tree/main/sdks/moderation#readme
10
+ Project-URL: Changelog, https://github.com/MazeAI-pro/content_security/blob/main/sdks/moderation/CHANGELOG.md
11
+ Project-URL: Issues, https://github.com/MazeAI-pro/content_security/issues
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: pyahocorasick>=2.0
16
+ Requires-Dist: opencc>=1.1
17
+ Requires-Dist: pypinyin>=0.51
18
+ Requires-Dist: httpx>=0.27
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest>=8.0; extra == "dev"
21
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
22
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
23
+ Dynamic: license-file
24
+
25
+ # maze-moderation-sdk
26
+
27
+ Maze 内容审核 SDK。当前首发文本审核能力,后续在同一个 SDK 下扩展图片、视频审核。
28
+
29
+ 文本审核模块是一个无状态的文本审核内核——不碰会话状态、不碰 Redis、不碰配置中心、不碰任何 channel。所有配置由调用方构造 `ReviewConfig` 注入,密钥由调用方构造 `CloudProvider` 时注入,永不进入包内逻辑。
30
+
31
+ 完整设计见 `../../../to-tada/security/design-sdk.md`,开发计划见 `../../../dev-plan/text-censor-sdk-dev-plan.md`(路径相对于源码仓库中的本目录,这些文档不随发布包分发)。
32
+
33
+ ## 作为 SDK 引入
34
+
35
+ 阶段 A(本地联调)推荐使用 sibling editable path:
36
+
37
+ ```toml
38
+ [project]
39
+ dependencies = ["maze-moderation-sdk"]
40
+
41
+ [tool.uv.sources]
42
+ maze-moderation-sdk = { path = "../content_security/sdks/moderation", editable = true }
43
+ ```
44
+
45
+ 团队内 CI 或跨仓验证可锁定 Git tag:
46
+
47
+ ```toml
48
+ [project]
49
+ dependencies = ["maze-moderation-sdk"]
50
+
51
+ [tool.uv.sources]
52
+ maze-moderation-sdk = { git = "ssh://git@github.com/MazeAI-pro/content_security.git", tag = "v0.1.0", subdirectory = "sdks/moderation" }
53
+ ```
54
+
55
+ 通过 PyPI 引入(`0.1.0` 起已公开发布):
56
+
57
+ ```toml
58
+ dependencies = ["maze-moderation-sdk>=0.1,<0.2"]
59
+ ```
60
+
61
+ 不要把本地 path 依赖写进生产环境配置。PyPI 发布说明见 [`docs/publish.md`](https://github.com/MazeAI-pro/content_security/blob/main/sdks/moderation/docs/publish.md)。
62
+
63
+ ## 公共 API
64
+
65
+ 文本审核能力位于 `maze_moderation.text`:
66
+
67
+ | 函数 | 层 | 说明 |
68
+ | --- | --- | --- |
69
+ | `normalize(text)` | 纯函数 | 变体归一化(零宽/全半角/繁简/大小写/重复压缩/拼音) |
70
+ | `scan_local(text, *, lexicon)` | 只本地 | 已归一化文本上的 AC 自动机扫描 |
71
+ | `review_cloud(text, *, provider, chat_id, done)` | 只云 | 只调云 API,不跑本地 DFA |
72
+ | `collect_signals(text, *, config)` | mechanism | 跑 DFA + 云,返回原始 `Signals`,不做判定 |
73
+ | `review_text(text, *, config)` | mechanism+policy | 收集信号后交由 `config.risk_strategy` 判定,出 `Verdict` |
74
+
75
+ 外加 `RiskStrategy` / `CloudProvider` 两个 Protocol 和一组 frozen dataclass(`Hit`/`CloudVerdict`/`Signals`/`Verdict`)。
76
+
77
+ ## 三种用法
78
+
79
+ | 消费方 | 用法 |
80
+ | --- | --- |
81
+ | tada | `review_text` + `DefaultRiskStrategy`,阈值走 config 微调;流式切片在包外多次调 |
82
+ | 第二个项目 | `review_text` + 默认策略,非流式,一段调一次 |
83
+ | 任意未来项目 | `collect_signals` 拿原始信号自己合并,或传自定义 `RiskStrategy` |
84
+
85
+ ## 接入示例
86
+
87
+ ```python
88
+ from maze_moderation.text import ReviewConfig, review_text, DefaultRiskStrategy
89
+ from maze_moderation.text.cloud.aliyun import AliyunProvider
90
+
91
+ # 有状态编排(如 tada):编排层构造 config,会话状态/流式切片留在包外
92
+ cfg = ReviewConfig(
93
+ cloud_provider=AliyunProvider(
94
+ access_key="...",
95
+ access_secret="...",
96
+ region="cn-shanghai",
97
+ service="llm_response_moderation", # 场景由调用方选:评论/昵称/LLM 等
98
+ ),
99
+ risk_strategy=DefaultRiskStrategy(l2_confidence_threshold=0.75),
100
+ lexicon_path="", # "" = 包内默认词库
101
+ )
102
+ verdict = await review_text(user_text, config=cfg)
103
+ if verdict.action == "refuse":
104
+ ... # 调用方自己处置
105
+
106
+ # 无状态、非流式(第二个项目):同样的 review_text 直调
107
+ cfg = ReviewConfig(cloud_provider=AliyunProvider(...))
108
+ verdict = await review_text(output_text, config=cfg)
109
+ ```
110
+
111
+ ## 依赖边界
112
+
113
+ - **依赖**:`pyahocorasick`、`opencc`、`pypinyin`、`httpx`。
114
+ - **不依赖**:`redis`、`fastapi`、`agentscope`、Config Center、任何 tada 内部模块。
115
+
116
+ ## 版本策略
117
+
118
+ 公共面(五函数签名 + frozen dataclass + 两个 Protocol)= SemVer 契约,改字段即 major bump。首发 `0.1.0` 已发布到 PyPI;后续 breaking change 走 major bump。
119
+
120
+ ## 本地审核(M2)
121
+
122
+ > **local 层定位**:不替云端识别敏感词,泛化召回(语义/上下文/对抗变体)交给云端。local 层只做云端难以低成本覆盖的四件事——**确定性拦截**(高置信明确违禁词,零延迟)、**断网兜底**(云端降级时仍出 L3)、**隐私前置**(PII 正则本地命中即拦,不外发)、**业务热修**(竞品/品牌/黑话黑名单,加词分钟级生效)。故词库**控规模、提精度**,不追求泛化覆盖。
123
+
124
+ ```python
125
+ from maze_moderation.text import load_lexicon, scan_local, ReviewConfig
126
+
127
+ # 包内默认词库(konsheng 快照 v1,3267 词,全 hard,按 4 类分文件)
128
+ lexicon = load_lexicon()
129
+
130
+ # 外部词库覆盖 + 业务黑名单
131
+ lexicon = load_lexicon(
132
+ "/path/to/lexicon",
133
+ extra_words=ReviewConfig(business_blacklist=("竞品名称",)).business_blacklist,
134
+ )
135
+ hits = scan_local("待审文本", lexicon=lexicon)
136
+ ```
137
+
138
+ 外部词库目录结构:
139
+
140
+ ```
141
+ lexicon/
142
+ manifest.json # version + files 元数据(category/severity)
143
+ v1/ # 与 manifest.version 同名的子目录
144
+ 涉政.txt # 文件名即 category;本地层全 hard,故只分类不分级
145
+ 暴恐.txt # 每行一个词;跨分类冲突按红线优先级归一文件
146
+ 色情.txt
147
+ 灰区.txt
148
+ ```
149
+
150
+ 词库维护:当前快照基于 konsheng 词库经人工加工(合并去重、剔除误伤词、分类调整),
151
+ 维护与重建说明见 `docs/lexicon-maintenance.md`。
152
+
153
+ ## 开发
154
+
155
+ ```bash
156
+ pip install -e ".[dev]"
157
+ pytest # 覆盖率门槛 80%(见 pyproject.toml)
158
+ ```
@@ -0,0 +1,134 @@
1
+ # maze-moderation-sdk
2
+
3
+ Maze 内容审核 SDK。当前首发文本审核能力,后续在同一个 SDK 下扩展图片、视频审核。
4
+
5
+ 文本审核模块是一个无状态的文本审核内核——不碰会话状态、不碰 Redis、不碰配置中心、不碰任何 channel。所有配置由调用方构造 `ReviewConfig` 注入,密钥由调用方构造 `CloudProvider` 时注入,永不进入包内逻辑。
6
+
7
+ 完整设计见 `../../../to-tada/security/design-sdk.md`,开发计划见 `../../../dev-plan/text-censor-sdk-dev-plan.md`(路径相对于源码仓库中的本目录,这些文档不随发布包分发)。
8
+
9
+ ## 作为 SDK 引入
10
+
11
+ 阶段 A(本地联调)推荐使用 sibling editable path:
12
+
13
+ ```toml
14
+ [project]
15
+ dependencies = ["maze-moderation-sdk"]
16
+
17
+ [tool.uv.sources]
18
+ maze-moderation-sdk = { path = "../content_security/sdks/moderation", editable = true }
19
+ ```
20
+
21
+ 团队内 CI 或跨仓验证可锁定 Git tag:
22
+
23
+ ```toml
24
+ [project]
25
+ dependencies = ["maze-moderation-sdk"]
26
+
27
+ [tool.uv.sources]
28
+ maze-moderation-sdk = { git = "ssh://git@github.com/MazeAI-pro/content_security.git", tag = "v0.1.0", subdirectory = "sdks/moderation" }
29
+ ```
30
+
31
+ 通过 PyPI 引入(`0.1.0` 起已公开发布):
32
+
33
+ ```toml
34
+ dependencies = ["maze-moderation-sdk>=0.1,<0.2"]
35
+ ```
36
+
37
+ 不要把本地 path 依赖写进生产环境配置。PyPI 发布说明见 [`docs/publish.md`](https://github.com/MazeAI-pro/content_security/blob/main/sdks/moderation/docs/publish.md)。
38
+
39
+ ## 公共 API
40
+
41
+ 文本审核能力位于 `maze_moderation.text`:
42
+
43
+ | 函数 | 层 | 说明 |
44
+ | --- | --- | --- |
45
+ | `normalize(text)` | 纯函数 | 变体归一化(零宽/全半角/繁简/大小写/重复压缩/拼音) |
46
+ | `scan_local(text, *, lexicon)` | 只本地 | 已归一化文本上的 AC 自动机扫描 |
47
+ | `review_cloud(text, *, provider, chat_id, done)` | 只云 | 只调云 API,不跑本地 DFA |
48
+ | `collect_signals(text, *, config)` | mechanism | 跑 DFA + 云,返回原始 `Signals`,不做判定 |
49
+ | `review_text(text, *, config)` | mechanism+policy | 收集信号后交由 `config.risk_strategy` 判定,出 `Verdict` |
50
+
51
+ 外加 `RiskStrategy` / `CloudProvider` 两个 Protocol 和一组 frozen dataclass(`Hit`/`CloudVerdict`/`Signals`/`Verdict`)。
52
+
53
+ ## 三种用法
54
+
55
+ | 消费方 | 用法 |
56
+ | --- | --- |
57
+ | tada | `review_text` + `DefaultRiskStrategy`,阈值走 config 微调;流式切片在包外多次调 |
58
+ | 第二个项目 | `review_text` + 默认策略,非流式,一段调一次 |
59
+ | 任意未来项目 | `collect_signals` 拿原始信号自己合并,或传自定义 `RiskStrategy` |
60
+
61
+ ## 接入示例
62
+
63
+ ```python
64
+ from maze_moderation.text import ReviewConfig, review_text, DefaultRiskStrategy
65
+ from maze_moderation.text.cloud.aliyun import AliyunProvider
66
+
67
+ # 有状态编排(如 tada):编排层构造 config,会话状态/流式切片留在包外
68
+ cfg = ReviewConfig(
69
+ cloud_provider=AliyunProvider(
70
+ access_key="...",
71
+ access_secret="...",
72
+ region="cn-shanghai",
73
+ service="llm_response_moderation", # 场景由调用方选:评论/昵称/LLM 等
74
+ ),
75
+ risk_strategy=DefaultRiskStrategy(l2_confidence_threshold=0.75),
76
+ lexicon_path="", # "" = 包内默认词库
77
+ )
78
+ verdict = await review_text(user_text, config=cfg)
79
+ if verdict.action == "refuse":
80
+ ... # 调用方自己处置
81
+
82
+ # 无状态、非流式(第二个项目):同样的 review_text 直调
83
+ cfg = ReviewConfig(cloud_provider=AliyunProvider(...))
84
+ verdict = await review_text(output_text, config=cfg)
85
+ ```
86
+
87
+ ## 依赖边界
88
+
89
+ - **依赖**:`pyahocorasick`、`opencc`、`pypinyin`、`httpx`。
90
+ - **不依赖**:`redis`、`fastapi`、`agentscope`、Config Center、任何 tada 内部模块。
91
+
92
+ ## 版本策略
93
+
94
+ 公共面(五函数签名 + frozen dataclass + 两个 Protocol)= SemVer 契约,改字段即 major bump。首发 `0.1.0` 已发布到 PyPI;后续 breaking change 走 major bump。
95
+
96
+ ## 本地审核(M2)
97
+
98
+ > **local 层定位**:不替云端识别敏感词,泛化召回(语义/上下文/对抗变体)交给云端。local 层只做云端难以低成本覆盖的四件事——**确定性拦截**(高置信明确违禁词,零延迟)、**断网兜底**(云端降级时仍出 L3)、**隐私前置**(PII 正则本地命中即拦,不外发)、**业务热修**(竞品/品牌/黑话黑名单,加词分钟级生效)。故词库**控规模、提精度**,不追求泛化覆盖。
99
+
100
+ ```python
101
+ from maze_moderation.text import load_lexicon, scan_local, ReviewConfig
102
+
103
+ # 包内默认词库(konsheng 快照 v1,3267 词,全 hard,按 4 类分文件)
104
+ lexicon = load_lexicon()
105
+
106
+ # 外部词库覆盖 + 业务黑名单
107
+ lexicon = load_lexicon(
108
+ "/path/to/lexicon",
109
+ extra_words=ReviewConfig(business_blacklist=("竞品名称",)).business_blacklist,
110
+ )
111
+ hits = scan_local("待审文本", lexicon=lexicon)
112
+ ```
113
+
114
+ 外部词库目录结构:
115
+
116
+ ```
117
+ lexicon/
118
+ manifest.json # version + files 元数据(category/severity)
119
+ v1/ # 与 manifest.version 同名的子目录
120
+ 涉政.txt # 文件名即 category;本地层全 hard,故只分类不分级
121
+ 暴恐.txt # 每行一个词;跨分类冲突按红线优先级归一文件
122
+ 色情.txt
123
+ 灰区.txt
124
+ ```
125
+
126
+ 词库维护:当前快照基于 konsheng 词库经人工加工(合并去重、剔除误伤词、分类调整),
127
+ 维护与重建说明见 `docs/lexicon-maintenance.md`。
128
+
129
+ ## 开发
130
+
131
+ ```bash
132
+ pip install -e ".[dev]"
133
+ pytest # 覆盖率门槛 80%(见 pyproject.toml)
134
+ ```
@@ -0,0 +1,47 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "maze-moderation-sdk"
7
+ version = "0.1.0"
8
+ description = "Maze 内容审核 SDK:文本审核先行,后续扩展图片与视频审核。"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [{ name = "Tada" }]
13
+ dependencies = [
14
+ "pyahocorasick>=2.0",
15
+ "opencc>=1.1",
16
+ "pypinyin>=0.51",
17
+ "httpx>=0.27",
18
+ ]
19
+
20
+ [project.urls]
21
+ Homepage = "https://github.com/MazeAI-pro/content_security"
22
+ Repository = "https://github.com/MazeAI-pro/content_security"
23
+ Documentation = "https://github.com/MazeAI-pro/content_security/tree/main/sdks/moderation#readme"
24
+ Changelog = "https://github.com/MazeAI-pro/content_security/blob/main/sdks/moderation/CHANGELOG.md"
25
+ Issues = "https://github.com/MazeAI-pro/content_security/issues"
26
+
27
+ [project.optional-dependencies]
28
+ dev = [
29
+ "pytest>=8.0",
30
+ "pytest-asyncio>=0.23",
31
+ "pytest-cov>=5.0",
32
+ ]
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["src"]
36
+ include = ["maze_moderation*"]
37
+
38
+ [tool.setuptools.package-data]
39
+ "maze_moderation.text" = [
40
+ "local/lexicon/manifest.json",
41
+ "local/lexicon/v1/*.txt",
42
+ ]
43
+
44
+ [tool.pytest.ini_options]
45
+ asyncio_mode = "auto"
46
+ addopts = "--cov=maze_moderation --cov-report=term-missing --cov-fail-under=80"
47
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ CHANGELOG.md
2
+ LICENSE
3
+ MANIFEST.in
4
+ README.md
5
+ pyproject.toml