MenuPilot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
menupilot/config.py ADDED
@@ -0,0 +1,113 @@
1
+ """
2
+ 配置文件 — API Key、模型参数、匹配阈值。
3
+
4
+ 配置优先级:~/.menupilot/config.json > 环境变量 > 程序默认值
5
+
6
+ 使用方式:
7
+ from config import DEEPSEEK_API_KEY, DEEPSEEK_BASE_URL, MATCHING_CONFIG
8
+ """
9
+
10
+ import json
11
+ import os
12
+
13
+ # ── 用户配置文件加载 ─────────────────────────────────────────────
14
+
15
+ _CONFIG_DIR = os.path.expanduser("~/.menupilot")
16
+ _CONFIG_PATH = os.path.join(_CONFIG_DIR, "config.json")
17
+
18
+
19
def _load_json_config(path=None) -> dict:
    """Load the user's JSON configuration, tolerating a missing or broken file.

    Args:
        path: Config file to read. When omitted, the module-level
            ``_CONFIG_PATH`` is used.

    Returns:
        The parsed top-level JSON object. An empty dict is returned when the
        file is missing or unreadable, is not valid UTF-8 or valid JSON, or
        when its top-level value is not a JSON object.
    """
    if path is None:
        path = _CONFIG_PATH
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # Best-effort load: OSError covers a missing/unreadable file, and
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # The module calls .get() on the result, so a top-level list, string,
    # number or null must not leak out of here.
    return data if isinstance(data, dict) else {}
28
+
29
+
30
# User overrides from the JSON config file, read once at import time.
_json = _load_json_config()


def _first_configured(name, fallback):
    """Return the first truthy value among the JSON config, the environment and *fallback*."""
    return _json.get(name) or os.environ.get(name) or fallback


# ── DeepSeek API settings ──
# Precedence: JSON config file > environment variable > built-in default.

# No hard-coded key fallback: stays "" when unconfigured (the original note
# says the first run then triggers the setup wizard).
DEEPSEEK_API_KEY = _first_configured("DEEPSEEK_API_KEY", "")

DEEPSEEK_BASE_URL = _first_configured("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1")

DEEPSEEK_MODEL = _first_configured("DEEPSEEK_MODEL", "deepseek-chat")
52
+
53
+ # API 调用参数
54
LLM_TEMPERATURE = 0.1  # schema analysis and token classification need deterministic output
LLM_MAX_TOKENS = 4096
LLM_TIMEOUT_SECONDS = 30

# ── Matching engine settings ──

MATCHING_CONFIG = dict(
    # Product-name matching (RapidFuzz token_sort_ratio)
    product_name_threshold=90,  # a score >= this value counts as a match
    product_name_scorer="token_sort_ratio",
    # Embedding fallback
    embedding_enabled=False,  # off by default (per the README design)
    embedding_model="paraphrase-multilingual-MiniLM-L12-v2",
    embedding_top_k=3,  # number of candidates to recall
    embedding_similarity_threshold=0.85,
    # Low confidence
    low_confidence_threshold=80,  # a product-name score below this is LOW_CONFIDENCE outright
)

# ── Logging ──

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# ── Mock switch for self-tests ──

# Set USE_MOCK_LLM=1 to skip real LLM calls during self-tests.
USE_MOCK_LLM = os.getenv("USE_MOCK_LLM", "0") == "1"
83
+
84
+ # Mock LLM 响应(Schema Analyzer)
85
MOCK_SCHEMA_RESPONSE = dict(
    field_mapping={"菜品名称": "product_name", "规格": "size"},
    composite_col="口味做法组合",
    target_col="配料",
    irrelevant_cols=[],
)

# Mock LLM response (Token Classifier): two entries that differ only in the
# value of the temperature token.
MOCK_TOKEN_RESPONSE = [
    {
        "tokens": [
            {"value": "红茶", "type": "茶底"},
            {"value": "十二分糖", "type": "糖度"},
            {"value": temperature_value, "type": "温度"},
        ],
        "missing": ["奶底"],
    }
    for temperature_value in ("温热", "正常冰")
]
File without changes
@@ -0,0 +1,135 @@
1
+ """
2
+ Canonical Schema — 所有模板字段统一转换为此内部标准结构。
3
+
4
+ 位于 Rule Engine 层的核心数据模型,被 Matching Engine / workflow 引用。
5
+ """
6
+
7
+ from typing import Dict, List
8
+
9
+ # ── Canonical 字段列表 ────────────────────────────────────────
10
+ # 所有字段(含 tea_base 扩展字段 + composite_col/sop 特殊字段)
11
CANONICAL_FIELDS = [
    "product_name",
    "size",
    "milk_base",
    "temperature",
    "sugar",
    "tea_base",  # extension field
    "composite_col",  # special field
    "sop",  # special field
]

# Required dimensions: these fields must be present when matching.
REQUIRED_DIMENSIONS = ["size", "temperature", "sugar"]

# Wildcard dimensions: an empty value in the master data matches any value.
WILDCARD_DIMENSIONS = ["milk_base", "tea_base"]

# ── Fixed column mapping for the master data table ──
# Master-table column names are fixed Chinese headers, so no LLM is needed
# to recognise them.
MASTER_COLUMN_MAP: Dict[str, str] = {
    "品名": "product_name", "杯型": "size", "奶底": "milk_base",
    "做法": "temperature", "糖": "sugar",
}

# ── Chinese token type name → canonical field name ──
TOKEN_TYPE_TO_FIELD: Dict[str, str] = {
    "温度": "temperature", "糖度": "sugar", "奶底": "milk_base",
    "规格": "size", "茶底": "tea_base",
}

# Reverse mapping: canonical field name → Chinese token type name.
FIELD_TO_TOKEN_TYPE: Dict[str, str] = dict(
    zip(TOKEN_TYPE_TO_FIELD.values(), TOKEN_TYPE_TO_FIELD.keys())
)
43
+
44
+
45
def create_canonical_row(**kwargs) -> Dict:
    """Build one canonical-schema row.

    Every name in ``CANONICAL_FIELDS`` starts out as ``None``; the keyword
    arguments then overwrite those defaults. Keywords that are not canonical
    fields are not rejected: they are added to the row as extra keys.

    Returns:
        A new dict with the canonical fields first, in ``CANONICAL_FIELDS``
        order, followed by any extra keys.
    """
    return {**dict.fromkeys(CANONICAL_FIELDS), **kwargs}
50
+
51
+
52
def get_missing_required(row: Dict) -> List[str]:
    """List the required dimensions that *row* lacks.

    A dimension counts as missing when its key is absent from *row* or maps
    to ``None``; any other value, including an empty string, counts as
    present. The result follows the order of ``REQUIRED_DIMENSIONS``.
    """
    missing: List[str] = []
    for dimension in REQUIRED_DIMENSIONS:
        if row.get(dimension) is None:
            missing.append(dimension)
    return missing
55
+
56
+
57
def is_wildcard(field: str) -> bool:
    """Tell whether *field* is listed in ``WILDCARD_DIMENSIONS``.

    Per the module's notes, a wildcard dimension is one whose empty value in
    the master data may match any value.
    """
    allows_wildcard = field in WILDCARD_DIMENSIONS
    return allows_wildcard
60
+
61
+
62
+ # ── 自测 ──────────────────────────────────────────────────────
63
+
64
# ── Self-test ──
# Runs only when the module is executed as a script. Prints one PASS/FAIL
# line per check plus a summary, and exits with status 1 if any check failed
# so that shells and CI jobs can detect the failure.
if __name__ == "__main__":
    # Running tallies reported in the summary line.
    passed = 0
    failed = 0

    def check(condition, msg):
        """Record one check outcome and print a PASS/FAIL line labelled *msg*."""
        global passed, failed
        if condition:
            passed += 1
            print(f" PASS {msg}")
        else:
            failed += 1
            print(f" FAIL {msg}")

    print("=== Canonical Schema 自测 ===\n")

    # 1. Field definitions
    print("1. 字段定义")
    check(len(CANONICAL_FIELDS) == 8, f"CANONICAL_FIELDS 包含 8 个字段(实际 {len(CANONICAL_FIELDS)})")
    check("tea_base" in CANONICAL_FIELDS, "包含 tea_base 扩展字段")
    check("composite_col" in CANONICAL_FIELDS, "包含 composite_col 特殊字段")
    check("sop" in CANONICAL_FIELDS, "包含 sop 特殊字段")
    check(len(REQUIRED_DIMENSIONS) == 3, "3 个必要维度")
    check(len(WILDCARD_DIMENSIONS) == 2, "2 个通配维度")
    print()

    # 2. Master-data column mapping
    print("2. 主数据列映射")
    check(MASTER_COLUMN_MAP["品名"] == "product_name", "品名 → product_name")
    check(MASTER_COLUMN_MAP["做法"] == "temperature", "做法 → temperature")
    check(MASTER_COLUMN_MAP["糖"] == "sugar", "糖 → sugar")
    check(len(MASTER_COLUMN_MAP) == 5, "共 5 个映射")
    print()

    # 3. Token type mapping (forward and reverse)
    print("3. Token 类型映射")
    check(TOKEN_TYPE_TO_FIELD["温度"] == "temperature", "温度 → temperature")
    check(TOKEN_TYPE_TO_FIELD["茶底"] == "tea_base", "茶底 → tea_base")
    check(FIELD_TO_TOKEN_TYPE["temperature"] == "温度", "反向: temperature → 温度")
    print()

    # 4. create_canonical_row: overrides apply, other fields default to None
    print("4. create_canonical_row")
    row = create_canonical_row(product_name="测试", size="中杯")
    check(row["product_name"] == "测试", "自定义字段生效")
    check(row["size"] == "中杯", "自定义字段生效")
    check(row["milk_base"] is None, "未指定字段默认 None")
    check(len(row) == 8, f"始终包含 8 个字段(实际 {len(row)})")
    print()

    # 5. get_missing_required: complete, partially missing and fully missing rows
    print("5. get_missing_required")
    complete = {"size": "中杯", "temperature": "少冰", "sugar": "七分糖"}
    check(get_missing_required(complete) == [], "完整行 → 空列表")

    partial = {"size": "大杯", "temperature": None, "sugar": "全糖"}
    missing = get_missing_required(partial)
    check("temperature" in missing and len(missing) == 1, "缺 1 个维度被检测到")

    empty = {"size": None, "temperature": None, "sugar": None}
    check(len(get_missing_required(empty)) == 3, "缺 3 个维度全部检测到")
    print()

    # 6. is_wildcard
    print("6. is_wildcard")
    check(is_wildcard("milk_base") is True, "milk_base 是通配维度")
    check(is_wildcard("tea_base") is True, "tea_base 是通配维度")
    check(is_wildcard("size") is False, "size 不是通配维度")
    check(is_wildcard("temperature") is False, "temperature 不是通配维度")
    print()

    # ── Summary ──
    print(f"=== 结果: {passed} passed, {failed} failed ===")
    # Without a non-zero status a failing self-test is indistinguishable from
    # a passing one to anything that only looks at the exit code.
    if failed:
        raise SystemExit(1)
@@ -0,0 +1,387 @@
1
+ # PosAgent 模板映射规则
2
+ # ========================
3
+ # 从 agent/rule_engine.py、data/canonical_schema.py、data/token_dict.py、
4
+ # agent/matching_engine.py、config.py 中提取,作为映射规则的声明式参考。
5
+ #
6
+ # 用途:
7
+ # - 新增模板映射规则时的参考基准
8
+ # - 未来可从 YAML 加载规则替代硬编码
9
+ #
10
+ # 更新日期:2026-06-16
11
+
12
+ version: "1.0"
13
+ description: "PosAgent 模板映射规则 — 所有模板字段统一转换为此内部标准结构"
14
+
15
+ # ═══════════════════════════════════════════════════════════════════
16
+ # 1. Canonical Schema(内部标准字段)
17
+ # ═══════════════════════════════════════════════════════════════════
18
+
19
+ canonical_schema:
20
+ # 所有 canonical 字段(含扩展字段和特殊字段)
21
+ fields:
22
+ - product_name # 商品名称
23
+ - size # 规格/杯型
24
+ - milk_base # 奶底
25
+ - temperature # 温度/做法
26
+ - sugar # 糖度
27
+ - tea_base # 茶底(扩展字段)
28
+ - composite_col # 组合字段名(如"口味做法组合",特殊字段)
29
+ - sop # SOP 代码(目标值,特殊字段)
30
+
31
+ # 必要维度:匹配时这些字段必须有值,缺列直接报错
32
+ required_dimensions:
33
+ - size
34
+ - temperature
35
+ - sugar
36
+
37
+ # 通配维度:主数据中为空时可匹配任意值
38
+ wildcard_dimensions:
39
+ - milk_base
40
+ - tea_base
41
+
42
+ # ═══════════════════════════════════════════════════════════════════
43
+ # 2. 主数据表列映射(中文列名 → Canonical 字段)
44
+ # ═══════════════════════════════════════════════════════════════════
45
+ # 主数据表字段名固定为中文,不需要 LLM 识别
46
+ # 来源: data/canonical_schema.py MASTER_COLUMN_MAP
47
+
48
+ master_column_map:
49
+ 品名: product_name
50
+ 杯型: size
51
+ 奶底: milk_base
52
+ 做法: temperature
53
+ 糖: sugar
54
+
55
+ # 主数据表 SOP 列的候选名称(按优先级查找)
56
+ master_sop_candidates:
57
+ - SOP
58
+ - 配料
59
+ - SOP 代码
60
+ - 代码
61
+
62
+ # ═══════════════════════════════════════════════════════════════════
63
+ # 3. Token 类型映射(中文类型名 → Canonical 字段)
64
+ # ═══════════════════════════════════════════════════════════════════
65
+ # 来源: data/canonical_schema.py TOKEN_TYPE_TO_FIELD
66
+
67
+ token_type_mapping:
68
+ 温度: temperature
69
+ 糖度: sugar
70
+ 奶底: milk_base
71
+ 规格: size
72
+ 茶底: tea_base
73
+
74
+ # ═══════════════════════════════════════════════════════════════════
75
+ # 4. Token 词典(属性值 → 类型)
76
+ # ═══════════════════════════════════════════════════════════════════
77
+ # 词典为软约束:LLM Token Classifier 可识别词典外的值并标注 UNKNOWN_TOKEN
78
+ # 来源: data/token_dict.py _RAW_TOKENS
79
+
80
+ token_dictionary:
81
+ 温度:
82
+ - 热
83
+ - 温热
84
+ - 正常冰
85
+ - 少冰
86
+ - 去冰
87
+ - 冰沙
88
+
89
+ 糖度:
90
+ - 全糖
91
+ - 十二分糖
92
+ - 标准糖
93
+ - 七分糖
94
+ - 五分糖
95
+ - 三分糖
96
+ - 不另加糖
97
+ - 无糖
98
+
99
+ 奶底:
100
+ - 牛奶
101
+ - 燕麦奶
102
+ - 厚乳
103
+ - 椰乳
104
+
105
+ 规格:
106
+ - 大杯
107
+ - 中杯
108
+ - 小杯
109
+ - 五角瓶
110
+
111
+ 茶底:
112
+ - 红茶
113
+ - 绿茶
114
+ - 乌龙茶
115
+ - 五角排红茶
116
+ - 五黄标准茶
117
+ - 茉莉绿茶 # 2026-06-05: testdata 口味做法组合中出现
118
+
119
+ # 已知后缀模式(文档备忘,normalize_token 通过分隔符边界检测自动处理)
120
+ known_suffixes:
121
+ - "|推荐" # 主数据糖度列常见:七分糖|推荐
122
+ - "/新" # 模板规格列偶尔出现:大杯/新
123
+
124
+ # normalize_token 分隔符集合(用于子串边界检测和后缀切割)
125
+ separators:
126
+ - "|"
127
+ - "/"
128
+ - " "
129
+
130
+ # ═══════════════════════════════════════════════════════════════════
131
+ # 5. 匹配引擎配置
132
+ # ═══════════════════════════════════════════════════════════════════
133
+ # 来源: config.py MATCHING_CONFIG + agent/matching_engine.py 常量
134
+
135
+ matching:
136
+ # 商品名匹配(RapidFuzz)
137
+ product_name_threshold: 90 # token_sort_ratio ≥ 此值视为高置信匹配
138
+ low_confidence_threshold: 80 # 低于此值直接 LOW_CONFIDENCE
139
+ scorer: token_sort_ratio
140
+
141
+ # Embedding 兜底(默认关闭)
142
+ embedding:
143
+ enabled: false
144
+ model: paraphrase-multilingual-MiniLM-L12-v2
145
+ top_k: 3
146
+ similarity_threshold: 0.85
147
+
148
+ # 匹配类型枚举
149
+ match_types:
150
+ exact: "exact" # 商品名 + 全属性精确匹配
151
+ attribute_match: "attribute_match" # 属性组合匹配
152
+ product_only: "product_only" # 仅商品名匹配(属性不匹配)
153
+ best_guess: "best_guess" # 兜底最佳猜测
154
+
155
+ # 置信度枚举
156
+ confidence_levels:
157
+ high: "HIGH"
158
+ low_confidence: "LOW_CONFIDENCE"
159
+
160
+ # ═══════════════════════════════════════════════════════════════════
161
+ # 6. 失败原因码及中文描述
162
+ # ═══════════════════════════════════════════════════════════════════
163
+ # 来源: agent/matching_engine.py _REASON_CN_MAP / _REASON_SUGGESTION_MAP
164
+
165
+ failure_reasons:
166
+ PRODUCT_NOT_FOUND:
167
+ cn: "商品名称在主数据中未找到"
168
+ suggestion: "检查商品名称是否有错别字,或补充主数据表"
169
+
170
+ MILK_BASE_NOT_FOUND:
171
+ cn: "{extra}规格在主数据中缺失"
172
+ suggestion: "补充 {extra} 相关 SOP 到主数据表"
173
+
174
+ SIZE_NOT_FOUND:
175
+ cn: "规格在主数据中缺失"
176
+ suggestion: ""
177
+
178
+ TEMPERATURE_NOT_FOUND:
179
+ cn: "温度/做法在主数据中缺失"
180
+ suggestion: ""
181
+
182
+ SUGAR_NOT_FOUND:
183
+ cn: "糖度在主数据中缺失"
184
+ suggestion: ""
185
+
186
+ TEA_BASE_NOT_FOUND:
187
+ cn: "茶底在主数据中缺失"
188
+ suggestion: ""
189
+
190
+ # ═══════════════════════════════════════════════════════════════════
191
+ # 7. 模板预处理规则
192
+ # ═══════════════════════════════════════════════════════════════════
193
+ # 来源: agent/template_preprocessor.py
194
+
195
+ template_preprocessing:
196
+ # 模板类型检测条件(chowbus 类型)
197
+ detection:
198
+ chowbus:
199
+ # 条件 1: 第一行至少有 3 个纯英文列名
200
+ min_english_headers: 3
201
+ # Condition 2: the second row (0-based row index 1, hence the key name "row1") contains Chinese characters
202
+ row1_contains_chinese: true
203
+ # 条件 3: 必须存在 item_cn 列
204
+ required_columns:
205
+ - item_cn
206
+ # 条件 4: 必须存在至少一个 customization{N}_id 列
207
+ customization_pattern: "^customization\\d+_id$"
208
+
209
+ # chowbus 行收集规则
210
+ chowbus_row_collection:
211
+ # Start reading at the 3rd row, i.e. 0-based row index 2 (skips the English header row and the Chinese annotation row)
212
+ data_start_row: 2
213
+ # 以 item_cn 列为锚点
214
+ anchor_column: item_cn
215
+ # 仅收集锚点右侧的中文值
216
+ scan_direction: right
217
+ # 过滤规则:纯英文、纯数字值跳过
218
+ skip_patterns:
219
+ - "^[a-zA-Z_][a-zA-Z0-9_]*$" # 纯英文标识符
220
+ - "^\\d+$" # 纯数字
221
+ # 值清洗:去除 |推荐 等后缀标记
222
+ clean_suffix_markers: true
223
+
224
+ # ═══════════════════════════════════════════════════════════════════
225
+ # 8. 属性匹配规则
226
+ # ═══════════════════════════════════════════════════════════════════
227
+ # 来源: agent/matching_engine.py _attributes_match()
228
+
229
+ attribute_matching:
230
+ # 必要维度匹配:master 和 template 都必须有值且精确相等
231
+ required_rules:
232
+ - field: size
233
+ match: exact
234
+ miss_penalty: unmatched
235
+ - field: temperature
236
+ match: exact
237
+ miss_penalty: unmatched
238
+ - field: sugar
239
+ match: exact
240
+ miss_penalty: unmatched
241
+
242
+ # 通配维度匹配:master 为空时通配(跳过),master 有值时精确匹配
243
+ wildcard_rules:
244
+ - field: milk_base
245
+ master_empty: wildcard # master 为空 → 通配任意 template 值
246
+ master_has_value: exact # master 有值 → 必须精确匹配
247
+ - field: tea_base
248
+ master_empty: wildcard
249
+ master_has_value: exact
250
+
251
+ # LOW_CONFIDENCE 失败原因推断优先级(从高到低)
252
+ failure_reason_priority:
253
+ - milk_base
254
+ - size
255
+ - temperature
256
+ - sugar
257
+ - tea_base
258
+
259
+
260
+ # ═══════════════════════════════════════════════════════════════════
261
+ # 9. 选项规格模板展开规则(Option Specification Template Expander)
262
+ # ═══════════════════════════════════════════════════════════════════
263
+ # 将主数据表中的选项规格数据展开为空白选项模板的行。
264
+ # 来源: agent/option_expander.py
265
+
266
+ option_expansion:
267
+ description: >
268
+ 将包含选项规格定义的主数据表展开为选项模板的明细行。
269
+ 每个主数据行按 5 个维度(糖度/温度/规格/奶底/茶底)展开,
270
+ 每个维度的每个选项值生成一行模板数据。
271
+ 纯规则引擎,不调用 LLM。
272
+
273
+ # ── 主数据表列定义 ──
274
+
275
+ master_columns:
276
+ # 固定列(必须存在)
277
+ fixed:
278
+ - name: 主编码
279
+ canonical: product_code
280
+ required: true
281
+ description: 商品唯一编码
282
+
283
+ - name: 商品名称
284
+ canonical: product_name
285
+ required: true
286
+ description: 商品名称
287
+
288
+ # 展开维度(按遍历顺序)
289
+ dimensions:
290
+ - 糖度
291
+ - 温度
292
+ - 规格
293
+ - 奶底
294
+ - 茶底
295
+
296
+ # 每个维度下的 3 种列模式
297
+ dimension_column_patterns:
298
+ - prefix: "推荐"
299
+ example: 推荐糖度
300
+ description: 推荐值(单个,可为空)
301
+ - prefix: "默认"
302
+ example: 默认糖度
303
+ description: 默认值(单个,可为空)
304
+ - prefix: ""
305
+ example: 糖度
306
+ description: 选项值列表(;分隔)
307
+
308
+ # ── 模板输出列定义 ──
309
+
310
+ template_columns:
311
+ - 商品编码 # ← master.主编码
312
+ - 商品名称 # ← master.商品名称
313
+ - 口味做法组名 # ← 维度名(糖度/温度/规格/奶底/茶底)
314
+ - 选项名称 # ← 列表中的单个值
315
+ - 最少必选 # ← 固定为 1(严格整数)
316
+ - 最多可选 # ← 固定为 1(严格整数)
317
+ - 推荐项 # ← "是" 或 "否"
318
+ - 默认项 # ← "是" 或 "否"
319
+
320
+ # ── 展开逻辑 ──
321
+
322
+ expansion:
323
+ dimension_list_separator: ";"
324
+ dimension_order:
325
+ - 糖度
326
+ - 温度
327
+ - 规格
328
+ - 奶底
329
+ - 茶底
330
+
331
+ recommendation_rule: >
332
+ 选项值 == 推荐{dim} → 推荐项 = "是",否则 "否"
333
+ 选项值 == 默认{dim} → 默认项 = "是",否则 "否"
334
+ 推荐/默认值为空时,全部填"否"
335
+ 同一值可以同时是推荐项和默认项
336
+
337
+ deduplicate: true
338
+ skip_empty_values: true
339
+
340
+ empty_dimension_behavior: skip
341
+ all_dimensions_empty_behavior: skip
342
+
343
+ constants:
344
+ 最少必选: 1
345
+ 最多可选: 1
346
+ yes_value: "是"
347
+ no_value: "否"
348
+
349
+ differences_from_sop_pipeline:
350
+ - "不需要 Schema Analyzer:主数据列名固定,不需要 LLM 分析"
351
+ - "不需要 Token Classifier:没有组合字段需要解析"
352
+ - "不需要 Rule Engine:不需要 Canonical Schema 标准化"
353
+ - "不需要 Matching Engine:不做主数据与模板的匹配"
354
+ - "不需要 LangGraph 工作流:纯顺序执行,无条件路由"
355
+ - "不使用 LLM:100% 规则驱动,API 调用 = 0"
356
+
357
+
358
+ # ═══════════════════════════════════════════════════════════════════
359
+ # 10. LLM Tool 注册表(agent/tools.py + agent/sandbox.py)
360
+ # ═══════════════════════════════════════════════════════════════════
361
+
362
+ llm_tool_registry:
363
+ description: >
364
+ 将规则函数注册为 LLM 可调用的 Tool,供 LangGraph create_react_agent 消费。
365
+ pipeline 类 Tool 禁止 LLM 自行实现;supplementary 类 Tool 供辅助操作。
366
+
367
+ tools:
368
+ - name: run_sop_matching
369
+ category: pipeline
370
+ handler: agent.orchestration.run_sop_pipeline
371
+ guardrail: "LLM 必须调用此 tool 执行匹配,禁止自行生成匹配代码"
372
+
373
+ - name: run_option_expansion
374
+ category: pipeline
375
+ handler: agent.orchestration.run_expand_pipeline
376
+ guardrail: "LLM 必须调用此 tool 执行展开"
377
+
378
+ - name: execute_python
379
+ category: supplementary
380
+ handler: agent.sandbox.execute
381
+ guardrail: "LLM 可生成 Python 代码增删改查数据,禁止生成匹配/填充逻辑"
382
+ sandbox_allowed: [pandas, openpyxl, numpy, json, csv, re, collections]
383
+
384
+ - name: query_token_dict
385
+ category: supplementary
386
+ handler: agent.tools.query_token_dict
387
+ guardrail: "只读操作,可安全调用"