MenuPilot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- menupilot/__init__.py +3 -0
- menupilot/__main__.py +4 -0
- menupilot/agent/__init__.py +0 -0
- menupilot/agent/agent_loop.py +414 -0
- menupilot/agent/matching_engine.py +974 -0
- menupilot/agent/option_expander.py +490 -0
- menupilot/agent/orchestration.py +570 -0
- menupilot/agent/rule_engine.py +509 -0
- menupilot/agent/sandbox.py +216 -0
- menupilot/agent/schema_analyzer.py +1026 -0
- menupilot/agent/template_preprocessor.py +293 -0
- menupilot/agent/token_classifier.py +816 -0
- menupilot/agent/tools.py +365 -0
- menupilot/agent/workflow.py +1072 -0
- menupilot/cli/human_review.py +191 -0
- menupilot/cli/repl.py +821 -0
- menupilot/config.py +113 -0
- menupilot/data/__init__.py +0 -0
- menupilot/data/canonical_schema.py +135 -0
- menupilot/data/mapping_rules.yaml +387 -0
- menupilot/data/memory.py +674 -0
- menupilot/data/token_dict.py +275 -0
- menupilot/excel_io/__init__.py +0 -0
- menupilot/excel_io/excel_reader.py +552 -0
- menupilot/excel_io/excel_writer.py +413 -0
- menupilot/main.py +322 -0
- menupilot/wizard.py +86 -0
- menupilot-0.1.0.dist-info/METADATA +397 -0
- menupilot-0.1.0.dist-info/RECORD +33 -0
- menupilot-0.1.0.dist-info/WHEEL +5 -0
- menupilot-0.1.0.dist-info/entry_points.txt +2 -0
- menupilot-0.1.0.dist-info/licenses/LICENSE +21 -0
- menupilot-0.1.0.dist-info/top_level.txt +1 -0
menupilot/config.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
配置文件 — API Key、模型参数、匹配阈值。
|
|
3
|
+
|
|
4
|
+
配置优先级:~/.menupilot/config.json > 环境变量 > 程序默认值
|
|
5
|
+
|
|
6
|
+
使用方式:
|
|
7
|
+
from config import DEEPSEEK_API_KEY, DEEPSEEK_BASE_URL, MATCHING_CONFIG
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
# ── 用户配置文件加载 ─────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
_CONFIG_DIR = os.path.expanduser("~/.menupilot")
|
|
16
|
+
_CONFIG_PATH = os.path.join(_CONFIG_DIR, "config.json")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _load_json_config() -> dict:
    """Load user settings from ``~/.menupilot/config.json``.

    Loading is best-effort: any problem with the file yields an empty dict
    so the module-level settings fall back to environment variables and
    built-in defaults instead of failing at import time.

    Returns:
        The parsed top-level JSON object, or ``{}`` when the file is
        missing, unreadable, not valid UTF-8 / JSON, or when its top-level
        value is not a JSON object.
    """
    try:
        with open(_CONFIG_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (IOError is an alias).
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use ``.get()`` on the result; a top-level list, string or
    # number is valid JSON but would break them, so treat it as "no config".
    if not isinstance(data, dict):
        return {}
    return data
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
_json = _load_json_config()


def _configured(key, default):
    """Return the JSON-file value for *key*, else the env var, else *default*.

    Falsy values (missing, ``None``, empty string) fall through to the next
    source, exactly like a chained ``or``.
    """
    return _json.get(key) or os.environ.get(key) or default


# ── DeepSeek API settings ──
# Precedence: JSON config file > environment variable > default

# No hard-coded fallback key: the value stays "" when nothing is configured
# (the first run then triggers the setup wizard).
DEEPSEEK_API_KEY = _configured("DEEPSEEK_API_KEY", "")

DEEPSEEK_BASE_URL = _configured("DEEPSEEK_BASE_URL", "https://api.deepseek.com/v1")

DEEPSEEK_MODEL = _configured("DEEPSEEK_MODEL", "deepseek-chat")
|
|
52
|
+
|
|
53
|
+
# API 调用参数
|
|
54
|
+
# Low temperature: schema analysis and token classification need
# deterministic output.
LLM_TEMPERATURE = 0.1
LLM_MAX_TOKENS = 4096
LLM_TIMEOUT_SECONDS = 30
|
|
57
|
+
|
|
58
|
+
# ── 匹配引擎配置 ────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
MATCHING_CONFIG = {
    # Product-name matching (RapidFuzz token_sort_ratio)
    "product_name_threshold": 90,  # scores >= this value count as a match
    "product_name_scorer": "token_sort_ratio",

    # Embedding fallback
    "embedding_enabled": False,  # off by default (per the README design)
    "embedding_model": "paraphrase-multilingual-MiniLM-L12-v2",
    "embedding_top_k": 3,  # number of candidates to recall
    "embedding_similarity_threshold": 0.85,

    # Low confidence
    # Product-name scores below this value are LOW_CONFIDENCE outright.
    "low_confidence_threshold": 80,
}
|
|
74
|
+
|
|
75
|
+
# ── 日志配置 ────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# ── Mock settings for self-tests ──

# Self-tests use this flag to skip real LLM calls; only the exact value "1"
# in the environment enables it.
USE_MOCK_LLM = os.getenv("USE_MOCK_LLM", "0") == "1"
|
|
83
|
+
|
|
84
|
+
# Mock LLM 响应(Schema Analyzer)
|
|
85
|
+
MOCK_SCHEMA_RESPONSE = {
    "field_mapping": {"菜品名称": "product_name", "规格": "size"},
    "composite_col": "口味做法组合",
    "target_col": "配料",
    "irrelevant_cols": [],
}

# Mock LLM response (Token Classifier).
# The two entries differ only in the temperature token, so they are built
# from that single varying value; each entry is an independent object.
MOCK_TOKEN_RESPONSE = [
    {
        "tokens": [
            {"value": "红茶", "type": "茶底"},
            {"value": "十二分糖", "type": "糖度"},
            {"value": temperature, "type": "温度"},
        ],
        "missing": ["奶底"],
    }
    for temperature in ("温热", "正常冰")
]
|
|
File without changes
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Canonical Schema — 所有模板字段统一转换为此内部标准结构。
|
|
3
|
+
|
|
4
|
+
位于 Rule Engine 层的核心数据模型,被 Matching Engine / workflow 引用。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, List
|
|
8
|
+
|
|
9
|
+
# ── Canonical 字段列表 ────────────────────────────────────────
|
|
10
|
+
# 所有字段(含 tea_base 扩展字段 + composite_col/sop 特殊字段)
|
|
11
|
+
CANONICAL_FIELDS = [
    "product_name",
    "size",
    "milk_base",
    "temperature",
    "sugar",
    "tea_base",
    "composite_col",
    "sop",
]

# Required dimensions: these fields must be present when matching.
REQUIRED_DIMENSIONS = ["size", "temperature", "sugar"]

# Wildcard dimensions: when empty in the master data they match any value.
WILDCARD_DIMENSIONS = ["milk_base", "tea_base"]

# ── Fixed master-table column mapping ──
# Master-table column names are fixed Chinese names, so no LLM recognition
# is needed for them.
MASTER_COLUMN_MAP: Dict[str, str] = {
    "品名": "product_name",
    "杯型": "size",
    "奶底": "milk_base",
    "做法": "temperature",
    "糖": "sugar",
}

# ── Chinese token type name → canonical field name ──
TOKEN_TYPE_TO_FIELD: Dict[str, str] = {
    "温度": "temperature",
    "糖度": "sugar",
    "奶底": "milk_base",
    "规格": "size",
    "茶底": "tea_base",
}

# Reverse mapping: canonical field name → Chinese token type name.
FIELD_TO_TOKEN_TYPE: Dict[str, str] = dict(
    (field_name, type_name) for type_name, field_name in TOKEN_TYPE_TO_FIELD.items()
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def create_canonical_row(**kwargs) -> Dict:
    """Build one canonical-schema row.

    Every name in ``CANONICAL_FIELDS`` is present and defaults to ``None``;
    keyword arguments override those defaults. Keywords that are not
    canonical fields are kept and appended after the canonical ones.

    Returns:
        A new dict for the row.
    """
    # dict.fromkeys gives every canonical field a None default, in order.
    return {**dict.fromkeys(CANONICAL_FIELDS), **kwargs}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_missing_required(row: Dict) -> List[str]:
    """List the required dimensions that are missing from *row*.

    A dimension counts as missing when its key is absent or its value is
    ``None``; any other value (including an empty string) counts as present.

    Returns:
        The missing names, in ``REQUIRED_DIMENSIONS`` order.
    """
    absent: List[str] = []
    for dimension in REQUIRED_DIMENSIONS:
        if row.get(dimension) is None:
            absent.append(dimension)
    return absent
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def is_wildcard(field: str) -> bool:
    """Tell whether *field* is one of the ``WILDCARD_DIMENSIONS``.

    Per the module's definition, a wildcard dimension that is empty in the
    master data matches any value.
    """
    if field in WILDCARD_DIMENSIONS:
        return True
    return False
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ── 自测 ──────────────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
if __name__ == "__main__":
    # Self-test runner: prints one PASS/FAIL line per check, then a summary.
    passed = 0
    failed = 0

    def check(condition, msg):
        """Record one check result in the counters and print its PASS/FAIL line."""
        global passed, failed
        if condition:
            passed += 1
            print(f" PASS {msg}")
        else:
            failed += 1
            print(f" FAIL {msg}")

    print("=== Canonical Schema 自测 ===\n")

    # 1. Field definitions
    print("1. 字段定义")
    check(len(CANONICAL_FIELDS) == 8, f"CANONICAL_FIELDS 包含 8 个字段(实际 {len(CANONICAL_FIELDS)})")
    check("tea_base" in CANONICAL_FIELDS, "包含 tea_base 扩展字段")
    check("composite_col" in CANONICAL_FIELDS, "包含 composite_col 特殊字段")
    check("sop" in CANONICAL_FIELDS, "包含 sop 特殊字段")
    check(len(REQUIRED_DIMENSIONS) == 3, "3 个必要维度")
    check(len(WILDCARD_DIMENSIONS) == 2, "2 个通配维度")
    print()

    # 2. Master-table column mapping
    print("2. 主数据列映射")
    check(MASTER_COLUMN_MAP["品名"] == "product_name", "品名 → product_name")
    check(MASTER_COLUMN_MAP["做法"] == "temperature", "做法 → temperature")
    check(MASTER_COLUMN_MAP["糖"] == "sugar", "糖 → sugar")
    check(len(MASTER_COLUMN_MAP) == 5, "共 5 个映射")
    print()

    # 3. Token type mapping
    print("3. Token 类型映射")
    check(TOKEN_TYPE_TO_FIELD["温度"] == "temperature", "温度 → temperature")
    check(TOKEN_TYPE_TO_FIELD["茶底"] == "tea_base", "茶底 → tea_base")
    check(FIELD_TO_TOKEN_TYPE["temperature"] == "温度", "反向: temperature → 温度")
    print()

    # 4. create_canonical_row
    print("4. create_canonical_row")
    row = create_canonical_row(product_name="测试", size="中杯")
    check(row["product_name"] == "测试", "自定义字段生效")
    check(row["size"] == "中杯", "自定义字段生效")
    check(row["milk_base"] is None, "未指定字段默认 None")
    check(len(row) == 8, f"始终包含 8 个字段(实际 {len(row)})")
    print()

    # 5. get_missing_required
    print("5. get_missing_required")
    complete = {"size": "中杯", "temperature": "少冰", "sugar": "七分糖"}
    check(get_missing_required(complete) == [], "完整行 → 空列表")

    partial = {"size": "大杯", "temperature": None, "sugar": "全糖"}
    missing = get_missing_required(partial)
    check("temperature" in missing and len(missing) == 1, "缺 1 个维度被检测到")

    empty = {"size": None, "temperature": None, "sugar": None}
    check(len(get_missing_required(empty)) == 3, "缺 3 个维度全部检测到")
    print()

    # 6. is_wildcard
    print("6. is_wildcard")
    check(is_wildcard("milk_base") is True, "milk_base 是通配维度")
    check(is_wildcard("tea_base") is True, "tea_base 是通配维度")
    check(is_wildcard("size") is False, "size 不是通配维度")
    check(is_wildcard("temperature") is False, "temperature 不是通配维度")
    print()

    # ── Summary ──
    print(f"=== 结果: {passed} passed, {failed} failed ===")

    # Exit non-zero when any check failed so scripts and CI can detect it;
    # a fully passing run still ends with the normal exit status 0.
    if failed:
        raise SystemExit(1)
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
# PosAgent 模板映射规则
|
|
2
|
+
# ========================
|
|
3
|
+
# 从 agent/rule_engine.py、data/canonical_schema.py、data/token_dict.py、
|
|
4
|
+
# agent/matching_engine.py、config.py 中提取,作为映射规则的声明式参考。
|
|
5
|
+
#
|
|
6
|
+
# 用途:
|
|
7
|
+
# - 新增模板映射规则时的参考基准
|
|
8
|
+
# - 未来可从 YAML 加载规则替代硬编码
|
|
9
|
+
#
|
|
10
|
+
# 更新日期:2026-06-16
|
|
11
|
+
|
|
12
|
+
version: "1.0"
|
|
13
|
+
description: "PosAgent 模板映射规则 — 所有模板字段统一转换为此内部标准结构"
|
|
14
|
+
|
|
15
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
16
|
+
# 1. Canonical Schema(内部标准字段)
|
|
17
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
18
|
+
|
|
19
|
+
canonical_schema:
|
|
20
|
+
# 所有 canonical 字段(含扩展字段和特殊字段)
|
|
21
|
+
fields:
|
|
22
|
+
- product_name # 商品名称
|
|
23
|
+
- size # 规格/杯型
|
|
24
|
+
- milk_base # 奶底
|
|
25
|
+
- temperature # 温度/做法
|
|
26
|
+
- sugar # 糖度
|
|
27
|
+
- tea_base # 茶底(扩展字段)
|
|
28
|
+
- composite_col # 组合字段名(如"口味做法组合",特殊字段)
|
|
29
|
+
- sop # SOP 代码(目标值,特殊字段)
|
|
30
|
+
|
|
31
|
+
# 必要维度:匹配时这些字段必须有值,缺列直接报错
|
|
32
|
+
required_dimensions:
|
|
33
|
+
- size
|
|
34
|
+
- temperature
|
|
35
|
+
- sugar
|
|
36
|
+
|
|
37
|
+
# 通配维度:主数据中为空时可匹配任意值
|
|
38
|
+
wildcard_dimensions:
|
|
39
|
+
- milk_base
|
|
40
|
+
- tea_base
|
|
41
|
+
|
|
42
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
43
|
+
# 2. 主数据表列映射(中文列名 → Canonical 字段)
|
|
44
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
45
|
+
# 主数据表字段名固定为中文,不需要 LLM 识别
|
|
46
|
+
# 来源: data/canonical_schema.py MASTER_COLUMN_MAP
|
|
47
|
+
|
|
48
|
+
master_column_map:
|
|
49
|
+
品名: product_name
|
|
50
|
+
杯型: size
|
|
51
|
+
奶底: milk_base
|
|
52
|
+
做法: temperature
|
|
53
|
+
糖: sugar
|
|
54
|
+
|
|
55
|
+
# 主数据表 SOP 列的候选名称(按优先级查找)
|
|
56
|
+
master_sop_candidates:
|
|
57
|
+
- SOP
|
|
58
|
+
- 配料
|
|
59
|
+
- SOP 代码
|
|
60
|
+
- 代码
|
|
61
|
+
|
|
62
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
63
|
+
# 3. Token 类型映射(中文类型名 → Canonical 字段)
|
|
64
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
65
|
+
# 来源: data/canonical_schema.py TOKEN_TYPE_TO_FIELD
|
|
66
|
+
|
|
67
|
+
token_type_mapping:
|
|
68
|
+
温度: temperature
|
|
69
|
+
糖度: sugar
|
|
70
|
+
奶底: milk_base
|
|
71
|
+
规格: size
|
|
72
|
+
茶底: tea_base
|
|
73
|
+
|
|
74
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
75
|
+
# 4. Token 词典(属性值 → 类型)
|
|
76
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
77
|
+
# 词典为软约束:LLM Token Classifier 可识别词典外的值并标注 UNKNOWN_TOKEN
|
|
78
|
+
# 来源: data/token_dict.py _RAW_TOKENS
|
|
79
|
+
|
|
80
|
+
token_dictionary:
|
|
81
|
+
温度:
|
|
82
|
+
- 热
|
|
83
|
+
- 温热
|
|
84
|
+
- 正常冰
|
|
85
|
+
- 少冰
|
|
86
|
+
- 去冰
|
|
87
|
+
- 冰沙
|
|
88
|
+
|
|
89
|
+
糖度:
|
|
90
|
+
- 全糖
|
|
91
|
+
- 十二分糖
|
|
92
|
+
- 标准糖
|
|
93
|
+
- 七分糖
|
|
94
|
+
- 五分糖
|
|
95
|
+
- 三分糖
|
|
96
|
+
- 不另加糖
|
|
97
|
+
- 无糖
|
|
98
|
+
|
|
99
|
+
奶底:
|
|
100
|
+
- 牛奶
|
|
101
|
+
- 燕麦奶
|
|
102
|
+
- 厚乳
|
|
103
|
+
- 椰乳
|
|
104
|
+
|
|
105
|
+
规格:
|
|
106
|
+
- 大杯
|
|
107
|
+
- 中杯
|
|
108
|
+
- 小杯
|
|
109
|
+
- 五角瓶
|
|
110
|
+
|
|
111
|
+
茶底:
|
|
112
|
+
- 红茶
|
|
113
|
+
- 绿茶
|
|
114
|
+
- 乌龙茶
|
|
115
|
+
- 五角排红茶
|
|
116
|
+
- 五黄标准茶
|
|
117
|
+
- 茉莉绿茶 # 2026-06-05: testdata 口味做法组合中出现
|
|
118
|
+
|
|
119
|
+
# 已知后缀模式(文档备忘,normalize_token 通过分隔符边界检测自动处理)
|
|
120
|
+
known_suffixes:
|
|
121
|
+
- "|推荐" # 主数据糖度列常见:七分糖|推荐
|
|
122
|
+
- "/新" # 模板规格列偶尔出现:大杯/新
|
|
123
|
+
|
|
124
|
+
# normalize_token 分隔符集合(用于子串边界检测和后缀切割)
|
|
125
|
+
separators:
|
|
126
|
+
- "|"
|
|
127
|
+
- "/"
|
|
128
|
+
- " "
|
|
129
|
+
|
|
130
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
131
|
+
# 5. 匹配引擎配置
|
|
132
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
133
|
+
# 来源: config.py MATCHING_CONFIG + agent/matching_engine.py 常量
|
|
134
|
+
|
|
135
|
+
matching:
|
|
136
|
+
# 商品名匹配(RapidFuzz)
|
|
137
|
+
product_name_threshold: 90 # token_sort_ratio ≥ 此值视为高置信匹配
|
|
138
|
+
low_confidence_threshold: 80 # 低于此值直接 LOW_CONFIDENCE
|
|
139
|
+
scorer: token_sort_ratio
|
|
140
|
+
|
|
141
|
+
# Embedding 兜底(默认关闭)
|
|
142
|
+
embedding:
|
|
143
|
+
enabled: false
|
|
144
|
+
model: paraphrase-multilingual-MiniLM-L12-v2
|
|
145
|
+
top_k: 3
|
|
146
|
+
similarity_threshold: 0.85
|
|
147
|
+
|
|
148
|
+
# 匹配类型枚举
|
|
149
|
+
match_types:
|
|
150
|
+
exact: "exact" # 商品名 + 全属性精确匹配
|
|
151
|
+
attribute_match: "attribute_match" # 属性组合匹配
|
|
152
|
+
product_only: "product_only" # 仅商品名匹配(属性不匹配)
|
|
153
|
+
best_guess: "best_guess" # 兜底最佳猜测
|
|
154
|
+
|
|
155
|
+
# 置信度枚举
|
|
156
|
+
confidence_levels:
|
|
157
|
+
high: "HIGH"
|
|
158
|
+
low_confidence: "LOW_CONFIDENCE"
|
|
159
|
+
|
|
160
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
161
|
+
# 6. 失败原因码及中文描述
|
|
162
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
163
|
+
# 来源: agent/matching_engine.py _REASON_CN_MAP / _REASON_SUGGESTION_MAP
|
|
164
|
+
|
|
165
|
+
failure_reasons:
|
|
166
|
+
PRODUCT_NOT_FOUND:
|
|
167
|
+
cn: "商品名称在主数据中未找到"
|
|
168
|
+
suggestion: "检查商品名称是否有错别字,或补充主数据表"
|
|
169
|
+
|
|
170
|
+
MILK_BASE_NOT_FOUND:
|
|
171
|
+
cn: "{extra}规格在主数据中缺失"
|
|
172
|
+
suggestion: "补充 {extra} 相关 SOP 到主数据表"
|
|
173
|
+
|
|
174
|
+
SIZE_NOT_FOUND:
|
|
175
|
+
cn: "规格在主数据中缺失"
|
|
176
|
+
suggestion: ""
|
|
177
|
+
|
|
178
|
+
TEMPERATURE_NOT_FOUND:
|
|
179
|
+
cn: "温度/做法在主数据中缺失"
|
|
180
|
+
suggestion: ""
|
|
181
|
+
|
|
182
|
+
SUGAR_NOT_FOUND:
|
|
183
|
+
cn: "糖度在主数据中缺失"
|
|
184
|
+
suggestion: ""
|
|
185
|
+
|
|
186
|
+
TEA_BASE_NOT_FOUND:
|
|
187
|
+
cn: "茶底在主数据中缺失"
|
|
188
|
+
suggestion: ""
|
|
189
|
+
|
|
190
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
191
|
+
# 7. 模板预处理规则
|
|
192
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
193
|
+
# 来源: agent/template_preprocessor.py
|
|
194
|
+
|
|
195
|
+
template_preprocessing:
|
|
196
|
+
# 模板类型检测条件(chowbus 类型)
|
|
197
|
+
detection:
|
|
198
|
+
chowbus:
|
|
199
|
+
# 条件 1: 第一行至少有 3 个纯英文列名
|
|
200
|
+
min_english_headers: 3
|
|
201
|
+
# 条件 2: 第二行包含中文字符
|
|
202
|
+
row1_contains_chinese: true
|
|
203
|
+
# 条件 3: 必须存在 item_cn 列
|
|
204
|
+
required_columns:
|
|
205
|
+
- item_cn
|
|
206
|
+
# 条件 4: 必须存在至少一个 customization{N}_id 列
|
|
207
|
+
customization_pattern: "^customization\\d+_id$"
|
|
208
|
+
|
|
209
|
+
# chowbus 行收集规则
|
|
210
|
+
chowbus_row_collection:
|
|
211
|
+
# Start reading at the 3rd row, i.e. 0-based row index 2 (skips the English header row and the Chinese annotation row)
|
|
212
|
+
data_start_row: 2
|
|
213
|
+
# 以 item_cn 列为锚点
|
|
214
|
+
anchor_column: item_cn
|
|
215
|
+
# 仅收集锚点右侧的中文值
|
|
216
|
+
scan_direction: right
|
|
217
|
+
# 过滤规则:纯英文、纯数字值跳过
|
|
218
|
+
skip_patterns:
|
|
219
|
+
- "^[a-zA-Z_][a-zA-Z0-9_]*$" # 纯英文标识符
|
|
220
|
+
- "^\\d+$" # 纯数字
|
|
221
|
+
# 值清洗:去除 |推荐 等后缀标记
|
|
222
|
+
clean_suffix_markers: true
|
|
223
|
+
|
|
224
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
225
|
+
# 8. 属性匹配规则
|
|
226
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
227
|
+
# 来源: agent/matching_engine.py _attributes_match()
|
|
228
|
+
|
|
229
|
+
attribute_matching:
|
|
230
|
+
# 必要维度匹配:master 和 template 都必须有值且精确相等
|
|
231
|
+
required_rules:
|
|
232
|
+
- field: size
|
|
233
|
+
match: exact
|
|
234
|
+
miss_penalty: unmatched
|
|
235
|
+
- field: temperature
|
|
236
|
+
match: exact
|
|
237
|
+
miss_penalty: unmatched
|
|
238
|
+
- field: sugar
|
|
239
|
+
match: exact
|
|
240
|
+
miss_penalty: unmatched
|
|
241
|
+
|
|
242
|
+
# 通配维度匹配:master 为空时通配(跳过),master 有值时精确匹配
|
|
243
|
+
wildcard_rules:
|
|
244
|
+
- field: milk_base
|
|
245
|
+
master_empty: wildcard # master 为空 → 通配任意 template 值
|
|
246
|
+
master_has_value: exact # master 有值 → 必须精确匹配
|
|
247
|
+
- field: tea_base
|
|
248
|
+
master_empty: wildcard
|
|
249
|
+
master_has_value: exact
|
|
250
|
+
|
|
251
|
+
# LOW_CONFIDENCE 失败原因推断优先级(从高到低)
|
|
252
|
+
failure_reason_priority:
|
|
253
|
+
- milk_base
|
|
254
|
+
- size
|
|
255
|
+
- temperature
|
|
256
|
+
- sugar
|
|
257
|
+
- tea_base
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
261
|
+
# 9. 选项规格模板展开规则(Option Specification Template Expander)
|
|
262
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
263
|
+
# 将主数据表中的选项规格数据展开为空白选项模板的行。
|
|
264
|
+
# 来源: agent/option_expander.py
|
|
265
|
+
|
|
266
|
+
option_expansion:
|
|
267
|
+
description: >
|
|
268
|
+
将包含选项规格定义的主数据表展开为选项模板的明细行。
|
|
269
|
+
每个主数据行按 5 个维度(糖度/温度/规格/奶底/茶底)展开,
|
|
270
|
+
每个维度的每个选项值生成一行模板数据。
|
|
271
|
+
纯规则引擎,不调用 LLM。
|
|
272
|
+
|
|
273
|
+
# ── 主数据表列定义 ──
|
|
274
|
+
|
|
275
|
+
master_columns:
|
|
276
|
+
# 固定列(必须存在)
|
|
277
|
+
fixed:
|
|
278
|
+
- name: 主编码
|
|
279
|
+
canonical: product_code
|
|
280
|
+
required: true
|
|
281
|
+
description: 商品唯一编码
|
|
282
|
+
|
|
283
|
+
- name: 商品名称
|
|
284
|
+
canonical: product_name
|
|
285
|
+
required: true
|
|
286
|
+
description: 商品名称
|
|
287
|
+
|
|
288
|
+
# 展开维度(按遍历顺序)
|
|
289
|
+
dimensions:
|
|
290
|
+
- 糖度
|
|
291
|
+
- 温度
|
|
292
|
+
- 规格
|
|
293
|
+
- 奶底
|
|
294
|
+
- 茶底
|
|
295
|
+
|
|
296
|
+
# 每个维度下的 3 种列模式
|
|
297
|
+
dimension_column_patterns:
|
|
298
|
+
- prefix: "推荐"
|
|
299
|
+
example: 推荐糖度
|
|
300
|
+
description: 推荐值(单个,可为空)
|
|
301
|
+
- prefix: "默认"
|
|
302
|
+
example: 默认糖度
|
|
303
|
+
description: 默认值(单个,可为空)
|
|
304
|
+
- prefix: ""
|
|
305
|
+
example: 糖度
|
|
306
|
+
description: 选项值列表(;分隔)
|
|
307
|
+
|
|
308
|
+
# ── 模板输出列定义 ──
|
|
309
|
+
|
|
310
|
+
template_columns:
|
|
311
|
+
- 商品编码 # ← master.主编码
|
|
312
|
+
- 商品名称 # ← master.商品名称
|
|
313
|
+
- 口味做法组名 # ← 维度名(糖度/温度/规格/奶底/茶底)
|
|
314
|
+
- 选项名称 # ← 列表中的单个值
|
|
315
|
+
- 最少必选 # ← 固定为 1(严格整数)
|
|
316
|
+
- 最多可选 # ← 固定为 1(严格整数)
|
|
317
|
+
- 推荐项 # ← "是" 或 "否"
|
|
318
|
+
- 默认项 # ← "是" 或 "否"
|
|
319
|
+
|
|
320
|
+
# ── 展开逻辑 ──
|
|
321
|
+
|
|
322
|
+
expansion:
|
|
323
|
+
dimension_list_separator: ";"
|
|
324
|
+
dimension_order:
|
|
325
|
+
- 糖度
|
|
326
|
+
- 温度
|
|
327
|
+
- 规格
|
|
328
|
+
- 奶底
|
|
329
|
+
- 茶底
|
|
330
|
+
|
|
331
|
+
recommendation_rule: >
|
|
332
|
+
选项值 == 推荐{dim} → 推荐项 = "是",否则 "否"
|
|
333
|
+
选项值 == 默认{dim} → 默认项 = "是",否则 "否"
|
|
334
|
+
推荐/默认值为空时,全部填"否"
|
|
335
|
+
同一值可以同时是推荐项和默认项
|
|
336
|
+
|
|
337
|
+
deduplicate: true
|
|
338
|
+
skip_empty_values: true
|
|
339
|
+
|
|
340
|
+
empty_dimension_behavior: skip
|
|
341
|
+
all_dimensions_empty_behavior: skip
|
|
342
|
+
|
|
343
|
+
constants:
|
|
344
|
+
最少必选: 1
|
|
345
|
+
最多可选: 1
|
|
346
|
+
yes_value: "是"
|
|
347
|
+
no_value: "否"
|
|
348
|
+
|
|
349
|
+
differences_from_sop_pipeline:
|
|
350
|
+
- "不需要 Schema Analyzer:主数据列名固定,不需要 LLM 分析"
|
|
351
|
+
- "不需要 Token Classifier:没有组合字段需要解析"
|
|
352
|
+
- "不需要 Rule Engine:不需要 Canonical Schema 标准化"
|
|
353
|
+
- "不需要 Matching Engine:不做主数据与模板的匹配"
|
|
354
|
+
- "不需要 LangGraph 工作流:纯顺序执行,无条件路由"
|
|
355
|
+
- "不使用 LLM:100% 规则驱动,API 调用 = 0"
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
359
|
+
# 10. LLM Tool 注册表(agent/tools.py + agent/sandbox.py)
|
|
360
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
361
|
+
|
|
362
|
+
llm_tool_registry:
|
|
363
|
+
description: >
|
|
364
|
+
将规则函数注册为 LLM 可调用的 Tool,供 LangGraph create_react_agent 消费。
|
|
365
|
+
pipeline 类 Tool 禁止 LLM 自行实现;supplementary 类 Tool 供辅助操作。
|
|
366
|
+
|
|
367
|
+
tools:
|
|
368
|
+
- name: run_sop_matching
|
|
369
|
+
category: pipeline
|
|
370
|
+
handler: agent.orchestration.run_sop_pipeline
|
|
371
|
+
guardrail: "LLM 必须调用此 tool 执行匹配,禁止自行生成匹配代码"
|
|
372
|
+
|
|
373
|
+
- name: run_option_expansion
|
|
374
|
+
category: pipeline
|
|
375
|
+
handler: agent.orchestration.run_expand_pipeline
|
|
376
|
+
guardrail: "LLM 必须调用此 tool 执行展开"
|
|
377
|
+
|
|
378
|
+
- name: execute_python
|
|
379
|
+
category: supplementary
|
|
380
|
+
handler: agent.sandbox.execute
|
|
381
|
+
guardrail: "LLM 可生成 Python 代码增删改查数据,禁止生成匹配/填充逻辑"
|
|
382
|
+
sandbox_allowed: [pandas, openpyxl, numpy, json, csv, re, collections]
|
|
383
|
+
|
|
384
|
+
- name: query_token_dict
|
|
385
|
+
category: supplementary
|
|
386
|
+
handler: agent.tools.query_token_dict
|
|
387
|
+
guardrail: "只读操作,可安全调用"
|