MenuPilot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- menupilot/__init__.py +3 -0
- menupilot/__main__.py +4 -0
- menupilot/agent/__init__.py +0 -0
- menupilot/agent/agent_loop.py +414 -0
- menupilot/agent/matching_engine.py +974 -0
- menupilot/agent/option_expander.py +490 -0
- menupilot/agent/orchestration.py +570 -0
- menupilot/agent/rule_engine.py +509 -0
- menupilot/agent/sandbox.py +216 -0
- menupilot/agent/schema_analyzer.py +1026 -0
- menupilot/agent/template_preprocessor.py +293 -0
- menupilot/agent/token_classifier.py +816 -0
- menupilot/agent/tools.py +365 -0
- menupilot/agent/workflow.py +1072 -0
- menupilot/cli/human_review.py +191 -0
- menupilot/cli/repl.py +821 -0
- menupilot/config.py +113 -0
- menupilot/data/__init__.py +0 -0
- menupilot/data/canonical_schema.py +135 -0
- menupilot/data/mapping_rules.yaml +387 -0
- menupilot/data/memory.py +674 -0
- menupilot/data/token_dict.py +275 -0
- menupilot/excel_io/__init__.py +0 -0
- menupilot/excel_io/excel_reader.py +552 -0
- menupilot/excel_io/excel_writer.py +413 -0
- menupilot/main.py +322 -0
- menupilot/wizard.py +86 -0
- menupilot-0.1.0.dist-info/METADATA +397 -0
- menupilot-0.1.0.dist-info/RECORD +33 -0
- menupilot-0.1.0.dist-info/WHEEL +5 -0
- menupilot-0.1.0.dist-info/entry_points.txt +2 -0
- menupilot-0.1.0.dist-info/licenses/LICENSE +21 -0
- menupilot-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Token 词典 — 正向查找(词→类型)与反向查找(类型→词列表)。
|
|
3
|
+
|
|
4
|
+
词典为软约束:LLM Token Classifier 可识别词典外的值并标注为 UNKNOWN_TOKEN。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
# ── 原始词典定义 ──────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
# Raw dictionary definition: token type -> the words that belong to it.
_RAW_TOKENS: Dict[str, List[str]] = {
    "温度": ["热", "温热", "正常冰", "少冰", "去冰", "冰沙"],
    "糖度": [
        "全糖", "十二分糖", "标准糖", "七分糖",
        "五分糖", "三分糖", "不另加糖", "无糖",
    ],
    "奶底": ["牛奶", "燕麦奶", "厚乳", "椰乳"],
    "规格": ["大杯", "中杯", "小杯", "五角瓶"],
    "茶底": [
        "红茶", "绿茶", "乌龙茶", "五角排红茶", "五黄标准茶",
        # Entries below were added after scanning the real data in
        # testdata/待匹配数据表.xlsx.
        "茉莉绿茶",  # 2026-06-05: seen in the testdata flavour/preparation combinations
    ],
}

# ── Known suffix patterns (documentation only) ──────────────────
# These suffixes get appended to attribute values and make exact matching
# fail. normalize_token() copes with them through separator-boundary
# detection and does not read this list; it exists for human review only.
# Append newly discovered suffixes here.
KNOWN_SUFFIXES: List[str] = [
    "|推荐",  # common in the master-data sugar column: 七分糖|推荐
    "/新",  # occasionally seen in the template size column: 大杯/新
]

# Separator characters used for substring-boundary checks and suffix cutting.
_SEPARATORS = frozenset("|/ ")

# ── Lookup structures ────────────────────────────────────────────

# Forward table: word -> type. If a word were listed under two types, the
# type that appears later in _RAW_TOKENS would win.
TOKEN_MAP: Dict[str, str] = {}
for _type, _words in _RAW_TOKENS.items():
    TOKEN_MAP.update(dict.fromkeys(_words, _type))

# Reverse table: type -> list of words. Each list is a copy, so changing it
# never touches _RAW_TOKENS. Intended to be treated as read-only; nothing
# enforces that.
TOKEN_BY_TYPE: Dict[str, List[str]] = {
    _type: _words[:] for _type, _words in _RAW_TOKENS.items()
}

# Sentinel type name reported for words that are not in the dictionary.
UNKNOWN_TOKEN = "UNKNOWN_TOKEN"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ── 公开 API ──────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
def lookup(token: str) -> str:
    """Forward lookup: map a token to its type name.

    Args:
        token: The word to look up. It is used exactly as given; no
            stripping or suffix removal is applied.

    Returns:
        The type recorded for ``token`` in ``TOKEN_MAP``, or
        ``UNKNOWN_TOKEN`` when the word is not in the dictionary.
    """
    try:
        return TOKEN_MAP[token]
    except KeyError:
        return UNKNOWN_TOKEN
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def normalize_token(raw_value: str) -> str:
    """Reduce a suffixed attribute value to its dictionary token.

    The value is stripped of surrounding whitespace and then resolved in
    four steps; the first step that succeeds decides the result.

    1. Exact match: the stripped value is itself a dictionary word.
    2. Bounded substring match: a dictionary word occurs inside the value
       and each side of that occurrence touches either the end of the
       string or a separator (``|``, ``/`` or a space). Every occurrence of
       a word is examined, not only the first one. The longest qualifying
       word wins; for equal lengths the word that comes first in
       ``TOKEN_MAP`` wins. The boundary rule keeps a short word from
       matching in the middle of a longer, unrelated value.
    3. Leading-segment match: the text before the first ``|``, then before
       the first ``/``, then before the first run of whitespace, is tried
       as an exact match. This covers whitespace that step 2 does not treat
       as a boundary, such as a tab.
    4. No match: ``raw_value`` is returned unchanged.

    Args:
        raw_value: Raw attribute value, possibly carrying a suffix, e.g.
            "正常冰|推荐" or "大杯/新".

    Returns:
        The matching dictionary word, or the unmodified input when nothing
        matches. The result is therefore not guaranteed to be in the
        dictionary; pair it with ``lookup()`` or ``is_known()``. ``None``
        yields ``""``; any other falsy or non-string input is handed back
        as-is.

    Examples:
        >>> normalize_token("正常冰")            # step 1
        '正常冰'
        >>> normalize_token("正常冰|推荐")        # step 2
        '正常冰'
        >>> normalize_token("大杯/新")            # step 2
        '大杯'
        >>> normalize_token("七分糖|推荐")        # step 2
        '七分糖'
        >>> normalize_token("红茶, 十二分糖")     # "," is no separator, the space is
        '十二分糖'
        >>> normalize_token("珍珠奶茶")           # step 4
        '珍珠奶茶'
    """
    if not raw_value or not isinstance(raw_value, str):
        # Keep the long-standing contract: None becomes "", everything else
        # (empty string, numbers, ...) is returned untouched.
        return raw_value if raw_value is not None else ""

    val = raw_value.strip()
    if not val:
        # Whitespace-only input: nothing to match, hand back the original.
        return raw_value

    # ── Step 1: exact match ──
    if val in TOKEN_MAP:
        return val

    # ── Step 2: substring match with separator boundaries ──
    best_match: Optional[str] = None
    for word in TOKEN_MAP:
        # Only a strictly longer word can replace the current best, so
        # shorter or equal candidates need no scan at all.
        if best_match is not None and len(word) <= len(best_match):
            continue
        start = val.find(word)
        while start != -1:
            end = start + len(word)
            left_ok = start == 0 or val[start - 1] in _SEPARATORS
            right_ok = end == len(val) or val[end] in _SEPARATORS
            if left_ok and right_ok:
                best_match = word
                break
            # The first hit may sit inside another word while a later hit is
            # properly bounded, so keep scanning the remaining occurrences.
            start = val.find(word, start + 1)

    if best_match is not None:
        return best_match

    # ── Step 3: cut at a separator and try the leading segment ──
    for sep in ("|", "/"):
        head = val.partition(sep)[0].strip()
        if head in TOKEN_MAP:
            return head

    # str.split() without arguments splits on any whitespace run, which is
    # broader than the single space accepted as a boundary in step 2.
    # ``val`` is non-empty and stripped here, so there is always a first piece.
    head = val.split(None, 1)[0]
    if head in TOKEN_MAP:
        return head

    # ── Step 4: nothing matched ──
    return raw_value
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def get_tokens_by_type(token_type: str) -> List[str]:
    """Reverse lookup: return the tokens registered under ``token_type``.

    A fresh list is built on every call. Handing out the list stored in
    ``TOKEN_BY_TYPE`` itself would let a caller's ``append``/``sort`` change
    the shared table for everyone and leave it out of step with
    ``TOKEN_MAP``.

    Args:
        token_type: Type name to look up, i.e. a key of ``TOKEN_BY_TYPE``.

    Returns:
        A new list with the tokens of that type in definition order, or an
        empty list when the type is not defined.
    """
    return list(TOKEN_BY_TYPE.get(token_type, ()))
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def list_types() -> List[str]:
    """Return the names of all token types as a new list.

    The names are the keys of ``TOKEN_BY_TYPE`` in their iteration order.
    """
    return [*TOKEN_BY_TYPE]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def is_known(token: str) -> bool:
    """Report whether ``token`` is a dictionary word.

    The check is an exact membership test against the keys of
    ``TOKEN_MAP``; the token is not stripped or normalised first.
    """
    found = token in TOKEN_MAP
    return found
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ── 自测 ──────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
if __name__ == "__main__":
    # Self-test: run this module as a script to exercise the dictionary API.
    # Each check prints a PASS/FAIL line; the process exits with status 1
    # when at least one check failed so that scripts and CI can notice.
    tally = {"passed": 0, "failed": 0}

    def check(condition, msg):
        """Record the outcome of one check and print a PASS/FAIL line."""
        if condition:
            tally["passed"] += 1
            print(f" PASS {msg}")
        else:
            tally["failed"] += 1
            print(f" FAIL {msg}")

    print("=== Token 词典自测 ===\n")

    # ── 1. Forward lookup: known words ──
    print("1. 正向查找(正常词)")
    for tok, kind in (
        ("热", "温度"),
        ("少冰", "温度"),
        ("七分糖", "糖度"),
        ("无糖", "糖度"),
        ("牛奶", "奶底"),
        ("椰乳", "奶底"),
        ("中杯", "规格"),
        ("五角瓶", "规格"),
        ("红茶", "茶底"),
        ("乌龙茶", "茶底"),
    ):
        check(lookup(tok) == kind, f"'{tok}' → {kind}")
    print()

    # ── 2. Forward lookup: unknown words ──
    print("2. 正向查找(未知词 → UNKNOWN_TOKEN)")
    for tok in ("珍珠", "布丁"):
        check(lookup(tok) == UNKNOWN_TOKEN, f"'{tok}' → UNKNOWN_TOKEN")
    check(lookup("") == UNKNOWN_TOKEN, "空字符串 → UNKNOWN_TOKEN")
    print()

    # ── 3. Reverse lookup ──
    print("3. 反向查找(类型 → 词列表)")
    for kind, expected_count, member in (
        ("温度", 6, "正常冰"),
        ("糖度", 8, "标准糖"),
        ("奶底", 4, None),
        ("规格", 4, None),
        ("茶底", 6, None),
    ):
        tokens = get_tokens_by_type(kind)
        check(
            len(tokens) == expected_count,
            f"{kind} 包含 {expected_count} 个词(实际 {len(tokens)})",
        )
        if member is not None:
            check(member in tokens, f"{kind} 包含 '{member}'")

    unknown_tokens = get_tokens_by_type("不存在的类型")
    check(unknown_tokens == [], "不存在的类型 → 空列表")
    print()

    # ── 4. is_known helper ──
    print("4. is_known 辅助函数")
    check(is_known("去冰") is True, "'去冰' is known")
    check(is_known("珍珠") is False, "'珍珠' is NOT known")
    print()

    # ── 5. list_types ──
    print("5. list_types")
    types = list_types()
    check(len(types) == 5, f"共 5 种类型(实际 {len(types)})")
    check("温度" in types and "茶底" in types, "包含 '温度' 和 '茶底'")
    print()

    # ── 6. normalize_token: step 1, exact match ──
    print("6. normalize_token(Step 1: 精确匹配 → 返回原值)")
    for tok in ("正常冰", "七分糖", "大杯", "燕麦奶"):
        check(normalize_token(tok) == tok, f"clean '{tok}' → '{tok}'")
    print()

    # ── 7. normalize_token: step 2, bounded substring match ──
    print("7. normalize_token(Step 2: 子串边界匹配,去后缀)")
    for raw, want in (
        ("正常冰|推荐", "正常冰"),
        ("七分糖|推荐", "七分糖"),
        ("大杯/新", "大杯"),
        ("标准糖|推荐", "标准糖"),
        ("去冰|推荐", "去冰"),
        ("少冰 推荐", "少冰"),  # space as the boundary
    ):
        check(normalize_token(raw) == want, f"'{raw}' → '{want}'")
    print()

    # ── 8. normalize_token: step 2 must not match inside a word ──
    print("8. normalize_token(Step 2: 防误匹配 — 禁止任意位置子串)")
    check(normalize_token("正常冰沙") == "正常冰沙", "'正常冰沙' ≠ '冰'(左边界不是分隔符)")
    check(normalize_token("冰沙") == "冰沙", "'冰沙' 本身在词典中 → 返回原值(Step 1)")
    # A multi-character dictionary word followed by a suffix is matched whole.
    check(
        normalize_token("五黄标准茶|推荐") == "五黄标准茶",
        "'五黄标准茶|推荐' → '五黄标准茶'(整词匹配,去除后缀)",
    )
    print()

    # ── 9. normalize_token: leading token before a separator ──
    # The first three inputs already resolve in step 2 because "|", "/" and
    # the space count as boundaries there. The tab-separated input is the one
    # that only the whitespace split of step 3 can resolve.
    print("9. normalize_token(Step 3: 分隔符切割后首 token 匹配)")
    for raw, want in (
        ("全糖/新/尝鲜", "全糖"),
        ("牛奶|推荐|热销", "牛奶"),
        ("大杯 热门", "大杯"),
    ):
        check(normalize_token(raw) == want, f"'{raw}' → '{want}'")
    check(normalize_token("大杯\t热门") == "大杯", "'大杯\\t热门' → '大杯'(制表符仅由 Step 3 处理)")
    print()

    # ── 10. normalize_token: step 4, nothing matches ──
    print("10. normalize_token(Step 4: 无法匹配 → 返回原值)")
    check(normalize_token("珍珠奶茶") == "珍珠奶茶", "未知值 '珍珠奶茶' 原样返回")
    check(normalize_token("") == "", "空字符串 → ''")
    check(normalize_token(" 全糖 ") == "全糖", "' 全糖 ' → '全糖'(去空白后 Step 1)")
    print()

    # ── 11. KNOWN_SUFFIXES documentation list ──
    print("11. KNOWN_SUFFIXES 文档")
    check(len(KNOWN_SUFFIXES) == 2, f"记录了 2 个已知后缀模式: {KNOWN_SUFFIXES}")
    check("|推荐" in KNOWN_SUFFIXES, "包含 '|推荐'")
    check("/新" in KNOWN_SUFFIXES, "包含 '/新'")
    print()

    # ── Summary ──
    passed = tally["passed"]
    failed = tally["failed"]
    total_types = len(TOKEN_BY_TYPE)
    total_tokens = len(TOKEN_MAP)
    print(f"=== 结果: {passed} passed, {failed} failed ===")
    print(f"=== Token 类型: {total_types} 种, Token 总数: {total_tokens} 个 ===")

    if failed:
        # Signal failure to the caller instead of always exiting with 0.
        raise SystemExit(1)
|
|
File without changes
|