@aster-cloud/aster-lang-ts 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/src/ast/ast.d.ts +2 -2
- package/dist/src/ast/ast.d.ts.map +1 -1
- package/dist/src/ast/ast.js +5 -2
- package/dist/src/ast/ast.js.map +1 -1
- package/dist/src/browser.d.ts +16 -1
- package/dist/src/browser.d.ts.map +1 -1
- package/dist/src/browser.js +10 -2
- package/dist/src/browser.js.map +1 -1
- package/dist/src/config/effect_config.d.ts.map +1 -1
- package/dist/src/config/effect_config.js +7 -1
- package/dist/src/config/effect_config.js.map +1 -1
- package/dist/src/config/lexicons/de-DE.d.ts.map +1 -1
- package/dist/src/config/lexicons/de-DE.js +1 -0
- package/dist/src/config/lexicons/de-DE.js.map +1 -1
- package/dist/src/config/lexicons/en-US.d.ts.map +1 -1
- package/dist/src/config/lexicons/en-US.js +1 -0
- package/dist/src/config/lexicons/en-US.js.map +1 -1
- package/dist/src/config/lexicons/identifiers/registry.d.ts +9 -0
- package/dist/src/config/lexicons/identifiers/registry.d.ts.map +1 -1
- package/dist/src/config/lexicons/identifiers/registry.js +12 -0
- package/dist/src/config/lexicons/identifiers/registry.js.map +1 -1
- package/dist/src/config/lexicons/template.d.ts.map +1 -1
- package/dist/src/config/lexicons/template.js +2 -0
- package/dist/src/config/lexicons/template.js.map +1 -1
- package/dist/src/config/lexicons/zh-CN.d.ts.map +1 -1
- package/dist/src/config/lexicons/zh-CN.js +36 -20
- package/dist/src/config/lexicons/zh-CN.js.map +1 -1
- package/dist/src/config/semantic.d.ts +1 -0
- package/dist/src/config/semantic.d.ts.map +1 -1
- package/dist/src/config/semantic.js +1 -0
- package/dist/src/config/semantic.js.map +1 -1
- package/dist/src/config/token-kind.d.ts +2 -0
- package/dist/src/config/token-kind.d.ts.map +1 -1
- package/dist/src/config/token-kind.js +3 -1
- package/dist/src/config/token-kind.js.map +1 -1
- package/dist/src/core/core_ir.d.ts +2 -2
- package/dist/src/core/core_ir.d.ts.map +1 -1
- package/dist/src/core/core_ir.js +5 -2
- package/dist/src/core/core_ir.js.map +1 -1
- package/dist/src/core/interpreter.js +4 -1
- package/dist/src/core/interpreter.js.map +1 -1
- package/dist/src/diagnostics/error_codes.d.ts +2 -0
- package/dist/src/diagnostics/error_codes.d.ts.map +1 -1
- package/dist/src/diagnostics/error_codes.js +11 -5
- package/dist/src/diagnostics/error_codes.js.map +1 -1
- package/dist/src/effects/effect_inference_browser.d.ts.map +1 -1
- package/dist/src/effects/effect_inference_browser.js +22 -4
- package/dist/src/effects/effect_inference_browser.js.map +1 -1
- package/dist/src/frontend/canonicalizer.d.ts +33 -0
- package/dist/src/frontend/canonicalizer.d.ts.map +1 -1
- package/dist/src/frontend/canonicalizer.js +77 -5
- package/dist/src/frontend/canonicalizer.js.map +1 -1
- package/dist/src/lower_to_core.js +2 -2
- package/dist/src/lower_to_core.js.map +1 -1
- package/dist/src/lsp/config.d.ts +13 -9
- package/dist/src/lsp/config.d.ts.map +1 -1
- package/dist/src/lsp/config.js +19 -5
- package/dist/src/lsp/config.js.map +1 -1
- package/dist/src/lsp/server.js +19 -7
- package/dist/src/lsp/server.js.map +1 -1
- package/dist/src/parser/context.d.ts.map +1 -1
- package/dist/src/parser/context.js +32 -12
- package/dist/src/parser/context.js.map +1 -1
- package/dist/src/parser/decl-parser.d.ts +2 -2
- package/dist/src/parser/decl-parser.d.ts.map +1 -1
- package/dist/src/parser/decl-parser.js +101 -13
- package/dist/src/parser/decl-parser.js.map +1 -1
- package/dist/src/parser/expr-stmt-parser.d.ts.map +1 -1
- package/dist/src/parser/expr-stmt-parser.js +31 -1
- package/dist/src/parser/expr-stmt-parser.js.map +1 -1
- package/dist/src/parser/import-parser.d.ts +3 -2
- package/dist/src/parser/import-parser.d.ts.map +1 -1
- package/dist/src/parser/import-parser.js +12 -3
- package/dist/src/parser/import-parser.js.map +1 -1
- package/dist/src/typecheck/alias.d.ts +24 -0
- package/dist/src/typecheck/alias.d.ts.map +1 -0
- package/dist/src/typecheck/alias.js +30 -0
- package/dist/src/typecheck/alias.js.map +1 -0
- package/dist/src/typecheck/browser.d.ts +41 -4
- package/dist/src/typecheck/browser.d.ts.map +1 -1
- package/dist/src/typecheck/browser.js +112 -16
- package/dist/src/typecheck/browser.js.map +1 -1
- package/dist/src/typecheck/context.js +1 -1
- package/dist/src/typecheck/context.js.map +1 -1
- package/dist/src/typecheck/effects.js +1 -1
- package/dist/src/typecheck/effects.js.map +1 -1
- package/dist/src/typecheck/expression.js +1 -1
- package/dist/src/typecheck/expression.js.map +1 -1
- package/dist/src/typecheck/generics.js +1 -1
- package/dist/src/typecheck/generics.js.map +1 -1
- package/dist/src/typecheck/module.d.ts.map +1 -1
- package/dist/src/typecheck/module.js +13 -8
- package/dist/src/typecheck/module.js.map +1 -1
- package/dist/src/typecheck/pattern.js +1 -1
- package/dist/src/typecheck/pattern.js.map +1 -1
- package/dist/src/typecheck/pure.d.ts +66 -0
- package/dist/src/typecheck/pure.d.ts.map +1 -0
- package/dist/src/typecheck/pure.js +163 -0
- package/dist/src/typecheck/pure.js.map +1 -0
- package/dist/src/typecheck/statement.js +1 -1
- package/dist/src/typecheck/statement.js.map +1 -1
- package/dist/src/typecheck/type_system.d.ts +6 -1
- package/dist/src/typecheck/type_system.d.ts.map +1 -1
- package/dist/src/typecheck/type_system.js +61 -13
- package/dist/src/typecheck/type_system.js.map +1 -1
- package/dist/src/typecheck/utils.d.ts +4 -69
- package/dist/src/typecheck/utils.d.ts.map +1 -1
- package/dist/src/typecheck/utils.js +17 -170
- package/dist/src/typecheck/utils.js.map +1 -1
- package/dist/src/typecheck/workflow.js +1 -1
- package/dist/src/typecheck/workflow.js.map +1 -1
- package/dist/src/typecheck-pii.d.ts.map +1 -1
- package/dist/src/typecheck-pii.js +11 -1
- package/dist/src/typecheck-pii.js.map +1 -1
- package/dist/src/types/base.d.ts +1 -0
- package/dist/src/types/base.d.ts.map +1 -1
- package/dist/src/types.d.ts +17 -0
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/types.js.map +1 -1
- package/dist/test/type-checker/pii-propagation.test.js +24 -18
- package/dist/test/type-checker/pii-propagation.test.js.map +1 -1
- package/dist/test/unit/browser/typecheck-browser-pii-failure.test.d.ts +2 -0
- package/dist/test/unit/browser/typecheck-browser-pii-failure.test.d.ts.map +1 -0
- package/dist/test/unit/browser/typecheck-browser-pii-failure.test.js +193 -0
- package/dist/test/unit/browser/typecheck-browser-pii-failure.test.js.map +1 -0
- package/dist/test/unit/browser/typecheck-browser-unsupported.test.js +21 -11
- package/dist/test/unit/browser/typecheck-browser-unsupported.test.js.map +1 -1
- package/dist/test/unit/canonicalizer/canonicalizer.test.js +91 -1
- package/dist/test/unit/canonicalizer/canonicalizer.test.js.map +1 -1
- package/dist/test/unit/canonicalizer/cjk-punctuation.test.d.ts +2 -0
- package/dist/test/unit/canonicalizer/cjk-punctuation.test.d.ts.map +1 -0
- package/dist/test/unit/canonicalizer/cjk-punctuation.test.js +118 -0
- package/dist/test/unit/canonicalizer/cjk-punctuation.test.js.map +1 -0
- package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.d.ts +2 -0
- package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.d.ts.map +1 -0
- package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.js +40 -0
- package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.js.map +1 -0
- package/dist/test/unit/config/lexicons/i18n.test.js +11 -3
- package/dist/test/unit/config/lexicons/i18n.test.js.map +1 -1
- package/dist/test/unit/config/lexicons/zh-CN.test.js +129 -132
- package/dist/test/unit/config/lexicons/zh-CN.test.js.map +1 -1
- package/dist/test/unit/keyword-translator.test.js +16 -19
- package/dist/test/unit/keyword-translator.test.js.map +1 -1
- package/dist/test/unit/lowering/lowering.test.js +12 -0
- package/dist/test/unit/lowering/lowering.test.js.map +1 -1
- package/dist/test/unit/parser/entry-annotations.test.d.ts +2 -0
- package/dist/test/unit/parser/entry-annotations.test.d.ts.map +1 -0
- package/dist/test/unit/parser/entry-annotations.test.js +110 -0
- package/dist/test/unit/parser/entry-annotations.test.js.map +1 -0
- package/dist/test/unit/parser/parser.test.js +30 -6
- package/dist/test/unit/parser/parser.test.js.map +1 -1
- package/dist/test/unit/scripts/verify-browser-entry.test.d.ts +2 -0
- package/dist/test/unit/scripts/verify-browser-entry.test.d.ts.map +1 -0
- package/dist/test/unit/scripts/verify-browser-entry.test.js +179 -0
- package/dist/test/unit/scripts/verify-browser-entry.test.js.map +1 -0
- package/dist/test/unit/typecheck/function-return-inference.test.d.ts +2 -0
- package/dist/test/unit/typecheck/function-return-inference.test.d.ts.map +1 -0
- package/dist/test/unit/typecheck/function-return-inference.test.js +79 -0
- package/dist/test/unit/typecheck/function-return-inference.test.js.map +1 -0
- package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.d.ts +2 -0
- package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.d.ts.map +1 -0
- package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.js +191 -0
- package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.js.map +1 -0
- package/dist/test/unit/typecheck/should-enforce-pii.test.js +25 -110
- package/dist/test/unit/typecheck/should-enforce-pii.test.js.map +1 -1
- package/dist/test/unit/typecheck/type-system.test.js +83 -3
- package/dist/test/unit/typecheck/type-system.test.js.map +1 -1
- package/package.json +38 -9
|
@@ -62,8 +62,8 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
62
62
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.IF], '如果');
|
|
63
63
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.OTHERWISE], '否则');
|
|
64
64
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.RETURN], '返回');
|
|
65
|
-
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.MATCH], '
|
|
66
|
-
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.WHEN], '
|
|
65
|
+
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.MATCH], '匹配于');
|
|
66
|
+
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.WHEN], '当');
|
|
67
67
|
});
|
|
68
68
|
it('应正确映射类型定义关键字', () => {
|
|
69
69
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TYPE_DEF], '定义');
|
|
@@ -72,13 +72,13 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
72
72
|
});
|
|
73
73
|
it('应正确映射变量操作关键字', () => {
|
|
74
74
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.LET], '令');
|
|
75
|
-
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.BE], '
|
|
75
|
+
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.BE], '定义为');
|
|
76
76
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.SET], '将');
|
|
77
77
|
});
|
|
78
78
|
it('应正确映射布尔和null字面量', () => {
|
|
79
|
-
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TRUE], '
|
|
80
|
-
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.FALSE], '
|
|
81
|
-
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.NULL], '
|
|
79
|
+
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TRUE], '真值');
|
|
80
|
+
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.FALSE], '假值');
|
|
81
|
+
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.NULL], '空值');
|
|
82
82
|
});
|
|
83
83
|
it('应正确映射基础类型', () => {
|
|
84
84
|
assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TEXT], '文本');
|
|
@@ -131,33 +131,36 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
131
131
|
assert.ok(letBe.contextualKeywords.includes(SemanticTokenKind.BE));
|
|
132
132
|
assert.strictEqual(letBe.closer, 'NEWLINE');
|
|
133
133
|
});
|
|
134
|
-
it('应正确解析包含
|
|
134
|
+
it('应正确解析包含 匹配于...当 和 为以下之一 的程序(v2 关键字)', () => {
|
|
135
135
|
const source = `模块 测试。
|
|
136
136
|
|
|
137
137
|
定义 状态 为以下之一 成功、失败。
|
|
138
138
|
|
|
139
139
|
规则 检查 包含 状态,产出 文本:
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
140
|
+
匹配于 状态:
|
|
141
|
+
当 成功,返回 「成功」。
|
|
142
|
+
当 失败,返回 「失败」。`;
|
|
143
143
|
const can = canonicalize(source, ZH_CN);
|
|
144
144
|
const tokens = lex(can, ZH_CN);
|
|
145
145
|
// 验证 "为以下之一" 作为单独 token
|
|
146
146
|
const oneOfToken = findIdent(tokens, '为以下之一');
|
|
147
147
|
assert.ok(oneOfToken, '应识别 "为以下之一" 为单独 token');
|
|
148
|
-
// 验证 "
|
|
149
|
-
const whenCount = countIdent(tokens, '
|
|
150
|
-
assert.strictEqual(whenCount, 2, '应有 2 个 "
|
|
151
|
-
// 验证 "
|
|
152
|
-
const matchCount = countIdent(tokens, '
|
|
153
|
-
assert.strictEqual(matchCount, 1, '应有 1 个 "
|
|
148
|
+
// 验证 "当" 作为 WHEN 关键词(多次出现)
|
|
149
|
+
const whenCount = countIdent(tokens, '当');
|
|
150
|
+
assert.strictEqual(whenCount, 2, '应有 2 个 "当" token');
|
|
151
|
+
// 验证 "匹配于" 作为 MATCH 关键词
|
|
152
|
+
const matchCount = countIdent(tokens, '匹配于');
|
|
153
|
+
assert.strictEqual(matchCount, 1, '应有 1 个 "匹配于" token');
|
|
154
154
|
});
|
|
155
155
|
});
|
|
156
156
|
describe('Canonicalizer 中文支持', () => {
|
|
157
|
-
it('
|
|
158
|
-
|
|
157
|
+
it('应将中文标点归一化为英文等价(v2 软边界)', () => {
|
|
158
|
+
// v2 行为:「。」→「.」、「:」→「:」、「,」「;」「、」→ 空格
|
|
159
|
+
// 仅字符串外生效(见 normalizeCJKPunctuation + ADR-0008)
|
|
160
|
+
const input = '令 变量 定义为 42。';
|
|
159
161
|
const result = canonicalize(input, ZH_CN);
|
|
160
|
-
assert.
|
|
162
|
+
assert.strictEqual(result.includes('。'), false, '字符串外的「。」应被归一化为「.」');
|
|
163
|
+
assert.ok(result.includes('.'), 'canonical 结果应以英文句号结尾');
|
|
161
164
|
});
|
|
162
165
|
it('应全角数字转半角', () => {
|
|
163
166
|
const input = '123';
|
|
@@ -192,16 +195,18 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
192
195
|
assert.ok(!result.includes('['), '全角左方括号应被移除');
|
|
193
196
|
assert.ok(!result.includes(']'), '全角右方括号应被移除');
|
|
194
197
|
});
|
|
195
|
-
it('
|
|
198
|
+
it('应移除英文冠词但中文不受冠词规则影响', () => {
|
|
196
199
|
// 英文模式移除冠词
|
|
197
200
|
const enInput = 'define the User has a name.';
|
|
198
201
|
const enResult = canonicalize(enInput, EN_US);
|
|
199
202
|
assert.ok(!enResult.includes(' the '));
|
|
200
203
|
assert.ok(!enResult.includes(' a '));
|
|
201
|
-
//
|
|
204
|
+
// 中文模式不应用冠词规则;标识符内容应保留
|
|
205
|
+
// 注意:v2 行为下中文句号会被归一化为英文句号(见 normalizeCJKPunctuation)
|
|
202
206
|
const zhInput = '定义 用户 包含 名字。';
|
|
203
207
|
const zhResult = canonicalize(zhInput, ZH_CN);
|
|
204
|
-
assert.
|
|
208
|
+
assert.ok(zhResult.includes('定义 用户 包含 名字'), '中文标识符应保留');
|
|
209
|
+
assert.ok(zhResult.endsWith('.') || zhResult.endsWith('.\n'), '语句应以归一化的英文句号结尾');
|
|
205
210
|
});
|
|
206
211
|
it('应将智能引号转换为直角引号', () => {
|
|
207
212
|
// 左右智能引号 → 直角引号
|
|
@@ -234,21 +239,21 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
234
239
|
assert.strictEqual(nonEofTokens[0].value, '变量名');
|
|
235
240
|
});
|
|
236
241
|
it('应识别中文布尔值', () => {
|
|
237
|
-
const tokens = lex('
|
|
242
|
+
const tokens = lex('真值', ZH_CN);
|
|
238
243
|
const nonEofTokens = tokens.filter((t) => t.kind !== TokenKind.EOF);
|
|
239
244
|
assert.strictEqual(nonEofTokens.length, 1);
|
|
240
245
|
assert.strictEqual(nonEofTokens[0].kind, TokenKind.BOOL);
|
|
241
246
|
assert.strictEqual(nonEofTokens[0].value, true);
|
|
242
247
|
});
|
|
243
248
|
it('应识别中文 false', () => {
|
|
244
|
-
const tokens = lex('
|
|
249
|
+
const tokens = lex('假值', ZH_CN);
|
|
245
250
|
const nonEofTokens = tokens.filter((t) => t.kind !== TokenKind.EOF);
|
|
246
251
|
assert.strictEqual(nonEofTokens.length, 1);
|
|
247
252
|
assert.strictEqual(nonEofTokens[0].kind, TokenKind.BOOL);
|
|
248
253
|
assert.strictEqual(nonEofTokens[0].value, false);
|
|
249
254
|
});
|
|
250
255
|
it('应识别中文 null', () => {
|
|
251
|
-
const tokens = lex('
|
|
256
|
+
const tokens = lex('空值', ZH_CN);
|
|
252
257
|
const nonEofTokens = tokens.filter((t) => t.kind !== TokenKind.EOF);
|
|
253
258
|
assert.strictEqual(nonEofTokens.length, 1);
|
|
254
259
|
assert.strictEqual(nonEofTokens[0].kind, TokenKind.NULL);
|
|
@@ -330,13 +335,13 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
330
335
|
assert.ok(findIdent(tokens, '否则'), '应识别「否则」标识符');
|
|
331
336
|
assert.strictEqual(countIdent(tokens, '返回'), 2, '应有两个「返回」标识符');
|
|
332
337
|
});
|
|
333
|
-
it('应正确词法分析
|
|
334
|
-
const input = '
|
|
338
|
+
it('应正确词法分析 匹配于/当 模式匹配(v2 关键字)', () => {
|
|
339
|
+
const input = '匹配于 用户:\n 当 空值,返回 「访客」。\n 当 用户(编号, 名字),返回 名字。';
|
|
335
340
|
const result = canonicalize(input, ZH_CN);
|
|
336
341
|
const tokens = lex(result, ZH_CN);
|
|
337
342
|
// 验证模式匹配关键词
|
|
338
|
-
assert.ok(findIdent(tokens, '
|
|
339
|
-
assert.strictEqual(countIdent(tokens, '
|
|
343
|
+
assert.ok(findIdent(tokens, '匹配于'), '应识别「匹配于」标识符');
|
|
344
|
+
assert.strictEqual(countIdent(tokens, '当'), 2, '应有两个「当」标识符');
|
|
340
345
|
});
|
|
341
346
|
it('应正确词法分析嵌套条件', () => {
|
|
342
347
|
const input = '如果 甲:\n 如果 乙:\n 返回 「甲乙」。\n 否则:\n 返回 「仅甲」。\n否则:\n 返回 「无」。';
|
|
@@ -384,66 +389,66 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
384
389
|
assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 2, '应有2个句号');
|
|
385
390
|
});
|
|
386
391
|
});
|
|
387
|
-
describe('
|
|
388
|
-
it('
|
|
389
|
-
const input = '甲
|
|
392
|
+
describe('中文布尔运算语法(v2 关键字)', () => {
|
|
393
|
+
it('应正确词法分析「或者」运算', () => {
|
|
394
|
+
const input = '甲 或者 乙。';
|
|
390
395
|
const result = canonicalize(input, ZH_CN);
|
|
391
396
|
const tokens = lex(result, ZH_CN);
|
|
392
|
-
assert.ok(findIdent(tokens, '
|
|
397
|
+
assert.ok(findIdent(tokens, '或者'), '应识别「或者」标识符');
|
|
393
398
|
// Token 结构验证
|
|
394
399
|
assert.strictEqual(countTokenKind(tokens, TokenKind.IDENT), 3, '应有3个标识符');
|
|
395
400
|
assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 1, '应有1个句号');
|
|
396
401
|
});
|
|
397
|
-
it('
|
|
398
|
-
const input = '甲
|
|
402
|
+
it('应正确词法分析「并且」运算', () => {
|
|
403
|
+
const input = '甲 并且 乙。';
|
|
399
404
|
const result = canonicalize(input, ZH_CN);
|
|
400
405
|
const tokens = lex(result, ZH_CN);
|
|
401
|
-
assert.ok(findIdent(tokens, '
|
|
406
|
+
assert.ok(findIdent(tokens, '并且'), '应识别「并且」标识符');
|
|
402
407
|
// Token 结构验证
|
|
403
408
|
assert.strictEqual(countTokenKind(tokens, TokenKind.IDENT), 3, '应有3个标识符');
|
|
404
409
|
});
|
|
405
|
-
it('
|
|
406
|
-
const input = '
|
|
410
|
+
it('应正确词法分析「不是」运算', () => {
|
|
411
|
+
const input = '不是 甲。';
|
|
407
412
|
const result = canonicalize(input, ZH_CN);
|
|
408
413
|
const tokens = lex(result, ZH_CN);
|
|
409
|
-
assert.ok(findIdent(tokens, '
|
|
414
|
+
assert.ok(findIdent(tokens, '不是'), '应识别「不是」标识符');
|
|
410
415
|
// Token 结构验证
|
|
411
416
|
assert.strictEqual(countTokenKind(tokens, TokenKind.IDENT), 2, '应有2个标识符');
|
|
412
417
|
});
|
|
413
418
|
it('应正确词法分析复合布尔表达式', () => {
|
|
414
|
-
const input = '(甲
|
|
419
|
+
const input = '(甲 并且 乙) 或者 (不是 丙)。';
|
|
415
420
|
const result = canonicalize(input, ZH_CN);
|
|
416
421
|
const tokens = lex(result, ZH_CN);
|
|
417
|
-
assert.strictEqual(countIdent(tokens, '
|
|
418
|
-
assert.strictEqual(countIdent(tokens, '
|
|
419
|
-
assert.strictEqual(countIdent(tokens, '
|
|
422
|
+
assert.strictEqual(countIdent(tokens, '并且'), 1, '应有1个「并且」');
|
|
423
|
+
assert.strictEqual(countIdent(tokens, '或者'), 1, '应有1个「或者」');
|
|
424
|
+
assert.strictEqual(countIdent(tokens, '不是'), 1, '应有1个「不是」');
|
|
420
425
|
// Token 结构验证:括号
|
|
421
426
|
assert.strictEqual(countTokenKind(tokens, TokenKind.LPAREN), 2, '应有2个左括号');
|
|
422
427
|
assert.strictEqual(countTokenKind(tokens, TokenKind.RPAREN), 2, '应有2个右括号');
|
|
423
428
|
});
|
|
424
429
|
});
|
|
425
|
-
describe('
|
|
426
|
-
it('
|
|
427
|
-
const input = '1
|
|
430
|
+
describe('中文算术运算语法(v2 关键字)', () => {
|
|
431
|
+
it('应正确词法分析「加上」运算', () => {
|
|
432
|
+
const input = '1 加上 2。';
|
|
428
433
|
const result = canonicalize(input, ZH_CN);
|
|
429
434
|
const tokens = lex(result, ZH_CN);
|
|
430
|
-
assert.ok(findIdent(tokens, '
|
|
435
|
+
assert.ok(findIdent(tokens, '加上'), '应识别「加上」标识符');
|
|
431
436
|
// Token 结构验证:数字
|
|
432
437
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
|
|
433
438
|
});
|
|
434
|
-
it('
|
|
435
|
-
const input = '5
|
|
439
|
+
it('应正确词法分析「减去」运算', () => {
|
|
440
|
+
const input = '5 减去 3。';
|
|
436
441
|
const result = canonicalize(input, ZH_CN);
|
|
437
442
|
const tokens = lex(result, ZH_CN);
|
|
438
|
-
assert.ok(findIdent(tokens, '
|
|
443
|
+
assert.ok(findIdent(tokens, '减去'), '应识别「减去」标识符');
|
|
439
444
|
// Token 结构验证
|
|
440
445
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
|
|
441
446
|
});
|
|
442
|
-
it('
|
|
443
|
-
const input = '4
|
|
447
|
+
it('应正确词法分析「乘以」运算', () => {
|
|
448
|
+
const input = '4 乘以 2。';
|
|
444
449
|
const result = canonicalize(input, ZH_CN);
|
|
445
450
|
const tokens = lex(result, ZH_CN);
|
|
446
|
-
assert.ok(findIdent(tokens, '
|
|
451
|
+
assert.ok(findIdent(tokens, '乘以'), '应识别「乘以」标识符');
|
|
447
452
|
// Token 结构验证
|
|
448
453
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
|
|
449
454
|
});
|
|
@@ -456,73 +461,60 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
456
461
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
|
|
457
462
|
});
|
|
458
463
|
it('应正确词法分析复合算术表达式', () => {
|
|
459
|
-
const input = '(1
|
|
464
|
+
const input = '(1 加上 2) 乘以 (10 除以 5)。';
|
|
460
465
|
const result = canonicalize(input, ZH_CN);
|
|
461
466
|
const tokens = lex(result, ZH_CN);
|
|
462
|
-
assert.strictEqual(countIdent(tokens, '
|
|
463
|
-
assert.strictEqual(countIdent(tokens, '
|
|
467
|
+
assert.strictEqual(countIdent(tokens, '加上'), 1, '应有1个「加上」');
|
|
468
|
+
assert.strictEqual(countIdent(tokens, '乘以'), 1, '应有1个「乘以」');
|
|
464
469
|
assert.strictEqual(countIdent(tokens, '除以'), 1, '应有1个「除以」');
|
|
465
470
|
// Token 结构验证
|
|
466
471
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 4, '应有4个整数');
|
|
467
472
|
assert.strictEqual(countTokenKind(tokens, TokenKind.LPAREN), 2, '应有2个左括号');
|
|
468
473
|
assert.strictEqual(countTokenKind(tokens, TokenKind.RPAREN), 2, '应有2个右括号');
|
|
469
474
|
});
|
|
470
|
-
// Codex Round 3 建议:新增 FLOAT 和 LONG 测试用例
|
|
471
475
|
it('应正确词法分析浮点数运算', () => {
|
|
472
|
-
const input = '1.5
|
|
476
|
+
const input = '1.5 加上 2.5。';
|
|
473
477
|
const result = canonicalize(input, ZH_CN);
|
|
474
478
|
const tokens = lex(result, ZH_CN);
|
|
475
|
-
assert.ok(findIdent(tokens, '
|
|
476
|
-
// Token 结构验证:浮点数
|
|
479
|
+
assert.ok(findIdent(tokens, '加上'), '应识别「加上」标识符');
|
|
477
480
|
assert.strictEqual(countTokenKind(tokens, TokenKind.FLOAT), 2, '应有2个浮点数');
|
|
478
|
-
// 验证无整数(避免误解析)
|
|
479
481
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 0, '不应有整数');
|
|
480
482
|
});
|
|
481
483
|
it('应正确词法分析全角数字浮点数运算', () => {
|
|
482
|
-
|
|
483
|
-
const input = '3.14 乘 2.0。';
|
|
484
|
+
const input = '3.14 乘以 2.0。';
|
|
484
485
|
const result = canonicalize(input, ZH_CN);
|
|
485
486
|
const tokens = lex(result, ZH_CN);
|
|
486
|
-
assert.ok(findIdent(tokens, '
|
|
487
|
-
// Token 结构验证:全角数字转半角后产生浮点数
|
|
487
|
+
assert.ok(findIdent(tokens, '乘以'), '应识别「乘以」标识符');
|
|
488
488
|
assert.strictEqual(countTokenKind(tokens, TokenKind.FLOAT), 2, '应有2个浮点数');
|
|
489
|
-
// 验证规范化后的文本
|
|
490
489
|
assert.ok(result.includes('3.14'), '全角3.14应规范化为半角3.14');
|
|
491
490
|
});
|
|
492
491
|
it('应正确词法分析长整数运算(大写 L)', () => {
|
|
493
|
-
const input = '1000000000000L
|
|
492
|
+
const input = '1000000000000L 加上 500000000000L。';
|
|
494
493
|
const result = canonicalize(input, ZH_CN);
|
|
495
494
|
const tokens = lex(result, ZH_CN);
|
|
496
|
-
assert.ok(findIdent(tokens, '
|
|
497
|
-
// Token 结构验证:长整数
|
|
495
|
+
assert.ok(findIdent(tokens, '加上'), '应识别「加上」标识符');
|
|
498
496
|
assert.strictEqual(countTokenKind(tokens, TokenKind.LONG), 2, '应有2个长整数');
|
|
499
|
-
// 验证无普通整数
|
|
500
497
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 0, '不应有普通整数');
|
|
501
498
|
});
|
|
502
|
-
// Codex Round 4 建议:补充小写 l 和全角 l 测试
|
|
503
499
|
it('应正确词法分析长整数运算(小写 l)', () => {
|
|
504
|
-
const input = '100l
|
|
500
|
+
const input = '100l 减去 50l。';
|
|
505
501
|
const result = canonicalize(input, ZH_CN);
|
|
506
502
|
const tokens = lex(result, ZH_CN);
|
|
507
|
-
assert.ok(findIdent(tokens, '
|
|
508
|
-
// Token 结构验证:小写 l 应被识别为长整数
|
|
503
|
+
assert.ok(findIdent(tokens, '减去'), '应识别「减去」标识符');
|
|
509
504
|
assert.strictEqual(countTokenKind(tokens, TokenKind.LONG), 2, '应有2个长整数(小写l)');
|
|
510
505
|
assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 0, '不应有普通整数');
|
|
511
506
|
});
|
|
512
507
|
it('应正确词法分析长整数运算(全角 l)', () => {
|
|
513
|
-
|
|
514
|
-
const input = '100l 乘 2l。';
|
|
508
|
+
const input = '100l 乘以 2l。';
|
|
515
509
|
const result = canonicalize(input, ZH_CN);
|
|
516
510
|
const tokens = lex(result, ZH_CN);
|
|
517
|
-
assert.ok(findIdent(tokens, '
|
|
518
|
-
// Token 结构验证:全角 l 应被规范化后识别为长整数
|
|
511
|
+
assert.ok(findIdent(tokens, '乘以'), '应识别「乘以」标识符');
|
|
519
512
|
assert.strictEqual(countTokenKind(tokens, TokenKind.LONG), 2, '应有2个长整数(全角l转半角后)');
|
|
520
|
-
// Codex Round 5 建议:验证规范化输出(确保 canonicalizer 工作正常)
|
|
521
513
|
assert.ok(result.includes('100l'), '全角 100l 应规范化为半角 100l');
|
|
522
514
|
assert.ok(!result.includes('l'), '规范化后不应再含全角 l');
|
|
523
515
|
});
|
|
524
516
|
it('应正确词法分析混合数值类型表达式', () => {
|
|
525
|
-
const input = '(1
|
|
517
|
+
const input = '(1 加上 1.5) 乘以 100L。';
|
|
526
518
|
const result = canonicalize(input, ZH_CN);
|
|
527
519
|
const tokens = lex(result, ZH_CN);
|
|
528
520
|
// Token 结构验证:三种数值类型共存
|
|
@@ -542,22 +534,22 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
542
534
|
assert.strictEqual(countTokenKind(tokens, TokenKind.COLON), 1, '应有1个冒号');
|
|
543
535
|
assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 1, '应有1个句号');
|
|
544
536
|
});
|
|
545
|
-
it('
|
|
546
|
-
const input = '对每个 行
|
|
537
|
+
it('应正确词法分析嵌套循环(v2: 属于)', () => {
|
|
538
|
+
const input = '对每个 行 属于 表格:\n 对每个 列 属于 行:\n 处理 列。';
|
|
547
539
|
const result = canonicalize(input, ZH_CN);
|
|
548
540
|
const tokens = lex(result, ZH_CN);
|
|
549
541
|
assert.strictEqual(countIdent(tokens, '对每个'), 2, '应有2个「对每个」');
|
|
550
|
-
assert.strictEqual(countIdent(tokens, '
|
|
542
|
+
assert.strictEqual(countIdent(tokens, '属于'), 2, '应有2个「属于」');
|
|
551
543
|
// Token 结构验证
|
|
552
544
|
assert.strictEqual(countTokenKind(tokens, TokenKind.COLON), 2, '应有2个冒号');
|
|
553
545
|
assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 1, '应有1个句号');
|
|
554
546
|
});
|
|
555
|
-
it('
|
|
556
|
-
const input = '对每个 数字
|
|
547
|
+
it('应正确词法分析带条件的循环(v2: 属于)', () => {
|
|
548
|
+
const input = '对每个 数字 属于 数列:\n 如果 数字 大于 0:\n 累加 数字。';
|
|
557
549
|
const result = canonicalize(input, ZH_CN);
|
|
558
550
|
const tokens = lex(result, ZH_CN);
|
|
559
551
|
assert.ok(findIdent(tokens, '对每个'), '应识别「对每个」标识符');
|
|
560
|
-
assert.ok(findIdent(tokens, '
|
|
552
|
+
assert.ok(findIdent(tokens, '属于'), '应识别「属于」标识符');
|
|
561
553
|
assert.ok(findIdent(tokens, '如果'), '应识别「如果」标识符');
|
|
562
554
|
assert.ok(findIdent(tokens, '大于'), '应识别「大于」标识符');
|
|
563
555
|
// Token 结构验证
|
|
@@ -577,12 +569,12 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
577
569
|
assert.strictEqual(countTokenKind(tokens, TokenKind.COLON), 1, '应有1个冒号');
|
|
578
570
|
assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 2, '应有2个句号');
|
|
579
571
|
});
|
|
580
|
-
it('
|
|
581
|
-
const input = '流程 构建:\n 步骤 编译 依赖
|
|
572
|
+
it('应正确词法分析带依赖的步骤(v2: 基于)', () => {
|
|
573
|
+
const input = '流程 构建:\n 步骤 编译 依赖 基于 下载。\n 步骤 下载。';
|
|
582
574
|
const result = canonicalize(input, ZH_CN);
|
|
583
575
|
const tokens = lex(result, ZH_CN);
|
|
584
576
|
assert.ok(findIdent(tokens, '依赖'), '应识别「依赖」标识符');
|
|
585
|
-
assert.ok(findIdent(tokens, '
|
|
577
|
+
assert.ok(findIdent(tokens, '基于'), '应识别「基于」标识符');
|
|
586
578
|
});
|
|
587
579
|
it('应正确词法分析带补偿的步骤', () => {
|
|
588
580
|
const input = '步骤 扣款 补偿 退款。';
|
|
@@ -636,31 +628,30 @@ describe('ZH_CN Lexicon 测试套件', () => {
|
|
|
636
628
|
assert.ok(findIdent(tokens, '产出'), '应识别「产出」标识符');
|
|
637
629
|
assert.ok(findIdent(tokens, '返回'), '应识别「返回」标识符');
|
|
638
630
|
});
|
|
639
|
-
it('
|
|
640
|
-
const input = '令 结果
|
|
631
|
+
it('应正确词法分析变量绑定(v2: 定义为)', () => {
|
|
632
|
+
const input = '令 结果 定义为 计算(42)。';
|
|
641
633
|
const result = canonicalize(input, ZH_CN);
|
|
642
634
|
const tokens = lex(result, ZH_CN);
|
|
643
635
|
assert.ok(findIdent(tokens, '令'), '应识别「令」标识符');
|
|
644
|
-
assert.ok(findIdent(tokens, '
|
|
636
|
+
assert.ok(findIdent(tokens, '定义为'), '应识别「定义为」标识符');
|
|
645
637
|
});
|
|
646
|
-
it('
|
|
638
|
+
it('应正确词法分析完整中文程序(v2 关键字)', () => {
|
|
647
639
|
const program = `模块 应用。
|
|
648
640
|
|
|
649
641
|
定义 用户 包含 编号:文本,名字:文本。
|
|
650
642
|
|
|
651
643
|
问候 入参 用户:可选 用户,产出 文本:
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
644
|
+
匹配于 用户:
|
|
645
|
+
当 空值,返回 「你好,访客」。
|
|
646
|
+
当 用户(编号, 名字),返回 「欢迎,」加上 名字。`;
|
|
655
647
|
const result = canonicalize(program, ZH_CN);
|
|
656
648
|
const tokens = lex(result, ZH_CN);
|
|
657
649
|
// 验证关键词精确出现次数(在 lexer 阶段为 IDENT)
|
|
658
|
-
// Codex 审查建议:使用精确断言替代宽松的 >= 判断
|
|
659
650
|
assert.strictEqual(countIdent(tokens, '包含'), 1, '应有1个「包含」');
|
|
660
651
|
assert.strictEqual(countIdent(tokens, '入参'), 1, '应有1个「入参」');
|
|
661
652
|
assert.strictEqual(countIdent(tokens, '产出'), 1, '应有1个「产出」');
|
|
662
|
-
assert.strictEqual(countIdent(tokens, '
|
|
663
|
-
assert.strictEqual(countIdent(tokens, '
|
|
653
|
+
assert.strictEqual(countIdent(tokens, '匹配于'), 1, '应有1个「匹配于」');
|
|
654
|
+
assert.strictEqual(countIdent(tokens, '当'), 2, '应有2个「当」');
|
|
664
655
|
assert.strictEqual(countIdent(tokens, '返回'), 2, '应有2个「返回」');
|
|
665
656
|
assert.strictEqual(countIdent(tokens, '可选'), 1, '应有1个「可选」');
|
|
666
657
|
});
|
|
@@ -676,15 +667,15 @@ Rule greet given user: User?, produce Text:
|
|
|
676
667
|
Match user:
|
|
677
668
|
When null, Return "Hi, guest".
|
|
678
669
|
When User(id, name, 42), Return "Welcome, " plus name.`;
|
|
679
|
-
// 对应的中文程序(
|
|
670
|
+
// 对应的中文程序(v2 关键字:匹配于/当/加上/空值)
|
|
680
671
|
const zhProgram = `模块 应用。
|
|
681
672
|
|
|
682
673
|
定义 用户 包含 编号:文本,名字:文本,年龄:整数。
|
|
683
674
|
|
|
684
675
|
问候 入参 用户:可选 用户,产出 文本:
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
676
|
+
匹配于 用户:
|
|
677
|
+
当 空值,返回 「你好,访客」。
|
|
678
|
+
当 用户(编号, 名字, 42),返回 「欢迎,」加上 名字。`;
|
|
688
679
|
const enTokens = lex(canonicalize(enProgram, EN_US), EN_US);
|
|
689
680
|
const zhTokens = lex(canonicalize(zhProgram, ZH_CN), ZH_CN);
|
|
690
681
|
// 获取 token 类型分布(Codex 审查建议:拆分更多 Token 类型以细化比较)
|
|
@@ -763,9 +754,9 @@ Rule greet given user: User?, produce Text:
|
|
|
763
754
|
const enFalseTokens = lex('false', EN_US);
|
|
764
755
|
assert.ok(enTrueTokens.some((t) => t.kind === TokenKind.BOOL && t.value === true));
|
|
765
756
|
assert.ok(enFalseTokens.some((t) => t.kind === TokenKind.BOOL && t.value === false));
|
|
766
|
-
//
|
|
767
|
-
const zhTrueTokens = lex('
|
|
768
|
-
const zhFalseTokens = lex('
|
|
757
|
+
// 中文(v2 关键字:真值 / 假值)
|
|
758
|
+
const zhTrueTokens = lex('真值', ZH_CN);
|
|
759
|
+
const zhFalseTokens = lex('假值', ZH_CN);
|
|
769
760
|
assert.ok(zhTrueTokens.some((t) => t.kind === TokenKind.BOOL && t.value === true));
|
|
770
761
|
assert.ok(zhFalseTokens.some((t) => t.kind === TokenKind.BOOL && t.value === false));
|
|
771
762
|
});
|
|
@@ -773,8 +764,8 @@ Rule greet given user: User?, produce Text:
|
|
|
773
764
|
// 英文
|
|
774
765
|
const enNullTokens = lex('null', EN_US);
|
|
775
766
|
assert.ok(enNullTokens.some((t) => t.kind === TokenKind.NULL && t.value === null));
|
|
776
|
-
//
|
|
777
|
-
const zhNullTokens = lex('
|
|
767
|
+
// 中文(v2 关键字:空值)
|
|
768
|
+
const zhNullTokens = lex('空值', ZH_CN);
|
|
778
769
|
assert.ok(zhNullTokens.some((t) => t.kind === TokenKind.NULL && t.value === null));
|
|
779
770
|
});
|
|
780
771
|
});
|
|
@@ -800,9 +791,9 @@ Rule greet given user: User?, produce Text:
|
|
|
800
791
|
it('切换默认后 lex 应使用新默认', () => {
|
|
801
792
|
const originalDefault = LexiconRegistry.getDefault();
|
|
802
793
|
try {
|
|
803
|
-
//
|
|
794
|
+
// 切换到中文并测试中文布尔值(v2 关键字:真值)
|
|
804
795
|
LexiconRegistry.setDefault('zh-CN');
|
|
805
|
-
const zhTokens = lex('
|
|
796
|
+
const zhTokens = lex('真值');
|
|
806
797
|
const zhBool = zhTokens.find((t) => t.kind === TokenKind.BOOL);
|
|
807
798
|
assert.ok(zhBool, '应识别中文布尔值');
|
|
808
799
|
assert.strictEqual(zhBool?.value, true);
|
|
@@ -851,7 +842,7 @@ Rule greet given user: User?, produce Text:
|
|
|
851
842
|
const stringTokens = tokens.filter((t) => t.kind === TokenKind.STRING);
|
|
852
843
|
assert.ok(stringTokens.length > 0, '应有字符串 token');
|
|
853
844
|
});
|
|
854
|
-
it('loan_decision.aster
|
|
845
|
+
it('loan_decision.aster 应正确解析(v2 关键字)', () => {
|
|
855
846
|
const { tokens } = parseZhCNFile('loan_decision.aster');
|
|
856
847
|
// 验证类型定义关键词(定义 作为纯关键字直接解析为 IDENT)
|
|
857
848
|
assert.ok(findIdent(tokens, '定义'), '应有 定义 关键词');
|
|
@@ -859,12 +850,12 @@ Rule greet given user: User?, produce Text:
|
|
|
859
850
|
// 验证控制流关键词
|
|
860
851
|
assert.ok(findIdent(tokens, '如果'), '应有 如果 关键词');
|
|
861
852
|
assert.ok(findIdent(tokens, '返回'), '应有 返回 关键词');
|
|
862
|
-
//
|
|
853
|
+
// 验证变量绑定(v2: 定义为)
|
|
863
854
|
assert.ok(findIdent(tokens, '令'), '应有 令 关键词');
|
|
864
|
-
assert.ok(findIdent(tokens, '
|
|
865
|
-
//
|
|
855
|
+
assert.ok(findIdent(tokens, '定义为'), '应有 定义为 关键词(v2 BE)');
|
|
856
|
+
// 验证布尔值(v2: 真值/假值)
|
|
866
857
|
const boolTokens = tokens.filter((t) => t.kind === TokenKind.BOOL);
|
|
867
|
-
assert.ok(boolTokens.length >= 2, '
|
|
858
|
+
assert.ok(boolTokens.length >= 2, '应有多个布尔值(真值/假值)');
|
|
868
859
|
// 验证整数
|
|
869
860
|
const intTokens = tokens.filter((t) => t.kind === TokenKind.INT);
|
|
870
861
|
assert.ok(intTokens.length >= 2, '应有整数字面量(18, 100000)');
|
|
@@ -883,31 +874,31 @@ Rule greet given user: User?, produce Text:
|
|
|
883
874
|
const stringTokens = tokens.filter((t) => t.kind === TokenKind.STRING);
|
|
884
875
|
assert.ok(stringTokens.length >= 3, '应有字符串(「申请人未满18岁」等)');
|
|
885
876
|
});
|
|
886
|
-
it('user_greeting.aster
|
|
877
|
+
it('user_greeting.aster 应正确解析(v2 关键字)', () => {
|
|
887
878
|
const { tokens } = parseZhCNFile('user_greeting.aster');
|
|
888
|
-
//
|
|
889
|
-
assert.ok(findIdent(tokens, '
|
|
890
|
-
assert.ok(findIdent(tokens, '
|
|
879
|
+
// 验证模式匹配关键词(v2: 匹配于/当)
|
|
880
|
+
assert.ok(findIdent(tokens, '匹配于'), '应有 匹配于 关键词(v2 MATCH)');
|
|
881
|
+
assert.ok(findIdent(tokens, '当'), '应有 当 关键词(v2 WHEN)');
|
|
891
882
|
// 可选类型已改为推断,应不再显式出现
|
|
892
883
|
assert.ok(!findIdent(tokens, '可选'), '不应显式出现 可选 关键词');
|
|
893
|
-
// 验证 null
|
|
884
|
+
// 验证 null 值(v2: 空值)
|
|
894
885
|
const nullTokens = tokens.filter((t) => t.kind === TokenKind.NULL);
|
|
895
|
-
assert.ok(nullTokens.length > 0, '应有
|
|
886
|
+
assert.ok(nullTokens.length > 0, '应有 空值 token');
|
|
896
887
|
});
|
|
897
|
-
it('arithmetic.aster
|
|
888
|
+
it('arithmetic.aster 应正确解析(v2 关键字)', () => {
|
|
898
889
|
const { tokens } = parseZhCNFile('arithmetic.aster');
|
|
899
|
-
//
|
|
900
|
-
assert.ok(findIdent(tokens, '
|
|
901
|
-
assert.ok(findIdent(tokens, '
|
|
902
|
-
assert.ok(findIdent(tokens, '
|
|
890
|
+
// 验证算术运算关键词(v2: 加上/减去/乘以/除以)
|
|
891
|
+
assert.ok(findIdent(tokens, '加上'), '应有 加上 关键词(v2 PLUS)');
|
|
892
|
+
assert.ok(findIdent(tokens, '减去'), '应有 减去 关键词(v2 MINUS)');
|
|
893
|
+
assert.ok(findIdent(tokens, '乘以'), '应有 乘以 关键词(v2 TIMES)');
|
|
903
894
|
assert.ok(findIdent(tokens, '除以'), '应有 除以 关键词');
|
|
904
|
-
//
|
|
895
|
+
// 验证变量绑定(v2: 定义为)
|
|
905
896
|
const letCount = countIdent(tokens, '令');
|
|
906
|
-
const beCount = countIdent(tokens, '
|
|
897
|
+
const beCount = countIdent(tokens, '定义为');
|
|
907
898
|
assert.ok(letCount >= 2, '应有多个 令 关键词');
|
|
908
|
-
assert.ok(beCount >= 2, '应有多个
|
|
899
|
+
assert.ok(beCount >= 2, '应有多个 定义为 关键词(v2 BE)');
|
|
909
900
|
});
|
|
910
|
-
it('所有中文 CNL
|
|
901
|
+
it('所有中文 CNL 文件应成功规范化(v2 软边界)', () => {
|
|
911
902
|
const files = [...zhCNCorpusIndex.keys()];
|
|
912
903
|
assert.ok(files.length >= 4, '应有至少 4 个 .aster 文件');
|
|
913
904
|
for (const file of files) {
|
|
@@ -915,8 +906,14 @@ Rule greet given user: User?, produce Text:
|
|
|
915
906
|
// 验证规范化后不包含智能引号
|
|
916
907
|
assert.ok(!canonical.includes('“'), `${file} 不应包含左智能引号`);
|
|
917
908
|
assert.ok(!canonical.includes('”'), `${file} 不应包含右智能引号`);
|
|
918
|
-
//
|
|
919
|
-
|
|
909
|
+
// v2 行为:中文标点被归一化为英文等价(字符串外)
|
|
910
|
+
// 字符串内的中文标点保留;用 lexicon 引号「」分段后验证字符串外
|
|
911
|
+
const stringSegments = canonical.split(/[「」]/);
|
|
912
|
+
const outside = stringSegments.filter((_, i) => i % 2 === 0).join('');
|
|
913
|
+
assert.strictEqual(outside.includes('。'), false, `${file} 字符串外不应有「。」`);
|
|
914
|
+
assert.strictEqual(outside.includes('，'), false, `${file} 字符串外不应有「，」`);
|
|
915
|
+
// canonical 应至少包含一个英文句号(语句终止)
|
|
916
|
+
assert.ok(canonical.includes('.'), `${file} 应包含归一化后的英文句号`);
|
|
920
917
|
}
|
|
921
918
|
});
|
|
922
919
|
it('中文 CNL 文件的 token 分布应合理', () => {
|