@aster-cloud/aster-lang-ts 0.1.5 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. package/CHANGELOG.md +72 -0
  2. package/LICENSE +201 -0
  3. package/README.md +2 -0
  4. package/dist/scripts/core-ir-json.d.ts +3 -0
  5. package/dist/scripts/core-ir-json.d.ts.map +1 -0
  6. package/dist/scripts/core-ir-json.js +72 -0
  7. package/dist/scripts/core-ir-json.js.map +1 -0
  8. package/dist/scripts/golden.js +49 -0
  9. package/dist/scripts/golden.js.map +1 -1
  10. package/dist/src/ai/providers/anthropic.d.ts +17 -0
  11. package/dist/src/ai/providers/anthropic.d.ts.map +1 -1
  12. package/dist/src/ai/providers/anthropic.js +58 -15
  13. package/dist/src/ai/providers/anthropic.js.map +1 -1
  14. package/dist/src/ast/ast.d.ts +2 -2
  15. package/dist/src/ast/ast.d.ts.map +1 -1
  16. package/dist/src/ast/ast.js +5 -2
  17. package/dist/src/ast/ast.js.map +1 -1
  18. package/dist/src/browser/schema.d.ts +52 -0
  19. package/dist/src/browser/schema.d.ts.map +1 -0
  20. package/dist/src/browser/schema.js +205 -0
  21. package/dist/src/browser/schema.js.map +1 -0
  22. package/dist/src/browser.d.ts +19 -54
  23. package/dist/src/browser.d.ts.map +1 -1
  24. package/dist/src/browser.js +30 -188
  25. package/dist/src/browser.js.map +1 -1
  26. package/dist/src/cli/policy-converter.js +8 -1
  27. package/dist/src/cli/policy-converter.js.map +1 -1
  28. package/dist/src/config/effect_config.d.ts.map +1 -1
  29. package/dist/src/config/effect_config.js +7 -1
  30. package/dist/src/config/effect_config.js.map +1 -1
  31. package/dist/src/config/lexicons/de-DE.d.ts.map +1 -1
  32. package/dist/src/config/lexicons/de-DE.js +1 -0
  33. package/dist/src/config/lexicons/de-DE.js.map +1 -1
  34. package/dist/src/config/lexicons/en-US.d.ts.map +1 -1
  35. package/dist/src/config/lexicons/en-US.js +1 -0
  36. package/dist/src/config/lexicons/en-US.js.map +1 -1
  37. package/dist/src/config/lexicons/fallback-lexicon.d.ts +41 -0
  38. package/dist/src/config/lexicons/fallback-lexicon.d.ts.map +1 -0
  39. package/dist/src/config/lexicons/fallback-lexicon.js +88 -0
  40. package/dist/src/config/lexicons/fallback-lexicon.js.map +1 -0
  41. package/dist/src/config/lexicons/identifiers/registry.d.ts +9 -0
  42. package/dist/src/config/lexicons/identifiers/registry.d.ts.map +1 -1
  43. package/dist/src/config/lexicons/identifiers/registry.js +12 -0
  44. package/dist/src/config/lexicons/identifiers/registry.js.map +1 -1
  45. package/dist/src/config/lexicons/index.d.ts +25 -3
  46. package/dist/src/config/lexicons/index.d.ts.map +1 -1
  47. package/dist/src/config/lexicons/index.js +42 -7
  48. package/dist/src/config/lexicons/index.js.map +1 -1
  49. package/dist/src/config/lexicons/registry.d.ts.map +1 -1
  50. package/dist/src/config/lexicons/registry.js +43 -5
  51. package/dist/src/config/lexicons/registry.js.map +1 -1
  52. package/dist/src/config/lexicons/template.d.ts.map +1 -1
  53. package/dist/src/config/lexicons/template.js +2 -0
  54. package/dist/src/config/lexicons/template.js.map +1 -1
  55. package/dist/src/config/lexicons/zh-CN.d.ts.map +1 -1
  56. package/dist/src/config/lexicons/zh-CN.js +36 -20
  57. package/dist/src/config/lexicons/zh-CN.js.map +1 -1
  58. package/dist/src/config/semantic.d.ts +13 -0
  59. package/dist/src/config/semantic.d.ts.map +1 -1
  60. package/dist/src/config/semantic.js +17 -0
  61. package/dist/src/config/semantic.js.map +1 -1
  62. package/dist/src/config/token-kind.d.ts +2 -0
  63. package/dist/src/config/token-kind.d.ts.map +1 -1
  64. package/dist/src/config/token-kind.js +3 -1
  65. package/dist/src/config/token-kind.js.map +1 -1
  66. package/dist/src/core/core_ir.d.ts +2 -2
  67. package/dist/src/core/core_ir.d.ts.map +1 -1
  68. package/dist/src/core/core_ir.js +5 -2
  69. package/dist/src/core/core_ir.js.map +1 -1
  70. package/dist/src/core/interpreter.js +6 -1
  71. package/dist/src/core/interpreter.js.map +1 -1
  72. package/dist/src/core/visitor.d.ts.map +1 -1
  73. package/dist/src/core/visitor.js +17 -0
  74. package/dist/src/core/visitor.js.map +1 -1
  75. package/dist/src/diagnostics/error_codes.d.ts +2 -0
  76. package/dist/src/diagnostics/error_codes.d.ts.map +1 -1
  77. package/dist/src/diagnostics/error_codes.js +11 -5
  78. package/dist/src/diagnostics/error_codes.js.map +1 -1
  79. package/dist/src/effects/effect_inference_browser.d.ts.map +1 -1
  80. package/dist/src/effects/effect_inference_browser.js +22 -4
  81. package/dist/src/effects/effect_inference_browser.js.map +1 -1
  82. package/dist/src/frontend/canonicalizer.d.ts +33 -0
  83. package/dist/src/frontend/canonicalizer.d.ts.map +1 -1
  84. package/dist/src/frontend/canonicalizer.js +77 -5
  85. package/dist/src/frontend/canonicalizer.js.map +1 -1
  86. package/dist/src/frontend/lexer.d.ts.map +1 -1
  87. package/dist/src/frontend/lexer.js +7 -1
  88. package/dist/src/frontend/lexer.js.map +1 -1
  89. package/dist/src/jvm/emitter.js +2 -2
  90. package/dist/src/jvm/emitter.js.map +1 -1
  91. package/dist/src/lower_to_core.js +2 -2
  92. package/dist/src/lower_to_core.js.map +1 -1
  93. package/dist/src/lsp/config.d.ts +13 -9
  94. package/dist/src/lsp/config.d.ts.map +1 -1
  95. package/dist/src/lsp/config.js +19 -5
  96. package/dist/src/lsp/config.js.map +1 -1
  97. package/dist/src/lsp/diagnostics.d.ts +10 -0
  98. package/dist/src/lsp/diagnostics.d.ts.map +1 -1
  99. package/dist/src/lsp/diagnostics.js +43 -1
  100. package/dist/src/lsp/diagnostics.js.map +1 -1
  101. package/dist/src/lsp/pii_diagnostics.d.ts.map +1 -1
  102. package/dist/src/lsp/pii_diagnostics.js +4 -1
  103. package/dist/src/lsp/pii_diagnostics.js.map +1 -1
  104. package/dist/src/lsp/server.js +49 -17
  105. package/dist/src/lsp/server.js.map +1 -1
  106. package/dist/src/lsp/utils.js +1 -1
  107. package/dist/src/lsp/utils.js.map +1 -1
  108. package/dist/src/parser/context.d.ts.map +1 -1
  109. package/dist/src/parser/context.js +32 -12
  110. package/dist/src/parser/context.js.map +1 -1
  111. package/dist/src/parser/decl-parser.d.ts +2 -2
  112. package/dist/src/parser/decl-parser.d.ts.map +1 -1
  113. package/dist/src/parser/decl-parser.js +113 -14
  114. package/dist/src/parser/decl-parser.js.map +1 -1
  115. package/dist/src/parser/expr-stmt-parser.d.ts.map +1 -1
  116. package/dist/src/parser/expr-stmt-parser.js +111 -4
  117. package/dist/src/parser/expr-stmt-parser.js.map +1 -1
  118. package/dist/src/parser/import-parser.d.ts +3 -2
  119. package/dist/src/parser/import-parser.d.ts.map +1 -1
  120. package/dist/src/parser/import-parser.js +12 -3
  121. package/dist/src/parser/import-parser.js.map +1 -1
  122. package/dist/src/parser/input-generator.d.ts +2 -0
  123. package/dist/src/parser/input-generator.d.ts.map +1 -1
  124. package/dist/src/parser/input-generator.js +11 -1
  125. package/dist/src/parser/input-generator.js.map +1 -1
  126. package/dist/src/parser/type-parser.js +1 -1
  127. package/dist/src/parser/type-parser.js.map +1 -1
  128. package/dist/src/typecheck/alias.d.ts +24 -0
  129. package/dist/src/typecheck/alias.d.ts.map +1 -0
  130. package/dist/src/typecheck/alias.js +30 -0
  131. package/dist/src/typecheck/alias.js.map +1 -0
  132. package/dist/src/typecheck/browser.d.ts +41 -4
  133. package/dist/src/typecheck/browser.d.ts.map +1 -1
  134. package/dist/src/typecheck/browser.js +189 -14
  135. package/dist/src/typecheck/browser.js.map +1 -1
  136. package/dist/src/typecheck/context.js +1 -1
  137. package/dist/src/typecheck/context.js.map +1 -1
  138. package/dist/src/typecheck/effects.d.ts.map +1 -1
  139. package/dist/src/typecheck/effects.js +18 -4
  140. package/dist/src/typecheck/effects.js.map +1 -1
  141. package/dist/src/typecheck/expression.js +1 -1
  142. package/dist/src/typecheck/expression.js.map +1 -1
  143. package/dist/src/typecheck/generics.js +1 -1
  144. package/dist/src/typecheck/generics.js.map +1 -1
  145. package/dist/src/typecheck/module.d.ts.map +1 -1
  146. package/dist/src/typecheck/module.js +13 -8
  147. package/dist/src/typecheck/module.js.map +1 -1
  148. package/dist/src/typecheck/pattern.js +1 -1
  149. package/dist/src/typecheck/pattern.js.map +1 -1
  150. package/dist/src/typecheck/pure.d.ts +66 -0
  151. package/dist/src/typecheck/pure.d.ts.map +1 -0
  152. package/dist/src/typecheck/pure.js +163 -0
  153. package/dist/src/typecheck/pure.js.map +1 -0
  154. package/dist/src/typecheck/statement.js +1 -1
  155. package/dist/src/typecheck/statement.js.map +1 -1
  156. package/dist/src/typecheck/type_system.d.ts +6 -1
  157. package/dist/src/typecheck/type_system.d.ts.map +1 -1
  158. package/dist/src/typecheck/type_system.js +61 -13
  159. package/dist/src/typecheck/type_system.js.map +1 -1
  160. package/dist/src/typecheck/utils.d.ts +4 -69
  161. package/dist/src/typecheck/utils.d.ts.map +1 -1
  162. package/dist/src/typecheck/utils.js +17 -170
  163. package/dist/src/typecheck/utils.js.map +1 -1
  164. package/dist/src/typecheck/workflow.js +1 -1
  165. package/dist/src/typecheck/workflow.js.map +1 -1
  166. package/dist/src/typecheck-pii.d.ts.map +1 -1
  167. package/dist/src/typecheck-pii.js +11 -1
  168. package/dist/src/typecheck-pii.js.map +1 -1
  169. package/dist/src/types/base.d.ts +1 -0
  170. package/dist/src/types/base.d.ts.map +1 -1
  171. package/dist/src/types.d.ts +18 -0
  172. package/dist/src/types.d.ts.map +1 -1
  173. package/dist/src/types.js +1 -0
  174. package/dist/src/types.js.map +1 -1
  175. package/dist/test/e2e/runner/golden-runner.js +21 -10
  176. package/dist/test/e2e/runner/golden-runner.js.map +1 -1
  177. package/dist/test/integration/compliance/compliance-smoke.test.js +27 -35
  178. package/dist/test/integration/compliance/compliance-smoke.test.js.map +1 -1
  179. package/dist/test/integration/lsp/lsp-multi-rename.test.js +8 -7
  180. package/dist/test/integration/lsp/lsp-multi-rename.test.js.map +1 -1
  181. package/dist/test/policy-converter/round-trip.test.js +6 -4
  182. package/dist/test/policy-converter/round-trip.test.js.map +1 -1
  183. package/dist/test/regression/type-checker-golden.test.js +8 -4
  184. package/dist/test/regression/type-checker-golden.test.js.map +1 -1
  185. package/dist/test/type-checker/cross-module-packages.test.js +5 -3
  186. package/dist/test/type-checker/cross-module-packages.test.js.map +1 -1
  187. package/dist/test/type-checker/pii-propagation.test.js +24 -18
  188. package/dist/test/type-checker/pii-propagation.test.js.map +1 -1
  189. package/dist/test/unit/browser/typecheck-browser-pii-failure.test.d.ts +2 -0
  190. package/dist/test/unit/browser/typecheck-browser-pii-failure.test.d.ts.map +1 -0
  191. package/dist/test/unit/browser/typecheck-browser-pii-failure.test.js +193 -0
  192. package/dist/test/unit/browser/typecheck-browser-pii-failure.test.js.map +1 -0
  193. package/dist/test/unit/browser/typecheck-browser-unsupported.test.d.ts +2 -0
  194. package/dist/test/unit/browser/typecheck-browser-unsupported.test.d.ts.map +1 -0
  195. package/dist/test/unit/browser/typecheck-browser-unsupported.test.js +110 -0
  196. package/dist/test/unit/browser/typecheck-browser-unsupported.test.js.map +1 -0
  197. package/dist/test/unit/canonicalizer/canonicalizer.test.js +91 -1
  198. package/dist/test/unit/canonicalizer/canonicalizer.test.js.map +1 -1
  199. package/dist/test/unit/canonicalizer/cjk-punctuation.test.d.ts +2 -0
  200. package/dist/test/unit/canonicalizer/cjk-punctuation.test.d.ts.map +1 -0
  201. package/dist/test/unit/canonicalizer/cjk-punctuation.test.js +118 -0
  202. package/dist/test/unit/canonicalizer/cjk-punctuation.test.js.map +1 -0
  203. package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.d.ts +2 -0
  204. package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.d.ts.map +1 -0
  205. package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.js +40 -0
  206. package/dist/test/unit/canonicalizer/conformance-cjk-v2.test.js.map +1 -0
  207. package/dist/test/unit/config/lexicons/i18n.test.js +31 -14
  208. package/dist/test/unit/config/lexicons/i18n.test.js.map +1 -1
  209. package/dist/test/unit/config/lexicons/zh-CN.test.js +150 -143
  210. package/dist/test/unit/config/lexicons/zh-CN.test.js.map +1 -1
  211. package/dist/test/unit/keyword-translator.test.js +16 -19
  212. package/dist/test/unit/keyword-translator.test.js.map +1 -1
  213. package/dist/test/unit/lexicons/fallback-lexicon.test.d.ts +16 -0
  214. package/dist/test/unit/lexicons/fallback-lexicon.test.d.ts.map +1 -0
  215. package/dist/test/unit/lexicons/fallback-lexicon.test.js +98 -0
  216. package/dist/test/unit/lexicons/fallback-lexicon.test.js.map +1 -0
  217. package/dist/test/unit/llm-providers.test.js +105 -93
  218. package/dist/test/unit/llm-providers.test.js.map +1 -1
  219. package/dist/test/unit/lowering/lowering.test.js +12 -0
  220. package/dist/test/unit/lowering/lowering.test.js.map +1 -1
  221. package/dist/test/unit/parser/entry-annotations.test.d.ts +2 -0
  222. package/dist/test/unit/parser/entry-annotations.test.d.ts.map +1 -0
  223. package/dist/test/unit/parser/entry-annotations.test.js +110 -0
  224. package/dist/test/unit/parser/entry-annotations.test.js.map +1 -0
  225. package/dist/test/unit/parser/parser.test.js +30 -6
  226. package/dist/test/unit/parser/parser.test.js.map +1 -1
  227. package/dist/test/unit/scripts/verify-browser-entry.test.d.ts +2 -0
  228. package/dist/test/unit/scripts/verify-browser-entry.test.d.ts.map +1 -0
  229. package/dist/test/unit/scripts/verify-browser-entry.test.js +179 -0
  230. package/dist/test/unit/scripts/verify-browser-entry.test.js.map +1 -0
  231. package/dist/test/unit/typecheck/function-return-inference.test.d.ts +2 -0
  232. package/dist/test/unit/typecheck/function-return-inference.test.d.ts.map +1 -0
  233. package/dist/test/unit/typecheck/function-return-inference.test.js +79 -0
  234. package/dist/test/unit/typecheck/function-return-inference.test.js.map +1 -0
  235. package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.d.ts +2 -0
  236. package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.d.ts.map +1 -0
  237. package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.js +191 -0
  238. package/dist/test/unit/typecheck/pii-cross-runtime-conformance.test.js.map +1 -0
  239. package/dist/test/unit/typecheck/should-enforce-pii.test.js +25 -110
  240. package/dist/test/unit/typecheck/should-enforce-pii.test.js.map +1 -1
  241. package/dist/test/unit/typecheck/type-system.test.js +83 -3
  242. package/dist/test/unit/typecheck/type-system.test.js.map +1 -1
  243. package/package.json +41 -9
@@ -5,13 +5,12 @@
5
5
  */
6
6
  import { describe, it } from 'node:test';
7
7
  import assert from 'node:assert';
8
- import fs from 'node:fs';
9
- import path from 'node:path';
8
+ import { listTier3Bucket } from '@aster-cloud/aster-lang-test';
10
9
  import { canonicalize } from '../../../../src/frontend/canonicalizer.js';
11
10
  import { lex } from '../../../../src/frontend/lexer.js';
12
11
  import { ZH_CN } from '../../../../src/config/lexicons/zh-CN.js';
13
12
  import { EN_US } from '../../../../src/config/lexicons/en-US.js';
14
- import { LexiconRegistry, initializeDefaultLexicons } from '../../../../src/config/lexicons/index.js';
13
+ import { LexiconRegistry, initializeAllBundledLexicons } from '../../../../src/config/lexicons/index.js';
15
14
  import { SemanticTokenKind } from '../../../../src/config/token-kind.js';
16
15
  import { TokenKind } from '../../../../src/frontend/tokens.js';
17
16
  // ============================================================================
@@ -40,8 +39,9 @@ const countTokenKind = (tokens, kind) => tokens.filter((t) => t.kind === kind).l
40
39
  */
41
40
  const CROSS_LANG_IDENT_TOLERANCE = 15;
42
41
  describe('ZH_CN Lexicon 测试套件', () => {
43
- // 初始化注册表
44
- initializeDefaultLexicons();
42
+ // 该套件覆盖 zh-CN/en-US 两套词法表,必须显式注册全部内置语言
43
+ // initializeDefaultLexicons() 已收窄为仅 en-US,需要 initializeAllBundledLexicons()
44
+ initializeAllBundledLexicons();
45
45
  describe('Lexicon 注册与获取', () => {
46
46
  it('应成功注册中文词法表', () => {
47
47
  assert.ok(LexiconRegistry.has('zh-CN'));
@@ -62,8 +62,8 @@ describe('ZH_CN Lexicon 测试套件', () => {
62
62
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.IF], '如果');
63
63
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.OTHERWISE], '否则');
64
64
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.RETURN], '返回');
65
- assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.MATCH], '若');
66
- assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.WHEN], '为');
65
+ assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.MATCH], '匹配于');
66
+ assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.WHEN], '当');
67
67
  });
68
68
  it('应正确映射类型定义关键字', () => {
69
69
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TYPE_DEF], '定义');
@@ -72,13 +72,13 @@ describe('ZH_CN Lexicon 测试套件', () => {
72
72
  });
73
73
  it('应正确映射变量操作关键字', () => {
74
74
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.LET], '令');
75
- assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.BE], '为');
75
+ assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.BE], '定义为');
76
76
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.SET], '将');
77
77
  });
78
78
  it('应正确映射布尔和null字面量', () => {
79
- assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TRUE], '真');
80
- assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.FALSE], '假');
81
- assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.NULL], '空');
79
+ assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TRUE], '真值');
80
+ assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.FALSE], '假值');
81
+ assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.NULL], '空值');
82
82
  });
83
83
  it('应正确映射基础类型', () => {
84
84
  assert.strictEqual(ZH_CN.keywords[SemanticTokenKind.TEXT], '文本');
@@ -131,33 +131,36 @@ describe('ZH_CN Lexicon 测试套件', () => {
131
131
  assert.ok(letBe.contextualKeywords.includes(SemanticTokenKind.BE));
132
132
  assert.strictEqual(letBe.closer, 'NEWLINE');
133
133
  });
134
- it('应正确解析包含 若...为 和 为以下之一 的程序', () => {
134
+ it('应正确解析包含 匹配于...当 和 为以下之一 的程序(v2 关键字)', () => {
135
135
  const source = `模块 测试。
136
136
 
137
137
  定义 状态 为以下之一 成功、失败。
138
138
 
139
139
  规则 检查 包含 状态,产出 文本:
140
- 若 状态:
141
- 为 成功,返回 「成功」。
142
- 为 失败,返回 「失败」。`;
140
+ 匹配于 状态:
141
+ 当 成功,返回 「成功」。
142
+ 当 失败,返回 「失败」。`;
143
143
  const can = canonicalize(source, ZH_CN);
144
144
  const tokens = lex(can, ZH_CN);
145
145
  // 验证 "为以下之一" 作为单独 token
146
146
  const oneOfToken = findIdent(tokens, '为以下之一');
147
147
  assert.ok(oneOfToken, '应识别 "为以下之一" 为单独 token');
148
- // 验证 "为" 作为 WHEN 关键词(多次出现)
149
- const whenCount = countIdent(tokens, '为');
150
- assert.strictEqual(whenCount, 2, '应有 2 个 "为" token');
151
- // 验证 "若" 作为 MATCH 关键词
152
- const matchCount = countIdent(tokens, '若');
153
- assert.strictEqual(matchCount, 1, '应有 1 个 "若" token');
148
+ // 验证 "当" 作为 WHEN 关键词(多次出现)
149
+ const whenCount = countIdent(tokens, '当');
150
+ assert.strictEqual(whenCount, 2, '应有 2 个 "当" token');
151
+ // 验证 "匹配于" 作为 MATCH 关键词
152
+ const matchCount = countIdent(tokens, '匹配于');
153
+ assert.strictEqual(matchCount, 1, '应有 1 个 "匹配于" token');
154
154
  });
155
155
  });
156
156
  describe('Canonicalizer 中文支持', () => {
157
- it('应保留中文标点', () => {
158
- const input = '令 变量 为 42。';
157
+ it('应将中文标点归一化为英文等价(v2 软边界)', () => {
158
+ // v2 行为:「。」→「.」、「:」→「:」、「,」「;」「、」→ 空格
159
+ // 仅字符串外生效(见 normalizeCJKPunctuation + ADR-0008)
160
+ const input = '令 变量 定义为 42。';
159
161
  const result = canonicalize(input, ZH_CN);
160
- assert.ok(result.includes('。'));
162
+ assert.strictEqual(result.includes('。'), false, '字符串外的「。」应被归一化为「.」');
163
+ assert.ok(result.includes('.'), 'canonical 结果应以英文句号结尾');
161
164
  });
162
165
  it('应全角数字转半角', () => {
163
166
  const input = '123';
@@ -192,16 +195,18 @@ describe('ZH_CN Lexicon 测试套件', () => {
192
195
  assert.ok(!result.includes('['), '全角左方括号应被移除');
193
196
  assert.ok(!result.includes(']'), '全角右方括号应被移除');
194
197
  });
195
- it('应移除英文冠词但保留中文', () => {
198
+ it('应移除英文冠词但中文不受冠词规则影响', () => {
196
199
  // 英文模式移除冠词
197
200
  const enInput = 'define the User has a name.';
198
201
  const enResult = canonicalize(enInput, EN_US);
199
202
  assert.ok(!enResult.includes(' the '));
200
203
  assert.ok(!enResult.includes(' a '));
201
- // 中文模式不移除冠词(中文没有冠词)
204
+ // 中文模式不应用冠词规则;标识符内容应保留
205
+ // 注意:v2 行为下中文句号会被归一化为英文句号(见 normalizeCJKPunctuation)
202
206
  const zhInput = '定义 用户 包含 名字。';
203
207
  const zhResult = canonicalize(zhInput, ZH_CN);
204
- assert.strictEqual(zhResult, zhInput);
208
+ assert.ok(zhResult.includes('定义 用户 包含 名字'), '中文标识符应保留');
209
+ assert.ok(zhResult.endsWith('.') || zhResult.endsWith('.\n'), '语句应以归一化的英文句号结尾');
205
210
  });
206
211
  it('应将智能引号转换为直角引号', () => {
207
212
  // 左右智能引号 → 直角引号
@@ -234,21 +239,21 @@ describe('ZH_CN Lexicon 测试套件', () => {
234
239
  assert.strictEqual(nonEofTokens[0].value, '变量名');
235
240
  });
236
241
  it('应识别中文布尔值', () => {
237
- const tokens = lex('真', ZH_CN);
242
+ const tokens = lex('真值', ZH_CN);
238
243
  const nonEofTokens = tokens.filter((t) => t.kind !== TokenKind.EOF);
239
244
  assert.strictEqual(nonEofTokens.length, 1);
240
245
  assert.strictEqual(nonEofTokens[0].kind, TokenKind.BOOL);
241
246
  assert.strictEqual(nonEofTokens[0].value, true);
242
247
  });
243
248
  it('应识别中文 false', () => {
244
- const tokens = lex('假', ZH_CN);
249
+ const tokens = lex('假值', ZH_CN);
245
250
  const nonEofTokens = tokens.filter((t) => t.kind !== TokenKind.EOF);
246
251
  assert.strictEqual(nonEofTokens.length, 1);
247
252
  assert.strictEqual(nonEofTokens[0].kind, TokenKind.BOOL);
248
253
  assert.strictEqual(nonEofTokens[0].value, false);
249
254
  });
250
255
  it('应识别中文 null', () => {
251
- const tokens = lex('空', ZH_CN);
256
+ const tokens = lex('空值', ZH_CN);
252
257
  const nonEofTokens = tokens.filter((t) => t.kind !== TokenKind.EOF);
253
258
  assert.strictEqual(nonEofTokens.length, 1);
254
259
  assert.strictEqual(nonEofTokens[0].kind, TokenKind.NULL);
@@ -330,13 +335,13 @@ describe('ZH_CN Lexicon 测试套件', () => {
330
335
  assert.ok(findIdent(tokens, '否则'), '应识别「否则」标识符');
331
336
  assert.strictEqual(countIdent(tokens, '返回'), 2, '应有两个「返回」标识符');
332
337
  });
333
- it('应正确词法分析 若/为 模式匹配', () => {
334
- const input = '若 用户:\n 为 空,返回 「访客」。\n 为 用户(编号, 名字),返回 名字。';
338
+ it('应正确词法分析 匹配于/当 模式匹配(v2 关键字)', () => {
339
+ const input = '匹配于 用户:\n 当 空值,返回 「访客」。\n 当 用户(编号, 名字),返回 名字。';
335
340
  const result = canonicalize(input, ZH_CN);
336
341
  const tokens = lex(result, ZH_CN);
337
342
  // 验证模式匹配关键词
338
- assert.ok(findIdent(tokens, '若'), '应识别「若」标识符');
339
- assert.strictEqual(countIdent(tokens, '为'), 2, '应有两个「为」标识符');
343
+ assert.ok(findIdent(tokens, '匹配于'), '应识别「匹配于」标识符');
344
+ assert.strictEqual(countIdent(tokens, '当'), 2, '应有两个「当」标识符');
340
345
  });
341
346
  it('应正确词法分析嵌套条件', () => {
342
347
  const input = '如果 甲:\n 如果 乙:\n 返回 「甲乙」。\n 否则:\n 返回 「仅甲」。\n否则:\n 返回 「无」。';
@@ -384,66 +389,66 @@ describe('ZH_CN Lexicon 测试套件', () => {
384
389
  assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 2, '应有2个句号');
385
390
  });
386
391
  });
387
- describe('中文布尔运算语法', () => {
388
- it('应正确词法分析「或」运算', () => {
389
- const input = '甲 或 乙。';
392
+ describe('中文布尔运算语法(v2 关键字)', () => {
393
+ it('应正确词法分析「或者」运算', () => {
394
+ const input = '甲 或者 乙。';
390
395
  const result = canonicalize(input, ZH_CN);
391
396
  const tokens = lex(result, ZH_CN);
392
- assert.ok(findIdent(tokens, '或'), '应识别「或」标识符');
397
+ assert.ok(findIdent(tokens, '或者'), '应识别「或者」标识符');
393
398
  // Token 结构验证
394
399
  assert.strictEqual(countTokenKind(tokens, TokenKind.IDENT), 3, '应有3个标识符');
395
400
  assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 1, '应有1个句号');
396
401
  });
397
- it('应正确词法分析「且」运算', () => {
398
- const input = '甲 且 乙。';
402
+ it('应正确词法分析「并且」运算', () => {
403
+ const input = '甲 并且 乙。';
399
404
  const result = canonicalize(input, ZH_CN);
400
405
  const tokens = lex(result, ZH_CN);
401
- assert.ok(findIdent(tokens, '且'), '应识别「且」标识符');
406
+ assert.ok(findIdent(tokens, '并且'), '应识别「并且」标识符');
402
407
  // Token 结构验证
403
408
  assert.strictEqual(countTokenKind(tokens, TokenKind.IDENT), 3, '应有3个标识符');
404
409
  });
405
- it('应正确词法分析「非」运算', () => {
406
- const input = '非 甲。';
410
+ it('应正确词法分析「不是」运算', () => {
411
+ const input = '不是 甲。';
407
412
  const result = canonicalize(input, ZH_CN);
408
413
  const tokens = lex(result, ZH_CN);
409
- assert.ok(findIdent(tokens, '非'), '应识别「非」标识符');
414
+ assert.ok(findIdent(tokens, '不是'), '应识别「不是」标识符');
410
415
  // Token 结构验证
411
416
  assert.strictEqual(countTokenKind(tokens, TokenKind.IDENT), 2, '应有2个标识符');
412
417
  });
413
418
  it('应正确词法分析复合布尔表达式', () => {
414
- const input = '(甲 且 乙) 或 (非 丙)。';
419
+ const input = '(甲 并且 乙) 或者 (不是 丙)。';
415
420
  const result = canonicalize(input, ZH_CN);
416
421
  const tokens = lex(result, ZH_CN);
417
- assert.strictEqual(countIdent(tokens, '且'), 1, '应有1个「且」');
418
- assert.strictEqual(countIdent(tokens, '或'), 1, '应有1个「或」');
419
- assert.strictEqual(countIdent(tokens, '非'), 1, '应有1个「非」');
422
+ assert.strictEqual(countIdent(tokens, '并且'), 1, '应有1个「并且」');
423
+ assert.strictEqual(countIdent(tokens, '或者'), 1, '应有1个「或者」');
424
+ assert.strictEqual(countIdent(tokens, '不是'), 1, '应有1个「不是」');
420
425
  // Token 结构验证:括号
421
426
  assert.strictEqual(countTokenKind(tokens, TokenKind.LPAREN), 2, '应有2个左括号');
422
427
  assert.strictEqual(countTokenKind(tokens, TokenKind.RPAREN), 2, '应有2个右括号');
423
428
  });
424
429
  });
425
- describe('中文算术运算语法', () => {
426
- it('应正确词法分析「加」运算', () => {
427
- const input = '1 加 2。';
430
+ describe('中文算术运算语法(v2 关键字)', () => {
431
+ it('应正确词法分析「加上」运算', () => {
432
+ const input = '1 加上 2。';
428
433
  const result = canonicalize(input, ZH_CN);
429
434
  const tokens = lex(result, ZH_CN);
430
- assert.ok(findIdent(tokens, '加'), '应识别「加」标识符');
435
+ assert.ok(findIdent(tokens, '加上'), '应识别「加上」标识符');
431
436
  // Token 结构验证:数字
432
437
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
433
438
  });
434
- it('应正确词法分析「减」运算', () => {
435
- const input = '5 减 3。';
439
+ it('应正确词法分析「减去」运算', () => {
440
+ const input = '5 减去 3。';
436
441
  const result = canonicalize(input, ZH_CN);
437
442
  const tokens = lex(result, ZH_CN);
438
- assert.ok(findIdent(tokens, '减'), '应识别「减」标识符');
443
+ assert.ok(findIdent(tokens, '减去'), '应识别「减去」标识符');
439
444
  // Token 结构验证
440
445
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
441
446
  });
442
- it('应正确词法分析「乘」运算', () => {
443
- const input = '4 乘 2。';
447
+ it('应正确词法分析「乘以」运算', () => {
448
+ const input = '4 乘以 2。';
444
449
  const result = canonicalize(input, ZH_CN);
445
450
  const tokens = lex(result, ZH_CN);
446
- assert.ok(findIdent(tokens, '乘'), '应识别「乘」标识符');
451
+ assert.ok(findIdent(tokens, '乘以'), '应识别「乘以」标识符');
447
452
  // Token 结构验证
448
453
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
449
454
  });
@@ -456,73 +461,60 @@ describe('ZH_CN Lexicon 测试套件', () => {
456
461
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 2, '应有2个整数');
457
462
  });
458
463
  it('应正确词法分析复合算术表达式', () => {
459
- const input = '(1 加 2) 乘 (10 除以 5)。';
464
+ const input = '(1 加上 2) 乘以 (10 除以 5)。';
460
465
  const result = canonicalize(input, ZH_CN);
461
466
  const tokens = lex(result, ZH_CN);
462
- assert.strictEqual(countIdent(tokens, '加'), 1, '应有1个「加」');
463
- assert.strictEqual(countIdent(tokens, '乘'), 1, '应有1个「乘」');
467
+ assert.strictEqual(countIdent(tokens, '加上'), 1, '应有1个「加上」');
468
+ assert.strictEqual(countIdent(tokens, '乘以'), 1, '应有1个「乘以」');
464
469
  assert.strictEqual(countIdent(tokens, '除以'), 1, '应有1个「除以」');
465
470
  // Token 结构验证
466
471
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 4, '应有4个整数');
467
472
  assert.strictEqual(countTokenKind(tokens, TokenKind.LPAREN), 2, '应有2个左括号');
468
473
  assert.strictEqual(countTokenKind(tokens, TokenKind.RPAREN), 2, '应有2个右括号');
469
474
  });
470
- // Codex Round 3 建议:新增 FLOAT 和 LONG 测试用例
471
475
  it('应正确词法分析浮点数运算', () => {
472
- const input = '1.5 加 2.5。';
476
+ const input = '1.5 加上 2.5。';
473
477
  const result = canonicalize(input, ZH_CN);
474
478
  const tokens = lex(result, ZH_CN);
475
- assert.ok(findIdent(tokens, '加'), '应识别「加」标识符');
476
- // Token 结构验证:浮点数
479
+ assert.ok(findIdent(tokens, '加上'), '应识别「加上」标识符');
477
480
  assert.strictEqual(countTokenKind(tokens, TokenKind.FLOAT), 2, '应有2个浮点数');
478
- // 验证无整数(避免误解析)
479
481
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 0, '不应有整数');
480
482
  });
481
483
  it('应正确词法分析全角数字浮点数运算', () => {
482
- // 全角数字应被规范化为半角(注意:小数点为半角,全角句点未被规范化)
483
- const input = '3.14 乘 2.0。';
484
+ const input = '3.14 乘以 2.0。';
484
485
  const result = canonicalize(input, ZH_CN);
485
486
  const tokens = lex(result, ZH_CN);
486
- assert.ok(findIdent(tokens, '乘'), '应识别「乘」标识符');
487
- // Token 结构验证:全角数字转半角后产生浮点数
487
+ assert.ok(findIdent(tokens, '乘以'), '应识别「乘以」标识符');
488
488
  assert.strictEqual(countTokenKind(tokens, TokenKind.FLOAT), 2, '应有2个浮点数');
489
- // 验证规范化后的文本
490
489
  assert.ok(result.includes('3.14'), '全角3.14应规范化为半角3.14');
491
490
  });
492
491
  it('应正确词法分析长整数运算(大写 L)', () => {
493
- const input = '1000000000000L 加 500000000000L。';
492
+ const input = '1000000000000L 加上 500000000000L。';
494
493
  const result = canonicalize(input, ZH_CN);
495
494
  const tokens = lex(result, ZH_CN);
496
- assert.ok(findIdent(tokens, '加'), '应识别「加」标识符');
497
- // Token 结构验证:长整数
495
+ assert.ok(findIdent(tokens, '加上'), '应识别「加上」标识符');
498
496
  assert.strictEqual(countTokenKind(tokens, TokenKind.LONG), 2, '应有2个长整数');
499
- // 验证无普通整数
500
497
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 0, '不应有普通整数');
501
498
  });
502
- // Codex Round 4 建议:补充小写 l 和全角 l 测试
503
499
  it('应正确词法分析长整数运算(小写 l)', () => {
504
- const input = '100l 减 50l。';
500
+ const input = '100l 减去 50l。';
505
501
  const result = canonicalize(input, ZH_CN);
506
502
  const tokens = lex(result, ZH_CN);
507
- assert.ok(findIdent(tokens, '减'), '应识别「减」标识符');
508
- // Token 结构验证:小写 l 应被识别为长整数
503
+ assert.ok(findIdent(tokens, '减去'), '应识别「减去」标识符');
509
504
  assert.strictEqual(countTokenKind(tokens, TokenKind.LONG), 2, '应有2个长整数(小写l)');
510
505
  assert.strictEqual(countTokenKind(tokens, TokenKind.INT), 0, '不应有普通整数');
511
506
  });
512
507
  it('应正确词法分析长整数运算(全角 l)', () => {
513
- // 全角字母(U+FF4C) 应被规范化为半角 l
514
- const input = '100l 乘 2l。';
508
+ const input = '100l 乘以 2l。';
515
509
  const result = canonicalize(input, ZH_CN);
516
510
  const tokens = lex(result, ZH_CN);
517
- assert.ok(findIdent(tokens, '乘'), '应识别「乘」标识符');
518
- // Token 结构验证:全角 l 应被规范化后识别为长整数
511
+ assert.ok(findIdent(tokens, '乘以'), '应识别「乘以」标识符');
519
512
  assert.strictEqual(countTokenKind(tokens, TokenKind.LONG), 2, '应有2个长整数(全角l转半角后)');
520
- // Codex Round 5 建议:验证规范化输出(确保 canonicalizer 工作正常)
521
513
  assert.ok(result.includes('100l'), '全角 100l 应规范化为半角 100l');
522
514
  assert.ok(!result.includes('l'), '规范化后不应再含全角 l');
523
515
  });
524
516
  it('应正确词法分析混合数值类型表达式', () => {
525
- const input = '(1 加 1.5) 乘 100L。';
517
+ const input = '(1 加上 1.5) 乘以 100L。';
526
518
  const result = canonicalize(input, ZH_CN);
527
519
  const tokens = lex(result, ZH_CN);
528
520
  // Token 结构验证:三种数值类型共存
@@ -542,22 +534,22 @@ describe('ZH_CN Lexicon 测试套件', () => {
542
534
  assert.strictEqual(countTokenKind(tokens, TokenKind.COLON), 1, '应有1个冒号');
543
535
  assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 1, '应有1个句号');
544
536
  });
545
- it('应正确词法分析嵌套循环', () => {
546
- const input = '对每个 行 在 表格:\n 对每个 列 在 行:\n 处理 列。';
537
+ it('应正确词法分析嵌套循环(v2: 属于)', () => {
538
+ const input = '对每个 行 属于 表格:\n 对每个 列 属于 行:\n 处理 列。';
547
539
  const result = canonicalize(input, ZH_CN);
548
540
  const tokens = lex(result, ZH_CN);
549
541
  assert.strictEqual(countIdent(tokens, '对每个'), 2, '应有2个「对每个」');
550
- assert.strictEqual(countIdent(tokens, '在'), 2, '应有2个「在」');
542
+ assert.strictEqual(countIdent(tokens, '属于'), 2, '应有2个「属于」');
551
543
  // Token 结构验证
552
544
  assert.strictEqual(countTokenKind(tokens, TokenKind.COLON), 2, '应有2个冒号');
553
545
  assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 1, '应有1个句号');
554
546
  });
555
- it('应正确词法分析带条件的循环', () => {
556
- const input = '对每个 数字 在 数列:\n 如果 数字 大于 0:\n 累加 数字。';
547
+ it('应正确词法分析带条件的循环(v2: 属于)', () => {
548
+ const input = '对每个 数字 属于 数列:\n 如果 数字 大于 0:\n 累加 数字。';
557
549
  const result = canonicalize(input, ZH_CN);
558
550
  const tokens = lex(result, ZH_CN);
559
551
  assert.ok(findIdent(tokens, '对每个'), '应识别「对每个」标识符');
560
- assert.ok(findIdent(tokens, '在'), '应识别「在」标识符');
552
+ assert.ok(findIdent(tokens, '属于'), '应识别「属于」标识符');
561
553
  assert.ok(findIdent(tokens, '如果'), '应识别「如果」标识符');
562
554
  assert.ok(findIdent(tokens, '大于'), '应识别「大于」标识符');
563
555
  // Token 结构验证
@@ -577,12 +569,12 @@ describe('ZH_CN Lexicon 测试套件', () => {
577
569
  assert.strictEqual(countTokenKind(tokens, TokenKind.COLON), 1, '应有1个冒号');
578
570
  assert.strictEqual(countTokenKind(tokens, TokenKind.DOT), 2, '应有2个句号');
579
571
  });
580
- it('应正确词法分析带依赖的步骤', () => {
581
- const input = '流程 构建:\n 步骤 编译 依赖 于 下载。\n 步骤 下载。';
572
+ it('应正确词法分析带依赖的步骤(v2: 基于)', () => {
573
+ const input = '流程 构建:\n 步骤 编译 依赖 基于 下载。\n 步骤 下载。';
582
574
  const result = canonicalize(input, ZH_CN);
583
575
  const tokens = lex(result, ZH_CN);
584
576
  assert.ok(findIdent(tokens, '依赖'), '应识别「依赖」标识符');
585
- assert.ok(findIdent(tokens, '于'), '应识别「于」标识符');
577
+ assert.ok(findIdent(tokens, '基于'), '应识别「基于」标识符');
586
578
  });
587
579
  it('应正确词法分析带补偿的步骤', () => {
588
580
  const input = '步骤 扣款 补偿 退款。';
@@ -636,31 +628,30 @@ describe('ZH_CN Lexicon 测试套件', () => {
636
628
  assert.ok(findIdent(tokens, '产出'), '应识别「产出」标识符');
637
629
  assert.ok(findIdent(tokens, '返回'), '应识别「返回」标识符');
638
630
  });
639
- it('应正确词法分析变量绑定', () => {
640
- const input = '令 结果 为 计算(42)。';
631
+ it('应正确词法分析变量绑定(v2: 定义为)', () => {
632
+ const input = '令 结果 定义为 计算(42)。';
641
633
  const result = canonicalize(input, ZH_CN);
642
634
  const tokens = lex(result, ZH_CN);
643
635
  assert.ok(findIdent(tokens, '令'), '应识别「令」标识符');
644
- assert.ok(findIdent(tokens, '为'), '应识别「为」标识符');
636
+ assert.ok(findIdent(tokens, '定义为'), '应识别「定义为」标识符');
645
637
  });
646
- it('应正确词法分析完整中文程序', () => {
638
+ it('应正确词法分析完整中文程序(v2 关键字)', () => {
647
639
  const program = `模块 应用。
648
640
 
649
641
  定义 用户 包含 编号:文本,名字:文本。
650
642
 
651
643
  问候 入参 用户:可选 用户,产出 文本:
652
- 用户:
653
- 空,返回 「你好,访客」。
654
- 用户(编号, 名字),返回 「欢迎,」加 名字。`;
644
+ 匹配于 用户:
645
+ 当 空值,返回 「你好,访客」。
646
+ 当 用户(编号, 名字),返回 「欢迎,」加上 名字。`;
655
647
  const result = canonicalize(program, ZH_CN);
656
648
  const tokens = lex(result, ZH_CN);
657
649
  // 验证关键词精确出现次数(在 lexer 阶段为 IDENT)
658
- // Codex 审查建议:使用精确断言替代宽松的 >= 判断
659
650
  assert.strictEqual(countIdent(tokens, '包含'), 1, '应有1个「包含」');
660
651
  assert.strictEqual(countIdent(tokens, '入参'), 1, '应有1个「入参」');
661
652
  assert.strictEqual(countIdent(tokens, '产出'), 1, '应有1个「产出」');
662
- assert.strictEqual(countIdent(tokens, '若'), 1, '应有1个「若」');
663
- assert.strictEqual(countIdent(tokens, '为'), 2, '应有2个「为」');
653
+ assert.strictEqual(countIdent(tokens, '匹配于'), 1, '应有1个「匹配于」');
654
+ assert.strictEqual(countIdent(tokens, '当'), 2, '应有2个「当」');
664
655
  assert.strictEqual(countIdent(tokens, '返回'), 2, '应有2个「返回」');
665
656
  assert.strictEqual(countIdent(tokens, '可选'), 1, '应有1个「可选」');
666
657
  });
@@ -676,15 +667,15 @@ Rule greet given user: User?, produce Text:
676
667
  Match user:
677
668
  When null, Return "Hi, guest".
678
669
  When User(id, name, 42), Return "Welcome, " plus name.`;
679
- // 对应的中文程序(Codex Round 6 建议:添加数字字面量以验证 INT 统计)
670
+ // 对应的中文程序(v2 关键字:匹配于/当/加上/空值)
680
671
  const zhProgram = `模块 应用。
681
672
 
682
673
  定义 用户 包含 编号:文本,名字:文本,年龄:整数。
683
674
 
684
675
  问候 入参 用户:可选 用户,产出 文本:
685
- 用户:
686
- 空,返回 「你好,访客」。
687
- 用户(编号, 名字, 42),返回 「欢迎,」加 名字。`;
676
+ 匹配于 用户:
677
+ 当 空值,返回 「你好,访客」。
678
+ 当 用户(编号, 名字, 42),返回 「欢迎,」加上 名字。`;
688
679
  const enTokens = lex(canonicalize(enProgram, EN_US), EN_US);
689
680
  const zhTokens = lex(canonicalize(zhProgram, ZH_CN), ZH_CN);
690
681
  // 获取 token 类型分布(Codex 审查建议:拆分更多 Token 类型以细化比较)
@@ -763,9 +754,9 @@ Rule greet given user: User?, produce Text:
763
754
  const enFalseTokens = lex('false', EN_US);
764
755
  assert.ok(enTrueTokens.some((t) => t.kind === TokenKind.BOOL && t.value === true));
765
756
  assert.ok(enFalseTokens.some((t) => t.kind === TokenKind.BOOL && t.value === false));
766
- // 中文
767
- const zhTrueTokens = lex('真', ZH_CN);
768
- const zhFalseTokens = lex('假', ZH_CN);
757
+ // 中文(v2 关键字:真值 / 假值)
758
+ const zhTrueTokens = lex('真值', ZH_CN);
759
+ const zhFalseTokens = lex('假值', ZH_CN);
769
760
  assert.ok(zhTrueTokens.some((t) => t.kind === TokenKind.BOOL && t.value === true));
770
761
  assert.ok(zhFalseTokens.some((t) => t.kind === TokenKind.BOOL && t.value === false));
771
762
  });
@@ -773,8 +764,8 @@ Rule greet given user: User?, produce Text:
773
764
  // 英文
774
765
  const enNullTokens = lex('null', EN_US);
775
766
  assert.ok(enNullTokens.some((t) => t.kind === TokenKind.NULL && t.value === null));
776
- // 中文
777
- const zhNullTokens = lex('', ZH_CN);
767
+ // 中文(v2 关键字:空值)
768
+ const zhNullTokens = lex('空值', ZH_CN);
778
769
  assert.ok(zhNullTokens.some((t) => t.kind === TokenKind.NULL && t.value === null));
779
770
  });
780
771
  });
@@ -800,9 +791,9 @@ Rule greet given user: User?, produce Text:
800
791
  it('切换默认后 lex 应使用新默认', () => {
801
792
  const originalDefault = LexiconRegistry.getDefault();
802
793
  try {
803
- // 切换到中文并测试中文布尔值
794
+ // 切换到中文并测试中文布尔值(v2 关键字:真值)
804
795
  LexiconRegistry.setDefault('zh-CN');
805
- const zhTokens = lex('真');
796
+ const zhTokens = lex('真值');
806
797
  const zhBool = zhTokens.find((t) => t.kind === TokenKind.BOOL);
807
798
  assert.ok(zhBool, '应识别中文布尔值');
808
799
  assert.strictEqual(zhBool?.value, true);
@@ -817,14 +808,24 @@ Rule greet given user: User?, produce Text:
817
808
  // 中文 CNL 文件解析测试
818
809
  // ============================================================================
819
810
  describe('中文 CNL 文件解析', () => {
820
- // 使用项目根目录解析,因为 .aster 文件不会被编译到 dist
821
- const zhCNDir = path.resolve(process.cwd(), 'test/cnl/programs/zh-CN');
811
+ // 索引 corpus 中所有 source 起自 aster-lang-ts/test/cnl/programs/zh-CN/* 的样本
812
+ const zhCNCorpusIndex = new Map();
813
+ for (const sample of listTier3Bucket('lexicon-i18n')) {
814
+ const src = sample.meta.source;
815
+ if (!src)
816
+ continue;
817
+ const m = /aster-lang-ts\/test\/cnl\/programs\/zh-CN\/([^/]+\.aster)$/.exec(src);
818
+ if (m && m[1])
819
+ zhCNCorpusIndex.set(m[1], sample);
820
+ }
822
821
  /**
823
- * 读取中文 CNL 文件并进行规范化和词法分析
822
+ * 读取中文 CNL 文件并进行规范化和词法分析(共享 corpus)
824
823
  */
825
824
  const parseZhCNFile = (filename) => {
826
- const filePath = path.join(zhCNDir, filename);
827
- const source = fs.readFileSync(filePath, 'utf-8');
825
+ const sample = zhCNCorpusIndex.get(filename);
826
+ if (!sample)
827
+ throw new Error(`corpus sample not found: zh-CN/${filename}`);
828
+ const source = sample.readSource();
828
829
  const canonical = canonicalize(source, ZH_CN);
829
830
  const tokens = lex(canonical, ZH_CN);
830
831
  return { source, canonical, tokens };
@@ -841,7 +842,7 @@ Rule greet given user: User?, produce Text:
841
842
  const stringTokens = tokens.filter((t) => t.kind === TokenKind.STRING);
842
843
  assert.ok(stringTokens.length > 0, '应有字符串 token');
843
844
  });
844
- it('loan_decision.aster 应正确解析', () => {
845
+ it('loan_decision.aster 应正确解析(v2 关键字)', () => {
845
846
  const { tokens } = parseZhCNFile('loan_decision.aster');
846
847
  // 验证类型定义关键词(定义 作为纯关键字直接解析为 IDENT)
847
848
  assert.ok(findIdent(tokens, '定义'), '应有 定义 关键词');
@@ -849,12 +850,12 @@ Rule greet given user: User?, produce Text:
849
850
  // 验证控制流关键词
850
851
  assert.ok(findIdent(tokens, '如果'), '应有 如果 关键词');
851
852
  assert.ok(findIdent(tokens, '返回'), '应有 返回 关键词');
852
- // 验证变量绑定
853
+ // 验证变量绑定(v2: 定义为)
853
854
  assert.ok(findIdent(tokens, '令'), '应有 令 关键词');
854
- assert.ok(findIdent(tokens, ''), '应有 关键词');
855
- // 验证布尔值
855
+ assert.ok(findIdent(tokens, '定义为'), '应有 定义为 关键词(v2 BE)');
856
+ // 验证布尔值(v2: 真值/假值)
856
857
  const boolTokens = tokens.filter((t) => t.kind === TokenKind.BOOL);
857
- assert.ok(boolTokens.length >= 2, '应有多个布尔值(真/假)');
858
+ assert.ok(boolTokens.length >= 2, '应有多个布尔值(真值/假值)');
858
859
  // 验证整数
859
860
  const intTokens = tokens.filter((t) => t.kind === TokenKind.INT);
860
861
  assert.ok(intTokens.length >= 2, '应有整数字面量(18, 100000)');
@@ -873,40 +874,46 @@ Rule greet given user: User?, produce Text:
873
874
  const stringTokens = tokens.filter((t) => t.kind === TokenKind.STRING);
874
875
  assert.ok(stringTokens.length >= 3, '应有字符串(「申请人未满18岁」等)');
875
876
  });
876
- it('user_greeting.aster 应正确解析', () => {
877
+ it('user_greeting.aster 应正确解析(v2 关键字)', () => {
877
878
  const { tokens } = parseZhCNFile('user_greeting.aster');
878
- // 验证模式匹配关键词
879
- assert.ok(findIdent(tokens, ''), '应有 关键词(模式匹配)');
880
- assert.ok(findIdent(tokens, ''), '应有 关键词(when)');
879
+ // 验证模式匹配关键词(v2: 匹配于/当)
880
+ assert.ok(findIdent(tokens, '匹配于'), '应有 匹配于 关键词(v2 MATCH)');
881
+ assert.ok(findIdent(tokens, '当'), '应有 当 关键词(v2 WHEN)');
881
882
  // 可选类型已改为推断,应不再显式出现
882
883
  assert.ok(!findIdent(tokens, '可选'), '不应显式出现 可选 关键词');
883
- // 验证 null
884
+ // 验证 null 值(v2: 空值)
884
885
  const nullTokens = tokens.filter((t) => t.kind === TokenKind.NULL);
885
- assert.ok(nullTokens.length > 0, '应有 token');
886
+ assert.ok(nullTokens.length > 0, '应有 空值 token');
886
887
  });
887
- it('arithmetic.aster 应正确解析', () => {
888
+ it('arithmetic.aster 应正确解析(v2 关键字)', () => {
888
889
  const { tokens } = parseZhCNFile('arithmetic.aster');
889
- // 验证算术运算关键词
890
- assert.ok(findIdent(tokens, ''), '应有 关键词');
891
- assert.ok(findIdent(tokens, ''), '应有 关键词');
892
- assert.ok(findIdent(tokens, ''), '应有 关键词');
890
+ // 验证算术运算关键词(v2: 加上/减去/乘以/除以)
891
+ assert.ok(findIdent(tokens, '加上'), '应有 加上 关键词(v2 PLUS)');
892
+ assert.ok(findIdent(tokens, '减去'), '应有 减去 关键词(v2 MINUS)');
893
+ assert.ok(findIdent(tokens, '乘以'), '应有 乘以 关键词(v2 TIMES)');
893
894
  assert.ok(findIdent(tokens, '除以'), '应有 除以 关键词');
894
- // 验证变量绑定
895
+ // 验证变量绑定(v2: 定义为)
895
896
  const letCount = countIdent(tokens, '令');
896
- const beCount = countIdent(tokens, '');
897
+ const beCount = countIdent(tokens, '定义为');
897
898
  assert.ok(letCount >= 2, '应有多个 令 关键词');
898
- assert.ok(beCount >= 2, '应有多个 关键词');
899
+ assert.ok(beCount >= 2, '应有多个 定义为 关键词(v2 BE)');
899
900
  });
900
- it('所有中文 CNL 文件应成功规范化', () => {
901
- const files = fs.readdirSync(zhCNDir).filter((f) => f.endsWith('.aster'));
901
+ it('所有中文 CNL 文件应成功规范化(v2 软边界)', () => {
902
+ const files = [...zhCNCorpusIndex.keys()];
902
903
  assert.ok(files.length >= 4, '应有至少 4 个 .aster 文件');
903
904
  for (const file of files) {
904
905
  const { canonical } = parseZhCNFile(file);
905
906
  // 验证规范化后不包含智能引号
906
907
  assert.ok(!canonical.includes('“'), `${file} 不应包含左智能引号`);
907
908
  assert.ok(!canonical.includes('”'), `${file} 不应包含右智能引号`);
908
- // 验证规范化后包含中文标点
909
- assert.ok(canonical.includes('。'), `${file} 应包含中文句号`);
909
+ // v2 行为:中文标点被归一化为英文等价(字符串外)
910
+ // 字符串内的中文标点保留;用 lexicon 引号「」分段后验证字符串外
911
+ const stringSegments = canonical.split(/[「」]/);
912
+ const outside = stringSegments.filter((_, i) => i % 2 === 0).join('');
913
+ assert.strictEqual(outside.includes('。'), false, `${file} 字符串外不应有「。」`);
914
+ assert.strictEqual(outside.includes(','), false, `${file} 字符串外不应有「,」`);
915
+ // canonical 应至少包含一个英文句号(语句终止)
916
+ assert.ok(canonical.includes('.'), `${file} 应包含归一化后的英文句号`);
910
917
  }
911
918
  });
912
919
  it('中文 CNL 文件的 token 分布应合理', () => {