@c4a/server-cli 0.4.15-alpha.6 → 0.4.15-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -220991,6 +220991,12 @@ class LlmServiceImpl {
220991
220991
  if (options?.systemPrompt) {
220992
220992
  callSettings.system = options.systemPrompt;
220993
220993
  }
220994
+ if (options?.jsonMode) {
220995
+ callSettings.providerOptions = {
220996
+ openai: { responseFormat: { type: "json_object" } },
220997
+ anthropic: { responseFormat: { type: "json_object" } }
220998
+ };
220999
+ }
220994
221000
  const result = await retry(() => generateText(callSettings), {
220995
221001
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
220996
221002
  });
@@ -221038,6 +221044,12 @@ class LlmServiceImpl {
221038
221044
  if (options?.systemPrompt) {
221039
221045
  callSettings.system = options.systemPrompt;
221040
221046
  }
221047
+ if (options?.jsonMode) {
221048
+ callSettings.providerOptions = {
221049
+ openai: { responseFormat: { type: "json_object" } },
221050
+ anthropic: { responseFormat: { type: "json_object" } }
221051
+ };
221052
+ }
221041
221053
  const result = await retry(() => Promise.resolve(streamText(callSettings)), {
221042
221054
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
221043
221055
  });
@@ -221856,6 +221868,7 @@ function parseExtractionOutput(raw, schema) {
221856
221868
  }
221857
221869
  parsed = normalizeFlatOutput(parsed);
221858
221870
  parsed = stripNulls(parsed);
221871
+ parsed = patchAttributeDefaults(parsed);
221859
221872
  const result = schema.safeParse(parsed);
221860
221873
  if (!result.success) {
221861
221874
  return { success: false, error: result.error };
@@ -221923,7 +221936,37 @@ function tryParseJson(raw) {
221923
221936
  return repairAndParse(raw);
221924
221937
  }
221925
221938
  function repairAndParse(raw) {
221926
- return JSON.parse(jsonrepair(raw));
221939
+ try {
221940
+ return JSON.parse(jsonrepair(raw));
221941
+ } catch {
221942
+ const truncated = truncateToLastCompleteEntry(raw);
221943
+ if (truncated) {
221944
+ return JSON.parse(jsonrepair(truncated));
221945
+ }
221946
+ throw new Error(`JSON repair failed for output of length ${raw.length}`);
221947
+ }
221948
+ }
221949
+ function truncateToLastCompleteEntry(raw) {
221950
+ let text2 = raw.trim();
221951
+ if (text2.startsWith("```")) {
221952
+ const firstNewline = text2.indexOf(`
221953
+ `);
221954
+ text2 = text2.slice(firstNewline + 1);
221955
+ }
221956
+ if (text2.endsWith("```")) {
221957
+ text2 = text2.slice(0, -3);
221958
+ }
221959
+ const pattern = /\}\s*,\s*"P\d+"/g;
221960
+ let lastMatch = null;
221961
+ let m;
221962
+ while ((m = pattern.exec(text2)) !== null) {
221963
+ lastMatch = m;
221964
+ }
221965
+ if (!lastMatch)
221966
+ return null;
221967
+ const cutPos = lastMatch.index + 1;
221968
+ const truncated = text2.slice(0, cutPos) + "}";
221969
+ return truncated;
221927
221970
  }
221928
221971
  var PARAGRAPH_TAG_RE = /^P\d+$/;
221929
221972
  function normalizeFlatOutput(parsed) {
@@ -221956,6 +221999,43 @@ function stripNulls(value) {
221956
221999
  }
221957
222000
  return value;
221958
222001
  }
222002
+ function patchAttributeDefaults(value) {
222003
+ if (!value || typeof value !== "object" || Array.isArray(value))
222004
+ return value;
222005
+ const obj = value;
222006
+ if ("paragraphs" in obj && Array.isArray(obj.paragraphs)) {
222007
+ return {
222008
+ ...obj,
222009
+ paragraphs: obj.paragraphs.map((p) => {
222010
+ if (!p || typeof p !== "object")
222011
+ return p;
222012
+ const para = p;
222013
+ const atoms2 = para.atoms;
222014
+ if (!atoms2 || typeof atoms2 !== "object")
222015
+ return p;
222016
+ return { ...para, atoms: patchAttrsInAtoms(atoms2) };
222017
+ })
222018
+ };
222019
+ }
222020
+ return value;
222021
+ }
222022
+ function patchAttrsInAtoms(atoms2) {
222023
+ const attrs = atoms2.attributes;
222024
+ if (!Array.isArray(attrs))
222025
+ return atoms2;
222026
+ return {
222027
+ ...atoms2,
222028
+ attributes: attrs.map((attr) => {
222029
+ if (!attr || typeof attr !== "object" || Array.isArray(attr))
222030
+ return attr;
222031
+ const a = attr;
222032
+ if (typeof a.type !== "string" || a.type === "") {
222033
+ return { ...a, type: "other" };
222034
+ }
222035
+ return attr;
222036
+ })
222037
+ };
222038
+ }
221959
222039
  function isRecord(value) {
221960
222040
  return !!value && typeof value === "object" && "key" in value && "value" in value && typeof value.key === "string";
221961
222041
  }
@@ -222268,6 +222348,7 @@ Return a single JSON object keyed by paragraph tags. Only include paragraphs tha
222268
222348
  - **Enum fields MUST use ONLY the listed values.** For example, entity.kind must be one of "implementation"|"external"|"concept" — do NOT use values from other atom types (e.g., do NOT put "team" or "human" in entity.kind; those belong to roles.kind).
222269
222349
  - Every atom MUST include a "confidence" field (0.0-1.0) indicating how confident you are in the extraction. Use higher values (0.85-1.0) for explicitly stated facts and lower values (0.5-0.7) for inferred or ambiguous information.
222270
222350
  - **Classify correctly:** People, teams, and personas → "roles" (not "entities"). Technical systems, services, modules → "entities".
222351
+ - **Entity naming — extract the subject, not the document:** Entity names should represent the actual system/product/framework being described, not the document itself. If the text says "Next.js is a React framework", the entity is "Next.js" — not "Next.js 官方文档" or "Next.js API 参考". Use names that work as standalone knowledge graph nodes, independent of any document context.
222271
222352
  - **Entity reference consistency (CRITICAL):** Every entity name referenced in relation.from, relation.to, behavior.subject, or any other cross-reference field MUST also appear in the "entities" array of the SAME paragraph (or a preceding paragraph in the same chunk). If an entity is mentioned for the first time in a relation, you MUST also extract it as an entity. This ensures no "dangling references" — every name used in relations has a corresponding entity declaration.
222272
222353
  - **Cross-atom reference consistency:** transitions[].from and transitions[].to values MUST exist in states[].values of the same entity. roles[].performs values MUST match names declared in behaviors[].name.
222273
222354
  - **Constraints vs rules distinction:** Use "constraints" for unconditional declarative mandates ('X must Y'). Use "rules" for conditional logic ('IF X THEN Y'). Do not mix them — a requirement with no condition is a constraint, a requirement triggered by a condition is a rule. Do NOT invent a rule for every constraint — only create a rule when the text explicitly states conditional logic.
@@ -222327,13 +222408,14 @@ var ENTITY_RESOLUTION_SYSTEM_PROMPT = `You are an entity resolution assistant. Y
222327
222408
  - Prefer the LONGER, more descriptive name as the canonical name
222328
222409
  - Do NOT merge names that share a substring but refer to different things
222329
222410
  - When uncertain, do NOT merge — add to "ambiguous" instead
222330
- - Chinese and English names for the same entity SHOULD be merged (e.g. "Vmok" → "Vmok 微模块框架")
222331
- - Abbreviations should be merged with their full forms (e.g. "AGW" → "API Gateway")
222411
+ - Chinese and English names for the same entity SHOULD be merged (e.g. "Webpack" → "Webpack 打包工具")
222412
+ - Abbreviations should be merged with their full forms (e.g. "K8s" → "Kubernetes")
222413
+ - Document-descriptive names (containing "文档", "参考", "指南" etc.) should be merged to the actual product/system name. The entity is the product, not the document about it
222332
222414
 
222333
222415
  ## Task 2: Remove Noise
222334
222416
  - Apply the **identity test**: a real entity is something you can discuss independently ("What is X?", "How does X work?", "Who owns X?"). Names that fail this test — values, addresses, actions, generic descriptions — are noise.
222335
222417
  - Remove names that are NOT meaningful named entities: generic words, action descriptions, or things that are attributes/values rather than independent subjects
222336
- - Examples of REAL entities to KEEP: product names (TTAstra, Gulux), tools (nvm, Rush), services (Op Main 服务), platforms (AGW 平台) — these all pass the identity test
222418
+ - Examples of REAL entities to KEEP: product names (Next.js, Vite), tools (nvm, Turborepo), services (Nginx, Redis), platforms (Kubernetes, Docker) — these all pass the identity test
222337
222419
  - When uncertain, KEEP the name — only remove if it clearly fails the identity test
222338
222420
 
222339
222421
  ## Output
@@ -222674,7 +222756,7 @@ Return ONLY a valid JSON object. No markdown fences, no explanation.`;
222674
222756
  }
222675
222757
  var DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT = DIAGRAM_SYSTEM_PROMPT;
222676
222758
  // ../llm/src/chunking/markdownChunker.ts
222677
- var DEFAULT_MAX_TOKENS2 = 4000;
222759
+ var DEFAULT_MAX_TOKENS2 = 3600;
222678
222760
  var DEFAULT_PARAGRAPH_MAX_TOKENS = 500;
222679
222761
  function estimateTokens(text2) {
222680
222762
  return Math.ceil(text2.length / 4);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@c4a/server-cli",
3
- "version": "0.4.15-alpha.6",
3
+ "version": "0.4.15-alpha.7",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "c4a-server": "./index.js"
package/serve.js CHANGED
@@ -194996,6 +194996,12 @@ class LlmServiceImpl {
194996
194996
  if (options?.systemPrompt) {
194997
194997
  callSettings.system = options.systemPrompt;
194998
194998
  }
194999
+ if (options?.jsonMode) {
195000
+ callSettings.providerOptions = {
195001
+ openai: { responseFormat: { type: "json_object" } },
195002
+ anthropic: { responseFormat: { type: "json_object" } }
195003
+ };
195004
+ }
194999
195005
  const result = await retry(() => generateText(callSettings), {
195000
195006
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
195001
195007
  });
@@ -195043,6 +195049,12 @@ class LlmServiceImpl {
195043
195049
  if (options?.systemPrompt) {
195044
195050
  callSettings.system = options.systemPrompt;
195045
195051
  }
195052
+ if (options?.jsonMode) {
195053
+ callSettings.providerOptions = {
195054
+ openai: { responseFormat: { type: "json_object" } },
195055
+ anthropic: { responseFormat: { type: "json_object" } }
195056
+ };
195057
+ }
195046
195058
  const result = await retry(() => Promise.resolve(streamText(callSettings)), {
195047
195059
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
195048
195060
  });
@@ -195861,6 +195873,7 @@ function parseExtractionOutput(raw5, schema2) {
195861
195873
  }
195862
195874
  parsed = normalizeFlatOutput(parsed);
195863
195875
  parsed = stripNulls(parsed);
195876
+ parsed = patchAttributeDefaults(parsed);
195864
195877
  const result = schema2.safeParse(parsed);
195865
195878
  if (!result.success) {
195866
195879
  return { success: false, error: result.error };
@@ -195928,7 +195941,37 @@ function tryParseJson(raw5) {
195928
195941
  return repairAndParse(raw5);
195929
195942
  }
195930
195943
  function repairAndParse(raw5) {
195931
- return JSON.parse(jsonrepair(raw5));
195944
+ try {
195945
+ return JSON.parse(jsonrepair(raw5));
195946
+ } catch {
195947
+ const truncated = truncateToLastCompleteEntry(raw5);
195948
+ if (truncated) {
195949
+ return JSON.parse(jsonrepair(truncated));
195950
+ }
195951
+ throw new Error(`JSON repair failed for output of length ${raw5.length}`);
195952
+ }
195953
+ }
195954
+ function truncateToLastCompleteEntry(raw5) {
195955
+ let text2 = raw5.trim();
195956
+ if (text2.startsWith("```")) {
195957
+ const firstNewline = text2.indexOf(`
195958
+ `);
195959
+ text2 = text2.slice(firstNewline + 1);
195960
+ }
195961
+ if (text2.endsWith("```")) {
195962
+ text2 = text2.slice(0, -3);
195963
+ }
195964
+ const pattern = /\}\s*,\s*"P\d+"/g;
195965
+ let lastMatch = null;
195966
+ let m;
195967
+ while ((m = pattern.exec(text2)) !== null) {
195968
+ lastMatch = m;
195969
+ }
195970
+ if (!lastMatch)
195971
+ return null;
195972
+ const cutPos = lastMatch.index + 1;
195973
+ const truncated = text2.slice(0, cutPos) + "}";
195974
+ return truncated;
195932
195975
  }
195933
195976
  var PARAGRAPH_TAG_RE = /^P\d+$/;
195934
195977
  function normalizeFlatOutput(parsed) {
@@ -195961,6 +196004,43 @@ function stripNulls(value) {
195961
196004
  }
195962
196005
  return value;
195963
196006
  }
196007
+ function patchAttributeDefaults(value) {
196008
+ if (!value || typeof value !== "object" || Array.isArray(value))
196009
+ return value;
196010
+ const obj = value;
196011
+ if ("paragraphs" in obj && Array.isArray(obj.paragraphs)) {
196012
+ return {
196013
+ ...obj,
196014
+ paragraphs: obj.paragraphs.map((p4) => {
196015
+ if (!p4 || typeof p4 !== "object")
196016
+ return p4;
196017
+ const para = p4;
196018
+ const atoms2 = para.atoms;
196019
+ if (!atoms2 || typeof atoms2 !== "object")
196020
+ return p4;
196021
+ return { ...para, atoms: patchAttrsInAtoms(atoms2) };
196022
+ })
196023
+ };
196024
+ }
196025
+ return value;
196026
+ }
196027
+ function patchAttrsInAtoms(atoms2) {
196028
+ const attrs = atoms2.attributes;
196029
+ if (!Array.isArray(attrs))
196030
+ return atoms2;
196031
+ return {
196032
+ ...atoms2,
196033
+ attributes: attrs.map((attr) => {
196034
+ if (!attr || typeof attr !== "object" || Array.isArray(attr))
196035
+ return attr;
196036
+ const a = attr;
196037
+ if (typeof a.type !== "string" || a.type === "") {
196038
+ return { ...a, type: "other" };
196039
+ }
196040
+ return attr;
196041
+ })
196042
+ };
196043
+ }
195964
196044
  function isRecord(value) {
195965
196045
  return !!value && typeof value === "object" && "key" in value && "value" in value && typeof value.key === "string";
195966
196046
  }
@@ -196273,6 +196353,7 @@ Return a single JSON object keyed by paragraph tags. Only include paragraphs tha
196273
196353
  - **Enum fields MUST use ONLY the listed values.** For example, entity.kind must be one of "implementation"|"external"|"concept" — do NOT use values from other atom types (e.g., do NOT put "team" or "human" in entity.kind; those belong to roles.kind).
196274
196354
  - Every atom MUST include a "confidence" field (0.0-1.0) indicating how confident you are in the extraction. Use higher values (0.85-1.0) for explicitly stated facts and lower values (0.5-0.7) for inferred or ambiguous information.
196275
196355
  - **Classify correctly:** People, teams, and personas → "roles" (not "entities"). Technical systems, services, modules → "entities".
196356
+ - **Entity naming — extract the subject, not the document:** Entity names should represent the actual system/product/framework being described, not the document itself. If the text says "Next.js is a React framework", the entity is "Next.js" — not "Next.js 官方文档" or "Next.js API 参考". Use names that work as standalone knowledge graph nodes, independent of any document context.
196276
196357
  - **Entity reference consistency (CRITICAL):** Every entity name referenced in relation.from, relation.to, behavior.subject, or any other cross-reference field MUST also appear in the "entities" array of the SAME paragraph (or a preceding paragraph in the same chunk). If an entity is mentioned for the first time in a relation, you MUST also extract it as an entity. This ensures no "dangling references" — every name used in relations has a corresponding entity declaration.
196277
196358
  - **Cross-atom reference consistency:** transitions[].from and transitions[].to values MUST exist in states[].values of the same entity. roles[].performs values MUST match names declared in behaviors[].name.
196278
196359
  - **Constraints vs rules distinction:** Use "constraints" for unconditional declarative mandates ('X must Y'). Use "rules" for conditional logic ('IF X THEN Y'). Do not mix them — a requirement with no condition is a constraint, a requirement triggered by a condition is a rule. Do NOT invent a rule for every constraint — only create a rule when the text explicitly states conditional logic.
@@ -196332,13 +196413,14 @@ var ENTITY_RESOLUTION_SYSTEM_PROMPT = `You are an entity resolution assistant. Y
196332
196413
  - Prefer the LONGER, more descriptive name as the canonical name
196333
196414
  - Do NOT merge names that share a substring but refer to different things
196334
196415
  - When uncertain, do NOT merge — add to "ambiguous" instead
196335
- - Chinese and English names for the same entity SHOULD be merged (e.g. "Vmok" → "Vmok 微模块框架")
196336
- - Abbreviations should be merged with their full forms (e.g. "AGW" → "API Gateway")
196416
+ - Chinese and English names for the same entity SHOULD be merged (e.g. "Webpack" → "Webpack 打包工具")
196417
+ - Abbreviations should be merged with their full forms (e.g. "K8s" → "Kubernetes")
196418
+ - Document-descriptive names (containing "文档", "参考", "指南" etc.) should be merged to the actual product/system name. The entity is the product, not the document about it
196337
196419
 
196338
196420
  ## Task 2: Remove Noise
196339
196421
  - Apply the **identity test**: a real entity is something you can discuss independently ("What is X?", "How does X work?", "Who owns X?"). Names that fail this test — values, addresses, actions, generic descriptions — are noise.
196340
196422
  - Remove names that are NOT meaningful named entities: generic words, action descriptions, or things that are attributes/values rather than independent subjects
196341
- - Examples of REAL entities to KEEP: product names (TTAstra, Gulux), tools (nvm, Rush), services (Op Main 服务), platforms (AGW 平台) — these all pass the identity test
196423
+ - Examples of REAL entities to KEEP: product names (Next.js, Vite), tools (nvm, Turborepo), services (Nginx, Redis), platforms (Kubernetes, Docker) — these all pass the identity test
196342
196424
  - When uncertain, KEEP the name — only remove if it clearly fails the identity test
196343
196425
 
196344
196426
  ## Output
@@ -196679,7 +196761,7 @@ Return ONLY a valid JSON object. No markdown fences, no explanation.`;
196679
196761
  }
196680
196762
  var DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT = DIAGRAM_SYSTEM_PROMPT;
196681
196763
  // ../llm/src/chunking/markdownChunker.ts
196682
- var DEFAULT_MAX_TOKENS2 = 4000;
196764
+ var DEFAULT_MAX_TOKENS2 = 3600;
196683
196765
  var DEFAULT_PARAGRAPH_MAX_TOKENS = 500;
196684
196766
  function estimateTokens(text2) {
196685
196767
  return Math.ceil(text2.length / 4);
@@ -197604,7 +197686,8 @@ async function extractTableAtoms(chunk, sections, result, llmService) {
197604
197686
  const prompt = buildDocTableAnnotationPrompt(tableText);
197605
197687
  try {
197606
197688
  const res = await llmService.generateText(prompt, {
197607
- systemPrompt: DOC_TABLE_ANNOTATION_SYSTEM_PROMPT
197689
+ systemPrompt: DOC_TABLE_ANNOTATION_SYSTEM_PROMPT,
197690
+ jsonMode: true
197608
197691
  });
197609
197692
  const parsed = parseExtractionOutput(res.text, docChunkResultSchema);
197610
197693
  if (!parsed.success) {
@@ -197696,7 +197779,8 @@ async function extractDiagramAtoms(chunk, sections, result, llmService) {
197696
197779
  const prompt = buildDocDiagramAnnotationPrompt(diagramText);
197697
197780
  try {
197698
197781
  const res = await llmService.generateText(prompt, {
197699
- systemPrompt: DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT
197782
+ systemPrompt: DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT,
197783
+ jsonMode: true
197700
197784
  });
197701
197785
  const parsed = parseExtractionOutput(res.text, docChunkResultSchema);
197702
197786
  if (!parsed.success) {
@@ -198214,7 +198298,9 @@ ${trimmed}
198214
198298
  Continue the JSON output from the exact point of truncation. Output ONLY the remaining JSON text.`;
198215
198299
  try {
198216
198300
  const result = await llmService.generateText(continuationPrompt, {
198217
- systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
198301
+ systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
198302
+ jsonMode: true,
198303
+ maxTokens: 16384
198218
198304
  });
198219
198305
  const combined = trimmed + result.text.trim();
198220
198306
  JSON.parse(jsonrepair(combined));
@@ -198256,9 +198342,12 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
198256
198342
  const chunkText = injectParagraphTags(chunk, sections);
198257
198343
  let llmCalls = 0;
198258
198344
  let totalTokens = 0;
198345
+ const DOC_INDEX_MAX_OUTPUT_TOKENS = 16384;
198259
198346
  const t1Prompt = buildDocAtomAnnotationPrompt(chunkText);
198260
198347
  const t1Result = await llmService.generateText(t1Prompt, {
198261
- systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
198348
+ systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
198349
+ jsonMode: true,
198350
+ maxTokens: DOC_INDEX_MAX_OUTPUT_TOKENS
198262
198351
  });
198263
198352
  llmCalls++;
198264
198353
  totalTokens += t1Result.usage.totalTokens;
@@ -198268,8 +198357,17 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
198268
198357
  onStep?.("T1 done", llmCalls, totalTokens);
198269
198358
  let parseResult = parseExtractionOutput(continued.text, docChunkResultSchema);
198270
198359
  if (!parseResult.success) {
198360
+ const rawLen = continued.text.length;
198271
198361
  const preview = continued.text.slice(0, 500).replace(/\n/g, "\\n");
198272
- console.warn(`[docIndexer] chunk ${chunkIndex} T1 strict parse failed, attempting lenient. ` + `Error: ${parseResult.error.message.slice(0, 200)}. ` + `LLM output preview: ${preview}`);
198362
+ const errMsg = parseResult.error.message.slice(0, 300);
198363
+ const posMatch = errMsg.match(/position\s+(\d+)/);
198364
+ const errContext = posMatch ? continued.text.slice(Math.max(0, +posMatch[1] - 100), +posMatch[1] + 100).replace(/\n/g, "\\n") : "";
198365
+ console.warn(`[docIndexer] chunk ${chunkIndex} T1 strict parse failed, attempting lenient.
198366
+ ` + ` Error: ${errMsg}
198367
+ ` + ` Output length: ${rawLen} chars | finishReason: ${t1Result.finishReason}
198368
+ ` + ` Preview (first 500): ${preview}
198369
+ ` + (errContext ? ` Context around error position: ...${errContext}...
198370
+ ` : ""));
198273
198371
  const lenient = tryLenientParse(continued.text, chunkIndex);
198274
198372
  if (lenient) {
198275
198373
  parseResult = { success: true, data: lenient };
@@ -198288,7 +198386,8 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
198288
198386
  extract: async (input) => {
198289
198387
  const prompt = buildDocGleaningPrompt(input.chunkText, input.previousResult);
198290
198388
  const result = await llmService.generateText(prompt, {
198291
- systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
198389
+ systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
198390
+ jsonMode: true
198292
198391
  });
198293
198392
  llmCalls++;
198294
198393
  totalTokens += result.usage.totalTokens;
@@ -198518,6 +198617,17 @@ function ensureAtomConfidence(atoms2) {
198518
198617
  }
198519
198618
  }
198520
198619
  }
198620
+ function sortAtomsByConfidence(sections) {
198621
+ for (const section of sections) {
198622
+ for (const para of section.paragraphs) {
198623
+ for (const atomList of Object.values(para.atoms)) {
198624
+ if (!Array.isArray(atomList) || atomList.length < 2)
198625
+ continue;
198626
+ atomList.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0));
198627
+ }
198628
+ }
198629
+ }
198630
+ }
198521
198631
  function countAtoms(sections) {
198522
198632
  const counts = {};
198523
198633
  for (const section of sections.values()) {
@@ -198554,7 +198664,8 @@ async function runEntityResolution(sections, entityNames, llmService, onProgress
198554
198664
  ...noiseCandidates.length > 0 ? { noiseCandidates } : {}
198555
198665
  });
198556
198666
  const result = await llmService.generateText(prompt, {
198557
- systemPrompt: ENTITY_RESOLUTION_SYSTEM_PROMPT
198667
+ systemPrompt: ENTITY_RESOLUTION_SYSTEM_PROMPT,
198668
+ jsonMode: true
198558
198669
  });
198559
198670
  let resolution;
198560
198671
  try {
@@ -198679,7 +198790,13 @@ async function indexDocument(input) {
198679
198790
  const paragraphs = sortedParaKeys.map((pk) => {
198680
198791
  const p4 = section.paragraphs.get(pk);
198681
198792
  return { text: p4.text, atoms: p4.atoms };
198793
+ }).filter((p4) => {
198794
+ const hasAtoms = Object.values(p4.atoms).some((arr) => Array.isArray(arr) && arr.length > 0);
198795
+ const hasText = p4.text.replace(/[\s\p{P}\p{S}]/gu, "").length > 0;
198796
+ return hasAtoms || hasText;
198682
198797
  });
198798
+ if (paragraphs.length === 0)
198799
+ continue;
198683
198800
  digestSections.push({
198684
198801
  heading: section.heading,
198685
198802
  level: section.level,
@@ -198710,6 +198827,7 @@ async function indexDocument(input) {
198710
198827
  totalLlmCalls += resolutionResult.llmCalls;
198711
198828
  totalTokens += resolutionResult.totalTokens;
198712
198829
  }
198830
+ sortAtomsByConfidence(digestSections);
198713
198831
  const atomCounts = countAtoms(sectionsMap);
198714
198832
  const paragraphCount = digestSections.reduce((sum, s) => sum + s.paragraphs.length, 0);
198715
198833
  if (paragraphCount === 0) {
@@ -0,0 +1 @@
1
+ import{c as o,u as m,a as c,j as e,b as d,C as h}from"./index-wBO4P6CB.js";const p=[["path",{d:"m12 19-7-7 7-7",key:"1l729n"}],["path",{d:"M19 12H5",key:"x3x0zl"}]],f=o("arrow-left",p);function j({icon:a,title:t,badges:s,meta:l}){const i=m(),{t:r}=c();return e.jsxs("div",{children:[e.jsxs("div",{className:"flex justify-between items-center",children:[e.jsxs("div",{className:"flex min-w-0 items-center gap-2",children:[e.jsx("span",{className:"shrink-0",children:a}),e.jsx("h1",{className:"min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",children:t}),s&&s.length>0&&e.jsx("div",{className:"flex items-center gap-1.5 ml-2",children:s.map((n,x)=>e.jsx("span",{children:n},x))})]}),e.jsxs("button",{onClick:()=>i(-1),className:"flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",children:[e.jsx(f,{size:14}),r("common.back")]})]}),l&&l.length>0&&e.jsx("div",{className:"mt-2 flex flex-wrap items-baseline gap-4 text-xs",children:l.map(n=>e.jsxs("span",{className:"text-c4a-text-muted",children:[n.label,":",e.jsx("span",{className:"text-c4a-text-secondary",children:n.value})]},n.label))})]})}function N(){const{t:a}=c(),{hashId:t}=d();if(!t)return e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:a("common.loading")})});const s=t.length>16?t.slice(0,16)+"…":t;return e.jsxs("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:[e.jsx(j,{icon:e.jsx("span",{children:"📄"}),title:`Content: ${s}`}),e.jsx(h,{hashId:t})]})}export{N as ContentDetail};
@@ -0,0 +1 @@
1
+ import{c as o,u as m,a as c,j as e,b as d,C as h}from"./index-CMVd9rlp.js";const p=[["path",{d:"m12 19-7-7 7-7",key:"1l729n"}],["path",{d:"M19 12H5",key:"x3x0zl"}]],f=o("arrow-left",p);function j({icon:a,title:t,badges:s,meta:l}){const i=m(),{t:r}=c();return e.jsxs("div",{children:[e.jsxs("div",{className:"flex justify-between items-center",children:[e.jsxs("div",{className:"flex min-w-0 items-center gap-2",children:[e.jsx("span",{className:"shrink-0",children:a}),e.jsx("h1",{className:"min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",children:t}),s&&s.length>0&&e.jsx("div",{className:"flex items-center gap-1.5 ml-2",children:s.map((n,x)=>e.jsx("span",{children:n},x))})]}),e.jsxs("button",{onClick:()=>i(-1),className:"flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",children:[e.jsx(f,{size:14}),r("common.back")]})]}),l&&l.length>0&&e.jsx("div",{className:"mt-2 flex flex-wrap items-baseline gap-4 text-xs",children:l.map(n=>e.jsxs("span",{className:"text-c4a-text-muted",children:[n.label,":",e.jsx("span",{className:"text-c4a-text-secondary",children:n.value})]},n.label))})]})}function N(){const{t:a}=c(),{hashId:t}=d();if(!t)return e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:a("common.loading")})});const s=t.length>16?t.slice(0,16)+"…":t;return e.jsxs("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:[e.jsx(j,{icon:e.jsx("span",{children:"📄"}),title:`Content: ${s}`}),e.jsx(h,{hashId:t})]})}export{N as ContentDetail};
@@ -0,0 +1 @@
1
+ import{a as n,b as a,j as e,E as i}from"./index-wBO4P6CB.js";function r(){const{t:s}=n(),{entityId:t}=a();return t?e.jsx("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:e.jsx(i,{entityId:t})}):e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:s("common.loading")})})}export{r as EntityDetail};
@@ -0,0 +1 @@
1
+ import{a as n,b as a,j as e,E as i}from"./index-CMVd9rlp.js";function r(){const{t:s}=n(),{entityId:t}=a();return t?e.jsx("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:e.jsx(i,{entityId:t})}):e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:s("common.loading")})})}export{r as EntityDetail};
@@ -0,0 +1 @@
1
+ import{a,b as n,j as e,R as l}from"./index-CMVd9rlp.js";function r(){const{t:s}=a(),{relationId:t}=n();return t?e.jsx("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:e.jsx(l,{relationId:t})}):e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:s("common.loading")})})}export{r as RelationDetail};
@@ -0,0 +1 @@
1
+ import{a,b as n,j as e,R as l}from"./index-wBO4P6CB.js";function r(){const{t:s}=a(),{relationId:t}=n();return t?e.jsx("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:e.jsx(l,{relationId:t})}):e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:s("common.loading")})})}export{r as RelationDetail};