@c4a/server-cli 0.4.15-alpha.6 → 0.4.15-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -220991,6 +220991,12 @@ class LlmServiceImpl {
220991
220991
  if (options?.systemPrompt) {
220992
220992
  callSettings.system = options.systemPrompt;
220993
220993
  }
220994
+ if (options?.jsonMode) {
220995
+ callSettings.providerOptions = {
220996
+ openai: { responseFormat: { type: "json_object" } },
220997
+ anthropic: { responseFormat: { type: "json_object" } }
220998
+ };
220999
+ }
220994
221000
  const result = await retry(() => generateText(callSettings), {
220995
221001
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
220996
221002
  });
@@ -221038,6 +221044,12 @@ class LlmServiceImpl {
221038
221044
  if (options?.systemPrompt) {
221039
221045
  callSettings.system = options.systemPrompt;
221040
221046
  }
221047
+ if (options?.jsonMode) {
221048
+ callSettings.providerOptions = {
221049
+ openai: { responseFormat: { type: "json_object" } },
221050
+ anthropic: { responseFormat: { type: "json_object" } }
221051
+ };
221052
+ }
221041
221053
  const result = await retry(() => Promise.resolve(streamText(callSettings)), {
221042
221054
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
221043
221055
  });
@@ -221856,6 +221868,7 @@ function parseExtractionOutput(raw, schema) {
221856
221868
  }
221857
221869
  parsed = normalizeFlatOutput(parsed);
221858
221870
  parsed = stripNulls(parsed);
221871
+ parsed = patchAttributeDefaults(parsed);
221859
221872
  const result = schema.safeParse(parsed);
221860
221873
  if (!result.success) {
221861
221874
  return { success: false, error: result.error };
@@ -221923,7 +221936,37 @@ function tryParseJson(raw) {
221923
221936
  return repairAndParse(raw);
221924
221937
  }
221925
221938
  function repairAndParse(raw) {
221926
- return JSON.parse(jsonrepair(raw));
221939
+ try {
221940
+ return JSON.parse(jsonrepair(raw));
221941
+ } catch {
221942
+ const truncated = truncateToLastCompleteEntry(raw);
221943
+ if (truncated) {
221944
+ return JSON.parse(jsonrepair(truncated));
221945
+ }
221946
+ throw new Error(`JSON repair failed for output of length ${raw.length}`);
221947
+ }
221948
+ }
221949
/**
 * Salvage a cut-off JSON object whose keys are paragraph tags ("P1", "P2", ...).
 *
 * Strips a surrounding Markdown code fence if present, then finds the last
 * place where a closing brace is followed by a comma and a quoted `P<digits>`
 * key. Everything after that closing brace is dropped and a single `}` is
 * appended so the outer object is closed again.
 *
 * @param {string} raw - Possibly truncated JSON text.
 * @returns {string|null} The shortened text, or `null` when no such boundary
 *   exists or `raw` is not a string.
 */
function truncateToLastCompleteEntry(raw) {
  if (typeof raw !== "string") {
    return null;
  }
  let text2 = raw.trim();
  if (text2.startsWith("```")) {
    const firstNewline = text2.indexOf("\n");
    // With a newline, the whole fence line (marker plus language tag) is removed.
    // Without one, indexOf yields -1 and slicing from 0 would keep the fence,
    // so strip only the marker and its tag.
    text2 = firstNewline === -1 ? text2.replace(/^```[\w-]*/, "") : text2.slice(firstNewline + 1);
  }
  if (text2.endsWith("```")) {
    text2 = text2.slice(0, -3);
  }
  let lastBoundary = -1;
  for (const match of text2.matchAll(/\}\s*,\s*"P\d+"/g)) {
    lastBoundary = match.index;
  }
  if (lastBoundary === -1) {
    return null;
  }
  // Keep up to and including the brace that closes the last finished entry,
  // then close the enclosing object.
  return text2.slice(0, lastBoundary + 1) + "}";
}
221928
221971
  var PARAGRAPH_TAG_RE = /^P\d+$/;
221929
221972
  function normalizeFlatOutput(parsed) {
@@ -221956,6 +221999,43 @@ function stripNulls(value) {
221956
221999
  }
221957
222000
  return value;
221958
222001
  }
222002
/**
 * Apply attribute defaults to every paragraph of a parsed extraction result.
 *
 * Values that are not plain objects, or that have no `paragraphs` array, are
 * returned untouched. Otherwise a shallow copy is returned in which each
 * paragraph that carries an object-typed `atoms` field has that field replaced
 * by the result of `patchAttrsInAtoms`; all other paragraphs are kept as-is.
 *
 * @param {*} value - Parsed output to patch.
 * @returns {*} The original value, or a shallow copy with patched paragraphs.
 */
function patchAttributeDefaults(value) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
  if (!isObjectLike(value) || Array.isArray(value)) {
    return value;
  }
  if (!("paragraphs" in value) || !Array.isArray(value.paragraphs)) {
    return value;
  }
  const patchParagraph = (paragraph) => {
    if (!isObjectLike(paragraph)) {
      return paragraph;
    }
    const paragraphAtoms = paragraph.atoms;
    if (!isObjectLike(paragraphAtoms)) {
      return paragraph;
    }
    return { ...paragraph, atoms: patchAttrsInAtoms(paragraphAtoms) };
  };
  return {
    ...value,
    paragraphs: value.paragraphs.map((paragraph) => patchParagraph(paragraph))
  };
}
222022
/**
 * Give every attribute atom a usable `type`.
 *
 * When `atoms2.attributes` is not an array the input is returned unchanged.
 * Otherwise a shallow copy is returned whose `attributes` entries are kept
 * as-is, except plain-object entries whose `type` is missing, not a string,
 * or the empty string: those are copied with `type` set to "other".
 *
 * @param {object} atoms2 - Atom collection of a single paragraph.
 * @returns {object} The input, or a shallow copy with patched attributes.
 */
function patchAttrsInAtoms(atoms2) {
  const { attributes } = atoms2;
  if (!Array.isArray(attributes)) {
    return atoms2;
  }
  const withDefaultType = (entry) => {
    const isPlainObject = Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
    if (!isPlainObject) {
      return entry;
    }
    const hasUsableType = typeof entry.type === "string" && entry.type !== "";
    return hasUsableType ? entry : { ...entry, type: "other" };
  };
  return {
    ...atoms2,
    attributes: attributes.map((entry) => withDefaultType(entry))
  };
}
221959
222039
  function isRecord(value) {
221960
222040
  return !!value && typeof value === "object" && "key" in value && "value" in value && typeof value.key === "string";
221961
222041
  }
@@ -222268,6 +222348,7 @@ Return a single JSON object keyed by paragraph tags. Only include paragraphs tha
222268
222348
  - **Enum fields MUST use ONLY the listed values.** For example, entity.kind must be one of "implementation"|"external"|"concept" — do NOT use values from other atom types (e.g., do NOT put "team" or "human" in entity.kind; those belong to roles.kind).
222269
222349
  - Every atom MUST include a "confidence" field (0.0-1.0) indicating how confident you are in the extraction. Use higher values (0.85-1.0) for explicitly stated facts and lower values (0.5-0.7) for inferred or ambiguous information.
222270
222350
  - **Classify correctly:** People, teams, and personas → "roles" (not "entities"). Technical systems, services, modules → "entities".
222351
+ - **Entity naming — extract the subject, not the document:** Entity names should represent the actual system/product/framework being described, not the document itself. If the text says "Next.js is a React framework", the entity is "Next.js" — not "Next.js 官方文档" or "Next.js API 参考". Use names that work as standalone knowledge graph nodes, independent of any document context.
222271
222352
  - **Entity reference consistency (CRITICAL):** Every entity name referenced in relation.from, relation.to, behavior.subject, or any other cross-reference field MUST also appear in the "entities" array of the SAME paragraph (or a preceding paragraph in the same chunk). If an entity is mentioned for the first time in a relation, you MUST also extract it as an entity. This ensures no "dangling references" — every name used in relations has a corresponding entity declaration.
222272
222353
  - **Cross-atom reference consistency:** transitions[].from and transitions[].to values MUST exist in states[].values of the same entity. roles[].performs values MUST match names declared in behaviors[].name.
222273
222354
  - **Constraints vs rules distinction:** Use "constraints" for unconditional declarative mandates ('X must Y'). Use "rules" for conditional logic ('IF X THEN Y'). Do not mix them — a requirement with no condition is a constraint, a requirement triggered by a condition is a rule. Do NOT invent a rule for every constraint — only create a rule when the text explicitly states conditional logic.
@@ -222327,13 +222408,14 @@ var ENTITY_RESOLUTION_SYSTEM_PROMPT = `You are an entity resolution assistant. Y
222327
222408
  - Prefer the LONGER, more descriptive name as the canonical name
222328
222409
  - Do NOT merge names that share a substring but refer to different things
222329
222410
  - When uncertain, do NOT merge — add to "ambiguous" instead
222330
- - Chinese and English names for the same entity SHOULD be merged (e.g. "Vmok" → "Vmok 微模块框架")
222331
- - Abbreviations should be merged with their full forms (e.g. "AGW" → "API Gateway")
222411
+ - Chinese and English names for the same entity SHOULD be merged (e.g. "Webpack" → "Webpack 打包工具")
222412
+ - Abbreviations should be merged with their full forms (e.g. "K8s" → "Kubernetes")
222413
+ - Document-descriptive names (containing "文档", "参考", "指南" etc.) should be merged to the actual product/system name. The entity is the product, not the document about it
222332
222414
 
222333
222415
  ## Task 2: Remove Noise
222334
222416
  - Apply the **identity test**: a real entity is something you can discuss independently ("What is X?", "How does X work?", "Who owns X?"). Names that fail this test — values, addresses, actions, generic descriptions — are noise.
222335
222417
  - Remove names that are NOT meaningful named entities: generic words, action descriptions, or things that are attributes/values rather than independent subjects
222336
- - Examples of REAL entities to KEEP: product names (TTAstra, Gulux), tools (nvm, Rush), services (Op Main 服务), platforms (AGW 平台) — these all pass the identity test
222418
+ - Examples of REAL entities to KEEP: product names (Next.js, Vite), tools (nvm, Turborepo), services (Nginx, Redis), platforms (Kubernetes, Docker) — these all pass the identity test
222337
222419
  - When uncertain, KEEP the name — only remove if it clearly fails the identity test
222338
222420
 
222339
222421
  ## Output
@@ -222674,7 +222756,7 @@ Return ONLY a valid JSON object. No markdown fences, no explanation.`;
222674
222756
  }
222675
222757
  var DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT = DIAGRAM_SYSTEM_PROMPT;
222676
222758
  // ../llm/src/chunking/markdownChunker.ts
222677
- var DEFAULT_MAX_TOKENS2 = 4000;
222759
+ var DEFAULT_MAX_TOKENS2 = 3600;
222678
222760
  var DEFAULT_PARAGRAPH_MAX_TOKENS = 500;
222679
222761
  function estimateTokens(text2) {
222680
222762
  return Math.ceil(text2.length / 4);
@@ -222950,20 +223032,53 @@ function sectionHeadingLine(section) {
222950
223032
  function buildCoarseParagraphs(sections, paragraphMaxTokens) {
222951
223033
  const result = [];
222952
223034
  const rawEntries = [];
223035
+ let pendingHeadings = [];
222953
223036
  for (let sIdx = 0;sIdx < sections.length; sIdx++) {
222954
223037
  const section = sections[sIdx];
222955
- if (!section.body.trim())
223038
+ const headingLine = sectionHeadingLine(section);
223039
+ if (!section.body.trim()) {
223040
+ if (headingLine)
223041
+ pendingHeadings.push(headingLine);
222956
223042
  continue;
222957
- const bodyTokens = estimateTokens(section.body);
222958
- if (bodyTokens > paragraphMaxTokens) {
223043
+ }
223044
+ const prefixParts = [...pendingHeadings];
223045
+ if (headingLine)
223046
+ prefixParts.push(headingLine);
223047
+ pendingHeadings = [];
223048
+ const prefix = prefixParts.length > 0 ? prefixParts.join(`
223049
+
223050
+ `) : "";
223051
+ const fullText = prefix ? prefix + `
223052
+
223053
+ ` + section.body : section.body;
223054
+ const fullTokens = estimateTokens(fullText);
223055
+ if (fullTokens > paragraphMaxTokens) {
222959
223056
  const parts = splitOversizedText(section.body, paragraphMaxTokens);
222960
- for (const part of parts) {
222961
- rawEntries.push({ sectionIndex: sIdx, text: part, tokens: estimateTokens(part) });
223057
+ for (let i = 0;i < parts.length; i++) {
223058
+ const partText = i === 0 && prefix ? prefix + `
223059
+
223060
+ ` + parts[i] : parts[i];
223061
+ rawEntries.push({ sectionIndex: sIdx, text: partText, tokens: estimateTokens(partText) });
222962
223062
  }
222963
223063
  } else {
222964
- rawEntries.push({ sectionIndex: sIdx, text: section.body, tokens: bodyTokens });
223064
+ rawEntries.push({ sectionIndex: sIdx, text: fullText, tokens: fullTokens });
222965
223065
  }
222966
223066
  }
223067
+ if (pendingHeadings.length > 0 && rawEntries.length > 0) {
223068
+ const last = rawEntries[rawEntries.length - 1];
223069
+ const suffix = pendingHeadings.join(`
223070
+
223071
+ `);
223072
+ last.text = last.text + `
223073
+
223074
+ ` + suffix;
223075
+ last.tokens = estimateTokens(last.text);
223076
+ } else if (pendingHeadings.length > 0) {
223077
+ const text2 = pendingHeadings.join(`
223078
+
223079
+ `);
223080
+ rawEntries.push({ sectionIndex: 0, text: text2, tokens: estimateTokens(text2) });
223081
+ }
222967
223082
  const MERGE_THRESHOLD = 150;
222968
223083
  const merged = [];
222969
223084
  for (const entry of rawEntries) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@c4a/server-cli",
3
- "version": "0.4.15-alpha.6",
3
+ "version": "0.4.15-alpha.8",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "c4a-server": "./index.js"
package/serve.js CHANGED
@@ -194996,6 +194996,12 @@ class LlmServiceImpl {
194996
194996
  if (options?.systemPrompt) {
194997
194997
  callSettings.system = options.systemPrompt;
194998
194998
  }
194999
+ if (options?.jsonMode) {
195000
+ callSettings.providerOptions = {
195001
+ openai: { responseFormat: { type: "json_object" } },
195002
+ anthropic: { responseFormat: { type: "json_object" } }
195003
+ };
195004
+ }
194999
195005
  const result = await retry(() => generateText(callSettings), {
195000
195006
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
195001
195007
  });
@@ -195043,6 +195049,12 @@ class LlmServiceImpl {
195043
195049
  if (options?.systemPrompt) {
195044
195050
  callSettings.system = options.systemPrompt;
195045
195051
  }
195052
+ if (options?.jsonMode) {
195053
+ callSettings.providerOptions = {
195054
+ openai: { responseFormat: { type: "json_object" } },
195055
+ anthropic: { responseFormat: { type: "json_object" } }
195056
+ };
195057
+ }
195046
195058
  const result = await retry(() => Promise.resolve(streamText(callSettings)), {
195047
195059
  shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
195048
195060
  });
@@ -195861,6 +195873,7 @@ function parseExtractionOutput(raw5, schema2) {
195861
195873
  }
195862
195874
  parsed = normalizeFlatOutput(parsed);
195863
195875
  parsed = stripNulls(parsed);
195876
+ parsed = patchAttributeDefaults(parsed);
195864
195877
  const result = schema2.safeParse(parsed);
195865
195878
  if (!result.success) {
195866
195879
  return { success: false, error: result.error };
@@ -195928,7 +195941,37 @@ function tryParseJson(raw5) {
195928
195941
  return repairAndParse(raw5);
195929
195942
  }
195930
195943
  function repairAndParse(raw5) {
195931
- return JSON.parse(jsonrepair(raw5));
195944
+ try {
195945
+ return JSON.parse(jsonrepair(raw5));
195946
+ } catch {
195947
+ const truncated = truncateToLastCompleteEntry(raw5);
195948
+ if (truncated) {
195949
+ return JSON.parse(jsonrepair(truncated));
195950
+ }
195951
+ throw new Error(`JSON repair failed for output of length ${raw5.length}`);
195952
+ }
195953
+ }
195954
/**
 * Salvage a cut-off JSON object whose keys are paragraph tags ("P1", "P2", ...).
 *
 * Strips a surrounding Markdown code fence if present, then finds the last
 * place where a closing brace is followed by a comma and a quoted `P<digits>`
 * key. Everything after that closing brace is dropped and a single `}` is
 * appended so the outer object is closed again.
 *
 * @param {string} raw5 - Possibly truncated JSON text.
 * @returns {string|null} The shortened text, or `null` when no such boundary
 *   exists or `raw5` is not a string.
 */
function truncateToLastCompleteEntry(raw5) {
  if (typeof raw5 !== "string") {
    return null;
  }
  let text2 = raw5.trim();
  if (text2.startsWith("```")) {
    const firstNewline = text2.indexOf("\n");
    // With a newline, the whole fence line (marker plus language tag) is removed.
    // Without one, indexOf yields -1 and slicing from 0 would keep the fence,
    // so strip only the marker and its tag.
    text2 = firstNewline === -1 ? text2.replace(/^```[\w-]*/, "") : text2.slice(firstNewline + 1);
  }
  if (text2.endsWith("```")) {
    text2 = text2.slice(0, -3);
  }
  let lastBoundary = -1;
  for (const match of text2.matchAll(/\}\s*,\s*"P\d+"/g)) {
    lastBoundary = match.index;
  }
  if (lastBoundary === -1) {
    return null;
  }
  // Keep up to and including the brace that closes the last finished entry,
  // then close the enclosing object.
  return text2.slice(0, lastBoundary + 1) + "}";
}
195933
195976
  var PARAGRAPH_TAG_RE = /^P\d+$/;
195934
195977
  function normalizeFlatOutput(parsed) {
@@ -195961,6 +196004,43 @@ function stripNulls(value) {
195961
196004
  }
195962
196005
  return value;
195963
196006
  }
196007
/**
 * Apply attribute defaults to every paragraph of a parsed extraction result.
 *
 * Values that are not plain objects, or that have no `paragraphs` array, are
 * returned untouched. Otherwise a shallow copy is returned in which each
 * paragraph that carries an object-typed `atoms` field has that field replaced
 * by the result of `patchAttrsInAtoms`; all other paragraphs are kept as-is.
 *
 * @param {*} value - Parsed output to patch.
 * @returns {*} The original value, or a shallow copy with patched paragraphs.
 */
function patchAttributeDefaults(value) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";
  if (!isObjectLike(value) || Array.isArray(value)) {
    return value;
  }
  if (!("paragraphs" in value) || !Array.isArray(value.paragraphs)) {
    return value;
  }
  const patchParagraph = (paragraph) => {
    if (!isObjectLike(paragraph)) {
      return paragraph;
    }
    const paragraphAtoms = paragraph.atoms;
    if (!isObjectLike(paragraphAtoms)) {
      return paragraph;
    }
    return { ...paragraph, atoms: patchAttrsInAtoms(paragraphAtoms) };
  };
  return {
    ...value,
    paragraphs: value.paragraphs.map((paragraph) => patchParagraph(paragraph))
  };
}
196027
/**
 * Give every attribute atom a usable `type`.
 *
 * When `atoms2.attributes` is not an array the input is returned unchanged.
 * Otherwise a shallow copy is returned whose `attributes` entries are kept
 * as-is, except plain-object entries whose `type` is missing, not a string,
 * or the empty string: those are copied with `type` set to "other".
 *
 * @param {object} atoms2 - Atom collection of a single paragraph.
 * @returns {object} The input, or a shallow copy with patched attributes.
 */
function patchAttrsInAtoms(atoms2) {
  const { attributes } = atoms2;
  if (!Array.isArray(attributes)) {
    return atoms2;
  }
  const withDefaultType = (entry) => {
    const isPlainObject = Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
    if (!isPlainObject) {
      return entry;
    }
    const hasUsableType = typeof entry.type === "string" && entry.type !== "";
    return hasUsableType ? entry : { ...entry, type: "other" };
  };
  return {
    ...atoms2,
    attributes: attributes.map((entry) => withDefaultType(entry))
  };
}
195964
196044
  function isRecord(value) {
195965
196045
  return !!value && typeof value === "object" && "key" in value && "value" in value && typeof value.key === "string";
195966
196046
  }
@@ -196273,6 +196353,7 @@ Return a single JSON object keyed by paragraph tags. Only include paragraphs tha
196273
196353
  - **Enum fields MUST use ONLY the listed values.** For example, entity.kind must be one of "implementation"|"external"|"concept" — do NOT use values from other atom types (e.g., do NOT put "team" or "human" in entity.kind; those belong to roles.kind).
196274
196354
  - Every atom MUST include a "confidence" field (0.0-1.0) indicating how confident you are in the extraction. Use higher values (0.85-1.0) for explicitly stated facts and lower values (0.5-0.7) for inferred or ambiguous information.
196275
196355
  - **Classify correctly:** People, teams, and personas → "roles" (not "entities"). Technical systems, services, modules → "entities".
196356
+ - **Entity naming — extract the subject, not the document:** Entity names should represent the actual system/product/framework being described, not the document itself. If the text says "Next.js is a React framework", the entity is "Next.js" — not "Next.js 官方文档" or "Next.js API 参考". Use names that work as standalone knowledge graph nodes, independent of any document context.
196276
196357
  - **Entity reference consistency (CRITICAL):** Every entity name referenced in relation.from, relation.to, behavior.subject, or any other cross-reference field MUST also appear in the "entities" array of the SAME paragraph (or a preceding paragraph in the same chunk). If an entity is mentioned for the first time in a relation, you MUST also extract it as an entity. This ensures no "dangling references" — every name used in relations has a corresponding entity declaration.
196277
196358
  - **Cross-atom reference consistency:** transitions[].from and transitions[].to values MUST exist in states[].values of the same entity. roles[].performs values MUST match names declared in behaviors[].name.
196278
196359
  - **Constraints vs rules distinction:** Use "constraints" for unconditional declarative mandates ('X must Y'). Use "rules" for conditional logic ('IF X THEN Y'). Do not mix them — a requirement with no condition is a constraint, a requirement triggered by a condition is a rule. Do NOT invent a rule for every constraint — only create a rule when the text explicitly states conditional logic.
@@ -196332,13 +196413,14 @@ var ENTITY_RESOLUTION_SYSTEM_PROMPT = `You are an entity resolution assistant. Y
196332
196413
  - Prefer the LONGER, more descriptive name as the canonical name
196333
196414
  - Do NOT merge names that share a substring but refer to different things
196334
196415
  - When uncertain, do NOT merge — add to "ambiguous" instead
196335
- - Chinese and English names for the same entity SHOULD be merged (e.g. "Vmok" → "Vmok 微模块框架")
196336
- - Abbreviations should be merged with their full forms (e.g. "AGW" → "API Gateway")
196416
+ - Chinese and English names for the same entity SHOULD be merged (e.g. "Webpack" → "Webpack 打包工具")
196417
+ - Abbreviations should be merged with their full forms (e.g. "K8s" → "Kubernetes")
196418
+ - Document-descriptive names (containing "文档", "参考", "指南" etc.) should be merged to the actual product/system name. The entity is the product, not the document about it
196337
196419
 
196338
196420
  ## Task 2: Remove Noise
196339
196421
  - Apply the **identity test**: a real entity is something you can discuss independently ("What is X?", "How does X work?", "Who owns X?"). Names that fail this test — values, addresses, actions, generic descriptions — are noise.
196340
196422
  - Remove names that are NOT meaningful named entities: generic words, action descriptions, or things that are attributes/values rather than independent subjects
196341
- - Examples of REAL entities to KEEP: product names (TTAstra, Gulux), tools (nvm, Rush), services (Op Main 服务), platforms (AGW 平台) — these all pass the identity test
196423
+ - Examples of REAL entities to KEEP: product names (Next.js, Vite), tools (nvm, Turborepo), services (Nginx, Redis), platforms (Kubernetes, Docker) — these all pass the identity test
196342
196424
  - When uncertain, KEEP the name — only remove if it clearly fails the identity test
196343
196425
 
196344
196426
  ## Output
@@ -196679,7 +196761,7 @@ Return ONLY a valid JSON object. No markdown fences, no explanation.`;
196679
196761
  }
196680
196762
  var DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT = DIAGRAM_SYSTEM_PROMPT;
196681
196763
  // ../llm/src/chunking/markdownChunker.ts
196682
- var DEFAULT_MAX_TOKENS2 = 4000;
196764
+ var DEFAULT_MAX_TOKENS2 = 3600;
196683
196765
  var DEFAULT_PARAGRAPH_MAX_TOKENS = 500;
196684
196766
  function estimateTokens(text2) {
196685
196767
  return Math.ceil(text2.length / 4);
@@ -196955,20 +197037,53 @@ function sectionHeadingLine(section) {
196955
197037
  function buildCoarseParagraphs(sections, paragraphMaxTokens) {
196956
197038
  const result = [];
196957
197039
  const rawEntries = [];
197040
+ let pendingHeadings = [];
196958
197041
  for (let sIdx = 0;sIdx < sections.length; sIdx++) {
196959
197042
  const section = sections[sIdx];
196960
- if (!section.body.trim())
197043
+ const headingLine = sectionHeadingLine(section);
197044
+ if (!section.body.trim()) {
197045
+ if (headingLine)
197046
+ pendingHeadings.push(headingLine);
196961
197047
  continue;
196962
- const bodyTokens = estimateTokens(section.body);
196963
- if (bodyTokens > paragraphMaxTokens) {
197048
+ }
197049
+ const prefixParts = [...pendingHeadings];
197050
+ if (headingLine)
197051
+ prefixParts.push(headingLine);
197052
+ pendingHeadings = [];
197053
+ const prefix = prefixParts.length > 0 ? prefixParts.join(`
197054
+
197055
+ `) : "";
197056
+ const fullText = prefix ? prefix + `
197057
+
197058
+ ` + section.body : section.body;
197059
+ const fullTokens = estimateTokens(fullText);
197060
+ if (fullTokens > paragraphMaxTokens) {
196964
197061
  const parts = splitOversizedText(section.body, paragraphMaxTokens);
196965
- for (const part of parts) {
196966
- rawEntries.push({ sectionIndex: sIdx, text: part, tokens: estimateTokens(part) });
197062
+ for (let i = 0;i < parts.length; i++) {
197063
+ const partText = i === 0 && prefix ? prefix + `
197064
+
197065
+ ` + parts[i] : parts[i];
197066
+ rawEntries.push({ sectionIndex: sIdx, text: partText, tokens: estimateTokens(partText) });
196967
197067
  }
196968
197068
  } else {
196969
- rawEntries.push({ sectionIndex: sIdx, text: section.body, tokens: bodyTokens });
197069
+ rawEntries.push({ sectionIndex: sIdx, text: fullText, tokens: fullTokens });
196970
197070
  }
196971
197071
  }
197072
+ if (pendingHeadings.length > 0 && rawEntries.length > 0) {
197073
+ const last = rawEntries[rawEntries.length - 1];
197074
+ const suffix = pendingHeadings.join(`
197075
+
197076
+ `);
197077
+ last.text = last.text + `
197078
+
197079
+ ` + suffix;
197080
+ last.tokens = estimateTokens(last.text);
197081
+ } else if (pendingHeadings.length > 0) {
197082
+ const text2 = pendingHeadings.join(`
197083
+
197084
+ `);
197085
+ rawEntries.push({ sectionIndex: 0, text: text2, tokens: estimateTokens(text2) });
197086
+ }
196972
197087
  const MERGE_THRESHOLD = 150;
196973
197088
  const merged = [];
196974
197089
  for (const entry of rawEntries) {
@@ -197604,7 +197719,8 @@ async function extractTableAtoms(chunk, sections, result, llmService) {
197604
197719
  const prompt = buildDocTableAnnotationPrompt(tableText);
197605
197720
  try {
197606
197721
  const res = await llmService.generateText(prompt, {
197607
- systemPrompt: DOC_TABLE_ANNOTATION_SYSTEM_PROMPT
197722
+ systemPrompt: DOC_TABLE_ANNOTATION_SYSTEM_PROMPT,
197723
+ jsonMode: true
197608
197724
  });
197609
197725
  const parsed = parseExtractionOutput(res.text, docChunkResultSchema);
197610
197726
  if (!parsed.success) {
@@ -197696,7 +197812,8 @@ async function extractDiagramAtoms(chunk, sections, result, llmService) {
197696
197812
  const prompt = buildDocDiagramAnnotationPrompt(diagramText);
197697
197813
  try {
197698
197814
  const res = await llmService.generateText(prompt, {
197699
- systemPrompt: DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT
197815
+ systemPrompt: DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT,
197816
+ jsonMode: true
197700
197817
  });
197701
197818
  const parsed = parseExtractionOutput(res.text, docChunkResultSchema);
197702
197819
  if (!parsed.success) {
@@ -198214,7 +198331,9 @@ ${trimmed}
198214
198331
  Continue the JSON output from the exact point of truncation. Output ONLY the remaining JSON text.`;
198215
198332
  try {
198216
198333
  const result = await llmService.generateText(continuationPrompt, {
198217
- systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
198334
+ systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
198335
+ jsonMode: true,
198336
+ maxTokens: 16384
198218
198337
  });
198219
198338
  const combined = trimmed + result.text.trim();
198220
198339
  JSON.parse(jsonrepair(combined));
@@ -198256,9 +198375,12 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
198256
198375
  const chunkText = injectParagraphTags(chunk, sections);
198257
198376
  let llmCalls = 0;
198258
198377
  let totalTokens = 0;
198378
+ const DOC_INDEX_MAX_OUTPUT_TOKENS = 16384;
198259
198379
  const t1Prompt = buildDocAtomAnnotationPrompt(chunkText);
198260
198380
  const t1Result = await llmService.generateText(t1Prompt, {
198261
- systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
198381
+ systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
198382
+ jsonMode: true,
198383
+ maxTokens: DOC_INDEX_MAX_OUTPUT_TOKENS
198262
198384
  });
198263
198385
  llmCalls++;
198264
198386
  totalTokens += t1Result.usage.totalTokens;
@@ -198268,8 +198390,17 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
198268
198390
  onStep?.("T1 done", llmCalls, totalTokens);
198269
198391
  let parseResult = parseExtractionOutput(continued.text, docChunkResultSchema);
198270
198392
  if (!parseResult.success) {
198393
+ const rawLen = continued.text.length;
198271
198394
  const preview = continued.text.slice(0, 500).replace(/\n/g, "\\n");
198272
- console.warn(`[docIndexer] chunk ${chunkIndex} T1 strict parse failed, attempting lenient. ` + `Error: ${parseResult.error.message.slice(0, 200)}. ` + `LLM output preview: ${preview}`);
198395
+ const errMsg = parseResult.error.message.slice(0, 300);
198396
+ const posMatch = errMsg.match(/position\s+(\d+)/);
198397
+ const errContext = posMatch ? continued.text.slice(Math.max(0, +posMatch[1] - 100), +posMatch[1] + 100).replace(/\n/g, "\\n") : "";
198398
+ console.warn(`[docIndexer] chunk ${chunkIndex} T1 strict parse failed, attempting lenient.
198399
+ ` + ` Error: ${errMsg}
198400
+ ` + ` Output length: ${rawLen} chars | finishReason: ${t1Result.finishReason}
198401
+ ` + ` Preview (first 500): ${preview}
198402
+ ` + (errContext ? ` Context around error position: ...${errContext}...
198403
+ ` : ""));
198273
198404
  const lenient = tryLenientParse(continued.text, chunkIndex);
198274
198405
  if (lenient) {
198275
198406
  parseResult = { success: true, data: lenient };
@@ -198288,7 +198419,8 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
198288
198419
  extract: async (input) => {
198289
198420
  const prompt = buildDocGleaningPrompt(input.chunkText, input.previousResult);
198290
198421
  const result = await llmService.generateText(prompt, {
198291
- systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
198422
+ systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
198423
+ jsonMode: true
198292
198424
  });
198293
198425
  llmCalls++;
198294
198426
  totalTokens += result.usage.totalTokens;
@@ -198518,6 +198650,17 @@ function ensureAtomConfidence(atoms2) {
198518
198650
  }
198519
198651
  }
198520
198652
  }
198653
/**
 * Sort every atom list of every paragraph by descending `confidence`, in place.
 *
 * Walks `sections[].paragraphs[].atoms` and sorts each array-valued entry that
 * holds at least two atoms. Atoms without a usable numeric confidence
 * (missing, non-numeric, non-finite, or not an object at all) rank as 0, so
 * the comparator never throws and never yields NaN.
 *
 * @param {Iterable<{paragraphs: Iterable<{atoms: object}>}>} sections -
 *   Sections whose paragraph atom lists are reordered.
 * @returns {void} Nothing; the atom arrays are mutated.
 */
function sortAtomsByConfidence(sections) {
  // Numeric strings are coerced exactly as the subtraction comparator would
  // coerce them; anything that does not yield a finite number counts as 0.
  const confidenceOf = (atom) => {
    const numeric = Number(atom?.confidence ?? 0);
    return Number.isFinite(numeric) ? numeric : 0;
  };
  for (const section of sections) {
    for (const para of section.paragraphs) {
      for (const atomList of Object.values(para.atoms)) {
        if (!Array.isArray(atomList) || atomList.length < 2) {
          continue;
        }
        atomList.sort((a, b) => confidenceOf(b) - confidenceOf(a));
      }
    }
  }
}
198521
198664
  function countAtoms(sections) {
198522
198665
  const counts = {};
198523
198666
  for (const section of sections.values()) {
@@ -198554,7 +198697,8 @@ async function runEntityResolution(sections, entityNames, llmService, onProgress
198554
198697
  ...noiseCandidates.length > 0 ? { noiseCandidates } : {}
198555
198698
  });
198556
198699
  const result = await llmService.generateText(prompt, {
198557
- systemPrompt: ENTITY_RESOLUTION_SYSTEM_PROMPT
198700
+ systemPrompt: ENTITY_RESOLUTION_SYSTEM_PROMPT,
198701
+ jsonMode: true
198558
198702
  });
198559
198703
  let resolution;
198560
198704
  try {
@@ -198679,7 +198823,13 @@ async function indexDocument(input) {
198679
198823
  const paragraphs = sortedParaKeys.map((pk) => {
198680
198824
  const p4 = section.paragraphs.get(pk);
198681
198825
  return { text: p4.text, atoms: p4.atoms };
198826
+ }).filter((p4) => {
198827
+ const hasAtoms = Object.values(p4.atoms).some((arr) => Array.isArray(arr) && arr.length > 0);
198828
+ const hasText = p4.text.replace(/[\s\p{P}\p{S}]/gu, "").length > 0;
198829
+ return hasAtoms || hasText;
198682
198830
  });
198831
+ if (paragraphs.length === 0)
198832
+ continue;
198683
198833
  digestSections.push({
198684
198834
  heading: section.heading,
198685
198835
  level: section.level,
@@ -198710,6 +198860,7 @@ async function indexDocument(input) {
198710
198860
  totalLlmCalls += resolutionResult.llmCalls;
198711
198861
  totalTokens += resolutionResult.totalTokens;
198712
198862
  }
198863
+ sortAtomsByConfidence(digestSections);
198713
198864
  const atomCounts = countAtoms(sectionsMap);
198714
198865
  const paragraphCount = digestSections.reduce((sum, s) => sum + s.paragraphs.length, 0);
198715
198866
  if (paragraphCount === 0) {
@@ -0,0 +1 @@
1
// Minified ES module kept on a single line; exports `N` as `ContentDetail`.
// - Imports six bindings from "./index-y0dn2kLO.js" under the aliases o, m, c,
//   e, d and h. Their real identities are not visible here.
//   NOTE(review): `e.jsx`/`e.jsxs` look like a JSX runtime and `m`, `c`, `d`
//   look like hooks - confirm against the index chunk.
// - `f` is the result of `o("arrow-left", p)`, where `p` holds two SVG path
//   descriptors.
// - `j({icon, title, badges, meta})` builds a header: icon, title, optional
//   badges, a button whose click handler calls the value returned by `m()` with
//   -1 and whose label is `t("common.back")`, and an optional row of
//   "label: value" pairs taken from `meta`.
// - `N` reads `hashId` from `d()`. Without it, it returns an element showing
//   `t("common.loading")`. Otherwise it passes `j` the title
//   "Content: <hashId>" (cut to 16 characters plus "…" when longer) and then
//   hands `hashId` to the imported `h`.
+ import{c as o,u as m,a as c,j as e,b as d,C as h}from"./index-y0dn2kLO.js";const p=[["path",{d:"m12 19-7-7 7-7",key:"1l729n"}],["path",{d:"M19 12H5",key:"x3x0zl"}]],f=o("arrow-left",p);function j({icon:a,title:t,badges:s,meta:l}){const i=m(),{t:r}=c();return e.jsxs("div",{children:[e.jsxs("div",{className:"flex justify-between items-center",children:[e.jsxs("div",{className:"flex min-w-0 items-center gap-2",children:[e.jsx("span",{className:"shrink-0",children:a}),e.jsx("h1",{className:"min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",children:t}),s&&s.length>0&&e.jsx("div",{className:"flex items-center gap-1.5 ml-2",children:s.map((n,x)=>e.jsx("span",{children:n},x))})]}),e.jsxs("button",{onClick:()=>i(-1),className:"flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",children:[e.jsx(f,{size:14}),r("common.back")]})]}),l&&l.length>0&&e.jsx("div",{className:"mt-2 flex flex-wrap items-baseline gap-4 text-xs",children:l.map(n=>e.jsxs("span",{className:"text-c4a-text-muted",children:[n.label,":",e.jsx("span",{className:"text-c4a-text-secondary",children:n.value})]},n.label))})]})}function N(){const{t:a}=c(),{hashId:t}=d();if(!t)return e.jsx("div",{className:"flex flex-1 items-center justify-center p-8",children:e.jsx("p",{className:"text-sm text-c4a-text-muted",children:a("common.loading")})});const s=t.length>16?t.slice(0,16)+"…":t;return e.jsxs("div",{className:"flex flex-1 flex-col p-4 md:p-6",children:[e.jsx(j,{icon:e.jsx("span",{children:"📄"}),title:`Content: ${s}`}),e.jsx(h,{hashId:t})]})}export{N as ContentDetail};
@@ -0,0 +1 @@
1
// Content detail route chunk: a page header with a back button, followed by the content view.
// NOTE(review): the local aliases below are guesses based on how each import is used in this
// chunk — confirm against the export map of the hashed index bundle.
import {
  c as makeIcon,
  u as useNav,
  a as useI18n,
  j as jsxRuntime,
  b as useRouteParams,
  C as ContentBody,
} from "./index-hC_-Br4I.js";

// SVG node list handed to the icon factory for the "arrow-left" glyph.
const ARROW_LEFT_NODES = [
  ["path", { d: "m12 19-7-7 7-7", key: "1l729n" }],
  ["path", { d: "M19 12H5", key: "x3x0zl" }],
];
const ArrowLeftIcon = makeIcon("arrow-left", ARROW_LEFT_NODES);

// Hash ids longer than this many UTF-16 units are cut and suffixed with an ellipsis in the title.
const MAX_TITLE_HASH_LENGTH = 16;

/**
 * Header row: icon, title, optional badges, a back button, and an optional metadata line.
 *
 * @param {object} props
 * @param {*} props.icon - Rendered inside a non-shrinking span before the title.
 * @param {*} props.title - Rendered as the content of the h1.
 * @param {Array} [props.badges] - Each entry is wrapped in a span keyed by its index.
 * @param {Array} [props.meta] - Entries expose `label` and `value`; each renders as "label:" + value, keyed by label.
 */
function PageHeader({ icon, title, badges, meta }) {
  const goTo = useNav();
  const { t: translate } = useI18n();

  // The `&&` chains are kept as-is so a falsy `badges`/`meta` value is passed through unchanged.
  const badgeRow =
    badges &&
    badges.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "flex items-center gap-1.5 ml-2",
      children: badges.map((badge, index) => jsxRuntime.jsx("span", { children: badge }, index)),
    });

  const titleGroup = jsxRuntime.jsxs("div", {
    className: "flex min-w-0 items-center gap-2",
    children: [
      jsxRuntime.jsx("span", { className: "shrink-0", children: icon }),
      jsxRuntime.jsx("h1", {
        className: "min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",
        children: title,
      }),
      badgeRow,
    ],
  });

  const backButton = jsxRuntime.jsxs("button", {
    // Calls the navigation function with -1 (presumably "one step back in history" — TODO confirm).
    onClick: () => goTo(-1),
    className:
      "flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",
    children: [jsxRuntime.jsx(ArrowLeftIcon, { size: 14 }), translate("common.back")],
  });

  const topRow = jsxRuntime.jsxs("div", {
    className: "flex justify-between items-center",
    children: [titleGroup, backButton],
  });

  const metaRow =
    meta &&
    meta.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "mt-2 flex flex-wrap items-baseline gap-4 text-xs",
      children: meta.map((entry) =>
        jsxRuntime.jsxs(
          "span",
          {
            className: "text-c4a-text-muted",
            children: [
              entry.label,
              ":",
              jsxRuntime.jsx("span", { className: "text-c4a-text-secondary", children: entry.value }),
            ],
          },
          entry.label,
        ),
      ),
    });

  return jsxRuntime.jsxs("div", { children: [topRow, metaRow] });
}

/**
 * Route component for a single content item.
 * Shows a centered "common.loading" message while `hashId` is missing from the route params;
 * otherwise renders the header (with a shortened hash in the title) and the content view.
 */
function ContentDetail() {
  const { t: translate } = useI18n();
  const { hashId } = useRouteParams();

  if (!hashId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  let shortHash = hashId;
  if (hashId.length > MAX_TITLE_HASH_LENGTH) {
    shortHash = `${hashId.slice(0, MAX_TITLE_HASH_LENGTH)}…`;
  }

  return jsxRuntime.jsxs("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: [
      jsxRuntime.jsx(PageHeader, {
        icon: jsxRuntime.jsx("span", { children: "📄" }),
        title: `Content: ${shortHash}`,
      }),
      jsxRuntime.jsx(ContentBody, { hashId }),
    ],
  });
}

export { ContentDetail };
@@ -0,0 +1 @@
1
// Content detail route chunk: a page header with a back button, followed by the content view.
// NOTE(review): the local aliases below are guesses based on how each import is used in this
// chunk — confirm against the export map of the hashed index bundle.
import {
  c as makeIcon,
  u as useNav,
  a as useI18n,
  j as jsxRuntime,
  b as useRouteParams,
  C as ContentBody,
} from "./index-wBO4P6CB.js";

// SVG node list handed to the icon factory for the "arrow-left" glyph.
const ARROW_LEFT_NODES = [
  ["path", { d: "m12 19-7-7 7-7", key: "1l729n" }],
  ["path", { d: "M19 12H5", key: "x3x0zl" }],
];
const ArrowLeftIcon = makeIcon("arrow-left", ARROW_LEFT_NODES);

// Hash ids longer than this many UTF-16 units are cut and suffixed with an ellipsis in the title.
const MAX_TITLE_HASH_LENGTH = 16;

/**
 * Header row: icon, title, optional badges, a back button, and an optional metadata line.
 *
 * @param {object} props
 * @param {*} props.icon - Rendered inside a non-shrinking span before the title.
 * @param {*} props.title - Rendered as the content of the h1.
 * @param {Array} [props.badges] - Each entry is wrapped in a span keyed by its index.
 * @param {Array} [props.meta] - Entries expose `label` and `value`; each renders as "label:" + value, keyed by label.
 */
function PageHeader({ icon, title, badges, meta }) {
  const goTo = useNav();
  const { t: translate } = useI18n();

  // The `&&` chains are kept as-is so a falsy `badges`/`meta` value is passed through unchanged.
  const badgeRow =
    badges &&
    badges.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "flex items-center gap-1.5 ml-2",
      children: badges.map((badge, index) => jsxRuntime.jsx("span", { children: badge }, index)),
    });

  const titleGroup = jsxRuntime.jsxs("div", {
    className: "flex min-w-0 items-center gap-2",
    children: [
      jsxRuntime.jsx("span", { className: "shrink-0", children: icon }),
      jsxRuntime.jsx("h1", {
        className: "min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",
        children: title,
      }),
      badgeRow,
    ],
  });

  const backButton = jsxRuntime.jsxs("button", {
    // Calls the navigation function with -1 (presumably "one step back in history" — TODO confirm).
    onClick: () => goTo(-1),
    className:
      "flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",
    children: [jsxRuntime.jsx(ArrowLeftIcon, { size: 14 }), translate("common.back")],
  });

  const topRow = jsxRuntime.jsxs("div", {
    className: "flex justify-between items-center",
    children: [titleGroup, backButton],
  });

  const metaRow =
    meta &&
    meta.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "mt-2 flex flex-wrap items-baseline gap-4 text-xs",
      children: meta.map((entry) =>
        jsxRuntime.jsxs(
          "span",
          {
            className: "text-c4a-text-muted",
            children: [
              entry.label,
              ":",
              jsxRuntime.jsx("span", { className: "text-c4a-text-secondary", children: entry.value }),
            ],
          },
          entry.label,
        ),
      ),
    });

  return jsxRuntime.jsxs("div", { children: [topRow, metaRow] });
}

/**
 * Route component for a single content item.
 * Shows a centered "common.loading" message while `hashId` is missing from the route params;
 * otherwise renders the header (with a shortened hash in the title) and the content view.
 */
function ContentDetail() {
  const { t: translate } = useI18n();
  const { hashId } = useRouteParams();

  if (!hashId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  let shortHash = hashId;
  if (hashId.length > MAX_TITLE_HASH_LENGTH) {
    shortHash = `${hashId.slice(0, MAX_TITLE_HASH_LENGTH)}…`;
  }

  return jsxRuntime.jsxs("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: [
      jsxRuntime.jsx(PageHeader, {
        icon: jsxRuntime.jsx("span", { children: "📄" }),
        title: `Content: ${shortHash}`,
      }),
      jsxRuntime.jsx(ContentBody, { hashId }),
    ],
  });
}

export { ContentDetail };
@@ -0,0 +1 @@
1
// Content detail route chunk: a page header with a back button, followed by the content view.
// NOTE(review): the local aliases below are guesses based on how each import is used in this
// chunk — confirm against the export map of the hashed index bundle.
import {
  c as makeIcon,
  u as useNav,
  a as useI18n,
  j as jsxRuntime,
  b as useRouteParams,
  C as ContentBody,
} from "./index-CMVd9rlp.js";

// SVG node list handed to the icon factory for the "arrow-left" glyph.
const ARROW_LEFT_NODES = [
  ["path", { d: "m12 19-7-7 7-7", key: "1l729n" }],
  ["path", { d: "M19 12H5", key: "x3x0zl" }],
];
const ArrowLeftIcon = makeIcon("arrow-left", ARROW_LEFT_NODES);

// Hash ids longer than this many UTF-16 units are cut and suffixed with an ellipsis in the title.
const MAX_TITLE_HASH_LENGTH = 16;

/**
 * Header row: icon, title, optional badges, a back button, and an optional metadata line.
 *
 * @param {object} props
 * @param {*} props.icon - Rendered inside a non-shrinking span before the title.
 * @param {*} props.title - Rendered as the content of the h1.
 * @param {Array} [props.badges] - Each entry is wrapped in a span keyed by its index.
 * @param {Array} [props.meta] - Entries expose `label` and `value`; each renders as "label:" + value, keyed by label.
 */
function PageHeader({ icon, title, badges, meta }) {
  const goTo = useNav();
  const { t: translate } = useI18n();

  // The `&&` chains are kept as-is so a falsy `badges`/`meta` value is passed through unchanged.
  const badgeRow =
    badges &&
    badges.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "flex items-center gap-1.5 ml-2",
      children: badges.map((badge, index) => jsxRuntime.jsx("span", { children: badge }, index)),
    });

  const titleGroup = jsxRuntime.jsxs("div", {
    className: "flex min-w-0 items-center gap-2",
    children: [
      jsxRuntime.jsx("span", { className: "shrink-0", children: icon }),
      jsxRuntime.jsx("h1", {
        className: "min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",
        children: title,
      }),
      badgeRow,
    ],
  });

  const backButton = jsxRuntime.jsxs("button", {
    // Calls the navigation function with -1 (presumably "one step back in history" — TODO confirm).
    onClick: () => goTo(-1),
    className:
      "flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",
    children: [jsxRuntime.jsx(ArrowLeftIcon, { size: 14 }), translate("common.back")],
  });

  const topRow = jsxRuntime.jsxs("div", {
    className: "flex justify-between items-center",
    children: [titleGroup, backButton],
  });

  const metaRow =
    meta &&
    meta.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "mt-2 flex flex-wrap items-baseline gap-4 text-xs",
      children: meta.map((entry) =>
        jsxRuntime.jsxs(
          "span",
          {
            className: "text-c4a-text-muted",
            children: [
              entry.label,
              ":",
              jsxRuntime.jsx("span", { className: "text-c4a-text-secondary", children: entry.value }),
            ],
          },
          entry.label,
        ),
      ),
    });

  return jsxRuntime.jsxs("div", { children: [topRow, metaRow] });
}

/**
 * Route component for a single content item.
 * Shows a centered "common.loading" message while `hashId` is missing from the route params;
 * otherwise renders the header (with a shortened hash in the title) and the content view.
 */
function ContentDetail() {
  const { t: translate } = useI18n();
  const { hashId } = useRouteParams();

  if (!hashId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  let shortHash = hashId;
  if (hashId.length > MAX_TITLE_HASH_LENGTH) {
    shortHash = `${hashId.slice(0, MAX_TITLE_HASH_LENGTH)}…`;
  }

  return jsxRuntime.jsxs("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: [
      jsxRuntime.jsx(PageHeader, {
        icon: jsxRuntime.jsx("span", { children: "📄" }),
        title: `Content: ${shortHash}`,
      }),
      jsxRuntime.jsx(ContentBody, { hashId }),
    ],
  });
}

export { ContentDetail };
@@ -0,0 +1 @@
1
// Entity detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  E as EntityBody,
} from "./index-hC_-Br4I.js";

/**
 * Route component for a single entity.
 * Shows a centered "common.loading" message while `entityId` is missing from the route params;
 * otherwise wraps the entity view in the padded page container.
 */
function EntityDetail() {
  const { t: translate } = useI18n();
  const { entityId } = useRouteParams();

  if (!entityId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(EntityBody, { entityId }),
  });
}

export { EntityDetail };
@@ -0,0 +1 @@
1
// Entity detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  E as EntityBody,
} from "./index-wBO4P6CB.js";

/**
 * Route component for a single entity.
 * Shows a centered "common.loading" message while `entityId` is missing from the route params;
 * otherwise wraps the entity view in the padded page container.
 */
function EntityDetail() {
  const { t: translate } = useI18n();
  const { entityId } = useRouteParams();

  if (!entityId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(EntityBody, { entityId }),
  });
}

export { EntityDetail };
@@ -0,0 +1 @@
1
// Entity detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  E as EntityBody,
} from "./index-y0dn2kLO.js";

/**
 * Route component for a single entity.
 * Shows a centered "common.loading" message while `entityId` is missing from the route params;
 * otherwise wraps the entity view in the padded page container.
 */
function EntityDetail() {
  const { t: translate } = useI18n();
  const { entityId } = useRouteParams();

  if (!entityId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(EntityBody, { entityId }),
  });
}

export { EntityDetail };
@@ -0,0 +1 @@
1
// Entity detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  E as EntityBody,
} from "./index-CMVd9rlp.js";

/**
 * Route component for a single entity.
 * Shows a centered "common.loading" message while `entityId` is missing from the route params;
 * otherwise wraps the entity view in the padded page container.
 */
function EntityDetail() {
  const { t: translate } = useI18n();
  const { entityId } = useRouteParams();

  if (!entityId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(EntityBody, { entityId }),
  });
}

export { EntityDetail };
@@ -0,0 +1 @@
1
// Relation detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  R as RelationBody,
} from "./index-hC_-Br4I.js";

/**
 * Route component for a single relation.
 * Shows a centered "common.loading" message while `relationId` is missing from the route params;
 * otherwise wraps the relation view in the padded page container.
 */
function RelationDetail() {
  const { t: translate } = useI18n();
  const { relationId } = useRouteParams();

  if (!relationId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(RelationBody, { relationId }),
  });
}

export { RelationDetail };
@@ -0,0 +1 @@
1
// Relation detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  R as RelationBody,
} from "./index-y0dn2kLO.js";

/**
 * Route component for a single relation.
 * Shows a centered "common.loading" message while `relationId` is missing from the route params;
 * otherwise wraps the relation view in the padded page container.
 */
function RelationDetail() {
  const { t: translate } = useI18n();
  const { relationId } = useRouteParams();

  if (!relationId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(RelationBody, { relationId }),
  });
}

export { RelationDetail };
@@ -0,0 +1 @@
1
// Relation detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  R as RelationBody,
} from "./index-CMVd9rlp.js";

/**
 * Route component for a single relation.
 * Shows a centered "common.loading" message while `relationId` is missing from the route params;
 * otherwise wraps the relation view in the padded page container.
 */
function RelationDetail() {
  const { t: translate } = useI18n();
  const { relationId } = useRouteParams();

  if (!relationId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(RelationBody, { relationId }),
  });
}

export { RelationDetail };
@@ -0,0 +1 @@
1
// Relation detail route chunk.
// NOTE(review): the local aliases are guesses based on how each import is used here —
// confirm against the export map of the hashed index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  R as RelationBody,
} from "./index-wBO4P6CB.js";

/**
 * Route component for a single relation.
 * Shows a centered "common.loading" message while `relationId` is missing from the route params;
 * otherwise wraps the relation view in the padded page container.
 */
function RelationDetail() {
  const { t: translate } = useI18n();
  const { relationId } = useRouteParams();

  if (!relationId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: jsxRuntime.jsx(RelationBody, { relationId }),
  });
}

export { RelationDetail };