@c4a/server-cli 0.4.15-alpha.6 → 0.4.15-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +87 -5
- package/package.json +1 -1
- package/serve.js +130 -12
- package/web/assets/ContentDetail-DZuam1p0.js +1 -0
- package/web/assets/ContentDetail-qsl_01UW.js +1 -0
- package/web/assets/EntityDetail-BafRMdUD.js +1 -0
- package/web/assets/EntityDetail-Dssp-tuW.js +1 -0
- package/web/assets/RelationDetail-DH86ysel.js +1 -0
- package/web/assets/RelationDetail-kDhcDsKg.js +1 -0
- package/web/assets/index-CMVd9rlp.js +111 -0
- package/web/assets/index-wBO4P6CB.js +111 -0
- package/web/index.html +1 -1
package/index.js
CHANGED
|
@@ -220991,6 +220991,12 @@ class LlmServiceImpl {
|
|
|
220991
220991
|
if (options?.systemPrompt) {
|
|
220992
220992
|
callSettings.system = options.systemPrompt;
|
|
220993
220993
|
}
|
|
220994
|
+
if (options?.jsonMode) {
|
|
220995
|
+
callSettings.providerOptions = {
|
|
220996
|
+
openai: { responseFormat: { type: "json_object" } },
|
|
220997
|
+
anthropic: { responseFormat: { type: "json_object" } }
|
|
220998
|
+
};
|
|
220999
|
+
}
|
|
220994
221000
|
const result = await retry(() => generateText(callSettings), {
|
|
220995
221001
|
shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
|
|
220996
221002
|
});
|
|
@@ -221038,6 +221044,12 @@ class LlmServiceImpl {
|
|
|
221038
221044
|
if (options?.systemPrompt) {
|
|
221039
221045
|
callSettings.system = options.systemPrompt;
|
|
221040
221046
|
}
|
|
221047
|
+
if (options?.jsonMode) {
|
|
221048
|
+
callSettings.providerOptions = {
|
|
221049
|
+
openai: { responseFormat: { type: "json_object" } },
|
|
221050
|
+
anthropic: { responseFormat: { type: "json_object" } }
|
|
221051
|
+
};
|
|
221052
|
+
}
|
|
221041
221053
|
const result = await retry(() => Promise.resolve(streamText(callSettings)), {
|
|
221042
221054
|
shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
|
|
221043
221055
|
});
|
|
@@ -221856,6 +221868,7 @@ function parseExtractionOutput(raw, schema) {
|
|
|
221856
221868
|
}
|
|
221857
221869
|
parsed = normalizeFlatOutput(parsed);
|
|
221858
221870
|
parsed = stripNulls(parsed);
|
|
221871
|
+
parsed = patchAttributeDefaults(parsed);
|
|
221859
221872
|
const result = schema.safeParse(parsed);
|
|
221860
221873
|
if (!result.success) {
|
|
221861
221874
|
return { success: false, error: result.error };
|
|
@@ -221923,7 +221936,37 @@ function tryParseJson(raw) {
|
|
|
221923
221936
|
return repairAndParse(raw);
|
|
221924
221937
|
}
|
|
221925
221938
|
/**
 * Parse text as JSON after passing it through `jsonrepair`.
 *
 * If the first attempt throws, the text is shortened with
 * `truncateToLastCompleteEntry` and the repair + parse is retried once on the
 * shortened text.
 *
 * @param {string} raw - Raw text to parse.
 * @returns {*} The value produced by `JSON.parse`.
 * @throws {Error} "JSON repair failed ..." when the first attempt fails and no
 *   truncation point is found; the first failure is attached as `cause` so the
 *   root parse error is not lost. Errors thrown by the retry on the truncated
 *   text propagate unchanged.
 */
function repairAndParse(raw) {
  let firstFailure;
  try {
    return JSON.parse(jsonrepair(raw));
  } catch (err) {
    // Remember why the direct attempt failed; it is reported if no fallback exists.
    firstFailure = err;
  }
  const truncated = truncateToLastCompleteEntry(raw);
  if (truncated) {
    return JSON.parse(jsonrepair(truncated));
  }
  throw new Error(`JSON repair failed for output of length ${raw.length}`, { cause: firstFailure });
}
|
|
221949
|
+
/**
 * Cut text back to the end of the last object that is followed by a comma and
 * a `"P<digits>"` key, then append one closing brace.
 *
 * Before searching, the text is trimmed; if it starts with a ``` fence the
 * first line is dropped, and a trailing ``` is removed.
 *
 * @param {string} raw - Text to shorten.
 * @returns {string|null} The shortened text ending in `}}`, or `null` when no
 *   `}` followed by `, "P<digits>"` is found.
 */
function truncateToLastCompleteEntry(raw) {
  let body = raw.trim();
  if (body.startsWith("```")) {
    // Drop everything up to and including the first newline (the fence line).
    body = body.slice(body.indexOf("\n") + 1);
  }
  if (body.endsWith("```")) {
    body = body.slice(0, -3);
  }
  const boundaries = [...body.matchAll(/\}\s*,\s*"P\d+"/g)];
  if (boundaries.length === 0) {
    return null;
  }
  // The match starts on the `}` that closes the last complete entry.
  const closingBraceIndex = boundaries[boundaries.length - 1].index;
  return `${body.slice(0, closingBraceIndex + 1)}}`;
}
|
|
221928
221971
|
var PARAGRAPH_TAG_RE = /^P\d+$/;
|
|
221929
221972
|
function normalizeFlatOutput(parsed) {
|
|
@@ -221956,6 +221999,43 @@ function stripNulls(value) {
|
|
|
221956
221999
|
}
|
|
221957
222000
|
return value;
|
|
221958
222001
|
}
|
|
222002
|
+
/**
 * Apply `patchAttrsInAtoms` to the `atoms` of every paragraph in a parsed
 * result that has a `paragraphs` array.
 *
 * @param {*} value - Parsed value; anything that is not a non-array object
 *   with a `paragraphs` array is returned unchanged.
 * @returns {*} A shallow copy with a rebuilt `paragraphs` array, or `value`
 *   itself when there is nothing to patch.
 */
function patchAttributeDefaults(value) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";

  if (!isObjectLike(value) || Array.isArray(value)) {
    return value;
  }
  if (!("paragraphs" in value) || !Array.isArray(value.paragraphs)) {
    return value;
  }

  const paragraphs = value.paragraphs.map((paragraph) => {
    if (!isObjectLike(paragraph)) {
      return paragraph;
    }
    const paragraphAtoms = paragraph.atoms;
    if (!isObjectLike(paragraphAtoms)) {
      // Paragraphs without an atoms object are passed through untouched.
      return paragraph;
    }
    return { ...paragraph, atoms: patchAttrsInAtoms(paragraphAtoms) };
  });

  return { ...value, paragraphs };
}
|
|
222022
|
+
/**
 * Give every attribute object that lacks a non-empty string `type` the
 * default type "other".
 *
 * @param {object} atoms2 - Atoms object; only its `attributes` array is examined.
 * @returns {object} `atoms2` itself when `attributes` is not an array,
 *   otherwise a shallow copy with a rebuilt `attributes` array. Entries that
 *   are not plain (non-array) objects, or already have a usable type, are kept
 *   as the same references.
 */
function patchAttrsInAtoms(atoms2) {
  const { attributes } = atoms2;
  if (!Array.isArray(attributes)) {
    return atoms2;
  }

  const isPlainObject = (entry) => Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
  const hasUsableType = (entry) => typeof entry.type === "string" && entry.type !== "";

  const patched = attributes.map((entry) =>
    isPlainObject(entry) && !hasUsableType(entry) ? { ...entry, type: "other" } : entry
  );

  return { ...atoms2, attributes: patched };
}
|
|
221959
222039
|
/**
 * Check whether a value is an object carrying both a `key` and a `value`
 * property, with `key` being a string.
 *
 * @param {*} value - Candidate to inspect.
 * @returns {boolean} `true` only when all of the conditions above hold.
 */
function isRecord(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  if (!("key" in value) || !("value" in value)) {
    return false;
  }
  return typeof value.key === "string";
}
|
|
@@ -222268,6 +222348,7 @@ Return a single JSON object keyed by paragraph tags. Only include paragraphs tha
|
|
|
222268
222348
|
- **Enum fields MUST use ONLY the listed values.** For example, entity.kind must be one of "implementation"|"external"|"concept" — do NOT use values from other atom types (e.g., do NOT put "team" or "human" in entity.kind; those belong to roles.kind).
|
|
222269
222349
|
- Every atom MUST include a "confidence" field (0.0-1.0) indicating how confident you are in the extraction. Use higher values (0.85-1.0) for explicitly stated facts and lower values (0.5-0.7) for inferred or ambiguous information.
|
|
222270
222350
|
- **Classify correctly:** People, teams, and personas → "roles" (not "entities"). Technical systems, services, modules → "entities".
|
|
222351
|
+
- **Entity naming — extract the subject, not the document:** Entity names should represent the actual system/product/framework being described, not the document itself. If the text says "Next.js is a React framework", the entity is "Next.js" — not "Next.js 官方文档" or "Next.js API 参考". Use names that work as standalone knowledge graph nodes, independent of any document context.
|
|
222271
222352
|
- **Entity reference consistency (CRITICAL):** Every entity name referenced in relation.from, relation.to, behavior.subject, or any other cross-reference field MUST also appear in the "entities" array of the SAME paragraph (or a preceding paragraph in the same chunk). If an entity is mentioned for the first time in a relation, you MUST also extract it as an entity. This ensures no "dangling references" — every name used in relations has a corresponding entity declaration.
|
|
222272
222353
|
- **Cross-atom reference consistency:** transitions[].from and transitions[].to values MUST exist in states[].values of the same entity. roles[].performs values MUST match names declared in behaviors[].name.
|
|
222273
222354
|
- **Constraints vs rules distinction:** Use "constraints" for unconditional declarative mandates ('X must Y'). Use "rules" for conditional logic ('IF X THEN Y'). Do not mix them — a requirement with no condition is a constraint, a requirement triggered by a condition is a rule. Do NOT invent a rule for every constraint — only create a rule when the text explicitly states conditional logic.
|
|
@@ -222327,13 +222408,14 @@ var ENTITY_RESOLUTION_SYSTEM_PROMPT = `You are an entity resolution assistant. Y
|
|
|
222327
222408
|
- Prefer the LONGER, more descriptive name as the canonical name
|
|
222328
222409
|
- Do NOT merge names that share a substring but refer to different things
|
|
222329
222410
|
- When uncertain, do NOT merge — add to "ambiguous" instead
|
|
222330
|
-
- Chinese and English names for the same entity SHOULD be merged (e.g. "
|
|
222331
|
-
- Abbreviations should be merged with their full forms (e.g. "
|
|
222411
|
+
- Chinese and English names for the same entity SHOULD be merged (e.g. "Webpack" → "Webpack 打包工具")
|
|
222412
|
+
- Abbreviations should be merged with their full forms (e.g. "K8s" → "Kubernetes")
|
|
222413
|
+
- Document-descriptive names (containing "文档", "参考", "指南" etc.) should be merged to the actual product/system name. The entity is the product, not the document about it
|
|
222332
222414
|
|
|
222333
222415
|
## Task 2: Remove Noise
|
|
222334
222416
|
- Apply the **identity test**: a real entity is something you can discuss independently ("What is X?", "How does X work?", "Who owns X?"). Names that fail this test — values, addresses, actions, generic descriptions — are noise.
|
|
222335
222417
|
- Remove names that are NOT meaningful named entities: generic words, action descriptions, or things that are attributes/values rather than independent subjects
|
|
222336
|
-
- Examples of REAL entities to KEEP: product names (
|
|
222418
|
+
- Examples of REAL entities to KEEP: product names (Next.js, Vite), tools (nvm, Turborepo), services (Nginx, Redis), platforms (Kubernetes, Docker) — these all pass the identity test
|
|
222337
222419
|
- When uncertain, KEEP the name — only remove if it clearly fails the identity test
|
|
222338
222420
|
|
|
222339
222421
|
## Output
|
|
@@ -222674,7 +222756,7 @@ Return ONLY a valid JSON object. No markdown fences, no explanation.`;
|
|
|
222674
222756
|
}
|
|
222675
222757
|
var DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT = DIAGRAM_SYSTEM_PROMPT;
|
|
222676
222758
|
// ../llm/src/chunking/markdownChunker.ts
|
|
222677
|
-
var DEFAULT_MAX_TOKENS2 =
|
|
222759
|
+
var DEFAULT_MAX_TOKENS2 = 3600;
|
|
222678
222760
|
var DEFAULT_PARAGRAPH_MAX_TOKENS = 500;
|
|
222679
222761
|
function estimateTokens(text2) {
|
|
222680
222762
|
return Math.ceil(text2.length / 4);
|
package/package.json
CHANGED
package/serve.js
CHANGED
|
@@ -194996,6 +194996,12 @@ class LlmServiceImpl {
|
|
|
194996
194996
|
if (options?.systemPrompt) {
|
|
194997
194997
|
callSettings.system = options.systemPrompt;
|
|
194998
194998
|
}
|
|
194999
|
+
if (options?.jsonMode) {
|
|
195000
|
+
callSettings.providerOptions = {
|
|
195001
|
+
openai: { responseFormat: { type: "json_object" } },
|
|
195002
|
+
anthropic: { responseFormat: { type: "json_object" } }
|
|
195003
|
+
};
|
|
195004
|
+
}
|
|
194999
195005
|
const result = await retry(() => generateText(callSettings), {
|
|
195000
195006
|
shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
|
|
195001
195007
|
});
|
|
@@ -195043,6 +195049,12 @@ class LlmServiceImpl {
|
|
|
195043
195049
|
if (options?.systemPrompt) {
|
|
195044
195050
|
callSettings.system = options.systemPrompt;
|
|
195045
195051
|
}
|
|
195052
|
+
if (options?.jsonMode) {
|
|
195053
|
+
callSettings.providerOptions = {
|
|
195054
|
+
openai: { responseFormat: { type: "json_object" } },
|
|
195055
|
+
anthropic: { responseFormat: { type: "json_object" } }
|
|
195056
|
+
};
|
|
195057
|
+
}
|
|
195046
195058
|
const result = await retry(() => Promise.resolve(streamText(callSettings)), {
|
|
195047
195059
|
shouldRetry: (error40) => isRetryableStatus(getHttpStatus(error40)) || isTimeoutError2(error40)
|
|
195048
195060
|
});
|
|
@@ -195861,6 +195873,7 @@ function parseExtractionOutput(raw5, schema2) {
|
|
|
195861
195873
|
}
|
|
195862
195874
|
parsed = normalizeFlatOutput(parsed);
|
|
195863
195875
|
parsed = stripNulls(parsed);
|
|
195876
|
+
parsed = patchAttributeDefaults(parsed);
|
|
195864
195877
|
const result = schema2.safeParse(parsed);
|
|
195865
195878
|
if (!result.success) {
|
|
195866
195879
|
return { success: false, error: result.error };
|
|
@@ -195928,7 +195941,37 @@ function tryParseJson(raw5) {
|
|
|
195928
195941
|
return repairAndParse(raw5);
|
|
195929
195942
|
}
|
|
195930
195943
|
/**
 * Parse text as JSON after passing it through `jsonrepair`.
 *
 * If the first attempt throws, the text is shortened with
 * `truncateToLastCompleteEntry` and the repair + parse is retried once on the
 * shortened text.
 *
 * @param {string} raw5 - Raw text to parse.
 * @returns {*} The value produced by `JSON.parse`.
 * @throws {Error} "JSON repair failed ..." when the first attempt fails and no
 *   truncation point is found; the first failure is attached as `cause` so the
 *   root parse error is not lost. Errors thrown by the retry on the truncated
 *   text propagate unchanged.
 */
function repairAndParse(raw5) {
  let firstFailure;
  try {
    return JSON.parse(jsonrepair(raw5));
  } catch (err) {
    // Remember why the direct attempt failed; it is reported if no fallback exists.
    firstFailure = err;
  }
  const truncated = truncateToLastCompleteEntry(raw5);
  if (truncated) {
    return JSON.parse(jsonrepair(truncated));
  }
  throw new Error(`JSON repair failed for output of length ${raw5.length}`, { cause: firstFailure });
}
|
|
195954
|
+
/**
 * Cut text back to the end of the last object that is followed by a comma and
 * a `"P<digits>"` key, then append one closing brace.
 *
 * Before searching, the text is trimmed; if it starts with a ``` fence the
 * first line is dropped, and a trailing ``` is removed.
 *
 * @param {string} raw5 - Text to shorten.
 * @returns {string|null} The shortened text ending in `}}`, or `null` when no
 *   `}` followed by `, "P<digits>"` is found.
 */
function truncateToLastCompleteEntry(raw5) {
  let body = raw5.trim();
  if (body.startsWith("```")) {
    // Drop everything up to and including the first newline (the fence line).
    body = body.slice(body.indexOf("\n") + 1);
  }
  if (body.endsWith("```")) {
    body = body.slice(0, -3);
  }
  const boundaries = [...body.matchAll(/\}\s*,\s*"P\d+"/g)];
  if (boundaries.length === 0) {
    return null;
  }
  // The match starts on the `}` that closes the last complete entry.
  const closingBraceIndex = boundaries[boundaries.length - 1].index;
  return `${body.slice(0, closingBraceIndex + 1)}}`;
}
|
|
195933
195976
|
var PARAGRAPH_TAG_RE = /^P\d+$/;
|
|
195934
195977
|
function normalizeFlatOutput(parsed) {
|
|
@@ -195961,6 +196004,43 @@ function stripNulls(value) {
|
|
|
195961
196004
|
}
|
|
195962
196005
|
return value;
|
|
195963
196006
|
}
|
|
196007
|
+
/**
 * Apply `patchAttrsInAtoms` to the `atoms` of every paragraph in a parsed
 * result that has a `paragraphs` array.
 *
 * @param {*} value - Parsed value; anything that is not a non-array object
 *   with a `paragraphs` array is returned unchanged.
 * @returns {*} A shallow copy with a rebuilt `paragraphs` array, or `value`
 *   itself when there is nothing to patch.
 */
function patchAttributeDefaults(value) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";

  if (!isObjectLike(value) || Array.isArray(value)) {
    return value;
  }
  if (!("paragraphs" in value) || !Array.isArray(value.paragraphs)) {
    return value;
  }

  const paragraphs = value.paragraphs.map((paragraph) => {
    if (!isObjectLike(paragraph)) {
      return paragraph;
    }
    const paragraphAtoms = paragraph.atoms;
    if (!isObjectLike(paragraphAtoms)) {
      // Paragraphs without an atoms object are passed through untouched.
      return paragraph;
    }
    return { ...paragraph, atoms: patchAttrsInAtoms(paragraphAtoms) };
  });

  return { ...value, paragraphs };
}
|
|
196027
|
+
/**
 * Give every attribute object that lacks a non-empty string `type` the
 * default type "other".
 *
 * @param {object} atoms2 - Atoms object; only its `attributes` array is examined.
 * @returns {object} `atoms2` itself when `attributes` is not an array,
 *   otherwise a shallow copy with a rebuilt `attributes` array. Entries that
 *   are not plain (non-array) objects, or already have a usable type, are kept
 *   as the same references.
 */
function patchAttrsInAtoms(atoms2) {
  const { attributes } = atoms2;
  if (!Array.isArray(attributes)) {
    return atoms2;
  }

  const isPlainObject = (entry) => Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
  const hasUsableType = (entry) => typeof entry.type === "string" && entry.type !== "";

  const patched = attributes.map((entry) =>
    isPlainObject(entry) && !hasUsableType(entry) ? { ...entry, type: "other" } : entry
  );

  return { ...atoms2, attributes: patched };
}
|
|
195964
196044
|
/**
 * Check whether a value is an object carrying both a `key` and a `value`
 * property, with `key` being a string.
 *
 * @param {*} value - Candidate to inspect.
 * @returns {boolean} `true` only when all of the conditions above hold.
 */
function isRecord(value) {
  if (!value || typeof value !== "object") {
    return false;
  }
  if (!("key" in value) || !("value" in value)) {
    return false;
  }
  return typeof value.key === "string";
}
|
|
@@ -196273,6 +196353,7 @@ Return a single JSON object keyed by paragraph tags. Only include paragraphs tha
|
|
|
196273
196353
|
- **Enum fields MUST use ONLY the listed values.** For example, entity.kind must be one of "implementation"|"external"|"concept" — do NOT use values from other atom types (e.g., do NOT put "team" or "human" in entity.kind; those belong to roles.kind).
|
|
196274
196354
|
- Every atom MUST include a "confidence" field (0.0-1.0) indicating how confident you are in the extraction. Use higher values (0.85-1.0) for explicitly stated facts and lower values (0.5-0.7) for inferred or ambiguous information.
|
|
196275
196355
|
- **Classify correctly:** People, teams, and personas → "roles" (not "entities"). Technical systems, services, modules → "entities".
|
|
196356
|
+
- **Entity naming — extract the subject, not the document:** Entity names should represent the actual system/product/framework being described, not the document itself. If the text says "Next.js is a React framework", the entity is "Next.js" — not "Next.js 官方文档" or "Next.js API 参考". Use names that work as standalone knowledge graph nodes, independent of any document context.
|
|
196276
196357
|
- **Entity reference consistency (CRITICAL):** Every entity name referenced in relation.from, relation.to, behavior.subject, or any other cross-reference field MUST also appear in the "entities" array of the SAME paragraph (or a preceding paragraph in the same chunk). If an entity is mentioned for the first time in a relation, you MUST also extract it as an entity. This ensures no "dangling references" — every name used in relations has a corresponding entity declaration.
|
|
196277
196358
|
- **Cross-atom reference consistency:** transitions[].from and transitions[].to values MUST exist in states[].values of the same entity. roles[].performs values MUST match names declared in behaviors[].name.
|
|
196278
196359
|
- **Constraints vs rules distinction:** Use "constraints" for unconditional declarative mandates ('X must Y'). Use "rules" for conditional logic ('IF X THEN Y'). Do not mix them — a requirement with no condition is a constraint, a requirement triggered by a condition is a rule. Do NOT invent a rule for every constraint — only create a rule when the text explicitly states conditional logic.
|
|
@@ -196332,13 +196413,14 @@ var ENTITY_RESOLUTION_SYSTEM_PROMPT = `You are an entity resolution assistant. Y
|
|
|
196332
196413
|
- Prefer the LONGER, more descriptive name as the canonical name
|
|
196333
196414
|
- Do NOT merge names that share a substring but refer to different things
|
|
196334
196415
|
- When uncertain, do NOT merge — add to "ambiguous" instead
|
|
196335
|
-
- Chinese and English names for the same entity SHOULD be merged (e.g. "
|
|
196336
|
-
- Abbreviations should be merged with their full forms (e.g. "
|
|
196416
|
+
- Chinese and English names for the same entity SHOULD be merged (e.g. "Webpack" → "Webpack 打包工具")
|
|
196417
|
+
- Abbreviations should be merged with their full forms (e.g. "K8s" → "Kubernetes")
|
|
196418
|
+
- Document-descriptive names (containing "文档", "参考", "指南" etc.) should be merged to the actual product/system name. The entity is the product, not the document about it
|
|
196337
196419
|
|
|
196338
196420
|
## Task 2: Remove Noise
|
|
196339
196421
|
- Apply the **identity test**: a real entity is something you can discuss independently ("What is X?", "How does X work?", "Who owns X?"). Names that fail this test — values, addresses, actions, generic descriptions — are noise.
|
|
196340
196422
|
- Remove names that are NOT meaningful named entities: generic words, action descriptions, or things that are attributes/values rather than independent subjects
|
|
196341
|
-
- Examples of REAL entities to KEEP: product names (
|
|
196423
|
+
- Examples of REAL entities to KEEP: product names (Next.js, Vite), tools (nvm, Turborepo), services (Nginx, Redis), platforms (Kubernetes, Docker) — these all pass the identity test
|
|
196342
196424
|
- When uncertain, KEEP the name — only remove if it clearly fails the identity test
|
|
196343
196425
|
|
|
196344
196426
|
## Output
|
|
@@ -196679,7 +196761,7 @@ Return ONLY a valid JSON object. No markdown fences, no explanation.`;
|
|
|
196679
196761
|
}
|
|
196680
196762
|
var DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT = DIAGRAM_SYSTEM_PROMPT;
|
|
196681
196763
|
// ../llm/src/chunking/markdownChunker.ts
|
|
196682
|
-
var DEFAULT_MAX_TOKENS2 =
|
|
196764
|
+
var DEFAULT_MAX_TOKENS2 = 3600;
|
|
196683
196765
|
var DEFAULT_PARAGRAPH_MAX_TOKENS = 500;
|
|
196684
196766
|
function estimateTokens(text2) {
|
|
196685
196767
|
return Math.ceil(text2.length / 4);
|
|
@@ -197604,7 +197686,8 @@ async function extractTableAtoms(chunk, sections, result, llmService) {
|
|
|
197604
197686
|
const prompt = buildDocTableAnnotationPrompt(tableText);
|
|
197605
197687
|
try {
|
|
197606
197688
|
const res = await llmService.generateText(prompt, {
|
|
197607
|
-
systemPrompt: DOC_TABLE_ANNOTATION_SYSTEM_PROMPT
|
|
197689
|
+
systemPrompt: DOC_TABLE_ANNOTATION_SYSTEM_PROMPT,
|
|
197690
|
+
jsonMode: true
|
|
197608
197691
|
});
|
|
197609
197692
|
const parsed = parseExtractionOutput(res.text, docChunkResultSchema);
|
|
197610
197693
|
if (!parsed.success) {
|
|
@@ -197696,7 +197779,8 @@ async function extractDiagramAtoms(chunk, sections, result, llmService) {
|
|
|
197696
197779
|
const prompt = buildDocDiagramAnnotationPrompt(diagramText);
|
|
197697
197780
|
try {
|
|
197698
197781
|
const res = await llmService.generateText(prompt, {
|
|
197699
|
-
systemPrompt: DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT
|
|
197782
|
+
systemPrompt: DOC_DIAGRAM_ANNOTATION_SYSTEM_PROMPT,
|
|
197783
|
+
jsonMode: true
|
|
197700
197784
|
});
|
|
197701
197785
|
const parsed = parseExtractionOutput(res.text, docChunkResultSchema);
|
|
197702
197786
|
if (!parsed.success) {
|
|
@@ -198214,7 +198298,9 @@ ${trimmed}
|
|
|
198214
198298
|
Continue the JSON output from the exact point of truncation. Output ONLY the remaining JSON text.`;
|
|
198215
198299
|
try {
|
|
198216
198300
|
const result = await llmService.generateText(continuationPrompt, {
|
|
198217
|
-
systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
|
|
198301
|
+
systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
|
|
198302
|
+
jsonMode: true,
|
|
198303
|
+
maxTokens: 16384
|
|
198218
198304
|
});
|
|
198219
198305
|
const combined = trimmed + result.text.trim();
|
|
198220
198306
|
JSON.parse(jsonrepair(combined));
|
|
@@ -198256,9 +198342,12 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
|
|
|
198256
198342
|
const chunkText = injectParagraphTags(chunk, sections);
|
|
198257
198343
|
let llmCalls = 0;
|
|
198258
198344
|
let totalTokens = 0;
|
|
198345
|
+
const DOC_INDEX_MAX_OUTPUT_TOKENS = 16384;
|
|
198259
198346
|
const t1Prompt = buildDocAtomAnnotationPrompt(chunkText);
|
|
198260
198347
|
const t1Result = await llmService.generateText(t1Prompt, {
|
|
198261
|
-
systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
|
|
198348
|
+
systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
|
|
198349
|
+
jsonMode: true,
|
|
198350
|
+
maxTokens: DOC_INDEX_MAX_OUTPUT_TOKENS
|
|
198262
198351
|
});
|
|
198263
198352
|
llmCalls++;
|
|
198264
198353
|
totalTokens += t1Result.usage.totalTokens;
|
|
@@ -198268,8 +198357,17 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
|
|
|
198268
198357
|
onStep?.("T1 done", llmCalls, totalTokens);
|
|
198269
198358
|
let parseResult = parseExtractionOutput(continued.text, docChunkResultSchema);
|
|
198270
198359
|
if (!parseResult.success) {
|
|
198360
|
+
const rawLen = continued.text.length;
|
|
198271
198361
|
const preview = continued.text.slice(0, 500).replace(/\n/g, "\\n");
|
|
198272
|
-
|
|
198362
|
+
const errMsg = parseResult.error.message.slice(0, 300);
|
|
198363
|
+
const posMatch = errMsg.match(/position\s+(\d+)/);
|
|
198364
|
+
const errContext = posMatch ? continued.text.slice(Math.max(0, +posMatch[1] - 100), +posMatch[1] + 100).replace(/\n/g, "\\n") : "";
|
|
198365
|
+
console.warn(`[docIndexer] chunk ${chunkIndex} T1 strict parse failed, attempting lenient.
|
|
198366
|
+
` + ` Error: ${errMsg}
|
|
198367
|
+
` + ` Output length: ${rawLen} chars | finishReason: ${t1Result.finishReason}
|
|
198368
|
+
` + ` Preview (first 500): ${preview}
|
|
198369
|
+
` + (errContext ? ` Context around error position: ...${errContext}...
|
|
198370
|
+
` : ""));
|
|
198273
198371
|
const lenient = tryLenientParse(continued.text, chunkIndex);
|
|
198274
198372
|
if (lenient) {
|
|
198275
198373
|
parseResult = { success: true, data: lenient };
|
|
@@ -198288,7 +198386,8 @@ async function processChunk(chunk, chunkIndex, llmService, sections, onStep) {
|
|
|
198288
198386
|
extract: async (input) => {
|
|
198289
198387
|
const prompt = buildDocGleaningPrompt(input.chunkText, input.previousResult);
|
|
198290
198388
|
const result = await llmService.generateText(prompt, {
|
|
198291
|
-
systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT
|
|
198389
|
+
systemPrompt: DOC_ANNOTATION_SYSTEM_PROMPT,
|
|
198390
|
+
jsonMode: true
|
|
198292
198391
|
});
|
|
198293
198392
|
llmCalls++;
|
|
198294
198393
|
totalTokens += result.usage.totalTokens;
|
|
@@ -198518,6 +198617,17 @@ function ensureAtomConfidence(atoms2) {
|
|
|
198518
198617
|
}
|
|
198519
198618
|
}
|
|
198520
198619
|
}
|
|
198620
|
+
/**
 * Sort every atom list of every paragraph in place, highest `confidence`
 * first. A missing (null/undefined) confidence counts as 0.
 *
 * Values under `para.atoms` that are not arrays, or that hold fewer than two
 * items, are left untouched.
 *
 * @param {Iterable<{paragraphs: Iterable<{atoms: object}>}>} sections -
 *   Sections whose paragraph atom lists are reordered in place.
 * @returns {void}
 */
function sortAtomsByConfidence(sections) {
  const scoreOf = (atom) => atom.confidence ?? 0;
  const byConfidenceDescending = (left, right) => scoreOf(right) - scoreOf(left);

  for (const { paragraphs } of sections) {
    for (const { atoms } of paragraphs) {
      Object.values(atoms)
        .filter((list) => Array.isArray(list) && list.length >= 2)
        .forEach((list) => {
          list.sort(byConfidenceDescending);
        });
    }
  }
}
|
|
198521
198631
|
function countAtoms(sections) {
|
|
198522
198632
|
const counts = {};
|
|
198523
198633
|
for (const section of sections.values()) {
|
|
@@ -198554,7 +198664,8 @@ async function runEntityResolution(sections, entityNames, llmService, onProgress
|
|
|
198554
198664
|
...noiseCandidates.length > 0 ? { noiseCandidates } : {}
|
|
198555
198665
|
});
|
|
198556
198666
|
const result = await llmService.generateText(prompt, {
|
|
198557
|
-
systemPrompt: ENTITY_RESOLUTION_SYSTEM_PROMPT
|
|
198667
|
+
systemPrompt: ENTITY_RESOLUTION_SYSTEM_PROMPT,
|
|
198668
|
+
jsonMode: true
|
|
198558
198669
|
});
|
|
198559
198670
|
let resolution;
|
|
198560
198671
|
try {
|
|
@@ -198679,7 +198790,13 @@ async function indexDocument(input) {
|
|
|
198679
198790
|
const paragraphs = sortedParaKeys.map((pk) => {
|
|
198680
198791
|
const p4 = section.paragraphs.get(pk);
|
|
198681
198792
|
return { text: p4.text, atoms: p4.atoms };
|
|
198793
|
+
}).filter((p4) => {
|
|
198794
|
+
const hasAtoms = Object.values(p4.atoms).some((arr) => Array.isArray(arr) && arr.length > 0);
|
|
198795
|
+
const hasText = p4.text.replace(/[\s\p{P}\p{S}]/gu, "").length > 0;
|
|
198796
|
+
return hasAtoms || hasText;
|
|
198682
198797
|
});
|
|
198798
|
+
if (paragraphs.length === 0)
|
|
198799
|
+
continue;
|
|
198683
198800
|
digestSections.push({
|
|
198684
198801
|
heading: section.heading,
|
|
198685
198802
|
level: section.level,
|
|
@@ -198710,6 +198827,7 @@ async function indexDocument(input) {
|
|
|
198710
198827
|
totalLlmCalls += resolutionResult.llmCalls;
|
|
198711
198828
|
totalTokens += resolutionResult.totalTokens;
|
|
198712
198829
|
}
|
|
198830
|
+
sortAtomsByConfidence(digestSections);
|
|
198713
198831
|
const atomCounts = countAtoms(sectionsMap);
|
|
198714
198832
|
const paragraphCount = digestSections.reduce((sum, s) => sum + s.paragraphs.length, 0);
|
|
198715
198833
|
if (paragraphCount === 0) {
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Route chunk exporting the ContentDetail page (minified build output, reformatted).
// NOTE(review): the import aliases below are named after how each binding is
// used in this chunk; confirm against the exports of the index bundle.
import {
  c as createIcon,
  u as useNavigation,
  a as useI18n,
  j as jsxRuntime,
  b as useRouteParams,
  C as ContentBody,
} from "./index-wBO4P6CB.js";

const ARROW_LEFT_NODES = [
  ["path", { d: "m12 19-7-7 7-7", key: "1l729n" }],
  ["path", { d: "M19 12H5", key: "x3x0zl" }],
];
const ArrowLeftIcon = createIcon("arrow-left", ARROW_LEFT_NODES);

// Header row: icon + title + optional badges, a back button, and an optional meta line.
function DetailHeader({ icon, title, badges, meta }) {
  const goTo = useNavigation();
  const { t: translate } = useI18n();

  const iconSlot = jsxRuntime.jsx("span", { className: "shrink-0", children: icon });
  const heading = jsxRuntime.jsx("h1", {
    className: "min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",
    children: title,
  });
  const badgeRow =
    badges &&
    badges.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "flex items-center gap-1.5 ml-2",
      children: badges.map((badge, index) => jsxRuntime.jsx("span", { children: badge }, index)),
    });
  const titleGroup = jsxRuntime.jsxs("div", {
    className: "flex min-w-0 items-center gap-2",
    children: [iconSlot, heading, badgeRow],
  });

  const backButton = jsxRuntime.jsxs("button", {
    // Called with -1 on click; presumably "go back one history entry" — confirm.
    onClick: () => goTo(-1),
    className:
      "flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",
    children: [jsxRuntime.jsx(ArrowLeftIcon, { size: 14 }), translate("common.back")],
  });

  const topRow = jsxRuntime.jsxs("div", {
    className: "flex justify-between items-center",
    children: [titleGroup, backButton],
  });

  const metaRow =
    meta &&
    meta.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "mt-2 flex flex-wrap items-baseline gap-4 text-xs",
      children: meta.map((item) =>
        jsxRuntime.jsxs(
          "span",
          {
            className: "text-c4a-text-muted",
            children: [
              item.label,
              ":",
              jsxRuntime.jsx("span", { className: "text-c4a-text-secondary", children: item.value }),
            ],
          },
          item.label
        )
      ),
    });

  return jsxRuntime.jsxs("div", { children: [topRow, metaRow] });
}

// Page component: shows a loading placeholder until a hashId route param is present.
function ContentDetailPage() {
  const { t: translate } = useI18n();
  const { hashId } = useRouteParams();

  if (!hashId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  // Titles show at most the first 16 characters of the hash, followed by an ellipsis.
  const shortHash = hashId.length > 16 ? hashId.slice(0, 16) + "…" : hashId;

  return jsxRuntime.jsxs("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: [
      jsxRuntime.jsx(DetailHeader, {
        icon: jsxRuntime.jsx("span", { children: "📄" }),
        title: `Content: ${shortHash}`,
      }),
      jsxRuntime.jsx(ContentBody, { hashId }),
    ],
  });
}

export { ContentDetailPage as ContentDetail };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Route chunk exporting the ContentDetail page (minified build output, reformatted).
// NOTE(review): the import aliases below are named after how each binding is
// used in this chunk; confirm against the exports of the index bundle.
import {
  c as createIcon,
  u as useNavigation,
  a as useI18n,
  j as jsxRuntime,
  b as useRouteParams,
  C as ContentBody,
} from "./index-CMVd9rlp.js";

const ARROW_LEFT_NODES = [
  ["path", { d: "m12 19-7-7 7-7", key: "1l729n" }],
  ["path", { d: "M19 12H5", key: "x3x0zl" }],
];
const ArrowLeftIcon = createIcon("arrow-left", ARROW_LEFT_NODES);

// Header row: icon + title + optional badges, a back button, and an optional meta line.
function DetailHeader({ icon, title, badges, meta }) {
  const goTo = useNavigation();
  const { t: translate } = useI18n();

  const iconSlot = jsxRuntime.jsx("span", { className: "shrink-0", children: icon });
  const heading = jsxRuntime.jsx("h1", {
    className: "min-w-0 break-all font-mono text-xl font-bold leading-none text-c4a-text-primary",
    children: title,
  });
  const badgeRow =
    badges &&
    badges.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "flex items-center gap-1.5 ml-2",
      children: badges.map((badge, index) => jsxRuntime.jsx("span", { children: badge }, index)),
    });
  const titleGroup = jsxRuntime.jsxs("div", {
    className: "flex min-w-0 items-center gap-2",
    children: [iconSlot, heading, badgeRow],
  });

  const backButton = jsxRuntime.jsxs("button", {
    // Called with -1 on click; presumably "go back one history entry" — confirm.
    onClick: () => goTo(-1),
    className:
      "flex items-center gap-1 text-c4a-text-muted hover:text-c4a-text-primary cursor-pointer transition-colors text-xs font-mono shrink-0 ml-4",
    children: [jsxRuntime.jsx(ArrowLeftIcon, { size: 14 }), translate("common.back")],
  });

  const topRow = jsxRuntime.jsxs("div", {
    className: "flex justify-between items-center",
    children: [titleGroup, backButton],
  });

  const metaRow =
    meta &&
    meta.length > 0 &&
    jsxRuntime.jsx("div", {
      className: "mt-2 flex flex-wrap items-baseline gap-4 text-xs",
      children: meta.map((item) =>
        jsxRuntime.jsxs(
          "span",
          {
            className: "text-c4a-text-muted",
            children: [
              item.label,
              ":",
              jsxRuntime.jsx("span", { className: "text-c4a-text-secondary", children: item.value }),
            ],
          },
          item.label
        )
      ),
    });

  return jsxRuntime.jsxs("div", { children: [topRow, metaRow] });
}

// Page component: shows a loading placeholder until a hashId route param is present.
function ContentDetailPage() {
  const { t: translate } = useI18n();
  const { hashId } = useRouteParams();

  if (!hashId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 items-center justify-center p-8",
      children: jsxRuntime.jsx("p", {
        className: "text-sm text-c4a-text-muted",
        children: translate("common.loading"),
      }),
    });
  }

  // Titles show at most the first 16 characters of the hash, followed by an ellipsis.
  const shortHash = hashId.length > 16 ? hashId.slice(0, 16) + "…" : hashId;

  return jsxRuntime.jsxs("div", {
    className: "flex flex-1 flex-col p-4 md:p-6",
    children: [
      jsxRuntime.jsx(DetailHeader, {
        icon: jsxRuntime.jsx("span", { children: "📄" }),
        title: `Content: ${shortHash}`,
      }),
      jsxRuntime.jsx(ContentBody, { hashId }),
    ],
  });
}

export { ContentDetailPage as ContentDetail };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Route chunk exporting the EntityDetail page (minified build output, reformatted).
// NOTE(review): the import aliases below are named after how each binding is
// used in this chunk; confirm against the exports of the index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  E as EntityBody,
} from "./index-wBO4P6CB.js";

// Renders the entity view for the entityId route param, or a loading
// placeholder while that param is absent.
function EntityDetailPage() {
  const { t: translate } = useI18n();
  const { entityId } = useRouteParams();

  if (entityId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 flex-col p-4 md:p-6",
      children: jsxRuntime.jsx(EntityBody, { entityId }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 items-center justify-center p-8",
    children: jsxRuntime.jsx("p", {
      className: "text-sm text-c4a-text-muted",
      children: translate("common.loading"),
    }),
  });
}

export { EntityDetailPage as EntityDetail };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Route chunk exporting the EntityDetail page (minified build output, reformatted).
// NOTE(review): the import aliases below are named after how each binding is
// used in this chunk; confirm against the exports of the index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  E as EntityBody,
} from "./index-CMVd9rlp.js";

// Renders the entity view for the entityId route param, or a loading
// placeholder while that param is absent.
function EntityDetailPage() {
  const { t: translate } = useI18n();
  const { entityId } = useRouteParams();

  if (entityId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 flex-col p-4 md:p-6",
      children: jsxRuntime.jsx(EntityBody, { entityId }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 items-center justify-center p-8",
    children: jsxRuntime.jsx("p", {
      className: "text-sm text-c4a-text-muted",
      children: translate("common.loading"),
    }),
  });
}

export { EntityDetailPage as EntityDetail };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Route chunk exporting the RelationDetail page (minified build output, reformatted).
// NOTE(review): the import aliases below are named after how each binding is
// used in this chunk; confirm against the exports of the index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  R as RelationBody,
} from "./index-CMVd9rlp.js";

// Renders the relation view for the relationId route param, or a loading
// placeholder while that param is absent.
function RelationDetailPage() {
  const { t: translate } = useI18n();
  const { relationId } = useRouteParams();

  if (relationId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 flex-col p-4 md:p-6",
      children: jsxRuntime.jsx(RelationBody, { relationId }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 items-center justify-center p-8",
    children: jsxRuntime.jsx("p", {
      className: "text-sm text-c4a-text-muted",
      children: translate("common.loading"),
    }),
  });
}

export { RelationDetailPage as RelationDetail };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Route chunk exporting the RelationDetail page (minified build output, reformatted).
// NOTE(review): the import aliases below are named after how each binding is
// used in this chunk; confirm against the exports of the index bundle.
import {
  a as useI18n,
  b as useRouteParams,
  j as jsxRuntime,
  R as RelationBody,
} from "./index-wBO4P6CB.js";

// Renders the relation view for the relationId route param, or a loading
// placeholder while that param is absent.
function RelationDetailPage() {
  const { t: translate } = useI18n();
  const { relationId } = useRouteParams();

  if (relationId) {
    return jsxRuntime.jsx("div", {
      className: "flex flex-1 flex-col p-4 md:p-6",
      children: jsxRuntime.jsx(RelationBody, { relationId }),
    });
  }

  return jsxRuntime.jsx("div", {
    className: "flex flex-1 items-center justify-center p-8",
    children: jsxRuntime.jsx("p", {
      className: "text-sm text-c4a-text-muted",
      children: translate("common.loading"),
    }),
  });
}

export { RelationDetailPage as RelationDetail };
|