mulmocast-preprocessor 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli/commands/query.js +81 -4
- package/lib/core/ai/command/query/interactive.d.ts +31 -1
- package/lib/core/ai/command/query/interactive.js +78 -2
- package/lib/core/ai/command/query/prompts.d.ts +5 -1
- package/lib/core/ai/command/query/prompts.js +13 -1
- package/lib/core/ai/llm.js +96 -7
- package/lib/core/ai/utils/fetcher.d.ts +25 -0
- package/lib/core/ai/utils/fetcher.js +122 -0
- package/lib/index.d.ts +5 -3
- package/lib/index.js +4 -2
- package/lib/types/index.d.ts +78 -0
- package/lib/types/index.js +37 -0
- package/lib/types/query.d.ts +2 -0
- package/package.json +1 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createInterface } from "node:readline";
|
|
2
2
|
import { GraphAILogger } from "graphai";
|
|
3
3
|
import { queryScript } from "../../core/ai/command/query/index.js";
|
|
4
|
-
import { createInteractiveSession, sendInteractiveQuery, clearHistory } from "../../core/ai/command/query/interactive.js";
|
|
4
|
+
import { createInteractiveSession, sendInteractiveQuery, sendInteractiveQueryWithFetch, clearHistory, getReferences, fetchReference, parseSuggestedFetch, removeSuggestFetchMarkers, } from "../../core/ai/command/query/interactive.js";
|
|
5
5
|
import { loadScript } from "../utils.js";
|
|
6
6
|
/**
|
|
7
7
|
* Query command handler - outputs answer to stdout
|
|
@@ -37,6 +37,12 @@ export const queryCommand = async (scriptPath, question, options) => {
|
|
|
37
37
|
process.exit(1);
|
|
38
38
|
}
|
|
39
39
|
};
|
|
40
|
+
/**
|
|
41
|
+
* Format references for display
|
|
42
|
+
*/
|
|
43
|
+
const formatReferences = (references) => {
|
|
44
|
+
return references.map((ref, i) => ` ${i + 1}. [${ref.type || "web"}] ${ref.title || ref.url}`).join("\n");
|
|
45
|
+
};
|
|
40
46
|
/**
|
|
41
47
|
* Run interactive query mode
|
|
42
48
|
*/
|
|
@@ -54,13 +60,18 @@ const runInteractiveMode = async (scriptPath, script, options) => {
|
|
|
54
60
|
GraphAILogger.error("No content available to query.");
|
|
55
61
|
process.exit(1);
|
|
56
62
|
}
|
|
63
|
+
const references = getReferences(script);
|
|
57
64
|
const rl = createInterface({
|
|
58
65
|
input: process.stdin,
|
|
59
66
|
output: process.stdout,
|
|
60
67
|
});
|
|
61
68
|
GraphAILogger.info(`Interactive query mode for "${session.scriptTitle}" (${session.beatCount} beats)`);
|
|
62
|
-
GraphAILogger.info("Commands: /clear (clear history), /history (show history), /
|
|
69
|
+
GraphAILogger.info("Commands: /clear (clear history), /history (show history), /refs (show references), /fetch <url> (fetch URL), /exit (quit)");
|
|
70
|
+
if (references.length > 0) {
|
|
71
|
+
GraphAILogger.info(`Available references: ${references.length}`);
|
|
72
|
+
}
|
|
63
73
|
GraphAILogger.info("");
|
|
74
|
+
let lastSuggestedUrl = null;
|
|
64
75
|
const prompt = () => {
|
|
65
76
|
rl.question("You: ", async (input) => {
|
|
66
77
|
const trimmedInput = input.trim();
|
|
@@ -76,6 +87,7 @@ const runInteractiveMode = async (scriptPath, script, options) => {
|
|
|
76
87
|
}
|
|
77
88
|
if (trimmedInput === "/clear") {
|
|
78
89
|
clearHistory(session);
|
|
90
|
+
lastSuggestedUrl = null;
|
|
79
91
|
GraphAILogger.info("Conversation history cleared.\n");
|
|
80
92
|
prompt();
|
|
81
93
|
return;
|
|
@@ -95,10 +107,75 @@ const runInteractiveMode = async (scriptPath, script, options) => {
|
|
|
95
107
|
prompt();
|
|
96
108
|
return;
|
|
97
109
|
}
|
|
110
|
+
if (trimmedInput === "/refs" || trimmedInput === "/references") {
|
|
111
|
+
if (references.length === 0) {
|
|
112
|
+
GraphAILogger.info("No references available.\n");
|
|
113
|
+
}
|
|
114
|
+
else {
|
|
115
|
+
GraphAILogger.info("Available references:");
|
|
116
|
+
GraphAILogger.info(formatReferences(references));
|
|
117
|
+
GraphAILogger.info("");
|
|
118
|
+
}
|
|
119
|
+
prompt();
|
|
120
|
+
return;
|
|
121
|
+
}
|
|
122
|
+
// Handle /fetch command
|
|
123
|
+
if (trimmedInput.startsWith("/fetch")) {
|
|
124
|
+
const urlArg = trimmedInput.replace(/^\/fetch\s*/, "").trim();
|
|
125
|
+
const urlToFetch = urlArg || lastSuggestedUrl;
|
|
126
|
+
if (!urlToFetch) {
|
|
127
|
+
GraphAILogger.info("Usage: /fetch <url> or /fetch (to fetch last suggested URL)\n");
|
|
128
|
+
prompt();
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
GraphAILogger.info(`Fetching: ${urlToFetch}...`);
|
|
132
|
+
try {
|
|
133
|
+
const fetchedContent = await fetchReference(urlToFetch, validatedOptions.verbose);
|
|
134
|
+
if (fetchedContent.error) {
|
|
135
|
+
GraphAILogger.error(`Fetch error: ${fetchedContent.error}\n`);
|
|
136
|
+
}
|
|
137
|
+
else {
|
|
138
|
+
GraphAILogger.info(`Fetched ${fetchedContent.content.length} chars from ${fetchedContent.title || urlToFetch}`);
|
|
139
|
+
GraphAILogger.info("Content loaded. Ask a question to use this reference.\n");
|
|
140
|
+
// Store for next query
|
|
141
|
+
session.fetchedContent = fetchedContent;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
catch (error) {
|
|
145
|
+
if (error instanceof Error) {
|
|
146
|
+
GraphAILogger.error(`Fetch error: ${error.message}\n`);
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
GraphAILogger.error("Unknown fetch error\n");
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
prompt();
|
|
153
|
+
return;
|
|
154
|
+
}
|
|
98
155
|
// Send query
|
|
99
156
|
try {
|
|
100
|
-
|
|
101
|
-
|
|
157
|
+
let answer;
|
|
158
|
+
// If we have fetched content, use it
|
|
159
|
+
if (session.fetchedContent) {
|
|
160
|
+
answer = await sendInteractiveQueryWithFetch(filteredScript, trimmedInput, session.fetchedContent, session, validatedOptions);
|
|
161
|
+
// Clear fetched content after use
|
|
162
|
+
session.fetchedContent = undefined;
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
answer = await sendInteractiveQuery(filteredScript, trimmedInput, session, validatedOptions);
|
|
166
|
+
}
|
|
167
|
+
// Check for suggested fetch URL
|
|
168
|
+
const suggestedUrl = parseSuggestedFetch(answer);
|
|
169
|
+
if (suggestedUrl) {
|
|
170
|
+
lastSuggestedUrl = suggestedUrl;
|
|
171
|
+
const cleanAnswer = removeSuggestFetchMarkers(answer);
|
|
172
|
+
GraphAILogger.info(`\nAssistant: ${cleanAnswer}`);
|
|
173
|
+
GraphAILogger.info(`\n(Suggested reference: ${suggestedUrl})`);
|
|
174
|
+
GraphAILogger.info("Type /fetch to load this reference for more details.\n");
|
|
175
|
+
}
|
|
176
|
+
else {
|
|
177
|
+
GraphAILogger.info(`\nAssistant: ${answer}\n`);
|
|
178
|
+
}
|
|
102
179
|
}
|
|
103
180
|
catch (error) {
|
|
104
181
|
if (error instanceof Error) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import type { ExtendedScript } from "../../../../types/index.js";
|
|
1
|
+
import type { ExtendedScript, Reference } from "../../../../types/index.js";
|
|
2
2
|
import type { QueryOptions, InteractiveQuerySession, ConversationMessage } from "../../../../types/query.js";
|
|
3
|
+
import { type FetchedContent } from "../../utils/fetcher.js";
|
|
3
4
|
/**
|
|
4
5
|
* Create an interactive query session
|
|
5
6
|
*/
|
|
@@ -20,3 +21,32 @@ export declare const clearHistory: (session: InteractiveQuerySession) => void;
|
|
|
20
21
|
* Get conversation history
|
|
21
22
|
*/
|
|
22
23
|
export declare const getHistory: (session: InteractiveQuerySession) => ConversationMessage[];
|
|
24
|
+
/**
|
|
25
|
+
* Regex pattern for SUGGEST_FETCH marker (bounded quantifier to prevent ReDoS)
|
|
26
|
+
*/
|
|
27
|
+
export declare const SUGGEST_FETCH_PATTERN: RegExp;
|
|
28
|
+
export declare const SUGGEST_FETCH_PATTERN_GLOBAL: RegExp;
|
|
29
|
+
/**
|
|
30
|
+
* Parse suggested fetch URL from AI response
|
|
31
|
+
*/
|
|
32
|
+
export declare const parseSuggestedFetch: (response: string) => string | null;
|
|
33
|
+
/**
|
|
34
|
+
* Remove SUGGEST_FETCH markers from response
|
|
35
|
+
*/
|
|
36
|
+
export declare const removeSuggestFetchMarkers: (response: string) => string;
|
|
37
|
+
/**
|
|
38
|
+
* Get available references from script
|
|
39
|
+
*/
|
|
40
|
+
export declare const getReferences: (script: ExtendedScript) => Reference[];
|
|
41
|
+
/**
|
|
42
|
+
* Fetch reference content by URL
|
|
43
|
+
*/
|
|
44
|
+
export declare const fetchReference: (url: string, verbose?: boolean) => Promise<FetchedContent>;
|
|
45
|
+
/**
|
|
46
|
+
* Find matching reference for a query
|
|
47
|
+
*/
|
|
48
|
+
export declare const findReference: (script: ExtendedScript, query: string) => Reference | null;
|
|
49
|
+
/**
|
|
50
|
+
* Send a question with fetched reference content
|
|
51
|
+
*/
|
|
52
|
+
export declare const sendInteractiveQueryWithFetch: (filteredScript: ExtendedScript, question: string, fetchedContent: FetchedContent, session: InteractiveQuerySession, options: QueryOptions) => Promise<string>;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { queryOptionsSchema } from "../../../../types/query.js";
|
|
2
|
-
import { executeLLM, filterScript } from "../../llm.js";
|
|
3
|
-
import { buildInteractiveUserPrompt, getInteractiveSystemPrompt } from "./prompts.js";
|
|
2
|
+
import { executeLLM, filterScript, getLanguageName } from "../../llm.js";
|
|
3
|
+
import { buildInteractiveUserPrompt, getInteractiveSystemPrompt, DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH } from "./prompts.js";
|
|
4
|
+
import { fetchUrlContent, findMatchingReference } from "../../utils/fetcher.js";
|
|
4
5
|
/**
|
|
5
6
|
* Create an interactive query session
|
|
6
7
|
*/
|
|
@@ -42,3 +43,78 @@ export const clearHistory = (session) => {
|
|
|
42
43
|
export const getHistory = (session) => {
|
|
43
44
|
return [...session.history];
|
|
44
45
|
};
|
|
46
|
+
/**
|
|
47
|
+
* Regex pattern for SUGGEST_FETCH marker (bounded quantifier to prevent ReDoS)
|
|
48
|
+
*/
|
|
49
|
+
export const SUGGEST_FETCH_PATTERN = /\[SUGGEST_FETCH:\s*([^\]]{1,2000})\]/;
|
|
50
|
+
export const SUGGEST_FETCH_PATTERN_GLOBAL = /\[SUGGEST_FETCH:\s*[^\]]{1,2000}\]/g;
|
|
51
|
+
/**
|
|
52
|
+
* Parse suggested fetch URL from AI response
|
|
53
|
+
*/
|
|
54
|
+
export const parseSuggestedFetch = (response) => {
|
|
55
|
+
const match = response.match(SUGGEST_FETCH_PATTERN);
|
|
56
|
+
return match ? match[1].trim() : null;
|
|
57
|
+
};
|
|
58
|
+
/**
|
|
59
|
+
* Remove SUGGEST_FETCH markers from response
|
|
60
|
+
*/
|
|
61
|
+
export const removeSuggestFetchMarkers = (response) => {
|
|
62
|
+
return response.replace(SUGGEST_FETCH_PATTERN_GLOBAL, "").trim();
|
|
63
|
+
};
|
|
64
|
+
/**
|
|
65
|
+
* Get available references from script
|
|
66
|
+
*/
|
|
67
|
+
export const getReferences = (script) => {
|
|
68
|
+
return script.scriptMeta?.references || [];
|
|
69
|
+
};
|
|
70
|
+
/**
|
|
71
|
+
* Fetch reference content by URL
|
|
72
|
+
*/
|
|
73
|
+
export const fetchReference = async (url, verbose = false) => {
|
|
74
|
+
return fetchUrlContent(url, 8000, verbose);
|
|
75
|
+
};
|
|
76
|
+
/**
|
|
77
|
+
* Find matching reference for a query
|
|
78
|
+
*/
|
|
79
|
+
export const findReference = (script, query) => {
|
|
80
|
+
const references = getReferences(script);
|
|
81
|
+
return findMatchingReference(references, query);
|
|
82
|
+
};
|
|
83
|
+
/**
|
|
84
|
+
* Send a question with fetched reference content
|
|
85
|
+
*/
|
|
86
|
+
export const sendInteractiveQueryWithFetch = async (filteredScript, question, fetchedContent, session, options) => {
|
|
87
|
+
if (filteredScript.beats.length === 0) {
|
|
88
|
+
return "No content available to answer the question.";
|
|
89
|
+
}
|
|
90
|
+
// Build system prompt for fetched content mode
|
|
91
|
+
let systemPrompt = DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH;
|
|
92
|
+
if (options.lang) {
|
|
93
|
+
const langName = getLanguageName(options.lang);
|
|
94
|
+
systemPrompt = `${systemPrompt}\n- IMPORTANT: Write the answer in ${langName}`;
|
|
95
|
+
}
|
|
96
|
+
// Build user prompt with fetched content
|
|
97
|
+
const baseUserPrompt = buildInteractiveUserPrompt(filteredScript, question, session.history);
|
|
98
|
+
// Insert fetched content before the question
|
|
99
|
+
const fetchedSection = [
|
|
100
|
+
"",
|
|
101
|
+
"---",
|
|
102
|
+
"Additional reference content fetched from URL:",
|
|
103
|
+
`URL: ${fetchedContent.url}`,
|
|
104
|
+
fetchedContent.title ? `Title: ${fetchedContent.title}` : "",
|
|
105
|
+
"",
|
|
106
|
+
fetchedContent.content,
|
|
107
|
+
"---",
|
|
108
|
+
"",
|
|
109
|
+
]
|
|
110
|
+
.filter(Boolean)
|
|
111
|
+
.join("\n");
|
|
112
|
+
// Insert before "Current question:" or at the end
|
|
113
|
+
const insertPoint = baseUserPrompt.indexOf("Current question:");
|
|
114
|
+
const userPrompt = insertPoint >= 0 ? baseUserPrompt.slice(0, insertPoint) + fetchedSection + baseUserPrompt.slice(insertPoint) : baseUserPrompt + fetchedSection;
|
|
115
|
+
const answer = await executeLLM(systemPrompt, userPrompt, options, options.verbose ? `Interactive query with fetch: ${question}` : undefined);
|
|
116
|
+
// Add to history (include note about fetched content)
|
|
117
|
+
session.history.push({ role: "user", content: `${question} (with reference: ${fetchedContent.url})` });
|
|
118
|
+
session.history.push({ role: "assistant", content: answer });
|
|
119
|
+
return answer;
|
|
120
|
+
};
|
|
@@ -15,7 +15,11 @@ export declare const buildUserPrompt: (script: ExtendedScript, question: string)
|
|
|
15
15
|
/**
|
|
16
16
|
* Default system prompt for interactive query
|
|
17
17
|
*/
|
|
18
|
-
export declare const DEFAULT_INTERACTIVE_SYSTEM_PROMPT = "You are answering questions based on the content provided.\n- Answer based ONLY on the information in the provided content\n- If the answer cannot be found in the content, say so clearly\n- Be concise and direct in your answers\n- Do not make up information that is not in the content\n- You may reference previous conversation when answering follow-up questions";
|
|
18
|
+
export declare const DEFAULT_INTERACTIVE_SYSTEM_PROMPT = "You are answering questions based on the content provided.\n- Answer based ONLY on the information in the provided content\n- If the answer cannot be found in the content, say so clearly\n- Be concise and direct in your answers\n- Do not make up information that is not in the content\n- You may reference previous conversation when answering follow-up questions\n- If references are available and the user asks for more details, mention which reference could provide more information\n- When you suggest fetching a reference for more details, include [SUGGEST_FETCH: <url>] in your response";
|
|
19
|
+
/**
|
|
20
|
+
* Default system prompt for interactive query with fetched content
|
|
21
|
+
*/
|
|
22
|
+
export declare const DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH = "You are answering questions based on the content provided, including fetched reference content.\n- Answer based on both the main content and any fetched reference content\n- If the answer cannot be found, say so clearly\n- Be concise and direct in your answers\n- Do not make up information\n- You may reference previous conversation when answering follow-up questions\n- Prioritize information from fetched content when it's more detailed and relevant";
|
|
19
23
|
/**
|
|
20
24
|
* Get system prompt for interactive mode
|
|
21
25
|
*/
|
|
@@ -44,7 +44,19 @@ export const DEFAULT_INTERACTIVE_SYSTEM_PROMPT = `You are answering questions ba
|
|
|
44
44
|
- If the answer cannot be found in the content, say so clearly
|
|
45
45
|
- Be concise and direct in your answers
|
|
46
46
|
- Do not make up information that is not in the content
|
|
47
|
-
- You may reference previous conversation when answering follow-up questions
|
|
47
|
+
- You may reference previous conversation when answering follow-up questions
|
|
48
|
+
- If references are available and the user asks for more details, mention which reference could provide more information
|
|
49
|
+
- When you suggest fetching a reference for more details, include [SUGGEST_FETCH: <url>] in your response`;
|
|
50
|
+
/**
|
|
51
|
+
* Default system prompt for interactive query with fetched content
|
|
52
|
+
*/
|
|
53
|
+
export const DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH = `You are answering questions based on the content provided, including fetched reference content.
|
|
54
|
+
- Answer based on both the main content and any fetched reference content
|
|
55
|
+
- If the answer cannot be found, say so clearly
|
|
56
|
+
- Be concise and direct in your answers
|
|
57
|
+
- Do not make up information
|
|
58
|
+
- You may reference previous conversation when answering follow-up questions
|
|
59
|
+
- Prioritize information from fetched content when it's more detailed and relevant`;
|
|
48
60
|
/**
|
|
49
61
|
* Get system prompt for interactive mode
|
|
50
62
|
*/
|
package/lib/core/ai/llm.js
CHANGED
|
@@ -103,31 +103,120 @@ export const getLanguageName = (langCode) => {
|
|
|
103
103
|
};
|
|
104
104
|
return langMap[langCode] || langCode;
|
|
105
105
|
};
|
|
106
|
+
/**
|
|
107
|
+
* Build beat content including metadata
|
|
108
|
+
*/
|
|
109
|
+
const buildBeatContent = (beat, index) => {
|
|
110
|
+
const lines = [];
|
|
111
|
+
// Main text
|
|
112
|
+
const text = beat.text || "";
|
|
113
|
+
if (!text.trim())
|
|
114
|
+
return "";
|
|
115
|
+
lines.push(`[${index}] ${text}`);
|
|
116
|
+
// Add metadata if available
|
|
117
|
+
const meta = beat.meta;
|
|
118
|
+
if (meta) {
|
|
119
|
+
// Tags for categorization
|
|
120
|
+
if (meta.tags && meta.tags.length > 0) {
|
|
121
|
+
lines.push(` Tags: ${meta.tags.join(", ")}`);
|
|
122
|
+
}
|
|
123
|
+
// Context provides additional information not in the text
|
|
124
|
+
if (meta.context) {
|
|
125
|
+
lines.push(` Context: ${meta.context}`);
|
|
126
|
+
}
|
|
127
|
+
// Keywords highlight important terms
|
|
128
|
+
if (meta.keywords && meta.keywords.length > 0) {
|
|
129
|
+
lines.push(` Keywords: ${meta.keywords.join(", ")}`);
|
|
130
|
+
}
|
|
131
|
+
// Expected questions this beat can answer
|
|
132
|
+
if (meta.expectedQuestions && meta.expectedQuestions.length > 0) {
|
|
133
|
+
lines.push(` Can answer: ${meta.expectedQuestions.join("; ")}`);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
return lines.join("\n");
|
|
137
|
+
};
|
|
138
|
+
/**
|
|
139
|
+
* Build script-level metadata section
|
|
140
|
+
*/
|
|
141
|
+
const buildScriptMetaContent = (script) => {
|
|
142
|
+
const meta = script.scriptMeta;
|
|
143
|
+
if (!meta)
|
|
144
|
+
return "";
|
|
145
|
+
const lines = [];
|
|
146
|
+
// Background info
|
|
147
|
+
if (meta.background) {
|
|
148
|
+
lines.push(`Background: ${meta.background}`);
|
|
149
|
+
}
|
|
150
|
+
// Audience and prerequisites
|
|
151
|
+
if (meta.audience) {
|
|
152
|
+
lines.push(`Target audience: ${meta.audience}`);
|
|
153
|
+
}
|
|
154
|
+
if (meta.prerequisites && meta.prerequisites.length > 0) {
|
|
155
|
+
lines.push(`Prerequisites: ${meta.prerequisites.join(", ")}`);
|
|
156
|
+
}
|
|
157
|
+
// Goals
|
|
158
|
+
if (meta.goals && meta.goals.length > 0) {
|
|
159
|
+
lines.push(`Goals: ${meta.goals.join("; ")}`);
|
|
160
|
+
}
|
|
161
|
+
// Keywords
|
|
162
|
+
if (meta.keywords && meta.keywords.length > 0) {
|
|
163
|
+
lines.push(`Keywords: ${meta.keywords.join(", ")}`);
|
|
164
|
+
}
|
|
165
|
+
// References
|
|
166
|
+
if (meta.references && meta.references.length > 0) {
|
|
167
|
+
lines.push("References:");
|
|
168
|
+
meta.references.forEach((ref) => {
|
|
169
|
+
const title = ref.title || ref.url;
|
|
170
|
+
const desc = ref.description ? ` - ${ref.description}` : "";
|
|
171
|
+
lines.push(` - [${ref.type || "web"}] ${title}: ${ref.url}${desc}`);
|
|
172
|
+
});
|
|
173
|
+
}
|
|
174
|
+
// FAQ
|
|
175
|
+
if (meta.faq && meta.faq.length > 0) {
|
|
176
|
+
lines.push("FAQ:");
|
|
177
|
+
meta.faq.forEach((faq) => {
|
|
178
|
+
lines.push(` Q: ${faq.question}`);
|
|
179
|
+
lines.push(` A: ${faq.answer}`);
|
|
180
|
+
});
|
|
181
|
+
}
|
|
182
|
+
// Author info
|
|
183
|
+
if (meta.author) {
|
|
184
|
+
lines.push(`Author: ${meta.author}`);
|
|
185
|
+
}
|
|
186
|
+
return lines.length > 0 ? lines.join("\n") : "";
|
|
187
|
+
};
|
|
106
188
|
/**
|
|
107
189
|
* Build script content for user prompt (common part)
|
|
108
190
|
*/
|
|
109
191
|
export const buildScriptContent = (script) => {
|
|
110
192
|
const parts = [];
|
|
111
|
-
// Add script
|
|
193
|
+
// Add script title and language
|
|
112
194
|
parts.push(`# Script: ${script.title}`);
|
|
113
195
|
parts.push(`Language: ${script.lang}`);
|
|
114
196
|
parts.push("");
|
|
115
|
-
//
|
|
197
|
+
// Add script-level metadata
|
|
198
|
+
const scriptMetaContent = buildScriptMetaContent(script);
|
|
199
|
+
if (scriptMetaContent) {
|
|
200
|
+
parts.push("## About this content");
|
|
201
|
+
parts.push(scriptMetaContent);
|
|
202
|
+
parts.push("");
|
|
203
|
+
}
|
|
204
|
+
// Collect all content from beats grouped by section
|
|
116
205
|
const sections = new Map();
|
|
117
206
|
script.beats.forEach((beat, index) => {
|
|
118
|
-
const
|
|
119
|
-
if (!
|
|
207
|
+
const content = buildBeatContent(beat, index);
|
|
208
|
+
if (!content)
|
|
120
209
|
return;
|
|
121
210
|
const section = beat.meta?.section || "main";
|
|
122
211
|
if (!sections.has(section)) {
|
|
123
212
|
sections.set(section, []);
|
|
124
213
|
}
|
|
125
|
-
sections.get(section).push(
|
|
214
|
+
sections.get(section).push(content);
|
|
126
215
|
});
|
|
127
216
|
// Output by section
|
|
128
|
-
sections.forEach((
|
|
217
|
+
sections.forEach((contents, section) => {
|
|
129
218
|
parts.push(`## Section: ${section}`);
|
|
130
|
-
|
|
219
|
+
contents.forEach((c) => parts.push(c));
|
|
131
220
|
parts.push("");
|
|
132
221
|
});
|
|
133
222
|
return parts.join("\n");
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fetched content result
|
|
3
|
+
*/
|
|
4
|
+
export interface FetchedContent {
|
|
5
|
+
url: string;
|
|
6
|
+
title: string | null;
|
|
7
|
+
content: string;
|
|
8
|
+
error?: string;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Fetch URL content and extract text
|
|
12
|
+
*/
|
|
13
|
+
export declare const fetchUrlContent: (url: string, maxLength?: number, verbose?: boolean) => Promise<FetchedContent>;
|
|
14
|
+
/**
|
|
15
|
+
* Find matching reference URL from script metadata
|
|
16
|
+
*/
|
|
17
|
+
export declare const findMatchingReference: (references: Array<{
|
|
18
|
+
url: string;
|
|
19
|
+
title?: string;
|
|
20
|
+
description?: string;
|
|
21
|
+
}> | undefined, query: string) => {
|
|
22
|
+
url: string;
|
|
23
|
+
title?: string;
|
|
24
|
+
description?: string;
|
|
25
|
+
} | null;
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { GraphAILogger } from "graphai";
|
|
2
|
+
/**
|
|
3
|
+
* Strip HTML tags and extract text content
|
|
4
|
+
*/
|
|
5
|
+
const stripHtml = (html) => {
|
|
6
|
+
// Remove script and style elements
|
|
7
|
+
let text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "");
|
|
8
|
+
text = text.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "");
|
|
9
|
+
// Remove HTML comments
|
|
10
|
+
text = text.replace(/<!--[\s\S]*?-->/g, "");
|
|
11
|
+
// Replace common block elements with newlines
|
|
12
|
+
text = text.replace(/<\/(p|div|h[1-6]|li|tr|br)[^>]*>/gi, "\n");
|
|
13
|
+
// Remove all remaining HTML tags
|
|
14
|
+
// eslint-disable-next-line sonarjs/slow-regex -- standard HTML tag removal pattern, safe for typical HTML
|
|
15
|
+
text = text.replace(/<[^>]*>/g, " ");
|
|
16
|
+
// Decode common HTML entities
|
|
17
|
+
text = text.replace(/&nbsp;/g, " ");
|
|
18
|
+
text = text.replace(/&amp;/g, "&");
|
|
19
|
+
text = text.replace(/&lt;/g, "<");
|
|
20
|
+
text = text.replace(/&gt;/g, ">");
|
|
21
|
+
text = text.replace(/&quot;/g, '"');
|
|
22
|
+
text = text.replace(/&#39;/g, "'");
|
|
23
|
+
// Normalize whitespace
|
|
24
|
+
text = text.replace(/\s+/g, " ");
|
|
25
|
+
text = text.replace(/\n\s*\n/g, "\n\n");
|
|
26
|
+
return text.trim();
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Extract title from HTML
|
|
30
|
+
*/
|
|
31
|
+
const extractTitle = (html) => {
|
|
32
|
+
const match = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
33
|
+
return match ? match[1].trim() : null;
|
|
34
|
+
};
|
|
35
|
+
/**
|
|
36
|
+
* Fetch URL content and extract text
|
|
37
|
+
*/
|
|
38
|
+
export const fetchUrlContent = async (url, maxLength = 8000, verbose = false) => {
|
|
39
|
+
try {
|
|
40
|
+
if (verbose) {
|
|
41
|
+
GraphAILogger.info(`Fetching URL: ${url}`);
|
|
42
|
+
}
|
|
43
|
+
const controller = new AbortController();
|
|
44
|
+
const timeoutId = setTimeout(() => controller.abort(), 30000);
|
|
45
|
+
const response = await fetch(url, {
|
|
46
|
+
signal: controller.signal,
|
|
47
|
+
headers: {
|
|
48
|
+
"User-Agent": "Mozilla/5.0 (compatible; MulmoCast/1.0; +https://github.com/receptron/mulmocast)",
|
|
49
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
50
|
+
},
|
|
51
|
+
});
|
|
52
|
+
clearTimeout(timeoutId);
|
|
53
|
+
if (!response.ok) {
|
|
54
|
+
return {
|
|
55
|
+
url,
|
|
56
|
+
title: null,
|
|
57
|
+
content: "",
|
|
58
|
+
error: `HTTP ${response.status}: ${response.statusText}`,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
const contentType = response.headers.get("content-type") || "";
|
|
62
|
+
// Handle non-HTML content
|
|
63
|
+
if (!contentType.includes("text/html") && !contentType.includes("application/xhtml+xml")) {
|
|
64
|
+
if (contentType.includes("text/plain")) {
|
|
65
|
+
const text = await response.text();
|
|
66
|
+
return {
|
|
67
|
+
url,
|
|
68
|
+
title: null,
|
|
69
|
+
content: text.substring(0, maxLength),
|
|
70
|
+
};
|
|
71
|
+
}
|
|
72
|
+
return {
|
|
73
|
+
url,
|
|
74
|
+
title: null,
|
|
75
|
+
content: "",
|
|
76
|
+
error: `Unsupported content type: ${contentType}`,
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
const html = await response.text();
|
|
80
|
+
const title = extractTitle(html);
|
|
81
|
+
const content = stripHtml(html);
|
|
82
|
+
// Truncate if needed
|
|
83
|
+
const truncatedContent = content.length > maxLength ? content.substring(0, maxLength) + "..." : content;
|
|
84
|
+
if (verbose) {
|
|
85
|
+
GraphAILogger.info(`Fetched ${content.length} chars from ${url}`);
|
|
86
|
+
}
|
|
87
|
+
return {
|
|
88
|
+
url,
|
|
89
|
+
title,
|
|
90
|
+
content: truncatedContent,
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
catch (error) {
|
|
94
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
95
|
+
return {
|
|
96
|
+
url,
|
|
97
|
+
title: null,
|
|
98
|
+
content: "",
|
|
99
|
+
error: errorMessage,
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
};
|
|
103
|
+
/**
|
|
104
|
+
* Find matching reference URL from script metadata
|
|
105
|
+
*/
|
|
106
|
+
export const findMatchingReference = (references, query) => {
|
|
107
|
+
if (!references || references.length === 0) {
|
|
108
|
+
return null;
|
|
109
|
+
}
|
|
110
|
+
const lowerQuery = query.toLowerCase();
|
|
111
|
+
// Try to find a reference that matches keywords in the query
|
|
112
|
+
for (const ref of references) {
|
|
113
|
+
const refText = [ref.title, ref.description, ref.url].filter(Boolean).join(" ").toLowerCase();
|
|
114
|
+
// Simple keyword matching
|
|
115
|
+
const queryWords = lowerQuery.split(/\s+/).filter((w) => w.length > 2);
|
|
116
|
+
const matchScore = queryWords.filter((word) => refText.includes(word)).length;
|
|
117
|
+
if (matchScore >= 2 || (queryWords.length === 1 && matchScore === 1)) {
|
|
118
|
+
return ref;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
return null;
|
|
122
|
+
};
|
package/lib/index.d.ts
CHANGED
|
@@ -4,10 +4,12 @@ export { filterBySection, filterByTags, stripExtendedFields } from "./core/prepr
|
|
|
4
4
|
export { listProfiles } from "./core/preprocessing/profiles.js";
|
|
5
5
|
export { summarizeScript } from "./core/ai/command/summarize/index.js";
|
|
6
6
|
export { queryScript } from "./core/ai/command/query/index.js";
|
|
7
|
-
export { createInteractiveSession, sendInteractiveQuery, clearHistory, getHistory } from "./core/ai/command/query/interactive.js";
|
|
8
|
-
export
|
|
7
|
+
export { createInteractiveSession, sendInteractiveQuery, sendInteractiveQueryWithFetch, clearHistory, getHistory, getReferences, findReference, fetchReference, parseSuggestedFetch, removeSuggestFetchMarkers, } from "./core/ai/command/query/interactive.js";
|
|
8
|
+
export { fetchUrlContent } from "./core/ai/utils/fetcher.js";
|
|
9
|
+
export type { FetchedContent } from "./core/ai/utils/fetcher.js";
|
|
10
|
+
export type { BeatVariant, BeatMeta, ExtendedBeat, ExtendedScript, OutputProfile, ProcessOptions, ProfileInfo, Reference, FAQ, ScriptMeta, } from "./types/index.js";
|
|
9
11
|
export type { SummarizeOptions, SummarizeResult, LLMProvider, SummarizeFormat, ProviderConfig } from "./types/summarize.js";
|
|
10
12
|
export type { QueryOptions, QueryResult, ConversationMessage, InteractiveQuerySession } from "./types/query.js";
|
|
11
|
-
export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema } from "./types/index.js";
|
|
13
|
+
export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema, referenceSchema, faqSchema, scriptMetaSchema, } from "./types/index.js";
|
|
12
14
|
export { summarizeOptionsSchema, llmProviderSchema, summarizeFormatSchema } from "./types/summarize.js";
|
|
13
15
|
export { queryOptionsSchema } from "./types/query.js";
|
package/lib/index.js
CHANGED
|
@@ -6,8 +6,10 @@ export { listProfiles } from "./core/preprocessing/profiles.js";
|
|
|
6
6
|
// AI API
|
|
7
7
|
export { summarizeScript } from "./core/ai/command/summarize/index.js";
|
|
8
8
|
export { queryScript } from "./core/ai/command/query/index.js";
|
|
9
|
-
export { createInteractiveSession, sendInteractiveQuery, clearHistory, getHistory } from "./core/ai/command/query/interactive.js";
|
|
9
|
+
export { createInteractiveSession, sendInteractiveQuery, sendInteractiveQueryWithFetch, clearHistory, getHistory, getReferences, findReference, fetchReference, parseSuggestedFetch, removeSuggestFetchMarkers, } from "./core/ai/command/query/interactive.js";
|
|
10
|
+
// Utilities
|
|
11
|
+
export { fetchUrlContent } from "./core/ai/utils/fetcher.js";
|
|
10
12
|
// Schemas (for validation)
|
|
11
|
-
export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema } from "./types/index.js";
|
|
13
|
+
export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema, referenceSchema, faqSchema, scriptMetaSchema, } from "./types/index.js";
|
|
12
14
|
export { summarizeOptionsSchema, llmProviderSchema, summarizeFormatSchema } from "./types/summarize.js";
|
|
13
15
|
export { queryOptionsSchema } from "./types/query.js";
|
package/lib/types/index.d.ts
CHANGED
|
@@ -860,6 +860,59 @@ export declare const outputProfileSchema: z.ZodObject<{
|
|
|
860
860
|
description: z.ZodOptional<z.ZodString>;
|
|
861
861
|
}, z.core.$strip>;
|
|
862
862
|
export type OutputProfile = z.infer<typeof outputProfileSchema>;
|
|
863
|
+
/**
|
|
864
|
+
* Reference - external resource reference
|
|
865
|
+
*/
|
|
866
|
+
export declare const referenceSchema: z.ZodObject<{
|
|
867
|
+
type: z.ZodOptional<z.ZodEnum<{
|
|
868
|
+
web: "web";
|
|
869
|
+
code: "code";
|
|
870
|
+
document: "document";
|
|
871
|
+
video: "video";
|
|
872
|
+
}>>;
|
|
873
|
+
url: z.ZodString;
|
|
874
|
+
title: z.ZodOptional<z.ZodString>;
|
|
875
|
+
description: z.ZodOptional<z.ZodString>;
|
|
876
|
+
}, z.core.$strip>;
|
|
877
|
+
export type Reference = z.infer<typeof referenceSchema>;
|
|
878
|
+
/**
|
|
879
|
+
* FAQ - frequently asked question
|
|
880
|
+
*/
|
|
881
|
+
export declare const faqSchema: z.ZodObject<{
|
|
882
|
+
question: z.ZodString;
|
|
883
|
+
answer: z.ZodString;
|
|
884
|
+
relatedBeats: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
885
|
+
}, z.core.$strip>;
|
|
886
|
+
export type FAQ = z.infer<typeof faqSchema>;
|
|
887
|
+
/**
|
|
888
|
+
* Script Meta - script-level metadata for AI features
|
|
889
|
+
*/
|
|
890
|
+
export declare const scriptMetaSchema: z.ZodObject<{
|
|
891
|
+
audience: z.ZodOptional<z.ZodString>;
|
|
892
|
+
prerequisites: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
893
|
+
goals: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
894
|
+
background: z.ZodOptional<z.ZodString>;
|
|
895
|
+
faq: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
896
|
+
question: z.ZodString;
|
|
897
|
+
answer: z.ZodString;
|
|
898
|
+
relatedBeats: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
899
|
+
}, z.core.$strip>>>;
|
|
900
|
+
keywords: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
901
|
+
references: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
902
|
+
type: z.ZodOptional<z.ZodEnum<{
|
|
903
|
+
web: "web";
|
|
904
|
+
code: "code";
|
|
905
|
+
document: "document";
|
|
906
|
+
video: "video";
|
|
907
|
+
}>>;
|
|
908
|
+
url: z.ZodString;
|
|
909
|
+
title: z.ZodOptional<z.ZodString>;
|
|
910
|
+
description: z.ZodOptional<z.ZodString>;
|
|
911
|
+
}, z.core.$strip>>>;
|
|
912
|
+
author: z.ZodOptional<z.ZodString>;
|
|
913
|
+
version: z.ZodOptional<z.ZodString>;
|
|
914
|
+
}, z.core.$strip>;
|
|
915
|
+
export type ScriptMeta = z.infer<typeof scriptMetaSchema>;
|
|
863
916
|
/**
|
|
864
917
|
* Extended Script - script with variants, meta, and outputProfiles
|
|
865
918
|
*/
|
|
@@ -1924,6 +1977,31 @@ export declare const extendedScriptSchema: z.ZodObject<{
|
|
|
1924
1977
|
name: z.ZodString;
|
|
1925
1978
|
description: z.ZodOptional<z.ZodString>;
|
|
1926
1979
|
}, z.core.$strip>>>;
|
|
1980
|
+
scriptMeta: z.ZodOptional<z.ZodObject<{
|
|
1981
|
+
audience: z.ZodOptional<z.ZodString>;
|
|
1982
|
+
prerequisites: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1983
|
+
goals: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1984
|
+
background: z.ZodOptional<z.ZodString>;
|
|
1985
|
+
faq: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1986
|
+
question: z.ZodString;
|
|
1987
|
+
answer: z.ZodString;
|
|
1988
|
+
relatedBeats: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1989
|
+
}, z.core.$strip>>>;
|
|
1990
|
+
keywords: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1991
|
+
references: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
1992
|
+
type: z.ZodOptional<z.ZodEnum<{
|
|
1993
|
+
web: "web";
|
|
1994
|
+
code: "code";
|
|
1995
|
+
document: "document";
|
|
1996
|
+
video: "video";
|
|
1997
|
+
}>>;
|
|
1998
|
+
url: z.ZodString;
|
|
1999
|
+
title: z.ZodOptional<z.ZodString>;
|
|
2000
|
+
description: z.ZodOptional<z.ZodString>;
|
|
2001
|
+
}, z.core.$strip>>>;
|
|
2002
|
+
author: z.ZodOptional<z.ZodString>;
|
|
2003
|
+
version: z.ZodOptional<z.ZodString>;
|
|
2004
|
+
}, z.core.$strip>>;
|
|
1927
2005
|
}, z.core.$strict>;
|
|
1928
2006
|
export type ExtendedScript = z.infer<typeof extendedScriptSchema>;
|
|
1929
2007
|
/**
|
package/lib/types/index.js
CHANGED
|
@@ -33,10 +33,47 @@ export const outputProfileSchema = z.object({
|
|
|
33
33
|
name: z.string(),
|
|
34
34
|
description: z.string().optional(),
|
|
35
35
|
});
|
|
36
|
+
/**
|
|
37
|
+
* Reference - external resource reference
|
|
38
|
+
*/
|
|
39
|
+
export const referenceSchema = z.object({
|
|
40
|
+
type: z.enum(["web", "code", "document", "video"]).optional(),
|
|
41
|
+
url: z.string(),
|
|
42
|
+
title: z.string().optional(),
|
|
43
|
+
description: z.string().optional(),
|
|
44
|
+
});
|
|
45
|
+
/**
|
|
46
|
+
* FAQ - frequently asked question
|
|
47
|
+
*/
|
|
48
|
+
export const faqSchema = z.object({
|
|
49
|
+
question: z.string(),
|
|
50
|
+
answer: z.string(),
|
|
51
|
+
relatedBeats: z.array(z.string()).optional(),
|
|
52
|
+
});
|
|
53
|
+
/**
|
|
54
|
+
* Script Meta - script-level metadata for AI features
|
|
55
|
+
*/
|
|
56
|
+
export const scriptMetaSchema = z.object({
|
|
57
|
+
// Target audience and prerequisites
|
|
58
|
+
audience: z.string().optional(),
|
|
59
|
+
prerequisites: z.array(z.string()).optional(),
|
|
60
|
+
// Learning goals and background
|
|
61
|
+
goals: z.array(z.string()).optional(),
|
|
62
|
+
background: z.string().optional(),
|
|
63
|
+
// FAQ for quick Q&A matching
|
|
64
|
+
faq: z.array(faqSchema).optional(),
|
|
65
|
+
// Search and discovery
|
|
66
|
+
keywords: z.array(z.string()).optional(),
|
|
67
|
+
references: z.array(referenceSchema).optional(),
|
|
68
|
+
// Authoring info
|
|
69
|
+
author: z.string().optional(),
|
|
70
|
+
version: z.string().optional(),
|
|
71
|
+
});
|
|
36
72
|
/**
|
|
37
73
|
* Extended Script - script with variants, meta, and outputProfiles
|
|
38
74
|
*/
|
|
39
75
|
export const extendedScriptSchema = mulmoScriptSchema.extend({
|
|
40
76
|
beats: z.array(extendedBeatSchema),
|
|
41
77
|
outputProfiles: z.record(z.string(), outputProfileSchema).optional(),
|
|
78
|
+
scriptMeta: scriptMetaSchema.optional(),
|
|
42
79
|
});
|
package/lib/types/query.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import type { FetchedContent } from "../core/ai/utils/fetcher.js";
|
|
2
3
|
/**
|
|
3
4
|
* Query Options - configuration for querying script content
|
|
4
5
|
*/
|
|
@@ -42,4 +43,5 @@ export interface InteractiveQuerySession {
|
|
|
42
43
|
scriptTitle: string;
|
|
43
44
|
beatCount: number;
|
|
44
45
|
history: ConversationMessage[];
|
|
46
|
+
fetchedContent?: FetchedContent;
|
|
45
47
|
}
|