mulmocast-preprocessor 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { createInterface } from "node:readline";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import { queryScript } from "../../core/ai/command/query/index.js";
4
- import { createInteractiveSession, sendInteractiveQuery, clearHistory } from "../../core/ai/command/query/interactive.js";
4
+ import { createInteractiveSession, sendInteractiveQuery, sendInteractiveQueryWithFetch, clearHistory, getReferences, fetchReference, parseSuggestedFetch, removeSuggestFetchMarkers, } from "../../core/ai/command/query/interactive.js";
5
5
  import { loadScript } from "../utils.js";
6
6
  /**
7
7
  * Query command handler - outputs answer to stdout
@@ -37,6 +37,12 @@ export const queryCommand = async (scriptPath, question, options) => {
37
37
  process.exit(1);
38
38
  }
39
39
  };
40
/**
 * Render references as a numbered list for terminal output, one per line.
 * Each entry shows the reference type (falling back to "web") followed by the
 * title, or the URL when the reference has no title.
 */
const formatReferences = (references) => {
    const rendered = [];
    references.forEach((reference, position) => {
        const kind = reference.type || "web";
        const label = reference.title || reference.url;
        rendered.push(` ${position + 1}. [${kind}] ${label}`);
    });
    return rendered.join("\n");
};
40
46
  /**
41
47
  * Run interactive query mode
42
48
  */
@@ -54,13 +60,18 @@ const runInteractiveMode = async (scriptPath, script, options) => {
54
60
  GraphAILogger.error("No content available to query.");
55
61
  process.exit(1);
56
62
  }
63
+ const references = getReferences(script);
57
64
  const rl = createInterface({
58
65
  input: process.stdin,
59
66
  output: process.stdout,
60
67
  });
61
68
  GraphAILogger.info(`Interactive query mode for "${session.scriptTitle}" (${session.beatCount} beats)`);
62
- GraphAILogger.info("Commands: /clear (clear history), /history (show history), /exit or Ctrl+C (quit)");
69
+ GraphAILogger.info("Commands: /clear (clear history), /history (show history), /refs (show references), /fetch <url> (fetch URL), /exit (quit)");
70
+ if (references.length > 0) {
71
+ GraphAILogger.info(`Available references: ${references.length}`);
72
+ }
63
73
  GraphAILogger.info("");
74
+ let lastSuggestedUrl = null;
64
75
  const prompt = () => {
65
76
  rl.question("You: ", async (input) => {
66
77
  const trimmedInput = input.trim();
@@ -76,6 +87,7 @@ const runInteractiveMode = async (scriptPath, script, options) => {
76
87
  }
77
88
  if (trimmedInput === "/clear") {
78
89
  clearHistory(session);
90
+ lastSuggestedUrl = null;
79
91
  GraphAILogger.info("Conversation history cleared.\n");
80
92
  prompt();
81
93
  return;
@@ -95,10 +107,75 @@ const runInteractiveMode = async (scriptPath, script, options) => {
95
107
  prompt();
96
108
  return;
97
109
  }
110
+ if (trimmedInput === "/refs" || trimmedInput === "/references") {
111
+ if (references.length === 0) {
112
+ GraphAILogger.info("No references available.\n");
113
+ }
114
+ else {
115
+ GraphAILogger.info("Available references:");
116
+ GraphAILogger.info(formatReferences(references));
117
+ GraphAILogger.info("");
118
+ }
119
+ prompt();
120
+ return;
121
+ }
122
+ // Handle /fetch command
123
+ if (trimmedInput.startsWith("/fetch")) {
124
+ const urlArg = trimmedInput.replace(/^\/fetch\s*/, "").trim();
125
+ const urlToFetch = urlArg || lastSuggestedUrl;
126
+ if (!urlToFetch) {
127
+ GraphAILogger.info("Usage: /fetch <url> or /fetch (to fetch last suggested URL)\n");
128
+ prompt();
129
+ return;
130
+ }
131
+ GraphAILogger.info(`Fetching: ${urlToFetch}...`);
132
+ try {
133
+ const fetchedContent = await fetchReference(urlToFetch, validatedOptions.verbose);
134
+ if (fetchedContent.error) {
135
+ GraphAILogger.error(`Fetch error: ${fetchedContent.error}\n`);
136
+ }
137
+ else {
138
+ GraphAILogger.info(`Fetched ${fetchedContent.content.length} chars from ${fetchedContent.title || urlToFetch}`);
139
+ GraphAILogger.info("Content loaded. Ask a question to use this reference.\n");
140
+ // Store for next query
141
+ session.fetchedContent = fetchedContent;
142
+ }
143
+ }
144
+ catch (error) {
145
+ if (error instanceof Error) {
146
+ GraphAILogger.error(`Fetch error: ${error.message}\n`);
147
+ }
148
+ else {
149
+ GraphAILogger.error("Unknown fetch error\n");
150
+ }
151
+ }
152
+ prompt();
153
+ return;
154
+ }
98
155
  // Send query
99
156
  try {
100
- const answer = await sendInteractiveQuery(filteredScript, trimmedInput, session, validatedOptions);
101
- GraphAILogger.info(`\nAssistant: ${answer}\n`);
157
+ let answer;
158
+ // If we have fetched content, use it
159
+ if (session.fetchedContent) {
160
+ answer = await sendInteractiveQueryWithFetch(filteredScript, trimmedInput, session.fetchedContent, session, validatedOptions);
161
+ // Clear fetched content after use
162
+ session.fetchedContent = undefined;
163
+ }
164
+ else {
165
+ answer = await sendInteractiveQuery(filteredScript, trimmedInput, session, validatedOptions);
166
+ }
167
+ // Check for suggested fetch URL
168
+ const suggestedUrl = parseSuggestedFetch(answer);
169
+ if (suggestedUrl) {
170
+ lastSuggestedUrl = suggestedUrl;
171
+ const cleanAnswer = removeSuggestFetchMarkers(answer);
172
+ GraphAILogger.info(`\nAssistant: ${cleanAnswer}`);
173
+ GraphAILogger.info(`\n(Suggested reference: ${suggestedUrl})`);
174
+ GraphAILogger.info("Type /fetch to load this reference for more details.\n");
175
+ }
176
+ else {
177
+ GraphAILogger.info(`\nAssistant: ${answer}\n`);
178
+ }
102
179
  }
103
180
  catch (error) {
104
181
  if (error instanceof Error) {
@@ -1,5 +1,6 @@
1
- import type { ExtendedScript } from "../../../../types/index.js";
1
+ import type { ExtendedScript, Reference } from "../../../../types/index.js";
2
2
  import type { QueryOptions, InteractiveQuerySession, ConversationMessage } from "../../../../types/query.js";
3
+ import { type FetchedContent } from "../../utils/fetcher.js";
3
4
  /**
4
5
  * Create an interactive query session
5
6
  */
@@ -20,3 +21,32 @@ export declare const clearHistory: (session: InteractiveQuerySession) => void;
20
21
  * Get conversation history
21
22
  */
22
23
  export declare const getHistory: (session: InteractiveQuerySession) => ConversationMessage[];
24
+ /**
25
+ * Regex pattern for SUGGEST_FETCH marker (bounded quantifier to prevent ReDoS)
26
+ */
27
+ export declare const SUGGEST_FETCH_PATTERN: RegExp;
28
+ export declare const SUGGEST_FETCH_PATTERN_GLOBAL: RegExp;
29
+ /**
30
+ * Parse suggested fetch URL from AI response
31
+ */
32
+ export declare const parseSuggestedFetch: (response: string) => string | null;
33
+ /**
34
+ * Remove SUGGEST_FETCH markers from response
35
+ */
36
+ export declare const removeSuggestFetchMarkers: (response: string) => string;
37
+ /**
38
+ * Get available references from script
39
+ */
40
+ export declare const getReferences: (script: ExtendedScript) => Reference[];
41
+ /**
42
+ * Fetch reference content by URL
43
+ */
44
+ export declare const fetchReference: (url: string, verbose?: boolean) => Promise<FetchedContent>;
45
+ /**
46
+ * Find matching reference for a query
47
+ */
48
+ export declare const findReference: (script: ExtendedScript, query: string) => Reference | null;
49
+ /**
50
+ * Send a question with fetched reference content
51
+ */
52
+ export declare const sendInteractiveQueryWithFetch: (filteredScript: ExtendedScript, question: string, fetchedContent: FetchedContent, session: InteractiveQuerySession, options: QueryOptions) => Promise<string>;
@@ -1,6 +1,7 @@
1
1
  import { queryOptionsSchema } from "../../../../types/query.js";
2
- import { executeLLM, filterScript } from "../../llm.js";
3
- import { buildInteractiveUserPrompt, getInteractiveSystemPrompt } from "./prompts.js";
2
+ import { executeLLM, filterScript, getLanguageName } from "../../llm.js";
3
+ import { buildInteractiveUserPrompt, getInteractiveSystemPrompt, DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH } from "./prompts.js";
4
+ import { fetchUrlContent, findMatchingReference } from "../../utils/fetcher.js";
4
5
  /**
5
6
  * Create an interactive query session
6
7
  */
@@ -42,3 +43,78 @@ export const clearHistory = (session) => {
42
43
  export const getHistory = (session) => {
43
44
  return [...session.history];
44
45
  };
46
/**
 * Marker the assistant embeds to propose fetching a reference:
 * `[SUGGEST_FETCH: <url>]`. The payload is limited to 2000 characters
 * (bounded quantifier, intended to prevent ReDoS).
 * The non-global form captures the payload; the global form is used for removal.
 */
export const SUGGEST_FETCH_PATTERN = /\[SUGGEST_FETCH:\s*([^\]]{1,2000})\]/;
export const SUGGEST_FETCH_PATTERN_GLOBAL = /\[SUGGEST_FETCH:\s*[^\]]{1,2000}\]/g;
/**
 * Extract the first suggested URL from an assistant response.
 * Returns the trimmed marker payload, or null when no marker is present.
 */
export const parseSuggestedFetch = (response) => {
    const found = SUGGEST_FETCH_PATTERN.exec(response);
    if (found === null) {
        return null;
    }
    return found[1].trim();
};
/**
 * Strip every SUGGEST_FETCH marker from a response and trim the result.
 */
export const removeSuggestFetchMarkers = (response) => {
    const withoutMarkers = response.replace(SUGGEST_FETCH_PATTERN_GLOBAL, "");
    return withoutMarkers.trim();
};
64
/**
 * Return the references declared in the script's `scriptMeta`.
 * Yields an empty array when the script has no metadata or no references.
 */
export const getReferences = (script) => {
    const declared = script.scriptMeta?.references;
    if (!declared) {
        return [];
    }
    return declared;
};
70
/**
 * Fetch a reference by URL via `fetchUrlContent`, capping the extracted
 * content at 8000 characters. `verbose` is forwarded unchanged.
 */
export const fetchReference = async (url, verbose = false) => {
    const fetched = await fetchUrlContent(url, 8000, verbose);
    return fetched;
};
76
/**
 * Look up the script reference that matches a query.
 * Delegates to `findMatchingReference` over the script's declared references.
 */
export const findReference = (script, query) => findMatchingReference(getReferences(script), query);
83
/**
 * Send a question together with previously fetched reference content.
 *
 * Builds the regular interactive user prompt, splices the fetched content in
 * just before the "Current question:" marker (or appends it when the marker is
 * absent), calls the LLM, and appends the exchange to `session.history`.
 *
 * @param filteredScript - script whose beats provide the main content
 * @param question - the user's question
 * @param fetchedContent - result of a prior URL fetch (`url`, `title`, `content` are read)
 * @param session - interactive session; its history is read and appended to
 * @param options - query options; `lang` and `verbose` are read here
 * @returns the assistant's answer, or a fixed message when the script has no beats
 */
export const sendInteractiveQueryWithFetch = async (filteredScript, question, fetchedContent, session, options) => {
    if (filteredScript.beats.length === 0) {
        return "No content available to answer the question.";
    }
    // Build system prompt for fetched content mode
    let systemPrompt = DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH;
    if (options.lang) {
        const langName = getLanguageName(options.lang);
        systemPrompt = `${systemPrompt}\n- IMPORTANT: Write the answer in ${langName}`;
    }
    // Build user prompt with fetched content
    const baseUserPrompt = buildInteractiveUserPrompt(filteredScript, question, session.history);
    // Only the title line is optional, so it is added conditionally. Filtering
    // falsy entries would also drop the "" spacer entries, removing the blank
    // line before the section and the newline after the closing "---", which
    // glued "---" directly onto "Current question:".
    const fetchedSection = [
        "",
        "---",
        "Additional reference content fetched from URL:",
        `URL: ${fetchedContent.url}`,
        ...(fetchedContent.title ? [`Title: ${fetchedContent.title}`] : []),
        "",
        fetchedContent.content,
        "---",
        "",
    ].join("\n");
    // Insert before "Current question:" or at the end
    const insertPoint = baseUserPrompt.indexOf("Current question:");
    const userPrompt = insertPoint >= 0
        ? baseUserPrompt.slice(0, insertPoint) + fetchedSection + baseUserPrompt.slice(insertPoint)
        : baseUserPrompt + fetchedSection;
    const answer = await executeLLM(systemPrompt, userPrompt, options, options.verbose ? `Interactive query with fetch: ${question}` : undefined);
    // Add to history (the user entry notes which reference was supplied)
    session.history.push({ role: "user", content: `${question} (with reference: ${fetchedContent.url})` });
    session.history.push({ role: "assistant", content: answer });
    return answer;
};
@@ -15,7 +15,11 @@ export declare const buildUserPrompt: (script: ExtendedScript, question: string)
15
15
  /**
16
16
  * Default system prompt for interactive query
17
17
  */
18
- export declare const DEFAULT_INTERACTIVE_SYSTEM_PROMPT = "You are answering questions based on the content provided.\n- Answer based ONLY on the information in the provided content\n- If the answer cannot be found in the content, say so clearly\n- Be concise and direct in your answers\n- Do not make up information that is not in the content\n- You may reference previous conversation when answering follow-up questions";
18
+ export declare const DEFAULT_INTERACTIVE_SYSTEM_PROMPT = "You are answering questions based on the content provided.\n- Answer based ONLY on the information in the provided content\n- If the answer cannot be found in the content, say so clearly\n- Be concise and direct in your answers\n- Do not make up information that is not in the content\n- You may reference previous conversation when answering follow-up questions\n- If references are available and the user asks for more details, mention which reference could provide more information\n- When you suggest fetching a reference for more details, include [SUGGEST_FETCH: <url>] in your response";
19
+ /**
20
+ * Default system prompt for interactive query with fetched content
21
+ */
22
+ export declare const DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH = "You are answering questions based on the content provided, including fetched reference content.\n- Answer based on both the main content and any fetched reference content\n- If the answer cannot be found, say so clearly\n- Be concise and direct in your answers\n- Do not make up information\n- You may reference previous conversation when answering follow-up questions\n- Prioritize information from fetched content when it's more detailed and relevant";
19
23
  /**
20
24
  * Get system prompt for interactive mode
21
25
  */
@@ -44,7 +44,19 @@ export const DEFAULT_INTERACTIVE_SYSTEM_PROMPT = `You are answering questions ba
44
44
  - If the answer cannot be found in the content, say so clearly
45
45
  - Be concise and direct in your answers
46
46
  - Do not make up information that is not in the content
47
- - You may reference previous conversation when answering follow-up questions`;
47
+ - You may reference previous conversation when answering follow-up questions
48
+ - If references are available and the user asks for more details, mention which reference could provide more information
49
+ - When you suggest fetching a reference for more details, include [SUGGEST_FETCH: <url>] in your response`;
50
+ /**
51
+ * Default system prompt for interactive query with fetched content
52
+ */
53
+ export const DEFAULT_INTERACTIVE_SYSTEM_PROMPT_WITH_FETCH = `You are answering questions based on the content provided, including fetched reference content.
54
+ - Answer based on both the main content and any fetched reference content
55
+ - If the answer cannot be found, say so clearly
56
+ - Be concise and direct in your answers
57
+ - Do not make up information
58
+ - You may reference previous conversation when answering follow-up questions
59
+ - Prioritize information from fetched content when it's more detailed and relevant`;
48
60
  /**
49
61
  * Get system prompt for interactive mode
50
62
  */
@@ -103,31 +103,120 @@ export const getLanguageName = (langCode) => {
103
103
  };
104
104
  return langMap[langCode] || langCode;
105
105
  };
106
/**
 * Render one beat for the prompt: its indexed text followed by any metadata
 * lines (tags, context, keywords, expected questions).
 * Returns an empty string when the beat has no non-whitespace text.
 */
const buildBeatContent = (beat, index) => {
    const text = beat.text || "";
    if (text.trim() === "") {
        return "";
    }
    const headline = `[${index}] ${text}`;
    const meta = beat.meta;
    if (!meta) {
        return headline;
    }
    const hasItems = (list) => Boolean(list) && list.length > 0;
    // Each entry is either a rendered metadata line or null when the field is absent.
    const details = [
        hasItems(meta.tags) ? ` Tags: ${meta.tags.join(", ")}` : null,
        meta.context ? ` Context: ${meta.context}` : null,
        hasItems(meta.keywords) ? ` Keywords: ${meta.keywords.join(", ")}` : null,
        hasItems(meta.expectedQuestions) ? ` Can answer: ${meta.expectedQuestions.join("; ")}` : null,
    ].filter((line) => line !== null);
    return [headline, ...details].join("\n");
};
138
/**
 * Render the script-level metadata block for the prompt.
 * Emits, in order and only when present: background, target audience,
 * prerequisites, goals, keywords, references, FAQ and author.
 * Returns an empty string when the script has no `scriptMeta` or nothing to show.
 */
const buildScriptMetaContent = (script) => {
    const meta = script.scriptMeta;
    if (!meta) {
        return "";
    }
    const output = [];
    const addScalar = (label, value) => {
        if (value) {
            output.push(`${label}: ${value}`);
        }
    };
    const addList = (label, items, separator) => {
        if (items && items.length > 0) {
            output.push(`${label}: ${items.join(separator)}`);
        }
    };
    addScalar("Background", meta.background);
    addScalar("Target audience", meta.audience);
    addList("Prerequisites", meta.prerequisites, ", ");
    addList("Goals", meta.goals, "; ");
    addList("Keywords", meta.keywords, ", ");
    if (meta.references && meta.references.length > 0) {
        output.push("References:");
        for (const reference of meta.references) {
            const label = reference.title || reference.url;
            const suffix = reference.description ? ` - ${reference.description}` : "";
            output.push(` - [${reference.type || "web"}] ${label}: ${reference.url}${suffix}`);
        }
    }
    if (meta.faq && meta.faq.length > 0) {
        output.push("FAQ:");
        for (const entry of meta.faq) {
            output.push(` Q: ${entry.question}`, ` A: ${entry.answer}`);
        }
    }
    addScalar("Author", meta.author);
    return output.join("\n");
};
106
188
  /**
107
189
  * Build script content for user prompt (common part)
108
190
  */
109
191
  export const buildScriptContent = (script) => {
110
192
  const parts = [];
111
- // Add script metadata
193
+ // Add script title and language
112
194
  parts.push(`# Script: ${script.title}`);
113
195
  parts.push(`Language: ${script.lang}`);
114
196
  parts.push("");
115
- // Collect all text from beats grouped by section
197
+ // Add script-level metadata
198
+ const scriptMetaContent = buildScriptMetaContent(script);
199
+ if (scriptMetaContent) {
200
+ parts.push("## About this content");
201
+ parts.push(scriptMetaContent);
202
+ parts.push("");
203
+ }
204
+ // Collect all content from beats grouped by section
116
205
  const sections = new Map();
117
206
  script.beats.forEach((beat, index) => {
118
- const text = beat.text || "";
119
- if (!text.trim())
207
+ const content = buildBeatContent(beat, index);
208
+ if (!content)
120
209
  return;
121
210
  const section = beat.meta?.section || "main";
122
211
  if (!sections.has(section)) {
123
212
  sections.set(section, []);
124
213
  }
125
- sections.get(section).push(`[${index}] ${text}`);
214
+ sections.get(section).push(content);
126
215
  });
127
216
  // Output by section
128
- sections.forEach((texts, section) => {
217
+ sections.forEach((contents, section) => {
129
218
  parts.push(`## Section: ${section}`);
130
- texts.forEach((t) => parts.push(t));
219
+ contents.forEach((c) => parts.push(c));
131
220
  parts.push("");
132
221
  });
133
222
  return parts.join("\n");
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Fetched content result
3
+ */
4
+ export interface FetchedContent {
5
+ url: string;
6
+ title: string | null;
7
+ content: string;
8
+ error?: string;
9
+ }
10
+ /**
11
+ * Fetch URL content and extract text
12
+ */
13
+ export declare const fetchUrlContent: (url: string, maxLength?: number, verbose?: boolean) => Promise<FetchedContent>;
14
+ /**
15
+ * Find matching reference URL from script metadata
16
+ */
17
+ export declare const findMatchingReference: (references: Array<{
18
+ url: string;
19
+ title?: string;
20
+ description?: string;
21
+ }> | undefined, query: string) => {
22
+ url: string;
23
+ title?: string;
24
+ description?: string;
25
+ } | null;
@@ -0,0 +1,122 @@
1
+ import { GraphAILogger } from "graphai";
2
/**
 * Strip HTML tags and extract readable text.
 *
 * Removes script/style elements and comments, turns block-element boundaries
 * and `<br>` into line breaks, drops remaining tags, decodes a small set of
 * common entities and normalizes whitespace. Paragraph structure is kept as
 * newlines (at most one blank line in a row).
 *
 * @param html - raw HTML markup
 * @returns trimmed plain text
 */
const stripHtml = (html) => {
    // Remove script and style elements (closing tags may carry stray whitespace/attributes)
    let text = html.replace(/<script[^>]*>[\s\S]*?<\/script[^>]*>/gi, "");
    text = text.replace(/<style[^>]*>[\s\S]*?<\/style[^>]*>/gi, "");
    // Remove HTML comments
    text = text.replace(/<!--[\s\S]*?-->/g, "");
    // Whitespace in the markup itself is insignificant. Collapse it first so
    // that the only newlines left afterwards are the ones inserted below.
    text = text.replace(/\s+/g, " ");
    // <br> is a void element, so it never appears as a closing tag
    text = text.replace(/<br\b[^>]*>/gi, "\n");
    // Replace closing tags of common block elements with newlines
    text = text.replace(/<\/(p|div|h[1-6]|li|tr|br)[^>]*>/gi, "\n");
    // Remove all remaining HTML tags
    // eslint-disable-next-line sonarjs/slow-regex -- standard HTML tag removal pattern, safe for typical HTML
    text = text.replace(/<[^>]*>/g, " ");
    // Decode common HTML entities. "&amp;" goes last so that an escaped entity
    // such as "&amp;lt;" becomes the literal text "&lt;" instead of "<".
    text = text.replace(/&nbsp;/g, " ");
    text = text.replace(/&lt;/g, "<");
    text = text.replace(/&gt;/g, ">");
    text = text.replace(/&quot;/g, '"');
    text = text.replace(/&#39;/g, "'");
    text = text.replace(/&amp;/g, "&");
    // Normalize whitespace without destroying the inserted line breaks
    text = text.replace(/[^\S\n]+/g, " ");
    text = text.replace(/ ?\n ?/g, "\n");
    text = text.replace(/\n{3,}/g, "\n\n");
    return text.trim();
};
28
/**
 * Extract the document title from HTML.
 *
 * Reads the text of the first `<title>` element, collapses internal
 * whitespace (titles are often wrapped across lines in the markup) and
 * returns null when there is no title element or its text is blank, so that
 * "no title" is always represented the same way.
 *
 * @param html - raw HTML markup
 * @returns the normalized title, or null when none is available
 */
const extractTitle = (html) => {
    const match = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    if (!match) {
        return null;
    }
    const title = match[1].replace(/\s+/g, " ").trim();
    return title.length > 0 ? title : null;
};
35
/**
 * Fetch a URL and extract its text content.
 *
 * Never rejects: every failure (invalid or non-HTTP URL, network error,
 * timeout, non-2xx status, unsupported content type) is reported through the
 * `error` field of the result with an empty `content`.
 *
 * HTML/XHTML responses are converted to text via `stripHtml` and their title
 * is read via `extractTitle`; `text/plain` responses are returned as-is with a
 * null title. Content longer than `maxLength` is cut and suffixed with "...".
 *
 * @param url - absolute http(s) URL to fetch
 * @param maxLength - maximum number of content characters to keep (default 8000)
 * @param verbose - when true, logs progress through GraphAILogger
 * @returns `{ url, title, content }` on success, plus `error` on failure
 */
export const fetchUrlContent = async (url, maxLength = 8000, verbose = false) => {
    const timeoutMs = 30000;
    const failure = (message) => ({ url, title: null, content: "", error: message });
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    try {
        // The URL may originate from model output, so only plain web fetches are allowed.
        const { protocol } = new URL(url);
        if (protocol !== "http:" && protocol !== "https:") {
            return failure(`Unsupported URL scheme: ${protocol}`);
        }
        if (verbose) {
            GraphAILogger.info(`Fetching URL: ${url}`);
        }
        const response = await fetch(url, {
            signal: controller.signal,
            headers: {
                "User-Agent": "Mozilla/5.0 (compatible; MulmoCast/1.0; +https://github.com/receptron/mulmocast)",
                Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            },
        });
        if (!response.ok) {
            return failure(`HTTP ${response.status}: ${response.statusText}`);
        }
        const contentType = response.headers.get("content-type") || "";
        const mediaType = contentType.toLowerCase();
        const isHtml = mediaType.includes("text/html") || mediaType.includes("application/xhtml+xml");
        if (!isHtml && !mediaType.includes("text/plain")) {
            return failure(`Unsupported content type: ${contentType}`);
        }
        // The timer is still armed here, so a stalled body is aborted as well.
        const body = await response.text();
        const title = isHtml ? extractTitle(body) : null;
        const content = isHtml ? stripHtml(body) : body;
        // Truncate if needed (same marker for HTML and plain text)
        const truncatedContent = content.length > maxLength ? content.substring(0, maxLength) + "..." : content;
        if (verbose) {
            GraphAILogger.info(`Fetched ${content.length} chars from ${url}`);
        }
        return {
            url,
            title,
            content: truncatedContent,
        };
    }
    catch (error) {
        // Only our timer aborts this controller, so an aborted signal means a timeout.
        if (controller.signal.aborted) {
            return failure(`Request timed out after ${timeoutMs}ms`);
        }
        const errorMessage = error instanceof Error ? error.message : String(error);
        return failure(errorMessage);
    }
    finally {
        // Always release the timer so a failed request cannot keep the process alive.
        clearTimeout(timeoutId);
    }
};
103
/**
 * Find the first reference whose title, description or URL matches the query.
 *
 * The query is lower-cased and split on whitespace; words of two characters
 * or fewer are ignored. A reference matches when at least two query words
 * occur in its text, or when the query has exactly one usable word and that
 * word occurs. Matching is plain substring containment.
 *
 * @param references - candidate references (may be undefined or empty)
 * @param query - free-text query
 * @returns the first matching reference, or null when none matches
 */
export const findMatchingReference = (references, query) => {
    if (!references || references.length === 0) {
        return null;
    }
    // Tokenize once: the query words do not depend on the reference being tested.
    const queryWords = query
        .toLowerCase()
        .split(/\s+/)
        .filter((word) => word.length > 2);
    if (queryWords.length === 0) {
        return null;
    }
    const requiredMatches = queryWords.length === 1 ? 1 : 2;
    for (const ref of references) {
        const refText = [ref.title, ref.description, ref.url].filter(Boolean).join(" ").toLowerCase();
        const matchScore = queryWords.filter((word) => refText.includes(word)).length;
        if (matchScore >= requiredMatches) {
            return ref;
        }
    }
    return null;
};
package/lib/index.d.ts CHANGED
@@ -4,10 +4,12 @@ export { filterBySection, filterByTags, stripExtendedFields } from "./core/prepr
4
4
  export { listProfiles } from "./core/preprocessing/profiles.js";
5
5
  export { summarizeScript } from "./core/ai/command/summarize/index.js";
6
6
  export { queryScript } from "./core/ai/command/query/index.js";
7
- export { createInteractiveSession, sendInteractiveQuery, clearHistory, getHistory } from "./core/ai/command/query/interactive.js";
8
- export type { BeatVariant, BeatMeta, ExtendedBeat, ExtendedScript, OutputProfile, ProcessOptions, ProfileInfo } from "./types/index.js";
7
+ export { createInteractiveSession, sendInteractiveQuery, sendInteractiveQueryWithFetch, clearHistory, getHistory, getReferences, findReference, fetchReference, parseSuggestedFetch, removeSuggestFetchMarkers, } from "./core/ai/command/query/interactive.js";
8
+ export { fetchUrlContent } from "./core/ai/utils/fetcher.js";
9
+ export type { FetchedContent } from "./core/ai/utils/fetcher.js";
10
+ export type { BeatVariant, BeatMeta, ExtendedBeat, ExtendedScript, OutputProfile, ProcessOptions, ProfileInfo, Reference, FAQ, ScriptMeta, } from "./types/index.js";
9
11
  export type { SummarizeOptions, SummarizeResult, LLMProvider, SummarizeFormat, ProviderConfig } from "./types/summarize.js";
10
12
  export type { QueryOptions, QueryResult, ConversationMessage, InteractiveQuerySession } from "./types/query.js";
11
- export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema } from "./types/index.js";
13
+ export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema, referenceSchema, faqSchema, scriptMetaSchema, } from "./types/index.js";
12
14
  export { summarizeOptionsSchema, llmProviderSchema, summarizeFormatSchema } from "./types/summarize.js";
13
15
  export { queryOptionsSchema } from "./types/query.js";
package/lib/index.js CHANGED
@@ -6,8 +6,10 @@ export { listProfiles } from "./core/preprocessing/profiles.js";
6
6
  // AI API
7
7
  export { summarizeScript } from "./core/ai/command/summarize/index.js";
8
8
  export { queryScript } from "./core/ai/command/query/index.js";
9
- export { createInteractiveSession, sendInteractiveQuery, clearHistory, getHistory } from "./core/ai/command/query/interactive.js";
9
+ export { createInteractiveSession, sendInteractiveQuery, sendInteractiveQueryWithFetch, clearHistory, getHistory, getReferences, findReference, fetchReference, parseSuggestedFetch, removeSuggestFetchMarkers, } from "./core/ai/command/query/interactive.js";
10
+ // Utilities
11
+ export { fetchUrlContent } from "./core/ai/utils/fetcher.js";
10
12
  // Schemas (for validation)
11
- export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema } from "./types/index.js";
13
+ export { beatVariantSchema, beatMetaSchema, extendedBeatSchema, extendedScriptSchema, outputProfileSchema, referenceSchema, faqSchema, scriptMetaSchema, } from "./types/index.js";
12
14
  export { summarizeOptionsSchema, llmProviderSchema, summarizeFormatSchema } from "./types/summarize.js";
13
15
  export { queryOptionsSchema } from "./types/query.js";
@@ -860,6 +860,59 @@ export declare const outputProfileSchema: z.ZodObject<{
860
860
  description: z.ZodOptional<z.ZodString>;
861
861
  }, z.core.$strip>;
862
862
  export type OutputProfile = z.infer<typeof outputProfileSchema>;
863
+ /**
864
+ * Reference - external resource reference
865
+ */
866
+ export declare const referenceSchema: z.ZodObject<{
867
+ type: z.ZodOptional<z.ZodEnum<{
868
+ web: "web";
869
+ code: "code";
870
+ document: "document";
871
+ video: "video";
872
+ }>>;
873
+ url: z.ZodString;
874
+ title: z.ZodOptional<z.ZodString>;
875
+ description: z.ZodOptional<z.ZodString>;
876
+ }, z.core.$strip>;
877
+ export type Reference = z.infer<typeof referenceSchema>;
878
+ /**
879
+ * FAQ - frequently asked question
880
+ */
881
+ export declare const faqSchema: z.ZodObject<{
882
+ question: z.ZodString;
883
+ answer: z.ZodString;
884
+ relatedBeats: z.ZodOptional<z.ZodArray<z.ZodString>>;
885
+ }, z.core.$strip>;
886
+ export type FAQ = z.infer<typeof faqSchema>;
887
+ /**
888
+ * Script Meta - script-level metadata for AI features
889
+ */
890
+ export declare const scriptMetaSchema: z.ZodObject<{
891
+ audience: z.ZodOptional<z.ZodString>;
892
+ prerequisites: z.ZodOptional<z.ZodArray<z.ZodString>>;
893
+ goals: z.ZodOptional<z.ZodArray<z.ZodString>>;
894
+ background: z.ZodOptional<z.ZodString>;
895
+ faq: z.ZodOptional<z.ZodArray<z.ZodObject<{
896
+ question: z.ZodString;
897
+ answer: z.ZodString;
898
+ relatedBeats: z.ZodOptional<z.ZodArray<z.ZodString>>;
899
+ }, z.core.$strip>>>;
900
+ keywords: z.ZodOptional<z.ZodArray<z.ZodString>>;
901
+ references: z.ZodOptional<z.ZodArray<z.ZodObject<{
902
+ type: z.ZodOptional<z.ZodEnum<{
903
+ web: "web";
904
+ code: "code";
905
+ document: "document";
906
+ video: "video";
907
+ }>>;
908
+ url: z.ZodString;
909
+ title: z.ZodOptional<z.ZodString>;
910
+ description: z.ZodOptional<z.ZodString>;
911
+ }, z.core.$strip>>>;
912
+ author: z.ZodOptional<z.ZodString>;
913
+ version: z.ZodOptional<z.ZodString>;
914
+ }, z.core.$strip>;
915
+ export type ScriptMeta = z.infer<typeof scriptMetaSchema>;
863
916
  /**
864
917
  * Extended Script - script with variants, meta, and outputProfiles
865
918
  */
@@ -1924,6 +1977,31 @@ export declare const extendedScriptSchema: z.ZodObject<{
1924
1977
  name: z.ZodString;
1925
1978
  description: z.ZodOptional<z.ZodString>;
1926
1979
  }, z.core.$strip>>>;
1980
+ scriptMeta: z.ZodOptional<z.ZodObject<{
1981
+ audience: z.ZodOptional<z.ZodString>;
1982
+ prerequisites: z.ZodOptional<z.ZodArray<z.ZodString>>;
1983
+ goals: z.ZodOptional<z.ZodArray<z.ZodString>>;
1984
+ background: z.ZodOptional<z.ZodString>;
1985
+ faq: z.ZodOptional<z.ZodArray<z.ZodObject<{
1986
+ question: z.ZodString;
1987
+ answer: z.ZodString;
1988
+ relatedBeats: z.ZodOptional<z.ZodArray<z.ZodString>>;
1989
+ }, z.core.$strip>>>;
1990
+ keywords: z.ZodOptional<z.ZodArray<z.ZodString>>;
1991
+ references: z.ZodOptional<z.ZodArray<z.ZodObject<{
1992
+ type: z.ZodOptional<z.ZodEnum<{
1993
+ web: "web";
1994
+ code: "code";
1995
+ document: "document";
1996
+ video: "video";
1997
+ }>>;
1998
+ url: z.ZodString;
1999
+ title: z.ZodOptional<z.ZodString>;
2000
+ description: z.ZodOptional<z.ZodString>;
2001
+ }, z.core.$strip>>>;
2002
+ author: z.ZodOptional<z.ZodString>;
2003
+ version: z.ZodOptional<z.ZodString>;
2004
+ }, z.core.$strip>>;
1927
2005
  }, z.core.$strict>;
1928
2006
  export type ExtendedScript = z.infer<typeof extendedScriptSchema>;
1929
2007
  /**
@@ -33,10 +33,47 @@ export const outputProfileSchema = z.object({
33
33
  name: z.string(),
34
34
  description: z.string().optional(),
35
35
  });
36
+ /**
37
+ * Reference - external resource reference
38
+ */
39
+ export const referenceSchema = z.object({
40
+ type: z.enum(["web", "code", "document", "video"]).optional(),
41
+ url: z.string(),
42
+ title: z.string().optional(),
43
+ description: z.string().optional(),
44
+ });
45
+ /**
46
+ * FAQ - frequently asked question
47
+ */
48
+ export const faqSchema = z.object({
49
+ question: z.string(),
50
+ answer: z.string(),
51
+ relatedBeats: z.array(z.string()).optional(),
52
+ });
53
+ /**
54
+ * Script Meta - script-level metadata for AI features
55
+ */
56
+ export const scriptMetaSchema = z.object({
57
+ // Target audience and prerequisites
58
+ audience: z.string().optional(),
59
+ prerequisites: z.array(z.string()).optional(),
60
+ // Learning goals and background
61
+ goals: z.array(z.string()).optional(),
62
+ background: z.string().optional(),
63
+ // FAQ for quick Q&A matching
64
+ faq: z.array(faqSchema).optional(),
65
+ // Search and discovery
66
+ keywords: z.array(z.string()).optional(),
67
+ references: z.array(referenceSchema).optional(),
68
+ // Authoring info
69
+ author: z.string().optional(),
70
+ version: z.string().optional(),
71
+ });
36
72
  /**
37
73
  * Extended Script - script with variants, meta, and outputProfiles
38
74
  */
39
75
  export const extendedScriptSchema = mulmoScriptSchema.extend({
40
76
  beats: z.array(extendedBeatSchema),
41
77
  outputProfiles: z.record(z.string(), outputProfileSchema).optional(),
78
+ scriptMeta: scriptMetaSchema.optional(),
42
79
  });
@@ -1,4 +1,5 @@
1
1
  import { z } from "zod";
2
+ import type { FetchedContent } from "../core/ai/utils/fetcher.js";
2
3
  /**
3
4
  * Query Options - configuration for querying script content
4
5
  */
@@ -42,4 +43,5 @@ export interface InteractiveQuerySession {
42
43
  scriptTitle: string;
43
44
  beatCount: number;
44
45
  history: ConversationMessage[];
46
+ fetchedContent?: FetchedContent;
45
47
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast-preprocessor",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "Preprocessor for MulmoScript",
5
5
  "type": "module",
6
6
  "main": "lib/index.js",