@dominikcz/greg 0.9.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +397 -0
- package/bin/greg.js +241 -0
- package/bin/init.js +351 -0
- package/bin/templates/docs/getting-started.md +47 -0
- package/bin/templates/docs/index.md +11 -0
- package/bin/templates/greg.config.js +39 -0
- package/bin/templates/greg.config.ts +38 -0
- package/bin/templates/index.html +16 -0
- package/bin/templates/src/App.svelte +5 -0
- package/bin/templates/src/app.css +20 -0
- package/bin/templates/src/main.js +9 -0
- package/bin/templates/svelte.config.js +1 -0
- package/bin/templates/tsconfig.json +21 -0
- package/bin/templates/vite.config.js +23 -0
- package/docs/__partials/markdown/examples/basic.md +4 -0
- package/docs/__partials/markdown/examples/diff.md +10 -0
- package/docs/__partials/markdown/examples/focus.md +5 -0
- package/docs/__partials/markdown/examples/language-title.md +3 -0
- package/docs/__partials/markdown/examples/line-highlighting.md +5 -0
- package/docs/__partials/markdown/examples/line-numbers.md +5 -0
- package/docs/__partials/note.md +4 -0
- package/docs/guide/__shared-warning.md +4 -0
- package/docs/guide/asset-handling.md +88 -0
- package/docs/guide/deploying.md +162 -0
- package/docs/guide/getting-started.md +334 -0
- package/docs/guide/index.md +23 -0
- package/docs/guide/localization.md +290 -0
- package/docs/guide/markdown/code.md +95 -0
- package/docs/guide/markdown/components-and-mermaid.md +43 -0
- package/docs/guide/markdown/containers.md +110 -0
- package/docs/guide/markdown/header-anchors.md +34 -0
- package/docs/guide/markdown/includes.md +84 -0
- package/docs/guide/markdown/index.md +20 -0
- package/docs/guide/markdown/inline-attributes.md +21 -0
- package/docs/guide/markdown/links-and-toc.md +64 -0
- package/docs/guide/markdown/math.md +54 -0
- package/docs/guide/markdown/syntax-highlighting.md +75 -0
- package/docs/guide/routing.md +150 -0
- package/docs/guide/using-svelte.md +88 -0
- package/docs/guide/versioning.md +281 -0
- package/docs/incompatibilities.md +48 -0
- package/docs/index.md +43 -0
- package/docs/reference/badge.md +100 -0
- package/docs/reference/carbon-ads.md +46 -0
- package/docs/reference/code-group.md +126 -0
- package/docs/reference/home-page.md +232 -0
- package/docs/reference/index.md +18 -0
- package/docs/reference/markdowndocs.md +275 -0
- package/docs/reference/outline.md +79 -0
- package/docs/reference/search.md +263 -0
- package/docs/reference/steps.md +200 -0
- package/docs/reference/team-page.md +189 -0
- package/docs/reference/theme.md +150 -0
- package/fakeDocsGenerator/generate_docs.js +310 -0
- package/package.json +92 -0
- package/scripts/build-versions.js +609 -0
- package/scripts/generate-static.js +79 -0
- package/scripts/render-markdown.js +420 -0
- package/src/lib/MarkdownDocs/AiChat.svelte +936 -0
- package/src/lib/MarkdownDocs/BackToTop.svelte +68 -0
- package/src/lib/MarkdownDocs/Breadcrumb.svelte +68 -0
- package/src/lib/MarkdownDocs/DocsNavigation.svelte +149 -0
- package/src/lib/MarkdownDocs/DocsSiteHeader.svelte +758 -0
- package/src/lib/MarkdownDocs/DocsVersionSwitcher.svelte +103 -0
- package/src/lib/MarkdownDocs/MarkdownDocs.svelte +2115 -0
- package/src/lib/MarkdownDocs/MarkdownRenderer.svelte +487 -0
- package/src/lib/MarkdownDocs/Outline.svelte +238 -0
- package/src/lib/MarkdownDocs/PrevNext.svelte +115 -0
- package/src/lib/MarkdownDocs/SearchModal.svelte +1241 -0
- package/src/lib/MarkdownDocs/TreeView.svelte +32 -0
- package/src/lib/MarkdownDocs/TreeViewItem.svelte +219 -0
- package/src/lib/MarkdownDocs/VersionOutdatedNotice.svelte +72 -0
- package/src/lib/MarkdownDocs/__tests__/codeDirectives.test.js +54 -0
- package/src/lib/MarkdownDocs/__tests__/common.test.js +41 -0
- package/src/lib/MarkdownDocs/__tests__/docsExamplesLint.test.js +77 -0
- package/src/lib/MarkdownDocs/__tests__/fixtures/docs/markdown/__partial-basic.md +3 -0
- package/src/lib/MarkdownDocs/__tests__/fixtures/docs/markdown/snippet.js +9 -0
- package/src/lib/MarkdownDocs/__tests__/fixtures/includes/part.md +11 -0
- package/src/lib/MarkdownDocs/__tests__/fixtures/includes/wrapper.md +5 -0
- package/src/lib/MarkdownDocs/__tests__/fixtures/snippets/sample.js +8 -0
- package/src/lib/MarkdownDocs/__tests__/fixtures/snippets/sample.md +5 -0
- package/src/lib/MarkdownDocs/__tests__/helpers.js +67 -0
- package/src/lib/MarkdownDocs/__tests__/localeUtils.test.js +204 -0
- package/src/lib/MarkdownDocs/__tests__/markdown.test.js +704 -0
- package/src/lib/MarkdownDocs/__tests__/markdownRendererRuntime.test.js +65 -0
- package/src/lib/MarkdownDocs/__tests__/searchIndexBuilder.test.js +117 -0
- package/src/lib/MarkdownDocs/__tests__/sqliteStore.test.js +202 -0
- package/src/lib/MarkdownDocs/__tests__/useRouter.test.js +16 -0
- package/src/lib/MarkdownDocs/ai/adapters/customAdapter.js +14 -0
- package/src/lib/MarkdownDocs/ai/adapters/customAdapter.ts +43 -0
- package/src/lib/MarkdownDocs/ai/adapters/ollamaAdapter.js +81 -0
- package/src/lib/MarkdownDocs/ai/adapters/ollamaAdapter.ts +116 -0
- package/src/lib/MarkdownDocs/ai/adapters/openaiAdapter.js +92 -0
- package/src/lib/MarkdownDocs/ai/adapters/openaiAdapter.ts +137 -0
- package/src/lib/MarkdownDocs/ai/aiProvider.ts +31 -0
- package/src/lib/MarkdownDocs/ai/characters.js +52 -0
- package/src/lib/MarkdownDocs/ai/characters.ts +69 -0
- package/src/lib/MarkdownDocs/ai/chunkStore.ts +25 -0
- package/src/lib/MarkdownDocs/ai/chunker.js +85 -0
- package/src/lib/MarkdownDocs/ai/chunker.ts +135 -0
- package/src/lib/MarkdownDocs/ai/docLinker.js +26 -0
- package/src/lib/MarkdownDocs/ai/docLinker.ts +36 -0
- package/src/lib/MarkdownDocs/ai/promptBuilder.js +33 -0
- package/src/lib/MarkdownDocs/ai/promptBuilder.ts +53 -0
- package/src/lib/MarkdownDocs/ai/ragPipeline.js +54 -0
- package/src/lib/MarkdownDocs/ai/ragPipeline.ts +106 -0
- package/src/lib/MarkdownDocs/ai/stores/memoryStore.js +88 -0
- package/src/lib/MarkdownDocs/ai/stores/memoryStore.ts +112 -0
- package/src/lib/MarkdownDocs/ai/stores/sqliteStore.ts +372 -0
- package/src/lib/MarkdownDocs/ai/types.ts +71 -0
- package/src/lib/MarkdownDocs/aiServer.js +288 -0
- package/src/lib/MarkdownDocs/codeDirectives.js +191 -0
- package/src/lib/MarkdownDocs/codeFenceInfo.js +45 -0
- package/src/lib/MarkdownDocs/codeGroup.ts +46 -0
- package/src/lib/MarkdownDocs/common.ts +47 -0
- package/src/lib/MarkdownDocs/docsUtils.js +281 -0
- package/src/lib/MarkdownDocs/index.plugins.js +22 -0
- package/src/lib/MarkdownDocs/layouts/LayoutDoc.svelte +8 -0
- package/src/lib/MarkdownDocs/layouts/LayoutHome.svelte +58 -0
- package/src/lib/MarkdownDocs/layouts/LayoutPage.svelte +9 -0
- package/src/lib/MarkdownDocs/loadGregConfig.js +82 -0
- package/src/lib/MarkdownDocs/localeUtils.ts +682 -0
- package/src/lib/MarkdownDocs/markdownRendererRuntime.ts +314 -0
- package/src/lib/MarkdownDocs/mermaidThemes.js +319 -0
- package/src/lib/MarkdownDocs/navigationUtils.js +22 -0
- package/src/lib/MarkdownDocs/rehypeCodeGroup.js +326 -0
- package/src/lib/MarkdownDocs/rehypeCodeTitle.js +96 -0
- package/src/lib/MarkdownDocs/rehypeToc.js +170 -0
- package/src/lib/MarkdownDocs/remarkCodeMeta.js +22 -0
- package/src/lib/MarkdownDocs/remarkContainers.js +329 -0
- package/src/lib/MarkdownDocs/remarkCustomAnchors.js +42 -0
- package/src/lib/MarkdownDocs/remarkEscapeSvelte.js +33 -0
- package/src/lib/MarkdownDocs/remarkGlobalComponents.js +65 -0
- package/src/lib/MarkdownDocs/remarkImports.js +461 -0
- package/src/lib/MarkdownDocs/remarkImportsBrowser.js +349 -0
- package/src/lib/MarkdownDocs/remarkInlineAttrs.js +95 -0
- package/src/lib/MarkdownDocs/remarkMathToHtml.js +138 -0
- package/src/lib/MarkdownDocs/searchIndexBuilder.js +497 -0
- package/src/lib/MarkdownDocs/searchServer.js +263 -0
- package/src/lib/MarkdownDocs/treeViewTypes.ts +11 -0
- package/src/lib/MarkdownDocs/useRouter.svelte.ts +114 -0
- package/src/lib/MarkdownDocs/useSplitter.svelte.ts +33 -0
- package/src/lib/MarkdownDocs/versioningDefaults.js +20 -0
- package/src/lib/MarkdownDocs/vitePluginAiServer.js +204 -0
- package/src/lib/MarkdownDocs/vitePluginCopyDocs.js +153 -0
- package/src/lib/MarkdownDocs/vitePluginFrontmatter.js +109 -0
- package/src/lib/MarkdownDocs/vitePluginGregConfig.js +108 -0
- package/src/lib/MarkdownDocs/vitePluginSearchIndex.js +57 -0
- package/src/lib/MarkdownDocs/vitePluginSearchServer.js +190 -0
- package/src/lib/components/Badge.svelte +59 -0
- package/src/lib/components/Button.svelte +138 -0
- package/src/lib/components/CarbonAds.svelte +99 -0
- package/src/lib/components/CodeGroup.svelte +102 -0
- package/src/lib/components/Feature.svelte +209 -0
- package/src/lib/components/Features.svelte +123 -0
- package/src/lib/components/Hero.svelte +399 -0
- package/src/lib/components/Image.svelte +128 -0
- package/src/lib/components/Link.svelte +105 -0
- package/src/lib/components/SocialLink.svelte +84 -0
- package/src/lib/components/SocialLinks.svelte +33 -0
- package/src/lib/components/Steps.svelte +143 -0
- package/src/lib/components/TeamMember.svelte +273 -0
- package/src/lib/components/TeamMembers.svelte +81 -0
- package/src/lib/components/TeamPage.svelte +65 -0
- package/src/lib/components/TeamPageSection.svelte +108 -0
- package/src/lib/components/TeamPageTitle.svelte +89 -0
- package/src/lib/components/index.js +24 -0
- package/src/lib/portal/context.js +12 -0
- package/src/lib/portal/index.js +3 -0
- package/src/lib/portal/portal.svelte +14 -0
- package/src/lib/portal/slot.svelte +8 -0
- package/src/lib/scss/__code.scss +128 -0
- package/src/lib/scss/__containers.scss +99 -0
- package/src/lib/scss/__markdown.scss +447 -0
- package/src/lib/scss/__scrollbar.scss +60 -0
- package/src/lib/scss/__steps.scss +100 -0
- package/src/lib/scss/__theme.scss +238 -0
- package/src/lib/scss/__toc.scss +55 -0
- package/src/lib/scss/__utilities.scss +7 -0
- package/src/lib/scss/greg.scss +9 -0
- package/src/lib/spinner/spinner.svelte +42 -0
- package/svelte.config.js +146 -0
- package/types/index.d.ts +456 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
 * Build the system prompt for one LLM call.
 *
 * The prompt is the character's persona text, followed by a fixed list of
 * answering rules, followed by the retrieved chunks rendered as numbered
 * context blocks (title, optional section heading, link, content).
 *
 * @param character Persona object; only `systemPrompt` is read here.
 * @param chunks    Retrieved chunks. Reads `pageId`, `pageTitle`, `content`
 *                  and the optional `sectionAnchor` / `sectionHeading`.
 * @param baseUrl   Prefix put in front of every context link (default: '').
 * @returns The complete system prompt text.
 */
export function buildSystemPrompt(character, chunks, baseUrl = '') {
  const rawBase = String(baseUrl);
  // Used when the page id already starts with "/": joining "https://host/" and
  // "/page" verbatim would produce "https://host//page".
  const trimmedBase = rawBase.replace(/\/+$/, '');

  const context = chunks
    .map((c, i) => {
      const pageId = `${c.pageId}`;
      const base = pageId.startsWith('/') ? trimmedBase : rawBase;
      const anchor = c.sectionAnchor ? `#${c.sectionAnchor}` : '';
      const heading = c.sectionHeading ? ` › ${c.sectionHeading}` : '';
      return (
        `[${i + 1}] Page: "${c.pageTitle}"${heading}\n` +
        ` Link: ${base}${pageId}${anchor}\n` +
        ` ${c.content}`
      );
    })
    .join('\n\n');

  // The rule lines sit at column 0 because they are inside a template literal:
  // any indentation here would become part of the prompt.
  return `${character.systemPrompt}

STRICT RULES — follow these without exception:
- Base your answer EXCLUSIVELY on the DOCUMENTATION CONTEXT provided below.
- If the context does not contain enough information to fully answer, say so clearly instead of guessing.
- ALWAYS include at least one inline markdown link citation: [Section Title](link)
- Do NOT invent, hallucinate, or add information absent from the context.
- Respond in the same language the user used in their question.
- Format your response in markdown.

DOCUMENTATION CONTEXT:
${context}`;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Assemble the message list for one LLM call: the system prompt first, then
 * any earlier conversation turns in their given order, then the new user
 * question last.
 */
export function buildMessages(character, chunks, userQuery, conversationHistory = [], baseUrl = '') {
  const messages = [
    { role: 'system', content: buildSystemPrompt(character, chunks, baseUrl) },
  ];
  for (const earlierTurn of conversationHistory) {
    messages.push(earlierTurn);
  }
  messages.push({ role: 'user', content: userQuery });
  return messages;
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { AiCharacter, ChatMessage, RetrievedChunk } from './types.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Build the system prompt by combining the character's persona with a fixed
 * list of answering rules and numbered documentation context blocks. Each
 * block includes the source link so the LLM can cite it inline.
 *
 * @param character Persona; only `systemPrompt` is read here.
 * @param chunks    Retrieved chunks rendered as context blocks, in order.
 * @param baseUrl   Prefix put in front of every context link (default: '').
 * @returns The complete system prompt text.
 */
export function buildSystemPrompt(
  character: AiCharacter,
  chunks: RetrievedChunk[],
  baseUrl = '',
): string {
  // Used when the page id already starts with "/": joining "https://host/" and
  // "/page" verbatim would produce "https://host//page".
  const trimmedBase = baseUrl.replace(/\/+$/, '');

  const context = chunks
    .map((c, i) => {
      const pageId = `${c.pageId}`;
      const base = pageId.startsWith('/') ? trimmedBase : baseUrl;
      const anchor = c.sectionAnchor ? `#${c.sectionAnchor}` : '';
      const heading = c.sectionHeading ? ` › ${c.sectionHeading}` : '';
      return (
        `[${i + 1}] Page: "${c.pageTitle}"${heading}\n` +
        ` Link: ${base}${pageId}${anchor}\n` +
        ` ${c.content}`
      );
    })
    .join('\n\n');

  // The rule lines sit at column 0 because they are inside a template literal:
  // any indentation here would become part of the prompt.
  return `${character.systemPrompt}

STRICT RULES — follow these without exception:
- Base your answer EXCLUSIVELY on the DOCUMENTATION CONTEXT provided below.
- If the context does not contain enough information to fully answer, say so clearly instead of guessing.
- ALWAYS include at least one inline markdown link citation: [Section Title](link)
- Do NOT invent, hallucinate, or add information absent from the context.
- Respond in the same language the user used in their question.
- Format your response in markdown.

DOCUMENTATION CONTEXT:
${context}`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Assemble the complete message list for one LLM call: the system prompt
 * first, then any earlier conversation turns in their given order, then the
 * new user question last.
 */
export function buildMessages(
  character: AiCharacter,
  chunks: RetrievedChunk[],
  userQuery: string,
  conversationHistory: ChatMessage[] = [],
  baseUrl = '',
): ChatMessage[] {
  const messages: ChatMessage[] = [
    { role: 'system', content: buildSystemPrompt(character, chunks, baseUrl) },
  ];
  for (const earlierTurn of conversationHistory) {
    messages.push(earlierTurn);
  }
  messages.push({ role: 'user', content: userQuery });
  return messages;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { buildMessages } from './promptBuilder.js';
|
|
2
|
+
import { extractSources } from './docLinker.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Retrieval-augmented generation pipeline: looks up documentation chunks in
 * the store, builds a prompt around them and asks the provider for an answer.
 */
export class RagPipeline {
  constructor(provider, store, characters) {
    this.provider = provider;
    this.store = store;
    this.characters = characters;
  }

  /** Look a character up by id; unknown or missing ids fall back to the first one. */
  getCharacter(characterId) {
    const match = characterId
      ? this.characters.find(candidate => candidate.id === characterId)
      : undefined;
    return match || this.characters[0];
  }

  /**
   * Answer `query` from the indexed documentation.
   *
   * Resolves to `{ answer, sources, character }`. When no chunk survives
   * retrieval, a fixed "could not find" answer is returned and the provider
   * is not called.
   */
  async ask(query, characterId, locale, options = {}) {
    const topK = options.topK ?? 8;
    const character = this.getCharacter(characterId);
    const baseUrl = options.baseUrl ?? '';

    // Ask the store for twice topK so the locale filter has spare candidates.
    let candidates = await this.store.search(query, topK * 2);

    if (locale && locale !== '/') {
      const prefix = locale.endsWith('/') ? locale : `${locale}/`;
      const inLocale = candidates.filter(
        ({ pageId }) => pageId === locale || pageId.startsWith(prefix),
      );
      // The filter is a preference: keep the unfiltered list if nothing matches.
      if (inLocale.length > 0) {
        candidates = inLocale;
      }
    }

    const chunks = candidates.slice(0, topK);

    if (chunks.length === 0) {
      return {
        answer: 'I could not find any relevant documentation to answer your question. Try rephrasing or look through the documentation directly.',
        sources: [],
        character: character.id,
      };
    }

    const messages = buildMessages(character, chunks, query, [], baseUrl);
    const answer = await this.provider.chat(messages, options.llm);
    const sources = extractSources(chunks);

    return { answer, sources, character: character.id };
  }

  /** Return the character list this pipeline was constructed with. */
  getCharacters() {
    return this.characters;
  }
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import type { AiProvider } from './aiProvider.js';
|
|
2
|
+
import type { ChunkStore } from './chunkStore.js';
|
|
3
|
+
import type { AiCharacter, AiProviderOptions, AiResponse } from './types.js';
|
|
4
|
+
import { buildMessages } from './promptBuilder.js';
|
|
5
|
+
import { extractSources } from './docLinker.js';
|
|
6
|
+
|
|
7
|
+
/** Optional tuning for a single `RagPipeline.ask()` call. */
export type RagOptions = {
  /**
   * Maximum number of chunks handed to the LLM as context. The store is
   * queried for twice this many so locale filtering has spare candidates.
   * Default: 8
   */
  topK?: number;
  /** LLM generation parameters, passed through as the second argument of `provider.chat()`. */
  llm?: AiProviderOptions;
  /**
   * Base URL prepended to doc links in the system prompt context.
   * Useful for absolute links when the AI server runs on a different origin.
   * Default: '' (relative links)
   */
  baseUrl?: string;
};
|
|
19
|
+
|
|
20
|
+
/**
 * Retrieval-Augmented Generation pipeline.
 *
 * One ask() call runs these stages in order:
 *   1. Fetch candidate chunks from the store
 *   2. Prefer chunks belonging to the requested locale, when any exist
 *   3. Build the message list (character persona + retrieved context)
 *   4. Send it to the LLM provider
 *   5. Derive source citations from the chunks that were used
 *   6. Return the AiResponse
 */
export class RagPipeline {
  constructor(
    private readonly provider: AiProvider,
    private readonly store: ChunkStore,
    private readonly characters: AiCharacter[],
  ) {}

  /** Look a character up by id; unknown or missing ids fall back to the first one. */
  private getCharacter(characterId?: string): AiCharacter {
    const match = characterId
      ? this.characters.find(candidate => candidate.id === characterId)
      : undefined;
    return match ?? this.characters[0];
  }

  /**
   * Answer a question from the indexed documentation.
   *
   * @param query        The user's question as plain text
   * @param characterId  Persona id; an unknown id falls back to the first character
   * @param locale       Locale path prefix used to prefer matching pages (e.g. '/pl/')
   * @param options      Retrieval and LLM tuning
   * @returns The answer, its sources and the id of the character used. When no
   *          chunk is retrieved, a fixed "could not find" answer is returned
   *          and the provider is not called.
   */
  async ask(
    query: string,
    characterId?: string,
    locale?: string,
    options: RagOptions = {},
  ): Promise<AiResponse> {
    const topK = options.topK ?? 8;
    const character = this.getCharacter(characterId);
    const baseUrl = options.baseUrl ?? '';

    // Over-fetch (2 × topK) so the locale filter has spare candidates.
    let candidates = await this.store.search(query, topK * 2);

    if (locale && locale !== '/') {
      const prefix = locale.endsWith('/') ? locale : `${locale}/`;
      const inLocale = candidates.filter(
        ({ pageId }) => pageId === locale || pageId.startsWith(prefix),
      );
      // The filter is a preference: keep the unfiltered list if nothing matches.
      if (inLocale.length > 0) {
        candidates = inLocale;
      }
    }

    const chunks = candidates.slice(0, topK);

    if (chunks.length === 0) {
      return {
        answer: 'I could not find any relevant documentation to answer your question. Try rephrasing or look through the documentation directly.',
        sources: [],
        character: character.id,
      };
    }

    // Each ask() is independent: no earlier conversation turns are passed.
    const messages = buildMessages(character, chunks, query, [], baseUrl);
    const answer = await this.provider.chat(messages, options.llm);

    // Sources come from the chunks that were sent, not from the answer text.
    const sources = extractSources(chunks);

    return { answer, sources, character: character.id };
  }

  /** Return the character list this pipeline was constructed with. */
  getCharacters(): AiCharacter[] {
    return this.characters;
  }
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// BM25 tuning constants, both used in the term-frequency component of
// MemoryStore.search().
// K1 scales how quickly repeated occurrences of a term stop adding score.
const K1 = 1.5;
// B weights the chunk-length normalization (len / avgLen).
const B = 0.75;
|
|
3
|
+
|
|
4
|
+
/**
 * Split text into lowercase search tokens.
 *
 * Everything that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator; tokens shorter than two characters
 * are dropped.
 *
 * @param text Raw text to tokenize.
 * @returns Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  return text
    .toLowerCase()
    // Unicode-aware on purpose: `\w` only covers ASCII, so it would turn every
    // accented or non-Latin letter into a separator and shred such words.
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter(t => t.length > 1);
}
|
|
11
|
+
|
|
12
|
+
/**
 * In-memory chunk store ranked with BM25.
 *
 * index() builds an inverted index over the chunks; search() scores the
 * chunks that share at least one term with the query.
 */
export class MemoryStore {
  constructor() {
    // Indexed chunks: the original fields plus `_terms` (term → count) and `_len` (token count).
    this.chunks = [];
    // term → Set of positions in `this.chunks` whose text contains that term.
    this.invertedIndex = new Map();
    // Mean `_len` over all chunks; set by index().
    this.avgLen = 0;
  }

  /**
   * Replace the current index with `chunks`.
   *
   * @param chunks Chunks to index; reads `pageTitle`, `sectionHeading` and `content`.
   */
  async index(chunks) {
    this.invertedIndex = new Map();
    this.chunks = chunks.map((chunk, i) => {
      // A missing field must contribute no tokens. Interpolating it directly
      // would index the literal word "undefined"/"null" and inflate `_len`.
      const title = chunk.pageTitle ?? '';
      const heading = chunk.sectionHeading ?? '';
      const body = chunk.content ?? '';

      // Title and heading appear twice so their terms count double.
      const tokens = tokenize(`${title} ${title} ${heading} ${heading} ${body}`);

      const terms = new Map();
      for (const t of tokens) {
        terms.set(t, (terms.get(t) ?? 0) + 1);
      }

      for (const t of terms.keys()) {
        let postings = this.invertedIndex.get(t);
        if (!postings) {
          postings = new Set();
          this.invertedIndex.set(t, postings);
        }
        postings.add(i);
      }

      return { ...chunk, _terms: terms, _len: tokens.length };
    });

    const totalLen = this.chunks.reduce((s, c) => s + c._len, 0);
    this.avgLen = this.chunks.length > 0 ? totalLen / this.chunks.length : 1;
  }

  /**
   * Rank the indexed chunks against `query`.
   *
   * @param query Free-text query.
   * @param limit Maximum number of results (default: 8).
   * @returns Matching chunks, best first, without the internal `_terms` / `_len`
   *          fields and with `score` scaled so the best match is 1. Empty when
   *          nothing is indexed, the query has no tokens, or nothing matches.
   */
  async search(query, limit = 8) {
    if (this.chunks.length === 0) return [];

    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) return [];

    const N = this.chunks.length;
    const scores = new Float64Array(N);

    for (const term of queryTerms) {
      const docs = this.invertedIndex.get(term);
      if (!docs) continue;

      // IDF with the "+ 1" inside the log, so it stays positive even for
      // terms that occur in most chunks.
      const df = docs.size;
      const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);

      for (const i of docs) {
        const chunk = this.chunks[i];
        const tf = chunk._terms.get(term) ?? 0;
        const len = chunk._len;
        // BM25 term-frequency component with length normalization.
        const tfBm25 = (tf * (K1 + 1)) / (tf + K1 * (1 - B + B * (len / this.avgLen)));
        scores[i] += idf * tfBm25;
      }
    }

    let maxScore = 0;
    for (let i = 0; i < N; i++) {
      if (scores[i] > maxScore) maxScore = scores[i];
    }
    if (maxScore === 0) return [];

    const results = [];
    for (let i = 0; i < N; i++) {
      if (scores[i] > 0) {
        // Strip the index-only fields before handing the chunk back.
        const { _terms: _t, _len: _l, ...chunk } = this.chunks[i];
        results.push({ ...chunk, score: scores[i] / maxScore });
      }
    }

    results.sort((a, b) => b.score - a.score);
    return results.slice(0, limit);
  }

  /** Number of chunks currently indexed. */
  size() {
    return this.chunks.length;
  }
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { ChunkStore } from '../chunkStore.js';
|
|
2
|
+
import type { DocChunk, RetrievedChunk } from '../types.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* In-memory BM25 chunk store.
|
|
6
|
+
*
|
|
7
|
+
* Implements Okapi BM25 full-text ranking — no external dependencies.
|
|
8
|
+
* Suitable for small to medium documentation sets (< ~50k chunks).
|
|
9
|
+
*
|
|
10
|
+
* BM25 parameters:
|
|
11
|
+
* k1 = 1.5 (term frequency saturation — limits how much repeated occurrences of a term add)
|
|
12
|
+
* b = 0.75 (document-length normalization)
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
// Both constants are used in the term-frequency component of MemoryStore.search().
// K1 scales how quickly repeated occurrences of a term stop adding score.
const K1 = 1.5;
// B weights the chunk-length normalization (len / avgLen).
const B = 0.75;
|
|
17
|
+
|
|
18
|
+
/** A DocChunk plus the per-chunk statistics computed by MemoryStore.index(). */
type IndexedChunk = DocChunk & {
  /** Term → raw frequency within this chunk's searchable text (title and heading counted twice, then content) */
  _terms: Map<string, number>;
  /** Total token count of that searchable text (used for length normalization) */
  _len: number;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Split text into lowercase search tokens.
 *
 * Everything that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator; tokens shorter than two characters
 * are dropped.
 *
 * @param text Raw text to tokenize.
 * @returns Tokens in order of appearance (duplicates kept).
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    // Unicode-aware on purpose: `\w` only covers ASCII, so it would turn every
    // accented or non-Latin letter into a separator and shred such words.
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter(t => t.length > 1);
}
|
|
32
|
+
|
|
33
|
+
/**
 * In-memory chunk store ranked with BM25.
 *
 * index() builds an inverted index over the chunks; search() scores the
 * chunks that share at least one term with the query.
 */
export class MemoryStore implements ChunkStore {
  private chunks: IndexedChunk[] = [];
  /** term → set of chunk-array indices containing that term */
  private invertedIndex = new Map<string, Set<number>>();
  /** Mean token count over all indexed chunks; set by index(). */
  private avgLen = 0;

  /**
   * Replace the current index with `chunks`.
   *
   * @param chunks Chunks to index; reads `pageTitle`, `sectionHeading` and `content`.
   */
  async index(chunks: DocChunk[]): Promise<void> {
    this.invertedIndex = new Map();
    this.chunks = chunks.map((chunk, i) => {
      // A missing field must contribute no tokens. Interpolating it directly
      // would index the literal word "undefined"/"null" and inflate `_len`.
      const title = chunk.pageTitle ?? '';
      const heading = chunk.sectionHeading ?? '';
      const body = chunk.content ?? '';

      // Title and heading carry more signal — include them twice in the token bag.
      const tokens = tokenize(`${title} ${title} ${heading} ${heading} ${body}`);

      const terms = new Map<string, number>();
      for (const t of tokens) {
        terms.set(t, (terms.get(t) ?? 0) + 1);
      }

      for (const t of terms.keys()) {
        let postings = this.invertedIndex.get(t);
        if (!postings) {
          postings = new Set<number>();
          this.invertedIndex.set(t, postings);
        }
        postings.add(i);
      }

      return { ...chunk, _terms: terms, _len: tokens.length };
    });

    const totalLen = this.chunks.reduce((s, c) => s + c._len, 0);
    this.avgLen = this.chunks.length > 0 ? totalLen / this.chunks.length : 1;
  }

  /**
   * Rank the indexed chunks against `query`.
   *
   * @param query Free-text query.
   * @param limit Maximum number of results (default: 8).
   * @returns Matching chunks, best first, without the internal `_terms` / `_len`
   *          fields and with `score` scaled so the best match is 1. Empty when
   *          nothing is indexed, the query has no tokens, or nothing matches.
   */
  async search(query: string, limit = 8): Promise<RetrievedChunk[]> {
    if (this.chunks.length === 0) return [];

    const queryTerms = tokenize(query);
    if (queryTerms.length === 0) return [];

    const N = this.chunks.length;
    const scores = new Float64Array(N);

    for (const term of queryTerms) {
      const docs = this.invertedIndex.get(term);
      if (!docs) continue;

      // IDF with the "+ 1" inside the log, so it stays positive even for
      // terms that occur in most chunks.
      const df = docs.size;
      const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);

      for (const i of docs) {
        const chunk = this.chunks[i];
        const tf = chunk._terms.get(term) ?? 0;
        const len = chunk._len;
        // BM25 TF with length normalization
        const tfBm25 = (tf * (K1 + 1)) / (tf + K1 * (1 - B + B * (len / this.avgLen)));
        scores[i] += idf * tfBm25;
      }
    }

    // Find the best score so results can be scaled relative to it.
    let maxScore = 0;
    for (let i = 0; i < N; i++) {
      if (scores[i] > maxScore) maxScore = scores[i];
    }
    if (maxScore === 0) return [];

    const results: RetrievedChunk[] = [];
    for (let i = 0; i < N; i++) {
      if (scores[i] > 0) {
        // Strip the index-only fields before handing the chunk back.
        const { _terms: _t, _len: _l, ...chunk } = this.chunks[i];
        results.push({ ...chunk, score: scores[i] / maxScore });
      }
    }

    results.sort((a, b) => b.score - a.score);
    return results.slice(0, limit);
  }

  /** Number of chunks currently indexed. */
  size(): number {
    return this.chunks.length;
  }
}
|