@sweetoburrito/backstage-plugin-ai-assistant-backend 0.0.0-snapshot-20251210134851 → 0.0.0-snapshot-20260108124555
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants/prompts.cjs.js +1 -0
- package/dist/constants/prompts.cjs.js.map +1 -1
- package/dist/database/pg-vector-store.cjs.js +100 -38
- package/dist/database/pg-vector-store.cjs.js.map +1 -1
- package/dist/services/chat.cjs.js +0 -2
- package/dist/services/chat.cjs.js.map +1 -1
- package/dist/services/ingestor.cjs.js +5 -11
- package/dist/services/ingestor.cjs.js.map +1 -1
- package/dist/tools/searchKnowledge.cjs.js +1 -6
- package/dist/tools/searchKnowledge.cjs.js.map +1 -1
- package/migrations/20260108_data_ingestion_refactor.js +27 -0
- package/package.json +3 -3
package/dist/constants/prompts.cjs.js
@@ -29,6 +29,7 @@ Content Rules:
 8. **Actively use available tools** to enhance your responses
 9. Adapt your approach based on the specific tools and capabilities available in the current session
 10. When you do not have the information needed to answer, use the tools provided to gather more context before responding.
+11. When multiple documents are returned, use the lastUpdated metadata field to prioritize newer documents over older ones. Mention the recency of the information in your response where relevant.
 `;
 const DEFAULT_TOOL_GUIDELINE = `
 TOOL USAGE GUIDELINES:
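For context on the new rule 11: after the pg-vector-store changes further down, each retrieved document's metadata carries a `lastUpdated` timestamp plus a derived `ageInDays`. An illustrative sketch of that shape (field names come from this diff; the `id`, `source`, and values are made up):

```ts
// Hypothetical search result as the prompt rule would see it; only the
// field names (content, metadata.id, metadata.source, ageInDays,
// lastUpdated) are taken from the changes below, the values are invented.
const exampleResult = {
  content: 'How to rotate the deploy token...',
  metadata: {
    id: 'docs/getting-started',        // assumed ingestor document id
    source: 'techdocs',                // assumed ingestor id
    ageInDays: 12,
    lastUpdated: '2025-12-27T09:14:00.000Z',
  },
};
```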
package/dist/constants/prompts.cjs.js.map
@@ -1 +1 @@
-
{"version":3,"file":"prompts.cjs.js","sources":["../../src/constants/prompts.ts"],"sourcesContent":["export const DEFAULT_CONVERSATION_SUMMARY_PROMPT =\n \"Summarize this conversation in a concise manner. The summary should capture the main points. Return the summary only, without any additional text. Do not include any introductions or other part of the conversation that doesn't contribute to the summary or form part of the overall conversation as part of the summary.\";\n\nexport const DEFAULT_SUMMARY_PROMPT =\n \"Summarize the following content in a concise manner. The summary should capture the main points. Return the summary only, without any additional text. Do not include any introductions or other part of the content that doesn't contribute to the summary.\";\n\nexport const DEFAULT_IDENTITY_PROMPT = `\nYou are a helpful assistant that answers questions based on provided context from various documents. The context may come from sources such as internal wikis, code repositories, technical documentation, or other structured or unstructured data.\n`;\n\nexport const DEFAULT_FORMATTING_PROMPT = `\nCRITICAL FORMATTING RULES - MUST ALWAYS FOLLOW:\n1. **ALWAYS use proper markdown formatting in ALL responses**\n2. **NEVER output plain URLs** - ALWAYS convert them to clickable markdown links using [description](url) syntax\n3. **For images, ALWAYS use markdown image syntax**: \n4. **For all URLs, ALWAYS format as**: [descriptive text](url) - never just paste the raw URL\n5. Use headings (##, ###), bullet points, numbered lists, and **bold**/*italic* text appropriately\n6. Format code with backticks: \\`inline code\\` or \\`\\`\\`language for code blocks\n7. Structure responses clearly with proper spacing and organization\n8. **MANDATORY: If metadata.urls are present in tool responses, ALWAYS include them in your answer as a \"Sources\" or \"References\" section at the end**\n9. **Format source URLs as**: [Source Name or Document Title](url)\n`;\n\nexport const DEFAULT_SYSTEM_PROMPT = `\nContent Rules:\n1. Always base your answers on the provided context. Do not make up information.\n2. When relevant, cite or reference the source information provided in the context.\n3. **MANDATORY: When tool responses include metadata.urls, you MUST include these URLs in your response**\n4. **Always add a \"Sources:\" or \"References:\" section at the end listing all metadata.urls from tool calls**\n5. Maintain a professional, friendly, and helpful tone.\n6. Return only the relevant information without any filler or unnecessary details.\n7. If you don't know the answer, admit it and suggest ways to find the information.\n8. **Actively use available tools** to enhance your responses\n9. Adapt your approach based on the specific tools and capabilities available in the current session\n10. 
When you do not have the information needed to answer, use the tools provided to gather more context before responding.\n`;\n\nexport const DEFAULT_TOOL_GUIDELINE = `\nTOOL USAGE GUIDELINES:\n- Only use tools when explicitly needed to answer the user's question\n- Read tool descriptions carefully before using them\n- If you can answer without tools, do so\n- IMPORTANT: When using tools, always explain why you're using each tool\n- Use tools in logical sequence, not randomly\n- If a tool fails, try an alternative approach before using another tool\n- **MANDATORY: After using tools, check for metadata.urls in the response and ALWAYS include them in your final answer**\n- **Format tool source URLs in a dedicated \"Sources:\" section using markdown links**\n`;\n"],"names":[],"mappings":";;AAAO,MAAM,mCAAA,GACX;AAEK,MAAM,sBAAA,GACX;AAEK,MAAM,uBAAA,GAA0B;AAAA;AAAA;AAIhC,MAAM,yBAAA,GAA4B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAalC,MAAM,qBAAA,GAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;
+
{"version":3,"file":"prompts.cjs.js","sources":["../../src/constants/prompts.ts"],"sourcesContent":["export const DEFAULT_CONVERSATION_SUMMARY_PROMPT =\n \"Summarize this conversation in a concise manner. The summary should capture the main points. Return the summary only, without any additional text. Do not include any introductions or other part of the conversation that doesn't contribute to the summary or form part of the overall conversation as part of the summary.\";\n\nexport const DEFAULT_SUMMARY_PROMPT =\n \"Summarize the following content in a concise manner. The summary should capture the main points. Return the summary only, without any additional text. Do not include any introductions or other part of the content that doesn't contribute to the summary.\";\n\nexport const DEFAULT_IDENTITY_PROMPT = `\nYou are a helpful assistant that answers questions based on provided context from various documents. The context may come from sources such as internal wikis, code repositories, technical documentation, or other structured or unstructured data.\n`;\n\nexport const DEFAULT_FORMATTING_PROMPT = `\nCRITICAL FORMATTING RULES - MUST ALWAYS FOLLOW:\n1. **ALWAYS use proper markdown formatting in ALL responses**\n2. **NEVER output plain URLs** - ALWAYS convert them to clickable markdown links using [description](url) syntax\n3. **For images, ALWAYS use markdown image syntax**: \n4. **For all URLs, ALWAYS format as**: [descriptive text](url) - never just paste the raw URL\n5. Use headings (##, ###), bullet points, numbered lists, and **bold**/*italic* text appropriately\n6. Format code with backticks: \\`inline code\\` or \\`\\`\\`language for code blocks\n7. Structure responses clearly with proper spacing and organization\n8. **MANDATORY: If metadata.urls are present in tool responses, ALWAYS include them in your answer as a \"Sources\" or \"References\" section at the end**\n9. **Format source URLs as**: [Source Name or Document Title](url)\n`;\n\nexport const DEFAULT_SYSTEM_PROMPT = `\nContent Rules:\n1. Always base your answers on the provided context. Do not make up information.\n2. When relevant, cite or reference the source information provided in the context.\n3. **MANDATORY: When tool responses include metadata.urls, you MUST include these URLs in your response**\n4. **Always add a \"Sources:\" or \"References:\" section at the end listing all metadata.urls from tool calls**\n5. Maintain a professional, friendly, and helpful tone.\n6. Return only the relevant information without any filler or unnecessary details.\n7. If you don't know the answer, admit it and suggest ways to find the information.\n8. **Actively use available tools** to enhance your responses\n9. Adapt your approach based on the specific tools and capabilities available in the current session\n10. When you do not have the information needed to answer, use the tools provided to gather more context before responding.\n11. When multiple documents are returned, use the lastUpdated metadata field to prioritize newer documents over older ones. 
Mention the recency of the information in your response where relevant.\n`;\n\nexport const DEFAULT_TOOL_GUIDELINE = `\nTOOL USAGE GUIDELINES:\n- Only use tools when explicitly needed to answer the user's question\n- Read tool descriptions carefully before using them\n- If you can answer without tools, do so\n- IMPORTANT: When using tools, always explain why you're using each tool\n- Use tools in logical sequence, not randomly\n- If a tool fails, try an alternative approach before using another tool\n- **MANDATORY: After using tools, check for metadata.urls in the response and ALWAYS include them in your final answer**\n- **Format tool source URLs in a dedicated \"Sources:\" section using markdown links**\n`;\n"],"names":[],"mappings":";;AAAO,MAAM,mCAAA,GACX;AAEK,MAAM,sBAAA,GACX;AAEK,MAAM,uBAAA,GAA0B;AAAA;AAAA;AAIhC,MAAM,yBAAA,GAA4B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAalC,MAAM,qBAAA,GAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAe9B,MAAM,sBAAA,GAAyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;;;;;;;"}
package/dist/database/pg-vector-store.cjs.js
@@ -1,6 +1,10 @@
 'use strict';
 
+var crypto = require('crypto');
+var uuid = require('uuid');
+
 class PgVectorStore {
+  // Seconds in a day for timestamp conversion
   /**
    * Creates an instance of PgVectorStore.
    * @param client - The Knex client to interact with the PostgreSQL database.
@@ -15,6 +19,14 @@ class PgVectorStore {
   }
   tableName = "embeddings";
   embeddings;
+  // Recency bias configuration
+  RECENCY_WEIGHT = 0.3;
+  // Weight for document recency (0-1)
+  SIMILARITY_WEIGHT = 1 - this.RECENCY_WEIGHT;
+  // Weight for vector similarity (0-1)
+  RECENCY_HALF_LIFE_DAYS = 180;
+  // Days until recency boost is halved (6 months)
+  AGE_SCALE_FACTOR = 86400;
   static async fromConfig({ config, database, logger }) {
     const client = await database.getClient();
     const chunkSize = config.getOptionalNumber(
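The constants added above parameterize a recency-weighted ranking used later in `similaritySearchVectorWithScore`. A minimal standalone sketch of the decay they imply (not part of the package): the `EXP(-0.693 * age / halfLife)` term halves a document's recency boost every `RECENCY_HALF_LIFE_DAYS` days, since ln(2) ≈ 0.693.

```ts
// Sketch of the recency decay implied by the constants above (hypothetical helper).
const RECENCY_HALF_LIFE_DAYS = 180;

function recencyBoost(ageInDays: number): number {
  // exp(-ln(2) * age / halfLife): halves every RECENCY_HALF_LIFE_DAYS days
  return Math.exp((-0.693 * ageInDays) / RECENCY_HALF_LIFE_DAYS);
}

console.log(recencyBoost(0));   // 1.00 (updated today)
console.log(recencyBoost(180)); // ~0.50 (six months old)
console.log(recencyBoost(360)); // ~0.25 (a year old)
```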
@@ -45,42 +57,75 @@ class PgVectorStore {
     if (documents.length === 0) {
       return;
     }
-    const texts = documents.map(({ content }) => content);
     if (!this.embeddings) {
       throw new Error("No Embeddings configured for the vector store.");
     }
-    const vectors = await this.embeddings.embedDocuments(texts);
-    this.logger.info(
-      `Received ${vectors.length} vectors from embeddings creation.`
+    const conditions = documents.map(() => `(metadata->>'id' = ? AND metadata->>'source' = ?)`).join(" OR ");
+    const params = documents.flatMap((doc) => [
+      doc.metadata.id,
+      doc.metadata.source
+    ]);
+    const existingDocuments = await this.client.select("*").from(this.tableName).whereRaw(conditions, params);
+    const existingMap = new Map(
+      existingDocuments.map((doc) => [
+        `${doc.metadata.id}:${doc.metadata.source}`,
+        doc
+      ])
     );
-    this.addVectors(vectors, documents);
-  }
-  /**
-   * Adds vectors to the database along with corresponding documents.
-   *
-   * @param {number[][]} vectors - The vectors to be added.
-   * @param {EmbeddingDoc[]} documents - The corresponding documents.
-   * @return {Promise<void>} - A promise that resolves when the vectors are added successfully.
-   * @throws {Error} - If there is an error inserting the vectors.
-   */
-  async addVectors(vectors, documents) {
-    try {
-      const rows = [];
-      for (let i = 0; i < vectors.length; i += 1) {
-        const embedding = vectors[i];
-        const embeddingString = `[${embedding.join(",")}]`;
-        const values = {
-          content: documents[i].content.replace(/\0/g, ""),
-          vector: embeddingString.replace(/\0/g, ""),
-          metadata: documents[i].metadata
-        };
-        rows.push(values);
+    const newDocuments = [];
+    const documentsToUpdate = [];
+    for (const doc of documents) {
+      const key = `${doc.metadata.id}:${doc.metadata.source}`;
+      const existing = existingMap.get(key);
+      if (!existing) {
+        newDocuments.push(doc);
+        continue;
+      }
+      const newHash = crypto.createHash("sha256").update(doc.content).digest("hex");
+      if (!existing.hash || newHash !== existing.hash) {
+        documentsToUpdate.push({ ...doc, id: existing.id });
+      }
+    }
+    const allDocumentsToAdd = [...newDocuments, ...documentsToUpdate];
+    if (allDocumentsToAdd.length === 0) {
+      this.logger.debug("No new or updated documents to add.");
+      return;
+    }
+    if (documentsToUpdate.length > 0) {
+      const uniqueDocKeys = new Set(
+        documentsToUpdate.map(
+          (doc) => `${doc.metadata.id}:${doc.metadata.source}`
+        )
+      );
+      for (const key of uniqueDocKeys) {
+        const [id, source] = key.split(":");
+        await this.client(this.tableName).delete().whereRaw(`metadata->>'id' = ? AND metadata->>'source' = ?`, [
+          id,
+          source
+        ]);
       }
-      await this.client.batchInsert(this.tableName, rows, this.chunkSize);
-    } catch (e) {
-      this.logger.error(e.message);
-      throw new Error(`Error inserting: ${e.message}`);
+      this.logger.info(
+        `Deleted all chunks for ${uniqueDocKeys.size} updated documents`
+      );
     }
+    const contents = allDocumentsToAdd.map((doc) => doc.content);
+    const vectors = await this.embeddings.embedDocuments(contents);
+    const rows = allDocumentsToAdd.map((doc, index) => {
+      const vector = vectors[index];
+      const hash = crypto.createHash("sha256").update(doc.content).digest("hex");
+      return {
+        hash,
+        id: doc.id ?? uuid.v4(),
+        metadata: doc.metadata,
+        lastUpdated: /* @__PURE__ */ new Date(),
+        content: doc.content.replace(/\0/g, ""),
+        vector: `[${vector.join(",")}]`
+      };
+    });
+    this.logger.info(
+      `Adding ${rows.length} documents (${newDocuments.length} new, ${documentsToUpdate.length} updated).`
+    );
+    await this.client.batchInsert(this.tableName, rows, this.chunkSize);
   }
   /**
    * Deletes records from the database table by their ids.
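The rewritten `addDocuments` above is now an upsert: rows are keyed by the `(metadata.id, metadata.source)` pair, and a sha256 hash of the chunk content decides whether an existing row must be re-embedded. A minimal runnable sketch of that change-detection rule (the helper and its names are illustrative, not part of the package):

```ts
import { createHash } from 'crypto';

// Illustrative sketch of the dedup rule in addDocuments above: re-embed only
// when the (id, source) key is new, or the stored sha256 no longer matches.
type StoredRow = { hash?: string };

function needsReindex(content: string, existing?: StoredRow): boolean {
  if (!existing) return true; // never ingested before
  const newHash = createHash('sha256').update(content).digest('hex');
  // changed content, or a legacy row written before the hash column existed
  return !existing.hash || newHash !== existing.hash;
}

const v1Hash = createHash('sha256').update('v1 content').digest('hex');
console.log(needsReindex('v1 content'));                   // true:  new document
console.log(needsReindex('v1 content', { hash: v1Hash })); // false: unchanged, skipped
console.log(needsReindex('v2 content', { hash: v1Hash })); // true:  edited, old chunks deleted first
```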
@@ -134,6 +179,8 @@ class PgVectorStore {
   }
   /**
    * Finds the most similar documents to a given query vector, along with their similarity scores.
+   * Results are ranked by a weighted combination of vector similarity and document recency.
+   * i.e newer documents are favored in the ranking but if no new documents exist, older but more similar documents will still be returned.
    *
    * @param {number[]} query - The query vector to compare against.
    * @param {number} amount - The maximum number of results to return.
@@ -144,23 +191,38 @@ class PgVectorStore {
   async similaritySearchVectorWithScore(query, amount, filter) {
     const embeddingString = `[${query.join(",")}]`;
     const queryString = `
-      SELECT *, vector <=> :embeddingString as "_distance"
-      FROM ${this.tableName}
-      WHERE metadata::jsonb @> :filter
-      ORDER BY "_distance" ASC
-      LIMIT :amount
-    `;
+      SELECT
+        *,
+        (vector <=> :embeddingString) as "_distance",
+        (EXTRACT(EPOCH FROM (NOW() - COALESCE("lastUpdated", NOW()))) / :ageScaleFactor) as "_age_days",
+        (
+          ((vector <=> :embeddingString) * :similarityWeight) -
+          (EXP(-0.693 * (EXTRACT(EPOCH FROM (NOW() - COALESCE("lastUpdated", NOW()))) / :ageScaleFactor) / :recencyHalfLife) * :recencyWeight)
+        ) as "_combined_score"
+      FROM ${this.tableName}
+      WHERE metadata::jsonb @> :filter
+      ORDER BY "_combined_score" ASC
+      LIMIT :amount
+    `;
     const documents = (await this.client.raw(queryString, {
       embeddingString,
       filter: JSON.stringify(filter ?? {}),
-      amount
+      amount,
+      similarityWeight: this.SIMILARITY_WEIGHT,
+      recencyWeight: this.RECENCY_WEIGHT,
+      recencyHalfLife: this.RECENCY_HALF_LIFE_DAYS,
+      ageScaleFactor: this.AGE_SCALE_FACTOR
    })).rows;
     const results = [];
     for (const doc of documents) {
       if (doc._distance !== null && doc.content !== null) {
         const document = {
           content: doc.content,
-          metadata: doc.metadata
+          metadata: {
+            ...doc.metadata,
+            ageInDays: Math.round(doc._age_days),
+            lastUpdated: doc.lastUpdated
+          }
         };
         results.push([document, doc._distance]);
       }
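The query above orders by `_combined_score` ascending: cosine distance (lower means more similar) scaled by the similarity weight, minus the exponential recency boost scaled by the recency weight. Because the boost only subtracts, a sufficiently similar old document still wins when nothing recent matches, which is what the docstring added in the previous hunk describes. A small sketch of that ordering using the package's default weights (the helper itself is illustrative):

```ts
// Sketch of the _combined_score ordering above (lower score ranks first).
const SIMILARITY_WEIGHT = 0.7; // 1 - RECENCY_WEIGHT
const RECENCY_WEIGHT = 0.3;
const RECENCY_HALF_LIFE_DAYS = 180;

function combinedScore(distance: number, ageInDays: number): number {
  const recencyBoost = Math.exp((-0.693 * ageInDays) / RECENCY_HALF_LIFE_DAYS);
  return distance * SIMILARITY_WEIGHT - recencyBoost * RECENCY_WEIGHT;
}

// A week-old document can outrank an older one with a smaller distance:
console.log(combinedScore(0.3, 7));   // ~ -0.08, ranks first
console.log(combinedScore(0.2, 720)); // ~  0.12, more similar but two years stale
```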
package/dist/database/pg-vector-store.cjs.js.map
@@ -1 +1 @@
-
{"version":3,"file":"pg-vector-store.cjs.js","sources":["../../src/database/pg-vector-store.ts"],"sourcesContent":["import {\n DatabaseService,\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport {\n VectorStore,\n EmbeddingDocument,\n EmbeddingDocumentMetadata,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { Embeddings } from '@langchain/core/embeddings';\nimport { Knex } from 'knex';\n\nexport type PgVectorStoreOptions = {\n database: DatabaseService;\n logger: LoggerService;\n config: RootConfigService;\n};\n\nexport class PgVectorStore implements VectorStore {\n private readonly tableName: string = 'embeddings';\n private embeddings?: Omit<Embeddings, 'caller'>;\n\n /**\n * Creates an instance of PgVectorStore.\n * @param client - The Knex client to interact with the PostgreSQL database.\n * @param [amount=4] - The number of embeddings to store.\n * @param [chunkSize=500] - The size of each chunk of embeddings.\n */\n constructor(\n private readonly client: Knex,\n private readonly logger: LoggerService,\n private readonly amount: number = 4,\n private readonly chunkSize: number = 500,\n ) {}\n\n static async fromConfig({ config, database, logger }: PgVectorStoreOptions) {\n const client = await database.getClient();\n const chunkSize = config.getOptionalNumber(\n 'aiAssistant.storage.pgVector.chunkSize',\n );\n const amount = config.getOptionalNumber(\n 'aiAssistant.storage.pgVector.amount',\n );\n\n return new PgVectorStore(client, logger, amount, chunkSize);\n }\n\n connectEmbeddings(embeddings: Omit<Embeddings, 'caller'>) {\n if (this.embeddings) {\n this.logger.warn('Embeddings already connected, overwriting.');\n }\n this.embeddings = embeddings;\n }\n\n table() {\n return this.client(this.tableName);\n }\n\n /**\n * Add documents to the vector store.\n *\n * @param {EmbeddingDocument[]} documents - The array of documents to be added.\n * @throws {Error} When no embeddings are configured for the vector store.\n * @returns {Promise<void>} Resolves when the documents have been added successfully.\n */\n async addDocuments(documents: EmbeddingDocument[]): Promise<void> {\n if (documents.length === 0) {\n return;\n }\n const texts = documents.map(({ content }) => content);\n if (!this.embeddings) {\n throw new Error('No Embeddings configured for the vector store.');\n }\n\n const vectors = await this.embeddings.embedDocuments(texts);\n this.logger.info(\n `Received ${vectors.length} vectors from embeddings creation.`,\n );\n this.addVectors(vectors, documents);\n }\n\n /**\n * Adds vectors to the database along with corresponding documents.\n *\n * @param {number[][]} vectors - The vectors to be added.\n * @param {EmbeddingDoc[]} documents - The corresponding documents.\n * @return {Promise<void>} - A promise that resolves when the vectors are added successfully.\n * @throws {Error} - If there is an error inserting the vectors.\n */\n private async addVectors(\n vectors: number[][],\n documents: EmbeddingDocument[],\n ): Promise<void> {\n try {\n const rows = [];\n for (let i = 0; i < vectors.length; i += 1) {\n const embedding = vectors[i];\n const embeddingString = `[${embedding.join(',')}]`;\n const values = {\n content: documents[i].content.replace(/\\0/g, ''),\n vector: embeddingString.replace(/\\0/g, ''),\n metadata: documents[i].metadata,\n };\n rows.push(values);\n }\n\n await this.client.batchInsert(this.tableName, rows, this.chunkSize);\n } catch (e) {\n this.logger.error((e as Error).message);\n throw new 
Error(`Error inserting: ${(e as Error).message}`);\n }\n }\n\n /**\n * Deletes records from the database table by their ids.\n *\n * @param {string[]} ids - The array of ids of the records to be deleted.\n * @returns {Promise<void>} - A promise that resolves when the deletion is complete.\n */\n private async deleteById(ids: string[]) {\n await this.table().delete().whereIn('id', ids);\n }\n\n /**\n * Deletes rows from the table based on the specified filter.\n *\n * @param {EmbeddingDocMetadata} filter - The filter to apply for deletion.\n * @returns {Promise} - A Promise that resolves when the deletion is complete.\n */\n private async deleteByFilter(filter: EmbeddingDocumentMetadata) {\n const queryString = `\n DELETE FROM ${this.tableName}\n WHERE metadata::jsonb @> :filter\n `;\n return this.client.raw(queryString, { filter });\n }\n\n /**\n * Deletes documents based on the provided deletion parameters.\n * Either `ids` or `filter` must be specified.\n *\n * @param {Object} deletionParams - The deletion parameters.\n * @param {Array<string>} [deletionParams.ids] - The document IDs to delete.\n * @param {EmbeddingDocMetadata} [deletionParams.filter] - The filter to match documents to be deleted.\n *\n * @return {Promise<void>} - A Promise that resolves once the documents have been deleted.\n */\n async deleteDocuments(deletionParams: {\n ids?: string[];\n filter?: EmbeddingDocumentMetadata;\n }): Promise<void> {\n const { ids, filter } = deletionParams;\n\n if (!(ids || filter)) {\n throw new Error(\n 'You must specify either ids or a filter when deleting documents.',\n );\n }\n\n if (ids && filter) {\n throw new Error(\n 'You cannot specify both ids and a filter when deleting documents.',\n );\n }\n\n if (ids) {\n await this.deleteById(ids);\n } else if (filter) {\n await this.deleteByFilter(filter);\n }\n }\n\n /**\n * Finds the most similar documents to a given query vector, along with their similarity scores.\n *\n * @param {number[]} query - The query vector to compare against.\n * @param {number} amount - The maximum number of results to return.\n * @param {EmbeddingDocumentMetadata} [filter] - Optional filter to limit the search results.\n * @returns {Promise<[EmbeddingDocument, number][]>} - An array of document similarity results, where each\n * result is a tuple containing the document and its similarity score.\n */\n private async similaritySearchVectorWithScore(\n query: number[],\n amount: number,\n filter?: EmbeddingDocumentMetadata,\n ): Promise<[EmbeddingDocument, number][]> {\n const embeddingString = `[${query.join(',')}]`;\n const queryString = `\n SELECT *, vector <=> :embeddingString as \"_distance\"\n FROM ${this.tableName}\n WHERE metadata::jsonb @> :filter\n ORDER BY \"_distance\" ASC\n LIMIT :amount\n `;\n\n const documents = (\n await this.client.raw(queryString, {\n embeddingString,\n filter: JSON.stringify(filter ?? 
{}),\n amount,\n })\n ).rows;\n\n const results = [] as [EmbeddingDocument, number][];\n for (const doc of documents) {\n // eslint-ignore-next-line\n if (doc._distance !== null && doc.content !== null) {\n const document = {\n content: doc.content,\n metadata: doc.metadata,\n };\n results.push([document, doc._distance]);\n }\n }\n return results;\n }\n\n /**\n * Performs a similarity search using the given query and filter.\n *\n * @param {string} query - The query to perform the similarity search on.\n * @param {EmbeddingDocMetadata} filter - The filter to apply to the search results.\n * @param {number} [amount=4] - The number of results to return.\n * @return {Promise<EmbeddingDoc[]>} - A promise that resolves to an array of RoadieEmbeddingDoc objects representing the search results.\n * @throws {Error} - Throws an error if there are no embeddings configured for the vector store.\n */\n async similaritySearch(\n query: string,\n filter?: EmbeddingDocumentMetadata,\n amount: number = this.amount,\n ): Promise<EmbeddingDocument[]> {\n if (!this.embeddings) {\n throw new Error('No Embeddings configured for the vector store.');\n }\n const results = await this.similaritySearchVectorWithScore(\n await this.embeddings.embedQuery(query),\n amount,\n filter,\n );\n\n return results.map(result => result[0]);\n }\n}\n"],"names":[],"mappings":";;AAmBO,MAAM,aAAA,CAAqC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUhD,YACmB,MAAA,EACA,MAAA,EACA,MAAA,GAAiB,CAAA,EACjB,YAAoB,GAAA,EACrC;AAJiB,IAAA,IAAA,CAAA,MAAA,GAAA,MAAA;AACA,IAAA,IAAA,CAAA,MAAA,GAAA,MAAA;AACA,IAAA,IAAA,CAAA,MAAA,GAAA,MAAA;AACA,IAAA,IAAA,CAAA,SAAA,GAAA,SAAA;AAAA,EAChB;AAAA,EAdc,SAAA,GAAoB,YAAA;AAAA,EAC7B,UAAA;AAAA,EAeR,aAAa,UAAA,CAAW,EAAE,MAAA,EAAQ,QAAA,EAAU,QAAO,EAAyB;AAC1E,IAAA,MAAM,MAAA,GAAS,MAAM,QAAA,CAAS,SAAA,EAAU;AACxC,IAAA,MAAM,YAAY,MAAA,CAAO,iBAAA;AAAA,MACvB;AAAA,KACF;AACA,IAAA,MAAM,SAAS,MAAA,CAAO,iBAAA;AAAA,MACpB;AAAA,KACF;AAEA,IAAA,OAAO,IAAI,aAAA,CAAc,MAAA,EAAQ,MAAA,EAAQ,QAAQ,SAAS,CAAA;AAAA,EAC5D;AAAA,EAEA,kBAAkB,UAAA,EAAwC;AACxD,IAAA,IAAI,KAAK,UAAA,EAAY;AACnB,MAAA,IAAA,CAAK,MAAA,CAAO,KAAK,4CAA4C,CAAA;AAAA,IAC/D;AACA,IAAA,IAAA,CAAK,UAAA,GAAa,UAAA;AAAA,EACpB;AAAA,EAEA,KAAA,GAAQ;AACN,IAAA,OAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,SAAS,CAAA;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,aAAa,SAAA,EAA+C;AAChE,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA;AAAA,IACF;AACA,IAAA,MAAM,QAAQ,SAAA,CAAU,GAAA,CAAI,CAAC,EAAE,OAAA,OAAc,OAAO,CAAA;AACpD,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,MAAM,gDAAgD,CAAA;AAAA,IAClE;AAEA,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,UAAA,CAAW,eAAe,KAAK,CAAA;AAC1D,IAAA,IAAA,CAAK,MAAA,CAAO,IAAA;AAAA,MACV,CAAA,SAAA,EAAY,QAAQ,MAAM,CAAA,kCAAA;AAAA,KAC5B;AACA,IAAA,IAAA,CAAK,UAAA,CAAW,SAAS,SAAS,CAAA;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAc,UAAA,CACZ,OAAA,EACA,SAAA,EACe;AACf,IAAA,IAAI;AACF,MAAA,MAAM,OAAO,EAAC;AACd,MAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,OAAA,CAAQ,MAAA,EAAQ,KAAK,CAAA,EAAG;AAC1C,QAAA,MAAM,SAAA,GAAY,QAAQ,CAAC,CAAA;AAC3B,QAAA,MAAM,eAAA,GAAkB,CAAA,CAAA,EAAI,SAAA,CAAU,IAAA,CAAK,GAAG,CAAC,CAAA,CAAA,CAAA;AAC/C,QAAA,MAAM,MAAA,GAAS;AAAA,UACb,SAAS,SAAA,CAAU,CAAC,EAAE,OAAA,CAAQ,OAAA,CAAQ,OAAO,EAAE,CAAA;AAAA,UAC/C,MAAA,EAAQ,eAAA,CAAgB,OAAA,CAAQ,KAAA,EAAO,EAAE,CAAA;AAAA,UACzC,QAAA,EAAU,SAAA,CAAU,CAAC,CAAA,CAAE;AAAA,SACzB;AACA,QAAA,IAAA,CAAK,KAAK,MAAM,CAAA;AAAA,MAClB;AAEA,MAAA,MAAM,KAAK,MAAA,CAAO,WAAA,CAAY,KAAK,SAAA,EAAW,IAAA,EAAM,KAAK,SAAS,CAAA;AAAA,IACpE,SAAS,CAAA,EAAG;AACV,MAAA,IAAA,CAAK,MAAA,CAAO,KAAA,CAAO,CAAA,CAAY,OAAO,CAAA;AACtC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,iBAAA,EAAqB,CAAA,CAAY,OAAO
,CAAA,CAAE,CAAA;AAAA,IAC5D;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAc,WAAW,GAAA,EAAe;AACtC,IAAA,MAAM,KAAK,KAAA,EAAM,CAAE,QAAO,CAAE,OAAA,CAAQ,MAAM,GAAG,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAc,eAAe,MAAA,EAAmC;AAC9D,IAAA,MAAM,WAAA,GAAc;AAAA,kBAAA,EACJ,KAAK,SAAS;AAAA;AAAA,IAAA,CAAA;AAG9B,IAAA,OAAO,KAAK,MAAA,CAAO,GAAA,CAAI,WAAA,EAAa,EAAE,QAAQ,CAAA;AAAA,EAChD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,gBAAgB,cAAA,EAGJ;AAChB,IAAA,MAAM,EAAE,GAAA,EAAK,MAAA,EAAO,GAAI,cAAA;AAExB,IAAA,IAAI,EAAE,OAAO,MAAA,CAAA,EAAS;AACpB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AAEA,IAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AAEA,IAAA,IAAI,GAAA,EAAK;AACP,MAAA,MAAM,IAAA,CAAK,WAAW,GAAG,CAAA;AAAA,IAC3B,WAAW,MAAA,EAAQ;AACjB,MAAA,MAAM,IAAA,CAAK,eAAe,MAAM,CAAA;AAAA,IAClC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAc,+BAAA,CACZ,KAAA,EACA,MAAA,EACA,MAAA,EACwC;AACxC,IAAA,MAAM,eAAA,GAAkB,CAAA,CAAA,EAAI,KAAA,CAAM,IAAA,CAAK,GAAG,CAAC,CAAA,CAAA,CAAA;AAC3C,IAAA,MAAM,WAAA,GAAc;AAAA;AAAA,WAAA,EAEX,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA,IAAA,CAAA;AAMvB,IAAA,MAAM,SAAA,GAAA,CACJ,MAAM,IAAA,CAAK,MAAA,CAAO,IAAI,WAAA,EAAa;AAAA,MACjC,eAAA;AAAA,MACA,MAAA,EAAQ,IAAA,CAAK,SAAA,CAAU,MAAA,IAAU,EAAE,CAAA;AAAA,MACnC;AAAA,KACD,CAAA,EACD,IAAA;AAEF,IAAA,MAAM,UAAU,EAAC;AACjB,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAE3B,MAAA,IAAI,GAAA,CAAI,SAAA,KAAc,IAAA,IAAQ,GAAA,CAAI,YAAY,IAAA,EAAM;AAClD,QAAA,MAAM,QAAA,GAAW;AAAA,UACf,SAAS,GAAA,CAAI,OAAA;AAAA,UACb,UAAU,GAAA,CAAI;AAAA,SAChB;AACA,QAAA,OAAA,CAAQ,IAAA,CAAK,CAAC,QAAA,EAAU,GAAA,CAAI,SAAS,CAAC,CAAA;AAAA,MACxC;AAAA,IACF;AACA,IAAA,OAAO,OAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,gBAAA,CACJ,KAAA,EACA,MAAA,EACA,MAAA,GAAiB,KAAK,MAAA,EACQ;AAC9B,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,MAAM,gDAAgD,CAAA;AAAA,IAClE;AACA,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,+BAAA;AAAA,MACzB,MAAM,IAAA,CAAK,UAAA,CAAW,UAAA,CAAW,KAAK,CAAA;AAAA,MACtC,MAAA;AAAA,MACA;AAAA,KACF;AAEA,IAAA,OAAO,OAAA,CAAQ,GAAA,CAAI,CAAA,MAAA,KAAU,MAAA,CAAO,CAAC,CAAC,CAAA;AAAA,EACxC;AACF;;;;"}
+
{"version":3,"file":"pg-vector-store.cjs.js","sources":["../../src/database/pg-vector-store.ts"],"sourcesContent":["import {\n DatabaseService,\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport {\n VectorStore,\n EmbeddingDocument,\n EmbeddingDocumentMetadata,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { Embeddings } from '@langchain/core/embeddings';\nimport { Knex } from 'knex';\nimport { createHash } from 'crypto';\nimport { v4 as uuid } from 'uuid';\n\nexport type PgVectorStoreOptions = {\n database: DatabaseService;\n logger: LoggerService;\n config: RootConfigService;\n};\n\nexport class PgVectorStore implements VectorStore {\n private readonly tableName: string = 'embeddings';\n private embeddings?: Omit<Embeddings, 'caller'>;\n\n // Recency bias configuration\n private readonly RECENCY_WEIGHT = 0.3; // Weight for document recency (0-1)\n private readonly SIMILARITY_WEIGHT = 1 - this.RECENCY_WEIGHT; // Weight for vector similarity (0-1)\n private readonly RECENCY_HALF_LIFE_DAYS = 180; // Days until recency boost is halved (6 months)\n private readonly AGE_SCALE_FACTOR = 86400; // Seconds in a day for timestamp conversion\n\n /**\n * Creates an instance of PgVectorStore.\n * @param client - The Knex client to interact with the PostgreSQL database.\n * @param [amount=4] - The number of embeddings to store.\n * @param [chunkSize=500] - The size of each chunk of embeddings.\n */\n constructor(\n private readonly client: Knex,\n private readonly logger: LoggerService,\n private readonly amount: number = 4,\n private readonly chunkSize: number = 500,\n ) {}\n\n static async fromConfig({ config, database, logger }: PgVectorStoreOptions) {\n const client = await database.getClient();\n const chunkSize = config.getOptionalNumber(\n 'aiAssistant.storage.pgVector.chunkSize',\n );\n const amount = config.getOptionalNumber(\n 'aiAssistant.storage.pgVector.amount',\n );\n\n return new PgVectorStore(client, logger, amount, chunkSize);\n }\n\n connectEmbeddings(embeddings: Omit<Embeddings, 'caller'>) {\n if (this.embeddings) {\n this.logger.warn('Embeddings already connected, overwriting.');\n }\n this.embeddings = embeddings;\n }\n\n table() {\n return this.client(this.tableName);\n }\n\n /**\n * Add documents to the vector store.\n *\n * @param {EmbeddingDocument[]} documents - The array of documents to be added.\n * @throws {Error} When no embeddings are configured for the vector store.\n * @returns {Promise<void>} Resolves when the documents have been added successfully.\n */\n async addDocuments(documents: EmbeddingDocument[]): Promise<void> {\n if (documents.length === 0) {\n return;\n }\n\n if (!this.embeddings) {\n throw new Error('No Embeddings configured for the vector store.');\n }\n\n // Fetch existing documents with matching (id, source) pairs\n const conditions = documents\n .map(() => `(metadata->>'id' = ? 
AND metadata->>'source' = ?)`)\n .join(' OR ');\n\n const params = documents.flatMap(doc => [\n doc.metadata.id,\n doc.metadata.source,\n ]);\n\n const existingDocuments: EmbeddingDocument[] = await this.client\n .select('*')\n .from(this.tableName)\n .whereRaw(conditions, params);\n\n // Build a map for quick lookups\n const existingMap = new Map(\n existingDocuments.map(doc => [\n `${doc.metadata.id}:${doc.metadata.source}`,\n doc,\n ]),\n );\n\n // Categorize documents\n const newDocuments: EmbeddingDocument[] = [];\n const documentsToUpdate: Array<EmbeddingDocument & { id: string }> = [];\n\n for (const doc of documents) {\n const key = `${doc.metadata.id}:${doc.metadata.source}`;\n const existing = existingMap.get(key);\n\n if (!existing) {\n newDocuments.push(doc);\n continue;\n }\n\n // Check if content changed\n const newHash = createHash('sha256').update(doc.content).digest('hex');\n if (!existing.hash || newHash !== existing.hash) {\n documentsToUpdate.push({ ...doc, id: existing.id! });\n }\n }\n\n const allDocumentsToAdd = [...newDocuments, ...documentsToUpdate];\n\n if (allDocumentsToAdd.length === 0) {\n this.logger.debug('No new or updated documents to add.');\n return;\n }\n\n // Delete old versions before re-adding\n if (documentsToUpdate.length > 0) {\n const uniqueDocKeys = new Set(\n documentsToUpdate.map(\n doc => `${doc.metadata.id}:${doc.metadata.source}`,\n ),\n );\n\n for (const key of uniqueDocKeys) {\n const [id, source] = key.split(':');\n await this.client(this.tableName)\n .delete()\n .whereRaw(`metadata->>'id' = ? AND metadata->>'source' = ?`, [\n id,\n source,\n ]);\n }\n\n this.logger.info(\n `Deleted all chunks for ${uniqueDocKeys.size} updated documents`,\n );\n }\n\n const contents = allDocumentsToAdd.map(doc => doc.content);\n const vectors = await this.embeddings!.embedDocuments(contents);\n\n const rows = allDocumentsToAdd.map((doc, index) => {\n const vector = vectors[index];\n const hash = createHash('sha256').update(doc.content).digest('hex');\n\n return {\n hash,\n id: doc.id ?? 
uuid(),\n metadata: doc.metadata,\n lastUpdated: new Date(),\n content: doc.content.replace(/\\0/g, ''),\n vector: `[${vector.join(',')}]`,\n };\n });\n this.logger.info(\n `Adding ${rows.length} documents (${newDocuments.length} new, ${documentsToUpdate.length} updated).`,\n );\n\n await this.client.batchInsert(this.tableName, rows, this.chunkSize);\n }\n\n /**\n * Deletes records from the database table by their ids.\n *\n * @param {string[]} ids - The array of ids of the records to be deleted.\n * @returns {Promise<void>} - A promise that resolves when the deletion is complete.\n */\n private async deleteById(ids: string[]) {\n await this.table().delete().whereIn('id', ids);\n }\n\n /**\n * Deletes rows from the table based on the specified filter.\n *\n * @param {EmbeddingDocMetadata} filter - The filter to apply for deletion.\n * @returns {Promise} - A Promise that resolves when the deletion is complete.\n */\n private async deleteByFilter(filter: EmbeddingDocumentMetadata) {\n const queryString = `\n DELETE FROM ${this.tableName}\n WHERE metadata::jsonb @> :filter\n `;\n return this.client.raw(queryString, { filter });\n }\n\n /**\n * Deletes documents based on the provided deletion parameters.\n * Either `ids` or `filter` must be specified.\n *\n * @param {Object} deletionParams - The deletion parameters.\n * @param {Array<string>} [deletionParams.ids] - The document IDs to delete.\n * @param {EmbeddingDocMetadata} [deletionParams.filter] - The filter to match documents to be deleted.\n *\n * @return {Promise<void>} - A Promise that resolves once the documents have been deleted.\n */\n async deleteDocuments(deletionParams: {\n ids?: string[];\n filter?: EmbeddingDocumentMetadata;\n }): Promise<void> {\n const { ids, filter } = deletionParams;\n\n if (!(ids || filter)) {\n throw new Error(\n 'You must specify either ids or a filter when deleting documents.',\n );\n }\n\n if (ids && filter) {\n throw new Error(\n 'You cannot specify both ids and a filter when deleting documents.',\n );\n }\n\n if (ids) {\n await this.deleteById(ids);\n } else if (filter) {\n await this.deleteByFilter(filter);\n }\n }\n\n /**\n * Finds the most similar documents to a given query vector, along with their similarity scores.\n * Results are ranked by a weighted combination of vector similarity and document recency.\n * i.e newer documents are favored in the ranking but if no new documents exist, older but more similar documents will still be returned.\n *\n * @param {number[]} query - The query vector to compare against.\n * @param {number} amount - The maximum number of results to return.\n * @param {EmbeddingDocumentMetadata} [filter] - Optional filter to limit the search results.\n * @returns {Promise<[EmbeddingDocument, number][]>} - An array of document similarity results, where each\n * result is a tuple containing the document and its similarity score.\n */\n private async similaritySearchVectorWithScore(\n query: number[],\n amount: number,\n filter?: EmbeddingDocumentMetadata,\n ): Promise<[EmbeddingDocument, number][]> {\n const embeddingString = `[${query.join(',')}]`;\n\n const queryString = `\n SELECT\n *,\n (vector <=> :embeddingString) as \"_distance\",\n (EXTRACT(EPOCH FROM (NOW() - COALESCE(\"lastUpdated\", NOW()))) / :ageScaleFactor) as \"_age_days\",\n (\n ((vector <=> :embeddingString) * :similarityWeight) -\n (EXP(-0.693 * (EXTRACT(EPOCH FROM (NOW() - COALESCE(\"lastUpdated\", NOW()))) / :ageScaleFactor) / :recencyHalfLife) * :recencyWeight)\n ) as \"_combined_score\"\n FROM 
${this.tableName}\n WHERE metadata::jsonb @> :filter\n ORDER BY \"_combined_score\" ASC\n LIMIT :amount\n `;\n\n const documents = (\n await this.client.raw(queryString, {\n embeddingString,\n filter: JSON.stringify(filter ?? {}),\n amount,\n similarityWeight: this.SIMILARITY_WEIGHT,\n recencyWeight: this.RECENCY_WEIGHT,\n recencyHalfLife: this.RECENCY_HALF_LIFE_DAYS,\n ageScaleFactor: this.AGE_SCALE_FACTOR,\n })\n ).rows;\n\n const results = [] as [EmbeddingDocument, number][];\n for (const doc of documents) {\n // eslint-ignore-next-line\n if (doc._distance !== null && doc.content !== null) {\n const document: EmbeddingDocument = {\n content: doc.content,\n metadata: {\n ...doc.metadata,\n ageInDays: Math.round(doc._age_days),\n lastUpdated: doc.lastUpdated,\n },\n };\n results.push([document, doc._distance]);\n }\n }\n return results;\n }\n\n /**\n * Performs a similarity search using the given query and filter.\n *\n * @param {string} query - The query to perform the similarity search on.\n * @param {EmbeddingDocMetadata} filter - The filter to apply to the search results.\n * @param {number} [amount=4] - The number of results to return.\n * @return {Promise<EmbeddingDoc[]>} - A promise that resolves to an array of RoadieEmbeddingDoc objects representing the search results.\n * @throws {Error} - Throws an error if there are no embeddings configured for the vector store.\n */\n async similaritySearch(\n query: string,\n filter?: EmbeddingDocumentMetadata,\n amount: number = this.amount,\n ): Promise<EmbeddingDocument[]> {\n if (!this.embeddings) {\n throw new Error('No Embeddings configured for the vector store.');\n }\n const results = await this.similaritySearchVectorWithScore(\n await this.embeddings.embedQuery(query),\n amount,\n filter,\n );\n\n return results.map(result => result[0]);\n 
}\n}\n"],"names":["createHash","uuid"],"mappings":";;;;;AAqBO,MAAM,aAAA,CAAqC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBhD,YACmB,MAAA,EACA,MAAA,EACA,MAAA,GAAiB,CAAA,EACjB,YAAoB,GAAA,EACrC;AAJiB,IAAA,IAAA,CAAA,MAAA,GAAA,MAAA;AACA,IAAA,IAAA,CAAA,MAAA,GAAA,MAAA;AACA,IAAA,IAAA,CAAA,MAAA,GAAA,MAAA;AACA,IAAA,IAAA,CAAA,SAAA,GAAA,SAAA;AAAA,EAChB;AAAA,EApBc,SAAA,GAAoB,YAAA;AAAA,EAC7B,UAAA;AAAA;AAAA,EAGS,cAAA,GAAiB,GAAA;AAAA;AAAA,EACjB,iBAAA,GAAoB,IAAI,IAAA,CAAK,cAAA;AAAA;AAAA,EAC7B,sBAAA,GAAyB,GAAA;AAAA;AAAA,EACzB,gBAAA,GAAmB,KAAA;AAAA,EAepC,aAAa,UAAA,CAAW,EAAE,MAAA,EAAQ,QAAA,EAAU,QAAO,EAAyB;AAC1E,IAAA,MAAM,MAAA,GAAS,MAAM,QAAA,CAAS,SAAA,EAAU;AACxC,IAAA,MAAM,YAAY,MAAA,CAAO,iBAAA;AAAA,MACvB;AAAA,KACF;AACA,IAAA,MAAM,SAAS,MAAA,CAAO,iBAAA;AAAA,MACpB;AAAA,KACF;AAEA,IAAA,OAAO,IAAI,aAAA,CAAc,MAAA,EAAQ,MAAA,EAAQ,QAAQ,SAAS,CAAA;AAAA,EAC5D;AAAA,EAEA,kBAAkB,UAAA,EAAwC;AACxD,IAAA,IAAI,KAAK,UAAA,EAAY;AACnB,MAAA,IAAA,CAAK,MAAA,CAAO,KAAK,4CAA4C,CAAA;AAAA,IAC/D;AACA,IAAA,IAAA,CAAK,UAAA,GAAa,UAAA;AAAA,EACpB;AAAA,EAEA,KAAA,GAAQ;AACN,IAAA,OAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,SAAS,CAAA;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,aAAa,SAAA,EAA+C;AAChE,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA;AAAA,IACF;AAEA,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,MAAM,gDAAgD,CAAA;AAAA,IAClE;AAGA,IAAA,MAAM,aAAa,SAAA,CAChB,GAAA,CAAI,MAAM,CAAA,iDAAA,CAAmD,CAAA,CAC7D,KAAK,MAAM,CAAA;AAEd,IAAA,MAAM,MAAA,GAAS,SAAA,CAAU,OAAA,CAAQ,CAAA,GAAA,KAAO;AAAA,MACtC,IAAI,QAAA,CAAS,EAAA;AAAA,MACb,IAAI,QAAA,CAAS;AAAA,KACd,CAAA;AAED,IAAA,MAAM,iBAAA,GAAyC,MAAM,IAAA,CAAK,MAAA,CACvD,MAAA,CAAO,GAAG,CAAA,CACV,IAAA,CAAK,IAAA,CAAK,SAAS,CAAA,CACnB,QAAA,CAAS,YAAY,MAAM,CAAA;AAG9B,IAAA,MAAM,cAAc,IAAI,GAAA;AAAA,MACtB,iBAAA,CAAkB,IAAI,CAAA,GAAA,KAAO;AAAA,QAC3B,GAAG,GAAA,CAAI,QAAA,CAAS,EAAE,CAAA,CAAA,EAAI,GAAA,CAAI,SAAS,MAAM,CAAA,CAAA;AAAA,QACzC;AAAA,OACD;AAAA,KACH;AAGA,IAAA,MAAM,eAAoC,EAAC;AAC3C,IAAA,MAAM,oBAA+D,EAAC;AAEtE,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAC3B,MAAA,MAAM,GAAA,GAAM,GAAG,GAAA,CAAI,QAAA,CAAS,EAAE,CAAA,CAAA,EAAI,GAAA,CAAI,SAAS,MAAM,CAAA,CAAA;AACrD,MAAA,MAAM,QAAA,GAAW,WAAA,CAAY,GAAA,CAAI,GAAG,CAAA;AAEpC,MAAA,IAAI,CAAC,QAAA,EAAU;AACb,QAAA,YAAA,CAAa,KAAK,GAAG,CAAA;AACrB,QAAA;AAAA,MACF;AAGA,MAAA,MAAM,OAAA,GAAUA,kBAAW,QAAQ,CAAA,CAAE,OAAO,GAAA,CAAI,OAAO,CAAA,CAAE,MAAA,CAAO,KAAK,CAAA;AACrE,MAAA,IAAI,CAAC,QAAA,CAAS,IAAA,IAAQ,OAAA,KAAY,SAAS,IAAA,EAAM;AAC/C,QAAA,iBAAA,CAAkB,KAAK,EAAE,GAAG,KAAK,EAAA,EAAI,QAAA,CAAS,IAAK,CAAA;AAAA,MACrD;AAAA,IACF;AAEA,IAAA,MAAM,iBAAA,GAAoB,CAAC,GAAG,YAAA,EAAc,GAAG,iBAAiB,CAAA;AAEhE,IAAA,IAAI,iBAAA,CAAkB,WAAW,CAAA,EAAG;AAClC,MAAA,IAAA,CAAK,MAAA,CAAO,MAAM,qCAAqC,CAAA;AACvD,MAAA;AAAA,IACF;AAGA,IAAA,IAAI,iBAAA,CAAkB,SAAS,CAAA,EAAG;AAChC,MAAA,MAAM,gBAAgB,IAAI,GAAA;AAAA,QACxB,iBAAA,CAAkB,GAAA;AAAA,UAChB,CAAA,GAAA,KAAO,GAAG,GAAA,CAAI,QAAA,CAAS,EAAE,CAAA,CAAA,EAAI,GAAA,CAAI,SAAS,MAAM,CAAA;AAAA;AAClD,OACF;AAEA,MAAA,KAAA,MAAW,OAAO,aAAA,EAAe;AAC/B,QAAA,MAAM,CAAC,EAAA,EAAI,MAAM,CAAA,GAAI,GAAA,CAAI,MAAM,GAAG,CAAA;AAClC,QAAA,MAAM,IAAA,CAAK,OAAO,IAAA,CAAK,SAAS,EAC7B,MAAA,EAAO,CACP,SAAS,CAAA,+CAAA,CAAA,EAAmD;AAAA,UAC3D,EAAA;AAAA,UACA;AAAA,SACD,CAAA;AAAA,MACL;AAEA,MAAA,IAAA,CAAK,MAAA,CAAO,IAAA;AAAA,QACV,CAAA,uBAAA,EAA0B,cAAc,IAAI,CAAA,kBAAA;AAAA,OAC9C;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAW,iBAAA,CAAkB,GAAA,CAAI,CAAA,GAAA,KAAO,IAAI,OAAO,CAAA;AACzD,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,UAAA,CAAY,eAAe,QAAQ,CAAA;AAE9D,IAAA,MAAM,IAAA,GAAO,iBAAA,CAAkB,GAAA,CAAI,CAAC,KAAK,KAAA,KAAU;AACjD,MAAA,MAAM,MAAA,GAAS,QAAQ,KAAK,CAAA;AAC5B,MAAA,MAAM,IAAA,GAAOA,kBAAW,QAAQ,CAAA,CAAE,OAAO,GAAA,CAAI,OAAO,CAAA,CAAE,MAAA,CAAO,KAAK,CAAA;AAElE,MAAA,OAAO;AAAA,Q
ACL,IAAA;AAAA,QACA,EAAA,EAAI,GAAA,CAAI,EAAA,IAAMC,OAAA,EAAK;AAAA,QACnB,UAAU,GAAA,CAAI,QAAA;AAAA,QACd,WAAA,sBAAiB,IAAA,EAAK;AAAA,QACtB,OAAA,EAAS,GAAA,CAAI,OAAA,CAAQ,OAAA,CAAQ,OAAO,EAAE,CAAA;AAAA,QACtC,MAAA,EAAQ,CAAA,CAAA,EAAI,MAAA,CAAO,IAAA,CAAK,GAAG,CAAC,CAAA,CAAA;AAAA,OAC9B;AAAA,IACF,CAAC,CAAA;AACD,IAAA,IAAA,CAAK,MAAA,CAAO,IAAA;AAAA,MACV,CAAA,OAAA,EAAU,KAAK,MAAM,CAAA,YAAA,EAAe,aAAa,MAAM,CAAA,MAAA,EAAS,kBAAkB,MAAM,CAAA,UAAA;AAAA,KAC1F;AAEA,IAAA,MAAM,KAAK,MAAA,CAAO,WAAA,CAAY,KAAK,SAAA,EAAW,IAAA,EAAM,KAAK,SAAS,CAAA;AAAA,EACpE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAc,WAAW,GAAA,EAAe;AACtC,IAAA,MAAM,KAAK,KAAA,EAAM,CAAE,QAAO,CAAE,OAAA,CAAQ,MAAM,GAAG,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAc,eAAe,MAAA,EAAmC;AAC9D,IAAA,MAAM,WAAA,GAAc;AAAA,kBAAA,EACJ,KAAK,SAAS;AAAA;AAAA,IAAA,CAAA;AAG9B,IAAA,OAAO,KAAK,MAAA,CAAO,GAAA,CAAI,WAAA,EAAa,EAAE,QAAQ,CAAA;AAAA,EAChD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,gBAAgB,cAAA,EAGJ;AAChB,IAAA,MAAM,EAAE,GAAA,EAAK,MAAA,EAAO,GAAI,cAAA;AAExB,IAAA,IAAI,EAAE,OAAO,MAAA,CAAA,EAAS;AACpB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AAEA,IAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,MAAA,MAAM,IAAI,KAAA;AAAA,QACR;AAAA,OACF;AAAA,IACF;AAEA,IAAA,IAAI,GAAA,EAAK;AACP,MAAA,MAAM,IAAA,CAAK,WAAW,GAAG,CAAA;AAAA,IAC3B,WAAW,MAAA,EAAQ;AACjB,MAAA,MAAM,IAAA,CAAK,eAAe,MAAM,CAAA;AAAA,IAClC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,MAAc,+BAAA,CACZ,KAAA,EACA,MAAA,EACA,MAAA,EACwC;AACxC,IAAA,MAAM,eAAA,GAAkB,CAAA,CAAA,EAAI,KAAA,CAAM,IAAA,CAAK,GAAG,CAAC,CAAA,CAAA,CAAA;AAE3C,IAAA,MAAM,WAAA,GAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,SAAA,EASb,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA,EAAA,CAAA;AAMrB,IAAA,MAAM,SAAA,GAAA,CACJ,MAAM,IAAA,CAAK,MAAA,CAAO,IAAI,WAAA,EAAa;AAAA,MACjC,eAAA;AAAA,MACA,MAAA,EAAQ,IAAA,CAAK,SAAA,CAAU,MAAA,IAAU,EAAE,CAAA;AAAA,MACnC,MAAA;AAAA,MACA,kBAAkB,IAAA,CAAK,iBAAA;AAAA,MACvB,eAAe,IAAA,CAAK,cAAA;AAAA,MACpB,iBAAiB,IAAA,CAAK,sBAAA;AAAA,MACtB,gBAAgB,IAAA,CAAK;AAAA,KACtB,CAAA,EACD,IAAA;AAEF,IAAA,MAAM,UAAU,EAAC;AACjB,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAE3B,MAAA,IAAI,GAAA,CAAI,SAAA,KAAc,IAAA,IAAQ,GAAA,CAAI,YAAY,IAAA,EAAM;AAClD,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,SAAS,GAAA,CAAI,OAAA;AAAA,UACb,QAAA,EAAU;AAAA,YACR,GAAG,GAAA,CAAI,QAAA;AAAA,YACP,SAAA,EAAW,IAAA,CAAK,KAAA,CAAM,GAAA,CAAI,SAAS,CAAA;AAAA,YACnC,aAAa,GAAA,CAAI;AAAA;AACnB,SACF;AACA,QAAA,OAAA,CAAQ,IAAA,CAAK,CAAC,QAAA,EAAU,GAAA,CAAI,SAAS,CAAC,CAAA;AAAA,MACxC;AAAA,IACF;AACA,IAAA,OAAO,OAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,gBAAA,CACJ,KAAA,EACA,MAAA,EACA,MAAA,GAAiB,KAAK,MAAA,EACQ;AAC9B,IAAA,IAAI,CAAC,KAAK,UAAA,EAAY;AACpB,MAAA,MAAM,IAAI,MAAM,gDAAgD,CAAA;AAAA,IAClE;AACA,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,+BAAA;AAAA,MACzB,MAAM,IAAA,CAAK,UAAA,CAAW,UAAA,CAAW,KAAK,CAAA;AAAA,MACtC,MAAA;AAAA,MACA;AAAA,KACF;AAEA,IAAA,OAAO,OAAA,CAAQ,GAAA,CAAI,CAAA,MAAA,KAAU,MAAA,CAAO,CAAC,CAAC,CAAA;AAAA,EACxC;AACF;;;;"}
package/dist/services/chat.cjs.js
@@ -67,9 +67,7 @@ const createChatService = async ({
         const newMessages = chunkMessages.filter(
           (m) => conversationMessages.findIndex((cm) => cm.id === m.id) === -1
         );
-        console.log("newMessages: ", newMessages);
         if (newMessages.length !== 0) {
-          console.log("newMessages in if: ", newMessages);
           conversation.addMessages(
             newMessages,
             userEntityRef,
package/dist/services/chat.cjs.js.map
@@ -1 +1 @@
-
{"version":3,"file":"chat.cjs.js","sources":["../../src/services/chat.ts"],"sourcesContent":["import {\n CatalogService,\n catalogServiceRef,\n} from '@backstage/plugin-catalog-node';\nimport {\n SignalsService,\n signalsServiceRef,\n} from '@backstage/plugin-signals-node';\n\nimport {\n Message,\n EnabledTool,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-common';\n\nimport { getUser } from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { v4 as uuid } from 'uuid';\nimport type {\n BackstageCredentials,\n CacheService,\n UserInfoService,\n AuthService,\n ServiceRef,\n} from '@backstage/backend-plugin-api';\nimport {\n coreServices,\n createServiceFactory,\n createServiceRef,\n} from '@backstage/backend-plugin-api';\nimport { ConversationService, conversationServiceRef } from './conversation';\nimport { agentServiceRef, AgentService } from './agent';\nimport { SystemMessagePromptTemplate } from '@langchain/core/prompts';\n\nexport type ChatServiceOptions = {\n signals: SignalsService;\n catalog: CatalogService;\n cache: CacheService;\n auth: AuthService;\n userInfo: UserInfoService;\n conversation: ConversationService;\n agent: AgentService;\n};\n\ntype PromptOptions = {\n credentials: BackstageCredentials;\n messages: Message[];\n conversationId: string;\n stream?: boolean;\n tools?: EnabledTool[];\n modelId?: string;\n};\n\nexport type ChatService = {\n prompt: (options: PromptOptions) => Promise<Message[]>;\n};\n\nexport const createChatService = async ({\n signals,\n catalog,\n cache,\n auth,\n userInfo,\n conversation,\n agent,\n}: ChatServiceOptions): Promise<ChatService> => {\n const contextPromptTemplate = SystemMessagePromptTemplate.fromTemplate(`\n Calling User:\n {user}`);\n\n const prompt: ChatService['prompt'] = async ({\n conversationId,\n messages,\n stream = true,\n credentials,\n tools: enabledTools,\n modelId,\n }: PromptOptions) => {\n const streamFn = async () => {\n const { userEntityRef } = await userInfo.getUserInfo(credentials);\n const recentConversationMessages =\n await conversation.getRecentConversationMessages({\n conversationId,\n userEntityRef,\n limit: 10,\n excludeRoles: ['tool'],\n });\n\n const user = await getUser(cache, userEntityRef, catalog, auth);\n\n const messagesWithoutSystem = messages.filter(m => m.role !== 'system');\n\n conversation.addMessages(\n messagesWithoutSystem,\n userEntityRef,\n conversationId,\n recentConversationMessages,\n );\n\n const traceId = uuid();\n\n const context = await contextPromptTemplate.formatMessages({\n user,\n });\n\n const responseMessages: Message[] = [];\n\n const conversationMessages = [...recentConversationMessages, ...messages];\n\n agent.stream({\n credentials,\n messages: conversationMessages,\n tools: enabledTools,\n modelId,\n metadata: {\n conversationId,\n userId: userEntityRef,\n runName: 'ai-assistant-chat',\n runId: traceId,\n },\n context: context[0].text,\n onStreamChunk: async chunkMessages => {\n const newMessages: Message[] = chunkMessages.filter(\n m => conversationMessages.findIndex(cm => cm.id === m.id) === -1,\n );\n\n
+
{"version":3,"file":"chat.cjs.js","sources":["../../src/services/chat.ts"],"sourcesContent":["import {\n CatalogService,\n catalogServiceRef,\n} from '@backstage/plugin-catalog-node';\nimport {\n SignalsService,\n signalsServiceRef,\n} from '@backstage/plugin-signals-node';\n\nimport {\n Message,\n EnabledTool,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-common';\n\nimport { getUser } from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { v4 as uuid } from 'uuid';\nimport type {\n BackstageCredentials,\n CacheService,\n UserInfoService,\n AuthService,\n ServiceRef,\n} from '@backstage/backend-plugin-api';\nimport {\n coreServices,\n createServiceFactory,\n createServiceRef,\n} from '@backstage/backend-plugin-api';\nimport { ConversationService, conversationServiceRef } from './conversation';\nimport { agentServiceRef, AgentService } from './agent';\nimport { SystemMessagePromptTemplate } from '@langchain/core/prompts';\n\nexport type ChatServiceOptions = {\n signals: SignalsService;\n catalog: CatalogService;\n cache: CacheService;\n auth: AuthService;\n userInfo: UserInfoService;\n conversation: ConversationService;\n agent: AgentService;\n};\n\ntype PromptOptions = {\n credentials: BackstageCredentials;\n messages: Message[];\n conversationId: string;\n stream?: boolean;\n tools?: EnabledTool[];\n modelId?: string;\n};\n\nexport type ChatService = {\n prompt: (options: PromptOptions) => Promise<Message[]>;\n};\n\nexport const createChatService = async ({\n signals,\n catalog,\n cache,\n auth,\n userInfo,\n conversation,\n agent,\n}: ChatServiceOptions): Promise<ChatService> => {\n const contextPromptTemplate = SystemMessagePromptTemplate.fromTemplate(`\n Calling User:\n {user}`);\n\n const prompt: ChatService['prompt'] = async ({\n conversationId,\n messages,\n stream = true,\n credentials,\n tools: enabledTools,\n modelId,\n }: PromptOptions) => {\n const streamFn = async () => {\n const { userEntityRef } = await userInfo.getUserInfo(credentials);\n const recentConversationMessages =\n await conversation.getRecentConversationMessages({\n conversationId,\n userEntityRef,\n limit: 10,\n excludeRoles: ['tool'],\n });\n\n const user = await getUser(cache, userEntityRef, catalog, auth);\n\n const messagesWithoutSystem = messages.filter(m => m.role !== 'system');\n\n conversation.addMessages(\n messagesWithoutSystem,\n userEntityRef,\n conversationId,\n recentConversationMessages,\n );\n\n const traceId = uuid();\n\n const context = await contextPromptTemplate.formatMessages({\n user,\n });\n\n const responseMessages: Message[] = [];\n\n const conversationMessages = [...recentConversationMessages, ...messages];\n\n agent.stream({\n credentials,\n messages: conversationMessages,\n tools: enabledTools,\n modelId,\n metadata: {\n conversationId,\n userId: userEntityRef,\n runName: 'ai-assistant-chat',\n runId: traceId,\n },\n context: context[0].text,\n onStreamChunk: async chunkMessages => {\n const newMessages: Message[] = chunkMessages.filter(\n m => conversationMessages.findIndex(cm => cm.id === m.id) === -1,\n );\n\n if (newMessages.length !== 0) {\n conversation.addMessages(\n newMessages,\n userEntityRef,\n conversationId,\n conversationMessages,\n );\n\n conversationMessages.push(...newMessages);\n responseMessages.push(...newMessages);\n\n // Simulate streaming until langchain messages error is better understood\n for await (const m of newMessages) {\n const words = m.content.split(' ');\n const chunkSize = 5; // Send 5 words at a time\n let messageBuilder = 
'';\n\n for (let i = 0; i < words.length; i += chunkSize) {\n const wordChunk = words.slice(i, i + chunkSize).join(' ');\n messageBuilder = messageBuilder.concat(wordChunk).concat(' ');\n m.content = messageBuilder;\n\n await new Promise(resolve => setTimeout(resolve, 50));\n\n signals.publish({\n channel: `ai-assistant.chat.conversation-stream:${conversationId}`,\n message: { messages: [m] },\n recipients: {\n type: 'user',\n entityRef: userEntityRef,\n },\n });\n }\n }\n }\n },\n });\n\n return responseMessages;\n };\n\n return stream ? await streamFn() : [];\n };\n\n return {\n prompt,\n };\n};\n\nexport const chatServiceRef: ServiceRef<ChatService, 'plugin', 'singleton'> =\n createServiceRef<ChatService>({\n id: 'ai-assistant.chat-service',\n defaultFactory: async service =>\n createServiceFactory({\n service,\n deps: {\n cache: coreServices.cache,\n auth: coreServices.auth,\n userInfo: coreServices.userInfo,\n signals: signalsServiceRef,\n catalog: catalogServiceRef,\n conversation: conversationServiceRef,\n agent: agentServiceRef,\n },\n factory: async options => {\n return createChatService(options);\n },\n }),\n });\n"],"names":["SystemMessagePromptTemplate","getUser","uuid","createServiceRef","createServiceFactory","coreServices","signalsServiceRef","catalogServiceRef","conversationServiceRef","agentServiceRef"],"mappings":";;;;;;;;;;;AAuDO,MAAM,oBAAoB,OAAO;AAAA,EACtC,OAAA;AAAA,EACA,OAAA;AAAA,EACA,KAAA;AAAA,EACA,IAAA;AAAA,EACA,QAAA;AAAA,EACA,YAAA;AAAA,EACA;AACF,CAAA,KAAgD;AAC9C,EAAA,MAAM,qBAAA,GAAwBA,oCAA4B,YAAA,CAAa;AAAA;AAAA,UAAA,CAE9D,CAAA;AAET,EAAA,MAAM,SAAgC,OAAO;AAAA,IAC3C,cAAA;AAAA,IACA,QAAA;AAAA,IACA,MAAA,GAAS,IAAA;AAAA,IACT,WAAA;AAAA,IACA,KAAA,EAAO,YAAA;AAAA,IACP;AAAA,GACF,KAAqB;AACnB,IAAA,MAAM,WAAW,YAAY;AAC3B,MAAA,MAAM,EAAE,aAAA,EAAc,GAAI,MAAM,QAAA,CAAS,YAAY,WAAW,CAAA;AAChE,MAAA,MAAM,0BAAA,GACJ,MAAM,YAAA,CAAa,6BAAA,CAA8B;AAAA,QAC/C,cAAA;AAAA,QACA,aAAA;AAAA,QACA,KAAA,EAAO,EAAA;AAAA,QACP,YAAA,EAAc,CAAC,MAAM;AAAA,OACtB,CAAA;AAEH,MAAA,MAAM,OAAO,MAAMC,sCAAA,CAAQ,KAAA,EAAO,aAAA,EAAe,SAAS,IAAI,CAAA;AAE9D,MAAA,MAAM,wBAAwB,QAAA,CAAS,MAAA,CAAO,CAAA,CAAA,KAAK,CAAA,CAAE,SAAS,QAAQ,CAAA;AAEtE,MAAA,YAAA,CAAa,WAAA;AAAA,QACX,qBAAA;AAAA,QACA,aAAA;AAAA,QACA,cAAA;AAAA,QACA;AAAA,OACF;AAEA,MAAA,MAAM,UAAUC,OAAA,EAAK;AAErB,MAAA,MAAM,OAAA,GAAU,MAAM,qBAAA,CAAsB,cAAA,CAAe;AAAA,QACzD;AAAA,OACD,CAAA;AAED,MAAA,MAAM,mBAA8B,EAAC;AAErC,MAAA,MAAM,oBAAA,GAAuB,CAAC,GAAG,0BAAA,EAA4B,GAAG,QAAQ,CAAA;AAExE,MAAA,KAAA,CAAM,MAAA,CAAO;AAAA,QACX,WAAA;AAAA,QACA,QAAA,EAAU,oBAAA;AAAA,QACV,KAAA,EAAO,YAAA;AAAA,QACP,OAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,cAAA;AAAA,UACA,MAAA,EAAQ,aAAA;AAAA,UACR,OAAA,EAAS,mBAAA;AAAA,UACT,KAAA,EAAO;AAAA,SACT;AAAA,QACA,OAAA,EAAS,OAAA,CAAQ,CAAC,CAAA,CAAE,IAAA;AAAA,QACpB,aAAA,EAAe,OAAM,aAAA,KAAiB;AACpC,UAAA,MAAM,cAAyB,aAAA,CAAc,MAAA;AAAA,YAC3C,CAAA,CAAA,KAAK,qBAAqB,SAAA,CAAU,CAAA,EAAA,KAAM,GAAG,EAAA,KAAO,CAAA,CAAE,EAAE,CAAA,KAAM;AAAA,WAChE;AAEA,UAAA,IAAI,WAAA,CAAY,WAAW,CAAA,EAAG;AAC5B,YAAA,YAAA,CAAa,WAAA;AAAA,cACX,WAAA;AAAA,cACA,aAAA;AAAA,cACA,cAAA;AAAA,cACA;AAAA,aACF;AAEA,YAAA,oBAAA,CAAqB,IAAA,CAAK,GAAG,WAAW,CAAA;AACxC,YAAA,gBAAA,CAAiB,IAAA,CAAK,GAAG,WAAW,CAAA;AAGpC,YAAA,WAAA,MAAiB,KAAK,WAAA,EAAa;AACjC,cAAA,MAAM,KAAA,GAAQ,CAAA,CAAE,OAAA,CAAQ,KAAA,CAAM,GAAG,CAAA;AACjC,cAAA,MAAM,SAAA,GAAY,CAAA;AAClB,cAAA,IAAI,cAAA,GAAiB,EAAA;AAErB,cAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,KAAA,CAAM,MAAA,EAAQ,KAAK,SAAA,EAAW;AAChD,gBAAA,MAAM,SAAA,GAAY,MAAM,KAAA,CAAM,CAAA,EAAG,IAAI,SAAS,CAAA,CAAE,KAAK,GAAG,CAAA;AACxD,gBAAA,cAAA,GAAiB,cAAA,CAAe,MAAA,CAAO,SAAS,CAAA,CAAE,OAAO,GAAG,CAAA;AAC5D,gBAAA,CAAA,CAAE,OAAA,GAAU,cAAA;A
AEZ,gBAAA,MAAM,IAAI,OAAA,CAAQ,CAAA,OAAA,KAAW,UAAA,CAAW,OAAA,EAAS,EAAE,CAAC,CAAA;AAEpD,gBAAA,OAAA,CAAQ,OAAA,CAAQ;AAAA,kBACd,OAAA,EAAS,yCAAyC,cAAc,CAAA,CAAA;AAAA,kBAChE,OAAA,EAAS,EAAE,QAAA,EAAU,CAAC,CAAC,CAAA,EAAE;AAAA,kBACzB,UAAA,EAAY;AAAA,oBACV,IAAA,EAAM,MAAA;AAAA,oBACN,SAAA,EAAW;AAAA;AACb,iBACD,CAAA;AAAA,cACH;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,OACD,CAAA;AAED,MAAA,OAAO,gBAAA;AAAA,IACT,CAAA;AAEA,IAAA,OAAO,MAAA,GAAS,MAAM,QAAA,EAAS,GAAI,EAAC;AAAA,EACtC,CAAA;AAEA,EAAA,OAAO;AAAA,IACL;AAAA,GACF;AACF;AAEO,MAAM,iBACXC,iCAAA,CAA8B;AAAA,EAC5B,EAAA,EAAI,2BAAA;AAAA,EACJ,cAAA,EAAgB,OAAM,OAAA,KACpBC,qCAAA,CAAqB;AAAA,IACnB,OAAA;AAAA,IACA,IAAA,EAAM;AAAA,MACJ,OAAOC,6BAAA,CAAa,KAAA;AAAA,MACpB,MAAMA,6BAAA,CAAa,IAAA;AAAA,MACnB,UAAUA,6BAAA,CAAa,QAAA;AAAA,MACvB,OAAA,EAASC,mCAAA;AAAA,MACT,OAAA,EAASC,mCAAA;AAAA,MACT,YAAA,EAAcC,mCAAA;AAAA,MACd,KAAA,EAAOC;AAAA,KACT;AAAA,IACA,OAAA,EAAS,OAAM,OAAA,KAAW;AACxB,MAAA,OAAO,kBAAkB,OAAO,CAAA;AAAA,IAClC;AAAA,GACD;AACL,CAAC;;;;;"}
package/dist/services/ingestor.cjs.js
CHANGED
@@ -41,7 +41,7 @@ const createDataIngestionPipeline = ({
     for await (const ingestor of ingestors) {
       logger.info(`Running ingestor: ${ingestor.id}`);
       const saveDocumentsBatch = async (documents2) => {
-        logger.
+        logger.debug(
           `Ingested documents for ${ingestor.id}: ${documents2.length}`
         );
         const splitter = new textsplitters.RecursiveCharacterTextSplitter({
@@ -50,12 +50,6 @@ const createDataIngestionPipeline = ({
         });
         const documentChunks = await Promise.all(
           documents2.map(async (document) => {
-            logger.debug(
-              `Deleting existing documents with id: [${document.metadata.id}] and source: [${ingestor.id}]`
-            );
-            await vectorStore.deleteDocuments({
-              filter: { source: ingestor.id, id: document.metadata.id }
-            });
             const chunks = await splitter.splitText(document.content);
             const docChunks = chunks.flatMap(
               (chunk, i) => ({
@@ -66,19 +60,19 @@ const createDataIngestionPipeline = ({
             return docChunks;
           })
         );
-        logger.
+        logger.debug(`Adding documents to vector store...`);
         const allChunks = documentChunks.flat();
-        logger.
+        logger.debug(
           `Total document chunks for batch to add for ${ingestor.id}: ${allChunks.length}`
         );
         for (let i = 0; i < allChunks.length; i += maxChunkProcessingSize) {
           const chunkBatch = allChunks.slice(i, i + maxChunkProcessingSize);
-          logger.
+          logger.debug(
             `Adding batch of ${chunkBatch.length} document chunks to vector store for ${ingestor.id}`
           );
           await vectorStore.addDocuments(chunkBatch);
         }
-        logger.
+        logger.debug(`Added documents to vector store for ${ingestor.id}`);
       };
       const documents = await ingestor.ingest({
         saveDocumentsBatch
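Net effect of the three hunks above: `saveDocumentsBatch` no longer deletes previously ingested documents before re-adding them (that cleanup presumably moves into the vector store alongside the new `hash`/`lastUpdated` columns in the migration below), and its progress logging drops from `info` to `debug`. A condensed sketch of the resulting flow, using the defaults from the embedded source; the standalone function signature here is for illustration only:

```ts
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';

type EmbeddingDocument = { content: string; metadata: Record<string, string> };

// Split every document into overlapping chunks, then write the flattened
// chunk list to the vector store in fixed-size batches; there is no delete pass.
const saveDocumentsBatch = async (
  documents: EmbeddingDocument[],
  addDocuments: (batch: EmbeddingDocument[]) => Promise<void>,
  { chunkSize = 1000, chunkOverlap = 100, maxChunkProcessingSize = 100 } = {},
): Promise<void> => {
  const splitter = new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });

  const documentChunks = await Promise.all(
    documents.map(async document => {
      const chunks = await splitter.splitText(document.content);
      return chunks.map((chunk, i) => ({
        metadata: { ...document.metadata, chunk: String(i) },
        content: chunk,
      }));
    }),
  );

  const allChunks = documentChunks.flat();
  for (let i = 0; i < allChunks.length; i += maxChunkProcessingSize) {
    await addDocuments(allChunks.slice(i, i + maxChunkProcessingSize));
  }
};
```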
package/dist/services/ingestor.cjs.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"ingestor.cjs.js","sources":["../../src/services/ingestor.ts"],"sourcesContent":["import {\n DataIngestionPipeline,\n DataIngestionPipelineOptions,\n EmbeddingDocument,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\n\nimport {\n SchedulerServiceTaskScheduleDefinition,\n readSchedulerServiceTaskScheduleDefinitionFromConfig,\n} from '@backstage/backend-plugin-api';\n\nimport { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';\n\nconst DEFAULT_DATA_INGESTION_SCHEDULE: SchedulerServiceTaskScheduleDefinition =\n {\n frequency: {\n hours: 24,\n },\n timeout: {\n hours: 3,\n },\n };\n\nconst DEFAULT_MAX_CHUNK_PROCESSING_SIZE = 100;\nconst DEFAULT_CHUNK_SIZE = 1000;\nconst DEFAULT_CHUNK_OVERLAP = 100;\n\nexport const createDataIngestionPipeline = ({\n config,\n logger,\n scheduler,\n ingestors,\n vectorStore,\n}: DataIngestionPipelineOptions): DataIngestionPipeline => {\n const schedule = config.has('aiAssistant.ingestion.schedule')\n ? readSchedulerServiceTaskScheduleDefinitionFromConfig(\n config.getConfig('aiAssistant.ingestion.schedule'),\n )\n : DEFAULT_DATA_INGESTION_SCHEDULE;\n\n const chunkSize =\n config.getOptionalNumber('aiAssistant.ingestion.chunking.chunkSize') ??\n DEFAULT_CHUNK_SIZE;\n\n const chunkOverlap =\n config.getOptionalNumber('aiAssistant.ingestion.chunking.chunkOverlap') ??\n DEFAULT_CHUNK_OVERLAP;\n\n const maxChunkProcessingSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestion.chunking.maxChunkProcessingSize',\n ) ?? DEFAULT_MAX_CHUNK_PROCESSING_SIZE;\n\n const taskRunner = scheduler.createScheduledTaskRunner(schedule);\n\n const taskId = `ai-assistant.data-ingestion:start`;\n\n const dataIngestion = async () => {\n logger.info('Starting data ingestion...');\n\n if (ingestors.length === 0) {\n logger.warn('No ingestors available for data ingestion.');\n return;\n }\n\n logger.info(`Ingestors available: ${ingestors.map(i => i.id).join(', ')}`);\n\n for await (const ingestor of ingestors) {\n logger.info(`Running ingestor: ${ingestor.id}`);\n\n const saveDocumentsBatch = async (documents: EmbeddingDocument[]) => {\n logger.
+{"version":3,"file":"ingestor.cjs.js","sources":["../../src/services/ingestor.ts"],"sourcesContent":["import {\n DataIngestionPipeline,\n DataIngestionPipelineOptions,\n EmbeddingDocument,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\n\nimport {\n SchedulerServiceTaskScheduleDefinition,\n readSchedulerServiceTaskScheduleDefinitionFromConfig,\n} from '@backstage/backend-plugin-api';\n\nimport { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';\n\nconst DEFAULT_DATA_INGESTION_SCHEDULE: SchedulerServiceTaskScheduleDefinition =\n {\n frequency: {\n hours: 24,\n },\n timeout: {\n hours: 3,\n },\n };\n\nconst DEFAULT_MAX_CHUNK_PROCESSING_SIZE = 100;\nconst DEFAULT_CHUNK_SIZE = 1000;\nconst DEFAULT_CHUNK_OVERLAP = 100;\n\nexport const createDataIngestionPipeline = ({\n config,\n logger,\n scheduler,\n ingestors,\n vectorStore,\n}: DataIngestionPipelineOptions): DataIngestionPipeline => {\n const schedule = config.has('aiAssistant.ingestion.schedule')\n ? readSchedulerServiceTaskScheduleDefinitionFromConfig(\n config.getConfig('aiAssistant.ingestion.schedule'),\n )\n : DEFAULT_DATA_INGESTION_SCHEDULE;\n\n const chunkSize =\n config.getOptionalNumber('aiAssistant.ingestion.chunking.chunkSize') ??\n DEFAULT_CHUNK_SIZE;\n\n const chunkOverlap =\n config.getOptionalNumber('aiAssistant.ingestion.chunking.chunkOverlap') ??\n DEFAULT_CHUNK_OVERLAP;\n\n const maxChunkProcessingSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestion.chunking.maxChunkProcessingSize',\n ) ?? DEFAULT_MAX_CHUNK_PROCESSING_SIZE;\n\n const taskRunner = scheduler.createScheduledTaskRunner(schedule);\n\n const taskId = `ai-assistant.data-ingestion:start`;\n\n const dataIngestion = async () => {\n logger.info('Starting data ingestion...');\n\n if (ingestors.length === 0) {\n logger.warn('No ingestors available for data ingestion.');\n return;\n }\n\n logger.info(`Ingestors available: ${ingestors.map(i => i.id).join(', ')}`);\n\n for await (const ingestor of ingestors) {\n logger.info(`Running ingestor: ${ingestor.id}`);\n\n const saveDocumentsBatch = async (documents: EmbeddingDocument[]) => {\n logger.debug(\n `Ingested documents for ${ingestor.id}: ${documents.length}`,\n );\n\n const splitter = new RecursiveCharacterTextSplitter({\n chunkSize,\n chunkOverlap,\n });\n\n const documentChunks = await Promise.all(\n documents.map(async document => {\n const chunks = await splitter.splitText(document.content);\n\n const docChunks: EmbeddingDocument[] = chunks.flatMap(\n (chunk, i) => ({\n metadata: { ...document.metadata, chunk: String(i) },\n content: chunk,\n }),\n );\n\n return docChunks;\n }),\n );\n\n logger.debug(`Adding documents to vector store...`);\n const allChunks = documentChunks.flat();\n\n logger.debug(\n `Total document chunks for batch to add for ${ingestor.id}: ${allChunks.length}`,\n );\n\n for (let i = 0; i < allChunks.length; i += maxChunkProcessingSize) {\n const chunkBatch = allChunks.slice(i, i + maxChunkProcessingSize);\n logger.debug(\n `Adding batch of ${chunkBatch.length} document chunks to vector store for ${ingestor.id}`,\n );\n\n await vectorStore.addDocuments(chunkBatch);\n }\n\n logger.debug(`Added documents to vector store for ${ingestor.id}`);\n };\n\n const documents = await ingestor.ingest({\n saveDocumentsBatch,\n });\n\n if (documents) {\n saveDocumentsBatch(documents);\n }\n\n logger.info(`Finished processing ingestor: ${ingestor.id}`);\n }\n\n logger.info('Data ingestion completed.');\n };\n\n const start = async () => {\n taskRunner.run({\n id: taskId,\n fn: dataIngestion,\n });\n };\n\n return {\n start,\n };\n};\n"],"names":["readSchedulerServiceTaskScheduleDefinitionFromConfig","documents","RecursiveCharacterTextSplitter"],"mappings":";;;;;AAaA,MAAM,+BAAA,GACJ;AAAA,EACE,SAAA,EAAW;AAAA,IACT,KAAA,EAAO;AAAA,GACT;AAAA,EACA,OAAA,EAAS;AAAA,IACP,KAAA,EAAO;AAAA;AAEX,CAAA;AAEF,MAAM,iCAAA,GAAoC,GAAA;AAC1C,MAAM,kBAAA,GAAqB,GAAA;AAC3B,MAAM,qBAAA,GAAwB,GAAA;AAEvB,MAAM,8BAA8B,CAAC;AAAA,EAC1C,MAAA;AAAA,EACA,MAAA;AAAA,EACA,SAAA;AAAA,EACA,SAAA;AAAA,EACA;AACF,CAAA,KAA2D;AACzD,EAAA,MAAM,QAAA,GAAW,MAAA,CAAO,GAAA,CAAI,gCAAgC,CAAA,GACxDA,qEAAA;AAAA,IACE,MAAA,CAAO,UAAU,gCAAgC;AAAA,GACnD,GACA,+BAAA;AAEJ,EAAA,MAAM,SAAA,GACJ,MAAA,CAAO,iBAAA,CAAkB,0CAA0C,CAAA,IACnE,kBAAA;AAEF,EAAA,MAAM,YAAA,GACJ,MAAA,CAAO,iBAAA,CAAkB,6CAA6C,CAAA,IACtE,qBAAA;AAEF,EAAA,MAAM,yBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA,GACF,IAAK,iCAAA;AAEP,EAAA,MAAM,UAAA,GAAa,SAAA,CAAU,yBAAA,CAA0B,QAAQ,CAAA;AAE/D,EAAA,MAAM,MAAA,GAAS,CAAA,iCAAA,CAAA;AAEf,EAAA,MAAM,gBAAgB,YAAY;AAChC,IAAA,MAAA,CAAO,KAAK,4BAA4B,CAAA;AAExC,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA,CAAK,CAAA,qBAAA,EAAwB,SAAA,CAAU,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAE,EAAE,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA,CAAE,CAAA;AAEzE,IAAA,WAAA,MAAiB,YAAY,SAAA,EAAW;AACtC,MAAA,MAAA,CAAO,IAAA,CAAK,CAAA,kBAAA,EAAqB,QAAA,CAAS,EAAE,CAAA,CAAE,CAAA;AAE9C,MAAA,MAAM,kBAAA,GAAqB,OAAOC,UAAAA,KAAmC;AACnE,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,uBAAA,EAA0B,QAAA,CAAS,EAAE,CAAA,EAAA,EAAKA,WAAU,MAAM,CAAA;AAAA,SAC5D;AAEA,QAAA,MAAM,QAAA,GAAW,IAAIC,4CAAA,CAA+B;AAAA,UAClD,SAAA;AAAA,UACA;AAAA,SACD,CAAA;AAED,QAAA,MAAM,cAAA,GAAiB,MAAM,OAAA,CAAQ,GAAA;AAAA,UACnCD,UAAAA,CAAU,GAAA,CAAI,OAAM,QAAA,KAAY;AAC9B,YAAA,MAAM,MAAA,GAAS,MAAM,QAAA,CAAS,SAAA,CAAU,SAAS,OAAO,CAAA;AAExD,YAAA,MAAM,YAAiC,MAAA,CAAO,OAAA;AAAA,cAC5C,CAAC,OAAO,CAAA,MAAO;AAAA,gBACb,QAAA,EAAU,EAAE,GAAG,QAAA,CAAS,UAAU,KAAA,EAAO,MAAA,CAAO,CAAC,CAAA,EAAE;AAAA,gBACnD,OAAA,EAAS;AAAA,eACX;AAAA,aACF;AAEA,YAAA,OAAO,SAAA;AAAA,UACT,CAAC;AAAA,SACH;AAEA,QAAA,MAAA,CAAO,MAAM,CAAA,mCAAA,CAAqC,CAAA;AAClD,QAAA,MAAM,SAAA,GAAY,eAAe,IAAA,EAAK;AAEtC,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,2CAAA,EAA8C,QAAA,CAAS,EAAE,CAAA,EAAA,EAAK,UAAU,MAAM,CAAA;AAAA,SAChF;AAEA,QAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,SAAA,CAAU,MAAA,EAAQ,KAAK,sBAAA,EAAwB;AACjE,UAAA,MAAM,UAAA,GAAa,SAAA,CAAU,KAAA,CAAM,CAAA,EAAG,IAAI,sBAAsB,CAAA;AAChE,UAAA,MAAA,CAAO,KAAA;AAAA,YACL,CAAA,gBAAA,EAAmB,UAAA,CAAW,MAAM,CAAA,qCAAA,EAAwC,SAAS,EAAE,CAAA;AAAA,WACzF;AAEA,UAAA,MAAM,WAAA,CAAY,aAAa,UAAU,CAAA;AAAA,QAC3C;AAEA,QAAA,MAAA,CAAO,KAAA,CAAM,CAAA,oCAAA,EAAuC,QAAA,CAAS,EAAE,CAAA,CAAE,CAAA;AAAA,MACnE,CAAA;AAEA,MAAA,MAAM,SAAA,GAAY,MAAM,QAAA,CAAS,MAAA,CAAO;AAAA,QACtC;AAAA,OACD,CAAA;AAED,MAAA,IAAI,SAAA,EAAW;AACb,QAAA,kBAAA,CAAmB,SAAS,CAAA;AAAA,MAC9B;AAEA,MAAA,MAAA,CAAO,IAAA,CAAK,CAAA,8BAAA,EAAiC,QAAA,CAAS,EAAE,CAAA,CAAE,CAAA;AAAA,IAC5D;AAEA,IAAA,MAAA,CAAO,KAAK,2BAA2B,CAAA;AAAA,EACzC,CAAA;AAEA,EAAA,MAAM,QAAQ,YAAY;AACxB,IAAA,UAAA,CAAW,GAAA,CAAI;AAAA,MACb,EAAA,EAAI,MAAA;AAAA,MACJ,EAAA,EAAI;AAAA,KACL,CAAA;AAAA,EACH,CAAA;AAEA,EAAA,OAAO;AAAA,IACL;AAAA,GACF;AACF;;;;"}
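The `sourcesContent` above also documents the pipeline's tuning knobs under `aiAssistant.ingestion.*`: the schedule defaults to a 24-hour frequency with a 3-hour timeout unless `aiAssistant.ingestion.schedule` is set, and the chunking values fall back to the constants shown. A sketch of how the chunking options resolve, using the same `Config` calls (`getOptionalNumber`) that appear in the source; the helper name here is illustrative only:

```ts
import { Config } from '@backstage/config';

// Defaults copied from the constants in the source above.
const DEFAULT_CHUNK_SIZE = 1000;
const DEFAULT_CHUNK_OVERLAP = 100;
const DEFAULT_MAX_CHUNK_PROCESSING_SIZE = 100;

// Mirrors how createDataIngestionPipeline reads its chunking options;
// a missing key falls back to the default via `??`.
export const readChunkingOptions = (config: Config) => ({
  chunkSize:
    config.getOptionalNumber('aiAssistant.ingestion.chunking.chunkSize') ??
    DEFAULT_CHUNK_SIZE,
  chunkOverlap:
    config.getOptionalNumber('aiAssistant.ingestion.chunking.chunkOverlap') ??
    DEFAULT_CHUNK_OVERLAP,
  maxChunkProcessingSize:
    config.getOptionalNumber(
      'aiAssistant.ingestion.chunking.maxChunkProcessingSize',
    ) ?? DEFAULT_MAX_CHUNK_PROCESSING_SIZE,
});
```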
package/dist/tools/searchKnowledge.cjs.js
CHANGED
@@ -39,13 +39,8 @@ Do NOT use for general knowledge that doesn't require company-specific informati
           content: "No relevant information found in the knowledge base."
         };
       }
-      const content = results.map((r) => r.content).join("\n---\n");
-      const urls = results.map((r) => r.metadata.url);
       return {
-        content,
-        metadata: {
-          urls
-        }
+        content: JSON.stringify(results)
       };
     }
   }
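The tool now returns the serialized search results wholesale instead of a pre-joined `content` string plus a `metadata.urls` array. That hands the model each result's own metadata, which is what the new prompt rule about preferring newer `lastUpdated` documents relies on. A sketch of the shape one stringified result plausibly takes; the exact metadata fields are set by the vector store and ingestors, not confirmed by this excerpt:

```ts
// Assumed result shape, inferred from the migration and prompt changes
// in this release; treat every field here as illustrative.
type SearchResult = {
  content: string;
  metadata: {
    id: string;
    source: string;
    chunk: string;
    url?: string;
    lastUpdated?: string; // lets the model rank newer documents higher
  };
};

// The whole tool response body is now just the serialized result list.
const toToolContent = (results: SearchResult[]): string =>
  JSON.stringify(results);
```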
package/dist/tools/searchKnowledge.cjs.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"searchKnowledge.cjs.js","sources":["../../src/tools/searchKnowledge.ts"],"sourcesContent":["import {\n createAssistantTool,\n VectorStore,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { Tool } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport z from 'zod';\n\ntype CreateSearchKnowledgeToolOptions = {\n vectorStore: VectorStore;\n};\n\nexport const createSearchKnowledgeTool = ({\n vectorStore,\n}: CreateSearchKnowledgeToolOptions): Tool => {\n const knowledgeTool = createAssistantTool({\n tool: {\n name: 'search-knowledge-base',\n description: `Search the internal knowledge base containing company specific information.\n\nUse this tool when users ask about:\n- General questions about the company or internal information\n\nDo NOT use for general knowledge that doesn't require company-specific information.`,\n provider: 'core',\n schema: z.object({\n query: z.string().describe('The query to search for.'),\n filter: z\n .object({\n source: z.string().optional().describe('Source to filter by.'),\n id: z.string().optional().describe('ID to filter by.'),\n })\n .optional()\n .describe('Filters to apply to the search.'),\n amount: z\n .number()\n .min(1)\n .optional()\n .describe('The number of results to return.'),\n }),\n func: async ({ query, filter, amount }) => {\n const results = await vectorStore.similaritySearch(\n query,\n filter,\n amount,\n );\n\n if (results.length === 0) {\n return {\n content: 'No relevant information found in the knowledge base.',\n };\n }\n\n
+{"version":3,"file":"searchKnowledge.cjs.js","sources":["../../src/tools/searchKnowledge.ts"],"sourcesContent":["import {\n createAssistantTool,\n VectorStore,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { Tool } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport z from 'zod';\n\ntype CreateSearchKnowledgeToolOptions = {\n vectorStore: VectorStore;\n};\n\nexport const createSearchKnowledgeTool = ({\n vectorStore,\n}: CreateSearchKnowledgeToolOptions): Tool => {\n const knowledgeTool = createAssistantTool({\n tool: {\n name: 'search-knowledge-base',\n description: `Search the internal knowledge base containing company specific information.\n\nUse this tool when users ask about:\n- General questions about the company or internal information\n\nDo NOT use for general knowledge that doesn't require company-specific information.`,\n provider: 'core',\n schema: z.object({\n query: z.string().describe('The query to search for.'),\n filter: z\n .object({\n source: z.string().optional().describe('Source to filter by.'),\n id: z.string().optional().describe('ID to filter by.'),\n })\n .optional()\n .describe('Filters to apply to the search.'),\n amount: z\n .number()\n .min(1)\n .optional()\n .describe('The number of results to return.'),\n }),\n func: async ({ query, filter, amount }) => {\n const results = await vectorStore.similaritySearch(\n query,\n filter,\n amount,\n );\n\n if (results.length === 0) {\n return {\n content: 'No relevant information found in the knowledge base.',\n };\n }\n\n return {\n content: JSON.stringify(results),\n };\n },\n },\n });\n\n return knowledgeTool;\n};\n"],"names":["createAssistantTool","z"],"mappings":";;;;;;;;;AAWO,MAAM,4BAA4B,CAAC;AAAA,EACxC;AACF,CAAA,KAA8C;AAC5C,EAAA,MAAM,gBAAgBA,kDAAA,CAAoB;AAAA,IACxC,IAAA,EAAM;AAAA,MACJ,IAAA,EAAM,uBAAA;AAAA,MACN,WAAA,EAAa,CAAA;;AAAA;AAAA;;AAAA,mFAAA,CAAA;AAAA,MAMb,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQC,mBAAE,MAAA,CAAO;AAAA,QACf,KAAA,EAAOA,kBAAA,CAAE,MAAA,EAAO,CAAE,SAAS,0BAA0B,CAAA;AAAA,QACrD,MAAA,EAAQA,mBACL,MAAA,CAAO;AAAA,UACN,QAAQA,kBAAA,CAAE,MAAA,GAAS,QAAA,EAAS,CAAE,SAAS,sBAAsB,CAAA;AAAA,UAC7D,IAAIA,kBAAA,CAAE,MAAA,GAAS,QAAA,EAAS,CAAE,SAAS,kBAAkB;AAAA,SACtD,CAAA,CACA,QAAA,EAAS,CACT,SAAS,iCAAiC,CAAA;AAAA,QAC7C,MAAA,EAAQA,kBAAA,CACL,MAAA,EAAO,CACP,GAAA,CAAI,CAAC,CAAA,CACL,QAAA,EAAS,CACT,QAAA,CAAS,kCAAkC;AAAA,OAC/C,CAAA;AAAA,MACD,MAAM,OAAO,EAAE,KAAA,EAAO,MAAA,EAAQ,QAAO,KAAM;AACzC,QAAA,MAAM,OAAA,GAAU,MAAM,WAAA,CAAY,gBAAA;AAAA,UAChC,KAAA;AAAA,UACA,MAAA;AAAA,UACA;AAAA,SACF;AAEA,QAAA,IAAI,OAAA,CAAQ,WAAW,CAAA,EAAG;AACxB,UAAA,OAAO;AAAA,YACL,OAAA,EAAS;AAAA,WACX;AAAA,QACF;AAEA,QAAA,OAAO;AAAA,UACL,OAAA,EAAS,IAAA,CAAK,SAAA,CAAU,OAAO;AAAA,SACjC;AAAA,MACF;AAAA;AACF,GACD,CAAA;AAED,EAAA,OAAO,aAAA;AACT;;;;"}
package/migrations/20260108_data_ingestion_refactor.js
ADDED
@@ -0,0 +1,27 @@
+const EMBEDDINGS_TABLE = 'embeddings';
+
+/**
+ *
+ * @param {import('knex').knex} knex
+ */
+exports.down = async knex => {
+  await knex.schema.alterTable(EMBEDDINGS_TABLE, table => {
+    table.dropColumn('hash');
+    table.dropColumn('lastUpdated');
+  });
+};
+
+/**
+ *
+ * @param {import('knex').knex} knex
+ */
+exports.up = async knex => {
+  await knex.schema.alterTable(EMBEDDINGS_TABLE, table => {
+    table.string('hash').comment('The content hash of the embedding document');
+
+    table
+      .timestamp('lastUpdated')
+      .defaultTo(knex.fn.now())
+      .comment('Timestamp of the last update to the embedding document');
+  });
+};
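The migration gives every embedding row a content `hash` and a `lastUpdated` timestamp. Together with the reworked `pg-vector-store.cjs.js` listed in the header (its hunks are not shown in this excerpt), a plausible reading is hash-based change detection: skip re-embedding documents whose content is unchanged, and refresh `lastUpdated` only on real updates. A sketch of that idea, assuming SHA-256 over the document content (a hypothetical helper, not this package's confirmed implementation):

```ts
import { createHash } from 'node:crypto';

// Hypothetical change detection: compare the incoming content hash with
// the stored `hash` column; only changed or new documents are re-embedded.
const contentHash = (content: string): string =>
  createHash('sha256').update(content).digest('hex');

const needsReembedding = (
  incoming: { content: string },
  stored?: { hash: string },
): boolean =>
  stored === undefined || stored.hash !== contentHash(incoming.content);
```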
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@sweetoburrito/backstage-plugin-ai-assistant-backend",
-  "version": "0.0.0-snapshot-20251210134851",
+  "version": "0.0.0-snapshot-20260108124555",
   "license": "Apache-2.0",
   "main": "dist/index.cjs.js",
   "types": "dist/index.d.ts",
@@ -43,8 +43,8 @@
   "@langchain/langgraph": "^0.4.9",
   "@langchain/mcp-adapters": "^1.0.0",
   "@langchain/textsplitters": "^0.1.0",
-  "@sweetoburrito/backstage-plugin-ai-assistant-common": "0.
-  "@sweetoburrito/backstage-plugin-ai-assistant-node": "0.0.0-snapshot-
+  "@sweetoburrito/backstage-plugin-ai-assistant-common": "^0.8.0",
+  "@sweetoburrito/backstage-plugin-ai-assistant-node": "0.0.0-snapshot-20260108124555",
   "express": "^4.17.1",
   "express-promise-router": "^4.1.0",
   "knex": "^3.1.0",