npm - @appland/search - Versions diffs - 1.0.1 → 1.1.1 - Mend

@appland/search 1.0.1 → 1.1.1

Files changed (16) hide show

package/CHANGELOG.md +23 -0
package/README.md +76 -0
package/built/build-file-index.js +13 -5
package/built/build-snippet-index.js +8 -5
package/built/cli.js +10 -6
package/built/file-index.d.ts +21 -2
package/built/file-index.js +36 -10
package/built/index.d.ts +3 -1
package/built/index.js +9 -1
package/built/query-keywords.js +0 -1
package/built/session-id.d.ts +2 -0
package/built/session-id.js +7 -0
package/built/snippet-index.d.ts +38 -7
package/built/snippet-index.js +86 -24
package/built/splitter.js +3 -1
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,26 @@
+# [@appland/search-v1.1.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.1.0...@appland/search-v1.1.1) (2024-12-18)
+### Bug Fixes
+* Extract complete chunk when splitting text ([75d2f5d](https://github.com/getappmap/appmap-js/commit/75d2f5df06c9794b772116c2facde366d5e1cd7d))
+# [@appland/search-v1.1.0](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.1...@appland/search-v1.1.0) (2024-12-01)
+### Bug Fixes
+* Pass absolute path when loading file content ([85060bb](https://github.com/getappmap/appmap-js/commit/85060bb432fec9a1ee2d461fa671cb18b0f21fe6))
+* Search for 'code' ([d209727](https://github.com/getappmap/appmap-js/commit/d209727d4ec19d8027b1cb4eb36ed31a60d9eb21))
+### Features
+* Add session deletion ([9ccd947](https://github.com/getappmap/appmap-js/commit/9ccd947f110857d5d881a31bf0c947bb02f1f2c5))
+* Associate boost factor data with a session id ([7031193](https://github.com/getappmap/appmap-js/commit/70311932553adb0aca4ae7f6f11af23790921bdf))
+* Define and export SnippetId type ([8e3be79](https://github.com/getappmap/appmap-js/commit/8e3be7949c62a11ed1d57b1c88df2868aa3f10cd))
+* Search for AppMap data using @appland/search ([ac00047](https://github.com/getappmap/appmap-js/commit/ac0004717147a095f1fa609c2aa341dec6e6c7bc))
 # [@appland/search-v1.0.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.0...@appland/search-v1.0.1) (2024-12-01)

package/README.md ADDED Viewed

@@ -0,0 +1,76 @@
+# `@appland/search`
+## session_id
+The `session id` parameter distinguishes the boost records of one search session from those of
+another. It ensures that boost factors, which affect search results, remain specific to a
+particular search session and do not impact other concurrent search sessions.
+### Intent
+The main goal of introducing a `session id` is to:
+1. **Isolate Boost Factors**: By associating each boost record with a unique session id, different
+   sessions' boost factors are kept separate. This means that boosting done in one session doesn't
+   unintentionally affect another.
+2. **Maintain Contextual Relevance**: Boost factors should only influence the search results within
+   the scope of their intended search session. This ensures the search system remains contextually
+   aware and the results are relevant to the specific scenarios where the boosts were applied.
+### How It Works for Concurrency
+- **Storage**: Every boost record is stored together with a `session id` (the `session_id` column
+  of the boost tables). This ties the boost factor directly to the user's session that
+  triggered it.
+- **Filtering**: When search operations are performed, only boost factors associated with the
+  current session id are considered. This filtering ensures that only relevant boosts are applied to
+  the search results.
+- **Concurrent Use**: In environments where multiple users or sessions are interacting with the
+  search system simultaneously, the session id ensures that one session's boost factors don't spill
+  over and impact the results of another session. This isolation is crucial in multi-user systems
+  where search personalization is required.
+### Session Deletion
+The system provides a mechanism to delete all data associated with a specific session. This is
+achieved through the `deleteSession` method available in both the `FileIndex` and `SnippetIndex`
+classes. Invoking this method with a session id removes the boost records tied to that session
+from the index it is called on (`file_boost` for `FileIndex`, `snippet_boost` for `SnippetIndex`),
+so call it on both indexes to ensure that no residual data affects future search operations.
+### Entity-Relationship Diagram
+```mermaid
+erDiagram
+  FILE_CONTENT {
+    TEXT directory
+    TEXT file_path
+    TEXT file_symbols
+    TEXT file_words
+  }
+  FILE_BOOST {
+    TEXT session_id
+    TEXT file_path
+    REAL boost_factor
+  }
+  SNIPPET_CONTENT {
+    TEXT snippet_id
+    TEXT directory
+    TEXT file_symbols
+    TEXT file_words
+    TEXT content
+  }
+  SNIPPET_BOOST {
+    TEXT session_id
+    TEXT snippet_id
+    REAL boost_factor
+  }
+  FILE_CONTENT ||--o{ FILE_BOOST : "Is boosted by"
+  SNIPPET_CONTENT ||--o{ SNIPPET_BOOST : "Is boosted by"
+```

package/built/build-file-index.js CHANGED Viewed

@@ -7,6 +7,7 @@ exports.default = buildFileIndex;
 const debug_1 = __importDefault(require("debug"));
 const path_1 = require("path");
 const console_1 = require("console");
+const types_1 = require("util/types");
 const debug = (0, debug_1.default)('appmap:search:build-index');
 async function indexFile(context, filePath) {
     debug('Indexing file: %s', filePath);
@@ -26,13 +27,20 @@ async function indexDirectory(context, directory) {
     if (!dirContents)
         return;
     for (const dirContentItem of dirContents) {
-        const filePath = (0, path_1.join)(directory, dirContentItem);
+        let filePath;
+        if ((0, path_1.isAbsolute)(dirContentItem))
+            filePath = dirContentItem;
+        else
+            filePath = (0, path_1.join)(directory, dirContentItem);
         debug('Indexing: %s', filePath);
         if (await context.fileFilter(filePath)) {
-            indexFile(context, filePath).catch((e) => {
-                (0, console_1.warn)(`Error indexing file: ${filePath}`);
-                (0, console_1.warn)(e);
-            });
+            try {
+                await indexFile(context, filePath);
+            }
+            catch (e) {
+                const message = (0, types_1.isNativeError)(e) ? e.message : String(e);
+                (0, console_1.warn)(`Error indexing file ${filePath}: ${message}`);
+            }
         }
     }
 }

package/built/build-snippet-index.js CHANGED Viewed

@@ -1,16 +1,19 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.default = buildSnippetIndex;
+const path_1 = require("path");
+const snippet_index_1 = require("./snippet-index");
 async function indexFile(context, file) {
-    const fileContent = await context.contentReader(file.filePath);
+    const filePath = (0, path_1.isAbsolute)(file.filePath) ? file.filePath : (0, path_1.join)(file.directory, file.filePath);
+    const fileContent = await context.contentReader(filePath);
     if (!fileContent)
         return;
     const extension = file.filePath.split('.').pop() || '';
     const chunks = await context.splitter(fileContent, extension);
-    chunks.forEach((chunk, index) => {
-        const snippetId = `${file.filePath}:${index}`;
-        const { content, startLine, endLine } = chunk;
-        context.snippetIndex.indexSnippet(snippetId, file.directory, file.filePath, startLine, endLine, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content);
+    chunks.forEach((chunk) => {
+        const { content, startLine } = chunk;
+        const snippetId = (0, snippet_index_1.fileChunkSnippetId)(filePath, startLine);
+        context.snippetIndex.indexSnippet(snippetId, file.directory, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content);
     });
 }
 async function buildSnippetIndex(snippetIndex, files, contentReader, splitter, tokenizer) {

package/built/cli.js CHANGED Viewed

@@ -17,6 +17,7 @@ const build_snippet_index_1 = __importDefault(require("./build-snippet-index"));
 const ioutil_1 = require("./ioutil");
 const splitter_1 = require("./splitter");
 const assert_1 = __importDefault(require("assert"));
+const session_id_1 = require("./session-id");
 const debug = (0, debug_1.default)('appmap:search:cli');
 const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
     .command('* <query>', 'Index directories and perform a search', (yargs) => {
@@ -58,6 +59,7 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
     };
     const db = new better_sqlite3_1.default(':memory:');
     const fileIndex = new file_index_1.default(db);
+    const sessionId = (0, session_id_1.generateSessionId)();
     await (0, build_file_index_1.default)(fileIndex, directories, project_files_1.default, fileFilter, ioutil_1.readFileSafe, tokenize_1.fileTokens);
     const filePathAtMostThreeEntries = (filePath) => {
         const parts = filePath.split('/');
@@ -65,13 +67,13 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
             return filePath;
         return `.../${parts.slice(-3).join('/')}`;
     };
-    const printResult = (filePath, score) => console.log('%s   %s', filePathAtMostThreeEntries(filePath), score.toPrecision(3));
+    const printResult = (type, id, score) => console.log('%s %s   %s', type, filePathAtMostThreeEntries(id), score.toPrecision(3));
     console.log('File search results');
     console.log('-------------------');
-    const fileSearchResults = fileIndex.search(query);
+    const fileSearchResults = fileIndex.search(sessionId, query);
     for (const result of fileSearchResults) {
         const { filePath, score } = result;
-        printResult(filePath, score);
+        printResult('file', filePath, score);
     }
     const splitter = splitter_1.langchainSplitter;
     const snippetIndex = new snippet_index_1.default(db);
@@ -80,10 +82,12 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
     console.log('Snippet search results');
     console.log('----------------------');
     const isNullOrUndefined = (value) => value === null || value === undefined;
-    const snippetSearchResults = snippetIndex.searchSnippets(query);
+    const snippetSearchResults = snippetIndex.searchSnippets(sessionId, query);
     for (const result of snippetSearchResults) {
-        const { snippetId, filePath, startLine, endLine, score } = result;
-        printResult(snippetId, score);
+        const { snippetId, score } = result;
+        printResult(snippetId.type, snippetId.id, score);
+        const [filePath, range] = snippetId.id.split(':');
+        const [startLine, endLine] = range.split('-').map((n) => parseInt(n, 10));
         if (isNullOrUndefined(startLine) || isNullOrUndefined(endLine))
             continue;
         const content = await (0, ioutil_1.readFileSafe)(filePath);

package/built/file-index.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import sqlite3 from 'better-sqlite3';
+import { SessionId } from './session-id';
 export type FileSearchResult = {
     directory: string;
     filePath: string;
@@ -23,7 +24,25 @@ export default class FileIndex {
     database: sqlite3.Database;
     constructor(database: sqlite3.Database);
     indexFile(directory: string, filePath: string, symbols: string, words: string): void;
-    boostFile(filePath: string, boostFactor: number): void;
-    search(query: string, limit?: number): FileSearchResult[];
+    /**
+     * Boosts the relevance score of a specific file for a given session.
+     * @param sessionId - The session identifier to associate the boost with.
+     * @param filePath - The path of the file to boost.
+     * @param boostFactor - The factor by which to boost the file's relevance.
+     */
+    boostFile(sessionId: SessionId, filePath: string, boostFactor: number): void;
+    /**
+     * Deletes all data associated with a specific session.
+     * @param sessionId - The session identifier to delete data for.
+     */
+    deleteSession(sessionId: string): void;
+    /**
+     * Searches for files matching the query, considering session-specific boosts.
+     * @param sessionId - The session identifier to apply during the search.
+     * @param query - The search query string.
+     * @param limit - The maximum number of results to return.
+     * @returns An array of search results with directory, file path, and score.
+     */
+    search(sessionId: SessionId, query: string, limit?: number): FileSearchResult[];
     close(): void;
 }

package/built/file-index.js CHANGED Viewed

@@ -10,7 +10,7 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
     if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
     return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
 };
-var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_search;
+var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_deleteSession, _FileIndex_search;
 Object.defineProperty(exports, "__esModule", { value: true });
 const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
   directory UNINDEXED,
@@ -20,13 +20,16 @@ const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
   tokenize = 'porter unicode61'
 )`;
 const CREATE_BOOST_TABLE_SQL = `CREATE TABLE file_boost (
-  file_path TEXT PRIMARY KEY,
-  boost_factor REAL
+  session_id TEXT,
+  file_path TEXT,
+  boost_factor REAL,
+  PRIMARY KEY (session_id, file_path)
 )`;
 const INSERT_SQL = `INSERT INTO file_content (directory, file_path, file_symbols, file_words)
 VALUES (?, ?, ?, ?)`;
-const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (file_path, boost_factor)
-VALUES (?, ?)`;
+const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (session_id, file_path, boost_factor)
+VALUES (?, ?, ?)`;
+const DELETE_SESSION_SQL = `DELETE FROM file_boost WHERE session_id LIKE ?`;
 const SEARCH_SQL = `SELECT
     file_content.directory,
     file_content.file_path,
@@ -39,6 +42,7 @@ LEFT JOIN
     file_boost
 ON
     file_content.file_path = file_boost.file_path
+    AND file_boost.session_id = ?
 WHERE
     file_content MATCH ?
 ORDER BY
@@ -65,6 +69,7 @@ class FileIndex {
         this.database = database;
         _FileIndex_insert.set(this, void 0);
         _FileIndex_updateBoost.set(this, void 0);
+        _FileIndex_deleteSession.set(this, void 0);
         _FileIndex_search.set(this, void 0);
         this.database.exec(CREATE_TABLE_SQL);
         this.database.exec(CREATE_BOOST_TABLE_SQL);
@@ -72,16 +77,37 @@ class FileIndex {
         this.database.pragma('synchronous = OFF');
         __classPrivateFieldSet(this, _FileIndex_insert, this.database.prepare(INSERT_SQL), "f");
         __classPrivateFieldSet(this, _FileIndex_updateBoost, this.database.prepare(UPDATE_BOOST_SQL), "f");
+        __classPrivateFieldSet(this, _FileIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
         __classPrivateFieldSet(this, _FileIndex_search, this.database.prepare(SEARCH_SQL), "f");
     }
     indexFile(directory, filePath, symbols, words) {
         __classPrivateFieldGet(this, _FileIndex_insert, "f").run(directory, filePath, symbols, words);
     }
-    boostFile(filePath, boostFactor) {
-        __classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(filePath, boostFactor);
+    /**
+     * Boosts the relevance score of a specific file for a given session.
+     * @param sessionId - The session identifier to associate the boost with.
+     * @param filePath - The path of the file to boost.
+     * @param boostFactor - The factor by which to boost the file's relevance.
+     */
+    boostFile(sessionId, filePath, boostFactor) {
+        __classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(sessionId, filePath, boostFactor);
     }
-    search(query, limit = 10) {
-        const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(query, limit);
+    /**
+     * Deletes all data associated with a specific session.
+     * @param sessionId - The session identifier to delete data for.
+     */
+    deleteSession(sessionId) {
+        __classPrivateFieldGet(this, _FileIndex_deleteSession, "f").run(sessionId);
+    }
+    /**
+     * Searches for files matching the query, considering session-specific boosts.
+     * @param sessionId - The session identifier to apply during the search.
+     * @param query - The search query string.
+     * @param limit - The maximum number of results to return.
+     * @returns An array of search results with directory, file path, and score.
+     */
+    search(sessionId, query, limit = 10) {
+        const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(sessionId, query, limit);
         return rows.map((row) => ({
             directory: row.directory,
             filePath: row.file_path,
@@ -92,5 +118,5 @@ class FileIndex {
         this.database.close();
     }
 }
-_FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_search = new WeakMap();
+_FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_deleteSession = new WeakMap(), _FileIndex_search = new WeakMap();
 exports.default = FileIndex;

package/built/index.d.ts CHANGED Viewed

@@ -1,9 +1,11 @@
 export { ContentReader, readFileSafe } from './ioutil';
+export { SessionId, generateSessionId } from './session-id';
 export { Splitter, langchainSplitter } from './splitter';
 export { ListFn, FilterFn, Tokenizer, default as buildFileIndex } from './build-file-index';
 export { File, default as buildSnippetIndex } from './build-snippet-index';
-export { default as SnippetIndex, SnippetSearchResult } from './snippet-index';
+export { default as SnippetIndex, SnippetSearchResult, SnippetId, encodeSnippetId, parseSnippetId, fileChunkSnippetId, parseFileChunkSnippetId, } from './snippet-index';
 export { default as FileIndex, FileSearchResult } from './file-index';
 export { default as listProjectFiles } from './project-files';
 export { isBinaryFile, isDataFile, isLargeFile } from './file-type';
 export { fileTokens } from './tokenize';
+export { default as queryKeywords } from './query-keywords';

package/built/index.js CHANGED Viewed

@@ -3,9 +3,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.readFileSafe = void 0;
+exports.queryKeywords = exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.parseFileChunkSnippetId = exports.fileChunkSnippetId = exports.parseSnippetId = exports.encodeSnippetId = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.generateSessionId = exports.readFileSafe = void 0;
 var ioutil_1 = require("./ioutil");
 Object.defineProperty(exports, "readFileSafe", { enumerable: true, get: function () { return ioutil_1.readFileSafe; } });
+var session_id_1 = require("./session-id");
+Object.defineProperty(exports, "generateSessionId", { enumerable: true, get: function () { return session_id_1.generateSessionId; } });
 var splitter_1 = require("./splitter");
 Object.defineProperty(exports, "langchainSplitter", { enumerable: true, get: function () { return splitter_1.langchainSplitter; } });
 var build_file_index_1 = require("./build-file-index");
@@ -14,6 +16,10 @@ var build_snippet_index_1 = require("./build-snippet-index");
 Object.defineProperty(exports, "buildSnippetIndex", { enumerable: true, get: function () { return __importDefault(build_snippet_index_1).default; } });
 var snippet_index_1 = require("./snippet-index");
 Object.defineProperty(exports, "SnippetIndex", { enumerable: true, get: function () { return __importDefault(snippet_index_1).default; } });
+Object.defineProperty(exports, "encodeSnippetId", { enumerable: true, get: function () { return snippet_index_1.encodeSnippetId; } });
+Object.defineProperty(exports, "parseSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseSnippetId; } });
+Object.defineProperty(exports, "fileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.fileChunkSnippetId; } });
+Object.defineProperty(exports, "parseFileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseFileChunkSnippetId; } });
 var file_index_1 = require("./file-index");
 Object.defineProperty(exports, "FileIndex", { enumerable: true, get: function () { return __importDefault(file_index_1).default; } });
 var project_files_1 = require("./project-files");
@@ -24,3 +30,5 @@ Object.defineProperty(exports, "isDataFile", { enumerable: true, get: function (
 Object.defineProperty(exports, "isLargeFile", { enumerable: true, get: function () { return file_type_1.isLargeFile; } });
 var tokenize_1 = require("./tokenize");
 Object.defineProperty(exports, "fileTokens", { enumerable: true, get: function () { return tokenize_1.fileTokens; } });
+var query_keywords_1 = require("./query-keywords");
+Object.defineProperty(exports, "queryKeywords", { enumerable: true, get: function () { return __importDefault(query_keywords_1).default; } });

package/built/query-keywords.js CHANGED Viewed

@@ -11,7 +11,6 @@ const STOP_WORDS = new Set([
     'at',
     'be',
     'by',
-    'code',
     'for',
     'from',
     'has',

package/built/session-id.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+export type SessionId = string;
+export declare function generateSessionId(): SessionId;

package/built/session-id.js ADDED Viewed

@@ -0,0 +1,7 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.generateSessionId = generateSessionId;
+const uuid_1 = require("uuid");
+function generateSessionId() {
+    return (0, uuid_1.v4)();
+}

package/built/snippet-index.d.ts CHANGED Viewed

@@ -1,10 +1,22 @@
 import sqlite3 from 'better-sqlite3';
+import { SessionId } from './session-id';
+export declare enum SnippetType {
+    FileChunk = "file-chunk"
+}
+export type SnippetId = {
+    type: string;
+    id: string;
+};
+export declare function fileChunkSnippetId(filePath: string, startLine?: number): SnippetId;
+export declare function parseFileChunkSnippetId(snippetId: SnippetId): {
+    filePath: string;
+    startLine?: number;
+};
+export declare function encodeSnippetId(snippetId: SnippetId): string;
+export declare function parseSnippetId(snippetId: string): SnippetId;
 export type SnippetSearchResult = {
-    snippetId: string;
+    snippetId: SnippetId;
     directory: string;
-    filePath: string;
-    startLine: number | undefined;
-    endLine: number | undefined;
     score: number;
     content: string;
 };
@@ -12,8 +24,27 @@ export default class SnippetIndex {
     #private;
     database: sqlite3.Database;
     constructor(database: sqlite3.Database);
-    indexSnippet(snippetId: string, directory: string, filePath: string, startLine: number | undefined, endLine: number | undefined, symbols: string, words: string, content: string): void;
-    boostSnippet(snippetId: string, boostFactor: number): void;
-    searchSnippets(query: string, limit?: number): SnippetSearchResult[];
+    /**
+     * Deletes all data associated with a specific session.
+     * @param sessionId - The session identifier to delete data for.
+     */
+    deleteSession(sessionId: string): void;
+    /**
+     * Indexes a code snippet for searchability.
+     * @param snippetId - The unique identifier for the snippet.
+     * @param directory - The directory where the snippet is located.
+     * @param symbols - Symbols (e.g., class names) in the snippet.
+     * @param words - General words in the snippet.
+     * @param content - The actual content of the snippet.
+     */
+    indexSnippet(snippetId: SnippetId, directory: string, symbols: string, words: string, content: string): void;
+    /**
+     * Boosts the relevance score of a specific snippet for a given session.
+     * @param sessionId - The session identifier to associate the boost with.
+     * @param snippetId - The identifier of the snippet to boost.
+     * @param boostFactor - The factor by which to boost the snippet's relevance.
+     */
+    boostSnippet(sessionId: SessionId, snippetId: SnippetId, boostFactor: number): void;
+    searchSnippets(sessionId: SessionId, query: string, limit?: number): SnippetSearchResult[];
     close(): void;
 }

package/built/snippet-index.js CHANGED Viewed

@@ -10,34 +10,40 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
     if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
     return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
 };
-var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_searchSnippet;
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_deleteSession, _SnippetIndex_searchSnippet;
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.SnippetType = void 0;
+exports.fileChunkSnippetId = fileChunkSnippetId;
+exports.parseFileChunkSnippetId = parseFileChunkSnippetId;
+exports.encodeSnippetId = encodeSnippetId;
+exports.parseSnippetId = parseSnippetId;
+const assert_1 = __importDefault(require("assert"));
 const CREATE_SNIPPET_CONTENT_TABLE_SQL = `CREATE VIRTUAL TABLE snippet_content USING fts5(
   snippet_id UNINDEXED,
   directory UNINDEXED,
-  file_path,
-  start_line UNINDEXED,
-  end_line UNINDEXED,
   file_symbols,
   file_words,
   content UNINDEXED,
   tokenize = 'porter unicode61'
 )`;
 const CREATE_SNIPPET_BOOST_TABLE_SQL = `CREATE TABLE snippet_boost (
-  snippet_id TEXT PRIMARY KEY,
-  boost_factor REAL
+  session_id TEXT,
+  snippet_id TEXT,
+  boost_factor REAL,
+  PRIMARY KEY (session_id, snippet_id)
 )`;
 const INSERT_SNIPPET_SQL = `INSERT INTO snippet_content
-(snippet_id, directory, file_path, start_line, end_line, file_symbols, file_words, content)
-VALUES (?, ?, ?, ?, ?, ?, ?, ?)`;
+(snippet_id, directory, file_symbols, file_words, content)
+VALUES (?, ?, ?, ?, ?)`;
+const DELETE_SESSION_SQL = `DELETE FROM snippet_boost WHERE session_id LIKE ?`;
 const UPDATE_SNIPPET_BOOST_SQL = `INSERT OR REPLACE INTO snippet_boost
-(snippet_id, boost_factor)
-VALUES (?, ?)`;
+(session_id, snippet_id, boost_factor)
+VALUES (?, ?, ?)`;
 const SEARCH_SNIPPET_SQL = `SELECT
   snippet_content.directory,
-  snippet_content.file_path,
-  snippet_content.start_line,
-  snippet_content.end_line,
   snippet_content.snippet_id,
   snippet_content.content,
   (bm25(snippet_content, 1)*3.0 + bm25(snippet_content, 2)*2.0 + bm25(snippet_content, 3)*1.0)
@@ -49,39 +55,95 @@ LEFT JOIN
   snippet_boost
 ON
   snippet_content.snippet_id = snippet_boost.snippet_id
+  AND snippet_boost.session_id = ?
 WHERE
   snippet_content MATCH ?
 ORDER BY
   score DESC
 LIMIT ?`;
+var SnippetType;
+(function (SnippetType) {
+    SnippetType["FileChunk"] = "file-chunk";
+})(SnippetType || (exports.SnippetType = SnippetType = {}));
+function fileChunkSnippetId(filePath, startLine) {
+    return {
+        type: 'file-chunk',
+        id: [filePath, startLine].filter(Boolean).join(':'),
+    };
+}
+function parseFileChunkSnippetId(snippetId) {
+    const type = snippetId.type;
+    (0, assert_1.default)(type === SnippetType.FileChunk);
+    const parts = snippetId.id.split(':');
+    const filePath = parts.shift();
+    (0, assert_1.default)(filePath);
+    const startLine = parts.shift();
+    return {
+        filePath: filePath,
+        startLine: startLine ? parseInt(startLine, 10) : undefined,
+    };
+}
+function encodeSnippetId(snippetId) {
+    return [snippetId.type, snippetId.id].join(':');
+}
+function parseSnippetId(snippetId) {
+    const parts = snippetId.split(':');
+    const type = parts.shift();
+    (0, assert_1.default)(type);
+    const id = parts.join(':');
+    return {
+        type,
+        id,
+    };
+}
 class SnippetIndex {
     constructor(database) {
         this.database = database;
         _SnippetIndex_insertSnippet.set(this, void 0);
         _SnippetIndex_updateSnippetBoost.set(this, void 0);
+        _SnippetIndex_deleteSession.set(this, void 0);
         _SnippetIndex_searchSnippet.set(this, void 0);
         this.database.exec(CREATE_SNIPPET_CONTENT_TABLE_SQL);
         this.database.exec(CREATE_SNIPPET_BOOST_TABLE_SQL);
         this.database.pragma('journal_mode = OFF');
         this.database.pragma('synchronous = OFF');
         __classPrivateFieldSet(this, _SnippetIndex_insertSnippet, this.database.prepare(INSERT_SNIPPET_SQL), "f");
+        __classPrivateFieldSet(this, _SnippetIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
         __classPrivateFieldSet(this, _SnippetIndex_updateSnippetBoost, this.database.prepare(UPDATE_SNIPPET_BOOST_SQL), "f");
         __classPrivateFieldSet(this, _SnippetIndex_searchSnippet, this.database.prepare(SEARCH_SNIPPET_SQL), "f");
     }
-    indexSnippet(snippetId, directory, filePath, startLine, endLine, symbols, words, content) {
-        __classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(snippetId, directory, filePath, startLine, endLine, symbols, words, content);
+    /**
+     * Deletes all data associated with a specific session.
+     * @param sessionId - The session identifier to delete data for.
+     */
+    deleteSession(sessionId) {
+        __classPrivateFieldGet(this, _SnippetIndex_deleteSession, "f").run(sessionId);
+    }
+    /**
+     * Indexes a code snippet for searchability.
+     * @param snippetId - The unique identifier for the snippet.
+     * @param directory - The directory where the snippet is located.
+     * @param symbols - Symbols (e.g., class names) in the snippet.
+     * @param words - General words in the snippet.
+     * @param content - The actual content of the snippet.
+     */
+    indexSnippet(snippetId, directory, symbols, words, content) {
+        __classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(encodeSnippetId(snippetId), directory, symbols, words, content);
     }
-    boostSnippet(snippetId, boostFactor) {
-        __classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(snippetId, boostFactor);
+    /**
+     * Boosts the relevance score of a specific snippet for a given session.
+     * @param sessionId - The session identifier to associate the boost with.
+     * @param snippetId - The identifier of the snippet to boost.
+     * @param boostFactor - The factor by which to boost the snippet's relevance.
+     */
+    boostSnippet(sessionId, snippetId, boostFactor) {
+        __classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(sessionId, encodeSnippetId(snippetId), boostFactor);
     }
-    searchSnippets(query, limit = 10) {
-        const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(query, limit);
+    searchSnippets(sessionId, query, limit = 10) {
+        const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(sessionId, query, limit);
         return rows.map((row) => ({
             directory: row.directory,
-            snippetId: row.snippet_id,
-            filePath: row.file_path,
-            startLine: row.start_line,
-            endLine: row.end_line,
+            snippetId: parseSnippetId(row.snippet_id),
             score: row.score,
             content: row.content,
         }));
@@ -90,5 +152,5 @@ class SnippetIndex {
         this.database.close();
     }
 }
-_SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
+_SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_deleteSession = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
 exports.default = SnippetIndex;

package/built/splitter.js CHANGED Viewed

@@ -42,9 +42,11 @@ async function langchainSplitter(content, fileExtension) {
         const loc = doc.metadata?.loc;
         const lines = loc?.lines;
         const result = {
-            content: doc.pageContent,
+            content: '',
         };
         if (lines) {
+            const contentLines = content.split('\n');
+            result.content = contentLines.slice(lines.from - 1, lines.to).join('\n');
             result.startLine = lines.from;
             result.endLine = lines.to;
         }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@appland/search",
-  "version": "1.0.1",
+  "version": "1.1.1",
   "description": "",
   "bin": "built/cli.js",
   "publishConfig": {