@appland/search 1.0.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,26 @@
1
+ # [@appland/search-v1.1.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.1.0...@appland/search-v1.1.1) (2024-12-18)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * Extract complete chunk when splitting text ([75d2f5d](https://github.com/getappmap/appmap-js/commit/75d2f5df06c9794b772116c2facde366d5e1cd7d))
7
+
8
+ # [@appland/search-v1.1.0](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.1...@appland/search-v1.1.0) (2024-12-01)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * Pass absolute path when loading file content ([85060bb](https://github.com/getappmap/appmap-js/commit/85060bb432fec9a1ee2d461fa671cb18b0f21fe6))
14
+ * Search for 'code' ([d209727](https://github.com/getappmap/appmap-js/commit/d209727d4ec19d8027b1cb4eb36ed31a60d9eb21))
15
+
16
+
17
+ ### Features
18
+
19
+ * Add session deletion ([9ccd947](https://github.com/getappmap/appmap-js/commit/9ccd947f110857d5d881a31bf0c947bb02f1f2c5))
20
+ * Associate boost factor data with a session id ([7031193](https://github.com/getappmap/appmap-js/commit/70311932553adb0aca4ae7f6f11af23790921bdf))
21
+ * Define and export SnippetId type ([8e3be79](https://github.com/getappmap/appmap-js/commit/8e3be7949c62a11ed1d57b1c88df2868aa3f10cd))
22
+ * Search for AppMap data using @appland/search ([ac00047](https://github.com/getappmap/appmap-js/commit/ac0004717147a095f1fa609c2aa341dec6e6c7bc))
23
+
1
24
  # [@appland/search-v1.0.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.0...@appland/search-v1.0.1) (2024-12-01)
2
25
 
3
26
 
package/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # `@appland/search`
2
+
3
+ ## session_id
4
+
5
+ The `session id` parameter is an integral part of differentiating boost records in this system. Its
6
+ intent is to make sure that the boost factors, which affect search results, remain specific to a
7
+ particular search session and do not impact other concurrent search sessions.
8
+
9
+ ### Intent
10
+
11
+ The main goal of introducing a `session id` is to:
12
+
13
+ 1. **Isolate Boost Factors**: By associating each boost record with a unique session id, different
14
+ sessions' boost factors are kept separate. This means that boosting done in one session doesn't
15
+ unintentionally affect another.
16
+
17
+ 2. **Maintain Contextual Relevance**: Boost factors should only influence the search results within
18
+ the scope of their intended search session. This ensures the search system remains contextually
19
+ aware and the results are relevant to the specific scenarios where the boosts were applied.
20
+
21
+ ### How It Works for Concurrency
22
+
23
+ - **Storage**: Each boost record stored in the database has a `session id` column alongside the
24
+ boost factor. This ties the boost factor directly to the user's session that
25
+ triggered it.
26
+
27
+ - **Filtering**: When search operations are performed, only boost factors associated with the
28
+ current session id are considered. This filtering ensures that only relevant boosts are applied to
29
+ the search results.
30
+
31
+ - **Concurrent Use**: In environments where multiple users or sessions are interacting with the
32
+ search system simultaneously, the session id ensures that one session's boost factors don't spill
33
+ over and impact the results of another session. This isolation is crucial in multi-user systems
34
+ where search personalization is required.
35
+
36
+ ### Session Deletion
37
+
38
+ The system provides a mechanism to delete all data associated with a specific session. This is
39
+ achieved through the `deleteSession` method available in both the `FileIndex` and `SnippetIndex`
40
+ classes. By invoking this method with a session id, all boost factors and related data tied to that
41
+ session are removed from the database, ensuring that no residual data affects future search
42
+ operations.
43
+
44
+ ### Entity-Relationship Diagram
45
+
46
+ ```mermaid
47
+ erDiagram
48
+ FILE_CONTENT {
49
+ TEXT directory
50
+ TEXT file_path
51
+ TEXT file_symbols
52
+ TEXT file_words
53
+ }
54
+ FILE_BOOST {
55
+ TEXT session_id
56
+ TEXT file_path
57
+ REAL boost_factor
58
+ }
59
+
60
+
61
+ SNIPPET_CONTENT {
62
+ TEXT snippet_id
63
+ TEXT directory
64
+ TEXT file_symbols
65
+ TEXT file_words
66
+ TEXT content
67
+ }
68
+ SNIPPET_BOOST {
69
+ TEXT session_id
70
+ TEXT snippet_id
71
+ REAL boost_factor
72
+ }
73
+
74
+ FILE_CONTENT ||--o{ FILE_BOOST : "Is boosted by"
75
+ SNIPPET_CONTENT ||--o{ SNIPPET_BOOST : "Is boosted by"
76
+ ```
@@ -7,6 +7,7 @@ exports.default = buildFileIndex;
7
7
  const debug_1 = __importDefault(require("debug"));
8
8
  const path_1 = require("path");
9
9
  const console_1 = require("console");
10
+ const types_1 = require("util/types");
10
11
  const debug = (0, debug_1.default)('appmap:search:build-index');
11
12
  async function indexFile(context, filePath) {
12
13
  debug('Indexing file: %s', filePath);
@@ -26,13 +27,20 @@ async function indexDirectory(context, directory) {
26
27
  if (!dirContents)
27
28
  return;
28
29
  for (const dirContentItem of dirContents) {
29
- const filePath = (0, path_1.join)(directory, dirContentItem);
30
+ let filePath;
31
+ if ((0, path_1.isAbsolute)(dirContentItem))
32
+ filePath = dirContentItem;
33
+ else
34
+ filePath = (0, path_1.join)(directory, dirContentItem);
30
35
  debug('Indexing: %s', filePath);
31
36
  if (await context.fileFilter(filePath)) {
32
- indexFile(context, filePath).catch((e) => {
33
- (0, console_1.warn)(`Error indexing file: ${filePath}`);
34
- (0, console_1.warn)(e);
35
- });
37
+ try {
38
+ await indexFile(context, filePath);
39
+ }
40
+ catch (e) {
41
+ const message = (0, types_1.isNativeError)(e) ? e.message : String(e);
42
+ (0, console_1.warn)(`Error indexing file ${filePath}: ${message}`);
43
+ }
36
44
  }
37
45
  }
38
46
  }
@@ -1,16 +1,19 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.default = buildSnippetIndex;
4
+ const path_1 = require("path");
5
+ const snippet_index_1 = require("./snippet-index");
4
6
  async function indexFile(context, file) {
5
- const fileContent = await context.contentReader(file.filePath);
7
+ const filePath = (0, path_1.isAbsolute)(file.filePath) ? file.filePath : (0, path_1.join)(file.directory, file.filePath);
8
+ const fileContent = await context.contentReader(filePath);
6
9
  if (!fileContent)
7
10
  return;
8
11
  const extension = file.filePath.split('.').pop() || '';
9
12
  const chunks = await context.splitter(fileContent, extension);
10
- chunks.forEach((chunk, index) => {
11
- const snippetId = `${file.filePath}:${index}`;
12
- const { content, startLine, endLine } = chunk;
13
- context.snippetIndex.indexSnippet(snippetId, file.directory, file.filePath, startLine, endLine, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content);
13
+ chunks.forEach((chunk) => {
14
+ const { content, startLine } = chunk;
15
+ const snippetId = (0, snippet_index_1.fileChunkSnippetId)(filePath, startLine);
16
+ context.snippetIndex.indexSnippet(snippetId, file.directory, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content);
14
17
  });
15
18
  }
16
19
  async function buildSnippetIndex(snippetIndex, files, contentReader, splitter, tokenizer) {
package/built/cli.js CHANGED
@@ -17,6 +17,7 @@ const build_snippet_index_1 = __importDefault(require("./build-snippet-index"));
17
17
  const ioutil_1 = require("./ioutil");
18
18
  const splitter_1 = require("./splitter");
19
19
  const assert_1 = __importDefault(require("assert"));
20
+ const session_id_1 = require("./session-id");
20
21
  const debug = (0, debug_1.default)('appmap:search:cli');
21
22
  const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
22
23
  .command('* <query>', 'Index directories and perform a search', (yargs) => {
@@ -58,6 +59,7 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
58
59
  };
59
60
  const db = new better_sqlite3_1.default(':memory:');
60
61
  const fileIndex = new file_index_1.default(db);
62
+ const sessionId = (0, session_id_1.generateSessionId)();
61
63
  await (0, build_file_index_1.default)(fileIndex, directories, project_files_1.default, fileFilter, ioutil_1.readFileSafe, tokenize_1.fileTokens);
62
64
  const filePathAtMostThreeEntries = (filePath) => {
63
65
  const parts = filePath.split('/');
@@ -65,13 +67,13 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
65
67
  return filePath;
66
68
  return `.../${parts.slice(-3).join('/')}`;
67
69
  };
68
- const printResult = (filePath, score) => console.log('%s %s', filePathAtMostThreeEntries(filePath), score.toPrecision(3));
70
+ const printResult = (type, id, score) => console.log('%s %s %s', type, filePathAtMostThreeEntries(id), score.toPrecision(3));
69
71
  console.log('File search results');
70
72
  console.log('-------------------');
71
- const fileSearchResults = fileIndex.search(query);
73
+ const fileSearchResults = fileIndex.search(sessionId, query);
72
74
  for (const result of fileSearchResults) {
73
75
  const { filePath, score } = result;
74
- printResult(filePath, score);
76
+ printResult('file', filePath, score);
75
77
  }
76
78
  const splitter = splitter_1.langchainSplitter;
77
79
  const snippetIndex = new snippet_index_1.default(db);
@@ -80,10 +82,12 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
80
82
  console.log('Snippet search results');
81
83
  console.log('----------------------');
82
84
  const isNullOrUndefined = (value) => value === null || value === undefined;
83
- const snippetSearchResults = snippetIndex.searchSnippets(query);
85
+ const snippetSearchResults = snippetIndex.searchSnippets(sessionId, query);
84
86
  for (const result of snippetSearchResults) {
85
- const { snippetId, filePath, startLine, endLine, score } = result;
86
- printResult(snippetId, score);
87
+ const { snippetId, score } = result;
88
+ printResult(snippetId.type, snippetId.id, score);
89
+ const [filePath, range] = snippetId.id.split(':');
90
+ const [startLine, endLine] = range.split('-').map((n) => parseInt(n, 10));
87
91
  if (isNullOrUndefined(startLine) || isNullOrUndefined(endLine))
88
92
  continue;
89
93
  const content = await (0, ioutil_1.readFileSafe)(filePath);
@@ -1,4 +1,5 @@
1
1
  import sqlite3 from 'better-sqlite3';
2
+ import { SessionId } from './session-id';
2
3
  export type FileSearchResult = {
3
4
  directory: string;
4
5
  filePath: string;
@@ -23,7 +24,25 @@ export default class FileIndex {
23
24
  database: sqlite3.Database;
24
25
  constructor(database: sqlite3.Database);
25
26
  indexFile(directory: string, filePath: string, symbols: string, words: string): void;
26
- boostFile(filePath: string, boostFactor: number): void;
27
- search(query: string, limit?: number): FileSearchResult[];
27
+ /**
28
+ * Boosts the relevance score of a specific file for a given session.
29
+ * @param sessionId - The session identifier to associate the boost with.
30
+ * @param filePath - The path of the file to boost.
31
+ * @param boostFactor - The factor by which to boost the file's relevance.
32
+ */
33
+ boostFile(sessionId: SessionId, filePath: string, boostFactor: number): void;
34
+ /**
35
+ * Deletes all data associated with a specific session.
36
+ * @param sessionId - The session identifier to delete data for.
37
+ */
38
+ deleteSession(sessionId: string): void;
39
+ /**
40
+ * Searches for files matching the query, considering session-specific boosts.
41
+ * @param sessionId - The session identifier to apply during the search.
42
+ * @param query - The search query string.
43
+ * @param limit - The maximum number of results to return.
44
+ * @returns An array of search results with directory, file path, and score.
45
+ */
46
+ search(sessionId: SessionId, query: string, limit?: number): FileSearchResult[];
28
47
  close(): void;
29
48
  }
@@ -10,7 +10,7 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
10
10
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
11
11
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
12
12
  };
13
- var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_search;
13
+ var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_deleteSession, _FileIndex_search;
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
15
  const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
16
16
  directory UNINDEXED,
@@ -20,13 +20,16 @@ const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
20
20
  tokenize = 'porter unicode61'
21
21
  )`;
22
22
  const CREATE_BOOST_TABLE_SQL = `CREATE TABLE file_boost (
23
- file_path TEXT PRIMARY KEY,
24
- boost_factor REAL
23
+ session_id TEXT,
24
+ file_path TEXT,
25
+ boost_factor REAL,
26
+ PRIMARY KEY (session_id, file_path)
25
27
  )`;
26
28
  const INSERT_SQL = `INSERT INTO file_content (directory, file_path, file_symbols, file_words)
27
29
  VALUES (?, ?, ?, ?)`;
28
- const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (file_path, boost_factor)
29
- VALUES (?, ?)`;
30
+ const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (session_id, file_path, boost_factor)
31
+ VALUES (?, ?, ?)`;
32
// Deletes every file boost recorded for one session.
// Uses an exact match (`=`) rather than LIKE: LIKE would treat `%` and `_` inside a session id as
// wildcards and compare ASCII case-insensitively, which could remove boosts owned by other sessions.
const DELETE_SESSION_SQL = `DELETE FROM file_boost WHERE session_id = ?`;
30
33
  const SEARCH_SQL = `SELECT
31
34
  file_content.directory,
32
35
  file_content.file_path,
@@ -39,6 +42,7 @@ LEFT JOIN
39
42
  file_boost
40
43
  ON
41
44
  file_content.file_path = file_boost.file_path
45
+ AND file_boost.session_id = ?
42
46
  WHERE
43
47
  file_content MATCH ?
44
48
  ORDER BY
@@ -65,6 +69,7 @@ class FileIndex {
65
69
  this.database = database;
66
70
  _FileIndex_insert.set(this, void 0);
67
71
  _FileIndex_updateBoost.set(this, void 0);
72
+ _FileIndex_deleteSession.set(this, void 0);
68
73
  _FileIndex_search.set(this, void 0);
69
74
  this.database.exec(CREATE_TABLE_SQL);
70
75
  this.database.exec(CREATE_BOOST_TABLE_SQL);
@@ -72,16 +77,37 @@ class FileIndex {
72
77
  this.database.pragma('synchronous = OFF');
73
78
  __classPrivateFieldSet(this, _FileIndex_insert, this.database.prepare(INSERT_SQL), "f");
74
79
  __classPrivateFieldSet(this, _FileIndex_updateBoost, this.database.prepare(UPDATE_BOOST_SQL), "f");
80
+ __classPrivateFieldSet(this, _FileIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
75
81
  __classPrivateFieldSet(this, _FileIndex_search, this.database.prepare(SEARCH_SQL), "f");
76
82
  }
77
83
  indexFile(directory, filePath, symbols, words) {
78
84
  __classPrivateFieldGet(this, _FileIndex_insert, "f").run(directory, filePath, symbols, words);
79
85
  }
80
- boostFile(filePath, boostFactor) {
81
- __classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(filePath, boostFactor);
86
+ /**
87
+ * Boosts the relevance score of a specific file for a given session.
88
+ * @param sessionId - The session identifier to associate the boost with.
89
+ * @param filePath - The path of the file to boost.
90
+ * @param boostFactor - The factor by which to boost the file's relevance.
91
+ */
92
+ boostFile(sessionId, filePath, boostFactor) {
93
+ __classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(sessionId, filePath, boostFactor);
82
94
  }
83
- search(query, limit = 10) {
84
- const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(query, limit);
95
+ /**
96
+ * Deletes all data associated with a specific session.
97
+ * @param sessionId - The session identifier to delete data for.
98
+ */
99
+ deleteSession(sessionId) {
100
+ __classPrivateFieldGet(this, _FileIndex_deleteSession, "f").run(sessionId);
101
+ }
102
+ /**
103
+ * Searches for files matching the query, considering session-specific boosts.
104
+ * @param sessionId - The session identifier to apply during the search.
105
+ * @param query - The search query string.
106
+ * @param limit - The maximum number of results to return.
107
+ * @returns An array of search results with directory, file path, and score.
108
+ */
109
+ search(sessionId, query, limit = 10) {
110
+ const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(sessionId, query, limit);
85
111
  return rows.map((row) => ({
86
112
  directory: row.directory,
87
113
  filePath: row.file_path,
@@ -92,5 +118,5 @@ class FileIndex {
92
118
  this.database.close();
93
119
  }
94
120
  }
95
- _FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_search = new WeakMap();
121
+ _FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_deleteSession = new WeakMap(), _FileIndex_search = new WeakMap();
96
122
  exports.default = FileIndex;
package/built/index.d.ts CHANGED
@@ -1,9 +1,11 @@
1
1
  export { ContentReader, readFileSafe } from './ioutil';
2
+ export { SessionId, generateSessionId } from './session-id';
2
3
  export { Splitter, langchainSplitter } from './splitter';
3
4
  export { ListFn, FilterFn, Tokenizer, default as buildFileIndex } from './build-file-index';
4
5
  export { File, default as buildSnippetIndex } from './build-snippet-index';
5
- export { default as SnippetIndex, SnippetSearchResult } from './snippet-index';
6
+ export { default as SnippetIndex, SnippetSearchResult, SnippetId, encodeSnippetId, parseSnippetId, fileChunkSnippetId, parseFileChunkSnippetId, } from './snippet-index';
6
7
  export { default as FileIndex, FileSearchResult } from './file-index';
7
8
  export { default as listProjectFiles } from './project-files';
8
9
  export { isBinaryFile, isDataFile, isLargeFile } from './file-type';
9
10
  export { fileTokens } from './tokenize';
11
+ export { default as queryKeywords } from './query-keywords';
package/built/index.js CHANGED
@@ -3,9 +3,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.readFileSafe = void 0;
6
+ exports.queryKeywords = exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.parseFileChunkSnippetId = exports.fileChunkSnippetId = exports.parseSnippetId = exports.encodeSnippetId = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.generateSessionId = exports.readFileSafe = void 0;
7
7
  var ioutil_1 = require("./ioutil");
8
8
  Object.defineProperty(exports, "readFileSafe", { enumerable: true, get: function () { return ioutil_1.readFileSafe; } });
9
+ var session_id_1 = require("./session-id");
10
+ Object.defineProperty(exports, "generateSessionId", { enumerable: true, get: function () { return session_id_1.generateSessionId; } });
9
11
  var splitter_1 = require("./splitter");
10
12
  Object.defineProperty(exports, "langchainSplitter", { enumerable: true, get: function () { return splitter_1.langchainSplitter; } });
11
13
  var build_file_index_1 = require("./build-file-index");
@@ -14,6 +16,10 @@ var build_snippet_index_1 = require("./build-snippet-index");
14
16
  Object.defineProperty(exports, "buildSnippetIndex", { enumerable: true, get: function () { return __importDefault(build_snippet_index_1).default; } });
15
17
  var snippet_index_1 = require("./snippet-index");
16
18
  Object.defineProperty(exports, "SnippetIndex", { enumerable: true, get: function () { return __importDefault(snippet_index_1).default; } });
19
+ Object.defineProperty(exports, "encodeSnippetId", { enumerable: true, get: function () { return snippet_index_1.encodeSnippetId; } });
20
+ Object.defineProperty(exports, "parseSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseSnippetId; } });
21
+ Object.defineProperty(exports, "fileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.fileChunkSnippetId; } });
22
+ Object.defineProperty(exports, "parseFileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseFileChunkSnippetId; } });
17
23
  var file_index_1 = require("./file-index");
18
24
  Object.defineProperty(exports, "FileIndex", { enumerable: true, get: function () { return __importDefault(file_index_1).default; } });
19
25
  var project_files_1 = require("./project-files");
@@ -24,3 +30,5 @@ Object.defineProperty(exports, "isDataFile", { enumerable: true, get: function (
24
30
  Object.defineProperty(exports, "isLargeFile", { enumerable: true, get: function () { return file_type_1.isLargeFile; } });
25
31
  var tokenize_1 = require("./tokenize");
26
32
  Object.defineProperty(exports, "fileTokens", { enumerable: true, get: function () { return tokenize_1.fileTokens; } });
33
+ var query_keywords_1 = require("./query-keywords");
34
+ Object.defineProperty(exports, "queryKeywords", { enumerable: true, get: function () { return __importDefault(query_keywords_1).default; } });
@@ -11,7 +11,6 @@ const STOP_WORDS = new Set([
11
11
  'at',
12
12
  'be',
13
13
  'by',
14
- 'code',
15
14
  'for',
16
15
  'from',
17
16
  'has',
@@ -0,0 +1,2 @@
1
+ export type SessionId = string;
2
+ export declare function generateSessionId(): SessionId;
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.generateSessionId = generateSessionId;
4
+ const uuid_1 = require("uuid");
5
/**
 * Creates a new session identifier.
 * @returns The value produced by the `uuid` package's `v4` generator.
 */
function generateSessionId() {
    const sessionId = (0, uuid_1.v4)();
    return sessionId;
}
@@ -1,10 +1,22 @@
1
1
  import sqlite3 from 'better-sqlite3';
2
+ import { SessionId } from './session-id';
3
+ export declare enum SnippetType {
4
+ FileChunk = "file-chunk"
5
+ }
6
+ export type SnippetId = {
7
+ type: string;
8
+ id: string;
9
+ };
10
+ export declare function fileChunkSnippetId(filePath: string, startLine?: number): SnippetId;
11
+ export declare function parseFileChunkSnippetId(snippetId: SnippetId): {
12
+ filePath: string;
13
+ startLine?: number;
14
+ };
15
+ export declare function encodeSnippetId(snippetId: SnippetId): string;
16
+ export declare function parseSnippetId(snippetId: string): SnippetId;
2
17
  export type SnippetSearchResult = {
3
- snippetId: string;
18
+ snippetId: SnippetId;
4
19
  directory: string;
5
- filePath: string;
6
- startLine: number | undefined;
7
- endLine: number | undefined;
8
20
  score: number;
9
21
  content: string;
10
22
  };
@@ -12,8 +24,27 @@ export default class SnippetIndex {
12
24
  #private;
13
25
  database: sqlite3.Database;
14
26
  constructor(database: sqlite3.Database);
15
- indexSnippet(snippetId: string, directory: string, filePath: string, startLine: number | undefined, endLine: number | undefined, symbols: string, words: string, content: string): void;
16
- boostSnippet(snippetId: string, boostFactor: number): void;
17
- searchSnippets(query: string, limit?: number): SnippetSearchResult[];
27
+ /**
28
+ * Deletes all data associated with a specific session.
29
+ * @param sessionId - The session identifier to delete data for.
30
+ */
31
+ deleteSession(sessionId: string): void;
32
+ /**
33
+ * Indexes a code snippet for searchability.
34
+ * @param snippetId - The unique identifier for the snippet.
35
+ * @param directory - The directory where the snippet is located.
36
+ * @param symbols - Symbols (e.g., class names) in the snippet.
37
+ * @param words - General words in the snippet.
38
+ * @param content - The actual content of the snippet.
39
+ */
40
+ indexSnippet(snippetId: SnippetId, directory: string, symbols: string, words: string, content: string): void;
41
+ /**
42
+ * Boosts the relevance score of a specific snippet for a given session.
43
+ * @param sessionId - The session identifier to associate the boost with.
44
+ * @param snippetId - The identifier of the snippet to boost.
45
+ * @param boostFactor - The factor by which to boost the snippet's relevance.
46
+ */
47
+ boostSnippet(sessionId: SessionId, snippetId: SnippetId, boostFactor: number): void;
48
+ searchSnippets(sessionId: SessionId, query: string, limit?: number): SnippetSearchResult[];
18
49
  close(): void;
19
50
  }
@@ -10,34 +10,40 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
10
10
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
11
11
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
12
12
  };
13
- var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_searchSnippet;
13
+ var __importDefault = (this && this.__importDefault) || function (mod) {
14
+ return (mod && mod.__esModule) ? mod : { "default": mod };
15
+ };
16
+ var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_deleteSession, _SnippetIndex_searchSnippet;
14
17
  Object.defineProperty(exports, "__esModule", { value: true });
18
+ exports.SnippetType = void 0;
19
+ exports.fileChunkSnippetId = fileChunkSnippetId;
20
+ exports.parseFileChunkSnippetId = parseFileChunkSnippetId;
21
+ exports.encodeSnippetId = encodeSnippetId;
22
+ exports.parseSnippetId = parseSnippetId;
23
+ const assert_1 = __importDefault(require("assert"));
15
24
  const CREATE_SNIPPET_CONTENT_TABLE_SQL = `CREATE VIRTUAL TABLE snippet_content USING fts5(
16
25
  snippet_id UNINDEXED,
17
26
  directory UNINDEXED,
18
- file_path,
19
- start_line UNINDEXED,
20
- end_line UNINDEXED,
21
27
  file_symbols,
22
28
  file_words,
23
29
  content UNINDEXED,
24
30
  tokenize = 'porter unicode61'
25
31
  )`;
26
32
  const CREATE_SNIPPET_BOOST_TABLE_SQL = `CREATE TABLE snippet_boost (
27
- snippet_id TEXT PRIMARY KEY,
28
- boost_factor REAL
33
+ session_id TEXT,
34
+ snippet_id TEXT,
35
+ boost_factor REAL,
36
+ PRIMARY KEY (session_id, snippet_id)
29
37
  )`;
30
38
  const INSERT_SNIPPET_SQL = `INSERT INTO snippet_content
31
- (snippet_id, directory, file_path, start_line, end_line, file_symbols, file_words, content)
32
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`;
39
+ (snippet_id, directory, file_symbols, file_words, content)
40
+ VALUES (?, ?, ?, ?, ?)`;
41
// Deletes every snippet boost recorded for one session.
// Uses an exact match (`=`) rather than LIKE: LIKE would treat `%` and `_` inside a session id as
// wildcards and compare ASCII case-insensitively, which could remove boosts owned by other sessions.
const DELETE_SESSION_SQL = `DELETE FROM snippet_boost WHERE session_id = ?`;
33
42
  const UPDATE_SNIPPET_BOOST_SQL = `INSERT OR REPLACE INTO snippet_boost
34
- (snippet_id, boost_factor)
35
- VALUES (?, ?)`;
43
+ (session_id, snippet_id, boost_factor)
44
+ VALUES (?, ?, ?)`;
36
45
  const SEARCH_SNIPPET_SQL = `SELECT
37
46
  snippet_content.directory,
38
- snippet_content.file_path,
39
- snippet_content.start_line,
40
- snippet_content.end_line,
41
47
  snippet_content.snippet_id,
42
48
  snippet_content.content,
43
49
  (bm25(snippet_content, 1)*3.0 + bm25(snippet_content, 2)*2.0 + bm25(snippet_content, 3)*1.0)
@@ -49,39 +55,95 @@ LEFT JOIN
49
55
  snippet_boost
50
56
  ON
51
57
  snippet_content.snippet_id = snippet_boost.snippet_id
58
+ AND snippet_boost.session_id = ?
52
59
  WHERE
53
60
  snippet_content MATCH ?
54
61
  ORDER BY
55
62
  score DESC
56
63
  LIMIT ?`;
64
+ var SnippetType;
65
+ (function (SnippetType) {
66
+ SnippetType["FileChunk"] = "file-chunk";
67
+ })(SnippetType || (exports.SnippetType = SnippetType = {}));
68
/**
 * Builds the snippet id for a chunk of a file.
 *
 * The id is the file path and the start line joined by a colon. Falsy parts (an empty path, or a
 * missing or zero start line) are left out, so a call without a start line yields just the path.
 *
 * @param filePath - Path of the file the chunk was taken from.
 * @param startLine - Optional line on which the chunk starts.
 * @returns A snippet id of type `file-chunk`.
 */
function fileChunkSnippetId(filePath, startLine) {
    const segments = [];
    for (const part of [filePath, startLine]) {
        if (part)
            segments.push(part);
    }
    const id = segments.join(':');
    return { type: 'file-chunk', id };
}
74
/**
 * Splits a file-chunk snippet id back into its file path and optional start line.
 *
 * The id is expected in the form produced by `fileChunkSnippetId`: either `<filePath>` or
 * `<filePath>:<startLine>`. The id is split on its LAST colon, and only when the text after that
 * colon starts with a digit, so file paths that themselves contain colons (for example a Windows
 * drive letter as in `C:\src\a.js`) are kept intact.
 *
 * @param snippetId - Snippet id whose `type` must equal `SnippetType.FileChunk`.
 * @returns The file path, plus the start line when the id carries one.
 * @throws AssertionError when the type is not a file chunk or the file path is empty.
 */
function parseFileChunkSnippetId(snippetId) {
    const type = snippetId.type;
    (0, assert_1.default)(type === SnippetType.FileChunk);
    const id = snippetId.id;
    const separatorIndex = id.lastIndexOf(':');
    const suffix = separatorIndex === -1 ? '' : id.slice(separatorIndex + 1);
    // Only a suffix that starts with a digit is a line number; anything else belongs to the path.
    const hasStartLine = /^\d/.test(suffix);
    const filePath = hasStartLine ? id.slice(0, separatorIndex) : id;
    (0, assert_1.default)(filePath);
    return {
        filePath: filePath,
        startLine: hasStartLine ? parseInt(suffix, 10) : undefined,
    };
}
86
/**
 * Serializes a structured snippet id into the single string form `<type>:<id>`.
 * @param snippetId - Snippet id with `type` and `id` fields.
 * @returns The type and id joined by a colon.
 */
function encodeSnippetId(snippetId) {
    const { type, id } = snippetId;
    const fields = [type, id];
    return fields.join(':');
}
89
/**
 * Parses the string form `<type>:<id>` back into a structured snippet id.
 *
 * Only the first colon separates the type from the id; any further colons stay part of the id.
 *
 * @param snippetId - Encoded snippet id string.
 * @returns An object with the `type` and `id` parts.
 * @throws AssertionError when the type part is empty.
 */
function parseSnippetId(snippetId) {
    const [type, ...remainder] = snippetId.split(':');
    (0, assert_1.default)(type);
    return { type, id: remainder.join(':') };
}
57
99
  class SnippetIndex {
58
100
  constructor(database) {
59
101
  this.database = database;
60
102
  _SnippetIndex_insertSnippet.set(this, void 0);
61
103
  _SnippetIndex_updateSnippetBoost.set(this, void 0);
104
+ _SnippetIndex_deleteSession.set(this, void 0);
62
105
  _SnippetIndex_searchSnippet.set(this, void 0);
63
106
  this.database.exec(CREATE_SNIPPET_CONTENT_TABLE_SQL);
64
107
  this.database.exec(CREATE_SNIPPET_BOOST_TABLE_SQL);
65
108
  this.database.pragma('journal_mode = OFF');
66
109
  this.database.pragma('synchronous = OFF');
67
110
  __classPrivateFieldSet(this, _SnippetIndex_insertSnippet, this.database.prepare(INSERT_SNIPPET_SQL), "f");
111
+ __classPrivateFieldSet(this, _SnippetIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
68
112
  __classPrivateFieldSet(this, _SnippetIndex_updateSnippetBoost, this.database.prepare(UPDATE_SNIPPET_BOOST_SQL), "f");
69
113
  __classPrivateFieldSet(this, _SnippetIndex_searchSnippet, this.database.prepare(SEARCH_SNIPPET_SQL), "f");
70
114
  }
71
- indexSnippet(snippetId, directory, filePath, startLine, endLine, symbols, words, content) {
72
- __classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(snippetId, directory, filePath, startLine, endLine, symbols, words, content);
115
+ /**
116
+ * Deletes all data associated with a specific session.
117
+ * @param sessionId - The session identifier to delete data for.
118
+ */
119
+ deleteSession(sessionId) {
120
+ __classPrivateFieldGet(this, _SnippetIndex_deleteSession, "f").run(sessionId);
121
+ }
122
+ /**
123
+ * Indexes a code snippet for searchability.
124
+ * @param snippetId - The unique identifier for the snippet.
125
+ * @param directory - The directory where the snippet is located.
126
+ * @param symbols - Symbols (e.g., class names) in the snippet.
127
+ * @param words - General words in the snippet.
128
+ * @param content - The actual content of the snippet.
129
+ */
130
+ indexSnippet(snippetId, directory, symbols, words, content) {
131
+ __classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(encodeSnippetId(snippetId), directory, symbols, words, content);
73
132
  }
74
- boostSnippet(snippetId, boostFactor) {
75
- __classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(snippetId, boostFactor);
133
+ /**
134
+ * Boosts the relevance score of a specific snippet for a given session.
135
+ * @param sessionId - The session identifier to associate the boost with.
136
+ * @param snippetId - The identifier of the snippet to boost.
137
+ * @param boostFactor - The factor by which to boost the snippet's relevance.
138
+ */
139
+ boostSnippet(sessionId, snippetId, boostFactor) {
140
+ __classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(sessionId, encodeSnippetId(snippetId), boostFactor);
76
141
  }
77
- searchSnippets(query, limit = 10) {
78
- const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(query, limit);
142
+ searchSnippets(sessionId, query, limit = 10) {
143
+ const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(sessionId, query, limit);
79
144
  return rows.map((row) => ({
80
145
  directory: row.directory,
81
- snippetId: row.snippet_id,
82
- filePath: row.file_path,
83
- startLine: row.start_line,
84
- endLine: row.end_line,
146
+ snippetId: parseSnippetId(row.snippet_id),
85
147
  score: row.score,
86
148
  content: row.content,
87
149
  }));
@@ -90,5 +152,5 @@ class SnippetIndex {
90
152
  this.database.close();
91
153
  }
92
154
  }
93
- _SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
155
+ _SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_deleteSession = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
94
156
  exports.default = SnippetIndex;
package/built/splitter.js CHANGED
@@ -42,9 +42,11 @@ async function langchainSplitter(content, fileExtension) {
42
42
  const loc = doc.metadata?.loc;
43
43
  const lines = loc?.lines;
44
44
  const result = {
45
- content: doc.pageContent,
45
+ content: '',
46
46
  };
47
47
  if (lines) {
48
+ const contentLines = content.split('\n');
49
+ result.content = contentLines.slice(lines.from - 1, lines.to).join('\n');
48
50
  result.startLine = lines.from;
49
51
  result.endLine = lines.to;
50
52
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@appland/search",
3
- "version": "1.0.1",
3
+ "version": "1.1.1",
4
4
  "description": "",
5
5
  "bin": "built/cli.js",
6
6
  "publishConfig": {