@appland/search 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,26 @@
1
+ # [@appland/search-v1.1.0](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.1...@appland/search-v1.1.0) (2024-12-01)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * Pass absolute path when loading file content ([85060bb](https://github.com/getappmap/appmap-js/commit/85060bb432fec9a1ee2d461fa671cb18b0f21fe6))
7
+ * Search for 'code' ([d209727](https://github.com/getappmap/appmap-js/commit/d209727d4ec19d8027b1cb4eb36ed31a60d9eb21))
8
+
9
+
10
+ ### Features
11
+
12
+ * Add session deletion ([9ccd947](https://github.com/getappmap/appmap-js/commit/9ccd947f110857d5d881a31bf0c947bb02f1f2c5))
13
+ * Associate boost factor data with a session id ([7031193](https://github.com/getappmap/appmap-js/commit/70311932553adb0aca4ae7f6f11af23790921bdf))
14
+ * Define and export SnippetId type ([8e3be79](https://github.com/getappmap/appmap-js/commit/8e3be7949c62a11ed1d57b1c88df2868aa3f10cd))
15
+ * Search for AppMap data using @appland/search ([ac00047](https://github.com/getappmap/appmap-js/commit/ac0004717147a095f1fa609c2aa341dec6e6c7bc))
16
+
17
+ # [@appland/search-v1.0.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.0...@appland/search-v1.0.1) (2024-12-01)
18
+
19
+
20
+ ### Bug Fixes
21
+
22
+ * Detect and skip binary files when indexing ([b42fedf](https://github.com/getappmap/appmap-js/commit/b42fedf258e42539243f3aea2727115846b8f19b))
23
+
1
24
  # @appland/search-v1.0.0 (2024-11-06)
2
25
 
3
26
 
package/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # `@appland/search`
2
+
3
+ ## session_id
4
+
5
+ The `session id` parameter distinguishes the boost records of one search session from those of another. Its
6
+ purpose is to make sure that the boost factors, which affect search results, remain specific to a
7
+ particular search session and do not impact other concurrent search sessions.
8
+
9
+ ### Intent
10
+
11
+ The main goal of introducing a `session id` is to:
12
+
13
+ 1. **Isolate Boost Factors**: By associating each boost record with a unique session id, different
14
+ sessions' boost factors are kept separate. This means that boosting done in one session doesn't
15
+ unintentionally affect another.
16
+
17
+ 2. **Maintain Contextual Relevance**: Boost factors should only influence the search results within
18
+ the scope of their intended search session. This ensures the search system remains contextually
19
+ aware and the results are relevant to the specific scenarios where the boosts were applied.
20
+
21
+ ### How It Works for Concurrency
22
+
23
+ - **Storage**: Each boost factor is stored together with a `session id`, which is a column in the
24
+ database schema for boost records. This ties the boost factor directly to the user's session that
25
+ triggered it.
26
+
27
+ - **Filtering**: When search operations are performed, only boost factors associated with the
28
+ current session id are considered. This filtering ensures that only relevant boosts are applied to
29
+ the search results.
30
+
31
+ - **Concurrent Use**: In environments where multiple users or sessions are interacting with the
32
+ search system simultaneously, the session id ensures that one session's boost factors don't spill
33
+ over and impact the results of another session. This isolation is crucial in multi-user systems
34
+ where search personalization is required.
35
+
36
+ ### Session Deletion
37
+
38
+ The system provides a mechanism to delete all data associated with a specific session. This is
39
+ achieved through the `deleteSession` method available in both the `FileIndex` and `SnippetIndex`
40
+ classes. By invoking this method with a session id, all boost factors and related data tied to that
41
+ session are removed from the database, ensuring that no residual data affects future search
42
+ operations.
43
+
44
+ ### Entity-Relationship Diagram
45
+
46
+ ```mermaid
47
+ erDiagram
48
+ FILE_CONTENT {
49
+ TEXT directory
50
+ TEXT file_path
51
+ TEXT file_symbols
52
+ TEXT file_words
53
+ }
54
+ FILE_BOOST {
55
+ TEXT session_id
56
+ TEXT file_path
57
+ REAL boost_factor
58
+ }
59
+
60
+
61
+ SNIPPET_CONTENT {
62
+ TEXT snippet_id
63
+ TEXT directory
64
+ TEXT file_symbols
65
+ TEXT file_words
66
+ TEXT content
67
+ }
68
+ SNIPPET_BOOST {
69
+ TEXT session_id
70
+ TEXT snippet_id
71
+ REAL boost_factor
72
+ }
73
+
74
+ FILE_CONTENT ||--|| FILE_BOOST : "Is boosted by"
75
+ SNIPPET_CONTENT ||--|| SNIPPET_BOOST : "Is boosted by"
76
+ ```
@@ -7,28 +7,40 @@ exports.default = buildFileIndex;
7
7
  const debug_1 = __importDefault(require("debug"));
8
8
  const path_1 = require("path");
9
9
  const console_1 = require("console");
10
+ const types_1 = require("util/types");
10
11
  const debug = (0, debug_1.default)('appmap:search:build-index');
11
12
  async function indexFile(context, filePath) {
13
+ debug('Indexing file: %s', filePath);
12
14
  const fileContents = await context.contentReader(filePath);
13
15
  if (!fileContents)
14
16
  return;
17
+ debug('Read file: %s, length: %d (%s...)', filePath, fileContents.length, fileContents.slice(0, 40));
15
18
  const tokens = context.tokenizer(fileContents, filePath);
16
19
  const symbols = tokens.symbols.join(' ');
17
20
  const words = tokens.words.join(' ');
21
+ debug('Tokenized file: %s', filePath);
18
22
  context.fileIndex.indexFile(context.baseDirectory, filePath, symbols, words);
23
+ debug('Wrote file to index: %s', filePath);
19
24
  }
20
25
  async function indexDirectory(context, directory) {
21
26
  const dirContents = await context.listDirectory(directory);
22
27
  if (!dirContents)
23
28
  return;
24
29
  for (const dirContentItem of dirContents) {
25
- const filePath = (0, path_1.join)(directory, dirContentItem);
30
+ let filePath;
31
+ if ((0, path_1.isAbsolute)(dirContentItem))
32
+ filePath = dirContentItem;
33
+ else
34
+ filePath = (0, path_1.join)(directory, dirContentItem);
26
35
  debug('Indexing: %s', filePath);
27
36
  if (await context.fileFilter(filePath)) {
28
- indexFile(context, filePath).catch((e) => {
29
- (0, console_1.warn)(`Error indexing file: ${filePath}`);
30
- (0, console_1.warn)(e);
31
- });
37
+ try {
38
+ await indexFile(context, filePath);
39
+ }
40
+ catch (e) {
41
+ const message = (0, types_1.isNativeError)(e) ? e.message : String(e);
42
+ (0, console_1.warn)(`Error indexing file ${filePath}: ${message}`);
43
+ }
32
44
  }
33
45
  }
34
46
  }
@@ -1,16 +1,19 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.default = buildSnippetIndex;
4
+ const path_1 = require("path");
5
+ const snippet_index_1 = require("./snippet-index");
4
6
  async function indexFile(context, file) {
5
- const fileContent = await context.contentReader(file.filePath);
7
+ const filePath = (0, path_1.isAbsolute)(file.filePath) ? file.filePath : (0, path_1.join)(file.directory, file.filePath);
8
+ const fileContent = await context.contentReader(filePath);
6
9
  if (!fileContent)
7
10
  return;
8
11
  const extension = file.filePath.split('.').pop() || '';
9
12
  const chunks = await context.splitter(fileContent, extension);
10
- chunks.forEach((chunk, index) => {
11
- const snippetId = `${file.filePath}:${index}`;
12
- const { content, startLine, endLine } = chunk;
13
- context.snippetIndex.indexSnippet(snippetId, file.directory, file.filePath, startLine, endLine, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content);
13
+ chunks.forEach((chunk) => {
14
+ const { content, startLine } = chunk;
15
+ const snippetId = (0, snippet_index_1.fileChunkSnippetId)(filePath, startLine);
16
+ context.snippetIndex.indexSnippet(snippetId, file.directory, context.tokenizer(content, file.filePath).symbols.join(' '), context.tokenizer(content, file.filePath).words.join(' '), content);
14
17
  });
15
18
  }
16
19
  async function buildSnippetIndex(snippetIndex, files, contentReader, splitter, tokenizer) {
package/built/cli.js CHANGED
@@ -17,6 +17,7 @@ const build_snippet_index_1 = __importDefault(require("./build-snippet-index"));
17
17
  const ioutil_1 = require("./ioutil");
18
18
  const splitter_1 = require("./splitter");
19
19
  const assert_1 = __importDefault(require("assert"));
20
+ const session_id_1 = require("./session-id");
20
21
  const debug = (0, debug_1.default)('appmap:search:cli');
21
22
  const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
22
23
  .command('* <query>', 'Index directories and perform a search', (yargs) => {
@@ -58,6 +59,7 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
58
59
  };
59
60
  const db = new better_sqlite3_1.default(':memory:');
60
61
  const fileIndex = new file_index_1.default(db);
62
+ const sessionId = (0, session_id_1.generateSessionId)();
61
63
  await (0, build_file_index_1.default)(fileIndex, directories, project_files_1.default, fileFilter, ioutil_1.readFileSafe, tokenize_1.fileTokens);
62
64
  const filePathAtMostThreeEntries = (filePath) => {
63
65
  const parts = filePath.split('/');
@@ -65,13 +67,13 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
65
67
  return filePath;
66
68
  return `.../${parts.slice(-3).join('/')}`;
67
69
  };
68
- const printResult = (filePath, score) => console.log('%s %s', filePathAtMostThreeEntries(filePath), score.toPrecision(3));
70
+ const printResult = (type, id, score) => console.log('%s %s %s', type, filePathAtMostThreeEntries(id), score.toPrecision(3));
69
71
  console.log('File search results');
70
72
  console.log('-------------------');
71
- const fileSearchResults = fileIndex.search(query);
73
+ const fileSearchResults = fileIndex.search(sessionId, query);
72
74
  for (const result of fileSearchResults) {
73
75
  const { filePath, score } = result;
74
- printResult(filePath, score);
76
+ printResult('file', filePath, score);
75
77
  }
76
78
  const splitter = splitter_1.langchainSplitter;
77
79
  const snippetIndex = new snippet_index_1.default(db);
@@ -80,10 +82,12 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
80
82
  console.log('Snippet search results');
81
83
  console.log('----------------------');
82
84
  const isNullOrUndefined = (value) => value === null || value === undefined;
83
- const snippetSearchResults = snippetIndex.searchSnippets(query);
85
+ const snippetSearchResults = snippetIndex.searchSnippets(sessionId, query);
84
86
  for (const result of snippetSearchResults) {
85
- const { snippetId, filePath, startLine, endLine, score } = result;
86
- printResult(snippetId, score);
87
+ const { snippetId, score } = result;
88
+ printResult(snippetId.type, snippetId.id, score);
89
+ const [filePath, range] = snippetId.id.split(':');
90
+ const [startLine, endLine] = range.split('-').map((n) => parseInt(n, 10));
87
91
  if (isNullOrUndefined(startLine) || isNullOrUndefined(endLine))
88
92
  continue;
89
93
  const content = await (0, ioutil_1.readFileSafe)(filePath);
@@ -1,4 +1,5 @@
1
1
  import sqlite3 from 'better-sqlite3';
2
+ import { SessionId } from './session-id';
2
3
  export type FileSearchResult = {
3
4
  directory: string;
4
5
  filePath: string;
@@ -23,7 +24,25 @@ export default class FileIndex {
23
24
  database: sqlite3.Database;
24
25
  constructor(database: sqlite3.Database);
25
26
  indexFile(directory: string, filePath: string, symbols: string, words: string): void;
26
- boostFile(filePath: string, boostFactor: number): void;
27
- search(query: string, limit?: number): FileSearchResult[];
27
+ /**
28
+ * Boosts the relevance score of a specific file for a given session.
29
+ * @param sessionId - The session identifier to associate the boost with.
30
+ * @param filePath - The path of the file to boost.
31
+ * @param boostFactor - The factor by which to boost the file's relevance.
32
+ */
33
+ boostFile(sessionId: SessionId, filePath: string, boostFactor: number): void;
34
+ /**
35
+ * Deletes all data associated with a specific session.
36
+ * @param sessionId - The session identifier to delete data for.
37
+ */
38
+ deleteSession(sessionId: string): void;
39
+ /**
40
+ * Searches for files matching the query, considering session-specific boosts.
41
+ * @param sessionId - The session identifier to apply during the search.
42
+ * @param query - The search query string.
43
+ * @param limit - The maximum number of results to return.
44
+ * @returns An array of search results with directory, file path, and score.
45
+ */
46
+ search(sessionId: SessionId, query: string, limit?: number): FileSearchResult[];
28
47
  close(): void;
29
48
  }
@@ -10,7 +10,7 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
10
10
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
11
11
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
12
12
  };
13
- var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_search;
13
+ var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_deleteSession, _FileIndex_search;
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
15
  const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
16
16
  directory UNINDEXED,
@@ -20,13 +20,16 @@ const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
20
20
  tokenize = 'porter unicode61'
21
21
  )`;
22
22
  const CREATE_BOOST_TABLE_SQL = `CREATE TABLE file_boost (
23
- file_path TEXT PRIMARY KEY,
24
- boost_factor REAL
23
+ session_id TEXT,
24
+ file_path TEXT,
25
+ boost_factor REAL,
26
+ PRIMARY KEY (session_id, file_path)
25
27
  )`;
26
28
  const INSERT_SQL = `INSERT INTO file_content (directory, file_path, file_symbols, file_words)
27
29
  VALUES (?, ?, ?, ?)`;
28
- const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (file_path, boost_factor)
29
- VALUES (?, ?)`;
30
+ const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (session_id, file_path, boost_factor)
31
+ VALUES (?, ?, ?)`;
32
+ const DELETE_SESSION_SQL = `DELETE FROM file_boost WHERE session_id LIKE ?`;
30
33
  const SEARCH_SQL = `SELECT
31
34
  file_content.directory,
32
35
  file_content.file_path,
@@ -39,6 +42,7 @@ LEFT JOIN
39
42
  file_boost
40
43
  ON
41
44
  file_content.file_path = file_boost.file_path
45
+ AND file_boost.session_id = ?
42
46
  WHERE
43
47
  file_content MATCH ?
44
48
  ORDER BY
@@ -65,6 +69,7 @@ class FileIndex {
65
69
  this.database = database;
66
70
  _FileIndex_insert.set(this, void 0);
67
71
  _FileIndex_updateBoost.set(this, void 0);
72
+ _FileIndex_deleteSession.set(this, void 0);
68
73
  _FileIndex_search.set(this, void 0);
69
74
  this.database.exec(CREATE_TABLE_SQL);
70
75
  this.database.exec(CREATE_BOOST_TABLE_SQL);
@@ -72,16 +77,37 @@ class FileIndex {
72
77
  this.database.pragma('synchronous = OFF');
73
78
  __classPrivateFieldSet(this, _FileIndex_insert, this.database.prepare(INSERT_SQL), "f");
74
79
  __classPrivateFieldSet(this, _FileIndex_updateBoost, this.database.prepare(UPDATE_BOOST_SQL), "f");
80
+ __classPrivateFieldSet(this, _FileIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
75
81
  __classPrivateFieldSet(this, _FileIndex_search, this.database.prepare(SEARCH_SQL), "f");
76
82
  }
77
83
  indexFile(directory, filePath, symbols, words) {
78
84
  __classPrivateFieldGet(this, _FileIndex_insert, "f").run(directory, filePath, symbols, words);
79
85
  }
80
- boostFile(filePath, boostFactor) {
81
- __classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(filePath, boostFactor);
86
+ /**
87
+ * Boosts the relevance score of a specific file for a given session.
88
+ * @param sessionId - The session identifier to associate the boost with.
89
+ * @param filePath - The path of the file to boost.
90
+ * @param boostFactor - The factor by which to boost the file's relevance.
91
+ */
92
+ boostFile(sessionId, filePath, boostFactor) {
93
+ __classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(sessionId, filePath, boostFactor);
82
94
  }
83
- search(query, limit = 10) {
84
- const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(query, limit);
95
+ /**
96
+ * Deletes all data associated with a specific session.
97
+ * @param sessionId - The session identifier to delete data for.
98
+ */
99
+ deleteSession(sessionId) {
100
+ __classPrivateFieldGet(this, _FileIndex_deleteSession, "f").run(sessionId);
101
+ }
102
+ /**
103
+ * Searches for files matching the query, considering session-specific boosts.
104
+ * @param sessionId - The session identifier to apply during the search.
105
+ * @param query - The search query string.
106
+ * @param limit - The maximum number of results to return.
107
+ * @returns An array of search results with directory, file path, and score.
108
+ */
109
+ search(sessionId, query, limit = 10) {
110
+ const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(sessionId, query, limit);
85
111
  return rows.map((row) => ({
86
112
  directory: row.directory,
87
113
  filePath: row.file_path,
@@ -92,5 +118,5 @@ class FileIndex {
92
118
  this.database.close();
93
119
  }
94
120
  }
95
- _FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_search = new WeakMap();
121
+ _FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_deleteSession = new WeakMap(), _FileIndex_search = new WeakMap();
96
122
  exports.default = FileIndex;
@@ -1,3 +1,3 @@
1
1
  export declare const isLargeFile: (fileName: string) => Promise<boolean>;
2
- export declare const isBinaryFile: (fileName: string) => boolean;
2
+ export declare const isBinaryFile: (filePath: string) => boolean;
3
3
  export declare const isDataFile: (fileName: string) => boolean;
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.isDataFile = exports.isBinaryFile = exports.isLargeFile = void 0;
7
7
  const promises_1 = require("fs/promises");
8
8
  const debug_1 = __importDefault(require("debug"));
9
+ const isbinaryfile_1 = require("isbinaryfile");
9
10
  const debug = (0, debug_1.default)('appmap:search:file-type');
10
11
  const BINARY_FILE_EXTENSIONS = [
11
12
  '7z',
@@ -107,8 +108,17 @@ const isLargeFile = async (fileName) => {
107
108
  return fileSize > largeFileThreshold();
108
109
  };
109
110
  exports.isLargeFile = isLargeFile;
110
- const isBinaryFile = (fileName) => {
111
- return BINARY_FILE_EXTENSIONS.some((ext) => fileName.endsWith(ext));
111
+ const isBinaryFile = (filePath) => {
112
+ if (BINARY_FILE_EXTENSIONS.some((ext) => filePath.endsWith(ext)))
113
+ return true;
114
+ try {
115
+ return (0, isbinaryfile_1.isBinaryFileSync)(filePath);
116
+ }
117
+ catch (error) {
118
+ debug(`Error reading file: %s`, filePath);
119
+ debug(error);
120
+ return false;
121
+ }
112
122
  };
113
123
  exports.isBinaryFile = isBinaryFile;
114
124
  const isDataFile = (fileName) => {
package/built/index.d.ts CHANGED
@@ -1,9 +1,11 @@
1
1
  export { ContentReader, readFileSafe } from './ioutil';
2
+ export { SessionId, generateSessionId } from './session-id';
2
3
  export { Splitter, langchainSplitter } from './splitter';
3
4
  export { ListFn, FilterFn, Tokenizer, default as buildFileIndex } from './build-file-index';
4
5
  export { File, default as buildSnippetIndex } from './build-snippet-index';
5
- export { default as SnippetIndex, SnippetSearchResult } from './snippet-index';
6
+ export { default as SnippetIndex, SnippetSearchResult, SnippetId, encodeSnippetId, parseSnippetId, fileChunkSnippetId, parseFileChunkSnippetId, } from './snippet-index';
6
7
  export { default as FileIndex, FileSearchResult } from './file-index';
7
8
  export { default as listProjectFiles } from './project-files';
8
9
  export { isBinaryFile, isDataFile, isLargeFile } from './file-type';
9
10
  export { fileTokens } from './tokenize';
11
+ export { default as queryKeywords } from './query-keywords';
package/built/index.js CHANGED
@@ -3,9 +3,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.readFileSafe = void 0;
6
+ exports.queryKeywords = exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.parseFileChunkSnippetId = exports.fileChunkSnippetId = exports.parseSnippetId = exports.encodeSnippetId = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.generateSessionId = exports.readFileSafe = void 0;
7
7
  var ioutil_1 = require("./ioutil");
8
8
  Object.defineProperty(exports, "readFileSafe", { enumerable: true, get: function () { return ioutil_1.readFileSafe; } });
9
+ var session_id_1 = require("./session-id");
10
+ Object.defineProperty(exports, "generateSessionId", { enumerable: true, get: function () { return session_id_1.generateSessionId; } });
9
11
  var splitter_1 = require("./splitter");
10
12
  Object.defineProperty(exports, "langchainSplitter", { enumerable: true, get: function () { return splitter_1.langchainSplitter; } });
11
13
  var build_file_index_1 = require("./build-file-index");
@@ -14,6 +16,10 @@ var build_snippet_index_1 = require("./build-snippet-index");
14
16
  Object.defineProperty(exports, "buildSnippetIndex", { enumerable: true, get: function () { return __importDefault(build_snippet_index_1).default; } });
15
17
  var snippet_index_1 = require("./snippet-index");
16
18
  Object.defineProperty(exports, "SnippetIndex", { enumerable: true, get: function () { return __importDefault(snippet_index_1).default; } });
19
+ Object.defineProperty(exports, "encodeSnippetId", { enumerable: true, get: function () { return snippet_index_1.encodeSnippetId; } });
20
+ Object.defineProperty(exports, "parseSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseSnippetId; } });
21
+ Object.defineProperty(exports, "fileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.fileChunkSnippetId; } });
22
+ Object.defineProperty(exports, "parseFileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseFileChunkSnippetId; } });
17
23
  var file_index_1 = require("./file-index");
18
24
  Object.defineProperty(exports, "FileIndex", { enumerable: true, get: function () { return __importDefault(file_index_1).default; } });
19
25
  var project_files_1 = require("./project-files");
@@ -24,3 +30,5 @@ Object.defineProperty(exports, "isDataFile", { enumerable: true, get: function (
24
30
  Object.defineProperty(exports, "isLargeFile", { enumerable: true, get: function () { return file_type_1.isLargeFile; } });
25
31
  var tokenize_1 = require("./tokenize");
26
32
  Object.defineProperty(exports, "fileTokens", { enumerable: true, get: function () { return tokenize_1.fileTokens; } });
33
+ var query_keywords_1 = require("./query-keywords");
34
+ Object.defineProperty(exports, "queryKeywords", { enumerable: true, get: function () { return __importDefault(query_keywords_1).default; } });
@@ -11,7 +11,6 @@ const STOP_WORDS = new Set([
11
11
  'at',
12
12
  'be',
13
13
  'by',
14
- 'code',
15
14
  'for',
16
15
  'from',
17
16
  'has',
@@ -0,0 +1,2 @@
1
+ export type SessionId = string;
2
+ export declare function generateSessionId(): SessionId;
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.generateSessionId = generateSessionId;
4
+ const uuid_1 = require("uuid");
5
+ function generateSessionId() {
6
+ return (0, uuid_1.v4)();
7
+ }
@@ -1,10 +1,22 @@
1
1
  import sqlite3 from 'better-sqlite3';
2
+ import { SessionId } from './session-id';
3
+ export declare enum SnippetType {
4
+ FileChunk = "file-chunk"
5
+ }
6
+ export type SnippetId = {
7
+ type: string;
8
+ id: string;
9
+ };
10
+ export declare function fileChunkSnippetId(filePath: string, startLine?: number): SnippetId;
11
+ export declare function parseFileChunkSnippetId(snippetId: SnippetId): {
12
+ filePath: string;
13
+ startLine?: number;
14
+ };
15
+ export declare function encodeSnippetId(snippetId: SnippetId): string;
16
+ export declare function parseSnippetId(snippetId: string): SnippetId;
2
17
  export type SnippetSearchResult = {
3
- snippetId: string;
18
+ snippetId: SnippetId;
4
19
  directory: string;
5
- filePath: string;
6
- startLine: number | undefined;
7
- endLine: number | undefined;
8
20
  score: number;
9
21
  content: string;
10
22
  };
@@ -12,8 +24,27 @@ export default class SnippetIndex {
12
24
  #private;
13
25
  database: sqlite3.Database;
14
26
  constructor(database: sqlite3.Database);
15
- indexSnippet(snippetId: string, directory: string, filePath: string, startLine: number | undefined, endLine: number | undefined, symbols: string, words: string, content: string): void;
16
- boostSnippet(snippetId: string, boostFactor: number): void;
17
- searchSnippets(query: string, limit?: number): SnippetSearchResult[];
27
+ /**
28
+ * Deletes all data associated with a specific session.
29
+ * @param sessionId - The session identifier to delete data for.
30
+ */
31
+ deleteSession(sessionId: string): void;
32
+ /**
33
+ * Indexes a code snippet for searchability.
34
+ * @param snippetId - The unique identifier for the snippet.
35
+ * @param directory - The directory where the snippet is located.
36
+ * @param symbols - Symbols (e.g., class names) in the snippet.
37
+ * @param words - General words in the snippet.
38
+ * @param content - The actual content of the snippet.
39
+ */
40
+ indexSnippet(snippetId: SnippetId, directory: string, symbols: string, words: string, content: string): void;
41
+ /**
42
+ * Boosts the relevance score of a specific snippet for a given session.
43
+ * @param sessionId - The session identifier to associate the boost with.
44
+ * @param snippetId - The identifier of the snippet to boost.
45
+ * @param boostFactor - The factor by which to boost the snippet's relevance.
46
+ */
47
+ boostSnippet(sessionId: SessionId, snippetId: SnippetId, boostFactor: number): void;
48
+ searchSnippets(sessionId: SessionId, query: string, limit?: number): SnippetSearchResult[];
18
49
  close(): void;
19
50
  }
@@ -10,34 +10,40 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
10
10
  if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
11
11
  return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
12
12
  };
13
- var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_searchSnippet;
13
+ var __importDefault = (this && this.__importDefault) || function (mod) {
14
+ return (mod && mod.__esModule) ? mod : { "default": mod };
15
+ };
16
+ var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_deleteSession, _SnippetIndex_searchSnippet;
14
17
  Object.defineProperty(exports, "__esModule", { value: true });
18
+ exports.SnippetType = void 0;
19
+ exports.fileChunkSnippetId = fileChunkSnippetId;
20
+ exports.parseFileChunkSnippetId = parseFileChunkSnippetId;
21
+ exports.encodeSnippetId = encodeSnippetId;
22
+ exports.parseSnippetId = parseSnippetId;
23
+ const assert_1 = __importDefault(require("assert"));
15
24
  const CREATE_SNIPPET_CONTENT_TABLE_SQL = `CREATE VIRTUAL TABLE snippet_content USING fts5(
16
25
  snippet_id UNINDEXED,
17
26
  directory UNINDEXED,
18
- file_path,
19
- start_line UNINDEXED,
20
- end_line UNINDEXED,
21
27
  file_symbols,
22
28
  file_words,
23
29
  content UNINDEXED,
24
30
  tokenize = 'porter unicode61'
25
31
  )`;
26
32
  const CREATE_SNIPPET_BOOST_TABLE_SQL = `CREATE TABLE snippet_boost (
27
- snippet_id TEXT PRIMARY KEY,
28
- boost_factor REAL
33
+ session_id TEXT,
34
+ snippet_id TEXT,
35
+ boost_factor REAL,
36
+ PRIMARY KEY (session_id, snippet_id)
29
37
  )`;
30
38
  const INSERT_SNIPPET_SQL = `INSERT INTO snippet_content
31
- (snippet_id, directory, file_path, start_line, end_line, file_symbols, file_words, content)
32
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`;
39
+ (snippet_id, directory, file_symbols, file_words, content)
40
+ VALUES (?, ?, ?, ?, ?)`;
41
+ const DELETE_SESSION_SQL = `DELETE FROM snippet_boost WHERE session_id LIKE ?`;
33
42
  const UPDATE_SNIPPET_BOOST_SQL = `INSERT OR REPLACE INTO snippet_boost
34
- (snippet_id, boost_factor)
35
- VALUES (?, ?)`;
43
+ (session_id, snippet_id, boost_factor)
44
+ VALUES (?, ?, ?)`;
36
45
  const SEARCH_SNIPPET_SQL = `SELECT
37
46
  snippet_content.directory,
38
- snippet_content.file_path,
39
- snippet_content.start_line,
40
- snippet_content.end_line,
41
47
  snippet_content.snippet_id,
42
48
  snippet_content.content,
43
49
  (bm25(snippet_content, 1)*3.0 + bm25(snippet_content, 2)*2.0 + bm25(snippet_content, 3)*1.0)
@@ -49,39 +55,95 @@ LEFT JOIN
49
55
  snippet_boost
50
56
  ON
51
57
  snippet_content.snippet_id = snippet_boost.snippet_id
58
+ AND snippet_boost.session_id = ?
52
59
  WHERE
53
60
  snippet_content MATCH ?
54
61
  ORDER BY
55
62
  score DESC
56
63
  LIMIT ?`;
64
+ var SnippetType;
65
+ (function (SnippetType) {
66
+ SnippetType["FileChunk"] = "file-chunk";
67
+ })(SnippetType || (exports.SnippetType = SnippetType = {}));
68
+ function fileChunkSnippetId(filePath, startLine) {
69
+ return {
70
+ type: 'file-chunk',
71
+ id: [filePath, startLine].filter(Boolean).join(':'),
72
+ };
73
+ }
74
+ function parseFileChunkSnippetId(snippetId) {
75
+ const type = snippetId.type;
76
+ (0, assert_1.default)(type === SnippetType.FileChunk);
77
+ const parts = snippetId.id.split(':');
78
+ const filePath = parts.shift();
79
+ (0, assert_1.default)(filePath);
80
+ const startLine = parts.shift();
81
+ return {
82
+ filePath: filePath,
83
+ startLine: startLine ? parseInt(startLine, 10) : undefined,
84
+ };
85
+ }
86
+ function encodeSnippetId(snippetId) {
87
+ return [snippetId.type, snippetId.id].join(':');
88
+ }
89
+ function parseSnippetId(snippetId) {
90
+ const parts = snippetId.split(':');
91
+ const type = parts.shift();
92
+ (0, assert_1.default)(type);
93
+ const id = parts.join(':');
94
+ return {
95
+ type,
96
+ id,
97
+ };
98
+ }
57
99
  class SnippetIndex {
58
100
  constructor(database) {
59
101
  this.database = database;
60
102
  _SnippetIndex_insertSnippet.set(this, void 0);
61
103
  _SnippetIndex_updateSnippetBoost.set(this, void 0);
104
+ _SnippetIndex_deleteSession.set(this, void 0);
62
105
  _SnippetIndex_searchSnippet.set(this, void 0);
63
106
  this.database.exec(CREATE_SNIPPET_CONTENT_TABLE_SQL);
64
107
  this.database.exec(CREATE_SNIPPET_BOOST_TABLE_SQL);
65
108
  this.database.pragma('journal_mode = OFF');
66
109
  this.database.pragma('synchronous = OFF');
67
110
  __classPrivateFieldSet(this, _SnippetIndex_insertSnippet, this.database.prepare(INSERT_SNIPPET_SQL), "f");
111
+ __classPrivateFieldSet(this, _SnippetIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
68
112
  __classPrivateFieldSet(this, _SnippetIndex_updateSnippetBoost, this.database.prepare(UPDATE_SNIPPET_BOOST_SQL), "f");
69
113
  __classPrivateFieldSet(this, _SnippetIndex_searchSnippet, this.database.prepare(SEARCH_SNIPPET_SQL), "f");
70
114
  }
71
- indexSnippet(snippetId, directory, filePath, startLine, endLine, symbols, words, content) {
72
- __classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(snippetId, directory, filePath, startLine, endLine, symbols, words, content);
115
+ /**
116
+ * Deletes all data associated with a specific session.
117
+ * @param sessionId - The session identifier to delete data for.
118
+ */
119
+ deleteSession(sessionId) {
120
+ __classPrivateFieldGet(this, _SnippetIndex_deleteSession, "f").run(sessionId);
121
+ }
122
+ /**
123
+ * Indexes a code snippet for searchability.
124
+ * @param snippetId - The unique identifier for the snippet.
125
+ * @param directory - The directory where the snippet is located.
126
+ * @param symbols - Symbols (e.g., class names) in the snippet.
127
+ * @param words - General words in the snippet.
128
+ * @param content - The actual content of the snippet.
129
+ */
130
+ indexSnippet(snippetId, directory, symbols, words, content) {
131
+ __classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(encodeSnippetId(snippetId), directory, symbols, words, content);
73
132
  }
74
- boostSnippet(snippetId, boostFactor) {
75
- __classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(snippetId, boostFactor);
133
+ /**
134
+ * Boosts the relevance score of a specific snippet for a given session.
135
+ * @param sessionId - The session identifier to associate the boost with.
136
+ * @param snippetId - The identifier of the snippet to boost.
137
+ * @param boostFactor - The factor by which to boost the snippet's relevance.
138
+ */
139
+ boostSnippet(sessionId, snippetId, boostFactor) {
140
+ __classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(sessionId, encodeSnippetId(snippetId), boostFactor);
76
141
  }
77
- searchSnippets(query, limit = 10) {
78
- const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(query, limit);
142
+ searchSnippets(sessionId, query, limit = 10) {
143
+ const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(sessionId, query, limit);
79
144
  return rows.map((row) => ({
80
145
  directory: row.directory,
81
- snippetId: row.snippet_id,
82
- filePath: row.file_path,
83
- startLine: row.start_line,
84
- endLine: row.end_line,
146
+ snippetId: parseSnippetId(row.snippet_id),
85
147
  score: row.score,
86
148
  content: row.content,
87
149
  }));
@@ -90,5 +152,5 @@ class SnippetIndex {
90
152
  this.database.close();
91
153
  }
92
154
  }
93
- _SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
155
+ _SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_deleteSession = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
94
156
  exports.default = SnippetIndex;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@appland/search",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "",
5
5
  "bin": "built/cli.js",
6
6
  "publishConfig": {
@@ -40,6 +40,7 @@
40
40
  },
41
41
  "dependencies": {
42
42
  "better-sqlite3": "^11.5.0",
43
+ "isbinaryfile": "^5.0.4",
43
44
  "yargs": "^17.7.2"
44
45
  }
45
46
  }