@appland/search 1.0.1 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- package/CHANGELOG.md +23 -0
- package/README.md +76 -0
- package/built/build-file-index.js +13 -5
- package/built/build-snippet-index.js +8 -5
- package/built/cli.js +10 -6
- package/built/file-index.d.ts +21 -2
- package/built/file-index.js +36 -10
- package/built/index.d.ts +3 -1
- package/built/index.js +9 -1
- package/built/query-keywords.js +0 -1
- package/built/session-id.d.ts +2 -0
- package/built/session-id.js +7 -0
- package/built/snippet-index.d.ts +38 -7
- package/built/snippet-index.js +86 -24
- package/built/splitter.js +3 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
# [@appland/search-v1.1.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.1.0...@appland/search-v1.1.1) (2024-12-18)
|
2
|
+
|
3
|
+
|
4
|
+
### Bug Fixes
|
5
|
+
|
6
|
+
* Extract complete chunk when splitting text ([75d2f5d](https://github.com/getappmap/appmap-js/commit/75d2f5df06c9794b772116c2facde366d5e1cd7d))
|
7
|
+
|
8
|
+
# [@appland/search-v1.1.0](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.1...@appland/search-v1.1.0) (2024-12-01)
|
9
|
+
|
10
|
+
|
11
|
+
### Bug Fixes
|
12
|
+
|
13
|
+
* Pass absolute path when loading file content ([85060bb](https://github.com/getappmap/appmap-js/commit/85060bb432fec9a1ee2d461fa671cb18b0f21fe6))
|
14
|
+
* Search for 'code' ([d209727](https://github.com/getappmap/appmap-js/commit/d209727d4ec19d8027b1cb4eb36ed31a60d9eb21))
|
15
|
+
|
16
|
+
|
17
|
+
### Features
|
18
|
+
|
19
|
+
* Add session deletion ([9ccd947](https://github.com/getappmap/appmap-js/commit/9ccd947f110857d5d881a31bf0c947bb02f1f2c5))
|
20
|
+
* Associate boost factor data with a session id ([7031193](https://github.com/getappmap/appmap-js/commit/70311932553adb0aca4ae7f6f11af23790921bdf))
|
21
|
+
* Define and export SnippetId type ([8e3be79](https://github.com/getappmap/appmap-js/commit/8e3be7949c62a11ed1d57b1c88df2868aa3f10cd))
|
22
|
+
* Search for AppMap data using @appland/search ([ac00047](https://github.com/getappmap/appmap-js/commit/ac0004717147a095f1fa609c2aa341dec6e6c7bc))
|
23
|
+
|
1
24
|
# [@appland/search-v1.0.1](https://github.com/getappmap/appmap-js/compare/@appland/search-v1.0.0...@appland/search-v1.0.1) (2024-12-01)
|
2
25
|
|
3
26
|
|
package/README.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# `@appland/search`
|
2
|
+
|
3
|
+
## session_id
|
4
|
+
|
5
|
+
The `session id` parameter is an integral part of differentiating boost records in this system. Its
|
6
|
+
intent is to make sure that the boost factors, which affect search results, remain specific to a
|
7
|
+
particular search session and do not impact other concurrent search sessions.
|
8
|
+
|
9
|
+
### Intent
|
10
|
+
|
11
|
+
The main goal of introducing a `session id` is to:
|
12
|
+
|
13
|
+
1. **Isolate Boost Factors**: By associating each boost record with a unique session id, different
|
14
|
+
sessions' boost factors are kept separate. This means that boosting done in one session doesn't
|
15
|
+
unintentionally affect another.
|
16
|
+
|
17
|
+
2. **Maintain Contextual Relevance**: Boost factors should only influence the search results within
|
18
|
+
the scope of their intended search session. This ensures the search system remains contextually
|
19
|
+
aware and the results are relevant to the specific scenarios where the boosts were applied.
|
20
|
+
|
21
|
+
### How It Works for Concurrency
|
22
|
+
|
23
|
+
- **Storage**: When a boost factor is being stored in the system, it includes a `session id` in the
|
24
|
+
database schema for boost records. This ties the boost factor directly to the user's session that
|
25
|
+
triggered it.
|
26
|
+
|
27
|
+
- **Filtering**: When search operations are performed, only boost factors associated with the
|
28
|
+
current session id are considered. This filtering ensures that only relevant boosts are applied to
|
29
|
+
the search results.
|
30
|
+
|
31
|
+
- **Concurrent Use**: In environments where multiple users or sessions are interacting with the
|
32
|
+
search system simultaneously, the session id ensures that one session's boost factors don't spill
|
33
|
+
over and impact the results of another session. This isolation is crucial in multi-user systems
|
34
|
+
where search personalization is required.
|
35
|
+
|
36
|
+
### Session Deletion
|
37
|
+
|
38
|
+
The system provides a mechanism to delete all data associated with a specific session. This is
|
39
|
+
achieved through the `deleteSession` method available in both the `FileIndex` and `SnippetIndex`
|
40
|
+
classes. By invoking this method with a session id, all boost factors and related data tied to that
|
41
|
+
session are removed from the database, ensuring that no residual data affects future search
|
42
|
+
operations.
|
43
|
+
|
44
|
+
### Entity-Relationship Diagram
|
45
|
+
|
46
|
+
```mermaid
|
47
|
+
erDiagram
|
48
|
+
FILE_CONTENT {
|
49
|
+
TEXT directory
|
50
|
+
TEXT file_path
|
51
|
+
TEXT file_symbols
|
52
|
+
TEXT file_words
|
53
|
+
}
|
54
|
+
FILE_BOOST {
|
55
|
+
TEXT session_id
|
56
|
+
TEXT file_path
|
57
|
+
REAL boost_factor
|
58
|
+
}
|
59
|
+
|
60
|
+
|
61
|
+
SNIPPET_CONTENT {
|
62
|
+
TEXT snippet_id
|
63
|
+
TEXT directory
|
64
|
+
TEXT file_symbols
|
65
|
+
TEXT file_words
|
66
|
+
TEXT content
|
67
|
+
}
|
68
|
+
SNIPPET_BOOST {
|
69
|
+
TEXT session_id
|
70
|
+
TEXT snippet_id
|
71
|
+
REAL boost_factor
|
72
|
+
}
|
73
|
+
|
74
|
+
FILE_CONTENT ||--o{ FILE_BOOST : "Is boosted by"
|
75
|
+
SNIPPET_CONTENT ||--o{ SNIPPET_BOOST : "Is boosted by"
|
76
|
+
```
|
@@ -7,6 +7,7 @@ exports.default = buildFileIndex;
|
|
7
7
|
const debug_1 = __importDefault(require("debug"));
|
8
8
|
const path_1 = require("path");
|
9
9
|
const console_1 = require("console");
|
10
|
+
const types_1 = require("util/types");
|
10
11
|
const debug = (0, debug_1.default)('appmap:search:build-index');
|
11
12
|
async function indexFile(context, filePath) {
|
12
13
|
debug('Indexing file: %s', filePath);
|
@@ -26,13 +27,20 @@ async function indexDirectory(context, directory) {
|
|
26
27
|
if (!dirContents)
|
27
28
|
return;
|
28
29
|
for (const dirContentItem of dirContents) {
|
29
|
-
|
30
|
+
let filePath;
|
31
|
+
if ((0, path_1.isAbsolute)(dirContentItem))
|
32
|
+
filePath = dirContentItem;
|
33
|
+
else
|
34
|
+
filePath = (0, path_1.join)(directory, dirContentItem);
|
30
35
|
debug('Indexing: %s', filePath);
|
31
36
|
if (await context.fileFilter(filePath)) {
|
32
|
-
|
33
|
-
(
|
34
|
-
|
35
|
-
|
37
|
+
try {
|
38
|
+
await indexFile(context, filePath);
|
39
|
+
}
|
40
|
+
catch (e) {
|
41
|
+
const message = (0, types_1.isNativeError)(e) ? e.message : String(e);
|
42
|
+
(0, console_1.warn)(`Error indexing file ${filePath}: ${message}`);
|
43
|
+
}
|
36
44
|
}
|
37
45
|
}
|
38
46
|
}
|
@@ -1,16 +1,19 @@
|
|
1
1
|
"use strict";
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
3
|
exports.default = buildSnippetIndex;
|
4
|
+
const path_1 = require("path");
|
5
|
+
const snippet_index_1 = require("./snippet-index");
|
4
6
|
/**
 * Reads one file, splits it into chunks and adds every chunk to the snippet index.
 *
 * @param context - Holds the collaborators used here: `contentReader`, `splitter`,
 *   `tokenizer` and `snippetIndex`.
 * @param file - Object with `directory` and `filePath`; a relative `filePath` is
 *   resolved against `directory` before the content is read.
 * @returns A promise that resolves once all chunks have been indexed. Resolves
 *   without indexing anything when the content reader yields no content.
 */
async function indexFile(context, file) {
    const filePath = (0, path_1.isAbsolute)(file.filePath) ? file.filePath : (0, path_1.join)(file.directory, file.filePath);
    const fileContent = await context.contentReader(filePath);
    if (!fileContent)
        return;
    const extension = file.filePath.split('.').pop() || '';
    const chunks = await context.splitter(fileContent, extension);
    for (const chunk of chunks) {
        const { content, startLine } = chunk;
        const snippetId = (0, snippet_index_1.fileChunkSnippetId)(filePath, startLine);
        // Tokenize each chunk once and reuse the result for both the symbol and
        // word columns instead of running the tokenizer twice on the same text.
        const { symbols, words } = context.tokenizer(content, file.filePath);
        context.snippetIndex.indexSnippet(snippetId, file.directory, symbols.join(' '), words.join(' '), content);
    }
}
|
16
19
|
async function buildSnippetIndex(snippetIndex, files, contentReader, splitter, tokenizer) {
|
package/built/cli.js
CHANGED
@@ -17,6 +17,7 @@ const build_snippet_index_1 = __importDefault(require("./build-snippet-index"));
|
|
17
17
|
const ioutil_1 = require("./ioutil");
|
18
18
|
const splitter_1 = require("./splitter");
|
19
19
|
const assert_1 = __importDefault(require("assert"));
|
20
|
+
const session_id_1 = require("./session-id");
|
20
21
|
const debug = (0, debug_1.default)('appmap:search:cli');
|
21
22
|
const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
|
22
23
|
.command('* <query>', 'Index directories and perform a search', (yargs) => {
|
@@ -58,6 +59,7 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
|
|
58
59
|
};
|
59
60
|
const db = new better_sqlite3_1.default(':memory:');
|
60
61
|
const fileIndex = new file_index_1.default(db);
|
62
|
+
const sessionId = (0, session_id_1.generateSessionId)();
|
61
63
|
await (0, build_file_index_1.default)(fileIndex, directories, project_files_1.default, fileFilter, ioutil_1.readFileSafe, tokenize_1.fileTokens);
|
62
64
|
const filePathAtMostThreeEntries = (filePath) => {
|
63
65
|
const parts = filePath.split('/');
|
@@ -65,13 +67,13 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
|
|
65
67
|
return filePath;
|
66
68
|
return `.../${parts.slice(-3).join('/')}`;
|
67
69
|
};
|
68
|
-
const printResult = (
|
70
|
+
const printResult = (type, id, score) => console.log('%s %s %s', type, filePathAtMostThreeEntries(id), score.toPrecision(3));
|
69
71
|
console.log('File search results');
|
70
72
|
console.log('-------------------');
|
71
|
-
const fileSearchResults = fileIndex.search(query);
|
73
|
+
const fileSearchResults = fileIndex.search(sessionId, query);
|
72
74
|
for (const result of fileSearchResults) {
|
73
75
|
const { filePath, score } = result;
|
74
|
-
printResult(filePath, score);
|
76
|
+
printResult('file', filePath, score);
|
75
77
|
}
|
76
78
|
const splitter = splitter_1.langchainSplitter;
|
77
79
|
const snippetIndex = new snippet_index_1.default(db);
|
@@ -80,10 +82,12 @@ const cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
|
|
80
82
|
console.log('Snippet search results');
|
81
83
|
console.log('----------------------');
|
82
84
|
const isNullOrUndefined = (value) => value === null || value === undefined;
|
83
|
-
const snippetSearchResults = snippetIndex.searchSnippets(query);
|
85
|
+
const snippetSearchResults = snippetIndex.searchSnippets(sessionId, query);
|
84
86
|
for (const result of snippetSearchResults) {
|
85
|
-
const { snippetId,
|
86
|
-
printResult(snippetId, score);
|
87
|
+
const { snippetId, score } = result;
|
88
|
+
printResult(snippetId.type, snippetId.id, score);
|
89
|
+
const [filePath, range] = snippetId.id.split(':');
|
90
|
+
const [startLine, endLine] = range.split('-').map((n) => parseInt(n, 10));
|
87
91
|
if (isNullOrUndefined(startLine) || isNullOrUndefined(endLine))
|
88
92
|
continue;
|
89
93
|
const content = await (0, ioutil_1.readFileSafe)(filePath);
|
package/built/file-index.d.ts
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import sqlite3 from 'better-sqlite3';
|
2
|
+
import { SessionId } from './session-id';
|
2
3
|
export type FileSearchResult = {
|
3
4
|
directory: string;
|
4
5
|
filePath: string;
|
@@ -23,7 +24,25 @@ export default class FileIndex {
|
|
23
24
|
database: sqlite3.Database;
|
24
25
|
constructor(database: sqlite3.Database);
|
25
26
|
indexFile(directory: string, filePath: string, symbols: string, words: string): void;
|
26
|
-
|
27
|
-
|
27
|
+
/**
|
28
|
+
* Boosts the relevance score of a specific file for a given session.
|
29
|
+
* @param sessionId - The session identifier to associate the boost with.
|
30
|
+
* @param filePath - The path of the file to boost.
|
31
|
+
* @param boostFactor - The factor by which to boost the file's relevance.
|
32
|
+
*/
|
33
|
+
boostFile(sessionId: SessionId, filePath: string, boostFactor: number): void;
|
34
|
+
/**
|
35
|
+
* Deletes all data associated with a specific session.
|
36
|
+
* @param sessionId - The session identifier to delete data for.
|
37
|
+
*/
|
38
|
+
deleteSession(sessionId: string): void;
|
39
|
+
/**
|
40
|
+
* Searches for files matching the query, considering session-specific boosts.
|
41
|
+
* @param sessionId - The session identifier to apply during the search.
|
42
|
+
* @param query - The search query string.
|
43
|
+
* @param limit - The maximum number of results to return.
|
44
|
+
* @returns An array of search results with directory, file path, and score.
|
45
|
+
*/
|
46
|
+
search(sessionId: SessionId, query: string, limit?: number): FileSearchResult[];
|
28
47
|
close(): void;
|
29
48
|
}
|
package/built/file-index.js
CHANGED
@@ -10,7 +10,7 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
|
|
10
10
|
if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
|
11
11
|
return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
|
12
12
|
};
|
13
|
-
var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_search;
|
13
|
+
var _FileIndex_insert, _FileIndex_updateBoost, _FileIndex_deleteSession, _FileIndex_search;
|
14
14
|
Object.defineProperty(exports, "__esModule", { value: true });
|
15
15
|
const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
|
16
16
|
directory UNINDEXED,
|
@@ -20,13 +20,16 @@ const CREATE_TABLE_SQL = `CREATE VIRTUAL TABLE file_content USING fts5(
|
|
20
20
|
tokenize = 'porter unicode61'
|
21
21
|
)`;
|
22
22
|
const CREATE_BOOST_TABLE_SQL = `CREATE TABLE file_boost (
|
23
|
-
|
24
|
-
|
23
|
+
session_id TEXT,
|
24
|
+
file_path TEXT,
|
25
|
+
boost_factor REAL,
|
26
|
+
PRIMARY KEY (session_id, file_path)
|
25
27
|
)`;
|
26
28
|
const INSERT_SQL = `INSERT INTO file_content (directory, file_path, file_symbols, file_words)
|
27
29
|
VALUES (?, ?, ?, ?)`;
|
28
|
-
const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (file_path, boost_factor)
|
29
|
-
VALUES (?, ?)`;
|
30
|
+
const UPDATE_BOOST_SQL = `INSERT OR REPLACE INTO file_boost (session_id, file_path, boost_factor)
|
31
|
+
VALUES (?, ?, ?)`;
|
32
|
+
// Removes every file boost recorded for one session. Uses an exact match: with
// LIKE, a `%` or `_` inside the bound session id acts as a wildcard (and ASCII
// case is ignored), which could delete boosts belonging to other sessions.
const DELETE_SESSION_SQL = `DELETE FROM file_boost WHERE session_id = ?`;
|
30
33
|
const SEARCH_SQL = `SELECT
|
31
34
|
file_content.directory,
|
32
35
|
file_content.file_path,
|
@@ -39,6 +42,7 @@ LEFT JOIN
|
|
39
42
|
file_boost
|
40
43
|
ON
|
41
44
|
file_content.file_path = file_boost.file_path
|
45
|
+
AND file_boost.session_id = ?
|
42
46
|
WHERE
|
43
47
|
file_content MATCH ?
|
44
48
|
ORDER BY
|
@@ -65,6 +69,7 @@ class FileIndex {
|
|
65
69
|
this.database = database;
|
66
70
|
_FileIndex_insert.set(this, void 0);
|
67
71
|
_FileIndex_updateBoost.set(this, void 0);
|
72
|
+
_FileIndex_deleteSession.set(this, void 0);
|
68
73
|
_FileIndex_search.set(this, void 0);
|
69
74
|
this.database.exec(CREATE_TABLE_SQL);
|
70
75
|
this.database.exec(CREATE_BOOST_TABLE_SQL);
|
@@ -72,16 +77,37 @@ class FileIndex {
|
|
72
77
|
this.database.pragma('synchronous = OFF');
|
73
78
|
__classPrivateFieldSet(this, _FileIndex_insert, this.database.prepare(INSERT_SQL), "f");
|
74
79
|
__classPrivateFieldSet(this, _FileIndex_updateBoost, this.database.prepare(UPDATE_BOOST_SQL), "f");
|
80
|
+
__classPrivateFieldSet(this, _FileIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
|
75
81
|
__classPrivateFieldSet(this, _FileIndex_search, this.database.prepare(SEARCH_SQL), "f");
|
76
82
|
}
|
77
83
|
indexFile(directory, filePath, symbols, words) {
|
78
84
|
__classPrivateFieldGet(this, _FileIndex_insert, "f").run(directory, filePath, symbols, words);
|
79
85
|
}
|
80
|
-
|
81
|
-
|
86
|
+
/**
|
87
|
+
* Boosts the relevance score of a specific file for a given session.
|
88
|
+
* @param sessionId - The session identifier to associate the boost with.
|
89
|
+
* @param filePath - The path of the file to boost.
|
90
|
+
* @param boostFactor - The factor by which to boost the file's relevance.
|
91
|
+
*/
|
92
|
+
boostFile(sessionId, filePath, boostFactor) {
|
93
|
+
__classPrivateFieldGet(this, _FileIndex_updateBoost, "f").run(sessionId, filePath, boostFactor);
|
82
94
|
}
|
83
|
-
|
84
|
-
|
95
|
+
/**
|
96
|
+
* Deletes all data associated with a specific session.
|
97
|
+
* @param sessionId - The session identifier to delete data for.
|
98
|
+
*/
|
99
|
+
deleteSession(sessionId) {
|
100
|
+
__classPrivateFieldGet(this, _FileIndex_deleteSession, "f").run(sessionId);
|
101
|
+
}
|
102
|
+
/**
|
103
|
+
* Searches for files matching the query, considering session-specific boosts.
|
104
|
+
* @param sessionId - The session identifier to apply during the search.
|
105
|
+
* @param query - The search query string.
|
106
|
+
* @param limit - The maximum number of results to return.
|
107
|
+
* @returns An array of search results with directory, file path, and score.
|
108
|
+
*/
|
109
|
+
search(sessionId, query, limit = 10) {
|
110
|
+
const rows = __classPrivateFieldGet(this, _FileIndex_search, "f").all(sessionId, query, limit);
|
85
111
|
return rows.map((row) => ({
|
86
112
|
directory: row.directory,
|
87
113
|
filePath: row.file_path,
|
@@ -92,5 +118,5 @@ class FileIndex {
|
|
92
118
|
this.database.close();
|
93
119
|
}
|
94
120
|
}
|
95
|
-
_FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_search = new WeakMap();
|
121
|
+
_FileIndex_insert = new WeakMap(), _FileIndex_updateBoost = new WeakMap(), _FileIndex_deleteSession = new WeakMap(), _FileIndex_search = new WeakMap();
|
96
122
|
exports.default = FileIndex;
|
package/built/index.d.ts
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
export { ContentReader, readFileSafe } from './ioutil';
|
2
|
+
export { SessionId, generateSessionId } from './session-id';
|
2
3
|
export { Splitter, langchainSplitter } from './splitter';
|
3
4
|
export { ListFn, FilterFn, Tokenizer, default as buildFileIndex } from './build-file-index';
|
4
5
|
export { File, default as buildSnippetIndex } from './build-snippet-index';
|
5
|
-
export { default as SnippetIndex, SnippetSearchResult } from './snippet-index';
|
6
|
+
export { default as SnippetIndex, SnippetSearchResult, SnippetId, encodeSnippetId, parseSnippetId, fileChunkSnippetId, parseFileChunkSnippetId, } from './snippet-index';
|
6
7
|
export { default as FileIndex, FileSearchResult } from './file-index';
|
7
8
|
export { default as listProjectFiles } from './project-files';
|
8
9
|
export { isBinaryFile, isDataFile, isLargeFile } from './file-type';
|
9
10
|
export { fileTokens } from './tokenize';
|
11
|
+
export { default as queryKeywords } from './query-keywords';
|
package/built/index.js
CHANGED
@@ -3,9 +3,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
4
|
};
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
-
exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.readFileSafe = void 0;
|
6
|
+
exports.queryKeywords = exports.fileTokens = exports.isLargeFile = exports.isDataFile = exports.isBinaryFile = exports.listProjectFiles = exports.FileIndex = exports.parseFileChunkSnippetId = exports.fileChunkSnippetId = exports.parseSnippetId = exports.encodeSnippetId = exports.SnippetIndex = exports.buildSnippetIndex = exports.buildFileIndex = exports.langchainSplitter = exports.generateSessionId = exports.readFileSafe = void 0;
|
7
7
|
var ioutil_1 = require("./ioutil");
|
8
8
|
Object.defineProperty(exports, "readFileSafe", { enumerable: true, get: function () { return ioutil_1.readFileSafe; } });
|
9
|
+
var session_id_1 = require("./session-id");
|
10
|
+
Object.defineProperty(exports, "generateSessionId", { enumerable: true, get: function () { return session_id_1.generateSessionId; } });
|
9
11
|
var splitter_1 = require("./splitter");
|
10
12
|
Object.defineProperty(exports, "langchainSplitter", { enumerable: true, get: function () { return splitter_1.langchainSplitter; } });
|
11
13
|
var build_file_index_1 = require("./build-file-index");
|
@@ -14,6 +16,10 @@ var build_snippet_index_1 = require("./build-snippet-index");
|
|
14
16
|
Object.defineProperty(exports, "buildSnippetIndex", { enumerable: true, get: function () { return __importDefault(build_snippet_index_1).default; } });
|
15
17
|
var snippet_index_1 = require("./snippet-index");
|
16
18
|
Object.defineProperty(exports, "SnippetIndex", { enumerable: true, get: function () { return __importDefault(snippet_index_1).default; } });
|
19
|
+
Object.defineProperty(exports, "encodeSnippetId", { enumerable: true, get: function () { return snippet_index_1.encodeSnippetId; } });
|
20
|
+
Object.defineProperty(exports, "parseSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseSnippetId; } });
|
21
|
+
Object.defineProperty(exports, "fileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.fileChunkSnippetId; } });
|
22
|
+
Object.defineProperty(exports, "parseFileChunkSnippetId", { enumerable: true, get: function () { return snippet_index_1.parseFileChunkSnippetId; } });
|
17
23
|
var file_index_1 = require("./file-index");
|
18
24
|
Object.defineProperty(exports, "FileIndex", { enumerable: true, get: function () { return __importDefault(file_index_1).default; } });
|
19
25
|
var project_files_1 = require("./project-files");
|
@@ -24,3 +30,5 @@ Object.defineProperty(exports, "isDataFile", { enumerable: true, get: function (
|
|
24
30
|
Object.defineProperty(exports, "isLargeFile", { enumerable: true, get: function () { return file_type_1.isLargeFile; } });
|
25
31
|
var tokenize_1 = require("./tokenize");
|
26
32
|
Object.defineProperty(exports, "fileTokens", { enumerable: true, get: function () { return tokenize_1.fileTokens; } });
|
33
|
+
var query_keywords_1 = require("./query-keywords");
|
34
|
+
Object.defineProperty(exports, "queryKeywords", { enumerable: true, get: function () { return __importDefault(query_keywords_1).default; } });
|
package/built/query-keywords.js
CHANGED
package/built/snippet-index.d.ts
CHANGED
@@ -1,10 +1,22 @@
|
|
1
1
|
import sqlite3 from 'better-sqlite3';
|
2
|
+
import { SessionId } from './session-id';
|
3
|
+
export declare enum SnippetType {
|
4
|
+
FileChunk = "file-chunk"
|
5
|
+
}
|
6
|
+
export type SnippetId = {
|
7
|
+
type: string;
|
8
|
+
id: string;
|
9
|
+
};
|
10
|
+
export declare function fileChunkSnippetId(filePath: string, startLine?: number): SnippetId;
|
11
|
+
export declare function parseFileChunkSnippetId(snippetId: SnippetId): {
|
12
|
+
filePath: string;
|
13
|
+
startLine?: number;
|
14
|
+
};
|
15
|
+
export declare function encodeSnippetId(snippetId: SnippetId): string;
|
16
|
+
export declare function parseSnippetId(snippetId: string): SnippetId;
|
2
17
|
export type SnippetSearchResult = {
|
3
|
-
snippetId:
|
18
|
+
snippetId: SnippetId;
|
4
19
|
directory: string;
|
5
|
-
filePath: string;
|
6
|
-
startLine: number | undefined;
|
7
|
-
endLine: number | undefined;
|
8
20
|
score: number;
|
9
21
|
content: string;
|
10
22
|
};
|
@@ -12,8 +24,27 @@ export default class SnippetIndex {
|
|
12
24
|
#private;
|
13
25
|
database: sqlite3.Database;
|
14
26
|
constructor(database: sqlite3.Database);
|
15
|
-
|
16
|
-
|
17
|
-
|
27
|
+
/**
|
28
|
+
* Deletes all data associated with a specific session.
|
29
|
+
* @param sessionId - The session identifier to delete data for.
|
30
|
+
*/
|
31
|
+
deleteSession(sessionId: string): void;
|
32
|
+
/**
|
33
|
+
* Indexes a code snippet for searchability.
|
34
|
+
* @param snippetId - The unique identifier for the snippet.
|
35
|
+
* @param directory - The directory where the snippet is located.
|
36
|
+
* @param symbols - Symbols (e.g., class names) in the snippet.
|
37
|
+
* @param words - General words in the snippet.
|
38
|
+
* @param content - The actual content of the snippet.
|
39
|
+
*/
|
40
|
+
indexSnippet(snippetId: SnippetId, directory: string, symbols: string, words: string, content: string): void;
|
41
|
+
/**
|
42
|
+
* Boosts the relevance score of a specific snippet for a given session.
|
43
|
+
* @param sessionId - The session identifier to associate the boost with.
|
44
|
+
* @param snippetId - The identifier of the snippet to boost.
|
45
|
+
* @param boostFactor - The factor by which to boost the snippet's relevance.
|
46
|
+
*/
|
47
|
+
boostSnippet(sessionId: SessionId, snippetId: SnippetId, boostFactor: number): void;
|
48
|
+
searchSnippets(sessionId: SessionId, query: string, limit?: number): SnippetSearchResult[];
|
18
49
|
close(): void;
|
19
50
|
}
|
package/built/snippet-index.js
CHANGED
@@ -10,34 +10,40 @@ var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (
|
|
10
10
|
if (typeof state === "function" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError("Cannot read private member from an object whose class did not declare it");
|
11
11
|
return kind === "m" ? f : kind === "a" ? f.call(receiver) : f ? f.value : state.get(receiver);
|
12
12
|
};
|
13
|
-
var
|
13
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
14
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
15
|
+
};
|
16
|
+
var _SnippetIndex_insertSnippet, _SnippetIndex_updateSnippetBoost, _SnippetIndex_deleteSession, _SnippetIndex_searchSnippet;
|
14
17
|
Object.defineProperty(exports, "__esModule", { value: true });
|
18
|
+
exports.SnippetType = void 0;
|
19
|
+
exports.fileChunkSnippetId = fileChunkSnippetId;
|
20
|
+
exports.parseFileChunkSnippetId = parseFileChunkSnippetId;
|
21
|
+
exports.encodeSnippetId = encodeSnippetId;
|
22
|
+
exports.parseSnippetId = parseSnippetId;
|
23
|
+
const assert_1 = __importDefault(require("assert"));
|
15
24
|
const CREATE_SNIPPET_CONTENT_TABLE_SQL = `CREATE VIRTUAL TABLE snippet_content USING fts5(
|
16
25
|
snippet_id UNINDEXED,
|
17
26
|
directory UNINDEXED,
|
18
|
-
file_path,
|
19
|
-
start_line UNINDEXED,
|
20
|
-
end_line UNINDEXED,
|
21
27
|
file_symbols,
|
22
28
|
file_words,
|
23
29
|
content UNINDEXED,
|
24
30
|
tokenize = 'porter unicode61'
|
25
31
|
)`;
|
26
32
|
const CREATE_SNIPPET_BOOST_TABLE_SQL = `CREATE TABLE snippet_boost (
|
27
|
-
|
28
|
-
|
33
|
+
session_id TEXT,
|
34
|
+
snippet_id TEXT,
|
35
|
+
boost_factor REAL,
|
36
|
+
PRIMARY KEY (session_id, snippet_id)
|
29
37
|
)`;
|
30
38
|
const INSERT_SNIPPET_SQL = `INSERT INTO snippet_content
|
31
|
-
(snippet_id, directory,
|
32
|
-
VALUES (?, ?, ?, ?,
|
39
|
+
(snippet_id, directory, file_symbols, file_words, content)
|
40
|
+
VALUES (?, ?, ?, ?, ?)`;
|
41
|
+
// Removes every snippet boost recorded for one session. Uses an exact match:
// with LIKE, a `%` or `_` inside the bound session id acts as a wildcard (and
// ASCII case is ignored), which could delete boosts belonging to other sessions.
const DELETE_SESSION_SQL = `DELETE FROM snippet_boost WHERE session_id = ?`;
|
33
42
|
const UPDATE_SNIPPET_BOOST_SQL = `INSERT OR REPLACE INTO snippet_boost
|
34
|
-
(snippet_id, boost_factor)
|
35
|
-
VALUES (?, ?)`;
|
43
|
+
(session_id, snippet_id, boost_factor)
|
44
|
+
VALUES (?, ?, ?)`;
|
36
45
|
const SEARCH_SNIPPET_SQL = `SELECT
|
37
46
|
snippet_content.directory,
|
38
|
-
snippet_content.file_path,
|
39
|
-
snippet_content.start_line,
|
40
|
-
snippet_content.end_line,
|
41
47
|
snippet_content.snippet_id,
|
42
48
|
snippet_content.content,
|
43
49
|
(bm25(snippet_content, 1)*3.0 + bm25(snippet_content, 2)*2.0 + bm25(snippet_content, 3)*1.0)
|
@@ -49,39 +55,95 @@ LEFT JOIN
|
|
49
55
|
snippet_boost
|
50
56
|
ON
|
51
57
|
snippet_content.snippet_id = snippet_boost.snippet_id
|
58
|
+
AND snippet_boost.session_id = ?
|
52
59
|
WHERE
|
53
60
|
snippet_content MATCH ?
|
54
61
|
ORDER BY
|
55
62
|
score DESC
|
56
63
|
LIMIT ?`;
|
64
|
+
var SnippetType;
|
65
|
+
(function (SnippetType) {
|
66
|
+
SnippetType["FileChunk"] = "file-chunk";
|
67
|
+
})(SnippetType || (exports.SnippetType = SnippetType = {}));
|
68
|
+
/**
 * Builds the snippet id for a chunk of a file.
 *
 * @param filePath - Path of the file the chunk comes from.
 * @param startLine - Optional first line of the chunk; left out of the id when falsy.
 * @returns A snippet id of type 'file-chunk' whose `id` is the truthy parts joined by ':'.
 */
function fileChunkSnippetId(filePath, startLine) {
    const segments = [];
    for (const segment of [filePath, startLine]) {
        // Falsy parts (undefined, 0, empty string) are omitted from the id.
        if (segment) {
            segments.push(segment);
        }
    }
    const id = segments.join(':');
    return { type: 'file-chunk', id };
}
|
74
|
+
/**
 * Splits a 'file-chunk' snippet id back into its file path and start line.
 *
 * Only a trailing `:<digits>` (optionally followed by `-<digits>`) is read as the
 * start line. Everything before it is the file path, so paths that themselves
 * contain ':' (for example a Windows drive letter) are returned intact.
 *
 * @param snippetId - Snippet id object with `type` and `id`.
 * @returns `{ filePath, startLine }`; `startLine` is undefined when the id has no
 *   numeric line suffix.
 * @throws AssertionError when the type is not `SnippetType.FileChunk` or the id is empty.
 */
function parseFileChunkSnippetId(snippetId) {
    const type = snippetId.type;
    (0, assert_1.default)(type === SnippetType.FileChunk);
    const id = snippetId.id;
    // Greedy match: the last ':<digits>' group wins, earlier colons stay in the path.
    const match = /^(.+):(\d+)(?:-\d+)?$/.exec(id);
    const filePath = match ? match[1] : id;
    (0, assert_1.default)(filePath);
    return {
        filePath: filePath,
        startLine: match ? parseInt(match[2], 10) : undefined,
    };
}
|
86
|
+
/**
 * Serializes a snippet id object into a single string.
 *
 * @param snippetId - Snippet id object with `type` and `id`.
 * @returns The type and id joined by ':'.
 */
function encodeSnippetId(snippetId) {
    const { type, id } = snippetId;
    const parts = [type, id];
    return parts.join(':');
}
|
89
|
+
/**
 * Parses an encoded snippet id string into its type and id parts.
 *
 * The text before the first ':' is the type; everything after it, including any
 * further ':' characters, is the id.
 *
 * @param snippetId - Encoded snippet id string.
 * @returns `{ type, id }`; `id` is an empty string when there is no ':'.
 * @throws AssertionError when the type part is empty.
 */
function parseSnippetId(snippetId) {
    const separatorIndex = snippetId.indexOf(':');
    const hasSeparator = separatorIndex !== -1;
    const type = hasSeparator ? snippetId.slice(0, separatorIndex) : snippetId;
    (0, assert_1.default)(type);
    const id = hasSeparator ? snippetId.slice(separatorIndex + 1) : '';
    return { type, id };
}
|
57
99
|
class SnippetIndex {
|
58
100
|
constructor(database) {
|
59
101
|
this.database = database;
|
60
102
|
_SnippetIndex_insertSnippet.set(this, void 0);
|
61
103
|
_SnippetIndex_updateSnippetBoost.set(this, void 0);
|
104
|
+
_SnippetIndex_deleteSession.set(this, void 0);
|
62
105
|
_SnippetIndex_searchSnippet.set(this, void 0);
|
63
106
|
this.database.exec(CREATE_SNIPPET_CONTENT_TABLE_SQL);
|
64
107
|
this.database.exec(CREATE_SNIPPET_BOOST_TABLE_SQL);
|
65
108
|
this.database.pragma('journal_mode = OFF');
|
66
109
|
this.database.pragma('synchronous = OFF');
|
67
110
|
__classPrivateFieldSet(this, _SnippetIndex_insertSnippet, this.database.prepare(INSERT_SNIPPET_SQL), "f");
|
111
|
+
__classPrivateFieldSet(this, _SnippetIndex_deleteSession, this.database.prepare(DELETE_SESSION_SQL), "f");
|
68
112
|
__classPrivateFieldSet(this, _SnippetIndex_updateSnippetBoost, this.database.prepare(UPDATE_SNIPPET_BOOST_SQL), "f");
|
69
113
|
__classPrivateFieldSet(this, _SnippetIndex_searchSnippet, this.database.prepare(SEARCH_SNIPPET_SQL), "f");
|
70
114
|
}
|
71
|
-
|
72
|
-
|
115
|
+
/**
|
116
|
+
* Deletes all data associated with a specific session.
|
117
|
+
* @param sessionId - The session identifier to delete data for.
|
118
|
+
*/
|
119
|
+
deleteSession(sessionId) {
|
120
|
+
__classPrivateFieldGet(this, _SnippetIndex_deleteSession, "f").run(sessionId);
|
121
|
+
}
|
122
|
+
/**
|
123
|
+
* Indexes a code snippet for searchability.
|
124
|
+
* @param snippetId - The unique identifier for the snippet.
|
125
|
+
* @param directory - The directory where the snippet is located.
|
126
|
+
* @param symbols - Symbols (e.g., class names) in the snippet.
|
127
|
+
* @param words - General words in the snippet.
|
128
|
+
* @param content - The actual content of the snippet.
|
129
|
+
*/
|
130
|
+
indexSnippet(snippetId, directory, symbols, words, content) {
|
131
|
+
__classPrivateFieldGet(this, _SnippetIndex_insertSnippet, "f").run(encodeSnippetId(snippetId), directory, symbols, words, content);
|
73
132
|
}
|
74
|
-
|
75
|
-
|
133
|
+
/**
|
134
|
+
* Boosts the relevance score of a specific snippet for a given session.
|
135
|
+
* @param sessionId - The session identifier to associate the boost with.
|
136
|
+
* @param snippetId - The identifier of the snippet to boost.
|
137
|
+
* @param boostFactor - The factor by which to boost the snippet's relevance.
|
138
|
+
*/
|
139
|
+
boostSnippet(sessionId, snippetId, boostFactor) {
|
140
|
+
__classPrivateFieldGet(this, _SnippetIndex_updateSnippetBoost, "f").run(sessionId, encodeSnippetId(snippetId), boostFactor);
|
76
141
|
}
|
77
|
-
searchSnippets(query, limit = 10) {
|
78
|
-
const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(query, limit);
|
142
|
+
searchSnippets(sessionId, query, limit = 10) {
|
143
|
+
const rows = __classPrivateFieldGet(this, _SnippetIndex_searchSnippet, "f").all(sessionId, query, limit);
|
79
144
|
return rows.map((row) => ({
|
80
145
|
directory: row.directory,
|
81
|
-
snippetId: row.snippet_id,
|
82
|
-
filePath: row.file_path,
|
83
|
-
startLine: row.start_line,
|
84
|
-
endLine: row.end_line,
|
146
|
+
snippetId: parseSnippetId(row.snippet_id),
|
85
147
|
score: row.score,
|
86
148
|
content: row.content,
|
87
149
|
}));
|
@@ -90,5 +152,5 @@ class SnippetIndex {
|
|
90
152
|
this.database.close();
|
91
153
|
}
|
92
154
|
}
|
93
|
-
_SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
|
155
|
+
_SnippetIndex_insertSnippet = new WeakMap(), _SnippetIndex_updateSnippetBoost = new WeakMap(), _SnippetIndex_deleteSession = new WeakMap(), _SnippetIndex_searchSnippet = new WeakMap();
|
94
156
|
exports.default = SnippetIndex;
|
package/built/splitter.js
CHANGED
@@ -42,9 +42,11 @@ async function langchainSplitter(content, fileExtension) {
|
|
42
42
|
const loc = doc.metadata?.loc;
|
43
43
|
const lines = loc?.lines;
|
44
44
|
const result = {
|
45
|
-
content:
|
45
|
+
content: '',
|
46
46
|
};
|
47
47
|
if (lines) {
|
48
|
+
const contentLines = content.split('\n');
|
49
|
+
result.content = contentLines.slice(lines.from - 1, lines.to).join('\n');
|
48
50
|
result.startLine = lines.from;
|
49
51
|
result.endLine = lines.to;
|
50
52
|
}
|