@hesed/search 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/commands/search.js +2 -68
- package/dist/search-logic.d.ts +1 -15
- package/dist/search-logic.js +20 -78
- package/oclif.manifest.json +1 -1
- package/package.json +3 -4
- package/dist/embedders/minilm.d.ts +0 -22
- package/dist/embedders/minilm.js +0 -49
package/README.md
CHANGED
|
@@ -20,7 +20,7 @@ $ npm install -g @hesed/search
|
|
|
20
20
|
$ search COMMAND
|
|
21
21
|
running command...
|
|
22
22
|
$ search (--version)
|
|
23
|
-
@hesed/search/0.2.
|
|
23
|
+
@hesed/search/0.2.1 linux-x64 node-v24.16.0
|
|
24
24
|
$ search --help [COMMAND]
|
|
25
25
|
USAGE
|
|
26
26
|
$ search COMMAND
|
|
@@ -62,5 +62,5 @@ EXAMPLES
|
|
|
62
62
|
$ search search "update jira" --details
|
|
63
63
|
```
|
|
64
64
|
|
|
65
|
-
_See code: [src/commands/search.ts](https://github.com/hesedcasa/search/blob/v0.2.0/src/commands/search.ts)_
|
|
65
|
+
_See code: [src/commands/search.ts](https://github.com/hesedcasa/search/blob/v0.2.1/src/commands/search.ts)_
|
|
66
66
|
<!-- commandsstop -->
|
package/dist/commands/search.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import { Args, Command, CommandHelp, Flags, toConfiguredId, ux } from '@oclif/core';
|
|
2
|
-
import { isMiniLMModelCached, MiniLMCommandEmbedder } from '../embedders/minilm.js';
|
|
1
|
+
import { Args, Command, CommandHelp, Flags, toConfiguredId } from '@oclif/core';
|
|
3
2
|
import { searchCommands } from '../search-logic.js';
|
|
4
3
|
export default class Search extends Command {
|
|
5
4
|
static args = {
|
|
@@ -19,14 +18,7 @@ export default class Search extends Command {
|
|
|
19
18
|
async run() {
|
|
20
19
|
const { args, flags } = await this.parse(Search);
|
|
21
20
|
const allCommands = this.config.commands.filter((c) => !c.hidden && c.pluginName !== '@oclif/plugin-plugins');
|
|
22
|
-
const
|
|
23
|
-
let scored;
|
|
24
|
-
try {
|
|
25
|
-
scored = (await searchCommands(args.query, allCommands, getSearchOptions(this.config, loader.onProgress))).slice(0, flags.limit);
|
|
26
|
-
}
|
|
27
|
-
finally {
|
|
28
|
-
loader.stop();
|
|
29
|
-
}
|
|
21
|
+
const scored = (await searchCommands(args.query, allCommands)).slice(0, flags.limit);
|
|
30
22
|
const results = scored.map((entry) => {
|
|
31
23
|
const { cmd } = entry;
|
|
32
24
|
const configuredId = toConfiguredId(cmd.id, this.config);
|
|
@@ -85,61 +77,3 @@ export default class Search extends Command {
|
|
|
85
77
|
}
|
|
86
78
|
}
|
|
87
79
|
}
|
|
88
|
-
function getSearchOptions(config, onLoadProgress) {
|
|
89
|
-
const testConfig = config;
|
|
90
|
-
if (!testConfig.searchEmbedder)
|
|
91
|
-
return { embedder: new MiniLMCommandEmbedder({ onLoadProgress }) };
|
|
92
|
-
return { embedder: testConfig.searchEmbedder };
|
|
93
|
-
}
|
|
94
|
-
function createModelLoader(silent) {
|
|
95
|
-
let started = false;
|
|
96
|
-
let stopped = false;
|
|
97
|
-
const start = (status) => {
|
|
98
|
-
if (silent || stopped)
|
|
99
|
-
return;
|
|
100
|
-
if (started) {
|
|
101
|
-
ux.action.status = status;
|
|
102
|
-
return;
|
|
103
|
-
}
|
|
104
|
-
ux.action.start('Loading semantic search model', status);
|
|
105
|
-
started = true;
|
|
106
|
-
};
|
|
107
|
-
const stop = () => {
|
|
108
|
-
if (silent || stopped)
|
|
109
|
-
return;
|
|
110
|
-
stopped = true;
|
|
111
|
-
if (started)
|
|
112
|
-
ux.action.stop();
|
|
113
|
-
};
|
|
114
|
-
return {
|
|
115
|
-
onProgress(progress) {
|
|
116
|
-
if (progress.status === 'ready') {
|
|
117
|
-
stop();
|
|
118
|
-
return;
|
|
119
|
-
}
|
|
120
|
-
if (progress.status === 'progress_total' && typeof progress.progress === 'number') {
|
|
121
|
-
start(`${Math.round(progress.progress)}% ${formatBytes(progress.loaded)} / ${formatBytes(progress.total)}`);
|
|
122
|
-
return;
|
|
123
|
-
}
|
|
124
|
-
if (progress.status === 'download') {
|
|
125
|
-
start(formatProgressFile(progress.file));
|
|
126
|
-
}
|
|
127
|
-
},
|
|
128
|
-
stop,
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
|
-
function formatProgressFile(file) {
|
|
132
|
-
return file?.split('/').at(-1);
|
|
133
|
-
}
|
|
134
|
-
function formatBytes(value) {
|
|
135
|
-
if (!value || value <= 0)
|
|
136
|
-
return '?';
|
|
137
|
-
const units = ['B', 'KB', 'MB', 'GB'];
|
|
138
|
-
let size = value;
|
|
139
|
-
let unit = 0;
|
|
140
|
-
while (size >= 1024 && unit < units.length - 1) {
|
|
141
|
-
size /= 1024;
|
|
142
|
-
unit++;
|
|
143
|
-
}
|
|
144
|
-
return `${size.toFixed(size >= 10 || unit === 0 ? 0 : 1)} ${units[unit]}`;
|
|
145
|
-
}
|
package/dist/search-logic.d.ts
CHANGED
|
@@ -8,19 +8,5 @@ export type ScoredCommand<T extends SearchableCommand = SearchableCommand> = {
|
|
|
8
8
|
cmd: T;
|
|
9
9
|
score: number;
|
|
10
10
|
};
|
|
11
|
-
export type CommandEmbedder = {
|
|
12
|
-
embed(texts: string[]): Promise<number[][]>;
|
|
13
|
-
};
|
|
14
|
-
export type CommandReranker<T extends SearchableCommand = SearchableCommand> = {
|
|
15
|
-
rerank(query: string, commands: Array<ScoredCommand<T>>): Promise<Array<ScoredCommand<T>>>;
|
|
16
|
-
};
|
|
17
|
-
export type SearchCommandsOptions<T extends SearchableCommand = SearchableCommand> = {
|
|
18
|
-
embedder?: CommandEmbedder;
|
|
19
|
-
lexicalWeight?: number;
|
|
20
|
-
minSemanticScore?: number;
|
|
21
|
-
reranker?: CommandReranker<T>;
|
|
22
|
-
semantic?: boolean;
|
|
23
|
-
semanticWeight?: number;
|
|
24
|
-
};
|
|
25
|
-
export declare function searchCommands<T extends SearchableCommand>(query: string, commands: T[], options?: SearchCommandsOptions<T>): Promise<Array<ScoredCommand<T>>>;
|
|
11
|
+
export declare function searchCommands<T extends SearchableCommand>(query: string, commands: T[]): Promise<Array<ScoredCommand<T>>>;
|
|
26
12
|
export declare function searchCommandsLexically<T extends SearchableCommand>(query: string, commands: T[], haystack?: string[]): Array<ScoredCommand<T>>;
|
package/dist/search-logic.js
CHANGED
|
@@ -1,39 +1,18 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
const DEFAULT_LEXICAL_WEIGHT = 0.25;
|
|
6
|
-
let defaultEmbedderPromise;
|
|
7
|
-
export async function searchCommands(query, commands, options = {}) {
|
|
1
|
+
import { createRequire } from 'node:module';
|
|
2
|
+
const require = createRequire(import.meta.url);
|
|
3
|
+
const { Index } = require('flexsearch');
|
|
4
|
+
export async function searchCommands(query, commands) {
|
|
8
5
|
const normalizedQuery = query.trim();
|
|
9
6
|
if (normalizedQuery.length === 0 || commands.length === 0)
|
|
10
7
|
return [];
|
|
11
8
|
const haystack = commands.map((command) => commandSearchText(command));
|
|
12
|
-
|
|
13
|
-
if (options.semantic === false)
|
|
14
|
-
return lexical;
|
|
15
|
-
let semantic;
|
|
16
|
-
try {
|
|
17
|
-
semantic = await searchCommandsSemantically(normalizedQuery, commands, haystack, options);
|
|
18
|
-
}
|
|
19
|
-
catch {
|
|
20
|
-
return lexical;
|
|
21
|
-
}
|
|
22
|
-
if (semantic.length === 0)
|
|
23
|
-
return lexical;
|
|
24
|
-
const merged = mergeScores(semantic, lexical, {
|
|
25
|
-
lexicalWeight: options.lexicalWeight ?? DEFAULT_LEXICAL_WEIGHT,
|
|
26
|
-
minSemanticScore: options.minSemanticScore ?? DEFAULT_MIN_SEMANTIC_SCORE,
|
|
27
|
-
semanticWeight: options.semanticWeight ?? DEFAULT_SEMANTIC_WEIGHT,
|
|
28
|
-
});
|
|
29
|
-
return options.reranker ? options.reranker.rerank(normalizedQuery, merged) : merged;
|
|
9
|
+
return searchCommandsLexically(normalizedQuery, commands, haystack);
|
|
30
10
|
}
|
|
31
11
|
export function searchCommandsLexically(query, commands, haystack = commands.map((command) => commandSearchText(command))) {
|
|
32
|
-
const
|
|
33
|
-
const
|
|
34
|
-
if (idxs
|
|
35
|
-
|
|
36
|
-
return ranked.map((oi, rank) => ({ cmd: commands[idxs[oi]], score: rank }));
|
|
12
|
+
const index = createCommandSearchIndex(haystack);
|
|
13
|
+
const idxs = index.search(query, { limit: commands.length, suggest: true });
|
|
14
|
+
if (idxs.length > 0) {
|
|
15
|
+
return idxs.map((idx, rank) => ({ cmd: commands[Number(idx)], score: rank }));
|
|
37
16
|
}
|
|
38
17
|
// Multi-token fallback: score each command by how many individual query
|
|
39
18
|
// tokens it matches. Handles queries containing unknown alias words (e.g.
|
|
@@ -43,16 +22,23 @@ export function searchCommandsLexically(query, commands, haystack = commands.map
|
|
|
43
22
|
return [];
|
|
44
23
|
const hitCount = new Map();
|
|
45
24
|
for (const token of tokens) {
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
25
|
+
const tIdxs = index.search(token, { limit: commands.length, suggest: true });
|
|
26
|
+
for (const idx of tIdxs) {
|
|
27
|
+
const commandIndex = Number(idx);
|
|
28
|
+
hitCount.set(commandIndex, (hitCount.get(commandIndex) ?? 0) + 1);
|
|
50
29
|
}
|
|
51
30
|
}
|
|
52
31
|
return [...hitCount.entries()]
|
|
53
32
|
.sort((a, b) => b[1] - a[1] || a[0] - b[0])
|
|
54
33
|
.map(([idx, hits]) => ({ cmd: commands[idx], score: tokens.length - hits }));
|
|
55
34
|
}
|
|
35
|
+
function createCommandSearchIndex(haystack) {
|
|
36
|
+
const index = new Index({ encoder: 'LatinAdvanced', resolution: 9, tokenize: 'forward' });
|
|
37
|
+
for (const [idx, text] of haystack.entries()) {
|
|
38
|
+
index.add(idx, text);
|
|
39
|
+
}
|
|
40
|
+
return index;
|
|
41
|
+
}
|
|
56
42
|
function commandSearchText(command) {
|
|
57
43
|
return [command.id, command.summary ?? command.description ?? '', command.pluginName ?? '']
|
|
58
44
|
.filter(Boolean)
|
|
@@ -60,47 +46,3 @@ function commandSearchText(command) {
|
|
|
60
46
|
.replaceAll(/\s+/g, ' ')
|
|
61
47
|
.trim();
|
|
62
48
|
}
|
|
63
|
-
async function searchCommandsSemantically(query, commands, haystack, options) {
|
|
64
|
-
const embedder = options.embedder ?? (await getDefaultEmbedder());
|
|
65
|
-
const [queryEmbedding, ...commandEmbeddings] = await embedder.embed([query, ...haystack]);
|
|
66
|
-
return commandEmbeddings
|
|
67
|
-
.map((embedding, idx) => ({
|
|
68
|
-
cmd: commands[idx],
|
|
69
|
-
score: cosineSimilarity(queryEmbedding, embedding),
|
|
70
|
-
}))
|
|
71
|
-
.filter(({ score }) => Number.isFinite(score))
|
|
72
|
-
.sort((a, b) => b.score - a.score);
|
|
73
|
-
}
|
|
74
|
-
async function getDefaultEmbedder() {
|
|
75
|
-
defaultEmbedderPromise ??= Promise.resolve(new MiniLMCommandEmbedder());
|
|
76
|
-
return defaultEmbedderPromise;
|
|
77
|
-
}
|
|
78
|
-
function mergeScores(semantic, lexical, options) {
|
|
79
|
-
const lexicalById = new Map(lexical.map((entry, idx) => [entry.cmd.id, lexicalScore(idx, lexical.length)]));
|
|
80
|
-
const lexicalIds = new Set(lexical.map((entry) => entry.cmd.id));
|
|
81
|
-
return semantic
|
|
82
|
-
.filter((entry) => entry.score >= options.minSemanticScore || lexicalIds.has(entry.cmd.id))
|
|
83
|
-
.map((entry) => {
|
|
84
|
-
const finalScore = entry.score * options.semanticWeight + (lexicalById.get(entry.cmd.id) ?? 0) * options.lexicalWeight;
|
|
85
|
-
return { cmd: entry.cmd, score: 1 - finalScore };
|
|
86
|
-
})
|
|
87
|
-
.sort((a, b) => a.score - b.score);
|
|
88
|
-
}
|
|
89
|
-
function lexicalScore(index, length) {
|
|
90
|
-
if (length <= 1)
|
|
91
|
-
return 1;
|
|
92
|
-
return 1 - index / (length - 1);
|
|
93
|
-
}
|
|
94
|
-
function cosineSimilarity(a, b) {
|
|
95
|
-
let dot = 0;
|
|
96
|
-
let aMagnitude = 0;
|
|
97
|
-
let bMagnitude = 0;
|
|
98
|
-
for (let i = 0; i < Math.min(a.length, b.length); i++) {
|
|
99
|
-
dot += a[i] * b[i];
|
|
100
|
-
aMagnitude += a[i] * a[i];
|
|
101
|
-
bMagnitude += b[i] * b[i];
|
|
102
|
-
}
|
|
103
|
-
if (aMagnitude === 0 || bMagnitude === 0)
|
|
104
|
-
return 0;
|
|
105
|
-
return dot / (Math.sqrt(aMagnitude) * Math.sqrt(bMagnitude));
|
|
106
|
-
}
|
package/oclif.manifest.json
CHANGED
package/package.json
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hesed/search",
|
|
3
3
|
"description": "Intelligence search plugin",
|
|
4
|
-
"version": "0.2.0",
|
|
4
|
+
"version": "0.2.1",
|
|
5
5
|
"author": "Hesed",
|
|
6
6
|
"bin": {
|
|
7
7
|
"permission": "./bin/run.js"
|
|
8
8
|
},
|
|
9
9
|
"bugs": "https://github.com/hesedcasa/search/issues",
|
|
10
10
|
"dependencies": {
|
|
11
|
-
"@
|
|
12
|
-
"
|
|
13
|
-
"@oclif/core": "^4"
|
|
11
|
+
"@oclif/core": "^4",
|
|
12
|
+
"flexsearch": "^0.8.212"
|
|
14
13
|
},
|
|
15
14
|
"devDependencies": {
|
|
16
15
|
"@eslint/compat": "^1",
|
|
package/dist/embedders/minilm.d.ts
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { type CommandEmbedder } from '../search-logic.js';
|
|
2
|
-
export type ModelLoadProgress = {
|
|
3
|
-
file?: string;
|
|
4
|
-
loaded?: number;
|
|
5
|
-
model?: string;
|
|
6
|
-
name?: string;
|
|
7
|
-
progress?: number;
|
|
8
|
-
status: string;
|
|
9
|
-
task?: string;
|
|
10
|
-
total?: number;
|
|
11
|
-
};
|
|
12
|
-
export declare class MiniLMCommandEmbedder implements CommandEmbedder {
|
|
13
|
-
private readonly options;
|
|
14
|
-
private extractorPromise;
|
|
15
|
-
constructor(options?: {
|
|
16
|
-
onLoadProgress?: (progress: ModelLoadProgress) => void;
|
|
17
|
-
});
|
|
18
|
-
embed(texts: string[]): Promise<number[][]>;
|
|
19
|
-
private getExtractor;
|
|
20
|
-
}
|
|
21
|
-
export declare function isMiniLMModelCached(): boolean;
|
|
22
|
-
export declare function getMiniLMModelCachePath(): string;
|
package/dist/embedders/minilm.js
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
import { existsSync } from 'node:fs';
|
|
2
|
-
import { createRequire } from 'node:module';
|
|
3
|
-
import path from 'node:path';
|
|
4
|
-
const MINILM_MODEL = 'Xenova/paraphrase-MiniLM-L3-v2';
|
|
5
|
-
const MODEL_CACHE_FILES = ['config.json', 'tokenizer.json', 'tokenizer_config.json', 'onnx/model.onnx'];
|
|
6
|
-
// eslint-disable-next-line no-new-func
|
|
7
|
-
const importTransformers = new Function('specifier', 'return import(specifier)');
|
|
8
|
-
const require = createRequire(import.meta.url);
|
|
9
|
-
export class MiniLMCommandEmbedder {
|
|
10
|
-
options;
|
|
11
|
-
extractorPromise;
|
|
12
|
-
constructor(options = {}) {
|
|
13
|
-
this.options = options;
|
|
14
|
-
}
|
|
15
|
-
async embed(texts) {
|
|
16
|
-
if (texts.length === 0)
|
|
17
|
-
return [];
|
|
18
|
-
const extractor = await this.getExtractor();
|
|
19
|
-
const output = await extractor(texts, { normalize: true, pooling: 'mean' });
|
|
20
|
-
return tensorToRows(output.data, output.dims);
|
|
21
|
-
}
|
|
22
|
-
async getExtractor() {
|
|
23
|
-
this.extractorPromise ??= importTransformers('@huggingface/transformers').then(async ({ pipeline }) => {
|
|
24
|
-
const pipelineOptions = {};
|
|
25
|
-
// eslint-disable-next-line camelcase
|
|
26
|
-
pipelineOptions.progress_callback = this.options.onLoadProgress;
|
|
27
|
-
const extractor = await pipeline('feature-extraction', MINILM_MODEL, pipelineOptions);
|
|
28
|
-
return extractor;
|
|
29
|
-
});
|
|
30
|
-
return this.extractorPromise;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
export function isMiniLMModelCached() {
|
|
34
|
-
const cachePath = getMiniLMModelCachePath();
|
|
35
|
-
return MODEL_CACHE_FILES.every((file) => existsSync(path.join(cachePath, file)));
|
|
36
|
-
}
|
|
37
|
-
export function getMiniLMModelCachePath() {
|
|
38
|
-
const entrypointPath = require.resolve('@huggingface/transformers');
|
|
39
|
-
const packagePath = path.dirname(path.dirname(entrypointPath));
|
|
40
|
-
return path.join(packagePath, '.cache', ...MINILM_MODEL.split('/'));
|
|
41
|
-
}
|
|
42
|
-
function tensorToRows(data, dims) {
|
|
43
|
-
const rows = dims[0] ?? 0;
|
|
44
|
-
const columns = dims[1] ?? data.length;
|
|
45
|
-
return Array.from({ length: rows }, (_, row) => {
|
|
46
|
-
const rowData = data.slice(row * columns, (row + 1) * columns);
|
|
47
|
-
return Array.isArray(rowData) ? rowData : [...rowData];
|
|
48
|
-
});
|
|
49
|
-
}
|