@hesed/search 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -2
- package/dist/commands/search.js +4 -68
- package/dist/commands/synonyms/export.d.ts +9 -0
- package/dist/commands/synonyms/export.js +22 -0
- package/dist/commands/synonyms/import.d.ts +12 -0
- package/dist/commands/synonyms/import.js +61 -0
- package/dist/search-logic.d.ts +3 -16
- package/dist/search-logic.js +26 -82
- package/dist/synonym-store.d.ts +5 -0
- package/dist/synonym-store.js +21 -0
- package/dist/synonyms.d.ts +28 -0
- package/dist/synonyms.js +65 -0
- package/oclif.manifest.json +70 -1
- package/package.json +3 -4
- package/dist/embedders/minilm.d.ts +0 -22
- package/dist/embedders/minilm.js +0 -49
package/README.md
CHANGED
|
@@ -20,7 +20,7 @@ $ npm install -g @hesed/search
|
|
|
20
20
|
$ search COMMAND
|
|
21
21
|
running command...
|
|
22
22
|
$ search (--version)
|
|
23
|
-
@hesed/search/0.2.
|
|
23
|
+
@hesed/search/0.2.2 linux-x64 node-v24.16.0
|
|
24
24
|
$ search --help [COMMAND]
|
|
25
25
|
USAGE
|
|
26
26
|
$ search COMMAND
|
|
@@ -32,6 +32,8 @@ USAGE
|
|
|
32
32
|
|
|
33
33
|
<!-- commands -->
|
|
34
34
|
* [`search search QUERY`](#search-search-query)
|
|
35
|
+
* [`search synonyms export [FILE]`](#search-synonyms-export-file)
|
|
36
|
+
* [`search synonyms import FILE`](#search-synonyms-import-file)
|
|
35
37
|
|
|
36
38
|
## `search search QUERY`
|
|
37
39
|
|
|
@@ -62,5 +64,52 @@ EXAMPLES
|
|
|
62
64
|
$ search search "update jira" --details
|
|
63
65
|
```
|
|
64
66
|
|
|
65
|
-
_See code: [src/commands/search.ts](https://github.com/hesedcasa/search/blob/v0.2.
|
|
67
|
+
_See code: [src/commands/search.ts](https://github.com/hesedcasa/search/blob/v0.2.2/src/commands/search.ts)_
|
|
68
|
+
|
|
69
|
+
## `search synonyms export [FILE]`
|
|
70
|
+
|
|
71
|
+
Export the stored synonyms to a JSON file or stdout
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
USAGE
|
|
75
|
+
$ search synonyms export [FILE]
|
|
76
|
+
|
|
77
|
+
ARGUMENTS
|
|
78
|
+
[FILE] Output file path (prints to stdout if omitted)
|
|
79
|
+
|
|
80
|
+
DESCRIPTION
|
|
81
|
+
Export the stored synonyms to a JSON file or stdout
|
|
82
|
+
|
|
83
|
+
EXAMPLES
|
|
84
|
+
$ search synonyms export
|
|
85
|
+
|
|
86
|
+
$ search synonyms export ./my-synonyms.json
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
_See code: [src/commands/synonyms/export.ts](https://github.com/hesedcasa/search/blob/v0.2.2/src/commands/synonyms/export.ts)_
|
|
90
|
+
|
|
91
|
+
## `search synonyms import FILE`
|
|
92
|
+
|
|
93
|
+
Import synonyms from a JSON file
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
USAGE
|
|
97
|
+
$ search synonyms import FILE [--merge]
|
|
98
|
+
|
|
99
|
+
ARGUMENTS
|
|
100
|
+
FILE Path to a JSON synonyms file to import
|
|
101
|
+
|
|
102
|
+
FLAGS
|
|
103
|
+
--merge Merge with existing synonyms instead of replacing them
|
|
104
|
+
|
|
105
|
+
DESCRIPTION
|
|
106
|
+
Import synonyms from a JSON file
|
|
107
|
+
|
|
108
|
+
EXAMPLES
|
|
109
|
+
$ search synonyms import ./synonyms.json
|
|
110
|
+
|
|
111
|
+
$ search synonyms import ./synonyms.json --merge
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
_See code: [src/commands/synonyms/import.ts](https://github.com/hesedcasa/search/blob/v0.2.2/src/commands/synonyms/import.ts)_
|
|
66
115
|
<!-- commandsstop -->
|
package/dist/commands/search.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { Args, Command, CommandHelp, Flags, toConfiguredId
|
|
2
|
-
import { isMiniLMModelCached, MiniLMCommandEmbedder } from '../embedders/minilm.js';
|
|
1
|
+
import { Args, Command, CommandHelp, Flags, toConfiguredId } from '@oclif/core';
|
|
3
2
|
import { searchCommands } from '../search-logic.js';
|
|
3
|
+
import { loadStoredSynonymMap } from '../synonym-store.js';
|
|
4
4
|
export default class Search extends Command {
|
|
5
5
|
static args = {
|
|
6
6
|
query: Args.string({ description: 'Search term to filter commands by', required: true }),
|
|
@@ -19,14 +19,8 @@ export default class Search extends Command {
|
|
|
19
19
|
async run() {
|
|
20
20
|
const { args, flags } = await this.parse(Search);
|
|
21
21
|
const allCommands = this.config.commands.filter((c) => !c.hidden && c.pluginName !== '@oclif/plugin-plugins');
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
try {
|
|
25
|
-
scored = (await searchCommands(args.query, allCommands, getSearchOptions(this.config, loader.onProgress))).slice(0, flags.limit);
|
|
26
|
-
}
|
|
27
|
-
finally {
|
|
28
|
-
loader.stop();
|
|
29
|
-
}
|
|
22
|
+
const synonyms = loadStoredSynonymMap(this.config.configDir);
|
|
23
|
+
const scored = (await searchCommands(args.query, allCommands, synonyms)).slice(0, flags.limit);
|
|
30
24
|
const results = scored.map((entry) => {
|
|
31
25
|
const { cmd } = entry;
|
|
32
26
|
const configuredId = toConfiguredId(cmd.id, this.config);
|
|
@@ -85,61 +79,3 @@ export default class Search extends Command {
|
|
|
85
79
|
}
|
|
86
80
|
}
|
|
87
81
|
}
|
|
88
|
-
function getSearchOptions(config, onLoadProgress) {
|
|
89
|
-
const testConfig = config;
|
|
90
|
-
if (!testConfig.searchEmbedder)
|
|
91
|
-
return { embedder: new MiniLMCommandEmbedder({ onLoadProgress }) };
|
|
92
|
-
return { embedder: testConfig.searchEmbedder };
|
|
93
|
-
}
|
|
94
|
-
function createModelLoader(silent) {
|
|
95
|
-
let started = false;
|
|
96
|
-
let stopped = false;
|
|
97
|
-
const start = (status) => {
|
|
98
|
-
if (silent || stopped)
|
|
99
|
-
return;
|
|
100
|
-
if (started) {
|
|
101
|
-
ux.action.status = status;
|
|
102
|
-
return;
|
|
103
|
-
}
|
|
104
|
-
ux.action.start('Loading semantic search model', status);
|
|
105
|
-
started = true;
|
|
106
|
-
};
|
|
107
|
-
const stop = () => {
|
|
108
|
-
if (silent || stopped)
|
|
109
|
-
return;
|
|
110
|
-
stopped = true;
|
|
111
|
-
if (started)
|
|
112
|
-
ux.action.stop();
|
|
113
|
-
};
|
|
114
|
-
return {
|
|
115
|
-
onProgress(progress) {
|
|
116
|
-
if (progress.status === 'ready') {
|
|
117
|
-
stop();
|
|
118
|
-
return;
|
|
119
|
-
}
|
|
120
|
-
if (progress.status === 'progress_total' && typeof progress.progress === 'number') {
|
|
121
|
-
start(`${Math.round(progress.progress)}% ${formatBytes(progress.loaded)} / ${formatBytes(progress.total)}`);
|
|
122
|
-
return;
|
|
123
|
-
}
|
|
124
|
-
if (progress.status === 'download') {
|
|
125
|
-
start(formatProgressFile(progress.file));
|
|
126
|
-
}
|
|
127
|
-
},
|
|
128
|
-
stop,
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
|
-
function formatProgressFile(file) {
|
|
132
|
-
return file?.split('/').at(-1);
|
|
133
|
-
}
|
|
134
|
-
function formatBytes(value) {
|
|
135
|
-
if (!value || value <= 0)
|
|
136
|
-
return '?';
|
|
137
|
-
const units = ['B', 'KB', 'MB', 'GB'];
|
|
138
|
-
let size = value;
|
|
139
|
-
let unit = 0;
|
|
140
|
-
while (size >= 1024 && unit < units.length - 1) {
|
|
141
|
-
size /= 1024;
|
|
142
|
-
unit++;
|
|
143
|
-
}
|
|
144
|
-
return `${size.toFixed(size >= 10 || unit === 0 ? 0 : 1)} ${units[unit]}`;
|
|
145
|
-
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Command } from '@oclif/core';
|
|
2
|
+
export default class SynonymsExport extends Command {
|
|
3
|
+
static args: {
|
|
4
|
+
file: import("@oclif/core/interfaces").Arg<string | undefined, Record<string, unknown>>;
|
|
5
|
+
};
|
|
6
|
+
static description: string;
|
|
7
|
+
static examples: string[];
|
|
8
|
+
run(): Promise<void>;
|
|
9
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Args, Command } from '@oclif/core';
|
|
2
|
+
import { writeFileSync } from 'node:fs';
|
|
3
|
+
import { readSynonymGroups } from '../../synonym-store.js';
|
|
4
|
+
export default class SynonymsExport extends Command {
|
|
5
|
+
static args = {
|
|
6
|
+
file: Args.string({ description: 'Output file path (prints to stdout if omitted)', required: false }),
|
|
7
|
+
};
|
|
8
|
+
static description = 'Export the stored synonyms to a JSON file or stdout';
|
|
9
|
+
static examples = ['<%= config.bin %> synonyms export', '<%= config.bin %> synonyms export ./my-synonyms.json'];
|
|
10
|
+
async run() {
|
|
11
|
+
const { args } = await this.parse(SynonymsExport);
|
|
12
|
+
const groups = readSynonymGroups(this.config.configDir);
|
|
13
|
+
const json = JSON.stringify(groups, null, 2) + '\n';
|
|
14
|
+
if (args.file) {
|
|
15
|
+
writeFileSync(args.file, json, 'utf8');
|
|
16
|
+
this.log(`Exported ${groups.length} synonym group${groups.length === 1 ? '' : 's'} → ${args.file}`);
|
|
17
|
+
}
|
|
18
|
+
else {
|
|
19
|
+
process.stdout.write(json);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Command } from '@oclif/core';
|
|
2
|
+
export default class SynonymsImport extends Command {
|
|
3
|
+
static args: {
|
|
4
|
+
file: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
|
|
5
|
+
};
|
|
6
|
+
static description: string;
|
|
7
|
+
static examples: string[];
|
|
8
|
+
static flags: {
|
|
9
|
+
merge: import("@oclif/core/interfaces").BooleanFlag<boolean>;
|
|
10
|
+
};
|
|
11
|
+
run(): Promise<void>;
|
|
12
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { Args, Command, Flags } from '@oclif/core';
|
|
2
|
+
import { existsSync } from 'node:fs';
|
|
3
|
+
import { synonymsPath, writeSynonymGroups } from '../../synonym-store.js';
|
|
4
|
+
import { loadSynonymGroupsFromFile } from '../../synonyms.js';
|
|
5
|
+
export default class SynonymsImport extends Command {
|
|
6
|
+
static args = {
|
|
7
|
+
file: Args.string({ description: 'Path to a JSON synonyms file to import', required: true }),
|
|
8
|
+
};
|
|
9
|
+
static description = 'Import synonyms from a JSON file';
|
|
10
|
+
static examples = [
|
|
11
|
+
'<%= config.bin %> synonyms import ./synonyms.json',
|
|
12
|
+
'<%= config.bin %> synonyms import ./synonyms.json --merge',
|
|
13
|
+
];
|
|
14
|
+
static flags = {
|
|
15
|
+
merge: Flags.boolean({
|
|
16
|
+
default: false,
|
|
17
|
+
description: 'Merge with existing synonyms instead of replacing them',
|
|
18
|
+
}),
|
|
19
|
+
};
|
|
20
|
+
async run() {
|
|
21
|
+
const { args, flags } = await this.parse(SynonymsImport);
|
|
22
|
+
if (!existsSync(args.file)) {
|
|
23
|
+
this.error(`File not found: ${args.file}`);
|
|
24
|
+
}
|
|
25
|
+
const incoming = loadSynonymGroupsFromFile(args.file);
|
|
26
|
+
let groups;
|
|
27
|
+
if (flags.merge) {
|
|
28
|
+
const { readSynonymGroups } = await import('../../synonym-store.js');
|
|
29
|
+
const existing = readSynonymGroups(this.config.configDir);
|
|
30
|
+
groups = mergeSynonymGroups(existing, incoming);
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
groups = incoming;
|
|
34
|
+
}
|
|
35
|
+
writeSynonymGroups(this.config.configDir, groups);
|
|
36
|
+
const dest = synonymsPath(this.config.configDir);
|
|
37
|
+
this.log(`Imported ${groups.length} synonym group${groups.length === 1 ? '' : 's'} → ${dest}`);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Merge two synonym group lists.
|
|
42
|
+
*
|
|
43
|
+
* Groups are merged when they share at least one term in common; otherwise
|
|
44
|
+
* they are appended as separate groups.
|
|
45
|
+
*/
|
|
46
|
+
function mergeSynonymGroups(existing, incoming) {
|
|
47
|
+
const result = existing.map((g) => [...g]);
|
|
48
|
+
for (const group of incoming) {
|
|
49
|
+
const normalizedGroup = group.map((t) => t.toLowerCase().trim()).filter(Boolean);
|
|
50
|
+
// Find any existing group that overlaps
|
|
51
|
+
const matchIndex = result.findIndex((r) => r.some((t) => normalizedGroup.includes(t.toLowerCase().trim())));
|
|
52
|
+
if (matchIndex === -1) {
|
|
53
|
+
result.push(normalizedGroup);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
const merged = [...new Set([...normalizedGroup, ...result[matchIndex].map((t) => t.toLowerCase().trim())])];
|
|
57
|
+
result[matchIndex] = merged;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return result;
|
|
61
|
+
}
|
package/dist/search-logic.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { type SynonymMap } from './synonyms.js';
|
|
1
2
|
export type SearchableCommand = {
|
|
2
3
|
description?: string;
|
|
3
4
|
id: string;
|
|
@@ -8,19 +9,5 @@ export type ScoredCommand<T extends SearchableCommand = SearchableCommand> = {
|
|
|
8
9
|
cmd: T;
|
|
9
10
|
score: number;
|
|
10
11
|
};
|
|
11
|
-
export
|
|
12
|
-
|
|
13
|
-
};
|
|
14
|
-
export type CommandReranker<T extends SearchableCommand = SearchableCommand> = {
|
|
15
|
-
rerank(query: string, commands: Array<ScoredCommand<T>>): Promise<Array<ScoredCommand<T>>>;
|
|
16
|
-
};
|
|
17
|
-
export type SearchCommandsOptions<T extends SearchableCommand = SearchableCommand> = {
|
|
18
|
-
embedder?: CommandEmbedder;
|
|
19
|
-
lexicalWeight?: number;
|
|
20
|
-
minSemanticScore?: number;
|
|
21
|
-
reranker?: CommandReranker<T>;
|
|
22
|
-
semantic?: boolean;
|
|
23
|
-
semanticWeight?: number;
|
|
24
|
-
};
|
|
25
|
-
export declare function searchCommands<T extends SearchableCommand>(query: string, commands: T[], options?: SearchCommandsOptions<T>): Promise<Array<ScoredCommand<T>>>;
|
|
26
|
-
export declare function searchCommandsLexically<T extends SearchableCommand>(query: string, commands: T[], haystack?: string[]): Array<ScoredCommand<T>>;
|
|
12
|
+
export declare function searchCommands<T extends SearchableCommand>(query: string, commands: T[], synonyms?: SynonymMap): Promise<Array<ScoredCommand<T>>>;
|
|
13
|
+
export declare function searchCommandsLexically<T extends SearchableCommand>(query: string, commands: T[], haystack?: string[], synonyms?: SynonymMap): Array<ScoredCommand<T>>;
|
package/dist/search-logic.js
CHANGED
|
@@ -1,58 +1,46 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {
|
|
3
|
-
const
|
|
4
|
-
const
|
|
5
|
-
|
|
6
|
-
let defaultEmbedderPromise;
|
|
7
|
-
export async function searchCommands(query, commands, options = {}) {
|
|
1
|
+
import { createRequire } from 'node:module';
|
|
2
|
+
import { expandWithSynonyms } from './synonyms.js';
|
|
3
|
+
const require = createRequire(import.meta.url);
|
|
4
|
+
const { Index } = require('flexsearch');
|
|
5
|
+
export async function searchCommands(query, commands, synonyms = new Map()) {
|
|
8
6
|
const normalizedQuery = query.trim();
|
|
9
7
|
if (normalizedQuery.length === 0 || commands.length === 0)
|
|
10
8
|
return [];
|
|
11
|
-
const haystack = commands.map((command) => commandSearchText(command));
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
return lexical;
|
|
21
|
-
}
|
|
22
|
-
if (semantic.length === 0)
|
|
23
|
-
return lexical;
|
|
24
|
-
const merged = mergeScores(semantic, lexical, {
|
|
25
|
-
lexicalWeight: options.lexicalWeight ?? DEFAULT_LEXICAL_WEIGHT,
|
|
26
|
-
minSemanticScore: options.minSemanticScore ?? DEFAULT_MIN_SEMANTIC_SCORE,
|
|
27
|
-
semanticWeight: options.semanticWeight ?? DEFAULT_SEMANTIC_WEIGHT,
|
|
28
|
-
});
|
|
29
|
-
return options.reranker ? options.reranker.rerank(normalizedQuery, merged) : merged;
|
|
30
|
-
}
|
|
31
|
-
export function searchCommandsLexically(query, commands, haystack = commands.map((command) => commandSearchText(command))) {
|
|
32
|
-
const uf = new UFuzzy({ intraIns: Infinity });
|
|
33
|
-
const [idxs, , order] = uf.search(haystack, query, 0, Infinity);
|
|
34
|
-
if (idxs && idxs.length > 0) {
|
|
35
|
-
const ranked = order ?? idxs.map((_, i) => i);
|
|
36
|
-
return ranked.map((oi, rank) => ({ cmd: commands[idxs[oi]], score: rank }));
|
|
9
|
+
const haystack = commands.map((command) => expandWithSynonyms(commandSearchText(command), synonyms));
|
|
10
|
+
return searchCommandsLexically(normalizedQuery, commands, haystack, synonyms);
|
|
11
|
+
}
|
|
12
|
+
export function searchCommandsLexically(query, commands, haystack = commands.map((command) => commandSearchText(command)), synonyms = new Map()) {
|
|
13
|
+
const index = createCommandSearchIndex(haystack);
|
|
14
|
+
const expandedQuery = expandWithSynonyms(query, synonyms);
|
|
15
|
+
const idxs = index.search(expandedQuery, { limit: commands.length, suggest: true });
|
|
16
|
+
if (idxs.length > 0) {
|
|
17
|
+
return idxs.map((idx, rank) => ({ cmd: commands[Number(idx)], score: rank }));
|
|
37
18
|
}
|
|
38
19
|
// Multi-token fallback: score each command by how many individual query
|
|
39
20
|
// tokens it matches. Handles queries containing unknown alias words (e.g.
|
|
40
21
|
// "atlassian") that don't appear literally in any command field.
|
|
41
|
-
const tokens =
|
|
22
|
+
const tokens = expandedQuery.trim().split(/\s+/).filter(Boolean);
|
|
42
23
|
if (tokens.length <= 1)
|
|
43
24
|
return [];
|
|
44
25
|
const hitCount = new Map();
|
|
45
26
|
for (const token of tokens) {
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
27
|
+
const tIdxs = index.search(token, { limit: commands.length, suggest: true });
|
|
28
|
+
for (const idx of tIdxs) {
|
|
29
|
+
const commandIndex = Number(idx);
|
|
30
|
+
hitCount.set(commandIndex, (hitCount.get(commandIndex) ?? 0) + 1);
|
|
50
31
|
}
|
|
51
32
|
}
|
|
52
33
|
return [...hitCount.entries()]
|
|
53
34
|
.sort((a, b) => b[1] - a[1] || a[0] - b[0])
|
|
54
35
|
.map(([idx, hits]) => ({ cmd: commands[idx], score: tokens.length - hits }));
|
|
55
36
|
}
|
|
37
|
+
function createCommandSearchIndex(haystack) {
|
|
38
|
+
const index = new Index({ encoder: 'LatinAdvanced', resolution: 9, tokenize: 'forward' });
|
|
39
|
+
for (const [idx, text] of haystack.entries()) {
|
|
40
|
+
index.add(idx, text);
|
|
41
|
+
}
|
|
42
|
+
return index;
|
|
43
|
+
}
|
|
56
44
|
function commandSearchText(command) {
|
|
57
45
|
return [command.id, command.summary ?? command.description ?? '', command.pluginName ?? '']
|
|
58
46
|
.filter(Boolean)
|
|
@@ -60,47 +48,3 @@ function commandSearchText(command) {
|
|
|
60
48
|
.replaceAll(/\s+/g, ' ')
|
|
61
49
|
.trim();
|
|
62
50
|
}
|
|
63
|
-
async function searchCommandsSemantically(query, commands, haystack, options) {
|
|
64
|
-
const embedder = options.embedder ?? (await getDefaultEmbedder());
|
|
65
|
-
const [queryEmbedding, ...commandEmbeddings] = await embedder.embed([query, ...haystack]);
|
|
66
|
-
return commandEmbeddings
|
|
67
|
-
.map((embedding, idx) => ({
|
|
68
|
-
cmd: commands[idx],
|
|
69
|
-
score: cosineSimilarity(queryEmbedding, embedding),
|
|
70
|
-
}))
|
|
71
|
-
.filter(({ score }) => Number.isFinite(score))
|
|
72
|
-
.sort((a, b) => b.score - a.score);
|
|
73
|
-
}
|
|
74
|
-
async function getDefaultEmbedder() {
|
|
75
|
-
defaultEmbedderPromise ??= Promise.resolve(new MiniLMCommandEmbedder());
|
|
76
|
-
return defaultEmbedderPromise;
|
|
77
|
-
}
|
|
78
|
-
function mergeScores(semantic, lexical, options) {
|
|
79
|
-
const lexicalById = new Map(lexical.map((entry, idx) => [entry.cmd.id, lexicalScore(idx, lexical.length)]));
|
|
80
|
-
const lexicalIds = new Set(lexical.map((entry) => entry.cmd.id));
|
|
81
|
-
return semantic
|
|
82
|
-
.filter((entry) => entry.score >= options.minSemanticScore || lexicalIds.has(entry.cmd.id))
|
|
83
|
-
.map((entry) => {
|
|
84
|
-
const finalScore = entry.score * options.semanticWeight + (lexicalById.get(entry.cmd.id) ?? 0) * options.lexicalWeight;
|
|
85
|
-
return { cmd: entry.cmd, score: 1 - finalScore };
|
|
86
|
-
})
|
|
87
|
-
.sort((a, b) => a.score - b.score);
|
|
88
|
-
}
|
|
89
|
-
function lexicalScore(index, length) {
|
|
90
|
-
if (length <= 1)
|
|
91
|
-
return 1;
|
|
92
|
-
return 1 - index / (length - 1);
|
|
93
|
-
}
|
|
94
|
-
function cosineSimilarity(a, b) {
|
|
95
|
-
let dot = 0;
|
|
96
|
-
let aMagnitude = 0;
|
|
97
|
-
let bMagnitude = 0;
|
|
98
|
-
for (let i = 0; i < Math.min(a.length, b.length); i++) {
|
|
99
|
-
dot += a[i] * b[i];
|
|
100
|
-
aMagnitude += a[i] * a[i];
|
|
101
|
-
bMagnitude += b[i] * b[i];
|
|
102
|
-
}
|
|
103
|
-
if (aMagnitude === 0 || bMagnitude === 0)
|
|
104
|
-
return 0;
|
|
105
|
-
return dot / (Math.sqrt(aMagnitude) * Math.sqrt(bMagnitude));
|
|
106
|
-
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { type SynonymGroup, type SynonymMap } from './synonyms.js';
|
|
2
|
+
export declare function synonymsPath(configDir: string): string;
|
|
3
|
+
export declare function readSynonymGroups(configDir: string | undefined): SynonymGroup[];
|
|
4
|
+
export declare function writeSynonymGroups(configDir: string, groups: SynonymGroup[]): void;
|
|
5
|
+
export declare function loadStoredSynonymMap(configDir: string | undefined): SynonymMap;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { buildSynonymMap, parseSynonymGroups } from './synonyms.js';
|
|
4
|
+
const SYNONYMS_FILENAME = 'synonyms.json';
|
|
5
|
+
export function synonymsPath(configDir) {
|
|
6
|
+
return join(configDir, SYNONYMS_FILENAME);
|
|
7
|
+
}
|
|
8
|
+
export function readSynonymGroups(configDir) {
|
|
9
|
+
if (!configDir)
|
|
10
|
+
return [];
|
|
11
|
+
const filePath = synonymsPath(configDir);
|
|
12
|
+
if (!existsSync(filePath))
|
|
13
|
+
return [];
|
|
14
|
+
return parseSynonymGroups(readFileSync(filePath, 'utf8'));
|
|
15
|
+
}
|
|
16
|
+
export function writeSynonymGroups(configDir, groups) {
|
|
17
|
+
writeFileSync(synonymsPath(configDir), JSON.stringify(groups, null, 2) + '\n', 'utf8');
|
|
18
|
+
}
|
|
19
|
+
export function loadStoredSynonymMap(configDir) {
|
|
20
|
+
return buildSynonymMap(readSynonymGroups(configDir));
|
|
21
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export type SynonymGroup = string[];
|
|
2
|
+
export type SynonymMap = Map<string, string[]>;
|
|
3
|
+
/**
|
|
4
|
+
* Parse and validate a synonyms file.
|
|
5
|
+
*
|
|
6
|
+
* File format — a JSON array of synonym groups:
|
|
7
|
+
*
|
|
8
|
+
* [
|
|
9
|
+
* ["ticket", "issue", "bug"],
|
|
10
|
+
* ["pr", "pull request", "merge request"],
|
|
11
|
+
* ["repo", "repository"]
|
|
12
|
+
* ]
|
|
13
|
+
*
|
|
14
|
+
* Every term in a group is treated as equivalent to every other term. Terms
|
|
15
|
+
* are matched case-insensitively. Multi-word terms (e.g. "pull request") are
|
|
16
|
+
* matched as whole phrases.
|
|
17
|
+
*/
|
|
18
|
+
export declare function parseSynonymGroups(raw: string): SynonymGroup[];
|
|
19
|
+
export declare function loadSynonymGroupsFromFile(filePath: string): SynonymGroup[];
|
|
20
|
+
export declare function buildSynonymMap(groups: SynonymGroup[]): SynonymMap;
|
|
21
|
+
/**
|
|
22
|
+
* Return `text` with synonym expansions appended.
|
|
23
|
+
*
|
|
24
|
+
* For every term in `synonyms` that appears in `text`, all sibling terms from
|
|
25
|
+
* the same group are appended so the search index / query matches any
|
|
26
|
+
* equivalent spelling.
|
|
27
|
+
*/
|
|
28
|
+
export declare function expandWithSynonyms(text: string, synonyms: SynonymMap): string;
|
package/dist/synonyms.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
/**
|
|
3
|
+
* Parse and validate a synonyms file.
|
|
4
|
+
*
|
|
5
|
+
* File format — a JSON array of synonym groups:
|
|
6
|
+
*
|
|
7
|
+
* [
|
|
8
|
+
* ["ticket", "issue", "bug"],
|
|
9
|
+
* ["pr", "pull request", "merge request"],
|
|
10
|
+
* ["repo", "repository"]
|
|
11
|
+
* ]
|
|
12
|
+
*
|
|
13
|
+
* Every term in a group is treated as equivalent to every other term. Terms
|
|
14
|
+
* are matched case-insensitively. Multi-word terms (e.g. "pull request") are
|
|
15
|
+
* matched as whole phrases.
|
|
16
|
+
*/
|
|
17
|
+
export function parseSynonymGroups(raw) {
|
|
18
|
+
const parsed = JSON.parse(raw);
|
|
19
|
+
if (!Array.isArray(parsed) ||
|
|
20
|
+
parsed.some((g) => !Array.isArray(g) || g.some((term) => typeof term !== 'string'))) {
|
|
21
|
+
throw new TypeError('Synonyms file must be a JSON array of string arrays');
|
|
22
|
+
}
|
|
23
|
+
return parsed;
|
|
24
|
+
}
|
|
25
|
+
export function loadSynonymGroupsFromFile(filePath) {
|
|
26
|
+
return parseSynonymGroups(readFileSync(filePath, 'utf8'));
|
|
27
|
+
}
|
|
28
|
+
export function buildSynonymMap(groups) {
|
|
29
|
+
const map = new Map();
|
|
30
|
+
for (const group of groups) {
|
|
31
|
+
const normalized = group.map((t) => t.toLowerCase().trim()).filter(Boolean);
|
|
32
|
+
for (const term of normalized) {
|
|
33
|
+
const others = normalized.filter((t) => t !== term);
|
|
34
|
+
const existing = map.get(term) ?? [];
|
|
35
|
+
map.set(term, [...new Set([...existing, ...others])]);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return map;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Return `text` with synonym expansions appended.
|
|
42
|
+
*
|
|
43
|
+
* For every term in `synonyms` that appears in `text`, all sibling terms from
|
|
44
|
+
* the same group are appended so the search index / query matches any
|
|
45
|
+
* equivalent spelling.
|
|
46
|
+
*/
|
|
47
|
+
export function expandWithSynonyms(text, synonyms) {
|
|
48
|
+
if (synonyms.size === 0)
|
|
49
|
+
return text;
|
|
50
|
+
const extras = [];
|
|
51
|
+
for (const [term, syns] of synonyms) {
|
|
52
|
+
if (containsTerm(text, term)) {
|
|
53
|
+
extras.push(...syns);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return extras.length > 0 ? `${text} ${extras.join(' ')}` : text;
|
|
57
|
+
}
|
|
58
|
+
function escapeRegex(s) {
|
|
59
|
+
// eslint-disable-next-line unicorn/prefer-string-raw
|
|
60
|
+
return s.replaceAll(/[$()*+.?[\\\]^{|}]/g, '\\$&').replaceAll(/\s+/g, String.raw `\s+`);
|
|
61
|
+
}
|
|
62
|
+
function containsTerm(text, term) {
|
|
63
|
+
const pattern = new RegExp(`(?<![\\w])${escapeRegex(term)}(?![\\w])`, 'i');
|
|
64
|
+
return pattern.test(text);
|
|
65
|
+
}
|
package/oclif.manifest.json
CHANGED
|
@@ -56,7 +56,76 @@
|
|
|
56
56
|
"commands",
|
|
57
57
|
"search.js"
|
|
58
58
|
]
|
|
59
|
+
},
|
|
60
|
+
"synonyms:export": {
|
|
61
|
+
"aliases": [],
|
|
62
|
+
"args": {
|
|
63
|
+
"file": {
|
|
64
|
+
"description": "Output file path (prints to stdout if omitted)",
|
|
65
|
+
"name": "file",
|
|
66
|
+
"required": false
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
"description": "Export the stored synonyms to a JSON file or stdout",
|
|
70
|
+
"examples": [
|
|
71
|
+
"<%= config.bin %> synonyms export",
|
|
72
|
+
"<%= config.bin %> synonyms export ./my-synonyms.json"
|
|
73
|
+
],
|
|
74
|
+
"flags": {},
|
|
75
|
+
"hasDynamicHelp": false,
|
|
76
|
+
"hiddenAliases": [],
|
|
77
|
+
"id": "synonyms:export",
|
|
78
|
+
"pluginAlias": "@hesed/search",
|
|
79
|
+
"pluginName": "@hesed/search",
|
|
80
|
+
"pluginType": "core",
|
|
81
|
+
"strict": true,
|
|
82
|
+
"enableJsonFlag": false,
|
|
83
|
+
"isESM": true,
|
|
84
|
+
"relativePath": [
|
|
85
|
+
"dist",
|
|
86
|
+
"commands",
|
|
87
|
+
"synonyms",
|
|
88
|
+
"export.js"
|
|
89
|
+
]
|
|
90
|
+
},
|
|
91
|
+
"synonyms:import": {
|
|
92
|
+
"aliases": [],
|
|
93
|
+
"args": {
|
|
94
|
+
"file": {
|
|
95
|
+
"description": "Path to a JSON synonyms file to import",
|
|
96
|
+
"name": "file",
|
|
97
|
+
"required": true
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
"description": "Import synonyms from a JSON file",
|
|
101
|
+
"examples": [
|
|
102
|
+
"<%= config.bin %> synonyms import ./synonyms.json",
|
|
103
|
+
"<%= config.bin %> synonyms import ./synonyms.json --merge"
|
|
104
|
+
],
|
|
105
|
+
"flags": {
|
|
106
|
+
"merge": {
|
|
107
|
+
"description": "Merge with existing synonyms instead of replacing them",
|
|
108
|
+
"name": "merge",
|
|
109
|
+
"allowNo": false,
|
|
110
|
+
"type": "boolean"
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
"hasDynamicHelp": false,
|
|
114
|
+
"hiddenAliases": [],
|
|
115
|
+
"id": "synonyms:import",
|
|
116
|
+
"pluginAlias": "@hesed/search",
|
|
117
|
+
"pluginName": "@hesed/search",
|
|
118
|
+
"pluginType": "core",
|
|
119
|
+
"strict": true,
|
|
120
|
+
"enableJsonFlag": false,
|
|
121
|
+
"isESM": true,
|
|
122
|
+
"relativePath": [
|
|
123
|
+
"dist",
|
|
124
|
+
"commands",
|
|
125
|
+
"synonyms",
|
|
126
|
+
"import.js"
|
|
127
|
+
]
|
|
59
128
|
}
|
|
60
129
|
},
|
|
61
|
-
"version": "0.2.
|
|
130
|
+
"version": "0.2.2"
|
|
62
131
|
}
|
package/package.json
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hesed/search",
|
|
3
3
|
"description": "Intelligence search plugin",
|
|
4
|
-
"version": "0.2.
|
|
4
|
+
"version": "0.2.2",
|
|
5
5
|
"author": "Hesed",
|
|
6
6
|
"bin": {
|
|
7
7
|
"permission": "./bin/run.js"
|
|
8
8
|
},
|
|
9
9
|
"bugs": "https://github.com/hesedcasa/search/issues",
|
|
10
10
|
"dependencies": {
|
|
11
|
-
"@
|
|
12
|
-
"
|
|
13
|
-
"@oclif/core": "^4"
|
|
11
|
+
"@oclif/core": "^4",
|
|
12
|
+
"flexsearch": "^0.8.212"
|
|
14
13
|
},
|
|
15
14
|
"devDependencies": {
|
|
16
15
|
"@eslint/compat": "^1",
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { type CommandEmbedder } from '../search-logic.js';
|
|
2
|
-
export type ModelLoadProgress = {
|
|
3
|
-
file?: string;
|
|
4
|
-
loaded?: number;
|
|
5
|
-
model?: string;
|
|
6
|
-
name?: string;
|
|
7
|
-
progress?: number;
|
|
8
|
-
status: string;
|
|
9
|
-
task?: string;
|
|
10
|
-
total?: number;
|
|
11
|
-
};
|
|
12
|
-
export declare class MiniLMCommandEmbedder implements CommandEmbedder {
|
|
13
|
-
private readonly options;
|
|
14
|
-
private extractorPromise;
|
|
15
|
-
constructor(options?: {
|
|
16
|
-
onLoadProgress?: (progress: ModelLoadProgress) => void;
|
|
17
|
-
});
|
|
18
|
-
embed(texts: string[]): Promise<number[][]>;
|
|
19
|
-
private getExtractor;
|
|
20
|
-
}
|
|
21
|
-
export declare function isMiniLMModelCached(): boolean;
|
|
22
|
-
export declare function getMiniLMModelCachePath(): string;
|
package/dist/embedders/minilm.js
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
import { existsSync } from 'node:fs';
|
|
2
|
-
import { createRequire } from 'node:module';
|
|
3
|
-
import path from 'node:path';
|
|
4
|
-
const MINILM_MODEL = 'Xenova/paraphrase-MiniLM-L3-v2';
|
|
5
|
-
const MODEL_CACHE_FILES = ['config.json', 'tokenizer.json', 'tokenizer_config.json', 'onnx/model.onnx'];
|
|
6
|
-
// eslint-disable-next-line no-new-func
|
|
7
|
-
const importTransformers = new Function('specifier', 'return import(specifier)');
|
|
8
|
-
const require = createRequire(import.meta.url);
|
|
9
|
-
export class MiniLMCommandEmbedder {
|
|
10
|
-
options;
|
|
11
|
-
extractorPromise;
|
|
12
|
-
constructor(options = {}) {
|
|
13
|
-
this.options = options;
|
|
14
|
-
}
|
|
15
|
-
async embed(texts) {
|
|
16
|
-
if (texts.length === 0)
|
|
17
|
-
return [];
|
|
18
|
-
const extractor = await this.getExtractor();
|
|
19
|
-
const output = await extractor(texts, { normalize: true, pooling: 'mean' });
|
|
20
|
-
return tensorToRows(output.data, output.dims);
|
|
21
|
-
}
|
|
22
|
-
async getExtractor() {
|
|
23
|
-
this.extractorPromise ??= importTransformers('@huggingface/transformers').then(async ({ pipeline }) => {
|
|
24
|
-
const pipelineOptions = {};
|
|
25
|
-
// eslint-disable-next-line camelcase
|
|
26
|
-
pipelineOptions.progress_callback = this.options.onLoadProgress;
|
|
27
|
-
const extractor = await pipeline('feature-extraction', MINILM_MODEL, pipelineOptions);
|
|
28
|
-
return extractor;
|
|
29
|
-
});
|
|
30
|
-
return this.extractorPromise;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
export function isMiniLMModelCached() {
|
|
34
|
-
const cachePath = getMiniLMModelCachePath();
|
|
35
|
-
return MODEL_CACHE_FILES.every((file) => existsSync(path.join(cachePath, file)));
|
|
36
|
-
}
|
|
37
|
-
export function getMiniLMModelCachePath() {
|
|
38
|
-
const entrypointPath = require.resolve('@huggingface/transformers');
|
|
39
|
-
const packagePath = path.dirname(path.dirname(entrypointPath));
|
|
40
|
-
return path.join(packagePath, '.cache', ...MINILM_MODEL.split('/'));
|
|
41
|
-
}
|
|
42
|
-
function tensorToRows(data, dims) {
|
|
43
|
-
const rows = dims[0] ?? 0;
|
|
44
|
-
const columns = dims[1] ?? data.length;
|
|
45
|
-
return Array.from({ length: rows }, (_, row) => {
|
|
46
|
-
const rowData = data.slice(row * columns, (row + 1) * columns);
|
|
47
|
-
return Array.isArray(rowData) ? rowData : [...rowData];
|
|
48
|
-
});
|
|
49
|
-
}
|