@hesed/search 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -20,7 +20,7 @@ $ npm install -g @hesed/search
20
20
  $ search COMMAND
21
21
  running command...
22
22
  $ search (--version)
23
- @hesed/search/0.2.0 linux-x64 node-v24.16.0
23
+ @hesed/search/0.2.2 linux-x64 node-v24.16.0
24
24
  $ search --help [COMMAND]
25
25
  USAGE
26
26
  $ search COMMAND
@@ -32,6 +32,8 @@ USAGE
32
32
 
33
33
  <!-- commands -->
34
34
  * [`search search QUERY`](#search-search-query)
35
+ * [`search synonyms export [FILE]`](#search-synonyms-export-file)
36
+ * [`search synonyms import FILE`](#search-synonyms-import-file)
35
37
 
36
38
  ## `search search QUERY`
37
39
 
@@ -62,5 +64,52 @@ EXAMPLES
62
64
  $ search search "update jira" --details
63
65
  ```
64
66
 
65
- _See code: [src/commands/search.ts](https://github.com/hesedcasa/search/blob/v0.2.0/src/commands/search.ts)_
67
+ _See code: [src/commands/search.ts](https://github.com/hesedcasa/search/blob/v0.2.2/src/commands/search.ts)_
68
+
69
+ ## `search synonyms export [FILE]`
70
+
71
+ Export the stored synonyms to a JSON file or stdout
72
+
73
+ ```
74
+ USAGE
75
+ $ search synonyms export [FILE]
76
+
77
+ ARGUMENTS
78
+ [FILE] Output file path (prints to stdout if omitted)
79
+
80
+ DESCRIPTION
81
+ Export the stored synonyms to a JSON file or stdout
82
+
83
+ EXAMPLES
84
+ $ search synonyms export
85
+
86
+ $ search synonyms export ./my-synonyms.json
87
+ ```
88
+
89
+ _See code: [src/commands/synonyms/export.ts](https://github.com/hesedcasa/search/blob/v0.2.2/src/commands/synonyms/export.ts)_
90
+
91
+ ## `search synonyms import FILE`
92
+
93
+ Import synonyms from a JSON file
94
+
95
+ ```
96
+ USAGE
97
+ $ search synonyms import FILE [--merge]
98
+
99
+ ARGUMENTS
100
+ FILE Path to a JSON synonyms file to import
101
+
102
+ FLAGS
103
+ --merge Merge with existing synonyms instead of replacing them
104
+
105
+ DESCRIPTION
106
+ Import synonyms from a JSON file
107
+
108
+ EXAMPLES
109
+ $ search synonyms import ./synonyms.json
110
+
111
+ $ search synonyms import ./synonyms.json --merge
112
+ ```
113
+
114
+ _See code: [src/commands/synonyms/import.ts](https://github.com/hesedcasa/search/blob/v0.2.2/src/commands/synonyms/import.ts)_
66
115
  <!-- commandsstop -->
@@ -1,6 +1,6 @@
1
- import { Args, Command, CommandHelp, Flags, toConfiguredId, ux } from '@oclif/core';
2
- import { isMiniLMModelCached, MiniLMCommandEmbedder } from '../embedders/minilm.js';
1
+ import { Args, Command, CommandHelp, Flags, toConfiguredId } from '@oclif/core';
3
2
  import { searchCommands } from '../search-logic.js';
3
+ import { loadStoredSynonymMap } from '../synonym-store.js';
4
4
  export default class Search extends Command {
5
5
  static args = {
6
6
  query: Args.string({ description: 'Search term to filter commands by', required: true }),
@@ -19,14 +19,8 @@ export default class Search extends Command {
19
19
  async run() {
20
20
  const { args, flags } = await this.parse(Search);
21
21
  const allCommands = this.config.commands.filter((c) => !c.hidden && c.pluginName !== '@oclif/plugin-plugins');
22
- const loader = createModelLoader(this.jsonEnabled() || isMiniLMModelCached());
23
- let scored;
24
- try {
25
- scored = (await searchCommands(args.query, allCommands, getSearchOptions(this.config, loader.onProgress))).slice(0, flags.limit);
26
- }
27
- finally {
28
- loader.stop();
29
- }
22
+ const synonyms = loadStoredSynonymMap(this.config.configDir);
23
+ const scored = (await searchCommands(args.query, allCommands, synonyms)).slice(0, flags.limit);
30
24
  const results = scored.map((entry) => {
31
25
  const { cmd } = entry;
32
26
  const configuredId = toConfiguredId(cmd.id, this.config);
@@ -85,61 +79,3 @@ export default class Search extends Command {
85
79
  }
86
80
  }
87
81
  }
88
- function getSearchOptions(config, onLoadProgress) {
89
- const testConfig = config;
90
- if (!testConfig.searchEmbedder)
91
- return { embedder: new MiniLMCommandEmbedder({ onLoadProgress }) };
92
- return { embedder: testConfig.searchEmbedder };
93
- }
94
- function createModelLoader(silent) {
95
- let started = false;
96
- let stopped = false;
97
- const start = (status) => {
98
- if (silent || stopped)
99
- return;
100
- if (started) {
101
- ux.action.status = status;
102
- return;
103
- }
104
- ux.action.start('Loading semantic search model', status);
105
- started = true;
106
- };
107
- const stop = () => {
108
- if (silent || stopped)
109
- return;
110
- stopped = true;
111
- if (started)
112
- ux.action.stop();
113
- };
114
- return {
115
- onProgress(progress) {
116
- if (progress.status === 'ready') {
117
- stop();
118
- return;
119
- }
120
- if (progress.status === 'progress_total' && typeof progress.progress === 'number') {
121
- start(`${Math.round(progress.progress)}% ${formatBytes(progress.loaded)} / ${formatBytes(progress.total)}`);
122
- return;
123
- }
124
- if (progress.status === 'download') {
125
- start(formatProgressFile(progress.file));
126
- }
127
- },
128
- stop,
129
- };
130
- }
131
- function formatProgressFile(file) {
132
- return file?.split('/').at(-1);
133
- }
134
- function formatBytes(value) {
135
- if (!value || value <= 0)
136
- return '?';
137
- const units = ['B', 'KB', 'MB', 'GB'];
138
- let size = value;
139
- let unit = 0;
140
- while (size >= 1024 && unit < units.length - 1) {
141
- size /= 1024;
142
- unit++;
143
- }
144
- return `${size.toFixed(size >= 10 || unit === 0 ? 0 : 1)} ${units[unit]}`;
145
- }
@@ -0,0 +1,9 @@
1
+ import { Command } from '@oclif/core';
2
+ export default class SynonymsExport extends Command {
3
+ static args: {
4
+ file: import("@oclif/core/interfaces").Arg<string | undefined, Record<string, unknown>>;
5
+ };
6
+ static description: string;
7
+ static examples: string[];
8
+ run(): Promise<void>;
9
+ }
@@ -0,0 +1,22 @@
1
+ import { Args, Command } from '@oclif/core';
2
+ import { writeFileSync } from 'node:fs';
3
+ import { readSynonymGroups } from '../../synonym-store.js';
4
/**
 * `synonyms export [FILE]` command.
 *
 * Reads the synonym groups stored under the CLI config directory and emits
 * them as pretty-printed JSON, either into FILE or onto stdout.
 */
export default class SynonymsExport extends Command {
    static description = 'Export the stored synonyms to a JSON file or stdout';
    static examples = ['<%= config.bin %> synonyms export', '<%= config.bin %> synonyms export ./my-synonyms.json'];
    static args = {
        file: Args.string({ description: 'Output file path (prints to stdout if omitted)', required: false }),
    };
    /**
     * Serialise the stored groups (2-space indent, trailing newline) and
     * deliver them to the requested destination.
     */
    async run() {
        const parsed = await this.parse(SynonymsExport);
        const target = parsed.args.file;
        const groups = readSynonymGroups(this.config.configDir);
        const json = `${JSON.stringify(groups, null, 2)}\n`;
        if (!target) {
            // No destination given: write the raw JSON to stdout.
            process.stdout.write(json);
            return;
        }
        writeFileSync(target, json, 'utf8');
        const plural = groups.length === 1 ? '' : 's';
        this.log(`Exported ${groups.length} synonym group${plural} → ${target}`);
    }
}
@@ -0,0 +1,12 @@
1
+ import { Command } from '@oclif/core';
2
+ export default class SynonymsImport extends Command {
3
+ static args: {
4
+ file: import("@oclif/core/interfaces").Arg<string, Record<string, unknown>>;
5
+ };
6
+ static description: string;
7
+ static examples: string[];
8
+ static flags: {
9
+ merge: import("@oclif/core/interfaces").BooleanFlag<boolean>;
10
+ };
11
+ run(): Promise<void>;
12
+ }
@@ -0,0 +1,61 @@
1
+ import { Args, Command, Flags } from '@oclif/core';
2
+ import { existsSync } from 'node:fs';
3
+ import { synonymsPath, writeSynonymGroups } from '../../synonym-store.js';
4
+ import { loadSynonymGroupsFromFile } from '../../synonyms.js';
5
/**
 * `synonyms import FILE [--merge]` command.
 *
 * Loads synonym groups from a JSON file and stores them in the CLI config
 * directory, either replacing the stored groups or (with `--merge`) combining
 * them with what is already stored.
 */
export default class SynonymsImport extends Command {
    static args = {
        file: Args.string({ description: 'Path to a JSON synonyms file to import', required: true }),
    };
    static description = 'Import synonyms from a JSON file';
    static examples = [
        '<%= config.bin %> synonyms import ./synonyms.json',
        '<%= config.bin %> synonyms import ./synonyms.json --merge',
    ];
    static flags = {
        merge: Flags.boolean({
            default: false,
            description: 'Merge with existing synonyms instead of replacing them',
        }),
    };
    /**
     * Validate the input file, compute the groups to store and persist them.
     *
     * Failures while reading or parsing the input file, or the already stored
     * file in `--merge` mode, are reported through `this.error` together with
     * the offending path instead of escaping as a bare exception.
     */
    async run() {
        const { args, flags } = await this.parse(SynonymsImport);
        if (!existsSync(args.file)) {
            this.error(`File not found: ${args.file}`);
        }
        let incoming;
        try {
            incoming = loadSynonymGroupsFromFile(args.file);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            this.error(`Could not read synonyms from ${args.file}: ${reason}`);
        }
        let groups = incoming;
        if (flags.merge) {
            // Loaded lazily, as in the original: only the merge path needs it.
            const { readSynonymGroups } = await import('../../synonym-store.js');
            let existing;
            try {
                existing = readSynonymGroups(this.config.configDir);
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                this.error(`Could not read stored synonyms from ${synonymsPath(this.config.configDir)}: ${reason}`);
            }
            groups = mergeSynonymGroups(existing, incoming);
        }
        writeSynonymGroups(this.config.configDir, groups);
        const dest = synonymsPath(this.config.configDir);
        this.log(`Imported ${groups.length} synonym group${groups.length === 1 ? '' : 's'} → ${dest}`);
    }
}
40
/**
 * Merge two synonym group lists.
 *
 * Terms are compared after lower-casing and trimming. An incoming group is
 * folded into every existing group it shares at least one term with; when it
 * overlaps several groups they are all collapsed into a single group placed
 * at the position of the first one. Incoming groups with no overlap are
 * appended. Incoming groups that contain no non-empty term are ignored.
 *
 * Neither input array (nor its inner arrays) is mutated.
 *
 * @param {string[][]} existing - Groups already stored.
 * @param {string[][]} incoming - Groups being imported.
 * @returns {string[][]} The combined list of groups.
 */
function mergeSynonymGroups(existing, incoming) {
    const normalize = (term) => term.toLowerCase().trim();
    const result = existing.map((g) => [...g]);
    for (const group of incoming) {
        // De-duplicate up front so an appended group never repeats a term.
        const normalizedGroup = [...new Set(group.map(normalize).filter(Boolean))];
        if (normalizedGroup.length === 0) {
            // Nothing usable in this group; appending `[]` would pollute the store.
            continue;
        }
        const incomingTerms = new Set(normalizedGroup);
        const overlapping = [];
        for (const [idx, candidate] of result.entries()) {
            if (candidate.some((t) => incomingTerms.has(normalize(t)))) {
                overlapping.push(idx);
            }
        }
        if (overlapping.length === 0) {
            result.push(normalizedGroup);
            continue;
        }
        // Incoming terms first, then the terms of every overlapping group.
        const merged = new Set(normalizedGroup);
        for (const idx of overlapping) {
            for (const t of result[idx]) {
                const term = normalize(t);
                if (term) {
                    merged.add(term);
                }
            }
        }
        const [target, ...absorbed] = overlapping;
        result[target] = [...merged];
        // Remove absorbed groups from the back so earlier indexes stay valid.
        for (const idx of absorbed.reverse()) {
            result.splice(idx, 1);
        }
    }
    return result;
}
@@ -1,3 +1,4 @@
1
+ import { type SynonymMap } from './synonyms.js';
1
2
  export type SearchableCommand = {
2
3
  description?: string;
3
4
  id: string;
@@ -8,19 +9,5 @@ export type ScoredCommand<T extends SearchableCommand = SearchableCommand> = {
8
9
  cmd: T;
9
10
  score: number;
10
11
  };
11
- export type CommandEmbedder = {
12
- embed(texts: string[]): Promise<number[][]>;
13
- };
14
- export type CommandReranker<T extends SearchableCommand = SearchableCommand> = {
15
- rerank(query: string, commands: Array<ScoredCommand<T>>): Promise<Array<ScoredCommand<T>>>;
16
- };
17
- export type SearchCommandsOptions<T extends SearchableCommand = SearchableCommand> = {
18
- embedder?: CommandEmbedder;
19
- lexicalWeight?: number;
20
- minSemanticScore?: number;
21
- reranker?: CommandReranker<T>;
22
- semantic?: boolean;
23
- semanticWeight?: number;
24
- };
25
- export declare function searchCommands<T extends SearchableCommand>(query: string, commands: T[], options?: SearchCommandsOptions<T>): Promise<Array<ScoredCommand<T>>>;
26
- export declare function searchCommandsLexically<T extends SearchableCommand>(query: string, commands: T[], haystack?: string[]): Array<ScoredCommand<T>>;
12
+ export declare function searchCommands<T extends SearchableCommand>(query: string, commands: T[], synonyms?: SynonymMap): Promise<Array<ScoredCommand<T>>>;
13
+ export declare function searchCommandsLexically<T extends SearchableCommand>(query: string, commands: T[], haystack?: string[], synonyms?: SynonymMap): Array<ScoredCommand<T>>;
@@ -1,58 +1,46 @@
1
- import UFuzzy from '@leeoniya/ufuzzy';
2
- import { MiniLMCommandEmbedder } from './embedders/minilm.js';
3
- const DEFAULT_MIN_SEMANTIC_SCORE = 0.22;
4
- const DEFAULT_SEMANTIC_WEIGHT = 0.75;
5
- const DEFAULT_LEXICAL_WEIGHT = 0.25;
6
- let defaultEmbedderPromise;
7
- export async function searchCommands(query, commands, options = {}) {
1
+ import { createRequire } from 'node:module';
2
+ import { expandWithSynonyms } from './synonyms.js';
3
+ const require = createRequire(import.meta.url);
4
+ const { Index } = require('flexsearch');
5
/**
 * Search `commands` for `query`, taking the synonym map into account.
 *
 * Each command's search text is expanded with its synonyms before the
 * lexical search runs. Resolves to an empty list when the trimmed query is
 * empty or when there are no commands.
 *
 * @param {string} query - Raw user query.
 * @param {Array<object>} commands - Commands to search.
 * @param {Map<string, string[]>} [synonyms] - Synonym map; defaults to an empty map.
 * @returns {Promise<Array<{cmd: object, score: number}>>} Ranked matches.
 */
export async function searchCommands(query, commands, synonyms = new Map()) {
    const trimmedQuery = query.trim();
    const nothingToSearch = trimmedQuery.length === 0 || commands.length === 0;
    if (nothingToSearch) {
        return [];
    }
    const haystack = [];
    for (const command of commands) {
        haystack.push(expandWithSynonyms(commandSearchText(command), synonyms));
    }
    return searchCommandsLexically(trimmedQuery, commands, haystack, synonyms);
}
12
/**
 * Rank `commands` against `query` using the command search index.
 *
 * The query is expanded with synonyms and searched as a whole first; hits
 * are scored by their position in the result list (0 = best). When the whole
 * query yields nothing and it contains more than one token, each token is
 * searched on its own and commands are ranked by how many tokens they match.
 *
 * @param {string} query - Query text.
 * @param {Array<object>} commands - Commands; positions must line up with `haystack`.
 * @param {string[]} [haystack] - Search text per command.
 * @param {Map<string, string[]>} [synonyms] - Synonym map; defaults to an empty map.
 * @returns {Array<{cmd: object, score: number}>} Ranked matches, lower score first.
 */
export function searchCommandsLexically(query, commands, haystack = commands.map((command) => commandSearchText(command)), synonyms = new Map()) {
    const index = createCommandSearchIndex(haystack);
    const expandedQuery = expandWithSynonyms(query, synonyms);
    const lookup = (text) => index.search(text, { limit: commands.length, suggest: true });
    const directHits = lookup(expandedQuery);
    if (directHits.length > 0) {
        return directHits.map((id, position) => ({ cmd: commands[Number(id)], score: position }));
    }
    // Multi-token fallback: count, per command, how many individual query
    // tokens match it. Covers queries that include words (e.g. an unknown
    // alias) which do not literally occur in any command field.
    const tokens = expandedQuery.trim().split(/\s+/).filter(Boolean);
    if (tokens.length <= 1) {
        return [];
    }
    const hitCount = new Map();
    for (const token of tokens) {
        for (const id of lookup(token)) {
            const commandIndex = Number(id);
            const previous = hitCount.get(commandIndex) ?? 0;
            hitCount.set(commandIndex, previous + 1);
        }
    }
    const ranked = Array.from(hitCount);
    // Most token hits first; ties keep the original command order.
    ranked.sort(([leftIdx, leftHits], [rightIdx, rightHits]) => rightHits - leftHits || leftIdx - rightIdx);
    return ranked.map(([commandIndex, hits]) => ({ cmd: commands[commandIndex], score: tokens.length - hits }));
}
37
/**
 * Build a search index over `haystack`, using each entry's array position as
 * its document id.
 *
 * @param {string[]} haystack - Search text per command.
 * @returns {object} The populated `Index` instance.
 */
function createCommandSearchIndex(haystack) {
    const indexOptions = { encoder: 'LatinAdvanced', resolution: 9, tokenize: 'forward' };
    const index = new Index(indexOptions);
    for (let position = 0; position < haystack.length; position++) {
        index.add(position, haystack[position]);
    }
    return index;
}
56
44
  function commandSearchText(command) {
57
45
  return [command.id, command.summary ?? command.description ?? '', command.pluginName ?? '']
58
46
  .filter(Boolean)
@@ -60,47 +48,3 @@ function commandSearchText(command) {
60
48
  .replaceAll(/\s+/g, ' ')
61
49
  .trim();
62
50
  }
63
- async function searchCommandsSemantically(query, commands, haystack, options) {
64
- const embedder = options.embedder ?? (await getDefaultEmbedder());
65
- const [queryEmbedding, ...commandEmbeddings] = await embedder.embed([query, ...haystack]);
66
- return commandEmbeddings
67
- .map((embedding, idx) => ({
68
- cmd: commands[idx],
69
- score: cosineSimilarity(queryEmbedding, embedding),
70
- }))
71
- .filter(({ score }) => Number.isFinite(score))
72
- .sort((a, b) => b.score - a.score);
73
- }
74
- async function getDefaultEmbedder() {
75
- defaultEmbedderPromise ??= Promise.resolve(new MiniLMCommandEmbedder());
76
- return defaultEmbedderPromise;
77
- }
78
- function mergeScores(semantic, lexical, options) {
79
- const lexicalById = new Map(lexical.map((entry, idx) => [entry.cmd.id, lexicalScore(idx, lexical.length)]));
80
- const lexicalIds = new Set(lexical.map((entry) => entry.cmd.id));
81
- return semantic
82
- .filter((entry) => entry.score >= options.minSemanticScore || lexicalIds.has(entry.cmd.id))
83
- .map((entry) => {
84
- const finalScore = entry.score * options.semanticWeight + (lexicalById.get(entry.cmd.id) ?? 0) * options.lexicalWeight;
85
- return { cmd: entry.cmd, score: 1 - finalScore };
86
- })
87
- .sort((a, b) => a.score - b.score);
88
- }
89
- function lexicalScore(index, length) {
90
- if (length <= 1)
91
- return 1;
92
- return 1 - index / (length - 1);
93
- }
94
- function cosineSimilarity(a, b) {
95
- let dot = 0;
96
- let aMagnitude = 0;
97
- let bMagnitude = 0;
98
- for (let i = 0; i < Math.min(a.length, b.length); i++) {
99
- dot += a[i] * b[i];
100
- aMagnitude += a[i] * a[i];
101
- bMagnitude += b[i] * b[i];
102
- }
103
- if (aMagnitude === 0 || bMagnitude === 0)
104
- return 0;
105
- return dot / (Math.sqrt(aMagnitude) * Math.sqrt(bMagnitude));
106
- }
@@ -0,0 +1,5 @@
1
+ import { type SynonymGroup, type SynonymMap } from './synonyms.js';
2
+ export declare function synonymsPath(configDir: string): string;
3
+ export declare function readSynonymGroups(configDir: string | undefined): SynonymGroup[];
4
+ export declare function writeSynonymGroups(configDir: string, groups: SynonymGroup[]): void;
5
+ export declare function loadStoredSynonymMap(configDir: string | undefined): SynonymMap;
@@ -0,0 +1,21 @@
1
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { buildSynonymMap, parseSynonymGroups } from './synonyms.js';
4
+ const SYNONYMS_FILENAME = 'synonyms.json';
5
/**
 * Location of the synonyms file inside a config directory.
 *
 * @param {string} configDir - Config directory path.
 * @returns {string} `configDir` joined with the synonyms file name.
 */
export function synonymsPath(configDir) {
    const segments = [configDir, SYNONYMS_FILENAME];
    return join(...segments);
}
8
/**
 * Read the synonym groups stored in `configDir`.
 *
 * @param {string | undefined} configDir - Config directory path.
 * @returns {string[][]} The parsed groups, or `[]` when `configDir` is falsy
 *   or the synonyms file does not exist.
 */
export function readSynonymGroups(configDir) {
    if (configDir) {
        const filePath = synonymsPath(configDir);
        if (existsSync(filePath)) {
            const raw = readFileSync(filePath, 'utf8');
            return parseSynonymGroups(raw);
        }
    }
    return [];
}
16
/**
 * Persist `groups` as pretty-printed JSON (2-space indent, trailing newline)
 * to the synonyms file inside `configDir`.
 *
 * The config directory is created first (including missing parents), so the
 * write does not fail with ENOENT when the directory does not exist yet.
 *
 * @param {string} configDir - Config directory path.
 * @param {string[][]} groups - Synonym groups to store.
 * @returns {void}
 */
export function writeSynonymGroups(configDir, groups) {
    // `recursive: true` also makes this a no-op when the directory already exists.
    mkdirSync(configDir, { recursive: true });
    writeFileSync(synonymsPath(configDir), `${JSON.stringify(groups, null, 2)}\n`, 'utf8');
}
19
/**
 * Read the groups stored in `configDir` and turn them into a synonym map.
 *
 * @param {string | undefined} configDir - Config directory path.
 * @returns {Map<string, string[]>} Map built from the stored groups.
 */
export function loadStoredSynonymMap(configDir) {
    const storedGroups = readSynonymGroups(configDir);
    return buildSynonymMap(storedGroups);
}
@@ -0,0 +1,28 @@
1
+ export type SynonymGroup = string[];
2
+ export type SynonymMap = Map<string, string[]>;
3
+ /**
4
+ * Parse and validate a synonyms file.
5
+ *
6
+ * File format — a JSON array of synonym groups:
7
+ *
8
+ * [
9
+ * ["ticket", "issue", "bug"],
10
+ * ["pr", "pull request", "merge request"],
11
+ * ["repo", "repository"]
12
+ * ]
13
+ *
14
+ * Every term in a group is treated as equivalent to every other term. Terms
15
+ * are matched case-insensitively. Multi-word terms (e.g. "pull request") are
16
+ * matched as whole phrases.
17
+ */
18
+ export declare function parseSynonymGroups(raw: string): SynonymGroup[];
19
+ export declare function loadSynonymGroupsFromFile(filePath: string): SynonymGroup[];
20
+ export declare function buildSynonymMap(groups: SynonymGroup[]): SynonymMap;
21
+ /**
22
+ * Return `text` with synonym expansions appended.
23
+ *
24
+ * For every term in `synonyms` that appears in `text`, all sibling terms from
25
+ * the same group are appended so the search index / query matches any
26
+ * equivalent spelling.
27
+ */
28
+ export declare function expandWithSynonyms(text: string, synonyms: SynonymMap): string;
@@ -0,0 +1,65 @@
1
+ import { readFileSync } from 'node:fs';
2
/**
 * Parse the JSON text of a synonyms file and check its shape.
 *
 * Expected format: a JSON array whose elements are arrays of strings, one
 * inner array per synonym group, for example:
 *
 *   [
 *     ["ticket", "issue", "bug"],
 *     ["pr", "pull request", "merge request"],
 *     ["repo", "repository"]
 *   ]
 *
 * @param {string} raw - JSON text.
 * @returns {string[][]} The parsed groups, unmodified.
 * @throws {SyntaxError} When `raw` is not valid JSON.
 * @throws {TypeError} When the parsed value is not an array of string arrays.
 */
export function parseSynonymGroups(raw) {
    const parsed = JSON.parse(raw);
    const isStringArray = (candidate) => Array.isArray(candidate) && candidate.every((term) => typeof term === 'string');
    if (Array.isArray(parsed) && parsed.every(isStringArray)) {
        return parsed;
    }
    throw new TypeError('Synonyms file must be a JSON array of string arrays');
}
25
/**
 * Read `filePath` as UTF-8 text and parse it as a synonyms file.
 *
 * @param {string} filePath - Path of the JSON file to load.
 * @returns {string[][]} The groups returned by `parseSynonymGroups`.
 */
export function loadSynonymGroupsFromFile(filePath) {
    const contents = readFileSync(filePath, 'utf8');
    return parseSynonymGroups(contents);
}
28
/**
 * Build a lookup from each term to the other terms of its groups.
 *
 * Terms are lower-cased and trimmed, and empty terms are dropped. A term that
 * occurs in several groups accumulates the siblings of all of them, without
 * duplicates and in first-seen order.
 *
 * @param {string[][]} groups - Synonym groups.
 * @returns {Map<string, string[]>} Normalised term → sibling terms.
 */
export function buildSynonymMap(groups) {
    const map = new Map();
    for (const group of groups) {
        const terms = group.map((raw) => raw.toLowerCase().trim()).filter((term) => term.length > 0);
        terms.forEach((term) => {
            const siblings = new Set(map.get(term) ?? []);
            for (const candidate of terms) {
                if (candidate !== term) {
                    siblings.add(candidate);
                }
            }
            map.set(term, [...siblings]);
        });
    }
    return map;
}
40
/**
 * Append synonym expansions to `text`.
 *
 * Every key of `synonyms` that `containsTerm` finds in `text` contributes its
 * mapped terms; the contributions are joined with single spaces and appended
 * after one separating space. `text` is returned unchanged when the map is
 * empty or nothing is contributed.
 *
 * @param {string} text - Text to expand (command search text or a query).
 * @param {Map<string, string[]>} synonyms - Term → sibling terms.
 * @returns {string} The original or expanded text.
 */
export function expandWithSynonyms(text, synonyms) {
    if (synonyms.size === 0) {
        return text;
    }
    const extras = [...synonyms]
        .filter(([term]) => containsTerm(text, term))
        .flatMap(([, siblings]) => siblings);
    if (extras.length === 0) {
        return text;
    }
    return `${text} ${extras.join(' ')}`;
}
58
/**
 * Turn a literal term into regular-expression source.
 *
 * Regex metacharacters are backslash-escaped first; afterwards every run of
 * whitespace is replaced by `\s+`.
 *
 * @param {string} s - Literal term.
 * @returns {string} Regular-expression source for the term.
 */
function escapeRegex(s) {
    const metaCharacters = /[$()*+.?[\\\]^{|}]/g;
    const whitespaceRun = /\s+/g;
    // eslint-disable-next-line unicorn/prefer-string-raw
    const escaped = s.replaceAll(metaCharacters, '\\$&');
    return escaped.replaceAll(whitespaceRun, String.raw`\s+`);
}
62
/**
 * Test whether `term` occurs in `text` as a whole word or phrase,
 * case-insensitively.
 *
 * A match must not be directly preceded or followed by a letter, digit,
 * combining mark or underscore. The check uses Unicode property classes
 * rather than ASCII-only `\w`, so accented and non-Latin letters count as
 * part of a word (e.g. "caf" is not found inside "café").
 *
 * @param {string} text - Text to search.
 * @param {string} term - Literal term; escaped via `escapeRegex` before use.
 * @returns {boolean} `true` when the term is present as a whole word/phrase.
 */
function containsTerm(text, term) {
    const wordChar = String.raw`[\p{L}\p{M}\p{N}_]`;
    // The `u` flag is required for the \p{…} classes; `i` keeps matching case-insensitive.
    const pattern = new RegExp(`(?<!${wordChar})${escapeRegex(term)}(?!${wordChar})`, 'iu');
    return pattern.test(text);
}
@@ -56,7 +56,76 @@
56
56
  "commands",
57
57
  "search.js"
58
58
  ]
59
+ },
60
+ "synonyms:export": {
61
+ "aliases": [],
62
+ "args": {
63
+ "file": {
64
+ "description": "Output file path (prints to stdout if omitted)",
65
+ "name": "file",
66
+ "required": false
67
+ }
68
+ },
69
+ "description": "Export the stored synonyms to a JSON file or stdout",
70
+ "examples": [
71
+ "<%= config.bin %> synonyms export",
72
+ "<%= config.bin %> synonyms export ./my-synonyms.json"
73
+ ],
74
+ "flags": {},
75
+ "hasDynamicHelp": false,
76
+ "hiddenAliases": [],
77
+ "id": "synonyms:export",
78
+ "pluginAlias": "@hesed/search",
79
+ "pluginName": "@hesed/search",
80
+ "pluginType": "core",
81
+ "strict": true,
82
+ "enableJsonFlag": false,
83
+ "isESM": true,
84
+ "relativePath": [
85
+ "dist",
86
+ "commands",
87
+ "synonyms",
88
+ "export.js"
89
+ ]
90
+ },
91
+ "synonyms:import": {
92
+ "aliases": [],
93
+ "args": {
94
+ "file": {
95
+ "description": "Path to a JSON synonyms file to import",
96
+ "name": "file",
97
+ "required": true
98
+ }
99
+ },
100
+ "description": "Import synonyms from a JSON file",
101
+ "examples": [
102
+ "<%= config.bin %> synonyms import ./synonyms.json",
103
+ "<%= config.bin %> synonyms import ./synonyms.json --merge"
104
+ ],
105
+ "flags": {
106
+ "merge": {
107
+ "description": "Merge with existing synonyms instead of replacing them",
108
+ "name": "merge",
109
+ "allowNo": false,
110
+ "type": "boolean"
111
+ }
112
+ },
113
+ "hasDynamicHelp": false,
114
+ "hiddenAliases": [],
115
+ "id": "synonyms:import",
116
+ "pluginAlias": "@hesed/search",
117
+ "pluginName": "@hesed/search",
118
+ "pluginType": "core",
119
+ "strict": true,
120
+ "enableJsonFlag": false,
121
+ "isESM": true,
122
+ "relativePath": [
123
+ "dist",
124
+ "commands",
125
+ "synonyms",
126
+ "import.js"
127
+ ]
59
128
  }
60
129
  },
61
- "version": "0.2.0"
130
+ "version": "0.2.2"
62
131
  }
package/package.json CHANGED
@@ -1,16 +1,15 @@
1
1
  {
2
2
  "name": "@hesed/search",
3
3
  "description": "Intelligence search plugin",
4
- "version": "0.2.0",
4
+ "version": "0.2.2",
5
5
  "author": "Hesed",
6
6
  "bin": {
7
7
  "permission": "./bin/run.js"
8
8
  },
9
9
  "bugs": "https://github.com/hesedcasa/search/issues",
10
10
  "dependencies": {
11
- "@huggingface/transformers": "^4.2.0",
12
- "@leeoniya/ufuzzy": "^1.0.19",
13
- "@oclif/core": "^4"
11
+ "@oclif/core": "^4",
12
+ "flexsearch": "^0.8.212"
14
13
  },
15
14
  "devDependencies": {
16
15
  "@eslint/compat": "^1",
@@ -1,22 +0,0 @@
1
- import { type CommandEmbedder } from '../search-logic.js';
2
- export type ModelLoadProgress = {
3
- file?: string;
4
- loaded?: number;
5
- model?: string;
6
- name?: string;
7
- progress?: number;
8
- status: string;
9
- task?: string;
10
- total?: number;
11
- };
12
- export declare class MiniLMCommandEmbedder implements CommandEmbedder {
13
- private readonly options;
14
- private extractorPromise;
15
- constructor(options?: {
16
- onLoadProgress?: (progress: ModelLoadProgress) => void;
17
- });
18
- embed(texts: string[]): Promise<number[][]>;
19
- private getExtractor;
20
- }
21
- export declare function isMiniLMModelCached(): boolean;
22
- export declare function getMiniLMModelCachePath(): string;
@@ -1,49 +0,0 @@
1
- import { existsSync } from 'node:fs';
2
- import { createRequire } from 'node:module';
3
- import path from 'node:path';
4
- const MINILM_MODEL = 'Xenova/paraphrase-MiniLM-L3-v2';
5
- const MODEL_CACHE_FILES = ['config.json', 'tokenizer.json', 'tokenizer_config.json', 'onnx/model.onnx'];
6
- // eslint-disable-next-line no-new-func
7
- const importTransformers = new Function('specifier', 'return import(specifier)');
8
- const require = createRequire(import.meta.url);
9
- export class MiniLMCommandEmbedder {
10
- options;
11
- extractorPromise;
12
- constructor(options = {}) {
13
- this.options = options;
14
- }
15
- async embed(texts) {
16
- if (texts.length === 0)
17
- return [];
18
- const extractor = await this.getExtractor();
19
- const output = await extractor(texts, { normalize: true, pooling: 'mean' });
20
- return tensorToRows(output.data, output.dims);
21
- }
22
- async getExtractor() {
23
- this.extractorPromise ??= importTransformers('@huggingface/transformers').then(async ({ pipeline }) => {
24
- const pipelineOptions = {};
25
- // eslint-disable-next-line camelcase
26
- pipelineOptions.progress_callback = this.options.onLoadProgress;
27
- const extractor = await pipeline('feature-extraction', MINILM_MODEL, pipelineOptions);
28
- return extractor;
29
- });
30
- return this.extractorPromise;
31
- }
32
- }
33
- export function isMiniLMModelCached() {
34
- const cachePath = getMiniLMModelCachePath();
35
- return MODEL_CACHE_FILES.every((file) => existsSync(path.join(cachePath, file)));
36
- }
37
- export function getMiniLMModelCachePath() {
38
- const entrypointPath = require.resolve('@huggingface/transformers');
39
- const packagePath = path.dirname(path.dirname(entrypointPath));
40
- return path.join(packagePath, '.cache', ...MINILM_MODEL.split('/'));
41
- }
42
- function tensorToRows(data, dims) {
43
- const rows = dims[0] ?? 0;
44
- const columns = dims[1] ?? data.length;
45
- return Array.from({ length: rows }, (_, row) => {
46
- const rowData = data.slice(row * columns, (row + 1) * columns);
47
- return Array.isArray(rowData) ? rowData : [...rowData];
48
- });
49
- }