xindex 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ai/research/2026-04-10-file-watching.md +79 -0
- package/.ai/research/2026-04-10-mcp-output-format.md +129 -0
- package/.ai/task/INDEX.md +12 -0
- package/.ai/task/done/INDEX.md +3 -0
- package/.ai/task/done/task.2026-04-09-local-ai-research-protos.log.md +98 -0
- package/.ai/task/done/task.2026-04-09-local-ai-research-protos.md +102 -0
- package/.ai/task/task.2026-04-10-cluster-config.log.md +19 -0
- package/.ai/task/task.2026-04-10-cluster-config.md +118 -0
- package/.ai/task/task.2026-04-10-dir-indexing.log.md +8 -0
- package/.ai/task/task.2026-04-10-dir-indexing.md +92 -0
- package/.ai/task/task.2026-04-10-line-clustering.log.md +50 -0
- package/.ai/task/task.2026-04-10-line-clustering.md +176 -0
- package/.ai/task/task.2026-04-10-object-store.log.md +7 -0
- package/.ai/task/task.2026-04-10-object-store.md +81 -0
- package/.ai/task/task.2026-04-10-search-config.log.md +46 -0
- package/.ai/task/task.2026-04-10-search-config.md +274 -0
- package/.ai/task/task.2026-04-10-watch-indexing.log.md +32 -0
- package/.ai/task/task.2026-04-10-watch-indexing.md +101 -0
- package/.ai/task/task.2026-04-10-xindex-mcp.log.md +5 -0
- package/.ai/task/task.2026-04-10-xindex-mcp.md +92 -0
- package/.ai/task/task.2026-04-10-xindex-mcp.report.md +113 -0
- package/.claude/settings.local.json +73 -0
- package/.claude/skills/make-hof/SKILL.md +8 -0
- package/.claude/skills/make-hof/playbook.md +38 -0
- package/.cursor/mcp.json +8 -0
- package/.mcp.json +8 -0
- package/.xindex.json +22 -0
- package/CLAUDE.md +54 -0
- package/README.md +206 -0
- package/apps/indexApp.ts +31 -0
- package/apps/mcpApp.ts +119 -0
- package/apps/run.index.ts +19 -0
- package/apps/run.mcp.ts +49 -0
- package/apps/run.reset.ts +10 -0
- package/apps/run.search.ts +21 -0
- package/apps/run.watch.ts +44 -0
- package/apps/searchApp.ts +9 -0
- package/apps/watchApp.ts +53 -0
- package/apps/watchFileEventsApp.ts +39 -0
- package/bin/xindex-index +2 -0
- package/bin/xindex-mcp +2 -0
- package/bin/xindex-reset +2 -0
- package/bin/xindex-search +2 -0
- package/bin/xindex-watch +2 -0
- package/componets/IType.ts +1 -0
- package/componets/appId.ts +3 -0
- package/componets/buildComponents.ts +27 -0
- package/componets/config/loadConfig.ts +43 -0
- package/componets/config/xindexConfig.ts +4 -0
- package/componets/index/contentIndexDriver.ts +39 -0
- package/componets/index/formatSearchResults.ts +18 -0
- package/componets/index/getIndexStats.ts +11 -0
- package/componets/index/handleFileEvent.ts +25 -0
- package/componets/index/indexApi.ts +45 -0
- package/componets/index/vectraIndex.ts +11 -0
- package/componets/index/watcherLock.ts +107 -0
- package/componets/keywords/cleanUpKeywords.ts +38 -0
- package/componets/keywords/extractKeywords.ts +14 -0
- package/componets/keywords/refineKeywords.ts +16 -0
- package/componets/llm/embed.ts +18 -0
- package/componets/llm/queryLLM.ts +20 -0
- package/componets/logger.ts +34 -0
- package/componets/walkFiles.ts +51 -0
- package/componets/watchFiles.ts +106 -0
- package/features/indexContent.ts +16 -0
- package/features/removeContent.ts +9 -0
- package/features/resetIndex.ts +9 -0
- package/features/searchIndex.ts +33 -0
- package/package.json +32 -0
- package/packages/fun/src/IType.ts +5 -0
- package/packages/fun/src/array-finder.ts +55 -0
- package/packages/fun/src/array-index.ts +35 -0
- package/packages/fun/src/array.ts +112 -0
- package/packages/fun/src/assert.ts +5 -0
- package/packages/fun/src/asyncRequest.ts +35 -0
- package/packages/fun/src/callsites.ts +18 -0
- package/packages/fun/src/case-never.ts +9 -0
- package/packages/fun/src/casting.ts +41 -0
- package/packages/fun/src/collect.ts +13 -0
- package/packages/fun/src/concurrency.ts +186 -0
- package/packages/fun/src/container.ts +86 -0
- package/packages/fun/src/counter.ts +45 -0
- package/packages/fun/src/create-map.ts +2 -0
- package/packages/fun/src/dedupe.ts +2 -0
- package/packages/fun/src/defer.ts +55 -0
- package/packages/fun/src/delay.ts +5 -0
- package/packages/fun/src/discriminate.ts +34 -0
- package/packages/fun/src/enum-values.ts +12 -0
- package/packages/fun/src/exponential-backoff.ts +20 -0
- package/packages/fun/src/flatten.ts +11 -0
- package/packages/fun/src/hash.ts +67 -0
- package/packages/fun/src/hash128.ts +6 -0
- package/packages/fun/src/hash256.ts +6 -0
- package/packages/fun/src/hub.ts +53 -0
- package/packages/fun/src/id.ts +10 -0
- package/packages/fun/src/interval.ts +76 -0
- package/packages/fun/src/is-non-nullable.ts +2 -0
- package/packages/fun/src/isIterable.ts +3 -0
- package/packages/fun/src/mailbox.ts +13 -0
- package/packages/fun/src/map-record.ts +19 -0
- package/packages/fun/src/match-collections.ts +57 -0
- package/packages/fun/src/match-left-and-right-arrays.ts +78 -0
- package/packages/fun/src/mem.ts +26 -0
- package/packages/fun/src/memos.ts +28 -0
- package/packages/fun/src/normalizeError.ts +25 -0
- package/packages/fun/src/nothing.ts +3 -0
- package/packages/fun/src/pipe.ts +18 -0
- package/packages/fun/src/prettyJson.ts +3 -0
- package/packages/fun/src/project.ts +8 -0
- package/packages/fun/src/promise.ts +27 -0
- package/packages/fun/src/pubsub.ts +128 -0
- package/packages/fun/src/randomId.ts +14 -0
- package/packages/fun/src/regexp-escape.ts +13 -0
- package/packages/fun/src/retry.ts +15 -0
- package/packages/fun/src/serial.test.ts +107 -0
- package/packages/fun/src/serial.ts +17 -0
- package/packages/fun/src/sleep.ts +3 -0
- package/packages/fun/src/sort-object.ts +46 -0
- package/packages/fun/src/speed-test.ts +56 -0
- package/packages/fun/src/tick.ts +37 -0
- package/packages/fun/src/time-behavior.ts +50 -0
- package/packages/fun/src/time.ts +22 -0
- package/packages/fun/src/timedFallback.ts +37 -0
- package/packages/fun/src/timer.ts +30 -0
- package/packages/fun/src/value.ts +33 -0
- package/packages/fun/src/waitForCounter.ts +15 -0
- package/packages/streamx/src/batch.ts +23 -0
- package/packages/streamx/src/batchTimed.ts +113 -0
- package/packages/streamx/src/buffer.ts +72 -0
- package/packages/streamx/src/concatenate.ts +33 -0
- package/packages/streamx/src/filter.ts +14 -0
- package/packages/streamx/src/flat.ts +19 -0
- package/packages/streamx/src/flatMap.ts +9 -0
- package/packages/streamx/src/from.ts +30 -0
- package/packages/streamx/src/index.ts +49 -0
- package/packages/streamx/src/interval.ts +58 -0
- package/packages/streamx/src/loop.ts +8 -0
- package/packages/streamx/src/map.ts +12 -0
- package/packages/streamx/src/merge.ts +89 -0
- package/packages/streamx/src/nodeReadable.ts +6 -0
- package/packages/streamx/src/nodeTransform.ts +9 -0
- package/packages/streamx/src/nodeWritable.ts +38 -0
- package/packages/streamx/src/objectReader.ts +16 -0
- package/packages/streamx/src/polyfill.ts +20 -0
- package/packages/streamx/src/reader.ts +38 -0
- package/packages/streamx/src/reduce.ts +15 -0
- package/packages/streamx/src/scale.ts +93 -0
- package/packages/streamx/src/scaleSync.ts +13 -0
- package/packages/streamx/src/sequence.ts +7 -0
- package/packages/streamx/src/tap.ts +9 -0
- package/packages/streamx/src/toArray.ts +9 -0
- package/packages/streamx/src/writer.ts +96 -0
- package/rnd/hf.ts +14 -0
- package/rnd/keywords-compromise.ts +18 -0
- package/rnd/keywords-pipeline.ts +79 -0
- package/rnd/keywords.ts +38 -0
- package/rnd/test-vectra-memory.ts +63 -0
- package/rnd/vectra-keywords.ts +95 -0
- package/rnd/vectra.ts +50 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { Defer } from '@handy/fun/defer';
|
|
2
|
+
import { reader } from './reader';
|
|
3
|
+
import { StreamX } from './index';
|
|
4
|
+
|
|
5
|
+
/**
 * A stream of timestamps that can be halted on demand.
 *
 * `stop()` ends the stream: a read that is currently waiting for the next
 * tick, and every later read, reports completion.
 */
export type IInterval = StreamX<number> & {
    stop: () => void;
};

/**
 * Creates a pull-based ticker that yields `Date.now()` readings spaced at
 * least `ms` milliseconds apart.
 *
 * @param ms Minimum distance between two consecutive ticks, in milliseconds.
 * @param startImmediate When true the first read resolves right away;
 *   otherwise the first tick is measured from the moment `interval` was called.
 * @returns The tick stream, extended with a `stop()` method.
 */
export function interval(ms: number, startImmediate = false): IInterval {
    let stopped = false;
    let delayTimer: ReturnType<typeof setTimeout> | undefined;

    // Timestamp of the previous tick; `null` means "no tick has happened yet".
    let lastTick = startImmediate ? null : Date.now();

    // Starts out resolved so that `stop()` always has something safe to resolve.
    let delayDefer = Defer<void>();
    delayDefer.resolve();

    const intervalStream = reader<number>(async () => {
        if (stopped) {
            return reader.DONE;
        }

        const now = Date.now();

        if (!lastTick) {
            // First read of an immediate interval: emit without waiting.
            lastTick = now;
            return now;
        }

        const elapsed = now - lastTick;

        if (elapsed < ms) {
            // Sleep for the remainder of the period; `stop()` can cut the sleep short.
            delayDefer = Defer<void>();
            delayTimer = setTimeout(() => delayDefer.resolve(), ms - elapsed);
            await delayDefer.promise;

            if (stopped) {
                return reader.DONE;
            }
        }

        lastTick = Date.now();

        return lastTick;
    }) as IInterval;

    intervalStream.stop = () => {
        stopped = true;

        if (delayTimer) {
            clearTimeout(delayTimer);
            delayTimer = undefined;
        }

        // Wake up a read that is parked on the delay so it can observe `stopped`.
        delayDefer.resolve();
    };

    return intervalStream;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Promised, StreamX, StreamXMapper } from './index';
|
|
2
|
+
|
|
3
|
+
/**
 * Builds a stream operator that transforms every record with `mapper`.
 *
 * Records are processed one at a time, in order: the next input record is
 * only pulled after the previous mapped value has been produced.
 *
 * @param mapper Synchronous or asynchronous transformation of a single record.
 * @returns An operator turning a stream of `Input` into a stream of `Output`.
 */
export function map<Input, Output>(
    mapper: (input: Input) => Promised<Output>
): StreamXMapper<Input, Output> {
    return inputStream => {
        const mappedStream = async function* (): StreamX<Output> {
            for await (const record of inputStream) {
                const mapped = await mapper(record);
                yield mapped;
            }
        };

        return mappedStream();
    };
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { StreamX } from './index';
|
|
2
|
+
import { IRead, read } from './reader';
|
|
3
|
+
import { IWriter, Writer } from './writer';
|
|
4
|
+
import { syncTick } from '@handy/fun/tick';
|
|
5
|
+
|
|
6
|
+
/**
 * Merges several streams into a single stream.
 *
 * All sources are read concurrently and their records are forwarded through a
 * shared output buffer, so records from different sources may interleave.
 * Reading of the sources starts lazily, on the first `next()` call.
 *
 * If any source fails, the error is rethrown from `next()`. The output buffer
 * is finished at that point so a consumer that is already waiting for a
 * record is woken up instead of waiting forever.
 *
 * @returns A stream that ends once every source has ended.
 */
export function merge<T1, T2, T3, T4>(
    stream1: StreamX<T1>,
    stream2: StreamX<T2>,
    stream3: StreamX<T3>,
    stream4: StreamX<T4>
): StreamX<T1 | T2 | T3 | T4>;
export function merge<T1, T2, T3>(
    stream1: StreamX<T1>,
    stream2: StreamX<T2>,
    stream3: StreamX<T3>
): StreamX<T1 | T2 | T3>;
export function merge<T1, T2>(
    stream1: StreamX<T1>,
    stream2: StreamX<T2>
): StreamX<T1 | T2>;
export function merge<Type>(...streams: StreamX<any>[]): StreamX<Type> {
    let outputBuffer: IWriter<Type>;
    let readOutput: IRead<Type>;
    let _error: Error | undefined = undefined;

    // Closes the shared output buffer (no-op until the buffer exists).
    async function finish() {
        if (outputBuffer) {
            await outputBuffer.finish();
        }
    }

    return {
        [Symbol.asyncIterator](): AsyncIterator<Type> {
            return {
                async next(): Promise<IteratorResult<Type>> {
                    if (_error) {
                        throw _error;
                    }

                    if (!outputBuffer) {
                        outputBuffer = Writer<Type>();

                        // Pump every source into the shared buffer in the background.
                        syncTick(async () => {
                            try {
                                await Promise.all(
                                    streams.map(async stream => {
                                        const readStream = read(stream);
                                        while (true) {
                                            if (_error) {
                                                break;
                                            }
                                            const value = await readStream();

                                            // Another source may have failed while this read
                                            // was pending; the buffer is then already finished
                                            // and must not be written to.
                                            if (value === read.DONE || _error) {
                                                break;
                                            } else {
                                                await outputBuffer.write(value);
                                            }
                                        }
                                    })
                                );

                                await finish();
                            } catch (error) {
                                _error = error as Error;
                                // Finish the buffer so a consumer blocked in `readOutput()`
                                // wakes up, sees `_error` and rethrows it.
                                await finish();
                            }
                        });
                    }

                    if (!readOutput) {
                        readOutput = read<Type>(outputBuffer.stream);
                    }

                    const output = await readOutput();

                    if (_error) {
                        throw _error;
                    }

                    if (output === read.DONE) {
                        return { done: true, value: undefined };
                    } else {
                        return { done: false, value: output };
                    }
                },
            };
        },
    };
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { of, StreamXMapper } from './index';
|
|
2
|
+
import { Readable, ReadableOptions, Transform } from 'stream';
|
|
3
|
+
|
|
4
|
+
/**
 * Adapts a Node.js `Transform` stream into a stream operator.
 *
 * The input stream is wrapped with `Readable.from`, piped through `transform`,
 * and the piped result is handed to `of`.
 *
 * @param transform Node.js transform the records are piped through.
 * @param options Options forwarded to `Readable.from`.
 */
export function nodeTransform<Input, Output>(
    transform: Transform,
    options: ReadableOptions = {}
): StreamXMapper<Input, Output> {
    return inputStream => {
        const source = Readable.from(inputStream, options);
        const transformed = source.pipe(transform);

        return of(transformed);
    };
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { StreamX, StreamXMapper } from './index';
|
|
2
|
+
import { Writable } from 'stream';
|
|
3
|
+
import { Defer } from '@handy/fun/defer';
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a pass-through operator that writes every record into a Node.js
 * `Writable` before yielding it downstream.
 *
 * Each record is yielded only after the writable has acknowledged the write.
 * When the input ends, the writable is ended and the operator waits for the
 * end callback. A write failure, an input failure or an `'error'` event on the
 * writable makes the operator throw.
 *
 * @param writable Destination Node.js stream.
 * @param encoding Encoding passed to every `writable.write` call.
 */
export function nodeWritable<Type>(
    writable: Writable,
    encoding: BufferEncoding = 'utf-8'
): StreamXMapper<Type, Type> {
    return inputStream => {
        const _nodeWritable = async function* (): StreamX<Type> {
            // Settles when the writable has ended, or rejects on any failure.
            const deferEnd = Defer<void>();
            writable.on('error', error => deferEnd.reject(error));

            try {
                for await (const value of inputStream) {
                    const written = Defer<void>();

                    writable.write(value, encoding, error => {
                        if (!error) {
                            written.resolve();
                            return;
                        }
                        written.reject(error);
                    });

                    // Wait for the write acknowledgement before passing the record on.
                    await written.promise;
                    yield value;
                }

                writable.end(() => deferEnd.resolve());
            } catch (error) {
                deferEnd.reject(error as Error);
            }

            await deferEnd.promise;
        };

        return _nodeWritable();
    };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { Promised, StreamX } from './index';
|
|
2
|
+
import { reader } from './reader';
|
|
3
|
+
|
|
4
|
+
/**
 * Turns a pull function into a stream of objects.
 *
 * `read` is called once per requested record. Any object (including arrays
 * and functions) is emitted; any other result (`null`, `undefined`, a boolean,
 * a number, ...) ends the stream.
 *
 * The check uses `typeof` rather than `instanceof Object`, so objects without
 * a prototype (`Object.create(null)`) and objects created in another realm
 * are emitted as records instead of silently ending the stream.
 *
 * @param read Producer of the next record, or of a non-object to signal the end.
 */
export function objectReader<T extends object | object[]>(
    read: () => Promised<T | null | undefined | boolean | number>
): StreamX<T> {
    return reader<T>(async () => {
        const object = await read();

        // `instanceof Object` also accepted functions; keep accepting them.
        if (typeof object === 'function') {
            return object;
        }

        if (typeof object === 'object' && object !== null) {
            return object;
        }

        return reader.DONE;
    });
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// polyfill
//
// Minimal fallbacks for runtimes without `Symbol` or without the well-known
// iterator symbols. In the fallback a "symbol" is simply its description string.
//
// `typeof` is used because reading an undeclared global (`!Symbol`) throws a
// ReferenceError instead of evaluating to `true`. The fallback is installed on
// the global object because assigning to an undeclared identifier throws in
// strict-mode (module) code.
if (typeof Symbol === 'undefined') {
    // A runtime without `Symbol` may also lack `globalThis`.
    const root: any = typeof globalThis !== 'undefined' ? globalThis : Function('return this')();

    root.Symbol = (description: string) => description;
    root.Symbol.for = (key: string) => key;
}

if (!(Symbol as any).asyncIterator) {
    (Symbol as any).asyncIterator = Symbol('Symbol.asyncIterator');
}

if (!Symbol.iterator) {
    (Symbol as any).iterator = Symbol('Symbol.iterator');
}

// Marker from the global symbol registry; exported so this module has a value
// to export.
export const StreamXPolyfill = Symbol.for('Polyfill');
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { Promised, StreamX } from './index';
|
|
2
|
+
|
|
3
|
+
// Sentinel returned by pull functions to signal "no more records".
const DONE = Symbol('DONE');

/**
 * A pull function: each call produces the next record, or `DONE` once the
 * source is exhausted.
 */
export type IRead<T> = () => Promised<T | typeof DONE>;

/**
 * Wraps a pull function into an async-iterable stream.
 *
 * Every `next()` call invokes `read` once; the stream is reported as done when
 * `read` produces `reader.DONE`.
 *
 * @param read Pull function supplying the records.
 */
export function reader<T>(read: IRead<T>): StreamX<T> {
    const createIterator = () => {
        const iterator = {
            async next() {
                const value = await read();
                const done = value === reader.DONE;

                return { done, value };
            },
        };

        return iterator as AsyncIterator<T>;
    };

    return {
        [Symbol.asyncIterator]() {
            return createIterator();
        },
    };
}

/**
 * Converts a stream into a pull function (the inverse of `reader`).
 *
 * The iterator is obtained lazily on the first call and reused afterwards.
 * Each call resolves to the next record, or to `reader.DONE` when the stream
 * has ended.
 *
 * @param stream Stream to pull from.
 */
export function read<T>(stream: StreamX<T>): IRead<T> {
    let iterator: AsyncIterator<T>;

    const pull = async () => {
        if (!iterator) {
            iterator = stream[Symbol.asyncIterator]();
        }

        const step = await iterator.next();

        if (step.done) {
            return reader.DONE;
        }

        return step.value;
    };

    return pull;
}

// Expose the sentinel on both helpers so callers can compare against either.
read.DONE = DONE;
reader.DONE = DONE;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Promised, StreamX, StreamXMapper } from './index';
|
|
2
|
+
|
|
3
|
+
/**
 * Builds a stream operator that folds the whole input into a single value.
 *
 * The resulting stream yields exactly one record, after the input has ended:
 * the accumulated value, or `initial` when the input was empty.
 *
 * @param reducer Combines the running accumulator with the next input record.
 * @param initial Accumulator value before the first record.
 */
export function reduce<Input, Accumulator>(
    reducer: (accumulator: Accumulator, input: Input) => Promised<Accumulator>,
    initial: Accumulator
): StreamXMapper<Input, Accumulator> {
    return inputStream => {
        const reduced = async function* (): StreamX<Accumulator> {
            let accumulated = initial;

            for await (const input of inputStream) {
                accumulated = await reducer(accumulated, input);
            }

            yield accumulated;
        };

        return reduced();
    };
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { Promised, StreamXMapper } from './index';
|
|
2
|
+
import { IRead, read } from './reader';
|
|
3
|
+
import { IWriter, Writer } from './writer';
|
|
4
|
+
import { Concurrency, IPublishToConcurrency } from '@handy/fun/concurrency';
|
|
5
|
+
import { syncTick } from '@handy/fun/tick';
|
|
6
|
+
|
|
7
|
+
/**
 * Builds a stream operator that runs `mapper` through a `Concurrency` pool
 * created with `max`, writing results into an output buffer as they complete.
 *
 * Work starts lazily, on the first `next()` call. If the input stream, the
 * pool or `mapper` fails, the error is rethrown from `next()`.
 *
 * All state lives inside the returned operator call, so the same operator can
 * be applied to several input streams without them sharing buffers or errors.
 *
 * @param max Limit handed to `Concurrency` (presumably the maximum number of
 *   mapper calls in flight — confirm against `@handy/fun/concurrency`).
 * @param mapper Synchronous or asynchronous transformation of a single record.
 */
export function scale<Input, Output>(
    max: number,
    mapper: (input: Input) => Promised<Output>
): StreamXMapper<Input, Output> {
    return inputStream => {
        let outputBuffer: IWriter<Output>;
        let readInput: IRead<Input>;
        let readOutput: IRead<Output>;
        let concurrencyControl: IPublishToConcurrency<Input, Output>;
        let _error: Error | undefined = undefined;

        // Shuts down the pool first, then closes the output buffer.
        async function finish() {
            if (concurrencyControl) {
                await concurrencyControl.finish();
            }

            if (outputBuffer) {
                await outputBuffer.finish();
            }
        }

        return {
            [Symbol.asyncIterator](): AsyncIterator<Output> {
                return {
                    async next(): Promise<IteratorResult<Output>> {
                        if (_error) {
                            throw _error;
                        }

                        if (!readInput) {
                            readInput = read<Input>(inputStream);
                        }

                        if (!outputBuffer) {
                            outputBuffer = Writer<Output>();

                            if (!concurrencyControl) {
                                concurrencyControl = Concurrency<Input>(max, async input => {
                                    try {
                                        const output = await mapper(input);
                                        await outputBuffer.write(output);
                                    } catch (error) {
                                        _error = error as Error;
                                        // Deliberately not awaited.
                                        // NOTE(review): awaiting here would presumably wait on
                                        // the task that is still running — confirm against
                                        // Concurrency.finish().
                                        finish();
                                    }
                                });
                            }

                            // Feed the pool from the input stream in the background.
                            syncTick(async () => {
                                while (true) {
                                    try {
                                        const inputValue = await readInput();
                                        if (inputValue === read.DONE) {
                                            await finish();
                                            break;
                                        } else {
                                            await concurrencyControl(inputValue);
                                        }
                                    } catch (error) {
                                        _error = error as Error;
                                        await finish();
                                        // Stop pumping: without this the loop would keep
                                        // pulling input after a failure.
                                        break;
                                    }
                                }
                            });
                        }

                        if (!readOutput) {
                            readOutput = read<Output>(outputBuffer.stream);
                        }

                        const output = await readOutput();

                        if (_error) {
                            throw _error;
                        }

                        if (output === read.DONE) {
                            return { done: true, value: undefined };
                        } else {
                            return { done: false, value: output };
                        }
                    },
                };
            },
        };
    };
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { pipe, Promised, StreamXMapper } from './index';
|
|
2
|
+
import { batch } from './batch';
|
|
3
|
+
import { map } from './map';
|
|
4
|
+
import { flat } from './flat';
|
|
5
|
+
|
|
6
|
+
/**
 * Builds a stream operator that processes the input in groups produced by
 * `batch(size)`.
 *
 * All records of a group are mapped in parallel with `Promise.all`; the next
 * group is only started once the whole group has finished. The per-group
 * results are then passed through `flat()`.
 *
 * @param size Size argument handed to `batch`.
 * @param mapper Synchronous or asynchronous transformation of a single record.
 */
export function scaleSync<Input, Output>(
    size: number,
    mapper: (input: Input) => Promised<Output>
): StreamXMapper<Input, Output> {
    const batched = pipe(batch<Input>(size));
    const mappedBatches = batched.pipe(map(values => Promise.all(values.map(mapper))));

    return mappedBatches.pipe(flat());
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { map } from './map';
|
|
2
|
+
import { Promised, StreamXMapper } from './index';
|
|
3
|
+
|
|
4
|
+
/**
 * Builds a pass-through operator that runs a side effect for every record.
 *
 * `fn` is awaited before the record is forwarded; its result is ignored and
 * the record itself is passed on unchanged.
 *
 * @param fn Side effect to run for each record.
 */
export function tap<Input>(fn: (input: Input) => Promised<any>): StreamXMapper<Input, Input> {
    const passThrough = async (input: Input): Promise<Input> => {
        await fn(input);

        return input;
    };

    return map<Input, Input>(passThrough);
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { Defer, IDefer } from '@handy/fun/defer';
|
|
2
|
+
import { of, StreamX } from './index';
|
|
3
|
+
import { reader } from './reader';
|
|
4
|
+
import { flat } from './flat';
|
|
5
|
+
|
|
6
|
+
/**
 * Push-side handle of a buffered stream.
 *
 * - `stream`: the readable side of the buffer.
 * - `write`: adds a record; resolves once the buffer is below its limit again.
 * - `finish`: marks the buffer as complete and waits for pending records to be read.
 * - `length`: number of records currently buffered.
 */
export type IWriter<T> = {
    stream: StreamX<T>;
    write: (data: T) => Promise<void>;
    finish: () => Promise<void>;
    length(): number;
};

/**
 * Creates a buffer that connects push-style producers to a pull-style stream.
 *
 * @param bufferSize Number of buffered records at which `write` starts to
 *   wait for the reader to drain the buffer.
 */
export function Writer<T>(bufferSize = 1): IWriter<T> {
    let records: T[] = [];
    let finishing = false;

    // Pending while a writer waits for the reader to drain the buffer.
    let weWaitUntilRecordsConsumed: IDefer<void> | undefined = undefined;
    // Pending while the reader waits for a record (or for `finish`).
    let weWaitForRecordsToRead: IDefer<void> | undefined = undefined;

    // Wakes up a reader that is parked waiting for records, if there is one.
    const wakeReader = () => {
        if (weWaitForRecordsToRead) {
            weWaitForRecordsToRead.resolve();
            weWaitForRecordsToRead = undefined;
        }
    };

    // Drains the whole buffer in one go; resolves to DONE when finished and empty.
    const read = async (): Promise<T[] | typeof reader.DONE> => {
        if (records.length === 0) {
            if (finishing) {
                return reader.DONE;
            }

            if (!weWaitForRecordsToRead) {
                weWaitForRecordsToRead = Defer<void>();
            }

            // waiting to read
            await weWaitForRecordsToRead.promise;
        }

        const drained = records;
        records = [];

        // The buffer is empty again: release any writer blocked on a full buffer.
        if (weWaitUntilRecordsConsumed) {
            weWaitUntilRecordsConsumed.resolve();
            weWaitUntilRecordsConsumed = undefined;
        }

        if (finishing && drained.length === 0) {
            return reader.DONE;
        }

        return drained;
    };

    return {
        async write(data: T) {
            if (finishing) {
                throw new Error(`Buffer is finishing, impossible to write`);
            }

            records.push(data);
            wakeReader();

            if (records.length < bufferSize) {
                return;
            }

            // Buffer is full: wait until the reader has taken the records.
            if (!weWaitUntilRecordsConsumed) {
                weWaitUntilRecordsConsumed = Defer<void>();
            }

            await weWaitUntilRecordsConsumed.promise;
        },

        async finish() {
            finishing = true;
            wakeReader();

            if (weWaitUntilRecordsConsumed) {
                await weWaitUntilRecordsConsumed.promise;
                weWaitUntilRecordsConsumed = undefined;
            }
        },

        length(): number {
            return records.length;
        },

        // `read` yields whole batches; presumably `flat()` spreads them back into
        // single records — confirm against ./flat.
        stream: of(reader(read)).pipe(flat()),
    };
}
|
package/rnd/hf.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { pipeline } from "@huggingface/transformers";
|
|
2
|
+
|
|
3
|
+
// Experiment: run one chat turn against a small local instruction model and
// print the model's reply.
const modelId = "HuggingFaceTB/SmolLM2-135M-Instruct";
const generator = await pipeline("text-generation", modelId);

const systemMessage = { role: "system", content: "You are a helpful assistant." };
const userMessage = { role: "user", content: "Who is Microsoft?" };
const messages = [systemMessage, userMessage];

const output = await generator(messages, { max_new_tokens: 64 });

// The last entry of the generated conversation is printed as the reply.
const conversation = output[0].generated_text;
console.log(conversation.at(-1).content);
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import nlp from "compromise";
|
|
2
|
+
import { readFile } from "fs/promises";
|
|
3
|
+
|
|
4
|
+
// Experiment: print what `compromise` extracts from a text file.
// The file to analyse is the first CLI argument after the script path.
const [, , filePath] = process.argv;
if (!filePath) {
  console.error("Usage: npx tsx keywords-compromise.ts <file>");
  process.exit(1);
}

const text = await readFile(filePath, "utf8");
const doc = nlp(text);

console.log(`Keywords from: ${filePath}\n`);

// One section per extraction, printed in a fixed order.
const topics = doc.topics().out("array");
console.log("Topics:", topics);

const nouns = doc.nouns().out("array");
console.log("\nNouns:", nouns);

const verbs = doc.verbs().out("array");
console.log("\nVerbs:", verbs);

const people = doc.people().out("array");
console.log("\nPeople:", people);

const organizations = doc.organizations().out("array");
console.log("\nOrganizations:", organizations);
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import nlp from "compromise";
|
|
2
|
+
import {createRequire} from "module";
|
|
3
|
+
import {readFile} from "fs/promises";
|
|
4
|
+
import {pipeline} from "@huggingface/transformers";
|
|
5
|
+
|
|
6
|
+
// `keyword-extractor` is loaded through a CommonJS `require` built for this module.
const require = createRequire(import.meta.url);
const keyword_extractor = require("keyword-extractor");

// Text-generation pipeline shared by every `llm` call; loaded once at start-up.
const generator = await pipeline(
  "text-generation",
  "HuggingFaceTB/SmolLM2-135M-Instruct"
);

/**
 * Asks the local model to list the most important keywords of `input`.
 *
 * @param input Text to extract keywords from.
 * @returns The content of the last message of the generated conversation.
 */
export async function llm(input: string): Promise<string> {
  const instruction = "Extract and list the most important keywords from the following text. Return only keywords separated by commas.";
  const conversation = [
    {role: "system", content: instruction},
    {role: "user", content: input},
  ];

  const [settings, results] = [{max_new_tokens: 128}, await generator(conversation, {max_new_tokens: 128})];
  void settings;

  const reply = results[0].generated_text.at(-1);
  return reply.content;
}
|
|
25
|
+
|
|
26
|
+
// The file to analyse is the first CLI argument after the script path.
const [, , filePath] = process.argv;
if (!filePath) {
  console.error("Usage: npx tsx keywords-pipeline.ts <file>");
  process.exit(1);
}

// Step 1: load the source text
const text = await readFile(filePath, "utf8");
console.log("=== Step 1: Read file ===");
console.log(`${text.length} chars\n`);

// Step 2: let compromise pick out nouns, verbs and topics
const doc = nlp(text);
const nouns = doc.nouns().out("array") as string[];
const verbs = doc.verbs().out("array") as string[];
const topics = doc.topics().out("array") as string[];
const combined = topics.concat(nouns, verbs).join(" ");
console.log("=== Step 2: Compromise ===");
console.log(`${topics.length} topics, ${nouns.length} nouns, ${verbs.length} verbs\n`);

// Step 3: collapse every run of non-word characters into a single space
const cleaned = combined.replace(/\W+/g, " ").trim();
console.log("=== Step 3: Regex cleanup ===");
console.log(cleaned.slice(0, 200), "\n");

// Step 4: hand the cleaned text to the local model
const llmResult = await llm(cleaned);
console.log("=== Step 4: LLM ===");
console.log(llmResult, "\n");

// Step 5: keyword-extractor
const extractorOptions = {
  language: "english",
  remove_digits: false,
  return_changed_case: true,
  remove_duplicates: true,
  return_max_ngrams: 3,
};
const keywords: string[] = keyword_extractor.extract(cleaned, extractorOptions);

// Rank keywords by how often they occur, as whole words, in the cleaned text.
const regexSpecials = /[.*+?^${}()|[\]\\]/g;
const lower = cleaned.toLowerCase();
const scored = keywords
  .filter((kw) => kw.length > 2)
  .map((kw) => {
    const escaped = kw.replace(regexSpecials, "\\$&");
    const wholeWord = new RegExp("\\b" + escaped + "\\b", "gi");
    const hits = lower.match(wholeWord);
    return {keyword: kw, count: hits ? hits.length : 0};
  })
  .sort((a, b) => b.count - a.count);

console.log("=== Step 5: Keywords ===");
scored.forEach(({keyword, count}) => {
  console.log(` ${keyword.padEnd(35)} (${count}x)`);
});
|