glost-processor 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/stream-processor.d.ts +145 -0
- package/dist/stream-processor.d.ts.map +1 -0
- package/dist/stream-processor.js +245 -0
- package/dist/stream-processor.js.map +1 -0
- package/package.json +3 -3
- package/src/__benchmarks__/stream-processor.bench.ts +250 -0
- package/src/__tests__/processor.test.ts +992 -0
- package/src/__tests__/stream-processor.test.ts +574 -0
- package/src/index.ts +6 -0
- package/src/stream-processor.ts +391 -0
package/dist/index.d.ts
CHANGED
|
@@ -22,6 +22,8 @@
|
|
|
22
22
|
export { GLOSTProcessor } from "./processor.js";
|
|
23
23
|
export type { FrozenProcessor } from "./processor.js";
|
|
24
24
|
export type { Plugin, PluginSpec, Preset, ProcessorOptions, ProcessingResult, ProcessingError, ProcessingWarning, ProcessingStats, BeforeHook, AfterHook, ErrorHook, SkipHook, ProgressHook, ProgressStats, } from "./types.js";
|
|
25
|
+
export { GLOSTStreamProcessor } from "./stream-processor.js";
|
|
26
|
+
export type { FrozenStreamProcessor, StreamOptions, ProcessedChunk, } from "./stream-processor.js";
|
|
25
27
|
import { GLOSTProcessor } from "./processor.js";
|
|
26
28
|
import type { ProcessorOptions } from "./types.js";
|
|
27
29
|
/**
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACtD,YAAY,EACV,MAAM,EACN,UAAU,EACV,MAAM,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,iBAAiB,EACjB,eAAe,EACf,UAAU,EACV,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,aAAa,GACd,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACtD,YAAY,EACV,MAAM,EACN,UAAU,EACV,MAAM,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,iBAAiB,EACjB,eAAe,EACf,UAAU,EACV,SAAS,EACT,SAAS,EACT,QAAQ,EACR,YAAY,EACZ,aAAa,GACd,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,YAAY,EACV,qBAAqB,EACrB,aAAa,EACb,cAAc,GACf,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAgB,KAAK,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,cAAc,CAEhE"}
|
package/dist/index.js
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAkBhD,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAO7D,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAGhD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,MAAM,UAAU,KAAK,CAAC,OAA0B;IAC9C,OAAO,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;AACrC,CAAC"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GLOST Stream Processor
|
|
3
|
+
*
|
|
4
|
+
* Streaming variant of GLOSTProcessor that yields processed sentence
|
|
5
|
+
* batches progressively using AsyncGenerator. Chunk-level extensions run
|
|
6
|
+
* lazily, one batch per iteration; the document itself stays in memory.
|
|
7
|
+
*
|
|
8
|
+
* Document-level transforms (extensions with streamingSupport !== 'chunk')
|
|
9
|
+
* run once on the full document before streaming begins. Chunk-compatible
|
|
10
|
+
* extensions (streamingSupport === 'chunk') then run on each batch.
|
|
11
|
+
*
|
|
12
|
+
* @packageDocumentation
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* import { GLOSTStreamProcessor } from "glost-processor";
|
|
17
|
+
*
|
|
18
|
+
* const processor = new GLOSTStreamProcessor()
|
|
19
|
+
* .use(transcription)
|
|
20
|
+
* .use(translation);
|
|
21
|
+
*
|
|
22
|
+
* for await (const chunk of processor.stream(document)) {
|
|
23
|
+
* console.log(chunk.sentences, chunk.isLast);
|
|
24
|
+
* }
|
|
25
|
+
* ```
|
|
26
|
+
*
|
|
27
|
+
* @since 0.6.0
|
|
28
|
+
*/
|
|
29
|
+
import type { GLOSTRoot, GLOSTSentence } from "glost-core";
|
|
30
|
+
import type { PluginSpec, Preset, ProcessorOptions } from "./types.js";
|
|
31
|
+
/**
|
|
32
|
+
* Options for stream() method
|
|
33
|
+
*
|
|
34
|
+
* @since 0.6.0
|
|
35
|
+
*/
|
|
36
|
+
export interface StreamOptions {
|
|
37
|
+
/**
|
|
38
|
+
* Number of sentences per chunk.
|
|
39
|
+
*
|
|
40
|
+
* A smaller value reduces latency to first yielded chunk; a larger
|
|
41
|
+
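* value amortises per-chunk overhead. Default: 50. Must be a positive integer: the value is not validated, and 0 or a negative size makes the batching loop never terminate.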
|
|
42
|
+
*/
|
|
43
|
+
batchSize?: number;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* A single yielded chunk from the stream
|
|
47
|
+
*
|
|
48
|
+
* @since 0.6.0
|
|
49
|
+
*/
|
|
50
|
+
export interface ProcessedChunk {
|
|
51
|
+
/** Processed sentences in this batch */
|
|
52
|
+
sentences: GLOSTSentence[];
|
|
53
|
+
/**
|
|
54
|
+
* Index of the sentence group this chunk came from (paragraphs without sentences are skipped; root-level sentences form group 0).
|
|
55
|
+
*
|
|
56
|
+
* When the document has multiple paragraphs each paragraph is
|
|
57
|
+
* chunked independently, so multiple consecutive chunks may share
|
|
58
|
+
* the same paragraphIndex.
|
|
59
|
+
*/
|
|
60
|
+
paragraphIndex: number;
|
|
61
|
+
/**
|
|
62
|
+
* Index of this chunk within its paragraph (0-based).
|
|
63
|
+
*/
|
|
64
|
+
chunkIndex: number;
|
|
65
|
+
/** True for the final chunk across the whole document */
|
|
66
|
+
isLast: boolean;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Streaming processor for GLOST documents
|
|
70
|
+
*
|
|
71
|
+
* Mirrors the `GLOSTProcessor` API (`.use()`, `.freeze()`) but adds a
|
|
72
|
+
* `.stream()` method that returns an `AsyncGenerator<ProcessedChunk>`.
|
|
73
|
+
*
|
|
74
|
+
* @since 0.6.0
|
|
75
|
+
*/
|
|
76
|
+
export declare class GLOSTStreamProcessor {
|
|
77
|
+
private plugins;
|
|
78
|
+
private options;
|
|
79
|
+
private frozen;
|
|
80
|
+
/**
|
|
81
|
+
* Create a new stream processor instance
|
|
82
|
+
*
|
|
83
|
+
* @param options - Initial processor options
|
|
84
|
+
*/
|
|
85
|
+
constructor(options?: ProcessorOptions);
|
|
86
|
+
/**
|
|
87
|
+
* Add a plugin, preset, or extension to the pipeline
|
|
88
|
+
*
|
|
89
|
+
* @param spec - Plugin function, extension object, preset, or ID
|
|
90
|
+
* @param options - Plugin options
|
|
91
|
+
* @returns This processor for chaining
|
|
92
|
+
*/
|
|
93
|
+
use(spec: PluginSpec | Preset, options?: unknown): this;
|
|
94
|
+
/**
|
|
95
|
+
* Freeze the processor
|
|
96
|
+
*
|
|
97
|
+
* Returns a frozen processor that cannot be modified. Useful for
|
|
98
|
+
* reusing the same pipeline configuration across multiple documents.
|
|
99
|
+
*
|
|
100
|
+
* @returns A frozen copy of this processor
|
|
101
|
+
*/
|
|
102
|
+
freeze(): FrozenStreamProcessor;
|
|
103
|
+
/**
|
|
104
|
+
* Stream a document as progressive sentence batches
|
|
105
|
+
*
|
|
106
|
+
* Processing phases:
|
|
107
|
+
* 1. All extensions with `streamingSupport !== 'chunk'` (i.e. `'none'`
|
|
108
|
+
* or `'full'`, or unset) run their `transform`, `visit`, and
|
|
109
|
+
* `enhanceMetadata` hooks on the **full** document.
|
|
110
|
+
* 2. The resulting document is split into sentence batches.
|
|
111
|
+
* 3. For each batch, extensions with `streamingSupport === 'chunk'`
|
|
112
|
+
* run their `visit` and `enhanceMetadata` hooks.
|
|
113
|
+
* 4. A `ProcessedChunk` is yielded.
|
|
114
|
+
*
|
|
115
|
+
* Cancellation: break out of the `for await` loop at any time. The
|
|
116
|
+
* generator will stop without processing remaining chunks.
|
|
117
|
+
*
|
|
118
|
+
* @param document - GLOST document to stream
|
|
119
|
+
* @param streamOptions - Streaming options (batchSize etc.)
|
|
120
|
+
* @yields `ProcessedChunk` objects in document order
|
|
121
|
+
*
|
|
122
|
+
* @example
|
|
123
|
+
* ```typescript
|
|
124
|
+
* for await (const chunk of processor.stream(doc, { batchSize: 20 })) {
|
|
125
|
+
* console.log(`para ${chunk.paragraphIndex} chunk ${chunk.chunkIndex}`);
|
|
126
|
+
* if (chunk.isLast) console.log("done");
|
|
127
|
+
* }
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
130
|
+
stream(document: GLOSTRoot, streamOptions?: StreamOptions): AsyncGenerator<ProcessedChunk>;
|
|
131
|
+
private usePreset;
|
|
132
|
+
private resolveExtensions;
|
|
133
|
+
private resolvePlugin;
|
|
134
|
+
private isPreset;
|
|
135
|
+
private assertNotFrozen;
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* A frozen `GLOSTStreamProcessor` that cannot be modified.
|
|
139
|
+
*
|
|
140
|
+
* @since 0.6.0
|
|
141
|
+
*/
|
|
142
|
+
export type FrozenStreamProcessor = Omit<GLOSTStreamProcessor, "use" | "freeze"> & {
|
|
143
|
+
readonly frozen: true;
|
|
144
|
+
};
|
|
145
|
+
//# sourceMappingURL=stream-processor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stream-processor.d.ts","sourceRoot":"","sources":["../src/stream-processor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAO3D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAMvE;;;;GAIG;AACH,MAAM,WAAW,aAAa;IAC5B;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;GAIG;AACH,MAAM,WAAW,cAAc;IAC7B,wCAAwC;IACxC,SAAS,EAAE,aAAa,EAAE,CAAC;IAE3B;;;;;;OAMG;IACH,cAAc,EAAE,MAAM,CAAC;IAEvB;;OAEG;IACH,UAAU,EAAE,MAAM,CAAC;IAEnB,yDAAyD;IACzD,MAAM,EAAE,OAAO,CAAC;CACjB;AAMD;;;;;;;GAOG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,OAAO,CAAsD;IACrE,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,MAAM,CAAS;IAEvB;;;;OAIG;gBACS,OAAO,GAAE,gBAAqB;IAI1C;;;;;;OAMG;IACH,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,IAAI;IAWvD;;;;;;;OAOG;IACH,MAAM,IAAI,qBAAqB;IAO/B;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACI,MAAM,CACX,QAAQ,EAAE,SAAS,EACnB,aAAa,CAAC,EAAE,aAAa,GAC5B,cAAc,CAAC,cAAc,CAAC;IAiGjC,OAAO,CAAC,SAAS;YAYH,iBAAiB;YAWjB,aAAa;IAsB3B,OAAO,CAAC,QAAQ;IAShB,OAAO,CAAC,eAAe;CAKxB;AAMD;;;;GAIG;AACH,MAAM,MAAM,qBAAqB,GAAG,IAAI,CACtC,oBAAoB,EACpB,KAAK,GAAG,QAAQ,CACjB,GAAG;IAAE,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAA;CAAE,CAAC"}
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GLOST Stream Processor
|
|
3
|
+
*
|
|
4
|
+
* Streaming variant of GLOSTProcessor that yields processed sentence
|
|
5
|
+
* batches progressively using AsyncGenerator. Chunk-level extensions run
|
|
6
|
+
* lazily, one batch per iteration; the document itself stays in memory.
|
|
7
|
+
*
|
|
8
|
+
* Document-level transforms (extensions with streamingSupport !== 'chunk')
|
|
9
|
+
* run once on the full document before streaming begins. Chunk-compatible
|
|
10
|
+
* extensions (streamingSupport === 'chunk') then run on each batch.
|
|
11
|
+
*
|
|
12
|
+
* @packageDocumentation
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* import { GLOSTStreamProcessor } from "glost-processor";
|
|
17
|
+
*
|
|
18
|
+
* const processor = new GLOSTStreamProcessor()
|
|
19
|
+
* .use(transcription)
|
|
20
|
+
* .use(translation);
|
|
21
|
+
*
|
|
22
|
+
* for await (const chunk of processor.stream(document)) {
|
|
23
|
+
* console.log(chunk.sentences, chunk.isLast);
|
|
24
|
+
* }
|
|
25
|
+
* ```
|
|
26
|
+
*
|
|
27
|
+
* @since 0.6.0
|
|
28
|
+
*/
|
|
29
|
+
import { processGLOSTWithExtensionsAsync, processGLOSTChunkAsync, extensionRegistry, } from "glost-extensions";
|
|
30
|
+
// ============================================================================
|
|
31
|
+
// GLOSTStreamProcessor
|
|
32
|
+
// ============================================================================
|
|
33
|
+
/**
|
|
34
|
+
* Streaming processor for GLOST documents
|
|
35
|
+
*
|
|
36
|
+
* Mirrors the `GLOSTProcessor` API (`.use()`, `.freeze()`) but adds a
|
|
37
|
+
* `.stream()` method that returns an `AsyncGenerator<ProcessedChunk>`.
|
|
38
|
+
*
|
|
39
|
+
* @since 0.6.0
|
|
40
|
+
*/
|
|
41
|
+
export class GLOSTStreamProcessor {
|
|
42
|
+
plugins = [];
|
|
43
|
+
options = {};
|
|
44
|
+
frozen = false;
|
|
45
|
+
/**
|
|
46
|
+
* Create a new stream processor instance
|
|
47
|
+
*
|
|
48
|
+
* @param options - Initial processor options
|
|
49
|
+
*/
|
|
50
|
+
constructor(options = {}) {
|
|
51
|
+
this.options = { ...options };
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Add a plugin, preset, or extension to the pipeline
|
|
55
|
+
*
|
|
56
|
+
* @param spec - Plugin function, extension object, preset, or ID
|
|
57
|
+
* @param options - Plugin options
|
|
58
|
+
* @returns This processor for chaining
|
|
59
|
+
*/
|
|
60
|
+
use(spec, options) {
|
|
61
|
+
this.assertNotFrozen();
|
|
62
|
+
if (this.isPreset(spec)) {
|
|
63
|
+
return this.usePreset(spec);
|
|
64
|
+
}
|
|
65
|
+
this.plugins.push({ spec, options });
|
|
66
|
+
return this;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Freeze the processor
|
|
70
|
+
*
|
|
71
|
+
* Returns a frozen processor that cannot be modified. Useful for
|
|
72
|
+
* reusing the same pipeline configuration across multiple documents.
|
|
73
|
+
*
|
|
74
|
+
* @returns A frozen copy of this processor
|
|
75
|
+
*/
|
|
76
|
+
freeze() {
|
|
77
|
+
const frozen = new GLOSTStreamProcessor(this.options);
|
|
78
|
+
frozen.plugins = [...this.plugins];
|
|
79
|
+
frozen.frozen = true;
|
|
80
|
+
return frozen;
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Stream a document as progressive sentence batches
|
|
84
|
+
*
|
|
85
|
+
* Processing phases:
|
|
86
|
+
* 1. All extensions with `streamingSupport !== 'chunk'` (i.e. `'none'`
|
|
87
|
+
* or `'full'`, or unset) run their `transform`, `visit`, and
|
|
88
|
+
* `enhanceMetadata` hooks on the **full** document.
|
|
89
|
+
* 2. The resulting document is split into sentence batches.
|
|
90
|
+
* 3. For each batch, extensions with `streamingSupport === 'chunk'`
|
|
91
|
+
* run their `visit` and `enhanceMetadata` hooks.
|
|
92
|
+
* 4. A `ProcessedChunk` is yielded.
|
|
93
|
+
*
|
|
94
|
+
* Cancellation: break out of the `for await` loop at any time. The
|
|
95
|
+
* generator will stop without processing remaining chunks.
|
|
96
|
+
*
|
|
97
|
+
* @param document - GLOST document to stream
|
|
98
|
+
* @param streamOptions - Streaming options (batchSize etc.)
|
|
99
|
+
* @yields `ProcessedChunk` objects in document order
|
|
100
|
+
*
|
|
101
|
+
* @example
|
|
102
|
+
* ```typescript
|
|
103
|
+
* for await (const chunk of processor.stream(doc, { batchSize: 20 })) {
|
|
104
|
+
* console.log(`para ${chunk.paragraphIndex} chunk ${chunk.chunkIndex}`);
|
|
105
|
+
* if (chunk.isLast) console.log("done");
|
|
106
|
+
* }
|
|
107
|
+
* ```
|
|
108
|
+
*/
|
|
109
|
+
async *stream(document, streamOptions) {
|
|
110
|
+
const batchSize = streamOptions?.batchSize ?? 50;
|
|
111
|
+
const extensions = await this.resolveExtensions();
|
|
112
|
+
// Split extensions into doc-level and chunk-level
|
|
113
|
+
const docExtensions = extensions.filter((e) => e.streamingSupport !== "chunk");
|
|
114
|
+
const chunkExtensions = extensions.filter((e) => e.streamingSupport === "chunk");
|
|
115
|
+
// Phase 1: run doc-level transforms on the full document
|
|
116
|
+
let processedDoc = document;
|
|
117
|
+
if (docExtensions.length > 0) {
|
|
118
|
+
const { data: _data, ...extOptions } = this.options;
|
|
119
|
+
const result = await processGLOSTWithExtensionsAsync(processedDoc, docExtensions, extOptions);
|
|
120
|
+
processedDoc = result.document;
|
|
121
|
+
}
|
|
122
|
+
// Phase 2: collect all sentences grouped by paragraph index
|
|
123
|
+
const paragraphSentences = collectSentencesByParagraph(processedDoc);
|
|
124
|
+
if (paragraphSentences.length === 0) {
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
const allChunks = [];
|
|
128
|
+
for (let pIdx = 0; pIdx < paragraphSentences.length; pIdx++) {
|
|
129
|
+
const sentences = paragraphSentences[pIdx];
|
|
130
|
+
let chunkIndex = 0;
|
|
131
|
+
for (let offset = 0; offset < sentences.length; offset += batchSize) {
|
|
132
|
+
allChunks.push({
|
|
133
|
+
paragraphIndex: pIdx,
|
|
134
|
+
chunkIndex,
|
|
135
|
+
sentences: sentences.slice(offset, offset + batchSize),
|
|
136
|
+
});
|
|
137
|
+
chunkIndex++;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
const totalChunks = allChunks.length;
|
|
141
|
+
// Phase 3: yield each chunk, optionally running chunk-level extensions
|
|
142
|
+
for (let i = 0; i < totalChunks; i++) {
|
|
143
|
+
const descriptor = allChunks[i];
|
|
144
|
+
let processedSentences = descriptor.sentences;
|
|
145
|
+
if (chunkExtensions.length > 0) {
|
|
146
|
+
const { data: _data, ...extOptions } = this.options;
|
|
147
|
+
processedSentences = await processGLOSTChunkAsync(processedSentences, chunkExtensions, extOptions);
|
|
148
|
+
}
|
|
149
|
+
yield {
|
|
150
|
+
sentences: processedSentences,
|
|
151
|
+
paragraphIndex: descriptor.paragraphIndex,
|
|
152
|
+
chunkIndex: descriptor.chunkIndex,
|
|
153
|
+
isLast: i === totalChunks - 1,
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
// =====================================================================
|
|
158
|
+
// Private helpers
|
|
159
|
+
// =====================================================================
|
|
160
|
+
usePreset(preset) {
|
|
161
|
+
for (const entry of preset.plugins) {
|
|
162
|
+
if (Array.isArray(entry)) {
|
|
163
|
+
const [plugin, opts] = entry;
|
|
164
|
+
this.use(plugin, opts);
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
this.use(entry);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
return this;
|
|
171
|
+
}
|
|
172
|
+
async resolveExtensions() {
|
|
173
|
+
const result = [];
|
|
174
|
+
for (const { spec, options } of this.plugins) {
|
|
175
|
+
const ext = await this.resolvePlugin(spec, options);
|
|
176
|
+
if (ext) {
|
|
177
|
+
result.push(ext);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
return result;
|
|
181
|
+
}
|
|
182
|
+
async resolvePlugin(spec, options) {
|
|
183
|
+
if (typeof spec === "string") {
|
|
184
|
+
const ext = extensionRegistry.get(spec);
|
|
185
|
+
if (!ext) {
|
|
186
|
+
throw new Error(`Plugin "${spec}" not found in registry`);
|
|
187
|
+
}
|
|
188
|
+
return ext;
|
|
189
|
+
}
|
|
190
|
+
if (typeof spec === "function") {
|
|
191
|
+
const result = spec(options);
|
|
192
|
+
return result ?? null;
|
|
193
|
+
}
|
|
194
|
+
return spec;
|
|
195
|
+
}
|
|
196
|
+
isPreset(spec) {
|
|
197
|
+
return (spec !== null &&
|
|
198
|
+
typeof spec === "object" &&
|
|
199
|
+
"plugins" in spec &&
|
|
200
|
+
Array.isArray(spec.plugins));
|
|
201
|
+
}
|
|
202
|
+
assertNotFrozen() {
|
|
203
|
+
if (this.frozen) {
|
|
204
|
+
throw new Error("Cannot modify frozen stream processor");
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
// ============================================================================
|
|
209
|
+
// Internal helpers
|
|
210
|
+
// ============================================================================
|
|
211
|
+
/**
|
|
212
|
+
* Collect all sentences from a GLOSTRoot, grouped by paragraph index.
|
|
213
|
+
*
|
|
214
|
+
* Only `SentenceNode` children of `ParagraphNode` children are
|
|
215
|
+
* collected. Sentences that appear directly under the root (without a
|
|
216
|
+
* wrapping paragraph) are collected as a single synthetic group at
|
|
217
|
+
* index 0.
|
|
218
|
+
*
|
|
219
|
+
* @internal
|
|
220
|
+
*/
|
|
221
|
+
function collectSentencesByParagraph(document) {
|
|
222
|
+
const groups = [];
|
|
223
|
+
// Sentences that sit directly under the root (no paragraph wrapper)
|
|
224
|
+
const rootSentences = [];
|
|
225
|
+
for (const child of document.children) {
|
|
226
|
+
if (child.type === "ParagraphNode" && "children" in child) {
|
|
227
|
+
const para = child;
|
|
228
|
+
const sentences = para.children.filter((c) => typeof c === "object" &&
|
|
229
|
+
c !== null &&
|
|
230
|
+
c.type === "SentenceNode");
|
|
231
|
+
if (sentences.length > 0) {
|
|
232
|
+
groups.push(sentences);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
else if (child.type === "SentenceNode") {
|
|
236
|
+
rootSentences.push(child);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
// Prepend root-level sentences as paragraph 0 (if any)
|
|
240
|
+
if (rootSentences.length > 0) {
|
|
241
|
+
groups.unshift(rootSentences);
|
|
242
|
+
}
|
|
243
|
+
return groups;
|
|
244
|
+
}
|
|
245
|
+
//# sourceMappingURL=stream-processor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stream-processor.js","sourceRoot":"","sources":["../src/stream-processor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAIH,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACtB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAiD1B,+EAA+E;AAC/E,uBAAuB;AACvB,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,OAAO,oBAAoB;IACvB,OAAO,GAAmD,EAAE,CAAC;IAC7D,OAAO,GAAqB,EAAE,CAAC;IAC/B,MAAM,GAAG,KAAK,CAAC;IAEvB;;;;OAIG;IACH,YAAY,UAA4B,EAAE;QACxC,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAChC,CAAC;IAED;;;;;;OAMG;IACH,GAAG,CAAC,IAAyB,EAAE,OAAiB;QAC9C,IAAI,CAAC,eAAe,EAAE,CAAC;QAEvB,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;QACrC,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;;;;;OAOG;IACH,MAAM;QACJ,MAAM,MAAM,GAAG,IAAI,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtD,MAAM,CAAC,OAAO,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC;QAClC,MAAyC,CAAC,MAAM,GAAG,IAAI,CAAC;QACzD,OAAO,MAA0C,CAAC;IACpD,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,KAAK,CAAC,CAAC,MAAM,CACX,QAAmB,EACnB,aAA6B;QAE7B,MAAM,SAAS,GAAG,aAAa,EAAE,SAAS,IAAI,EAAE,CAAC;QACjD,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAElD,kDAAkD;QAClD,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CACrC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,KAAK,OAAO,CACtC,CAAC;QACF,MAAM,eAAe,GAAG,UAAU,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,KAAK,OAAO,CACtC,CAAC;QAEF,yDAAyD;QACzD,IAAI,YAAY,GAAG,QAAQ,CAAC;QAC5B,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAE7B,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,UAAU,EAAE,GAClC,IAAI,CAAC,OAA0B,CAAC;YAClC,MAAM,MAAM,GAAG,MAAM,+BAA+B,CAClD,YAAY,EACZ,aAAa,EACb,UAAU,CACX,CAAC;YACF,YAAY,GAAG,MAAM,CAAC,QAAQ,CAAC;QACjC,CAAC;QAED,4DAA4D;QAC5D,MAAM,kBAAkB,GAAG,2BAA2B,CAAC,YAAY,CAAC,CAAC;QAErE,IAAI,kBAAkB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACpC,OAAO;QACT,CAAC;QAUD,MAAM,SAAS,GAAsB,EAAE,CAAC;QAExC,KACE,IAAI,IAAI,GAAG,CAAC,EACZ,IAAI,GAAG,kBAAkB,CAAC,MAAM,EAChC,IAAI,EAAE,EACN,CAAC;YACD,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAE,CAAC;YAC5C,IAAI,UA
AU,GAAG,CAAC,CAAC;YAEnB,KACE,IAAI,MAAM,GAAG,CAAC,EACd,MAAM,GAAG,SAAS,CAAC,MAAM,EACzB,MAAM,IAAI,SAAS,EACnB,CAAC;gBACD,SAAS,CAAC,IAAI,CAAC;oBACb,cAAc,EAAE,IAAI;oBACpB,UAAU;oBACV,SAAS,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,SAAS,CAAC;iBACvD,CAAC,CAAC;gBACH,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,CAAC;QAErC,uEAAuE;QACvE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,UAAU,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC;YAEjC,IAAI,kBAAkB,GAAG,UAAU,CAAC,SAAS,CAAC;YAE9C,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAE/B,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,UAAU,EAAE,GAClC,IAAI,CAAC,OAA0B,CAAC;gBAClC,kBAAkB,GAAG,MAAM,sBAAsB,CAC/C,kBAAkB,EAClB,eAAe,EACf,UAAU,CACX,CAAC;YACJ,CAAC;YAED,MAAM;gBACJ,SAAS,EAAE,kBAAkB;gBAC7B,cAAc,EAAE,UAAU,CAAC,cAAc;gBACzC,UAAU,EAAE,UAAU,CAAC,UAAU;gBACjC,MAAM,EAAE,CAAC,KAAK,WAAW,GAAG,CAAC;aAC9B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,wEAAwE;IACxE,kBAAkB;IAClB,wEAAwE;IAEhE,SAAS,CAAC,MAAc;QAC9B,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC;gBAC7B,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,KAAK,CAAC,iBAAiB;QAC7B,MAAM,MAAM,GAAqB,EAAE,CAAC;QACpC,KAAK,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACpD,IAAI,GAAG,EAAE,CAAC;gBACR,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACnB,CAAC;QACH,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,KAAK,CAAC,aAAa,CACzB,IAAgB,EAChB,OAAiB;QAEjB,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC7B,MAAM,GAAG,GAAG,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,MAAM,IAAI,KAAK,CAAC,WAAW,IAAI,yBAAyB,CAAC,CAAC;YAC5D,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC;QAED,IAAI,OAAO,IAAI,KAAK,UAAU,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAI,IAAkD,CAChE,OAAO,CACR,CAAC;YACF,OAAO,MAAM,IAAI,IAAI,CAAC;QACxB,CAAC;QAED,OAAO,IAAsB,C
AAC;IAChC,CAAC;IAEO,QAAQ,CAAC,IAAa;QAC5B,OAAO,CACL,IAAI,KAAK,IAAI;YACb,OAAO,IAAI,KAAK,QAAQ;YACxB,SAAS,IAAK,IAAe;YAC7B,KAAK,CAAC,OAAO,CAAE,IAAe,CAAC,OAAO,CAAC,CACxC,CAAC;IACJ,CAAC;IAEO,eAAe;QACrB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;CACF;AAgBD,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,SAAS,2BAA2B,CAClC,QAAmB;IAEnB,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,oEAAoE;IACpE,MAAM,aAAa,GAAoB,EAAE,CAAC;IAE1C,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,IAAI,KAAK,eAAe,IAAI,UAAU,IAAI,KAAK,EAAE,CAAC;YAC1D,MAAM,IAAI,GAAG,KAA8C,CAAC;YAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CACpC,CAAC,CAAC,EAAsB,EAAE,CACxB,OAAO,CAAC,KAAK,QAAQ;gBACrB,CAAC,KAAK,IAAI;gBACT,CAAsB,CAAC,IAAI,KAAK,cAAc,CAClD,CAAC;YACF,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;aAAM,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YACzC,aAAa,CAAC,IAAI,CAAC,KAAiC,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,uDAAuD;IACvD,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "glost-processor",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Unified-style processor API for GLOST documents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"author": "",
|
|
27
27
|
"license": "MIT",
|
|
28
28
|
"dependencies": {
|
|
29
|
-
"glost-
|
|
30
|
-
"glost-
|
|
29
|
+
"glost-extensions": "0.4.0",
|
|
30
|
+
"glost-core": "0.5.0"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^20.0.0",
|