@oscharko-dev/keiko-local-knowledge 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bounded-document-extraction.d.ts +27 -0
- package/dist/bounded-document-extraction.d.ts.map +1 -0
- package/dist/bounded-document-extraction.js +214 -0
- package/dist/capsule-lifecycle.d.ts +33 -0
- package/dist/capsule-lifecycle.d.ts.map +1 -0
- package/dist/capsule-lifecycle.js +292 -0
- package/dist/capsule-set-lifecycle.d.ts +15 -0
- package/dist/capsule-set-lifecycle.d.ts.map +1 -0
- package/dist/capsule-set-lifecycle.js +158 -0
- package/dist/chunking/chunker-persist.d.ts +36 -0
- package/dist/chunking/chunker-persist.d.ts.map +1 -0
- package/dist/chunking/chunker-persist.js +74 -0
- package/dist/chunking/chunker-runner.d.ts +9 -0
- package/dist/chunking/chunker-runner.d.ts.map +1 -0
- package/dist/chunking/chunker-runner.js +218 -0
- package/dist/chunking/chunker.d.ts +7 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +139 -0
- package/dist/chunking/citation-mapper.d.ts +4 -0
- package/dist/chunking/citation-mapper.d.ts.map +1 -0
- package/dist/chunking/citation-mapper.js +180 -0
- package/dist/chunking/index.d.ts +6 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/token-estimator.d.ts +3 -0
- package/dist/chunking/token-estimator.d.ts.map +1 -0
- package/dist/chunking/token-estimator.js +26 -0
- package/dist/chunking/types.d.ts +49 -0
- package/dist/chunking/types.d.ts.map +1 -0
- package/dist/chunking/types.js +26 -0
- package/dist/composition.d.ts +57 -0
- package/dist/composition.d.ts.map +1 -0
- package/dist/composition.js +310 -0
- package/dist/conversation/citation-attacher.d.ts +8 -0
- package/dist/conversation/citation-attacher.d.ts.map +1 -0
- package/dist/conversation/citation-attacher.js +55 -0
- package/dist/conversation/citation-excerpts.d.ts +4 -0
- package/dist/conversation/citation-excerpts.d.ts.map +1 -0
- package/dist/conversation/citation-excerpts.js +41 -0
- package/dist/conversation/grounded-answer-runner.d.ts +9 -0
- package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
- package/dist/conversation/grounded-answer-runner.js +61 -0
- package/dist/conversation/index.d.ts +5 -0
- package/dist/conversation/index.d.ts.map +1 -0
- package/dist/conversation/index.js +7 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
- package/dist/conversation/model-gateway-answer-generator.js +105 -0
- package/dist/conversation/types.d.ts +35 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +24 -0
- package/dist/discovery/discovery-runner.d.ts +23 -0
- package/dist/discovery/discovery-runner.d.ts.map +1 -0
- package/dist/discovery/discovery-runner.js +109 -0
- package/dist/discovery/extract-progressive.d.ts +17 -0
- package/dist/discovery/extract-progressive.d.ts.map +1 -0
- package/dist/discovery/extract-progressive.js +522 -0
- package/dist/discovery/extract.d.ts +26 -0
- package/dist/discovery/extract.d.ts.map +1 -0
- package/dist/discovery/extract.js +906 -0
- package/dist/discovery/glob.d.ts +10 -0
- package/dist/discovery/glob.d.ts.map +1 -0
- package/dist/discovery/glob.js +72 -0
- package/dist/discovery/index.d.ts +6 -0
- package/dist/discovery/index.d.ts.map +1 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/media-type.d.ts +4 -0
- package/dist/discovery/media-type.d.ts.map +1 -0
- package/dist/discovery/media-type.js +62 -0
- package/dist/discovery/persist.d.ts +63 -0
- package/dist/discovery/persist.d.ts.map +1 -0
- package/dist/discovery/persist.js +345 -0
- package/dist/discovery/test-support.d.ts +16 -0
- package/dist/discovery/test-support.d.ts.map +1 -0
- package/dist/discovery/test-support.js +127 -0
- package/dist/discovery/types.d.ts +63 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +28 -0
- package/dist/discovery/walk.d.ts +12 -0
- package/dist/discovery/walk.d.ts.map +1 -0
- package/dist/discovery/walk.js +302 -0
- package/dist/errors.d.ts +13 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +22 -0
- package/dist/evaluations/dimensions.d.ts +14 -0
- package/dist/evaluations/dimensions.d.ts.map +1 -0
- package/dist/evaluations/dimensions.js +191 -0
- package/dist/evaluations/fixtures.d.ts +18 -0
- package/dist/evaluations/fixtures.d.ts.map +1 -0
- package/dist/evaluations/fixtures.js +858 -0
- package/dist/evaluations/index.d.ts +7 -0
- package/dist/evaluations/index.d.ts.map +1 -0
- package/dist/evaluations/index.js +10 -0
- package/dist/evaluations/report.d.ts +3 -0
- package/dist/evaluations/report.d.ts.map +1 -0
- package/dist/evaluations/report.js +31 -0
- package/dist/evaluations/runner-seed.d.ts +12 -0
- package/dist/evaluations/runner-seed.d.ts.map +1 -0
- package/dist/evaluations/runner-seed.js +175 -0
- package/dist/evaluations/runner.d.ts +8 -0
- package/dist/evaluations/runner.d.ts.map +1 -0
- package/dist/evaluations/runner.js +205 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
- package/dist/evaluations/scripted-embedding-adapter.js +163 -0
- package/dist/evaluations/types.d.ts +116 -0
- package/dist/evaluations/types.d.ts.map +1 -0
- package/dist/evaluations/types.js +27 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/indexing/bounded-indexing.d.ts +41 -0
- package/dist/indexing/bounded-indexing.d.ts.map +1 -0
- package/dist/indexing/bounded-indexing.js +240 -0
- package/dist/indexing/checkpoint-persist.d.ts +8 -0
- package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
- package/dist/indexing/checkpoint-persist.js +135 -0
- package/dist/indexing/checkpoint-resume.d.ts +20 -0
- package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
- package/dist/indexing/checkpoint-resume.js +50 -0
- package/dist/indexing/embedding-batcher.d.ts +3 -0
- package/dist/indexing/embedding-batcher.d.ts.map +1 -0
- package/dist/indexing/embedding-batcher.js +390 -0
- package/dist/indexing/index.d.ts +7 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +11 -0
- package/dist/indexing/job-persist.d.ts +46 -0
- package/dist/indexing/job-persist.d.ts.map +1 -0
- package/dist/indexing/job-persist.js +157 -0
- package/dist/indexing/job-resume.d.ts +4 -0
- package/dist/indexing/job-resume.d.ts.map +1 -0
- package/dist/indexing/job-resume.js +14 -0
- package/dist/indexing/orchestrator.d.ts +3 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -0
- package/dist/indexing/orchestrator.js +1151 -0
- package/dist/indexing/types.d.ts +156 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +30 -0
- package/dist/indexing/vector-persist.d.ts +32 -0
- package/dist/indexing/vector-persist.d.ts.map +1 -0
- package/dist/indexing/vector-persist.js +105 -0
- package/dist/parsers/_internal.d.ts +20 -0
- package/dist/parsers/_internal.d.ts.map +1 -0
- package/dist/parsers/_internal.js +122 -0
- package/dist/parsers/csv-parser.d.ts +3 -0
- package/dist/parsers/csv-parser.d.ts.map +1 -0
- package/dist/parsers/csv-parser.js +202 -0
- package/dist/parsers/docx-parser.d.ts +3 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +390 -0
- package/dist/parsers/html-parser.d.ts +3 -0
- package/dist/parsers/html-parser.d.ts.map +1 -0
- package/dist/parsers/html-parser.js +310 -0
- package/dist/parsers/index.d.ts +15 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +41 -0
- package/dist/parsers/json-parser.d.ts +3 -0
- package/dist/parsers/json-parser.d.ts.map +1 -0
- package/dist/parsers/json-parser.js +192 -0
- package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
- package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
- package/dist/parsers/large-document/capability-discovery.js +76 -0
- package/dist/parsers/large-document/diagnostics.d.ts +3 -0
- package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
- package/dist/parsers/large-document/diagnostics.js +11 -0
- package/dist/parsers/large-document/index.d.ts +15 -0
- package/dist/parsers/large-document/index.d.ts.map +1 -0
- package/dist/parsers/large-document/index.js +10 -0
- package/dist/parsers/large-document/legacy-format.d.ts +5 -0
- package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
- package/dist/parsers/large-document/legacy-format.js +25 -0
- package/dist/parsers/large-document/preflight.d.ts +9 -0
- package/dist/parsers/large-document/preflight.d.ts.map +1 -0
- package/dist/parsers/large-document/preflight.js +43 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-extraction.js +123 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-pdf.js +145 -0
- package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
- package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
- package/dist/parsers/large-document/synthetic-source.js +101 -0
- package/dist/parsers/large-document/window-builder.d.ts +24 -0
- package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
- package/dist/parsers/large-document/window-builder.js +75 -0
- package/dist/parsers/ocr/index.d.ts +4 -0
- package/dist/parsers/ocr/index.d.ts.map +1 -0
- package/dist/parsers/ocr/index.js +4 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
- package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
- package/dist/parsers/ocr/types.d.ts +16 -0
- package/dist/parsers/ocr/types.d.ts.map +1 -0
- package/dist/parsers/ocr/types.js +4 -0
- package/dist/parsers/parser-test-fixtures.d.ts +28 -0
- package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
- package/dist/parsers/parser-test-fixtures.js +139 -0
- package/dist/parsers/pdf-parser.d.ts +43 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +388 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +57 -0
- package/dist/parsers/text-parser.d.ts +3 -0
- package/dist/parsers/text-parser.d.ts.map +1 -0
- package/dist/parsers/text-parser.js +214 -0
- package/dist/parsers/types.d.ts +53 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +21 -0
- package/dist/parsers/unsupported-parser.d.ts +4 -0
- package/dist/parsers/unsupported-parser.d.ts.map +1 -0
- package/dist/parsers/unsupported-parser.js +97 -0
- package/dist/parsers/xlsx-parser.d.ts +3 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +425 -0
- package/dist/privacy/audit-emitter.d.ts +5 -0
- package/dist/privacy/audit-emitter.d.ts.map +1 -0
- package/dist/privacy/audit-emitter.js +93 -0
- package/dist/privacy/diagnostic-redactor.d.ts +2 -0
- package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
- package/dist/privacy/diagnostic-redactor.js +153 -0
- package/dist/privacy/index.d.ts +5 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +6 -0
- package/dist/privacy/retention-applier.d.ts +5 -0
- package/dist/privacy/retention-applier.d.ts.map +1 -0
- package/dist/privacy/retention-applier.js +88 -0
- package/dist/privacy/types.d.ts +98 -0
- package/dist/privacy/types.d.ts.map +1 -0
- package/dist/privacy/types.js +12 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
- package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
- package/dist/qualityIntelligence/index.d.ts +3 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +5 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
- package/dist/qualityIntelligence/qiHandoff.js +82 -0
- package/dist/retrieval/answer-grounding.d.ts +9 -0
- package/dist/retrieval/answer-grounding.d.ts.map +1 -0
- package/dist/retrieval/answer-grounding.js +31 -0
- package/dist/retrieval/context-pack-assembler.d.ts +24 -0
- package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
- package/dist/retrieval/context-pack-assembler.js +50 -0
- package/dist/retrieval/index.d.ts +6 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +9 -0
- package/dist/retrieval/retrieval-runner.d.ts +10 -0
- package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
- package/dist/retrieval/retrieval-runner.js +163 -0
- package/dist/retrieval/scoped-vector-search.d.ts +24 -0
- package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
- package/dist/retrieval/scoped-vector-search.js +864 -0
- package/dist/retrieval/types.d.ts +28 -0
- package/dist/retrieval/types.d.ts.map +1 -0
- package/dist/retrieval/types.js +33 -0
- package/dist/section-path-hash.d.ts +3 -0
- package/dist/section-path-hash.d.ts.map +1 -0
- package/dist/section-path-hash.js +9 -0
- package/dist/source-lifecycle.d.ts +14 -0
- package/dist/source-lifecycle.d.ts.map +1 -0
- package/dist/source-lifecycle.js +155 -0
- package/dist/source-routing-validation.d.ts +11 -0
- package/dist/source-routing-validation.d.ts.map +1 -0
- package/dist/source-routing-validation.js +140 -0
- package/dist/store-content-cipher.d.ts +11 -0
- package/dist/store-content-cipher.d.ts.map +1 -0
- package/dist/store-content-cipher.js +67 -0
- package/dist/store-content-encryption.d.ts +12 -0
- package/dist/store-content-encryption.d.ts.map +1 -0
- package/dist/store-content-encryption.js +275 -0
- package/dist/store-paths.d.ts +6 -0
- package/dist/store-paths.d.ts.map +1 -0
- package/dist/store-paths.js +61 -0
- package/dist/store.d.ts +30 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +219 -0
- package/dist/testing.d.ts +47 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +170 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +43 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// Deterministic synthetic streaming source + extractor for the bounded-RSS regression
|
|
2
|
+
// (Epic #1160, Issue #1286).
|
|
3
|
+
//
|
|
4
|
+
// The issue allows demonstrating bounded peak memory with "an equivalent synthetic streaming
|
|
5
|
+
// fixture" instead of committing a one-gigabyte binary document. This source never materializes
|
|
6
|
+
// the whole document: `readWindow` generates exactly the requested bytes on the fly via a pure
|
|
7
|
+
// position function, so a 1 GiB-class document can be driven through the progressive pipeline
|
|
8
|
+
// while only one page window of text is resident at a time.
|
|
9
|
+
//
|
|
10
|
+
// The byte layout is `totalPages` pages of `pageChars` ASCII letters each, separated by "\n\n"
|
|
11
|
+
// (matching the progressive PDF separator), so the synthetic extractor produces the same window
|
|
12
|
+
// shape as the real PDF strategy and exercises the identical persistence/checkpoint path.
|
|
13
|
+
import { WindowTextBuilder } from "./window-builder.js";
|
|
14
|
+
// Width in bytes of the "\n\n" separator placed between consecutive pages.
const SEPARATOR_BYTES = 2;
// Byte value of "\n".
const NEWLINE = 0x0a;
// Distance in bytes from the start of one page to the start of the next:
// the page body (`pageChars` bytes) plus one separator.
function strideBytes(pageChars) {
    const bodyPlusSeparator = pageChars + SEPARATOR_BYTES;
    return bodyPlusSeparator;
}
|
|
19
|
+
// Pure function of (page number, intra-page index) -> ASCII code in "A".."Z".
// The letter cycles with `pageNumber + k`, so page bodies differ from each other
// and the text chunks produced from them are distinct.
function pageBodyByte(pageNumber, k) {
    const letterIndex = (pageNumber + k) % 26;
    return 0x41 + letterIndex;
}
|
|
24
|
+
// Returns the document byte at absolute offset `pos`. Each stride is a page body of
// `pageChars` letters followed by separator bytes; offsets inside the body map to the
// page's deterministic letter, every other offset in the stride is a newline.
function byteAt(pos, pageChars) {
    const pageStride = strideBytes(pageChars);
    const offsetInStride = pos % pageStride;
    // Pages are numbered from 1, hence the +1 on the zero-based stride index.
    return offsetInStride < pageChars
        ? pageBodyByte(Math.floor(pos / pageStride) + 1, offsetInStride)
        : NEWLINE;
}
|
|
33
|
+
// Builds a streaming source over a synthetic document of `config.totalPages` pages, each
// `config.pageChars` ASCII letters long, with a two-byte separator between consecutive pages.
//
// Returns `{ totalBytes, readWindow }`:
//   - `totalBytes` is the size of the whole document. The last page has no trailing
//     separator, and an empty document (`totalPages === 0`) reports 0 rather than a
//     negative size.
//   - `readWindow(startByte, length)` resolves to a fresh `Uint8Array` of exactly `length`
//     bytes computed on demand; the full document is never allocated.
export function syntheticStreamingSource(config) {
    const stride = strideBytes(config.pageChars);
    // Without the clamp, zero pages would yield -SEPARATOR_BYTES, which is not a valid size.
    const totalBytes = Math.max(0, config.totalPages * stride - SEPARATOR_BYTES);
    return {
        totalBytes,
        // Generates exactly `length` bytes; never allocates the whole document.
        readWindow: (startByte, length) => {
            const out = new Uint8Array(length);
            for (let i = 0; i < length; i += 1) {
                out[i] = byteAt(startByte + i, config.pageChars);
            }
            return Promise.resolve(out);
        },
    };
}
|
|
48
|
+
// Absolute byte offset at which the body of 1-based page `pageNumber` begins.
function pageByteStart(pageNumber, pageChars) {
    const pagesBefore = pageNumber - 1;
    return pagesBefore * strideBytes(pageChars);
}
|
|
51
|
+
// Extracts one window covering pages `firstPage`..`lastPage` (inclusive).
//
// Each page body is read through `readWindow`, decoded, and appended to a fresh
// WindowTextBuilder seeded from `state`. `state` is mutated in place: `objectCursor`
// advances by `config.pageChars` per page, `cursor` and `anyPageEmitted` take the builder's
// final values, and `windowIndex` is incremented after the window object has been built.
async function syntheticWindow(readWindow, decoder, documentId, config, firstPage, lastPage, state) {
    const textBuilder = new WindowTextBuilder(documentId, state.cursor, state.anyPageEmitted);
    let pageNumber = firstPage;
    while (pageNumber <= lastPage) {
        // Pages are read one at a time, in order.
        const pageBytes = await readWindow(pageByteStart(pageNumber, config.pageChars), config.pageChars);
        const pageText = decoder.decode(pageBytes);
        textBuilder.addPage(pageNumber, pageText);
        state.objectCursor += config.pageChars;
        pageNumber += 1;
    }
    state.cursor = textBuilder.nextCursor;
    state.anyPageEmitted = textBuilder.hasEmittedAnyPage;
    const extracted = {
        windowIndex: state.windowIndex,
        pages: textBuilder.snapshotPages(),
        units: textBuilder.snapshotUnits(),
        text: textBuilder.text(),
        characterStart: textBuilder.characterStart,
        objectCursor: state.objectCursor,
        lastPageNumber: lastPage,
        diagnostics: [],
    };
    // The emitted window carries the pre-increment index.
    state.windowIndex += 1;
    return extracted;
}
|
|
73
|
+
// Async generator yielding one extracted window per `config.pagesPerWindow` pages.
//
// Yields nothing when `source.readWindow` is undefined. Extraction starts at the page after
// `options.resumeFromPage` (default 0); the character cursor, object cursor and window
// index are seeded from the matching `options.resume*` fields (default 0). The abort
// signal is checked before each window, and the generator ends quietly once it is aborted.
async function* syntheticExtractWindows(config, source, options) {
    const { readWindow } = source;
    if (readWindow === undefined) {
        return;
    }
    const decoder = new TextDecoder("utf-8");
    const resumeFromPage = options.resumeFromPage ?? 0;
    const state = {
        cursor: options.resumeCharacterStart ?? 0,
        // A resumed extraction has already emitted at least one page.
        anyPageEmitted: resumeFromPage > 0,
        objectCursor: options.resumeObjectCursor ?? 0,
        windowIndex: options.resumeWindowIndex ?? 0,
    };
    let firstPage = resumeFromPage + 1;
    while (firstPage <= config.totalPages) {
        if (options.signal?.aborted === true) {
            return;
        }
        // The final window may be shorter than `pagesPerWindow`.
        const unclampedLastPage = firstPage + config.pagesPerWindow - 1;
        const lastPage = Math.min(unclampedLastPage, config.totalPages);
        yield syntheticWindow(readWindow, decoder, options.documentId, config, firstPage, lastPage, state);
        firstPage += config.pagesPerWindow;
    }
}
|
|
92
|
+
// Creates a ProgressiveExtractor whose windows are read from a streaming source, one
// window of pages at a time. It reports the "progressive-pdf" strategy id and matches only
// inputs whose extension is "synthetic". `parserVersion` can be overridden so
// resume-compatibility tests can simulate a parser upgrade.
export function syntheticProgressiveExtractor(config, parserVersion = "synthetic@1") {
    const matches = (input) => input.extension === "synthetic";
    const extractWindows = (source, options) => syntheticExtractWindows(config, source, options);
    const extractor = {
        strategyId: "progressive-pdf",
        parserVersion,
        matches,
        extractWindows,
    };
    return extractor;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { DocumentId, PageRecord, ParsedUnit } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
/** The pair of records returned by a single `WindowTextBuilder.addPage` call. */
export interface AddedPage {
    /** Page record for the page that was appended. */
    readonly page: PageRecord;
    /** Parsed unit describing the same page. */
    readonly unit: ParsedUnit;
}
|
|
6
|
+
/**
 * Accumulates the text, page records and parsed units of one extraction window while
 * tracking a document-relative character cursor.
 */
export declare class WindowTextBuilder {
    private readonly documentId;
    private cursor;
    private anyPageEmitted;
    private readonly windowStart;
    private appended;
    private readonly pages;
    private readonly units;
    /**
     * @param documentId Identifier stamped on every page record and unit.
     * @param cursor Document-relative character offset at which this window starts.
     * @param anyPageEmitted Whether a page of the document was already emitted before this window.
     */
    constructor(documentId: DocumentId, cursor: number, anyPageEmitted: boolean);
    /** Appends one page's text and returns the page record and unit created for it. */
    addPage(pageNumber: number, pageText: string): AddedPage;
    /** Number of pages added to this window so far. */
    get pageCount(): number;
    /** Character cursor after the last page added. */
    get nextCursor(): number;
    /** Whether any page has been emitted, before or during this window. */
    get hasEmittedAnyPage(): boolean;
    /** The cursor value this builder was constructed with. */
    get characterStart(): number;
    /** Page records added to this window, in insertion order. */
    snapshotPages(): readonly PageRecord[];
    /** Parsed units added to this window, in insertion order. */
    snapshotUnits(): readonly ParsedUnit[];
    /** Text appended in this window, including any page separators. */
    text(): string;
}
|
|
24
|
+
//# sourceMappingURL=window-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"window-builder.d.ts","sourceRoot":"","sources":["../../../src/parsers/large-document/window-builder.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,+BAA+B,CAAC;AAIxF,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;IAC1B,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;CAC3B;AAED,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAa;IACxC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAM;IACtB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAoB;IAC1C,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAoB;gBAE9B,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO;IAS3E,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,SAAS;IA8BxD,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,iBAAiB,IAAI,OAAO,CAE/B;IAED,IAAI,cAAc,IAAI,MAAM,CAE3B;IAED,aAAa,IAAI,SAAS,UAAU,EAAE;IAItC,aAAa,IAAI,SAAS,UAAU,EAAE;IAItC,IAAI,IAAI,MAAM;CAGf"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Shared document-relative offset bookkeeping for progressive extractors (Issue #1286).
|
|
2
|
+
//
|
|
3
|
+
// Pages are separated by "\n\n" (two characters) — the same separator the existing full-buffer
|
|
4
|
+
// PDF parser uses (`pageTexts.join("\n\n")`) — so chunk character offsets stay aligned with the
|
|
5
|
+
// persisted normalized text regardless of whether a document was extracted in one pass or
|
|
6
|
+
// window by window. The builder accumulates one window's appended text and the page records for
|
|
7
|
+
// the pages added to it; the caller flushes the window and starts a fresh builder for the next.
|
|
8
|
+
// Two-character separator inserted before every page except the first emitted page of
// the document.
const PAGE_SEPARATOR = "\n\n";
// Accumulates one window's text together with the page records and parsed units for the
// pages added to it, while tracking a document-relative character cursor.
export class WindowTextBuilder {
    documentId;
    cursor;
    anyPageEmitted;
    windowStart;
    appended = "";
    pages = [];
    units = [];
    // `cursor` is the document-relative offset at which this window starts;
    // `anyPageEmitted` tells whether an earlier window already emitted a page, in which
    // case the first page added here is preceded by a separator.
    constructor(documentId, cursor, anyPageEmitted) {
        this.documentId = documentId;
        this.cursor = cursor;
        this.anyPageEmitted = anyPageEmitted;
        this.windowStart = cursor;
    }
    // Appends a page, advancing the document-relative cursor across the "\n\n" separator
    // that precedes every page after the first emitted page of the document. Returns the
    // page record and parsed unit created for the page; both carry the same
    // [characterStart, characterEnd) range, measured in string (UTF-16) units.
    addPage(pageNumber, pageText) {
        if (this.anyPageEmitted) {
            this.appended += PAGE_SEPARATOR;
            this.cursor += PAGE_SEPARATOR.length;
        }
        const characterStart = this.cursor;
        const characterEnd = characterStart + pageText.length;
        this.appended += pageText;
        this.cursor = characterEnd;
        this.anyPageEmitted = true;
        const page = {
            documentId: this.documentId,
            pageNumber,
            pageLabel: String(pageNumber),
            characterStart,
            characterEnd,
        };
        const unit = {
            kind: "page",
            documentId: this.documentId,
            pageNumber,
            pageLabel: String(pageNumber),
            characterStart,
            characterEnd,
        };
        this.pages.push(page);
        this.units.push(unit);
        return { page, unit };
    }
    // Number of pages added to this window.
    get pageCount() {
        return this.pages.length;
    }
    // Cursor position after the last page added.
    get nextCursor() {
        return this.cursor;
    }
    // True once any page has been emitted, in this window or an earlier one.
    get hasEmittedAnyPage() {
        return this.anyPageEmitted;
    }
    // Cursor value the builder was constructed with.
    get characterStart() {
        return this.windowStart;
    }
    // Returns a shallow copy so that later `addPage` calls, or mutation by the caller,
    // cannot change a snapshot that was already handed out.
    snapshotPages() {
        return [...this.pages];
    }
    // Shallow copy, for the same reason as `snapshotPages`.
    snapshotUnits() {
        return [...this.units];
    }
    // Text appended in this window, including any separators.
    text() {
        return this.appended;
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/parsers/ocr/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,uBAAuB,EAAE,KAAK,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC5F,OAAO,EAAE,KAAK,UAAU,EAAE,KAAK,aAAa,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"null-ocr-adapter.d.ts","sourceRoot":"","sources":["../../../src/parsers/ocr/null-ocr-adapter.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,UAAU,EAAiB,MAAM,YAAY,CAAC;AAO5D,eAAO,MAAM,cAAc,EAAE,UAM3B,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// NullOcrAdapter (Epic #189, Issue #202). The default adapter used when no real OCR engine
|
|
2
|
+
// has been configured. Always returns `ok: false, reason: "ocr-not-configured"` so the
|
|
3
|
+
// pipeline parser can fire the standard unsupported-media diagnostic rather than silently
|
|
4
|
+
// skipping the document.
|
|
5
|
+
//
|
|
6
|
+
// Never throws — the contract in OcrAdapter.ocrPage forbids throwing.
|
|
7
|
+
// Shared, immutable failure outcome handed back for every page.
const NOT_CONFIGURED = Object.freeze({ ok: false, reason: "ocr-not-configured" });
// Default adapter for when no OCR engine is configured: `ocrPage` ignores its input and
// resolves to the frozen "ocr-not-configured" outcome. It never throws or rejects.
export const nullOcrAdapter = Object.freeze({
    kind: "ocr",
    ocrPage(_input) {
        return Promise.resolve(NOT_CONFIGURED);
    },
});
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ParserResult } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { ParserAdapter, ParserOptions, ParserSelectionInput } from "../types.js";
|
|
3
|
+
import type { OcrAdapter } from "./types.js";
|
|
4
|
+
/** A `ParserAdapter` that additionally exposes an awaitable parse entry point. */
export interface OcrPipelineAdapter extends ParserAdapter {
    /** Asynchronous counterpart of `parse`; resolves to the parser result. */
    readonly parseAsync: (input: ParserSelectionInput, options: ParserOptions) => Promise<ParserResult>;
}
|
|
7
|
+
/** Factory that wraps an `OcrAdapter` in a parser adapter offering `parseAsync`. */
export declare function createOcrPipelineParser(adapter: OcrAdapter): OcrPipelineAdapter;
|
|
8
|
+
//# sourceMappingURL=ocr-pipeline-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-pipeline-parser.d.ts","sourceRoot":"","sources":["../../../src/parsers/ocr/ocr-pipeline-parser.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAc,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAG9E,OAAO,KAAK,EACV,aAAa,EAEb,aAAa,EACb,oBAAoB,EACrB,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,UAAU,EAAiB,MAAM,YAAY,CAAC;AAwK5D,MAAM,WAAW,kBAAmB,SAAQ,aAAa;IACvD,QAAQ,CAAC,UAAU,EAAE,CACnB,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,aAAa,KACnB,OAAO,CAAC,YAAY,CAAC,CAAC;CAC5B;AAID,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,UAAU,GAAG,kBAAkB,CAW/E"}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
// OCR pipeline parser (Epic #189, Issue #202). A `ParserAdapter` factory that wraps an
|
|
2
|
+
// `OcrAdapter`. The sync `parse` method always fires the unsupported-media diagnostic (the
|
|
3
|
+
// `ParserAdapter.parse` contract is synchronous and cannot await the OCR result). The async
|
|
4
|
+
// `parseAsync` method is the real entry point for callers that can await: when OCR succeeds
|
|
5
|
+
// it emits one `ParsedUnit { kind: "page" }` per recognised page; when OCR fails it fires
|
|
6
|
+
// the standard unsupported-media diagnostic consistent with unsupported-parser.ts (#266).
|
|
7
|
+
//
|
|
8
|
+
// No multi-page splitter ships yet — until a real splitter exists the input bytes are treated
|
|
9
|
+
// as page 1. That assumption is isolated here and collapses when a splitter is added.
|
|
10
|
+
import { diagnostic, emptyResult, oversizeDiagnostic, shouldStop } from "../_internal.js";
|
|
11
|
+
// Parser identifier and version strings.
const PARSER_ID = "ocr-pipeline";
const PARSER_VERSION = "1";
// Lower-case file extensions treated as OCR candidates.
const OCR_EXTENSIONS = new Set(["pdf", "png", "jpg", "jpeg", "gif", "bmp", "tif", "tiff", "webp"]);
// Media-type prefixes treated as OCR candidates.
const OCR_MEDIA_PREFIXES = ["image/", "application/pdf"];
|
|
25
|
+
// Leading-byte signatures used as a fallback when neither extension nor media type
// identifies the input.
const OCR_MAGIC = Object.freeze([
    { prefix: [0x25, 0x50, 0x44, 0x46] }, // PDF: %PDF
    { prefix: [0x89, 0x50, 0x4e, 0x47] }, // PNG
    { prefix: [0xff, 0xd8, 0xff] }, // JPEG
    { prefix: [0x47, 0x49, 0x46, 0x38] }, // GIF8
    { prefix: [0x42, 0x4d] }, // BMP
]);
// True when `bytes` starts with any signature in OCR_MAGIC. Inputs shorter than a
// signature can never match that signature.
function matchesMagicBytes(bytes) {
    return OCR_MAGIC.some(({ prefix }) => {
        if (bytes.length < prefix.length) {
            return false;
        }
        return prefix.every((expected, index) => bytes[index] === expected);
    });
}
|
|
49
|
+
// Decides whether `input` should be routed to OCR. Checks run in order: a known
// extension, then a known media-type prefix, then a leading-byte signature. Extension
// and media type are compared case-insensitively.
function isOcrCandidate(input) {
    const extension = input.extension.toLowerCase();
    if (OCR_EXTENSIONS.has(extension)) {
        return true;
    }
    const mediaTypeMatches = OCR_MEDIA_PREFIXES.some((prefix) => input.mediaType.toLowerCase().startsWith(prefix));
    if (mediaTypeMatches) {
        return true;
    }
    return matchesMagicBytes(input.bytes);
}
|
|
58
|
+
// Chooses the unsupported-media reason code: "pdf-not-implemented" when the extension is
// "pdf" or the media type is exactly "application/pdf" (both compared case-insensitively),
// and "image-not-supported" for everything else.
function unsupportedReason(input) {
    const looksLikePdf = input.extension.toLowerCase() === "pdf" ||
        input.mediaType.toLowerCase() === "application/pdf";
    return looksLikePdf ? "pdf-not-implemented" : "image-not-supported";
}
|
|
64
|
+
// Wrapping the `signal.aborted` read in a function call keeps TypeScript's control-flow
// narrowing from carrying a `false` result across an `await`, where it would wrongly
// treat a later check as still `false`. A missing signal counts as not aborted.
function isAborted(signal) {
    if (signal == null) {
        return false;
    }
    return signal.aborted === true;
}
|
|
69
|
+
// Result returned when the caller aborted: built by `emptyResult` and carrying a single
// info-level PARSER_CANCELLED diagnostic.
function cancelled(cap, input, options) {
    const diagnostics = [
        diagnostic("PARSER_CANCELLED", "caller aborted parser", input.documentId, "info"),
    ];
    return emptyResult(cap, input.documentId, options, diagnostics);
}
|
|
74
|
+
// Converts an OCR adapter outcome into a parser result.
//
// On success the result carries one page unit for page 1 spanning [0, text.length).
// On failure it carries an info-level UNSUPPORTED_FORMAT diagnostic and an
// "unsupported-media" marker. The marker's reason comes from `unsupportedReason` when the
// adapter is simply not configured, and is `ocr-failed:<reason>` otherwise.
function resultFromOcrOutcome(ocrResult, cap, input, options) {
    if (ocrResult.ok) {
        const recognisedPage = {
            kind: "page",
            documentId: input.documentId,
            pageNumber: 1,
            characterStart: 0,
            characterEnd: ocrResult.text.length,
        };
        return emptyResult(cap, input.documentId, options, [], [recognisedPage]);
    }
    let reason;
    if (ocrResult.reason === "ocr-not-configured") {
        reason = unsupportedReason(input);
    }
    else {
        reason = `ocr-failed:${ocrResult.reason}`;
    }
    const diagnostics = [
        diagnostic("UNSUPPORTED_FORMAT", `ocr adapter returned ok:false (${ocrResult.reason})`, input.documentId, "info"),
    ];
    const markers = [{ kind: "unsupported-media", documentId: input.documentId, reason }];
    return emptyResult(cap, input.documentId, options, diagnostics, markers);
}
|
|
92
|
+
// Builds the synchronous `parse` implementation. It cannot await OCR, so after the
// size and abort guards it always reports the input as unsupported media and tells the
// caller to use `parseAsync`.
function buildSyncParse(cap) {
    return function parseSync(input, options) {
        const byteLength = input.bytes.byteLength;
        if (byteLength > options.maxBytes) {
            const tooLarge = oversizeDiagnostic(input.documentId, input.bytes.byteLength, options.maxBytes);
            return emptyResult(cap, input.documentId, options, [tooLarge]);
        }
        if (isAborted(options.signal)) {
            return cancelled(cap, input, options);
        }
        const reason = unsupportedReason(input);
        const diagnostics = [
            diagnostic("UNSUPPORTED_FORMAT", `ocr adapter present but sync parse called; use parseAsync (${reason})`, input.documentId, "info"),
        ];
        const markers = [{ kind: "unsupported-media", documentId: input.documentId, reason }];
        return emptyResult(cap, input.documentId, options, diagnostics, markers);
    };
}
|
|
107
|
+
/**
 * Creates the asynchronous `parseAsync` entry point, which delegates to the OCR adapter.
 *
 * The returned function, in order:
 *   1. rejects oversize input with an oversize diagnostic;
 *   2. returns the cancellation result if the signal is already aborted;
 *   3. records the start time via `options.now()` and consults `shouldStop` before OCR;
 *   4. awaits `adapter.ocrPage` for page 1 of the raw bytes;
 *   5. re-checks the abort signal and `shouldStop` after the await, since either may
 *      have changed while OCR was running;
 *   6. maps the adapter outcome through `resultFromOcrOutcome`.
 *
 * A `shouldStop` verdict is honoured only when it carries both a `code` and a `message`.
 * A rejection from `adapter.ocrPage` is not caught here and propagates to the caller.
 *
 * @param cap Parser capability descriptor embedded in every result.
 * @param adapter OCR adapter exposing `ocrPage({ bytes, pageNumber })`.
 * @returns An async `(input, options) => result` function.
 */
function buildAsyncParse(cap, adapter) {
    // A stop verdict is actionable only when it is fully described.
    const isActionable = (verdict) => verdict.stop && verdict.code !== undefined && verdict.message !== undefined;
    // Wraps an actionable stop verdict into an empty result with one info diagnostic.
    const haltedResult = (verdict, input, options) => {
        const notice = diagnostic(verdict.code, verdict.message, input.documentId, "info");
        return emptyResult(cap, input.documentId, options, [notice]);
    };
    return async (input, options) => {
        const byteCount = input.bytes.byteLength;
        if (byteCount > options.maxBytes) {
            const tooLarge = oversizeDiagnostic(input.documentId, byteCount, options.maxBytes);
            return emptyResult(cap, input.documentId, options, [tooLarge]);
        }
        if (isAborted(options.signal)) {
            return cancelled(cap, input, options);
        }
        const startedAt = options.now();
        const before = shouldStop(startedAt, options, 0);
        if (isActionable(before)) {
            return haltedResult(before, input, options);
        }
        const ocrResult = await adapter.ocrPage({ bytes: input.bytes, pageNumber: 1 });
        if (isAborted(options.signal)) {
            return cancelled(cap, input, options);
        }
        const after = shouldStop(startedAt, options, 0);
        if (isActionable(after)) {
            return haltedResult(after, input, options);
        }
        return resultFromOcrOutcome(ocrResult, cap, input, options);
    };
}
|
|
135
|
+
// ─── Factory ─────────────────────────────────────────────────────────────────
|
|
136
|
+
/**
 * Assembles the OCR pipeline parser around an injected OCR adapter.
 *
 * The parser is a frozen object holding a frozen capability descriptor (id, version and
 * the `isOcrCandidate` matcher) plus the sync and async parse entry points. Only the
 * async entry point receives the adapter.
 *
 * @param adapter OCR adapter handed to the async parse path.
 * @returns Frozen `{ capability, parse, parseAsync }` parser object.
 */
export function createOcrPipelineParser(adapter) {
    const capability = Object.freeze({
        parserId: PARSER_ID,
        parserVersion: PARSER_VERSION,
        matches: isOcrCandidate,
    });
    const parse = buildSyncParse(capability);
    const parseAsync = buildAsyncParse(capability, adapter);
    return Object.freeze({ capability, parse, parseAsync });
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Outcome of running OCR on a single page, discriminated on `ok`.
 *
 * - `ok: true` carries the recognized `text` and a numeric `confidence` reported by the
 *   adapter (its scale is not specified by this declaration).
 * - `ok: false` carries a `reason` drawn from a closed set of failure codes.
 */
export type OcrPageResult =
    | {
        readonly ok: true;
        readonly text: string;
        readonly confidence: number;
    }
    | {
        readonly ok: false;
        readonly reason: "ocr-not-configured" | "timeout" | "unsupported-input";
    };
|
|
9
|
+
/**
 * Port implemented by an OCR backend and injected into the OCR pipeline parser.
 */
export interface OcrAdapter {
    /** Fixed discriminator identifying this object as an OCR adapter. */
    readonly kind: "ocr";
    /**
     * Runs OCR for one page.
     *
     * Receives the raw `bytes` and the `pageNumber` to process, and resolves to an
     * `OcrPageResult` describing success or failure.
     */
    readonly ocrPage: (input: {
        readonly bytes: Uint8Array;
        readonly pageNumber: number;
    }) => Promise<OcrPageResult>;
}
|
|
16
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/parsers/ocr/types.ts"],"names":[],"mappings":"AAMA,MAAM,MAAM,aAAa,GACrB;IACE,QAAQ,CAAC,EAAE,EAAE,IAAI,CAAC;IAGlB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAGtB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B,GACD;IACE,QAAQ,CAAC,EAAE,EAAE,KAAK,CAAC;IAKnB,QAAQ,CAAC,MAAM,EAAE,oBAAoB,GAAG,SAAS,GAAG,mBAAmB,CAAC;CACzE,CAAC;AAIN,MAAM,WAAW,UAAU;IAGzB,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;IAIrB,QAAQ,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE;QACxB,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;QAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;KAC7B,KAAK,OAAO,CAAC,aAAa,CAAC,CAAC;CAC9B"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
// OCR adapter port contract (Epic #189, Issue #202). Pure interface — no IO, no clock, no
|
|
2
|
+
// FS. A real OCR implementation (Tesseract, cloud API, etc.) implements `OcrAdapter` and is
|
|
3
|
+
// injected into `createOcrPipelineParser` without changing the parser registry.
|
|
4
|
+
export {};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { DocumentId } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { ParserSelectionInput } from "./types.js";
|
|
3
|
+
export declare const FIXTURE_DOCUMENT_ID: DocumentId;
|
|
4
|
+
export declare function encode(text: string): Uint8Array;
|
|
5
|
+
export declare function selectionFromText(text: string, overrides?: Partial<ParserSelectionInput>): ParserSelectionInput;
|
|
6
|
+
export declare function selectionFromBytes(bytes: Uint8Array, overrides?: Partial<ParserSelectionInput>): ParserSelectionInput;
|
|
7
|
+
export declare const TEXT_PLAIN = "Hello, world.\nSecond paragraph here.\n";
|
|
8
|
+
export declare const MARKDOWN_DOC: string;
|
|
9
|
+
export declare const JSON_FLAT = "{\"name\":\"alpha\",\"count\":3,\"active\":true}";
|
|
10
|
+
export declare const JSON_NESTED: string;
|
|
11
|
+
export declare const CSV_SIMPLE = "a,b,c\n1,2,3\n4,5,6\n";
|
|
12
|
+
export declare const CSV_QUOTED = "a,b,c\n\"x,1\",\"y\"\"2\",\"z\n3\"\n";
|
|
13
|
+
export declare const TSV_SIMPLE = "a\tb\tc\n1\t2\t3\n";
|
|
14
|
+
export declare const HTML_HEADINGS: string;
|
|
15
|
+
export declare const HTML_DANGEROUS: string;
|
|
16
|
+
export declare const PDF_MAGIC: Uint8Array;
|
|
17
|
+
export declare const PDF_TEXT_LAYER: Uint8Array;
|
|
18
|
+
export declare const PDF_NO_TEXT_LAYER: Uint8Array;
|
|
19
|
+
export declare const DOCX_SIMPLE_BASE64 = "UEsDBBQAAAAIAC28xFzXeYTq8QAAALgBAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbH2QzU7DMBCE730Ky9cqccoBIZSkB36OwKE8wMreJFb9J69b2rdn00KREOVozXwz62nXB+/EHjPZGDq5qhspMOhobBg7+b55ru6koALBgIsBO3lEkut+0W6OCUkwHKiTUynpXinSE3qgOiYMrAwxeyj8zKNKoLcworppmlulYygYSlXmDNkvhGgfcYCdK+LpwMr5loyOpHg4e+e6TkJKzmoorKt9ML+Kqq+SmsmThyabaMkGqa6VzOL1jh/0lSfK1qB4g1xewLNRfcRslIl65xmu/0/649o4DFbjhZ/TUo4aiXh77+qL4sGG71+06jR8/wlQSwMEFAAAAAgALbzEXCAbhuqyAAAALgEAAAsAAABfcmVscy8ucmVsc43Puw6CMBQG4J2naM4uBQdjDIXFmLAafICmPZRGeklbL7y9HRzEODie23fyN93TzOSOIWpnGdRlBQStcFJbxeAynDZ7IDFxK/nsLDJYMELXFs0ZZ57yTZy0jyQjNjKYUvIHSqOY0PBYOo82T0YXDE+5DIp6Lq5cId1W1Y6GTwPagpAVS3rJIPSyBjIsHv/h3ThqgUcnbgZt+vHlayPLPChMDB4uSCrf7TKzQHNKuorZvgBQSwMEFAAAAAgALbzEXGJ/vc/pAAAA5wEAABEAAAB3b3JkL2RvY3VtZW50LnhtbJWRwU7DMAyG73uKKPc13Q4IVU2mgTRxnAQ8QEjNWimxoySs9O1JWtAmDhOc8lv+f/uT0+4+nWVnCHEglHxT1ZwBGuoGPEn++nJY33MWk8ZOW0KQfILId2rVjk1H5sMBJpYnYGxGyfuUfCNEND04HSvygLn3TsHplMtwEiOFzgcyEGNe4KzY1vWdcHpArlaM5alv1E1FzoVf1KKPQZXnOU0W2NictZX8CXQh3XChWrF4LonZn9SR7GCm0k6z6dsy+682XQJ763vNCkh1nfod+Bva9hbaI2EKZOM/4B4g3WIrYjlhUT9fpL4AUEsBAhQDFAAAAAgALbzEXNd5hOrxAAAAuAEAABMAAAAAAAAAAAAAAIABAAAAAFtDb250ZW50X1R5cGVzXS54bWxQSwECFAMUAAAACAAtvMRcIBuG6rIAAAAuAQAACwAAAAAAAAAAAAAAgAEiAQAAX3JlbHMvLnJlbHNQSwECFAMUAAAACAAtvMRcYn+9z+kAAADnAQAAEQAAAAAAAAAAAAAAgAH9AQAAd29yZC9kb2N1bWVudC54bWxQSwUGAAAAAAMAAwC5AAAAFQMAAAAA";
|
|
20
|
+
export declare const DOCX_SIMPLE: Uint8Array;
|
|
21
|
+
export declare const DOCX_WITH_PREAMBLE_BASE64 = "UEsDBBQAAAAIANACxVxn4XsRxAAAAHcBAAATABwAW0NvbnRlbnRfVHlwZXNdLnhtbFVUCQADp/ohaqf6IWp1eAsAAQT1AQAABBQAAAB9kLkOwjAMhneeosqKqBEDA6IswAoMvICVum1ELsXmentSrgEBo/0fn+X5/hqJi4uznivVicQZAOuOHHIZIvmsNCE5lDymFiLqA7YEk/F4Cjp4IS8j6TvUYr6iBo9WivUlr9kEX6lEllWxfBh7VqUwRms0Stbh5OsPyuhJKHPy7uHORB5mg4KvhF75DXjmtidKydRU7DDJBl12wTmkGuqgjy4ny/81X+4MTWM0vfN9W0xBE7PxrbPlW3Fo/Ot+uL97MbgBUEsDBAoAAAAAANACxVwAAAAAAAAAAAAAAAAGABwAX3JlbHMvVVQJAAOn+iFqp/ohanV4CwABBPUBAAAEFAAAAFBLAwQUAAAACADQAsVcJnghCIsAAADzAAAACwAcAF9yZWxzLy5yZWxzVVQJAAOn+iFqp/ohanV4CwABBPUBAAAEFAAAAI3PPQ4CIRAF4N5TEA6ws1pYGKCy2XbjBQjMAnH5yYBRby+FxWosLGfe5HsZMeOqW8ip+lAqe8Q1Vcl9a+UEUI3HqOuQC6aeLJmibn0kB0Wbq3YIh3E8Am0NrsTWZJOVnCa75+zyLPiPnZclGDxnc4uY2o+Kr4sua3LYJL9nsmDf66GzHJSAjxfV7gVQSwMECgAAAAAA0ALFXAAAAAAAAAAAAAAAAAUAHAB3b3JkL1VUCQADp/ohaqf6IWp1eAsAAQT1AQAABBQAAABQSwMEFAAAAAgA0ALFXNON7KHUAAAAWAEAABEAHAB3b3JkL2RvY3VtZW50LnhtbFVUCQADp/ohaqf6IWp1eAsAAQT1AQAABBQAAAB1kE1PwzAMhu/7FZHvLN0OCFVtd0PsNgn4ASYxbaUkjpyw0n9PsvEhJLi8Vuwnrz+6w7t36kySZg497LYNKAqG7RzGHp6f7m/uQKWMwaLjQD2slOAwbLqltWzePIWsikNI7dLDlHNstU5mIo9py5FCqb2yeMzlKaNeWGwUNpRSaeCd3jfNrfY4BxiK5QvbtcZYRark4RiysIooOArGqdM1WVUuGr/5eLr8iI95daSW9oyuhwfCusoO9JW+Mp/WJ3azWf93/IWpOtwfrP4aW/+cZNh8AFBLAQIeAxQAAAAIANACxVxn4XsRxAAAAHcBAAATABgAAAAAAAEAAACkgQAAAABbQ29udGVudF9UeXBlc10ueG1sVVQFAAOn+iFqdXgLAAEE9QEAAAQUAAAAUEsBAh4DCgAAAAAA0ALFXAAAAAAAAAAAAAAAAAYAGAAAAAAAAAAQAO1BEQEAAF9yZWxzL1VUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLAQIeAxQAAAAIANACxVwmeCEIiwAAAPMAAAALABgAAAAAAAEAAACkgVEBAABfcmVscy8ucmVsc1VUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLAQIeAwoAAAAAANACxVwAAAAAAAAAAAAAAAAFABgAAAAAAAAAEADtQSECAAB3b3JkL1VUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLAQIeAxQAAAAIANACxVzTjeyh1AAAAFgBAAARABgAAAAAAAEAAACkgWACAAB3b3JkL2RvY3VtZW50LnhtbFVUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLBQYAAAAABQAFAJgBAAB/AwAAAAA=";
|
|
22
|
+
export declare const DOCX_WITH_PREAMBLE: Uint8Array;
|
|
23
|
+
export declare const XLSX_SIMPLE_BASE64 = "UEsDBBQAAAAIAAAAAADBChDqgQAAAKQAAAAPAAAAeGwvd29ya2Jvb2sueG1sNY7BDoMwDEN/pcoHLLDDDohy2S58RgfpWkEblARtnz80jZPtZ1ly/2ZZnsyL+5S1aicektnWIeqUqAS98Eb16CJLCXZEeSHHmCd68LQXqobXprmh0Bosc9WUN4Wh10Rk+ldXQyEPd64mvCq4Hx1nDy046fJhZJxbwKHHc4jns+ELUEsDBBQAAAAIAAAAAAAHfok7RgAAAGgAAAAaAAAAeGwvX3JlbHMvd29ya2Jvb2sueG1sLnJlbHOzCUrNSSzJzM8rzsgsKLazQeYqeKbYKhV5phgqKYRUFqTaKpXnF2UXZ6SmlgAFEovSU0uQhIr1wZShXkVujpK+nY0+qsEAUEsDBBQAAAAIAAAAAACCf1VtQAAAAGwAAAAUAAAAeGwvc2hhcmVkU3RyaW5ncy54bWyzKS4usbMpzrSzKbHzTq200Qfy9EFciFBYYk5pKrqgc35eSVF+jq6hObqMa15yUWVBiUJSYnJ2aUExQlofZA0AUEsDBBQAAAAIAAAAAAA2I5L5hAAAAPoAAAAYAAAAeGwvd29ya3NoZWV0cy9zaGVldDEueG1ssynPL8ouzkhNLbGzAVMuiSWJdjZF+eUKRbZKhkp2NskghqOhkkKJrVIxkF9mZ2CjX2Zno58MlXNCljOEy+kDzYAbZAQ3yAhJsRGaQchyxqhyzhC5zLyczLzU4JIioJrMYjubErtAEwW1xNwCa4XE0pTMEht9oD/0QTIIJ+gj+Usf4V0AUEsBAhQAFAAAAAgAAAAAAMEKEOqBAAAApAAAAA8AAAAAAAAAAAAAAAAAAAAAAHhsL3dvcmtib29rLnhtbFBLAQIUABQAAAAIAAAAAAAHfok7RgAAAGgAAAAaAAAAAAAAAAAAAAAAAK4AAAB4bC9fcmVscy93b3JrYm9vay54bWwucmVsc1BLAQIUABQAAAAIAAAAAACCf1VtQAAAAGwAAAAUAAAAAAAAAAAAAAAAACwBAAB4bC9zaGFyZWRTdHJpbmdzLnhtbFBLAQIUABQAAAAIAAAAAAA2I5L5hAAAAPoAAAAYAAAAAAAAAAAAAAAAAJ4BAAB4bC93b3Jrc2hlZXRzL3NoZWV0MS54bWxQSwUGAAAAAAQABAANAQAAWAIAAAAA";
|
|
24
|
+
export declare const XLSX_SIMPLE: Uint8Array;
|
|
25
|
+
export declare const ZIP_MAGIC: Uint8Array;
|
|
26
|
+
export declare const GZIP_MAGIC: Uint8Array;
|
|
27
|
+
export declare const PNG_MAGIC: Uint8Array;
|
|
28
|
+
//# sourceMappingURL=parser-test-fixtures.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser-test-fixtures.d.ts","sourceRoot":"","sources":["../../src/parsers/parser-test-fixtures.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,+BAA+B,CAAC;AAGhE,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAEvD,eAAO,MAAM,mBAAmB,EAAoB,UAAU,CAAC;AAE/D,wBAAgB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,CAE/C;AAED,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,MAAM,EACZ,SAAS,GAAE,OAAO,CAAC,oBAAoB,CAAM,GAC5C,oBAAoB,CAUtB;AAED,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,UAAU,EACjB,SAAS,GAAE,OAAO,CAAC,oBAAoB,CAAM,GAC5C,oBAAoB,CAUtB;AAID,eAAO,MAAM,UAAU,4CAA4C,CAAC;AAEpE,eAAO,MAAM,YAAY,QAiBb,CAAC;AAEb,eAAO,MAAM,SAAS,qDAA6C,CAAC;AAEpE,eAAO,MAAM,WAAW,QAMtB,CAAC;AAEH,eAAO,MAAM,UAAU,0BAA0B,CAAC;AAGlD,eAAO,MAAM,UAAU,yCAAiC,CAAC;AAEzD,eAAO,MAAM,UAAU,uBAAuB,CAAC;AAE/C,eAAO,MAAM,aAAa,QAUd,CAAC;AAEb,eAAO,MAAM,cAAc,QAQf,CAAC;AAGb,eAAO,MAAM,SAAS,EAAE,UAAiD,CAAC;AAE1E,eAAO,MAAM,cAAc,EAAE,UAqB5B,CAAC;AAEF,eAAO,MAAM,iBAAiB,EAAE,UAmB/B,CAAC;AAEF,eAAO,MAAM,kBAAkB,qzCACqxC,CAAC;AAErzC,eAAO,MAAM,WAAW,EAAE,UAAuE,CAAC;AAElG,eAAO,MAAM,yBAAyB,6uDACssD,CAAC;AAE7uD,eAAO,MAAM,kBAAkB,EAAE,UAEhC,CAAC;AAEF,eAAO,MAAM,kBAAkB,yqCACyoC,CAAC;AAEzqC,eAAO,MAAM,WAAW,EAAE,UAAuE,CAAC;AAGlG,eAAO,MAAM,SAAS,EAAE,UAEtB,CAAC;AAGH,eAAO,MAAM,UAAU,EAAE,UAAqD,CAAC;AAG/E,eAAO,MAAM,SAAS,EAAE,UAEtB,CAAC"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
// Synthetic byte fixtures for the parser adapters (Epic #189, Issue #266). Pure strings
|
|
2
|
+
// only — no FS, no shell, no network. Kept in a `*-fixtures.ts` (NOT `_support`) file so the
|
|
3
|
+
// architecture rules treat it as production source but the trust-8 rule still excludes it
|
|
4
|
+
// from publication when callers stick to the `parsers/index.ts` barrel.
|
|
5
|
+
import { Buffer } from "node:buffer";
|
|
6
|
+
export const FIXTURE_DOCUMENT_ID = "doc-fixture";
|
|
7
|
+
/**
 * Encodes a string as UTF-8 bytes.
 *
 * @param text Text to encode.
 * @returns A `Uint8Array` holding the UTF-8 encoding of `text`.
 */
export function encode(text) {
    const utf8 = new TextEncoder();
    return utf8.encode(text);
}
|
|
10
|
+
/**
 * Builds a parser selection input from a text fixture.
 *
 * The document id is always the fixture id and the bytes are always the encoded `text`.
 * `overrides` can supply `extension` (default "txt"), `mediaType` (default "text/plain")
 * and `languageHint`. The `languageHint` key is present in the result only when the
 * override is not `undefined`.
 *
 * @param text Fixture text to encode into `bytes`.
 * @param overrides Optional overrides for `extension`, `mediaType` and `languageHint`.
 * @returns The assembled selection input object.
 */
export function selectionFromText(text, overrides = {}) {
    const selection = {
        documentId: FIXTURE_DOCUMENT_ID,
        bytes: encode(text),
        extension: overrides.extension ?? "txt",
        mediaType: overrides.mediaType ?? "text/plain",
    };
    if (overrides.languageHint === undefined) {
        return selection;
    }
    return { ...selection, languageHint: overrides.languageHint };
}
|
|
21
|
+
/**
 * Builds a parser selection input from raw fixture bytes.
 *
 * The document id is always the fixture id and `bytes` is passed through as given.
 * `overrides` can supply `extension` and `mediaType` (both default to the empty string)
 * and `languageHint`. The `languageHint` key is present in the result only when the
 * override is not `undefined`.
 *
 * @param bytes Fixture bytes used as-is.
 * @param overrides Optional overrides for `extension`, `mediaType` and `languageHint`.
 * @returns The assembled selection input object.
 */
export function selectionFromBytes(bytes, overrides = {}) {
    const selection = {
        documentId: FIXTURE_DOCUMENT_ID,
        bytes,
        extension: overrides.extension ?? "",
        mediaType: overrides.mediaType ?? "",
    };
    if (overrides.languageHint === undefined) {
        return selection;
    }
    return { ...selection, languageHint: overrides.languageHint };
}
|
|
32
|
+
// ─── Format fixtures ─────────────────────────────────────────────────────────
|
|
33
|
+
export const TEXT_PLAIN = "Hello, world.\nSecond paragraph here.\n";
|
|
34
|
+
export const MARKDOWN_DOC = [
|
|
35
|
+
"# Title",
|
|
36
|
+
"",
|
|
37
|
+
"Intro paragraph.",
|
|
38
|
+
"",
|
|
39
|
+
"## Subhead A",
|
|
40
|
+
"",
|
|
41
|
+
"Body of A.",
|
|
42
|
+
"",
|
|
43
|
+
"### Deep heading",
|
|
44
|
+
"",
|
|
45
|
+
"Deep body.",
|
|
46
|
+
"",
|
|
47
|
+
"## Subhead B",
|
|
48
|
+
"",
|
|
49
|
+
"Body of B.",
|
|
50
|
+
"",
|
|
51
|
+
].join("\n");
|
|
52
|
+
export const JSON_FLAT = '{"name":"alpha","count":3,"active":true}';
|
|
53
|
+
export const JSON_NESTED = JSON.stringify({
|
|
54
|
+
meta: { id: "doc-1", version: 2 },
|
|
55
|
+
items: [
|
|
56
|
+
{ sku: "A1", price: 10 },
|
|
57
|
+
{ sku: "B2", price: 20 },
|
|
58
|
+
],
|
|
59
|
+
});
|
|
60
|
+
export const CSV_SIMPLE = "a,b,c\n1,2,3\n4,5,6\n";
|
|
61
|
+
// RFC 4180 adversarial fixture — quoted comma, embedded quote escape, embedded newline.
|
|
62
|
+
export const CSV_QUOTED = 'a,b,c\n"x,1","y""2","z\n3"\n';
|
|
63
|
+
export const TSV_SIMPLE = "a\tb\tc\n1\t2\t3\n";
|
|
64
|
+
export const HTML_HEADINGS = [
|
|
65
|
+
"<!DOCTYPE html>",
|
|
66
|
+
"<html><body>",
|
|
67
|
+
"<h1>Top</h1>",
|
|
68
|
+
"<p>Intro text.</p>",
|
|
69
|
+
"<h2>Sub</h2>",
|
|
70
|
+
"<p>Sub body.</p>",
|
|
71
|
+
"<h3>Deeper</h3>",
|
|
72
|
+
"<p>Deeper body.</p>",
|
|
73
|
+
"</body></html>",
|
|
74
|
+
].join("\n");
|
|
75
|
+
export const HTML_DANGEROUS = [
|
|
76
|
+
"<html><body>",
|
|
77
|
+
"<h1>Safe</h1>",
|
|
78
|
+
"<script>alert('pwn');</script>",
|
|
79
|
+
"<style>body{display:none}</style>",
|
|
80
|
+
"<noscript>fallback</noscript>",
|
|
81
|
+
"<p>After script.</p>",
|
|
82
|
+
"</body></html>",
|
|
83
|
+
].join("\n");
|
|
84
|
+
// Synthetic PDF marker — only the leading magic bytes are required for sniffing.
|
|
85
|
+
export const PDF_MAGIC = encode("%PDF-1.4\n%binary marker\n");
|
|
86
|
+
export const PDF_TEXT_LAYER = encode(`%PDF-1.4
|
|
87
|
+
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
|
|
88
|
+
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
|
|
89
|
+
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 300 144] /Contents 4 0 R /Resources << /Font << /F1 5 0 R >> >> >> endobj
|
|
90
|
+
4 0 obj << /Length 44 >> stream
|
|
91
|
+
BT /F1 24 Tf 72 72 Td (Hello PDF) Tj ET
|
|
92
|
+
endstream endobj
|
|
93
|
+
5 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> endobj
|
|
94
|
+
xref
|
|
95
|
+
0 6
|
|
96
|
+
0000000000 65535 f
|
|
97
|
+
0000000010 00000 n
|
|
98
|
+
0000000063 00000 n
|
|
99
|
+
0000000122 00000 n
|
|
100
|
+
0000000248 00000 n
|
|
101
|
+
0000000342 00000 n
|
|
102
|
+
trailer << /Root 1 0 R /Size 6 >>
|
|
103
|
+
startxref
|
|
104
|
+
412
|
|
105
|
+
%%EOF`);
|
|
106
|
+
export const PDF_NO_TEXT_LAYER = encode(`%PDF-1.4
|
|
107
|
+
1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
|
|
108
|
+
2 0 obj << /Type /Pages /Kids [3 0 R] /Count 1 >> endobj
|
|
109
|
+
3 0 obj << /Type /Page /Parent 2 0 R /MediaBox [0 0 300 144] /Contents 4 0 R >> endobj
|
|
110
|
+
4 0 obj << /Length 0 >> stream
|
|
111
|
+
|
|
112
|
+
endstream endobj
|
|
113
|
+
xref
|
|
114
|
+
0 5
|
|
115
|
+
0000000000 65535 f
|
|
116
|
+
0000000010 00000 n
|
|
117
|
+
0000000063 00000 n
|
|
118
|
+
0000000122 00000 n
|
|
119
|
+
0000000210 00000 n
|
|
120
|
+
trailer << /Root 1 0 R /Size 5 >>
|
|
121
|
+
startxref
|
|
122
|
+
260
|
|
123
|
+
%%EOF`);
|
|
124
|
+
export const DOCX_SIMPLE_BASE64 = "UEsDBBQAAAAIAC28xFzXeYTq8QAAALgBAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbH2QzU7DMBCE730Ky9cqccoBIZSkB36OwKE8wMreJFb9J69b2rdn00KREOVozXwz62nXB+/EHjPZGDq5qhspMOhobBg7+b55ru6koALBgIsBO3lEkut+0W6OCUkwHKiTUynpXinSE3qgOiYMrAwxeyj8zKNKoLcworppmlulYygYSlXmDNkvhGgfcYCdK+LpwMr5loyOpHg4e+e6TkJKzmoorKt9ML+Kqq+SmsmThyabaMkGqa6VzOL1jh/0lSfK1qB4g1xewLNRfcRslIl65xmu/0/649o4DFbjhZ/TUo4aiXh77+qL4sGG71+06jR8/wlQSwMEFAAAAAgALbzEXCAbhuqyAAAALgEAAAsAAABfcmVscy8ucmVsc43Puw6CMBQG4J2naM4uBQdjDIXFmLAafICmPZRGeklbL7y9HRzEODie23fyN93TzOSOIWpnGdRlBQStcFJbxeAynDZ7IDFxK/nsLDJYMELXFs0ZZ57yTZy0jyQjNjKYUvIHSqOY0PBYOo82T0YXDE+5DIp6Lq5cId1W1Y6GTwPagpAVS3rJIPSyBjIsHv/h3ThqgUcnbgZt+vHlayPLPChMDB4uSCrf7TKzQHNKuorZvgBQSwMEFAAAAAgALbzEXGJ/vc/pAAAA5wEAABEAAAB3b3JkL2RvY3VtZW50LnhtbJWRwU7DMAyG73uKKPc13Q4IVU2mgTRxnAQ8QEjNWimxoySs9O1JWtAmDhOc8lv+f/uT0+4+nWVnCHEglHxT1ZwBGuoGPEn++nJY33MWk8ZOW0KQfILId2rVjk1H5sMBJpYnYGxGyfuUfCNEND04HSvygLn3TsHplMtwEiOFzgcyEGNe4KzY1vWdcHpArlaM5alv1E1FzoVf1KKPQZXnOU0W2NictZX8CXQh3XChWrF4LonZn9SR7GCm0k6z6dsy+682XQJ763vNCkh1nfod+Bva9hbaI2EKZOM/4B4g3WIrYjlhUT9fpL4AUEsBAhQDFAAAAAgALbzEXNd5hOrxAAAAuAEAABMAAAAAAAAAAAAAAIABAAAAAFtDb250ZW50X1R5cGVzXS54bWxQSwECFAMUAAAACAAtvMRcIBuG6rIAAAAuAQAACwAAAAAAAAAAAAAAgAEiAQAAX3JlbHMvLnJlbHNQSwECFAMUAAAACAAtvMRcYn+9z+kAAADnAQAAEQAAAAAAAAAAAAAAgAH9AQAAd29yZC9kb2N1bWVudC54bWxQSwUGAAAAAAMAAwC5AAAAFQMAAAAA";
|
|
125
|
+
export const DOCX_SIMPLE = Uint8Array.from(Buffer.from(DOCX_SIMPLE_BASE64, "base64"));
|
|
126
|
+
export const DOCX_WITH_PREAMBLE_BASE64 = "UEsDBBQAAAAIANACxVxn4XsRxAAAAHcBAAATABwAW0NvbnRlbnRfVHlwZXNdLnhtbFVUCQADp/ohaqf6IWp1eAsAAQT1AQAABBQAAAB9kLkOwjAMhneeosqKqBEDA6IswAoMvICVum1ELsXmentSrgEBo/0fn+X5/hqJi4uznivVicQZAOuOHHIZIvmsNCE5lDymFiLqA7YEk/F4Cjp4IS8j6TvUYr6iBo9WivUlr9kEX6lEllWxfBh7VqUwRms0Stbh5OsPyuhJKHPy7uHORB5mg4KvhF75DXjmtidKydRU7DDJBl12wTmkGuqgjy4ny/81X+4MTWM0vfN9W0xBE7PxrbPlW3Fo/Ot+uL97MbgBUEsDBAoAAAAAANACxVwAAAAAAAAAAAAAAAAGABwAX3JlbHMvVVQJAAOn+iFqp/ohanV4CwABBPUBAAAEFAAAAFBLAwQUAAAACADQAsVcJnghCIsAAADzAAAACwAcAF9yZWxzLy5yZWxzVVQJAAOn+iFqp/ohanV4CwABBPUBAAAEFAAAAI3PPQ4CIRAF4N5TEA6ws1pYGKCy2XbjBQjMAnH5yYBRby+FxWosLGfe5HsZMeOqW8ip+lAqe8Q1Vcl9a+UEUI3HqOuQC6aeLJmibn0kB0Wbq3YIh3E8Am0NrsTWZJOVnCa75+zyLPiPnZclGDxnc4uY2o+Kr4sua3LYJL9nsmDf66GzHJSAjxfV7gVQSwMECgAAAAAA0ALFXAAAAAAAAAAAAAAAAAUAHAB3b3JkL1VUCQADp/ohaqf6IWp1eAsAAQT1AQAABBQAAABQSwMEFAAAAAgA0ALFXNON7KHUAAAAWAEAABEAHAB3b3JkL2RvY3VtZW50LnhtbFVUCQADp/ohaqf6IWp1eAsAAQT1AQAABBQAAAB1kE1PwzAMhu/7FZHvLN0OCFVtd0PsNgn4ASYxbaUkjpyw0n9PsvEhJLi8Vuwnrz+6w7t36kySZg497LYNKAqG7RzGHp6f7m/uQKWMwaLjQD2slOAwbLqltWzePIWsikNI7dLDlHNstU5mIo9py5FCqb2yeMzlKaNeWGwUNpRSaeCd3jfNrfY4BxiK5QvbtcZYRark4RiysIooOArGqdM1WVUuGr/5eLr8iI95daSW9oyuhwfCusoO9JW+Mp/WJ3azWf93/IWpOtwfrP4aW/+cZNh8AFBLAQIeAxQAAAAIANACxVxn4XsRxAAAAHcBAAATABgAAAAAAAEAAACkgQAAAABbQ29udGVudF9UeXBlc10ueG1sVVQFAAOn+iFqdXgLAAEE9QEAAAQUAAAAUEsBAh4DCgAAAAAA0ALFXAAAAAAAAAAAAAAAAAYAGAAAAAAAAAAQAO1BEQEAAF9yZWxzL1VUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLAQIeAxQAAAAIANACxVwmeCEIiwAAAPMAAAALABgAAAAAAAEAAACkgVEBAABfcmVscy8ucmVsc1VUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLAQIeAwoAAAAAANACxVwAAAAAAAAAAAAAAAAFABgAAAAAAAAAEADtQSECAAB3b3JkL1VUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLAQIeAxQAAAAIANACxVzTjeyh1AAAAFgBAAARABgAAAAAAAEAAACkgWACAAB3b3JkL2RvY3VtZW50LnhtbFVUBQADp/ohanV4CwABBPUBAAAEFAAAAFBLBQYAAAAABQAFAJgBAAB/AwAAAAA=";
|
|
127
|
+
export const DOCX_WITH_PREAMBLE = Uint8Array.from(Buffer.from(DOCX_WITH_PREAMBLE_BASE64, "base64"));
|
|
128
|
+
export const XLSX_SIMPLE_BASE64 = "UEsDBBQAAAAIAAAAAADBChDqgQAAAKQAAAAPAAAAeGwvd29ya2Jvb2sueG1sNY7BDoMwDEN/pcoHLLDDDohy2S58RgfpWkEblARtnz80jZPtZ1ly/2ZZnsyL+5S1aicektnWIeqUqAS98Eb16CJLCXZEeSHHmCd68LQXqobXprmh0Bosc9WUN4Wh10Rk+ldXQyEPd64mvCq4Hx1nDy046fJhZJxbwKHHc4jns+ELUEsDBBQAAAAIAAAAAAAHfok7RgAAAGgAAAAaAAAAeGwvX3JlbHMvd29ya2Jvb2sueG1sLnJlbHOzCUrNSSzJzM8rzsgsKLazQeYqeKbYKhV5phgqKYRUFqTaKpXnF2UXZ6SmlgAFEovSU0uQhIr1wZShXkVujpK+nY0+qsEAUEsDBBQAAAAIAAAAAACCf1VtQAAAAGwAAAAUAAAAeGwvc2hhcmVkU3RyaW5ncy54bWyzKS4usbMpzrSzKbHzTq200Qfy9EFciFBYYk5pKrqgc35eSVF+jq6hObqMa15yUWVBiUJSYnJ2aUExQlofZA0AUEsDBBQAAAAIAAAAAAA2I5L5hAAAAPoAAAAYAAAAeGwvd29ya3NoZWV0cy9zaGVldDEueG1ssynPL8ouzkhNLbGzAVMuiSWJdjZF+eUKRbZKhkp2NskghqOhkkKJrVIxkF9mZ2CjX2Zno58MlXNCljOEy+kDzYAbZAQ3yAhJsRGaQchyxqhyzhC5zLyczLzU4JIioJrMYjubErtAEwW1xNwCa4XE0pTMEht9oD/0QTIIJ+gj+Usf4V0AUEsBAhQAFAAAAAgAAAAAAMEKEOqBAAAApAAAAA8AAAAAAAAAAAAAAAAAAAAAAHhsL3dvcmtib29rLnhtbFBLAQIUABQAAAAIAAAAAAAHfok7RgAAAGgAAAAaAAAAAAAAAAAAAAAAAK4AAAB4bC9fcmVscy93b3JrYm9vay54bWwucmVsc1BLAQIUABQAAAAIAAAAAACCf1VtQAAAAGwAAAAUAAAAAAAAAAAAAAAAACwBAAB4bC9zaGFyZWRTdHJpbmdzLnhtbFBLAQIUABQAAAAIAAAAAAA2I5L5hAAAAPoAAAAYAAAAAAAAAAAAAAAAAJ4BAAB4bC93b3Jrc2hlZXRzL3NoZWV0MS54bWxQSwUGAAAAAAQABAANAQAAWAIAAAAA";
|
|
129
|
+
export const XLSX_SIMPLE = Uint8Array.from(Buffer.from(XLSX_SIMPLE_BASE64, "base64"));
|
|
130
|
+
// Synthetic zip marker — PK\x03\x04 prefix. We never decompress this.
|
|
131
|
+
export const ZIP_MAGIC = new Uint8Array([
|
|
132
|
+
0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x00, 0x00,
|
|
133
|
+
]);
|
|
134
|
+
// Synthetic gzip marker — 0x1f 0x8b prefix.
|
|
135
|
+
export const GZIP_MAGIC = new Uint8Array([0x1f, 0x8b, 0x08, 0x00]);
|
|
136
|
+
// Synthetic PNG marker.
|
|
137
|
+
export const PNG_MAGIC = new Uint8Array([
|
|
138
|
+
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
|
|
139
|
+
]);
|