@oscharko-dev/keiko-local-knowledge 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bounded-document-extraction.d.ts +27 -0
- package/dist/bounded-document-extraction.d.ts.map +1 -0
- package/dist/bounded-document-extraction.js +214 -0
- package/dist/capsule-lifecycle.d.ts +33 -0
- package/dist/capsule-lifecycle.d.ts.map +1 -0
- package/dist/capsule-lifecycle.js +292 -0
- package/dist/capsule-set-lifecycle.d.ts +15 -0
- package/dist/capsule-set-lifecycle.d.ts.map +1 -0
- package/dist/capsule-set-lifecycle.js +158 -0
- package/dist/chunking/chunker-persist.d.ts +36 -0
- package/dist/chunking/chunker-persist.d.ts.map +1 -0
- package/dist/chunking/chunker-persist.js +74 -0
- package/dist/chunking/chunker-runner.d.ts +9 -0
- package/dist/chunking/chunker-runner.d.ts.map +1 -0
- package/dist/chunking/chunker-runner.js +218 -0
- package/dist/chunking/chunker.d.ts +7 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +139 -0
- package/dist/chunking/citation-mapper.d.ts +4 -0
- package/dist/chunking/citation-mapper.d.ts.map +1 -0
- package/dist/chunking/citation-mapper.js +180 -0
- package/dist/chunking/index.d.ts +6 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/token-estimator.d.ts +3 -0
- package/dist/chunking/token-estimator.d.ts.map +1 -0
- package/dist/chunking/token-estimator.js +26 -0
- package/dist/chunking/types.d.ts +49 -0
- package/dist/chunking/types.d.ts.map +1 -0
- package/dist/chunking/types.js +26 -0
- package/dist/composition.d.ts +57 -0
- package/dist/composition.d.ts.map +1 -0
- package/dist/composition.js +310 -0
- package/dist/conversation/citation-attacher.d.ts +8 -0
- package/dist/conversation/citation-attacher.d.ts.map +1 -0
- package/dist/conversation/citation-attacher.js +55 -0
- package/dist/conversation/citation-excerpts.d.ts +4 -0
- package/dist/conversation/citation-excerpts.d.ts.map +1 -0
- package/dist/conversation/citation-excerpts.js +41 -0
- package/dist/conversation/grounded-answer-runner.d.ts +9 -0
- package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
- package/dist/conversation/grounded-answer-runner.js +61 -0
- package/dist/conversation/index.d.ts +5 -0
- package/dist/conversation/index.d.ts.map +1 -0
- package/dist/conversation/index.js +7 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
- package/dist/conversation/model-gateway-answer-generator.js +105 -0
- package/dist/conversation/types.d.ts +35 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +24 -0
- package/dist/discovery/discovery-runner.d.ts +23 -0
- package/dist/discovery/discovery-runner.d.ts.map +1 -0
- package/dist/discovery/discovery-runner.js +109 -0
- package/dist/discovery/extract-progressive.d.ts +17 -0
- package/dist/discovery/extract-progressive.d.ts.map +1 -0
- package/dist/discovery/extract-progressive.js +522 -0
- package/dist/discovery/extract.d.ts +26 -0
- package/dist/discovery/extract.d.ts.map +1 -0
- package/dist/discovery/extract.js +906 -0
- package/dist/discovery/glob.d.ts +10 -0
- package/dist/discovery/glob.d.ts.map +1 -0
- package/dist/discovery/glob.js +72 -0
- package/dist/discovery/index.d.ts +6 -0
- package/dist/discovery/index.d.ts.map +1 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/media-type.d.ts +4 -0
- package/dist/discovery/media-type.d.ts.map +1 -0
- package/dist/discovery/media-type.js +62 -0
- package/dist/discovery/persist.d.ts +63 -0
- package/dist/discovery/persist.d.ts.map +1 -0
- package/dist/discovery/persist.js +345 -0
- package/dist/discovery/test-support.d.ts +16 -0
- package/dist/discovery/test-support.d.ts.map +1 -0
- package/dist/discovery/test-support.js +127 -0
- package/dist/discovery/types.d.ts +63 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +28 -0
- package/dist/discovery/walk.d.ts +12 -0
- package/dist/discovery/walk.d.ts.map +1 -0
- package/dist/discovery/walk.js +302 -0
- package/dist/errors.d.ts +13 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +22 -0
- package/dist/evaluations/dimensions.d.ts +14 -0
- package/dist/evaluations/dimensions.d.ts.map +1 -0
- package/dist/evaluations/dimensions.js +191 -0
- package/dist/evaluations/fixtures.d.ts +18 -0
- package/dist/evaluations/fixtures.d.ts.map +1 -0
- package/dist/evaluations/fixtures.js +858 -0
- package/dist/evaluations/index.d.ts +7 -0
- package/dist/evaluations/index.d.ts.map +1 -0
- package/dist/evaluations/index.js +10 -0
- package/dist/evaluations/report.d.ts +3 -0
- package/dist/evaluations/report.d.ts.map +1 -0
- package/dist/evaluations/report.js +31 -0
- package/dist/evaluations/runner-seed.d.ts +12 -0
- package/dist/evaluations/runner-seed.d.ts.map +1 -0
- package/dist/evaluations/runner-seed.js +175 -0
- package/dist/evaluations/runner.d.ts +8 -0
- package/dist/evaluations/runner.d.ts.map +1 -0
- package/dist/evaluations/runner.js +205 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
- package/dist/evaluations/scripted-embedding-adapter.js +163 -0
- package/dist/evaluations/types.d.ts +116 -0
- package/dist/evaluations/types.d.ts.map +1 -0
- package/dist/evaluations/types.js +27 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/indexing/bounded-indexing.d.ts +41 -0
- package/dist/indexing/bounded-indexing.d.ts.map +1 -0
- package/dist/indexing/bounded-indexing.js +240 -0
- package/dist/indexing/checkpoint-persist.d.ts +8 -0
- package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
- package/dist/indexing/checkpoint-persist.js +135 -0
- package/dist/indexing/checkpoint-resume.d.ts +20 -0
- package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
- package/dist/indexing/checkpoint-resume.js +50 -0
- package/dist/indexing/embedding-batcher.d.ts +3 -0
- package/dist/indexing/embedding-batcher.d.ts.map +1 -0
- package/dist/indexing/embedding-batcher.js +390 -0
- package/dist/indexing/index.d.ts +7 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +11 -0
- package/dist/indexing/job-persist.d.ts +46 -0
- package/dist/indexing/job-persist.d.ts.map +1 -0
- package/dist/indexing/job-persist.js +157 -0
- package/dist/indexing/job-resume.d.ts +4 -0
- package/dist/indexing/job-resume.d.ts.map +1 -0
- package/dist/indexing/job-resume.js +14 -0
- package/dist/indexing/orchestrator.d.ts +3 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -0
- package/dist/indexing/orchestrator.js +1151 -0
- package/dist/indexing/types.d.ts +156 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +30 -0
- package/dist/indexing/vector-persist.d.ts +32 -0
- package/dist/indexing/vector-persist.d.ts.map +1 -0
- package/dist/indexing/vector-persist.js +105 -0
- package/dist/parsers/_internal.d.ts +20 -0
- package/dist/parsers/_internal.d.ts.map +1 -0
- package/dist/parsers/_internal.js +122 -0
- package/dist/parsers/csv-parser.d.ts +3 -0
- package/dist/parsers/csv-parser.d.ts.map +1 -0
- package/dist/parsers/csv-parser.js +202 -0
- package/dist/parsers/docx-parser.d.ts +3 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +390 -0
- package/dist/parsers/html-parser.d.ts +3 -0
- package/dist/parsers/html-parser.d.ts.map +1 -0
- package/dist/parsers/html-parser.js +310 -0
- package/dist/parsers/index.d.ts +15 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +41 -0
- package/dist/parsers/json-parser.d.ts +3 -0
- package/dist/parsers/json-parser.d.ts.map +1 -0
- package/dist/parsers/json-parser.js +192 -0
- package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
- package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
- package/dist/parsers/large-document/capability-discovery.js +76 -0
- package/dist/parsers/large-document/diagnostics.d.ts +3 -0
- package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
- package/dist/parsers/large-document/diagnostics.js +11 -0
- package/dist/parsers/large-document/index.d.ts +15 -0
- package/dist/parsers/large-document/index.d.ts.map +1 -0
- package/dist/parsers/large-document/index.js +10 -0
- package/dist/parsers/large-document/legacy-format.d.ts +5 -0
- package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
- package/dist/parsers/large-document/legacy-format.js +25 -0
- package/dist/parsers/large-document/preflight.d.ts +9 -0
- package/dist/parsers/large-document/preflight.d.ts.map +1 -0
- package/dist/parsers/large-document/preflight.js +43 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-extraction.js +123 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-pdf.js +145 -0
- package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
- package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
- package/dist/parsers/large-document/synthetic-source.js +101 -0
- package/dist/parsers/large-document/window-builder.d.ts +24 -0
- package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
- package/dist/parsers/large-document/window-builder.js +75 -0
- package/dist/parsers/ocr/index.d.ts +4 -0
- package/dist/parsers/ocr/index.d.ts.map +1 -0
- package/dist/parsers/ocr/index.js +4 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
- package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
- package/dist/parsers/ocr/types.d.ts +16 -0
- package/dist/parsers/ocr/types.d.ts.map +1 -0
- package/dist/parsers/ocr/types.js +4 -0
- package/dist/parsers/parser-test-fixtures.d.ts +28 -0
- package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
- package/dist/parsers/parser-test-fixtures.js +139 -0
- package/dist/parsers/pdf-parser.d.ts +43 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +388 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +57 -0
- package/dist/parsers/text-parser.d.ts +3 -0
- package/dist/parsers/text-parser.d.ts.map +1 -0
- package/dist/parsers/text-parser.js +214 -0
- package/dist/parsers/types.d.ts +53 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +21 -0
- package/dist/parsers/unsupported-parser.d.ts +4 -0
- package/dist/parsers/unsupported-parser.d.ts.map +1 -0
- package/dist/parsers/unsupported-parser.js +97 -0
- package/dist/parsers/xlsx-parser.d.ts +3 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +425 -0
- package/dist/privacy/audit-emitter.d.ts +5 -0
- package/dist/privacy/audit-emitter.d.ts.map +1 -0
- package/dist/privacy/audit-emitter.js +93 -0
- package/dist/privacy/diagnostic-redactor.d.ts +2 -0
- package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
- package/dist/privacy/diagnostic-redactor.js +153 -0
- package/dist/privacy/index.d.ts +5 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +6 -0
- package/dist/privacy/retention-applier.d.ts +5 -0
- package/dist/privacy/retention-applier.d.ts.map +1 -0
- package/dist/privacy/retention-applier.js +88 -0
- package/dist/privacy/types.d.ts +98 -0
- package/dist/privacy/types.d.ts.map +1 -0
- package/dist/privacy/types.js +12 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
- package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
- package/dist/qualityIntelligence/index.d.ts +3 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +5 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
- package/dist/qualityIntelligence/qiHandoff.js +82 -0
- package/dist/retrieval/answer-grounding.d.ts +9 -0
- package/dist/retrieval/answer-grounding.d.ts.map +1 -0
- package/dist/retrieval/answer-grounding.js +31 -0
- package/dist/retrieval/context-pack-assembler.d.ts +24 -0
- package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
- package/dist/retrieval/context-pack-assembler.js +50 -0
- package/dist/retrieval/index.d.ts +6 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +9 -0
- package/dist/retrieval/retrieval-runner.d.ts +10 -0
- package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
- package/dist/retrieval/retrieval-runner.js +163 -0
- package/dist/retrieval/scoped-vector-search.d.ts +24 -0
- package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
- package/dist/retrieval/scoped-vector-search.js +864 -0
- package/dist/retrieval/types.d.ts +28 -0
- package/dist/retrieval/types.d.ts.map +1 -0
- package/dist/retrieval/types.js +33 -0
- package/dist/section-path-hash.d.ts +3 -0
- package/dist/section-path-hash.d.ts.map +1 -0
- package/dist/section-path-hash.js +9 -0
- package/dist/source-lifecycle.d.ts +14 -0
- package/dist/source-lifecycle.d.ts.map +1 -0
- package/dist/source-lifecycle.js +155 -0
- package/dist/source-routing-validation.d.ts +11 -0
- package/dist/source-routing-validation.d.ts.map +1 -0
- package/dist/source-routing-validation.js +140 -0
- package/dist/store-content-cipher.d.ts +11 -0
- package/dist/store-content-cipher.d.ts.map +1 -0
- package/dist/store-content-cipher.js +67 -0
- package/dist/store-content-encryption.d.ts +12 -0
- package/dist/store-content-encryption.d.ts.map +1 -0
- package/dist/store-content-encryption.js +275 -0
- package/dist/store-paths.d.ts +6 -0
- package/dist/store-paths.d.ts.map +1 -0
- package/dist/store-paths.js +61 -0
- package/dist/store.d.ts +30 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +219 -0
- package/dist/testing.d.ts +47 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +170 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +43 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { PageRecord, ParsedUnit, ParserDiagnostic } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { AsyncParserAdapter, ParserOptions, ParserSelectionInput } from "./types.js";
|
|
3
|
+
/** One item of a PDF text-content chunk; `str`, when present, carries the item's text. */
export interface PdfTextItem {
|
|
4
|
+
readonly str?: string;
|
|
5
|
+
}
|
|
6
|
+
/** One chunk read from a page's text-content stream: a read-only list of text items. */
export interface PdfTextContentChunk {
|
|
7
|
+
readonly items: readonly PdfTextItem[];
|
|
8
|
+
}
|
|
9
|
+
/** Minimal page surface the parser needs: a factory for a stream of text-content chunks. */
export interface PdfPageLike {
|
|
10
|
+
readonly streamTextContent: () => ReadableStream<PdfTextContentChunk>;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Minimal document surface the parser needs: a page count and an async page getter.
 * extractPages requests pages numbered 1 through `numPages`.
 */
export interface PdfDocumentLike {
|
|
13
|
+
readonly numPages: number;
|
|
14
|
+
readonly getPage: (pageNumber: number) => Promise<PdfPageLike>;
|
|
15
|
+
}
|
|
16
|
+
/**
 * Byte source accepted by loadPdfDocumentFromSource. When `loadFullBuffer` is
 * provided it is used; otherwise `readWindow` is required and serves ranged reads.
 */
export interface PdfDocumentSource {
|
|
17
|
+
readonly totalBytes: number;
|
|
18
|
+
readonly loadFullBuffer?: () => Promise<Uint8Array>;
|
|
19
|
+
readonly readWindow?: (startByte: number, length: number) => Promise<Uint8Array>;
|
|
20
|
+
}
|
|
21
|
+
/** Loads a PDF document from an in-memory byte array. */
export declare function loadPdfDocument(bytes: Uint8Array): Promise<PdfDocumentLike>;
|
|
22
|
+
/** Loads a PDF document from a byte source; rejects when the source offers neither a full buffer nor range reads. */
export declare function loadPdfDocumentFromSource(source: PdfDocumentSource): Promise<PdfDocumentLike>;
|
|
23
|
+
/**
 * Running state passed to readPageText: the parser input and options, the parse
 * start time, and the counts of emitted units and scanned objects so far. These
 * feed the stop checks made while a page is read.
 */
export interface PageTextReadState {
|
|
24
|
+
readonly input: ParserSelectionInput;
|
|
25
|
+
readonly options: ParserOptions;
|
|
26
|
+
readonly startedAt: number;
|
|
27
|
+
readonly emittedUnits: number;
|
|
28
|
+
readonly scannedObjects: number;
|
|
29
|
+
}
|
|
30
|
+
/**
 * Outcome of readPageText: the collected page text, the updated scanned-object
 * count, and a diagnostic when reading stopped before the stream ended.
 */
export interface PageTextReadResult {
|
|
31
|
+
readonly text: string;
|
|
32
|
+
readonly scannedObjects: number;
|
|
33
|
+
readonly diagnostic?: ParserDiagnostic;
|
|
34
|
+
}
|
|
35
|
+
/** Reads one page's text-content stream into a single space-joined string. */
export declare function readPageText(page: PdfPageLike, state: PageTextReadState): Promise<PageTextReadResult>;
|
|
36
|
+
/**
 * Walks the document's pages in order and collects page records, page units,
 * per-page texts, and any diagnostics raised along the way.
 */
export declare function extractPages(doc: PdfDocumentLike, input: ParserSelectionInput, options: ParserOptions, startedAt: number): Promise<{
|
|
37
|
+
readonly diagnostics: readonly ParserDiagnostic[];
|
|
38
|
+
readonly pages: readonly PageRecord[];
|
|
39
|
+
readonly units: readonly ParsedUnit[];
|
|
40
|
+
readonly pageTexts: readonly string[];
|
|
41
|
+
}>;
|
|
42
|
+
/** The PDF parser adapter exported by this module. */
export declare const pdfParser: AsyncParserAdapter;
|
|
43
|
+
//# sourceMappingURL=pdf-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/pdf-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,UAAU,EACV,UAAU,EACV,gBAAgB,EAEjB,MAAM,+BAA+B,CAAC;AAUvC,OAAO,KAAK,EACV,kBAAkB,EAIlB,aAAa,EACb,oBAAoB,EACrB,MAAM,YAAY,CAAC;AAUpB,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,KAAK,EAAE,SAAS,WAAW,EAAE,CAAC;CACxC;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,iBAAiB,EAAE,MAAM,cAAc,CAAC,mBAAmB,CAAC,CAAC;CACvE;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,OAAO,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC;CAChE;AAMD,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;CAClF;AAmND,wBAAsB,eAAe,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,eAAe,CAAC,CAWjF;AAyBD,wBAAsB,yBAAyB,CAC7C,MAAM,EAAE,iBAAiB,GACxB,OAAO,CAAC,eAAe,CAAC,CAqB1B;AA4BD,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,KAAK,EAAE,oBAAoB,CAAC;IACrC,QAAQ,CAAC,OAAO,EAAE,aAAa,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;CACjC;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,UAAU,CAAC,EAAE,gBAAgB,CAAC;CACxC;AA0CD,wBAAsB,YAAY,CAChC,IAAI,EAAE,WAAW,EACjB,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,kBAAkB,CAAC,CAgC7B;AAkDD,wBAAsB,YAAY,CAChC,GAAG,EAAE,eAAe,EACpB,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,aAAa,EACtB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC;IACT,QAAQ,CAAC,WAAW,EAAE,SAAS,gBAAgB,EAAE,CAAC;IAClD,QAAQ,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,CAAC;IACtC,QAAQ,CAAC,KAAK,EAAE,SAAS,UAAU,EAAE,CAAC;IACtC,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;CACvC,CAAC,CAsCD;AA8DD,eAAO,MAAM,SAAS,EAAE,kBAKtB,CAAC"}
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
import { diagnostic, emptyResult, objectLimitDiagnostic, oversizeDiagnostic, parserIdentity, shouldStop, } from "./_internal.js";
|
|
2
|
+
// Identity values this adapter reports through its capability record.
const PARSER_ID = "pdf";
const PARSER_VERSION = "1";
// Third-party packages recorded alongside the parser identity. Both the list
// and each of its entries are frozen.
const DEPENDENCY_VERSIONS = Object.freeze([
    ["pdfjs-dist", "6.0.227"],
    ["@napi-rs/canvas", "1.0.0"],
].map(([packageName, version]) => Object.freeze({ packageName, version })));
// Leading bytes of a PDF file: the ASCII characters "%PDF".
const PDF_MAGIC = Array.from("%PDF", (char) => char.charCodeAt(0));
|
|
9
|
+
/**
 * Reads one component from a matrix initializer.
 *
 * @param init Candidate initializer; only a value whose `length` is exactly 6 is honoured.
 * @param index Position of the wanted component inside `init`.
 * @param fallback Value used when `init` is unusable or the component is null/undefined.
 * @returns The component at `index`, or `fallback`.
 */
function pdfMatrixValue(init, index, fallback) {
    return init?.length === 6 ? (init[index] ?? fallback) : fallback;
}
/**
 * Minimal stand-in for the browser `DOMMatrix`, used as a polyfill when the
 * runtime has no `DOMMatrix` of its own.
 *
 * It only stores a 2D affine matrix given as `[a, b, c, d, e, f]`. All mutating
 * methods are deliberate no-ops that return `this`, and `transformPoint` echoes
 * the point's x/y without applying the matrix.
 */
class PdfTextDomMatrix {
    a;
    b;
    c;
    d;
    e;
    f;
    is2D = true;
    isIdentity;
    m11;
    m12;
    m13 = 0;
    m14 = 0;
    m21;
    m22;
    m23 = 0;
    m24 = 0;
    m31 = 0;
    m32 = 0;
    m33 = 1;
    m34 = 0;
    m41;
    m42;
    m43 = 0;
    m44 = 1;
    /**
     * @param init Optional six-element initializer `[a, b, c, d, e, f]`; anything
     *   else yields the identity matrix.
     */
    constructor(init) {
        this.a = pdfMatrixValue(init, 0, 1);
        this.b = pdfMatrixValue(init, 1, 0);
        this.c = pdfMatrixValue(init, 2, 0);
        this.d = pdfMatrixValue(init, 3, 1);
        this.e = pdfMatrixValue(init, 4, 0);
        this.f = pdfMatrixValue(init, 5, 0);
        // The 4x4 names alias the 2D components: a=m11, b=m12, c=m21, d=m22,
        // e=m41, f=m42. m12/m21 must follow b/c so they agree with the array
        // layout produced by toFloat32Array/toFloat64Array.
        this.m11 = this.a;
        this.m12 = this.b;
        this.m21 = this.c;
        this.m22 = this.d;
        this.m41 = this.e;
        this.m42 = this.f;
        this.isIdentity =
            this.a === 1 && this.b === 0 && this.c === 0 && this.d === 1 && this.e === 0 && this.f === 0;
    }
    /** Lists the 16 matrix entries in the order used by the typed-array exports. */
    #entries() {
        return [
            this.a, this.b, 0, 0,
            this.c, this.d, 0, 0,
            0, 0, 1, 0,
            this.e, this.f, 0, 1,
        ];
    }
    /** No-op: the matrix is never modified. */
    multiplySelf() {
        return this;
    }
    /** No-op: the matrix is never modified. */
    preMultiplySelf() {
        return this;
    }
    /** No-op: the matrix is never modified. */
    translateSelf() {
        return this;
    }
    /** No-op: the matrix is never modified. */
    scaleSelf() {
        return this;
    }
    /** No-op: the matrix is never modified. */
    rotateSelf() {
        return this;
    }
    /** No-op: the matrix is never modified. */
    invertSelf() {
        return this;
    }
    /** Returns the point's x/y (defaulting to 0) without applying the matrix. */
    transformPoint(point = {}) {
        return { x: point.x ?? 0, y: point.y ?? 0 };
    }
    /** @returns The 16 entries as single-precision floats. */
    toFloat32Array() {
        return new Float32Array(this.#entries());
    }
    /**
     * @returns The 16 entries as double-precision floats, built directly from
     *   the stored numbers so no value is rounded through Float32 first.
     */
    toFloat64Array() {
        return new Float64Array(this.#entries());
    }
}
|
|
96
|
+
/**
 * Minimal stand-in for the browser `ImageData`, used as a polyfill when the
 * runtime has none. Supports the `(data, width, height)` and `(width, height)`
 * constructor forms.
 */
class PdfTextImageData {
    data;
    width;
    height;
    constructor(dataOrWidth, width, height) {
        const pixelsProvided = dataOrWidth instanceof Uint8ClampedArray;
        if (!pixelsProvided) {
            // (width, height) form: the second argument is the height, and a
            // zero-filled RGBA buffer of matching size is allocated.
            this.width = dataOrWidth;
            this.height = width ?? 0;
            this.data = new Uint8ClampedArray(this.width * this.height * 4);
            return;
        }
        // (data, width, height) form: keep the caller's buffer as-is.
        this.data = dataOrWidth;
        this.width = width ?? 0;
        this.height = height ?? 0;
    }
}
|
|
112
|
+
/**
 * Installs stand-ins for `DOMMatrix`, `ImageData` and `Path2D` on `globalThis`.
 * A global that already holds a non-nullish value is left untouched.
 */
function installPdfTextExtractionDomPolyfills() {
    const scope = globalThis;
    if (scope.DOMMatrix == null) {
        scope.DOMMatrix = PdfTextDomMatrix;
    }
    if (scope.ImageData == null) {
        scope.ImageData = PdfTextImageData;
    }
    if (scope.Path2D == null) {
        // Path2D only needs to be constructible; it carries no behaviour.
        scope.Path2D = function PdfTextPath2D() {
            return undefined;
        };
    }
}
|
|
120
|
+
/**
 * Tells whether `bytes` begins with the PDF magic number.
 *
 * @param bytes Indexable byte sequence with a `length`.
 * @returns `true` when the first bytes equal PDF_MAGIC; `false` when they
 *   differ or `bytes` is shorter than the magic number.
 */
function hasPdfMagic(bytes) {
    const magicLength = PDF_MAGIC.length;
    return (bytes.length >= magicLength &&
        PDF_MAGIC.every((expected, offset) => bytes[offset] === expected));
}
|
|
129
|
+
/**
 * Decides whether the parser input looks like a PDF. The checks run in order
 * and the first hit wins: file extension, then media type (both compared
 * case-insensitively), then the magic number at the start of the bytes.
 */
function isPdf(input) {
    if (input.extension.toLowerCase() === "pdf") {
        return true;
    }
    if (input.mediaType.toLowerCase() === "application/pdf") {
        return true;
    }
    return hasPdfMagic(input.bytes);
}
|
|
134
|
+
/** Builds the empty parse result returned when the caller aborted the parse. */
function cancelled(capability, input, options) {
    const { documentId } = input;
    const notice = diagnostic("PARSER_CANCELLED", "caller aborted parser", documentId, "info");
    return emptyResult(capability, documentId, options, [notice]);
}
|
|
139
|
+
/**
 * Reports whether an optional abort signal has fired.
 *
 * @param signal An object with an `aborted` flag, or null/undefined.
 * @returns `true` only when `signal.aborted` is exactly `true`.
 */
function isAborted(signal) {
    if (signal == null) {
        return false;
    }
    return signal.aborted === true;
}
|
|
142
|
+
/**
 * Creates the synchronous `parse` entry point for the PDF adapter. The PDF
 * parser only works asynchronously, so the returned function never parses: it
 * yields an empty result carrying an UNSUPPORTED_FORMAT diagnostic and a
 * single "pdf-async-required" unsupported-media unit.
 */
function syncFallback(capability) {
    return (input, options) => {
        const { documentId } = input;
        const diagnostics = [
            diagnostic("UNSUPPORTED_FORMAT", "pdf parser requires async caller; use parseAsync via discovery", documentId, "info"),
        ];
        const units = [unsupportedMediaUnit(documentId, "pdf-async-required")];
        return emptyResult(capability, documentId, options, diagnostics, units);
    };
}
|
|
149
|
+
/**
 * Loads a PDF document from an in-memory byte array using pdfjs-dist.
 *
 * @param bytes The complete PDF file contents.
 * @returns The promise exposed by the pdf.js loading task.
 */
export async function loadPdfDocument(bytes) {
    // pdfjs-dist imports browser geometry constructors even for text-layer extraction.
    // Keiko does not render PDFs here, so minimal no-op constructors are sufficient.
    installPdfTextExtractionDomPolyfills();
    const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
    const loadingOptions = {
        data: bytes,
        useWorkerFetch: false,
        verbosity: 0,
    };
    const loadingTask = pdfjs.getDocument(loadingOptions);
    return loadingTask.promise;
}
|
|
161
|
+
// Chunk size (256 KiB) passed to pdf.js as `rangeChunkSize` for range-based loading.
const PDF_RANGE_CHUNK_BYTES = 256 * 1024;
|
|
162
|
+
/**
 * Builds a range transport that answers byte-range requests from `source`.
 *
 * @param Transport Base transport class to extend; constructed with
 *   `(totalBytes, initialData, progressiveDone)`.
 * @param source Object exposing `totalBytes` and `readWindow(start, length)`.
 * @returns A transport instance created with no initial data.
 */
function createRangeTransport(Transport, source) {
    class WorkspacePdfRangeTransport extends Transport {
        requestDataRange(begin, end) {
            // Normalize the requested window to whole, non-negative byte offsets.
            const start = Math.max(0, Math.floor(begin));
            const length = Math.max(0, Math.floor(end) - start);
            const deliver = (chunk) => {
                this.onDataRange(start, chunk);
            };
            const pending = source.readWindow(start, length);
            // A failed read is answered with an empty chunk rather than an error.
            void pending.then(deliver).catch(() => deliver(new Uint8Array(0)));
        }
    }
    const noInitialData = new Uint8Array(0);
    return new WorkspacePdfRangeTransport(source.totalBytes, noInitialData, false);
}
|
|
179
|
+
/**
 * Loads a PDF document from a byte source.
 *
 * When the source offers `loadFullBuffer`, the whole file is read and handed to
 * loadPdfDocument. Otherwise the document is opened through a range transport
 * that pulls byte windows from `source.readWindow` on demand.
 *
 * @param source Object with `totalBytes` and at least one of `loadFullBuffer`
 *   or `readWindow`.
 * @returns The promise exposed by the pdf.js loading task.
 * @throws {Error} When the source provides neither `loadFullBuffer` nor `readWindow`.
 */
export async function loadPdfDocumentFromSource(source) {
    if (source.loadFullBuffer !== undefined) {
        return loadPdfDocument(await source.loadFullBuffer());
    }
    if (source.readWindow === undefined) {
        throw new Error("pdf source does not support range reads");
    }
    installPdfTextExtractionDomPolyfills();
    const pdfjs = (await import("pdfjs-dist/legacy/build/pdf.mjs"));
    const rangeSource = {
        totalBytes: source.totalBytes,
        // Call through `source` instead of copying the function reference, so a
        // readWindow implemented as a method keeps `source` as its receiver
        // (the same way loadFullBuffer is invoked above).
        readWindow: (startByte, length) => source.readWindow(startByte, length),
    };
    const task = pdfjs.getDocument({
        range: createRangeTransport(pdfjs.PDFDataRangeTransport, rangeSource),
        rangeChunkSize: PDF_RANGE_CHUNK_BYTES,
        disableAutoFetch: true,
        disableStream: true,
        useWorkerFetch: false,
        verbosity: 0,
    });
    return task.promise;
}
|
|
201
|
+
/** Creates an "unsupported-media" unit for `documentId` carrying the given reason. */
function unsupportedMediaUnit(documentId, reason) {
    const kind = "unsupported-media";
    return { kind, documentId, reason };
}
|
|
204
|
+
/**
 * Converts a page record into a "page" unit. `pageLabel` is copied only when
 * the record defines it, so an unlabeled page yields a unit without that key.
 */
function pageUnit(page) {
    const { documentId, pageNumber, pageLabel, characterStart, characterEnd } = page;
    const label = pageLabel === undefined ? {} : { pageLabel };
    return {
        kind: "page",
        documentId,
        pageNumber,
        ...label,
        characterStart,
        characterEnd,
    };
}
|
|
222
|
+
/**
 * Turns a stop decision into an "info" diagnostic for the input's document.
 *
 * @returns The diagnostic, or `undefined` when the decision does not ask to
 *   stop or lacks a code or message.
 */
function limitDiagnostic(input, limit) {
    const reportable = limit.stop && limit.code !== undefined && limit.message !== undefined;
    return reportable
        ? diagnostic(limit.code, limit.message, input.documentId, "info")
        : undefined;
}
|
|
228
|
+
/**
 * Checks whether page-text reading has to stop.
 *
 * The per-document object budget is checked first; otherwise the decision is
 * delegated to shouldStop and converted through limitDiagnostic.
 *
 * @returns A diagnostic describing why to stop, or `undefined` to continue.
 */
function pageTextStopDiagnostic(state) {
    const { input, options, startedAt, emittedUnits, scannedObjects } = state;
    const objectBudget = options.maxObjectsPerDocument;
    if (scannedObjects >= objectBudget) {
        return objectLimitDiagnostic(input.documentId, objectBudget);
    }
    const decision = shouldStop(startedAt, options, emittedUnits);
    return limitDiagnostic(input, decision);
}
|
|
234
|
+
/**
 * Appends the trimmed, non-empty text of each item to `tokens`.
 *
 * The stop check runs before every item. Each processed item counts as one
 * scanned object, including items that contribute no text.
 *
 * @param tokens Output array, mutated in place.
 * @param items Text items of one chunk.
 * @param state Read state; never mutated, a fresh copy is made per counted item.
 * @returns `{ state }` with the updated state, plus `diagnostic` when a stop
 *   condition interrupted the loop.
 */
function appendPdfTextItems(tokens, items, state) {
    let current = state;
    for (const item of items) {
        const stop = pageTextStopDiagnostic(current);
        if (stop !== undefined) {
            return { state: current, diagnostic: stop };
        }
        current = { ...current, scannedObjects: current.scannedObjects + 1 };
        const value = item.str?.trim();
        if (value === undefined || value.length === 0) {
            continue;
        }
        tokens.push(value);
    }
    return { state: current };
}
|
|
249
|
+
/**
 * Reads the text of one PDF page from its text-content stream.
 *
 * Chunks are consumed until the stream ends or a stop condition is reported
 * (checked before every read and before every item). Item texts are joined
 * with single spaces.
 *
 * @param page Page exposing `streamTextContent()`.
 * @param state Read state used for the stop checks; never mutated.
 * @returns The collected text and the updated scanned-object count, plus
 *   `diagnostic` when reading stopped before the stream ended.
 */
export async function readPageText(page, state) {
    const reader = page.streamTextContent().getReader();
    const tokens = [];
    let next = state;
    // Set once the stream reports `done`; any other exit leaves data unread.
    let drained = false;
    // Builds the result from the text gathered so far.
    const snapshot = (stopDiagnostic) => {
        const result = { text: tokens.join(" ").trim(), scannedObjects: next.scannedObjects };
        return stopDiagnostic === undefined ? result : { ...result, diagnostic: stopDiagnostic };
    };
    try {
        for (;;) {
            const stopped = pageTextStopDiagnostic(next);
            if (stopped !== undefined) {
                return snapshot(stopped);
            }
            const read = await reader.read();
            if (read.done) {
                drained = true;
                return snapshot();
            }
            const appended = appendPdfTextItems(tokens, read.value.items, next);
            next = appended.state;
            if (appended.diagnostic !== undefined) {
                return snapshot(appended.diagnostic);
            }
        }
    }
    finally {
        if (!drained) {
            // Leaving early (stop condition or failure): cancel the stream so its
            // producer stops instead of staying open after the lock is released.
            // Cancellation is best-effort cleanup and must not replace the
            // result or error already on its way to the caller.
            try {
                await reader.cancel();
            }
            catch {
                // Nothing further to do; the lock is still released below.
            }
        }
        reader.releaseLock();
    }
}
|
|
283
|
+
/**
 * Records one page: appends its page record to `pages` and the matching page
 * unit to `units`.
 *
 * @param cursor Character offset at which this page's text starts.
 * @returns The offset at which the next page's text starts.
 */
function appendPageRecord(pages, units, input, pageNumber, text, cursor) {
    const characterEnd = cursor + text.length;
    const record = {
        documentId: input.documentId,
        pageNumber,
        pageLabel: String(pageNumber),
        characterStart: cursor,
        characterEnd,
    };
    pages.push(record);
    units.push(pageUnit(record));
    // Leave room for the two-character "\n\n" that asyncParse puts between
    // page texts when it builds the normalized text.
    return characterEnd + 2;
}
|
|
298
|
+
/**
 * Builds the empty result for a PDF that produced no page text.
 *
 * When extraction already raised diagnostics, those are returned unchanged.
 * Otherwise the document is reported as having no extractable text layer, via
 * an UNSUPPORTED_FORMAT diagnostic and a "pdf-no-text-layer" unit.
 */
function noTextResult(capability, input, options, diagnostics = []) {
    const { documentId } = input;
    const hasDiagnostics = diagnostics.length > 0;
    if (!hasDiagnostics) {
        const notice = diagnostic("UNSUPPORTED_FORMAT", "pdf has no extractable text layer", documentId, "info");
        const units = [unsupportedMediaUnit(documentId, "pdf-no-text-layer")];
        return emptyResult(capability, documentId, options, [notice], units);
    }
    return emptyResult(capability, documentId, options, diagnostics);
}
|
|
306
|
+
/**
 * Extracts text page by page from a loaded PDF document.
 *
 * Pages are visited in order starting at 1. Extraction ends early when
 * shouldStop asks for it or when reading a page reports a diagnostic; the text
 * of that interrupted page is discarded. Pages without text are skipped and
 * produce no record.
 *
 * @returns The diagnostics, page records, page units and per-page texts.
 */
export async function extractPages(doc, input, options, startedAt) {
    const diagnostics = [];
    const pages = [];
    const units = [];
    const pageTexts = [];
    let cursor = 0;
    let scannedObjects = 0;
    for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber += 1) {
        const limit = shouldStop(startedAt, options, units.length);
        if (limit.stop) {
            const notice = limitDiagnostic(input, limit);
            if (notice !== undefined) {
                diagnostics.push(notice);
            }
            break;
        }
        const page = await doc.getPage(pageNumber);
        const readState = { input, options, startedAt, emittedUnits: units.length, scannedObjects };
        const { text, scannedObjects: scannedSoFar, diagnostic: stopNotice } = await readPageText(page, readState);
        // Carry the object count forward so the budget spans the whole document.
        scannedObjects = scannedSoFar;
        if (stopNotice !== undefined) {
            diagnostics.push(stopNotice);
            break;
        }
        if (text.length > 0) {
            pageTexts.push(text);
            cursor = appendPageRecord(pages, units, input, pageNumber, text, cursor);
        }
    }
    return { diagnostics, pages, units, pageTexts };
}
|
|
342
|
+
/**
 * Asynchronous PDF parse entry point.
 *
 * Order of checks:
 *  1. Inputs larger than `options.maxBytes` yield an empty result with an
 *     oversize diagnostic, before any parsing is attempted.
 *  2. An already-aborted signal yields the cancelled result.
 *  3. The document is loaded and its pages extracted; if the signal is
 *     aborted afterwards, the cancelled result wins over any extracted data.
 *  4. If no page produced a record, `noTextResult` is returned.
 *
 * Any exception thrown while loading or extracting is converted into a result
 * rather than propagated: the cancelled result when the signal is aborted,
 * otherwise an empty result with a `MALFORMED_INPUT` error diagnostic.
 *
 * @param capability Parser capability used for the parser identity.
 * @param input      Parser input (`bytes`, `documentId`).
 * @param options    Parser options (`maxBytes`, `now`, optional `signal`, ...).
 * @returns The parse result object (never rejects for parser failures).
 */
async function asyncParse(capability, input, options) {
    if (input.bytes.byteLength > options.maxBytes) {
        return emptyResult(capability, input.documentId, options, [
            oversizeDiagnostic(input.documentId, input.bytes.byteLength, options.maxBytes),
        ]);
    }
    if (options.signal?.aborted === true) {
        return cancelled(capability, input, options);
    }
    const startedAt = options.now();
    try {
        const doc = await loadPdfDocument(input.bytes);
        const { diagnostics, pages, units, pageTexts } = await extractPages(doc, input, options, startedAt);
        if (isAborted(options.signal)) {
            return cancelled(capability, input, options);
        }
        if (pages.length === 0) {
            return noTextResult(capability, input, options, diagnostics);
        }
        return {
            documentId: input.documentId,
            parser: parserIdentity(capability),
            pages,
            sections: [],
            units,
            diagnostics,
            extractedAt: options.now(),
            normalizedText: pageTexts.join("\n\n"),
        };
    }
    catch {
        // Cancellation takes priority on the success path above; keep that
        // priority here so an abort that races with (or causes) a failure is
        // not misreported as a malformed document.
        if (isAborted(options.signal)) {
            return cancelled(capability, input, options);
        }
        return emptyResult(capability, input.documentId, options, [
            diagnostic("MALFORMED_INPUT", "pdf parser rejected malformed or unsupported document", input.documentId, "error"),
        ]);
    }
}
|
|
378
|
+
// Immutable capability descriptor for the PDF parser: identity, versions and
// the predicate used to decide whether an input is handled by this parser.
const capability = Object.freeze({
    parserId: PARSER_ID,
    parserVersion: PARSER_VERSION,
    dependencyVersions: DEPENDENCY_VERSIONS,
    matches: isPdf,
});
// Public PDF parser adapter. `parse` is whatever `syncFallback` builds for this
// capability; `parseAsync` delegates to `asyncParse` with the capability bound.
export const pdfParser = Object.freeze({
    capability,
    parse: syncFallback(capability),
    parseAsync(input, options) {
        return asyncParse(capability, input, options);
    },
});
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { unsupportedParser } from "./unsupported-parser.js";
|
|
2
|
+
import { type ParserAdapter, type ParserOptions, type ParserRegistry, type ParserResolution, type ParserSelectionInput } from "./types.js";
|
|
3
|
+
export declare function createParserRegistry(): ParserRegistry;
|
|
4
|
+
export declare function registerParser(registry: ParserRegistry, adapter: ParserAdapter): ParserRegistry;
|
|
5
|
+
export declare function resolveParser(registry: ParserRegistry, input: ParserSelectionInput): ParserResolution;
|
|
6
|
+
export declare function buildParserOptions(overrides?: Partial<ParserOptions>): ParserOptions;
|
|
7
|
+
export { unsupportedParser };
|
|
8
|
+
//# sourceMappingURL=registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/parsers/registry.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAML,KAAK,aAAa,EAClB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EAC1B,MAAM,YAAY,CAAC;AAMpB,wBAAgB,oBAAoB,IAAI,cAAc,CAOrD;AAED,wBAAgB,cAAc,CAAC,QAAQ,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,GAAG,cAAc,CAS/F;AAED,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,cAAc,EACxB,KAAK,EAAE,oBAAoB,GAC1B,gBAAgB,CAElB;AAuBD,wBAAgB,kBAAkB,CAAC,SAAS,GAAE,OAAO,CAAC,aAAa,CAAM,GAAG,aAAa,CAUxF;AAED,OAAO,EAAE,iBAAiB,EAAE,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// Parser registry (Epic #189, Issue #266). A frozen-after-build ordered list of `ParserAdapter`
|
|
2
|
+
// values. `resolveParser` returns the first registered adapter whose capability matches the
|
|
3
|
+
// input, OR the unsupported sentinel. The registry is intentionally tiny — selection logic
|
|
4
|
+
// lives inside the adapters' `matches` predicates so a new format can land by adding one
|
|
5
|
+
// file plus one register call.
|
|
6
|
+
import { unsupportedParser } from "./unsupported-parser.js";
|
|
7
|
+
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_NESTING_DEPTH, DEFAULT_MAX_OBJECTS, DEFAULT_MAX_UNITS, DEFAULT_TIMEOUT_MS, } from "./types.js";
|
|
8
|
+
/**
 * Creates an empty, frozen parser registry.
 *
 * @returns A frozen object with `list()` (a fresh frozen snapshot of the
 *          registered adapters, initially empty) and `resolve(input)`.
 */
export function createParserRegistry() {
    const adapters = [];
    // Hand out a frozen copy so callers cannot reach the backing array.
    const list = () => Object.freeze(adapters.slice());
    const resolve = (input) => resolveFromList(adapters, input);
    return Object.freeze({ list, resolve });
}
|
|
15
|
+
/**
 * Returns a new frozen registry containing every adapter of `registry`
 * followed by `adapter`.
 *
 * The given registry is never mutated: a brand-new value is produced so that
 * composition layers can treat a registry as immutable configuration.
 *
 * @param registry Existing registry; only its `list()` is consulted.
 * @param adapter  Adapter appended after the existing ones.
 * @returns A frozen registry exposing `list()` and `resolve(input)`.
 */
export function registerParser(registry, adapter) {
    const adapters = Array.from(registry.list());
    adapters.push(adapter);
    // Each call to list() yields its own frozen snapshot.
    const list = () => Object.freeze(adapters.slice());
    const resolve = (input) => resolveFromList(adapters, input);
    return Object.freeze({ list, resolve });
}
|
|
25
|
+
/**
 * Resolves `input` against `registry` by delegating to `registry.resolve`.
 *
 * @param registry Registry whose `resolve` method is invoked.
 * @param input    Selection input handed to the registry.
 * @returns The resolution produced by the registry.
 */
export function resolveParser(registry, input) {
    const resolution = registry.resolve(input);
    return resolution;
}
|
|
28
|
+
/**
 * Picks the adapter responsible for `input`.
 *
 * Adapters are tried in registration order and the first whose
 * `capability.matches(input)` is truthy wins. The `unsupportedParser` sentinel
 * is skipped during that pass even if it was registered, and is consulted only
 * afterwards, so it can never shadow a real adapter.
 *
 * @param adapters Registered adapters, in registration order.
 * @param input    Selection input passed to each `matches` predicate.
 * @returns `{ kind: "matched", adapter }`, or
 *          `{ kind: "unsupported", reason: "no-adapter-matched" }` when neither
 *          a registered adapter nor the sentinel claims the input.
 */
function resolveFromList(adapters, input) {
    const winner = adapters.find((candidate) => candidate !== unsupportedParser && candidate.capability.matches(input));
    if (winner !== undefined) {
        return { kind: "matched", adapter: winner };
    }
    // No real adapter matched: let the sentinel claim the input if it recognises it.
    return unsupportedParser.capability.matches(input)
        ? { kind: "matched", adapter: unsupportedParser }
        : { kind: "unsupported", reason: "no-adapter-matched" };
}
|
|
44
|
+
/**
 * Builds a complete `ParserOptions` value, filling every field the caller did
 * not supply (or supplied as `null`/`undefined`) with its default.
 *
 * `signal` has no default: the key is present on the result only when the
 * caller provided one.
 *
 * @param overrides Partial options; only the fields to override are needed.
 * @returns A new options object.
 */
export function buildParserOptions(overrides = {}) {
    const { maxBytes, maxUnitsPerDocument, maxNestingDepth, maxObjectsPerDocument, timeoutMs, now, signal } = overrides;
    const resolved = {
        maxBytes: maxBytes ?? DEFAULT_MAX_BYTES,
        maxUnitsPerDocument: maxUnitsPerDocument ?? DEFAULT_MAX_UNITS,
        maxNestingDepth: maxNestingDepth ?? DEFAULT_MAX_NESTING_DEPTH,
        maxObjectsPerDocument: maxObjectsPerDocument ?? DEFAULT_MAX_OBJECTS,
        timeoutMs: timeoutMs ?? DEFAULT_TIMEOUT_MS,
        now: now ?? (() => Date.now()),
    };
    if (signal !== undefined) {
        // Added last so the key order matches an options object with a signal.
        resolved.signal = signal;
    }
    return resolved;
}
|
|
57
|
+
export { unsupportedParser };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/text-parser.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EAAE,aAAa,EAAuC,MAAM,YAAY,CAAC;AAgOrF,eAAO,MAAM,UAAU,EAAE,aAyBvB,CAAC"}
|