@semiont/core 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -140
- package/dist/config/node-config-loader.d.ts +1 -5
- package/dist/config/node-config-loader.js +0 -4
- package/dist/config/node-config-loader.js.map +1 -1
- package/dist/index.d.ts +504 -198
- package/dist/index.js +443 -137
- package/dist/index.js.map +1 -1
- package/package.json +14 -13
package/dist/index.d.ts
CHANGED
|
@@ -1545,8 +1545,8 @@ interface paths {
|
|
|
1545
1545
|
file: string;
|
|
1546
1546
|
/** @description Media type of the content (e.g. text/plain, text/markdown, image/png) */
|
|
1547
1547
|
format: string;
|
|
1548
|
-
/** @description Where the content
|
|
1549
|
-
storageUri
|
|
1548
|
+
/** @description Where the content lives (file://... for local). Required — the client names the location; the server does not derive one. */
|
|
1549
|
+
storageUri: string;
|
|
1550
1550
|
/** @description ISO 639-1 language code */
|
|
1551
1551
|
language?: string;
|
|
1552
1552
|
/** @description JSON-stringified array of entity type names */
|
|
@@ -1574,7 +1574,7 @@ interface paths {
|
|
|
1574
1574
|
"application/json": components["schemas"]["CreateResourceResponse"];
|
|
1575
1575
|
};
|
|
1576
1576
|
};
|
|
1577
|
-
/** @description Missing required fields (name, file, or
|
|
1577
|
+
/** @description Missing required fields (name, file, format, or storageUri), or the format's base MIME type is not a SupportedMediaType. The error message names the offending type. */
|
|
1578
1578
|
400: {
|
|
1579
1579
|
headers: {
|
|
1580
1580
|
[name: string]: unknown;
|
|
@@ -1608,20 +1608,17 @@ interface paths {
|
|
|
1608
1608
|
cookie?: never;
|
|
1609
1609
|
};
|
|
1610
1610
|
/**
|
|
1611
|
-
* Get a resource (
|
|
1612
|
-
* @description Content
|
|
1613
|
-
* - application/ld+json (default) — returns JSON-LD metadata including all annotations + inbound entity references
|
|
1614
|
-
* - text/* or image/* or application/pdf — returns the raw representation bytes
|
|
1611
|
+
* Get a resource's stored representation (the pipe)
|
|
1612
|
+
* @description Returns the stored representation's bytes, verbatim, with the stored media type in Content-Type (`application/octet-stream` when the stored metadata carries none). The Accept header is never read: there is no content negotiation and no transcoding, so the served bytes always hash to the representation's registered checksum.
|
|
1615
1613
|
*
|
|
1616
|
-
*
|
|
1614
|
+
* The resource's JSON-LD description (descriptor + annotations + inbound entity references) lives at GET /resources/{id}/jsonld; every content response advertises it via a `Link: rel="describedby"` header.
|
|
1615
|
+
*
|
|
1616
|
+
* Content is immutable (checksum-addressed), and this route is bearer-authenticated, so responses carry `Cache-Control: private, max-age=31536000, immutable`.
|
|
1617
1617
|
*/
|
|
1618
1618
|
get: {
|
|
1619
1619
|
parameters: {
|
|
1620
1620
|
query?: never;
|
|
1621
|
-
header?:
|
|
1622
|
-
/** @description application/ld+json for metadata, or a text/image/pdf media type for raw content */
|
|
1623
|
-
Accept?: string;
|
|
1624
|
-
};
|
|
1621
|
+
header?: never;
|
|
1625
1622
|
path: {
|
|
1626
1623
|
id: string;
|
|
1627
1624
|
};
|
|
@@ -1629,21 +1626,18 @@ interface paths {
|
|
|
1629
1626
|
};
|
|
1630
1627
|
requestBody?: never;
|
|
1631
1628
|
responses: {
|
|
1632
|
-
/** @description
|
|
1629
|
+
/** @description The stored representation's bytes, verbatim */
|
|
1633
1630
|
200: {
|
|
1634
1631
|
headers: {
|
|
1632
|
+
/** @description `</resources/{id}/jsonld>; rel="describedby"; type="application/ld+json"` — where to dereference the resource's JSON-LD description */
|
|
1633
|
+
Link?: string;
|
|
1635
1634
|
[name: string]: unknown;
|
|
1636
1635
|
};
|
|
1637
1636
|
content: {
|
|
1638
|
-
"
|
|
1639
|
-
"text/plain": string;
|
|
1640
|
-
"text/markdown": string;
|
|
1641
|
-
"text/html": string;
|
|
1642
|
-
"image/*": string;
|
|
1643
|
-
"application/pdf": string;
|
|
1637
|
+
"*/*": string;
|
|
1644
1638
|
};
|
|
1645
1639
|
};
|
|
1646
|
-
/** @description Resource not found */
|
|
1640
|
+
/** @description Resource or representation not found */
|
|
1647
1641
|
404: {
|
|
1648
1642
|
headers: {
|
|
1649
1643
|
[name: string]: unknown;
|
|
@@ -1661,6 +1655,58 @@ interface paths {
|
|
|
1661
1655
|
"application/json": components["schemas"]["ErrorResponse"];
|
|
1662
1656
|
};
|
|
1663
1657
|
};
|
|
1658
|
+
};
|
|
1659
|
+
};
|
|
1660
|
+
put?: never;
|
|
1661
|
+
post?: never;
|
|
1662
|
+
delete?: never;
|
|
1663
|
+
options?: never;
|
|
1664
|
+
head?: never;
|
|
1665
|
+
patch?: never;
|
|
1666
|
+
trace?: never;
|
|
1667
|
+
};
|
|
1668
|
+
"/resources/{id}/jsonld": {
|
|
1669
|
+
parameters: {
|
|
1670
|
+
query?: never;
|
|
1671
|
+
header?: never;
|
|
1672
|
+
path?: never;
|
|
1673
|
+
cookie?: never;
|
|
1674
|
+
};
|
|
1675
|
+
/**
|
|
1676
|
+
* Get a resource's JSON-LD description
|
|
1677
|
+
* @description The dereferenceable linked-data description of the resource: its descriptor plus all annotations and inbound entity references, assembled via the bus gateway. This is the target of the `Link: rel="describedby"` header that GET /resources/{id} sends with every content response.
|
|
1678
|
+
*
|
|
1679
|
+
* Live data — annotations and references change — so responses carry `Cache-Control: no-cache`, in contrast to the immutable content at GET /resources/{id}.
|
|
1680
|
+
*/
|
|
1681
|
+
get: {
|
|
1682
|
+
parameters: {
|
|
1683
|
+
query?: never;
|
|
1684
|
+
header?: never;
|
|
1685
|
+
path: {
|
|
1686
|
+
id: string;
|
|
1687
|
+
};
|
|
1688
|
+
cookie?: never;
|
|
1689
|
+
};
|
|
1690
|
+
requestBody?: never;
|
|
1691
|
+
responses: {
|
|
1692
|
+
/** @description The resource's JSON-LD description */
|
|
1693
|
+
200: {
|
|
1694
|
+
headers: {
|
|
1695
|
+
[name: string]: unknown;
|
|
1696
|
+
};
|
|
1697
|
+
content: {
|
|
1698
|
+
"application/ld+json": components["schemas"]["GetResourceResponse"];
|
|
1699
|
+
};
|
|
1700
|
+
};
|
|
1701
|
+
/** @description Resource not found */
|
|
1702
|
+
404: {
|
|
1703
|
+
headers: {
|
|
1704
|
+
[name: string]: unknown;
|
|
1705
|
+
};
|
|
1706
|
+
content: {
|
|
1707
|
+
"application/json": components["schemas"]["ErrorResponse"];
|
|
1708
|
+
};
|
|
1709
|
+
};
|
|
1664
1710
|
/** @description Request timed out (bus gateway) */
|
|
1665
1711
|
504: {
|
|
1666
1712
|
headers: {
|
|
@@ -1688,10 +1734,10 @@ interface paths {
|
|
|
1688
1734
|
cookie?: never;
|
|
1689
1735
|
};
|
|
1690
1736
|
/**
|
|
1691
|
-
* Get
|
|
1692
|
-
* @description
|
|
1737
|
+
* Get a resource's stored representation (browser-friendly alias)
|
|
1738
|
+
* @description Identical pipe to GET /resources/{id} — verbatim bytes, stored media type in Content-Type, Accept never read. Exists only as the auth affordance for `<img>` / PDF.js / download links, which cannot carry Authorization headers: the `?token=` media token or the httpOnly semiont-token cookie ride along automatically.
|
|
1693
1739
|
*
|
|
1694
|
-
*
|
|
1740
|
+
* Responses carry `Cache-Control: public, max-age=31536000, immutable` — `public` is safe here, unlike the bearer-authenticated main route, because the `?token=` is part of the cache key.
|
|
1695
1741
|
*/
|
|
1696
1742
|
get: {
|
|
1697
1743
|
parameters: {
|
|
@@ -1704,17 +1750,15 @@ interface paths {
|
|
|
1704
1750
|
};
|
|
1705
1751
|
requestBody?: never;
|
|
1706
1752
|
responses: {
|
|
1707
|
-
/** @description
|
|
1753
|
+
/** @description The stored representation's bytes, verbatim */
|
|
1708
1754
|
200: {
|
|
1709
1755
|
headers: {
|
|
1756
|
+
/** @description `</resources/{id}/jsonld>; rel="describedby"; type="application/ld+json"` — where to dereference the resource's JSON-LD description */
|
|
1757
|
+
Link?: string;
|
|
1710
1758
|
[name: string]: unknown;
|
|
1711
1759
|
};
|
|
1712
1760
|
content: {
|
|
1713
|
-
"
|
|
1714
|
-
"text/markdown": string;
|
|
1715
|
-
"text/html": string;
|
|
1716
|
-
"image/*": string;
|
|
1717
|
-
"application/pdf": string;
|
|
1761
|
+
"*/*": string;
|
|
1718
1762
|
};
|
|
1719
1763
|
};
|
|
1720
1764
|
/** @description Resource or representation not found */
|
|
@@ -1895,22 +1939,10 @@ interface components {
|
|
|
1895
1939
|
targetResource?: components["schemas"]["ResourceDescriptor"] | null;
|
|
1896
1940
|
/** @description Gathered context for this annotation */
|
|
1897
1941
|
context?: components["schemas"]["GatheredContext"];
|
|
1898
|
-
/** @description DEPRECATED: Use 'context' instead. Legacy source context format. */
|
|
1899
|
-
sourceContext?: {
|
|
1900
|
-
before: string;
|
|
1901
|
-
selected: string;
|
|
1902
|
-
after: string;
|
|
1903
|
-
};
|
|
1904
1942
|
targetContext?: {
|
|
1905
1943
|
content: string;
|
|
1906
1944
|
summary?: string;
|
|
1907
1945
|
};
|
|
1908
|
-
suggestedResolution?: {
|
|
1909
|
-
resourceId: string;
|
|
1910
|
-
resourceName: string;
|
|
1911
|
-
confidence: number;
|
|
1912
|
-
reasoning: string;
|
|
1913
|
-
};
|
|
1914
1946
|
};
|
|
1915
1947
|
/** @description W3C Web Annotation target object - source is required, selector is optional */
|
|
1916
1948
|
AnnotationTarget: {
|
|
@@ -2059,10 +2091,15 @@ interface components {
|
|
|
2059
2091
|
details?: string;
|
|
2060
2092
|
};
|
|
2061
2093
|
/**
|
|
2062
|
-
* @description Content format as MIME type, optionally with charset
|
|
2094
|
+
* @description Content format as a MIME type, optionally with parameters. The base type (everything before the first ';') MUST be a SupportedMediaType; parameters such as charset are preserved as metadata. Semantic validation happens in code at the create/yield boundary — there is deliberately no pattern here, the vocabulary lives in SupportedMediaType. Examples: text/plain, text/plain; charset=iso-8859-1, text/markdown; charset=windows-1252, image/png, application/pdf
|
|
2063
2095
|
* @example text/plain; charset=utf-8
|
|
2064
2096
|
*/
|
|
2065
2097
|
ContentFormat: string;
|
|
2098
|
+
/**
|
|
2099
|
+
* @description Base MIME types (no parameters) admitted by Semiont. Membership is the create/yield gate — every member is storable, nameable, and uploadable. What more the system can do with a type (render, annotate, extract text, author) is curated per type in @semiont/core's media-type registry, which is keyed by this enum.
|
|
2100
|
+
* @enum {string}
|
|
2101
|
+
*/
|
|
2102
|
+
SupportedMediaType: "text/plain" | "text/markdown" | "text/html" | "text/css" | "text/csv" | "text/xml" | "application/json" | "application/xml" | "application/yaml" | "application/x-yaml" | "application/pdf" | "application/msword" | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | "application/vnd.ms-excel" | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-powerpoint" | "application/vnd.openxmlformats-officedocument.presentationml.presentation" | "application/zip" | "application/gzip" | "application/x-tar" | "application/x-7z-compressed" | "application/octet-stream" | "application/wasm" | "image/png" | "image/jpeg" | "image/gif" | "image/webp" | "image/svg+xml" | "image/bmp" | "image/tiff" | "image/x-icon" | "video/mp4" | "video/mpeg" | "video/webm" | "video/ogg" | "video/quicktime" | "video/x-msvideo" | "audio/mpeg" | "audio/wav" | "audio/ogg" | "audio/webm" | "audio/aac" | "audio/flac" | "text/javascript" | "application/javascript" | "text/x-typescript" | "application/typescript" | "text/x-python" | "text/x-java" | "text/x-c" | "text/x-c++" | "text/x-csharp" | "text/x-go" | "text/x-rust" | "text/x-ruby" | "text/x-php" | "text/x-swift" | "text/x-kotlin" | "text/x-shell" | "font/woff" | "font/woff2" | "font/ttf" | "font/otf";
|
|
2066
2103
|
ContextualSummaryResponse: {
|
|
2067
2104
|
summary: string;
|
|
2068
2105
|
relevantFields: {
|
|
@@ -2142,8 +2179,6 @@ interface components {
|
|
|
2142
2179
|
EventMetadata: {
|
|
2143
2180
|
/** @description Monotonic position in the event log (ordering authority) */
|
|
2144
2181
|
sequenceNumber: number;
|
|
2145
|
-
/** @description Byte offset in the JSONL file */
|
|
2146
|
-
streamPosition: number;
|
|
2147
2182
|
/** @description Optional correlation id propagated from a command. Lets clients match command-result events back to the POST that initiated them. Set by EventStore.appendEvent's options when a route handler passes one through. */
|
|
2148
2183
|
correlationId?: string;
|
|
2149
2184
|
};
|
|
@@ -3591,7 +3626,7 @@ declare function resourceAnnotationUri(uri: string): ResourceAnnotationUri;
|
|
|
3591
3626
|
* These types prevent mixing up resource IDs, annotation IDs, and user IDs
|
|
3592
3627
|
* at compile time while having zero runtime overhead.
|
|
3593
3628
|
*
|
|
3594
|
-
* URI types (ResourceUri, AnnotationUri) are in @semiont/
|
|
3629
|
+
* URI types (ResourceUri, AnnotationUri) are in @semiont/http-transport
|
|
3595
3630
|
* since they deal with HTTP URIs returned by the API.
|
|
3596
3631
|
*/
|
|
3597
3632
|
type ResourceId = string & {
|
|
@@ -3619,7 +3654,7 @@ type RawAnnotation = components['schemas']['Annotation'];
|
|
|
3619
3654
|
* `components['schemas']['Annotation']`, but with a branded `AnnotationId`
|
|
3620
3655
|
* for the `id` field. Use this import everywhere the codebase refers to
|
|
3621
3656
|
* "an annotation"; the raw OpenAPI type is only used inside
|
|
3622
|
-
* `@semiont/
|
|
3657
|
+
* `@semiont/http-transport` at the HTTP boundary.
|
|
3623
3658
|
*
|
|
3624
3659
|
* Implemented by intersection (not `Omit`) to be robust against generator
|
|
3625
3660
|
* drift — if the OpenAPI schema gets `additionalProperties: true` added,
|
|
@@ -3681,7 +3716,7 @@ interface EntityTypeStats {
|
|
|
3681
3716
|
/**
|
|
3682
3717
|
* Persisted Events
|
|
3683
3718
|
*
|
|
3684
|
-
* The
|
|
3719
|
+
* The event types that get appended to the JSONL event log.
|
|
3685
3720
|
* Each maps a type string to its OpenAPI payload schema.
|
|
3686
3721
|
* The PersistedEvent union derives from this catalog.
|
|
3687
3722
|
*/
|
|
@@ -3731,7 +3766,7 @@ type EventOfType<K extends keyof PersistedEventCatalog> = K extends SystemEventT
|
|
|
3731
3766
|
resourceId: ResourceId;
|
|
3732
3767
|
payload: PersistedEventCatalog[K];
|
|
3733
3768
|
};
|
|
3734
|
-
/** The union of all
|
|
3769
|
+
/** The union of all persisted event types. Discriminated on `type`. */
|
|
3735
3770
|
type PersistedEvent = {
|
|
3736
3771
|
[K in keyof PersistedEventCatalog]: EventOfType<K>;
|
|
3737
3772
|
}[keyof PersistedEventCatalog];
|
|
@@ -4123,23 +4158,13 @@ type EventName = keyof EventMap;
|
|
|
4123
4158
|
* for one caller) does NOT belong here. Those are per-caller correlation-ID
|
|
4124
4159
|
* responses and publish globally — the caller filters by `correlationId`.
|
|
4125
4160
|
*
|
|
4126
|
-
* The
|
|
4127
|
-
*
|
|
4128
|
-
*
|
|
4129
|
-
*
|
|
4161
|
+
* The SDK's resource-scoped `browse.*` live queries wire these channels —
|
|
4162
|
+
* subscribing acquires the scope via the transport's `subscribeToResource`
|
|
4163
|
+
* (`scope=id&scoped=<channel>`) so the SSE route delivers them to that
|
|
4164
|
+
* participant (freshness follows observation; #847). WorkerStateUnit uses this
|
|
4165
|
+
* list to decide which emitted events to scope to their resource.
|
|
4130
4166
|
*/
|
|
4131
|
-
|
|
4132
|
-
* Audit note (SIMPLE-BUS Phase 3 close): `yield:progress` was
|
|
4133
|
-
* considered for inclusion but has only one consumer — the
|
|
4134
|
-
* yield-initiator's Observable in `packages/api-client/src/namespaces/yield.ts`.
|
|
4135
|
-
* No viewer of the resource other than the initiator subscribes to
|
|
4136
|
-
* progress. Scoping therefore serves no fan-out-narrowing purpose for
|
|
4137
|
-
* that channel, so it stays global (as a correlation-ID-shaped
|
|
4138
|
-
* response, filtered by `referenceId`). Only `yield:finished` and
|
|
4139
|
-
* `yield:failed` have a genuine multi-participant consumer (the
|
|
4140
|
-
* ResourceViewerPage toast on the source resource).
|
|
4141
|
-
*/
|
|
4142
|
-
declare const RESOURCE_BROADCAST_TYPES: readonly ["job:complete", "job:fail"];
|
|
4167
|
+
declare const RESOURCE_BROADCAST_TYPES: readonly [];
|
|
4143
4168
|
type ResourceBroadcastType = typeof RESOURCE_BROADCAST_TYPES[number];
|
|
4144
4169
|
/**
|
|
4145
4170
|
* Authoritative map from bus channel to OpenAPI schema name.
|
|
@@ -4956,6 +4981,45 @@ declare function extractBoundingBox(svg: string): {
|
|
|
4956
4981
|
height: number;
|
|
4957
4982
|
} | null;
|
|
4958
4983
|
|
|
4984
|
+
/**
|
|
4985
|
+
* PDF viewrect FragmentSelector codec.
|
|
4986
|
+
*
|
|
4987
|
+
* `PdfCoordinate` is a bounding rectangle in PDF point space: origin at the
|
|
4988
|
+
* bottom-left of the page, Y increasing upward. The Y-flip to canvas pixels
|
|
4989
|
+
* lives in the browser (`react-ui`); the server has no canvas.
|
|
4990
|
+
*
|
|
4991
|
+
* These functions are the viewrect peer of the W3C `FragmentSelector` wrapper in
|
|
4992
|
+
* `web-annotation-utils`: they serialize/parse the RFC 3778
|
|
4993
|
+
* `page=N&viewrect=left,top,width,height` value. `@semiont/content` (geometry
|
|
4994
|
+
* from the text layer), `@semiont/jobs` (serialization at write time), and the
|
|
4995
|
+
* browser canvas all import them from here — no package reaches into the UI.
|
|
4996
|
+
*
|
|
4997
|
+
* RFC 3778 PDF Fragment Identifiers: https://tools.ietf.org/html/rfc3778
|
|
4998
|
+
*/
|
|
4999
|
+
/**
|
|
5000
|
+
* A bounding rectangle in PDF point coordinates.
|
|
5001
|
+
* Origin at the bottom-left of the page; Y increases upward.
|
|
5002
|
+
*/
|
|
5003
|
+
interface PdfCoordinate {
|
|
5004
|
+
page: number;
|
|
5005
|
+
x: number;
|
|
5006
|
+
y: number;
|
|
5007
|
+
width: number;
|
|
5008
|
+
height: number;
|
|
5009
|
+
}
|
|
5010
|
+
/**
|
|
5011
|
+
* Serialize a PdfCoordinate to an RFC 3778 FragmentSelector value.
|
|
5012
|
+
* Format: `page=N&viewrect=left,top,width,height` (all in PDF points).
|
|
5013
|
+
*/
|
|
5014
|
+
declare function createFragmentSelector(coord: PdfCoordinate): string;
|
|
5015
|
+
/**
|
|
5016
|
+
* Parse an RFC 3778 FragmentSelector value into PDF coordinates.
|
|
5017
|
+
* Returns null when the value is not a well-formed page + viewrect fragment.
|
|
5018
|
+
*/
|
|
5019
|
+
declare function parseFragmentSelector(fragment: string): PdfCoordinate | null;
|
|
5020
|
+
/** Extract the 1-indexed page number from a FragmentSelector value. */
|
|
5021
|
+
declare function getPageFromFragment(fragment: string): number | null;
|
|
5022
|
+
|
|
4959
5023
|
/**
|
|
4960
5024
|
* Helper functions for working with W3C ResourceDescriptor
|
|
4961
5025
|
*/
|
|
@@ -5055,7 +5119,7 @@ declare function decodeRepresentation(buffer: Buffer, mediaType: string): string
|
|
|
5055
5119
|
* Common error classes — the unified Semiont error hierarchy.
|
|
5056
5120
|
*
|
|
5057
5121
|
* `SemiontError` is the base every other Semiont error class extends:
|
|
5058
|
-
* `APIError` (
|
|
5122
|
+
* `APIError` (http-transport), `BusRequestError` and `SemiontSessionError` (sdk),
|
|
5059
5123
|
* `ValidationError`, `ScriptError`, `NotFoundError`, `UnauthorizedError`,
|
|
5060
5124
|
* `ConflictError` (here), and `AWSError` (cli). Subclasses tighten the
|
|
5061
5125
|
* `code` field to a literal-union for discriminated handling.
|
|
@@ -5103,7 +5167,7 @@ declare class ConflictError extends SemiontError {
|
|
|
5103
5167
|
* Transport interfaces — the shared contract for any wire-or-local
|
|
5104
5168
|
* communication path consumed by `SemiontClient`. Concrete implementations
|
|
5105
5169
|
* live alongside the runtime they wrap (`HttpTransport` in
|
|
5106
|
-
* `@semiont/
|
|
5170
|
+
* `@semiont/http-transport`, in-process variants in `@semiont/make-meaning`,
|
|
5107
5171
|
* etc.).
|
|
5108
5172
|
*
|
|
5109
5173
|
* Three interfaces:
|
|
@@ -5124,6 +5188,7 @@ declare class ConflictError extends SemiontError {
|
|
|
5124
5188
|
*/
|
|
5125
5189
|
|
|
5126
5190
|
type Agent$1 = components['schemas']['Agent'];
|
|
5191
|
+
type GetResourceResponse = components['schemas']['GetResourceResponse'];
|
|
5127
5192
|
/**
|
|
5128
5193
|
* Six-state lifecycle for a transport's connection. Drives UI affordances
|
|
5129
5194
|
* (connecting spinners, reconnecting banners, etc.) and is observed via
|
|
@@ -5226,6 +5291,12 @@ interface ITransport {
|
|
|
5226
5291
|
*
|
|
5227
5292
|
* Returns a disposer that detaches the scope when the last subscriber
|
|
5228
5293
|
* unsubscribes (ref-counted).
|
|
5294
|
+
*
|
|
5295
|
+
* SDK-internal: this is the scope primitive the SDK's resource-scoped
|
|
5296
|
+
* `browse.*` live queries drive on subscribe/teardown (freshness follows
|
|
5297
|
+
* observation; #847) — it is not part of the application-facing surface.
|
|
5298
|
+
* Single-scope at a time; multi-scope is deferred
|
|
5299
|
+
* (`.plans/MULTI-RESOURCE-SCOPE.md`).
|
|
5229
5300
|
*/
|
|
5230
5301
|
subscribeToResource(resourceId: ResourceId): () => void;
|
|
5231
5302
|
/**
|
|
@@ -5307,7 +5378,7 @@ interface IBackendOperations {
|
|
|
5307
5378
|
interface PutBinaryRequest {
|
|
5308
5379
|
name: string;
|
|
5309
5380
|
file: File | Buffer;
|
|
5310
|
-
format: ContentFormat
|
|
5381
|
+
format: ContentFormat;
|
|
5311
5382
|
storageUri: string;
|
|
5312
5383
|
entityTypes?: string[];
|
|
5313
5384
|
language?: string;
|
|
@@ -5350,19 +5421,27 @@ interface IContentTransport {
|
|
|
5350
5421
|
resourceId: ResourceId;
|
|
5351
5422
|
}>;
|
|
5352
5423
|
getBinary(resourceId: ResourceId, options?: {
|
|
5353
|
-
accept?: ContentFormat | string;
|
|
5354
5424
|
auth?: AccessToken;
|
|
5355
5425
|
}): Promise<{
|
|
5356
5426
|
data: ArrayBuffer;
|
|
5357
5427
|
contentType: string;
|
|
5358
5428
|
}>;
|
|
5359
5429
|
getBinaryStream(resourceId: ResourceId, options?: {
|
|
5360
|
-
accept?: ContentFormat | string;
|
|
5361
5430
|
auth?: AccessToken;
|
|
5362
5431
|
}): Promise<{
|
|
5363
5432
|
stream: ReadableStream<Uint8Array>;
|
|
5364
5433
|
contentType: string;
|
|
5365
5434
|
}>;
|
|
5435
|
+
/**
|
|
5436
|
+
* Fetch the resource's JSON-LD metadata graph (descriptor + annotations +
|
|
5437
|
+
* inbound entity references). The HTTP transport dereferences
|
|
5438
|
+
* `GET /resources/:id/jsonld` (the LD face an external linked-data client
|
|
5439
|
+
* sees); in-process transports assemble it from their `KnowledgeSystem`.
|
|
5440
|
+
* See `.plans/SIMPLER-JSON-LD.md` §5 / decision 7.
|
|
5441
|
+
*/
|
|
5442
|
+
getResourceGraph(resourceId: ResourceId, options?: {
|
|
5443
|
+
auth?: AccessToken;
|
|
5444
|
+
}): Promise<GetResourceResponse>;
|
|
5366
5445
|
dispose(): void;
|
|
5367
5446
|
}
|
|
5368
5447
|
|
|
@@ -5408,20 +5487,33 @@ declare function normalizeText(text: string): string;
|
|
|
5408
5487
|
* Pre-computed content strings for batch fuzzy matching.
|
|
5409
5488
|
* Avoids recomputing normalizeText(content) and content.toLowerCase()
|
|
5410
5489
|
* for every annotation when processing many annotations against the same content.
|
|
5490
|
+
*
|
|
5491
|
+
* `normalizedMap[i]` is the original-content index that normalized
|
|
5492
|
+
* character `i` came from. It has length `normalizedContent.length + 1`;
|
|
5493
|
+
* the final entry is `content.length` so a match that ends at the end of
|
|
5494
|
+
* the normalized string maps back to the end of the original. This map is
|
|
5495
|
+
* how `findBestTextMatch` recovers the *original* offset of a normalized
|
|
5496
|
+
* match — counting char-by-char with `normalizeText(singleChar)` is
|
|
5497
|
+
* wrong, because a lone whitespace char trims to `''` (contributing 0)
|
|
5498
|
+
* while in a full-string normalize it collapses to a single space
|
|
5499
|
+
* (contributing 1). That discrepancy shifted recovered offsets by the
|
|
5500
|
+
* number of whitespace runs before the match.
|
|
5411
5501
|
*/
|
|
5412
5502
|
interface ContentCache {
|
|
5413
5503
|
normalizedContent: string;
|
|
5504
|
+
normalizedMap: number[];
|
|
5414
5505
|
lowerContent: string;
|
|
5415
5506
|
}
|
|
5416
5507
|
/**
|
|
5417
5508
|
* Build a ContentCache for a given content string.
|
|
5418
|
-
* Call once per content, pass to findBestTextMatch/
|
|
5509
|
+
* Call once per content, pass to findBestTextMatch/anchorAnnotation for all annotations.
|
|
5419
5510
|
*/
|
|
5420
5511
|
declare function buildContentCache(content: string): ContentCache;
|
|
5421
5512
|
/**
|
|
5422
5513
|
* Find best match for text in content using multi-strategy search
|
|
5423
5514
|
*
|
|
5424
|
-
* Shared core logic used by both
|
|
5515
|
+
* Shared core logic used by both anchorAnnotation (render-time) and
|
|
5516
|
+
* reconcileSelector (write-time).
|
|
5425
5517
|
*
|
|
5426
5518
|
* @param content - Full text content to search within
|
|
5427
5519
|
* @param searchText - The text to find
|
|
@@ -5434,37 +5526,91 @@ declare function findBestTextMatch(content: string, searchText: string, position
|
|
|
5434
5526
|
end: number;
|
|
5435
5527
|
matchQuality: MatchQuality;
|
|
5436
5528
|
} | null;
|
|
5437
|
-
/**
|
|
5438
|
-
* Find text using exact match with optional prefix/suffix context
|
|
5439
|
-
*
|
|
5440
|
-
* When the exact text appears multiple times in the content, prefix and suffix
|
|
5441
|
-
* are used to disambiguate and find the correct occurrence.
|
|
5442
|
-
*
|
|
5443
|
-
* If exact text is not found, uses multi-strategy fuzzy matching (normalization,
|
|
5444
|
-
* case-insensitive, Levenshtein distance) to locate changed text.
|
|
5445
|
-
*
|
|
5446
|
-
* @param content - Full text content to search within
|
|
5447
|
-
* @param exact - The exact text to find
|
|
5448
|
-
* @param prefix - Optional text that should appear immediately before the match
|
|
5449
|
-
* @param suffix - Optional text that should appear immediately after the match
|
|
5450
|
-
* @param positionHint - Optional position hint (from TextPositionSelector) for fuzzy search
|
|
5451
|
-
* @returns Position of the matched text, or null if not found
|
|
5452
|
-
*
|
|
5453
|
-
* @example
|
|
5454
|
-
* ```typescript
|
|
5455
|
-
* const content = "The cat sat. The cat ran.";
|
|
5456
|
-
* // Find second "The cat" occurrence
|
|
5457
|
-
* const pos = findTextWithContext(content, "The cat", "sat. ", " ran");
|
|
5458
|
-
* // Returns { start: 13, end: 20 }
|
|
5459
|
-
* ```
|
|
5460
|
-
*/
|
|
5461
|
-
declare function findTextWithContext(content: string, exact: string, prefix: string | undefined, suffix: string | undefined, positionHint: number | undefined, cache: ContentCache): TextPosition | null;
|
|
5462
5529
|
/**
|
|
5463
5530
|
* Verify that a position correctly points to the exact text
|
|
5464
5531
|
* Useful for debugging and validation
|
|
5465
5532
|
*/
|
|
5466
5533
|
declare function verifyPosition(content: string, position: TextPosition, expectedExact: string): boolean;
|
|
5467
5534
|
|
|
5535
|
+
/**
|
|
5536
|
+
* Anchor a W3C Web Annotation to its rendered text.
|
|
5537
|
+
*
|
|
5538
|
+
* Render-time cleverness is deliberately limited to **verbatim** quote
|
|
5539
|
+
* matching. The annotation's two selectors are written to agree (the
|
|
5540
|
+
* write-side `reconcileSelector` + `buildTextAnnotation` invariant
|
|
5541
|
+
* guarantee `content.substring(start, end) === exact`). At render time the
|
|
5542
|
+
* only legitimate discrepancy is *positional drift*: the document grew or
|
|
5543
|
+
* shrank above the span after the annotation was written, so the offset is
|
|
5544
|
+
* stale but the exact text still exists, byte-identical, elsewhere. That is
|
|
5545
|
+
* the W3C-intended role of `TextQuoteSelector`, and it is safe because it
|
|
5546
|
+
* demands identical text — no normalization, no fuzzy matching, no
|
|
5547
|
+
* judgment call.
|
|
5548
|
+
*
|
|
5549
|
+
* Anything that would require *fuzzy* recovery (smart-quote folding,
|
|
5550
|
+
* whitespace collapse, Levenshtein) is out of scope here: a non-verbatim
|
|
5551
|
+
* mismatch means the content representation diverged or the stored record
|
|
5552
|
+
* is wrong, both of which are deterministic and belong upstream (canonical
|
|
5553
|
+
* content, or a corrected annotation event). The renderer does not guess —
|
|
5554
|
+
* it renders at the stored offset and flags the anchor low-confidence so
|
|
5555
|
+
* the discrepancy surfaces for an upstream fix.
|
|
5556
|
+
*
|
|
5557
|
+
* Returns `null` only when nothing usable is present; otherwise always
|
|
5558
|
+
* returns a position with a `strategy` and `confidence`.
|
|
5559
|
+
*/
|
|
5560
|
+
type AnchorStrategy =
|
|
5561
|
+
/** Position hint pointed exactly at the exact text. Unambiguous. */
|
|
5562
|
+
'fast-path'
|
|
5563
|
+
/** Exact text appears once verbatim in the content. No tiebreak needed. */
|
|
5564
|
+
| 'unique-occurrence'
|
|
5565
|
+
/** Multiple verbatim occurrences; prefix+suffix uniquely identified one. */
|
|
5566
|
+
| 'context-disambiguated'
|
|
5567
|
+
/** Multiple verbatim candidates; position closest to hint chosen. */
|
|
5568
|
+
| 'position-tiebreaker'
|
|
5569
|
+
/** Exact text not found verbatim (or no quote); raw stored offset used,
|
|
5570
|
+
* flagged for upstream correction. */
|
|
5571
|
+
| 'position-fallback';
|
|
5572
|
+
type AnchorConfidence = 'high' | 'medium' | 'low';
|
|
5573
|
+
interface RenderedAnchor {
|
|
5574
|
+
start: number;
|
|
5575
|
+
end: number;
|
|
5576
|
+
strategy: AnchorStrategy;
|
|
5577
|
+
confidence: AnchorConfidence;
|
|
5578
|
+
}
|
|
5579
|
+
interface AnchorSelectors {
|
|
5580
|
+
position?: {
|
|
5581
|
+
start: number;
|
|
5582
|
+
end: number;
|
|
5583
|
+
};
|
|
5584
|
+
quote?: {
|
|
5585
|
+
exact: string;
|
|
5586
|
+
prefix?: string;
|
|
5587
|
+
suffix?: string;
|
|
5588
|
+
};
|
|
5589
|
+
}
|
|
5590
|
+
/**
|
|
5591
|
+
* Distance window for the position tiebreaker. Candidates closer than this
|
|
5592
|
+
* to the hint receive a non-zero position score; further candidates fall
|
|
5593
|
+
* back to zero. Tuned for typical document sizes; calibration tests pin
|
|
5594
|
+
* the boundary behaviour rather than the exact value.
|
|
5595
|
+
*/
|
|
5596
|
+
declare const POSITION_WINDOW = 1024;
|
|
5597
|
+
/**
|
|
5598
|
+
* Score weights — kept as named constants so the calibration tests can
|
|
5599
|
+
* import them and pin the *relationships* rather than the magnitudes.
|
|
5600
|
+
*
|
|
5601
|
+
* Invariant: a full-context match always outranks any position score.
|
|
5602
|
+
* (`CONTEXT_FULL_WEIGHT * 2 > POSITION_WEIGHT_MAX`, accounting for
|
|
5603
|
+
* prefix+suffix each contributing the full weight.)
|
|
5604
|
+
*/
|
|
5605
|
+
declare const CONTEXT_FULL_WEIGHT = 10;
|
|
5606
|
+
declare const CONTEXT_PARTIAL_WEIGHT = 5;
|
|
5607
|
+
declare const POSITION_WEIGHT_MAX = 5;
|
|
5608
|
+
/**
|
|
5609
|
+
* Locate the best-effort anchor for an annotation against the content the
|
|
5610
|
+
* renderer is about to display. Verbatim-only — see the module doc.
|
|
5611
|
+
*/
|
|
5612
|
+
declare function anchorAnnotation(content: string, selectors: AnchorSelectors): RenderedAnchor | null;
|
|
5613
|
+
|
|
5468
5614
|
/**
|
|
5469
5615
|
* Locale information
|
|
5470
5616
|
* Copied from SDK for frontend use
|
|
@@ -5541,89 +5687,86 @@ declare function normalizeCoordinates(point: Point, displayWidth: number, displa
|
|
|
5541
5687
|
declare function scaleSvgToNative(svg: string, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): string;
|
|
5542
5688
|
|
|
5543
5689
|
/**
|
|
5544
|
-
*
|
|
5690
|
+
* Selector reconciliation for write-time annotation construction.
|
|
5545
5691
|
*
|
|
5546
|
-
*
|
|
5547
|
-
*
|
|
5692
|
+
* LLM-produced text offsets are guides, not authoritative anchors.
|
|
5693
|
+
* `reconcileSelector` takes whatever the LLM emitted and produces a
|
|
5694
|
+
* `TextQuoteSelector`-equivalent `start`/`end`/`exact`/`prefix`/`suffix`
|
|
5695
|
+
* that is provably consistent with the source content:
|
|
5548
5696
|
*
|
|
5549
|
-
*
|
|
5550
|
-
*
|
|
5697
|
+
* - `content.substring(start, end) === exact`
|
|
5698
|
+
* - `content.substring(start - prefix.length, start) === prefix`
|
|
5699
|
+
* - `content.substring(end, end + suffix.length) === suffix`
|
|
5551
5700
|
*
|
|
5552
|
-
*
|
|
5553
|
-
|
|
5554
|
-
|
|
5555
|
-
/**
|
|
5556
|
-
* Extract prefix and suffix context for TextQuoteSelector
|
|
5557
|
-
*
|
|
5558
|
-
* Extracts up to 64 characters before and after the selected text,
|
|
5559
|
-
* extending to word boundaries to avoid cutting words in half.
|
|
5560
|
-
* This ensures prefix/suffix are meaningful context for fuzzy anchoring.
|
|
5701
|
+
* No caller spreads LLM-emitted prefix/suffix into the stored selector.
|
|
5702
|
+
* The shared helper extracts both from source at the corrected position,
|
|
5703
|
+
* so the no-overlap invariant holds by construction.
|
|
5561
5704
|
*
|
|
5562
|
-
*
|
|
5563
|
-
*
|
|
5564
|
-
* @param end - End offset of selection
|
|
5565
|
-
* @returns Object with prefix and suffix (undefined if at boundaries)
|
|
5705
|
+
* Returns `null` when the LLM emitted text that doesn't appear in the
|
|
5706
|
+
* source. Callers filter; the helper doesn't decide for them.
|
|
5566
5707
|
*
|
|
5567
|
-
* @
|
|
5568
|
-
* ```typescript
|
|
5569
|
-
* const content = "The United States Congress...";
|
|
5570
|
-
* const context = extractContext(content, 4, 17); // "United States"
|
|
5571
|
-
* // Returns: { prefix: "The ", suffix: " Congress..." }
|
|
5572
|
-
* // NOT: { prefix: "nited ", suffix: "gress..." }
|
|
5573
|
-
* ```
|
|
5708
|
+
* @see https://www.w3.org/TR/annotation-model/#text-quote-selector
|
|
5574
5709
|
*/
|
|
5575
|
-
|
|
5576
|
-
prefix?: string;
|
|
5577
|
-
suffix?: string;
|
|
5578
|
-
};
|
|
5710
|
+
|
|
5579
5711
|
/**
|
|
5580
|
-
*
|
|
5581
|
-
|
|
5582
|
-
|
|
5712
|
+
* How the reconciliation arrived at the chosen offset. Carried into the
|
|
5713
|
+
* worker log so operators can audit ambiguous matches; the
|
|
5714
|
+
* `first-of-many` flag, in particular, is the signal that an annotation
|
|
5715
|
+
* *may* be anchored at the wrong occurrence and warrants review.
|
|
5716
|
+
*/
|
|
5717
|
+
type AnchorMethod =
|
|
5718
|
+
/** Exact text appears once in the source — anchored unambiguously. */
|
|
5719
|
+
'unique-match'
|
|
5720
|
+
/** Multiple occurrences; LLM-emitted prefix/suffix picked one. */
|
|
5721
|
+
| 'context-recovered'
|
|
5722
|
+
/** Exact text not found verbatim; fuzzy match recovered it. */
|
|
5723
|
+
| 'fuzzy-match'
|
|
5724
|
+
/** Multiple occurrences, no context disambiguated — risky fallback. */
|
|
5725
|
+
| 'first-of-many';
|
|
5726
|
+
interface ReconciledSelector {
|
|
5583
5727
|
start: number;
|
|
5584
5728
|
end: number;
|
|
5729
|
+
/** Always a substring of the source content — never the LLM's emission. */
|
|
5585
5730
|
exact: string;
|
|
5731
|
+
/** Extracted from source via extractContext — never the LLM's emission. */
|
|
5586
5732
|
prefix?: string;
|
|
5733
|
+
/** Extracted from source via extractContext — never the LLM's emission. */
|
|
5587
5734
|
suffix?: string;
|
|
5588
|
-
|
|
5589
|
-
|
|
5735
|
+
anchorMethod: AnchorMethod;
|
|
5736
|
+
/** Present when the fuzzy fallback recovered the match, naming how. */
|
|
5590
5737
|
matchQuality?: MatchQuality;
|
|
5591
5738
|
}
|
|
5739
|
+
interface LlmSelectorInput {
|
|
5740
|
+
exact: string;
|
|
5741
|
+
/** LLM-emitted context for disambiguation only — not for storage. */
|
|
5742
|
+
prefix?: string;
|
|
5743
|
+
/** LLM-emitted context for disambiguation only — not for storage. */
|
|
5744
|
+
suffix?: string;
|
|
5745
|
+
}
|
|
5592
5746
|
/**
|
|
5593
|
-
*
|
|
5594
|
-
*
|
|
5595
|
-
*
|
|
5596
|
-
*
|
|
5597
|
-
*
|
|
5598
|
-
* This function uses a multi-strategy approach:
|
|
5599
|
-
* 1. Check if AI's offsets are exactly correct
|
|
5600
|
-
* 2. Try exact case-sensitive search
|
|
5601
|
-
* 3. Try case-insensitive search
|
|
5602
|
-
* 4. Try fuzzy matching with Levenshtein distance (5% tolerance)
|
|
5747
|
+
* Extract prefix and suffix context for a `TextQuoteSelector` from
|
|
5748
|
+
* source content. Used internally by `reconcileSelector` after offsets
|
|
5749
|
+
* are reconciled, and exported for callers (e.g. UI-side selection
|
|
5750
|
+
* capture) that need the same extraction semantics.
|
|
5603
5751
|
*
|
|
5604
|
-
*
|
|
5605
|
-
*
|
|
5606
|
-
*
|
|
5607
|
-
|
|
5608
|
-
|
|
5609
|
-
|
|
5610
|
-
|
|
5611
|
-
|
|
5612
|
-
|
|
5752
|
+
* Extracts up to 64 characters before and after the selected text,
|
|
5753
|
+
* extending up to 32 additional chars to reach a word boundary so the
|
|
5754
|
+
* prefix/suffix is meaningful context rather than mid-word fragments.
|
|
5755
|
+
*/
|
|
5756
|
+
declare function extractContext(content: string, start: number, end: number): {
|
|
5757
|
+
prefix?: string;
|
|
5758
|
+
suffix?: string;
|
|
5759
|
+
};
|
|
5760
|
+
/**
|
|
5761
|
+
* Reconcile LLM-emitted offsets against the source. Returns a selector
|
|
5762
|
+
* whose `start`/`end` are verified to bracket `exact` in `content`, and
|
|
5763
|
+
* whose `prefix`/`suffix` are extracted from source — never carried
|
|
5764
|
+
* verbatim from the LLM.
|
|
5613
5765
|
*
|
|
5614
|
-
*
|
|
5615
|
-
*
|
|
5616
|
-
* // AI said start=1143, but actual text is at 1161
|
|
5617
|
-
* const result = validateAndCorrectOffsets(
|
|
5618
|
-
* content,
|
|
5619
|
-
* 1143,
|
|
5620
|
-
* 1289,
|
|
5621
|
-
* "the question \"whether..."
|
|
5622
|
-
* );
|
|
5623
|
-
* // Returns: { start: 1161, end: 1303, exact: "...", corrected: true, matchQuality: 'exact', ... }
|
|
5624
|
-
* ```
|
|
5766
|
+
* Returns `null` if `exact` cannot be found anywhere in the content,
|
|
5767
|
+
* even via fuzzy match. Callers filter null and log the drop.
|
|
5625
5768
|
*/
|
|
5626
|
-
declare function
|
|
5769
|
+
declare function reconcileSelector(content: string, llm: LlmSelectorInput): ReconciledSelector | null;
|
|
5627
5770
|
|
|
5628
5771
|
/**
|
|
5629
5772
|
* Text encoding utilities for consistent charset handling
|
|
@@ -5660,7 +5803,7 @@ declare function extractCharset(mediaType: string): string;
|
|
|
5660
5803
|
declare function decodeWithCharset(buffer: ArrayBuffer, mediaType: string): string;
|
|
5661
5804
|
|
|
5662
5805
|
/**
|
|
5663
|
-
* Generic validation utilities for @semiont/
|
|
5806
|
+
* Generic validation utilities for @semiont/http-transport
|
|
5664
5807
|
*
|
|
5665
5808
|
* Pure TypeScript validation with no external dependencies.
|
|
5666
5809
|
* Safe to use in any JavaScript environment (Node.js, browser, Deno, etc.)
|
|
@@ -5717,42 +5860,215 @@ declare function validateData<T>(schema: {
|
|
|
5717
5860
|
declare function isValidEmail(email: string): boolean;
|
|
5718
5861
|
|
|
5719
5862
|
/**
|
|
5720
|
-
*
|
|
5863
|
+
* Media-type registry for Semiont
|
|
5864
|
+
*
|
|
5865
|
+
* One supported-types list, capability-tiered, keyed by the spec's
|
|
5866
|
+
* SupportedMediaType enum. Admission (registry membership) is the
|
|
5867
|
+
* create/yield gate: every member is storable, nameable, and uploadable.
|
|
5868
|
+
* The curated capabilities say what more the system can do with a type:
|
|
5721
5869
|
*
|
|
5722
|
-
*
|
|
5723
|
-
* - text
|
|
5724
|
-
*
|
|
5725
|
-
* -
|
|
5726
|
-
*
|
|
5727
|
-
* -
|
|
5870
|
+
* - `render` — which viewer the UI mounts ('none' → metadata + download)
|
|
5871
|
+
* - `anchoring` — which annotation model applies: character-offset text
|
|
5872
|
+
* selectors vs spatial geometry (PDFs are spatial)
|
|
5873
|
+
* - `extractText` — how the Smelter gets embeddable text ('none' → skip
|
|
5874
|
+
* embedding, never mojibake)
|
|
5875
|
+
* - `authorable` — offered in the compose editor's format dropdown
|
|
5876
|
+
* - `uploadable` — big tent: true for every registry member
|
|
5877
|
+
*
|
|
5878
|
+
* Capabilities are orthogonal strategies, not a ladder: images render but
|
|
5879
|
+
* yield no text; PDFs yield text but aren't authorable. A "tier" is a
|
|
5880
|
+
* derived reading, not a stored fact.
|
|
5881
|
+
*
|
|
5882
|
+
* Import-leniency invariant: restore/import preserves archive mediaTypes
|
|
5883
|
+
* verbatim, so "every stored mediaType is registry-valid" holds only for
|
|
5884
|
+
* content that entered through the validated create/yield gate. No code
|
|
5885
|
+
* reading a stored mediaType may assume `capabilitiesOf()` succeeds — the
|
|
5886
|
+
* `undefined` branch is mandatory wherever stored types are read.
|
|
5728
5887
|
*/
|
|
5888
|
+
|
|
5889
|
+
type SupportedMediaType = components['schemas']['SupportedMediaType'];
|
|
5890
|
+
type RenderMode = 'text' | 'image' | 'pdf' | 'none';
|
|
5891
|
+
type AnchoringModel = 'text-selector' | 'spatial' | 'none';
|
|
5892
|
+
type TextExtraction = 'decode' | 'pdf-text-layer' | 'none';
|
|
5893
|
+
interface MediaTypeCapabilities {
|
|
5894
|
+
/** Canonical file extension, with leading dot. */
|
|
5895
|
+
extension: `.${string}`;
|
|
5896
|
+
/** UI display name. */
|
|
5897
|
+
label: string;
|
|
5898
|
+
render: RenderMode;
|
|
5899
|
+
anchoring: AnchoringModel;
|
|
5900
|
+
extractText: TextExtraction;
|
|
5901
|
+
authorable: boolean;
|
|
5902
|
+
uploadable: boolean;
|
|
5903
|
+
}
|
|
5904
|
+
/**
|
|
5905
|
+
* The registry. `satisfies Record<SupportedMediaType, …>` is the
|
|
5906
|
+
* drift-lock: adding a type to the spec enum without a capabilities row
|
|
5907
|
+
* (or vice versa) is a compile error.
|
|
5908
|
+
*
|
|
5909
|
+
* Row order matters for `mediaTypeForExtension`: extension collisions
|
|
5910
|
+
* (.xml, .yaml, .js, .ts, .webm) resolve to the first row declaring the
|
|
5911
|
+
* extension.
|
|
5912
|
+
*/
|
|
5913
|
+
declare const MEDIA_TYPES: {
|
|
5914
|
+
'text/markdown': {
|
|
5915
|
+
extension: ".md";
|
|
5916
|
+
label: string;
|
|
5917
|
+
render: "text";
|
|
5918
|
+
anchoring: "text-selector";
|
|
5919
|
+
extractText: "decode";
|
|
5920
|
+
authorable: true;
|
|
5921
|
+
uploadable: true;
|
|
5922
|
+
};
|
|
5923
|
+
'text/plain': {
|
|
5924
|
+
extension: ".txt";
|
|
5925
|
+
label: string;
|
|
5926
|
+
render: "text";
|
|
5927
|
+
anchoring: "text-selector";
|
|
5928
|
+
extractText: "decode";
|
|
5929
|
+
authorable: true;
|
|
5930
|
+
uploadable: true;
|
|
5931
|
+
};
|
|
5932
|
+
'text/html': {
|
|
5933
|
+
extension: ".html";
|
|
5934
|
+
label: string;
|
|
5935
|
+
render: "text";
|
|
5936
|
+
anchoring: "text-selector";
|
|
5937
|
+
extractText: "decode";
|
|
5938
|
+
authorable: true;
|
|
5939
|
+
uploadable: true;
|
|
5940
|
+
};
|
|
5941
|
+
'application/json': {
|
|
5942
|
+
extension: ".json";
|
|
5943
|
+
label: string;
|
|
5944
|
+
render: "text";
|
|
5945
|
+
anchoring: "text-selector";
|
|
5946
|
+
extractText: "decode";
|
|
5947
|
+
authorable: false;
|
|
5948
|
+
uploadable: true;
|
|
5949
|
+
};
|
|
5950
|
+
'image/png': {
|
|
5951
|
+
extension: ".png";
|
|
5952
|
+
label: string;
|
|
5953
|
+
render: "image";
|
|
5954
|
+
anchoring: "spatial";
|
|
5955
|
+
extractText: "none";
|
|
5956
|
+
authorable: false;
|
|
5957
|
+
uploadable: true;
|
|
5958
|
+
};
|
|
5959
|
+
'image/jpeg': {
|
|
5960
|
+
extension: ".jpg";
|
|
5961
|
+
label: string;
|
|
5962
|
+
render: "image";
|
|
5963
|
+
anchoring: "spatial";
|
|
5964
|
+
extractText: "none";
|
|
5965
|
+
authorable: false;
|
|
5966
|
+
uploadable: true;
|
|
5967
|
+
};
|
|
5968
|
+
'application/pdf': {
|
|
5969
|
+
extension: ".pdf";
|
|
5970
|
+
label: string;
|
|
5971
|
+
render: "pdf";
|
|
5972
|
+
anchoring: "spatial";
|
|
5973
|
+
extractText: "pdf-text-layer";
|
|
5974
|
+
authorable: false;
|
|
5975
|
+
uploadable: true;
|
|
5976
|
+
};
|
|
5977
|
+
'text/css': MediaTypeCapabilities;
|
|
5978
|
+
'text/csv': MediaTypeCapabilities;
|
|
5979
|
+
'text/xml': MediaTypeCapabilities;
|
|
5980
|
+
'application/xml': MediaTypeCapabilities;
|
|
5981
|
+
'application/yaml': MediaTypeCapabilities;
|
|
5982
|
+
'application/x-yaml': MediaTypeCapabilities;
|
|
5983
|
+
'text/javascript': MediaTypeCapabilities;
|
|
5984
|
+
'application/javascript': MediaTypeCapabilities;
|
|
5985
|
+
'text/x-typescript': MediaTypeCapabilities;
|
|
5986
|
+
'application/typescript': MediaTypeCapabilities;
|
|
5987
|
+
'text/x-python': MediaTypeCapabilities;
|
|
5988
|
+
'text/x-java': MediaTypeCapabilities;
|
|
5989
|
+
'text/x-c': MediaTypeCapabilities;
|
|
5990
|
+
'text/x-c++': MediaTypeCapabilities;
|
|
5991
|
+
'text/x-csharp': MediaTypeCapabilities;
|
|
5992
|
+
'text/x-go': MediaTypeCapabilities;
|
|
5993
|
+
'text/x-rust': MediaTypeCapabilities;
|
|
5994
|
+
'text/x-ruby': MediaTypeCapabilities;
|
|
5995
|
+
'text/x-php': MediaTypeCapabilities;
|
|
5996
|
+
'text/x-swift': MediaTypeCapabilities;
|
|
5997
|
+
'text/x-kotlin': MediaTypeCapabilities;
|
|
5998
|
+
'text/x-shell': MediaTypeCapabilities;
|
|
5999
|
+
'application/msword': MediaTypeCapabilities;
|
|
6000
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': MediaTypeCapabilities;
|
|
6001
|
+
'application/vnd.ms-excel': MediaTypeCapabilities;
|
|
6002
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': MediaTypeCapabilities;
|
|
6003
|
+
'application/vnd.ms-powerpoint': MediaTypeCapabilities;
|
|
6004
|
+
'application/vnd.openxmlformats-officedocument.presentationml.presentation': MediaTypeCapabilities;
|
|
6005
|
+
'application/zip': MediaTypeCapabilities;
|
|
6006
|
+
'application/gzip': MediaTypeCapabilities;
|
|
6007
|
+
'application/x-tar': MediaTypeCapabilities;
|
|
6008
|
+
'application/x-7z-compressed': MediaTypeCapabilities;
|
|
6009
|
+
'application/octet-stream': MediaTypeCapabilities;
|
|
6010
|
+
'application/wasm': MediaTypeCapabilities;
|
|
6011
|
+
'image/gif': MediaTypeCapabilities;
|
|
6012
|
+
'image/webp': MediaTypeCapabilities;
|
|
6013
|
+
'image/svg+xml': MediaTypeCapabilities;
|
|
6014
|
+
'image/bmp': MediaTypeCapabilities;
|
|
6015
|
+
'image/tiff': MediaTypeCapabilities;
|
|
6016
|
+
'image/x-icon': MediaTypeCapabilities;
|
|
6017
|
+
'video/mp4': MediaTypeCapabilities;
|
|
6018
|
+
'video/mpeg': MediaTypeCapabilities;
|
|
6019
|
+
'video/webm': MediaTypeCapabilities;
|
|
6020
|
+
'video/ogg': MediaTypeCapabilities;
|
|
6021
|
+
'video/quicktime': MediaTypeCapabilities;
|
|
6022
|
+
'video/x-msvideo': MediaTypeCapabilities;
|
|
6023
|
+
'audio/mpeg': MediaTypeCapabilities;
|
|
6024
|
+
'audio/wav': MediaTypeCapabilities;
|
|
6025
|
+
'audio/ogg': MediaTypeCapabilities;
|
|
6026
|
+
'audio/webm': MediaTypeCapabilities;
|
|
6027
|
+
'audio/aac': MediaTypeCapabilities;
|
|
6028
|
+
'audio/flac': MediaTypeCapabilities;
|
|
6029
|
+
'font/woff': MediaTypeCapabilities;
|
|
6030
|
+
'font/woff2': MediaTypeCapabilities;
|
|
6031
|
+
'font/ttf': MediaTypeCapabilities;
|
|
6032
|
+
'font/otf': MediaTypeCapabilities;
|
|
6033
|
+
};
|
|
5729
6034
|
/**
|
|
5730
|
-
*
|
|
6035
|
+
* Strip parameters ("; charset=...") and normalize case.
|
|
6036
|
+
* Replaces the inline `split(';')[0]` sites across the codebase.
|
|
5731
6037
|
*/
|
|
5732
|
-
declare function
|
|
6038
|
+
declare function baseMediaType(format: string): string;
|
|
5733
6039
|
/**
|
|
5734
|
-
*
|
|
6040
|
+
* Registry membership — the admission gate. Exact match: callers pass a
|
|
6041
|
+
* base type (see `baseMediaType`); strings carrying parameters are not
|
|
6042
|
+
* members.
|
|
5735
6043
|
*/
|
|
5736
|
-
declare function
|
|
6044
|
+
declare function isSupportedMediaType(format: string): format is SupportedMediaType;
|
|
6045
|
+
/** Capabilities for a format (parameters tolerated), or undefined on registry miss. */
|
|
6046
|
+
declare function capabilitiesOf(format: string): MediaTypeCapabilities | undefined;
|
|
5737
6047
|
/**
|
|
5738
|
-
*
|
|
6048
|
+
* Lenient extension lookup for naming foreign/imported content: '.dat' on
|
|
6049
|
+
* registry miss. Exporters use this — a vocabulary change must never
|
|
6050
|
+
* refuse to name restored data.
|
|
5739
6051
|
*/
|
|
5740
|
-
declare function
|
|
6052
|
+
declare function extensionForMediaType(format: string): string;
|
|
5741
6053
|
/**
|
|
5742
|
-
*
|
|
6054
|
+
* Inverted registry: extension → media type, for the CLI and the upload
|
|
6055
|
+
* detection chain. Accepts 'md' or '.md', any case, and common alternate
|
|
6056
|
+
* spellings. Returns undefined for unknown extensions — detection chains
|
|
6057
|
+
* fall back to 'application/octet-stream' themselves.
|
|
5743
6058
|
*/
|
|
5744
|
-
declare function
|
|
6059
|
+
declare function mediaTypeForExtension(ext: string): SupportedMediaType | undefined;
|
|
5745
6060
|
/**
|
|
5746
|
-
*
|
|
5747
|
-
*
|
|
5748
|
-
*
|
|
5749
|
-
*
|
|
5750
|
-
* - 'image': Spatial coordinate annotations (SvgSelector, FragmentSelector)
|
|
5751
|
-
*
|
|
5752
|
-
* PDFs use spatial coordinates for annotations, so they belong to 'image' category.
|
|
6061
|
+
* The Smelter's gate: how to get embeddable text from a format. Registry
|
|
6062
|
+
* rows answer directly; on a registry miss, base types under text/* decode
|
|
6063
|
+
* (RFC 2046 guarantees the text top-level type is textual — imported
|
|
6064
|
+
* unregistered text subtypes embed too), everything else is 'none'.
|
|
5753
6065
|
*/
|
|
5754
|
-
|
|
5755
|
-
|
|
6066
|
+
declare function textExtractionOf(format: string): TextExtraction;
|
|
6067
|
+
/** Types offered in the compose editor's format dropdown. */
|
|
6068
|
+
declare const AUTHORABLE_MEDIA_TYPES: readonly SupportedMediaType[];
|
|
6069
|
+
/** Registry rows whose text the Smelter can extract. Rows only — the
|
|
6070
|
+
* text/* fallback in `textExtractionOf` isn't enumerable. */
|
|
6071
|
+
declare const EMBEDDABLE_MEDIA_TYPES: readonly SupportedMediaType[];
|
|
5756
6072
|
|
|
5757
6073
|
/**
|
|
5758
6074
|
* Resource input/output types
|
|
@@ -6615,15 +6931,5 @@ declare function isValidPlatformType(value: string): value is PlatformType;
|
|
|
6615
6931
|
*/
|
|
6616
6932
|
declare function getAllPlatformTypes(): PlatformType[];
|
|
6617
6933
|
|
|
6618
|
-
|
|
6619
|
-
|
|
6620
|
-
*
|
|
6621
|
-
* Core domain logic and utilities for the Semiont semantic knowledge platform.
|
|
6622
|
-
* OpenAPI types are generated here and exported for use across the monorepo.
|
|
6623
|
-
*/
|
|
6624
|
-
|
|
6625
|
-
declare const CORE_TYPES_VERSION = "0.1.0";
|
|
6626
|
-
declare const SDK_VERSION = "0.1.0";
|
|
6627
|
-
|
|
6628
|
-
export { BRIDGED_CHANNELS, CHANNEL_SCHEMAS, CORE_TYPES_VERSION, ConfigurationError, ConflictError, EventBus, JWTTokenSchema, LOCALES, NotFoundError, PERSISTED_EVENT_TYPES, RESOURCE_BROADCAST_TYPES, SDK_VERSION, ScopedEventBus, ScriptError, SemiontError, UnauthorizedError, ValidationError, accessToken, agentToDid, annotationId, annotationUri, applyBodyOperations, assembleAnnotation, authCode, baseUrl, buildContentCache, burstBuffer, busLog, busLogEnabled, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, createTomlConfigLoader, decodeRepresentation, decodeWithCharset, didToAgent, email, entityType, errField, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findBodyItem, findTextWithContext, formatLocaleDisplay, generateUuid, getAllLocaleCodes, getAllPlatformTypes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getExtensionForMimeType, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAnnotationId, isArchived, isArray, isAssessment, isBodyResolved, isBoolean, isComment, isDefined, isDraft, isEventRelatedToAnnotation, isFunction, isHighlight, isImageMimeType, isNull, isNullish, isNumber, isObject, isPdfMimeType, isReference, isResolvedReference, isResourceId, isStoredEvent, isString, isStubReference, isTag, isTextMimeType, isUndefined, isValidEmail, isValidPlatformType, jobId, loadTomlConfig, mcpToken, normalizeCoordinates, normalizeText, parseEnvironment, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceId, resourceUri, scaleSvgToNative, searchQuery, serializePerKey, setBusLogTraceIdProvider, softwareToAgent, userDID, userId, userToAgent, userToDid, validateAndCorrectOffsets, validateData, validateEnvironment, validateSvgMarkup, verifyPosition };
|
|
6629
|
-
export type { AccessToken, Annotation, AnnotationCategory, AnnotationId, AnnotationUri, AnthropicProviderConfig, AppConfig, AssembledAnnotation, AuthCode, BackendDownload, BackendServiceConfig, BaseUrl, BodyItem, BodyItemIdentity, BodyOperation, BoundingBox, Brand, BridgedChannel, BurstBufferOptions, BusOp, CloneToken, ConnectionState, ContentCache, ContentFormat, CreateAnnotationInternal, DatabaseServiceConfig, Email, EmbeddingServiceConfig, EmittableChannel, EntityType, EntityTypeStats, Environment, EnvironmentConfig, EventBase, EventInput, EventMap, EventMetadata, EventName, EventOfType, EventQuery, EventSignature, FragmentSelector, FrontendServiceConfig, GatheredContext, GoogleAuthRequest, GoogleCredential, GraphConnection, GraphDatabaseType, GraphPath, GraphServiceConfig, HealthCheckResponse, IBackendOperations, IContentTransport, ITransport, InferenceProvidersConfig, JobId, ListUsersResponse, LocaleInfo, Logger, MCPToken, MatchQuality, McpServiceConfig, MimeCategory, Motivation, OllamaProviderConfig, PersistedEvent, PersistedEventType, PlatformType, Point, ProgressCallback, ProgressEvent, PutBinaryOptions, PutBinaryProgress, PutBinaryRequest, RefreshToken, ResourceAnnotationUri, ResourceAnnotations, ResourceBroadcastType, ResourceDescriptor, ResourceFilter, ResourceId, ResourceUri, SearchQuery, SelectionData, Selector, SemiontConfig, ServicePlatformConfig, ServicesConfig, SiteConfig, StatusResponse, StoredEvent, StoredEventLike, SvgSelector, TagCategory, TagSchema, TextPosition, TextPositionSelector, TextQuoteSelector, ActorInferenceConfig as TomlActorInferenceConfig, TomlFileReader, InferenceConfig as TomlInferenceConfig, WorkerInferenceConfig as TomlWorkerInferenceConfig, TransportErrorCode, UpdateResourceInput, UpdateUserRequest, UpdateUserResponse, UserDID, UserId, UserResponse, ValidatedAnnotation, ValidationFailure, ValidationResult, ValidationSuccess, VectorsServiceConfig, components, operations, paths };
|
|
6934
|
+
export { AUTHORABLE_MEDIA_TYPES, BRIDGED_CHANNELS, CHANNEL_SCHEMAS, CONTEXT_FULL_WEIGHT, CONTEXT_PARTIAL_WEIGHT, ConfigurationError, ConflictError, EMBEDDABLE_MEDIA_TYPES, EventBus, JWTTokenSchema, LOCALES, MEDIA_TYPES, NotFoundError, PERSISTED_EVENT_TYPES, POSITION_WEIGHT_MAX, POSITION_WINDOW, RESOURCE_BROADCAST_TYPES, ScopedEventBus, ScriptError, SemiontError, UnauthorizedError, ValidationError, accessToken, agentToDid, anchorAnnotation, annotationId, annotationUri, applyBodyOperations, assembleAnnotation, authCode, baseMediaType, baseUrl, buildContentCache, burstBuffer, busLog, busLogEnabled, capabilitiesOf, cloneToken, createCircleSvg, createFragmentSelector, createPolygonSvg, createRectangleSvg, createTomlConfigLoader, decodeRepresentation, decodeWithCharset, didToAgent, email, entityType, errField, extensionForMediaType, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findBodyItem, formatLocaleDisplay, generateUuid, getAllLocaleCodes, getAllPlatformTypes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getNodeEncoding, getPageFromFragment, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAnnotationId, isArchived, isArray, isAssessment, isBodyResolved, isBoolean, isComment, isDefined, isDraft, isEventRelatedToAnnotation, isFunction, isHighlight, isNull, isNullish, isNumber, isObject, isReference, isResolvedReference, isResourceId, isStoredEvent, isString, isStubReference, isSupportedMediaType, isTag, isUndefined, isValidEmail, isValidPlatformType, jobId, loadTomlConfig, mcpToken, mediaTypeForExtension, normalizeCoordinates, normalizeText, parseEnvironment, parseFragmentSelector, parseSvgSelector, reconcileSelector, refreshToken, resourceAnnotationUri, resourceId, resourceUri, scaleSvgToNative, searchQuery, serializePerKey, setBusLogTraceIdProvider, softwareToAgent, textExtractionOf, userDID, userId, userToAgent, userToDid, validateData, validateEnvironment, validateSvgMarkup, verifyPosition };
|
|
6935
|
+
export type { AccessToken, AnchorConfidence, AnchorMethod, AnchorSelectors, AnchorStrategy, AnchoringModel, Annotation, AnnotationCategory, AnnotationId, AnnotationUri, AnthropicProviderConfig, AppConfig, AssembledAnnotation, AuthCode, BackendDownload, BackendServiceConfig, BaseUrl, BodyItem, BodyItemIdentity, BodyOperation, BoundingBox, Brand, BridgedChannel, BurstBufferOptions, BusOp, CloneToken, ConnectionState, ContentCache, ContentFormat, CreateAnnotationInternal, DatabaseServiceConfig, Email, EmbeddingServiceConfig, EmittableChannel, EntityType, EntityTypeStats, Environment, EnvironmentConfig, EventBase, EventInput, EventMap, EventMetadata, EventName, EventOfType, EventQuery, EventSignature, FragmentSelector, FrontendServiceConfig, GatheredContext, GoogleAuthRequest, GoogleCredential, GraphConnection, GraphDatabaseType, GraphPath, GraphServiceConfig, HealthCheckResponse, IBackendOperations, IContentTransport, ITransport, InferenceProvidersConfig, JobId, ListUsersResponse, LlmSelectorInput, LocaleInfo, Logger, MCPToken, MatchQuality, McpServiceConfig, MediaTypeCapabilities, Motivation, OllamaProviderConfig, PdfCoordinate, PersistedEvent, PersistedEventType, PlatformType, Point, ProgressCallback, ProgressEvent, PutBinaryOptions, PutBinaryProgress, PutBinaryRequest, ReconciledSelector, RefreshToken, RenderMode, RenderedAnchor, ResourceAnnotationUri, ResourceAnnotations, ResourceBroadcastType, ResourceDescriptor, ResourceFilter, ResourceId, ResourceUri, SearchQuery, SelectionData, Selector, SemiontConfig, ServicePlatformConfig, ServicesConfig, SiteConfig, StatusResponse, StoredEvent, StoredEventLike, SupportedMediaType, SvgSelector, TagCategory, TagSchema, TextExtraction, TextPosition, TextPositionSelector, TextQuoteSelector, ActorInferenceConfig as TomlActorInferenceConfig, TomlFileReader, InferenceConfig as TomlInferenceConfig, WorkerInferenceConfig as TomlWorkerInferenceConfig, TransportErrorCode, UpdateResourceInput, UpdateUserRequest, UpdateUserResponse, UserDID, UserId, UserResponse, ValidationFailure, ValidationResult, ValidationSuccess, VectorsServiceConfig, components, operations, paths };
|