@semiont/core 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1545,8 +1545,8 @@ interface paths {
1545
1545
  file: string;
1546
1546
  /** @description Media type of the content (e.g. text/plain, text/markdown, image/png) */
1547
1547
  format: string;
1548
- /** @description Where the content has been / should be placed (file://... for local). If omitted, derived from name + format. */
1549
- storageUri?: string;
1548
+ /** @description Where the content lives (file://... for local). Required — the client names the location; the server does not derive one. */
1549
+ storageUri: string;
1550
1550
  /** @description ISO 639-1 language code */
1551
1551
  language?: string;
1552
1552
  /** @description JSON-stringified array of entity type names */
@@ -1574,7 +1574,7 @@ interface paths {
1574
1574
  "application/json": components["schemas"]["CreateResourceResponse"];
1575
1575
  };
1576
1576
  };
1577
- /** @description Missing required fields (name, file, or format) */
1577
+ /** @description Missing required fields (name, file, format, or storageUri), or the format's base MIME type is not a SupportedMediaType. The error message names the offending type. */
1578
1578
  400: {
1579
1579
  headers: {
1580
1580
  [name: string]: unknown;
@@ -1608,20 +1608,17 @@ interface paths {
1608
1608
  cookie?: never;
1609
1609
  };
1610
1610
  /**
1611
- * Get a resource (content-negotiated)
1612
- * @description Content negotiation via the Accept header:
1613
- * - application/ld+json (default) — returns JSON-LD metadata including all annotations + inbound entity references
1614
- * - text/* or image/* or application/pdf — returns the raw representation bytes
1611
+ * Get a resource's stored representation (the pipe)
1612
+ * @description Returns the stored representation's bytes, verbatim, with the stored media type in Content-Type (`application/octet-stream` when the stored metadata carries none). The Accept header is never read: there is no content negotiation and no transcoding, so the served bytes always hash to the representation's registered checksum.
1615
1613
  *
1616
- * Binary payloads flow directly from the content store; metadata payloads go through the bus gateway.
1614
+ * The resource's JSON-LD description (descriptor + annotations + inbound entity references) lives at GET /resources/{id}/jsonld; every content response advertises it via a `Link: rel="describedby"` header.
1615
+ *
1616
+ * Content is immutable (checksum-addressed), and this route is bearer-authenticated, so responses carry `Cache-Control: private, max-age=31536000, immutable`.
1617
1617
  */
1618
1618
  get: {
1619
1619
  parameters: {
1620
1620
  query?: never;
1621
- header?: {
1622
- /** @description application/ld+json for metadata, or a text/image/pdf media type for raw content */
1623
- Accept?: string;
1624
- };
1621
+ header?: never;
1625
1622
  path: {
1626
1623
  id: string;
1627
1624
  };
@@ -1629,21 +1626,18 @@ interface paths {
1629
1626
  };
1630
1627
  requestBody?: never;
1631
1628
  responses: {
1632
- /** @description Resource content or metadata, depending on Accept */
1629
+ /** @description The stored representation's bytes, verbatim */
1633
1630
  200: {
1634
1631
  headers: {
1632
+ /** @description `</resources/{id}/jsonld>; rel="describedby"; type="application/ld+json"` — where to dereference the resource's JSON-LD description */
1633
+ Link?: string;
1635
1634
  [name: string]: unknown;
1636
1635
  };
1637
1636
  content: {
1638
- "application/ld+json": components["schemas"]["GetResourceResponse"];
1639
- "text/plain": string;
1640
- "text/markdown": string;
1641
- "text/html": string;
1642
- "image/*": string;
1643
- "application/pdf": string;
1637
+ "*/*": string;
1644
1638
  };
1645
1639
  };
1646
- /** @description Resource not found */
1640
+ /** @description Resource or representation not found */
1647
1641
  404: {
1648
1642
  headers: {
1649
1643
  [name: string]: unknown;
@@ -1661,6 +1655,58 @@ interface paths {
1661
1655
  "application/json": components["schemas"]["ErrorResponse"];
1662
1656
  };
1663
1657
  };
1658
+ };
1659
+ };
1660
+ put?: never;
1661
+ post?: never;
1662
+ delete?: never;
1663
+ options?: never;
1664
+ head?: never;
1665
+ patch?: never;
1666
+ trace?: never;
1667
+ };
1668
+ "/resources/{id}/jsonld": {
1669
+ parameters: {
1670
+ query?: never;
1671
+ header?: never;
1672
+ path?: never;
1673
+ cookie?: never;
1674
+ };
1675
+ /**
1676
+ * Get a resource's JSON-LD description
1677
+ * @description The dereferenceable linked-data description of the resource: its descriptor plus all annotations and inbound entity references, assembled via the bus gateway. This is the target of the `Link: rel="describedby"` header that GET /resources/{id} sends with every content response.
1678
+ *
1679
+ * Live data — annotations and references change — so responses carry `Cache-Control: no-cache`, in contrast to the immutable content at GET /resources/{id}.
1680
+ */
1681
+ get: {
1682
+ parameters: {
1683
+ query?: never;
1684
+ header?: never;
1685
+ path: {
1686
+ id: string;
1687
+ };
1688
+ cookie?: never;
1689
+ };
1690
+ requestBody?: never;
1691
+ responses: {
1692
+ /** @description The resource's JSON-LD description */
1693
+ 200: {
1694
+ headers: {
1695
+ [name: string]: unknown;
1696
+ };
1697
+ content: {
1698
+ "application/ld+json": components["schemas"]["GetResourceResponse"];
1699
+ };
1700
+ };
1701
+ /** @description Resource not found */
1702
+ 404: {
1703
+ headers: {
1704
+ [name: string]: unknown;
1705
+ };
1706
+ content: {
1707
+ "application/json": components["schemas"]["ErrorResponse"];
1708
+ };
1709
+ };
1664
1710
  /** @description Request timed out (bus gateway) */
1665
1711
  504: {
1666
1712
  headers: {
@@ -1688,10 +1734,10 @@ interface paths {
1688
1734
  cookie?: never;
1689
1735
  };
1690
1736
  /**
1691
- * Get raw resource content (browser-friendly alias)
1692
- * @description Browser-friendly alternative to GET /resources/{id} that always returns the raw representation, never JSON-LD. Used by `<img>` / PDF.js / `<video>` tags and anywhere an httpOnly cookie is the only feasible auth vector. Behaves like GET /resources/{id} with JSON-LD stripped from the Accept header before content negotiation.
1737
+ * Get a resource's stored representation (browser-friendly alias)
1738
+ * @description Identical pipe to GET /resources/{id} — verbatim bytes, stored media type in Content-Type, Accept never read. Exists only as the auth affordance for `<img>` / PDF.js / download links, which cannot carry Authorization headers: the `?token=` media token or the httpOnly semiont-token cookie ride along automatically.
1693
1739
  *
1694
- * Sets long-lived immutable Cache-Control (content is addressed by checksum upstream, so bytes never change for a given resource id).
1740
+ * Responses carry `Cache-Control: public, max-age=31536000, immutable` — `public` is safe here, unlike the bearer-authenticated main route, because the `?token=` is part of the cache key.
1695
1741
  */
1696
1742
  get: {
1697
1743
  parameters: {
@@ -1704,17 +1750,15 @@ interface paths {
1704
1750
  };
1705
1751
  requestBody?: never;
1706
1752
  responses: {
1707
- /** @description Raw representation bytes with appropriate Content-Type */
1753
+ /** @description The stored representation's bytes, verbatim */
1708
1754
  200: {
1709
1755
  headers: {
1756
+ /** @description `</resources/{id}/jsonld>; rel="describedby"; type="application/ld+json"` — where to dereference the resource's JSON-LD description */
1757
+ Link?: string;
1710
1758
  [name: string]: unknown;
1711
1759
  };
1712
1760
  content: {
1713
- "text/plain": string;
1714
- "text/markdown": string;
1715
- "text/html": string;
1716
- "image/*": string;
1717
- "application/pdf": string;
1761
+ "*/*": string;
1718
1762
  };
1719
1763
  };
1720
1764
  /** @description Resource or representation not found */
@@ -1895,22 +1939,10 @@ interface components {
1895
1939
  targetResource?: components["schemas"]["ResourceDescriptor"] | null;
1896
1940
  /** @description Gathered context for this annotation */
1897
1941
  context?: components["schemas"]["GatheredContext"];
1898
- /** @description DEPRECATED: Use 'context' instead. Legacy source context format. */
1899
- sourceContext?: {
1900
- before: string;
1901
- selected: string;
1902
- after: string;
1903
- };
1904
1942
  targetContext?: {
1905
1943
  content: string;
1906
1944
  summary?: string;
1907
1945
  };
1908
- suggestedResolution?: {
1909
- resourceId: string;
1910
- resourceName: string;
1911
- confidence: number;
1912
- reasoning: string;
1913
- };
1914
1946
  };
1915
1947
  /** @description W3C Web Annotation target object - source is required, selector is optional */
1916
1948
  AnnotationTarget: {
@@ -2059,10 +2091,15 @@ interface components {
2059
2091
  details?: string;
2060
2092
  };
2061
2093
  /**
2062
- * @description Content format as MIME type, optionally with charset parameter. Values include: text/plain, text/plain; charset=utf-8, text/plain; charset=iso-8859-1, text/markdown, text/markdown; charset=windows-1252, image/png, image/jpeg, application/pdf
2094
+ * @description Content format as a MIME type, optionally with parameters. The base type (everything before the first ';') MUST be a SupportedMediaType; parameters such as charset are preserved as metadata. Semantic validation happens in code at the create/yield boundary — there is deliberately no pattern here; the vocabulary lives in SupportedMediaType. Examples: text/plain, text/plain; charset=iso-8859-1, text/markdown; charset=windows-1252, image/png, application/pdf
2063
2095
  * @example text/plain; charset=utf-8
2064
2096
  */
2065
2097
  ContentFormat: string;
2098
+ /**
2099
+ * @description Base MIME types (no parameters) admitted by Semiont. Membership is the create/yield gate — every member is storable, nameable, and uploadable. What more the system can do with a type (render, annotate, extract text, author) is curated per type in @semiont/core's media-type registry, which is keyed by this enum.
2100
+ * @enum {string}
2101
+ */
2102
+ SupportedMediaType: "text/plain" | "text/markdown" | "text/html" | "text/css" | "text/csv" | "text/xml" | "application/json" | "application/xml" | "application/yaml" | "application/x-yaml" | "application/pdf" | "application/msword" | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | "application/vnd.ms-excel" | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" | "application/vnd.ms-powerpoint" | "application/vnd.openxmlformats-officedocument.presentationml.presentation" | "application/zip" | "application/gzip" | "application/x-tar" | "application/x-7z-compressed" | "application/octet-stream" | "application/wasm" | "image/png" | "image/jpeg" | "image/gif" | "image/webp" | "image/svg+xml" | "image/bmp" | "image/tiff" | "image/x-icon" | "video/mp4" | "video/mpeg" | "video/webm" | "video/ogg" | "video/quicktime" | "video/x-msvideo" | "audio/mpeg" | "audio/wav" | "audio/ogg" | "audio/webm" | "audio/aac" | "audio/flac" | "text/javascript" | "application/javascript" | "text/x-typescript" | "application/typescript" | "text/x-python" | "text/x-java" | "text/x-c" | "text/x-c++" | "text/x-csharp" | "text/x-go" | "text/x-rust" | "text/x-ruby" | "text/x-php" | "text/x-swift" | "text/x-kotlin" | "text/x-shell" | "font/woff" | "font/woff2" | "font/ttf" | "font/otf";
2066
2103
  ContextualSummaryResponse: {
2067
2104
  summary: string;
2068
2105
  relevantFields: {
@@ -2142,8 +2179,6 @@ interface components {
2142
2179
  EventMetadata: {
2143
2180
  /** @description Monotonic position in the event log (ordering authority) */
2144
2181
  sequenceNumber: number;
2145
- /** @description Byte offset in the JSONL file */
2146
- streamPosition: number;
2147
2182
  /** @description Optional correlation id propagated from a command. Lets clients match command-result events back to the POST that initiated them. Set by EventStore.appendEvent's options when a route handler passes one through. */
2148
2183
  correlationId?: string;
2149
2184
  };
@@ -3591,7 +3626,7 @@ declare function resourceAnnotationUri(uri: string): ResourceAnnotationUri;
3591
3626
  * These types prevent mixing up resource IDs, annotation IDs, and user IDs
3592
3627
  * at compile time while having zero runtime overhead.
3593
3628
  *
3594
- * URI types (ResourceUri, AnnotationUri) are in @semiont/api-client
3629
+ * URI types (ResourceUri, AnnotationUri) are in @semiont/http-transport
3595
3630
  * since they deal with HTTP URIs returned by the API.
3596
3631
  */
3597
3632
  type ResourceId = string & {
@@ -3619,7 +3654,7 @@ type RawAnnotation = components['schemas']['Annotation'];
3619
3654
  * `components['schemas']['Annotation']`, but with a branded `AnnotationId`
3620
3655
  * for the `id` field. Use this import everywhere the codebase refers to
3621
3656
  * "an annotation"; the raw OpenAPI type is only used inside
3622
- * `@semiont/api-client` at the HTTP boundary.
3657
+ * `@semiont/http-transport` at the HTTP boundary.
3623
3658
  *
3624
3659
  * Implemented by intersection (not `Omit`) to be robust against generator
3625
3660
  * drift — if the OpenAPI schema gets `additionalProperties: true` added,
@@ -3681,7 +3716,7 @@ interface EntityTypeStats {
3681
3716
  /**
3682
3717
  * Persisted Events
3683
3718
  *
3684
- * The 18 event types that get appended to the JSONL event log.
3719
+ * The event types that get appended to the JSONL event log.
3685
3720
  * Each maps a type string to its OpenAPI payload schema.
3686
3721
  * The PersistedEvent union derives from this catalog.
3687
3722
  */
@@ -3731,7 +3766,7 @@ type EventOfType<K extends keyof PersistedEventCatalog> = K extends SystemEventT
3731
3766
  resourceId: ResourceId;
3732
3767
  payload: PersistedEventCatalog[K];
3733
3768
  };
3734
- /** The union of all 20 persisted event types. Discriminated on `type`. */
3769
+ /** The union of all persisted event types. Discriminated on `type`. */
3735
3770
  type PersistedEvent = {
3736
3771
  [K in keyof PersistedEventCatalog]: EventOfType<K>;
3737
3772
  }[keyof PersistedEventCatalog];
@@ -4123,23 +4158,13 @@ type EventName = keyof EventMap;
4123
4158
  * for one caller) does NOT belong here. Those are per-caller correlation-ID
4124
4159
  * responses and publish globally — the caller filters by `correlationId`.
4125
4160
  *
4126
- * The frontend's `subscribeToResource(id)` wires these channels via
4127
- * `scope=id&scoped=<channel>` so the SSE route delivers them to that
4128
- * participant. WorkerStateUnit uses this list to decide which emitted events to
4129
- * scope to their resource.
4161
+ * The SDK's resource-scoped `browse.*` live queries wire these channels —
4162
+ * subscribing acquires the scope via the transport's `subscribeToResource`
4163
+ * (`scope=id&scoped=<channel>`) so the SSE route delivers them to that
4164
+ * participant (freshness follows observation; #847). WorkerStateUnit uses this
4165
+ * list to decide which emitted events to scope to their resource.
4130
4166
  */
4131
- /**
4132
- * Audit note (SIMPLE-BUS Phase 3 close): `yield:progress` was
4133
- * considered for inclusion but has only one consumer — the
4134
- * yield-initiator's Observable in `packages/api-client/src/namespaces/yield.ts`.
4135
- * No viewer of the resource other than the initiator subscribes to
4136
- * progress. Scoping therefore serves no fan-out-narrowing purpose for
4137
- * that channel, so it stays global (as a correlation-ID-shaped
4138
- * response, filtered by `referenceId`). Only `yield:finished` and
4139
- * `yield:failed` have a genuine multi-participant consumer (the
4140
- * ResourceViewerPage toast on the source resource).
4141
- */
4142
- declare const RESOURCE_BROADCAST_TYPES: readonly ["job:complete", "job:fail"];
4167
+ declare const RESOURCE_BROADCAST_TYPES: readonly [];
4143
4168
  type ResourceBroadcastType = typeof RESOURCE_BROADCAST_TYPES[number];
4144
4169
  /**
4145
4170
  * Authoritative map from bus channel to OpenAPI schema name.
@@ -4956,6 +4981,45 @@ declare function extractBoundingBox(svg: string): {
4956
4981
  height: number;
4957
4982
  } | null;
4958
4983
 
4984
+ /**
4985
+ * PDF viewrect FragmentSelector codec.
4986
+ *
4987
+ * `PdfCoordinate` is a bounding rectangle in PDF point space: origin at the
4988
+ * bottom-left of the page, Y increasing upward. The Y-flip to canvas pixels
4989
+ * lives in the browser (`react-ui`); the server has no canvas.
4990
+ *
4991
+ * These functions are the viewrect peer of the W3C `FragmentSelector` wrapper in
4992
+ * `web-annotation-utils`: they serialize/parse the RFC 3778
4993
+ * `page=N&viewrect=left,top,width,height` value. `@semiont/content` (geometry
4994
+ * from the text layer), `@semiont/jobs` (serialization at write time), and the
4995
+ * browser canvas all import them from here — no package reaches into the UI.
4996
+ *
4997
+ * RFC 3778 PDF Fragment Identifiers: https://tools.ietf.org/html/rfc3778
4998
+ */
4999
+ /**
5000
+ * A bounding rectangle in PDF point coordinates.
5001
+ * Origin at the bottom-left of the page; Y increases upward.
5002
+ */
5003
+ interface PdfCoordinate {
5004
+ page: number;
5005
+ x: number;
5006
+ y: number;
5007
+ width: number;
5008
+ height: number;
5009
+ }
5010
+ /**
5011
+ * Serialize a PdfCoordinate to an RFC 3778 FragmentSelector value.
5012
+ * Format: `page=N&viewrect=left,top,width,height` (all in PDF points).
5013
+ */
5014
+ declare function createFragmentSelector(coord: PdfCoordinate): string;
5015
+ /**
5016
+ * Parse an RFC 3778 FragmentSelector value into PDF coordinates.
5017
+ * Returns null when the value is not a well-formed page + viewrect fragment.
5018
+ */
5019
+ declare function parseFragmentSelector(fragment: string): PdfCoordinate | null;
5020
+ /** Extract the 1-indexed page number from a FragmentSelector value. */
5021
+ declare function getPageFromFragment(fragment: string): number | null;
5022
+
4959
5023
  /**
4960
5024
  * Helper functions for working with W3C ResourceDescriptor
4961
5025
  */
@@ -5055,7 +5119,7 @@ declare function decodeRepresentation(buffer: Buffer, mediaType: string): string
5055
5119
  * Common error classes — the unified Semiont error hierarchy.
5056
5120
  *
5057
5121
  * `SemiontError` is the base every other Semiont error class extends:
5058
- * `APIError` (api-client), `BusRequestError` and `SemiontSessionError` (sdk),
5122
+ * `APIError` (http-transport), `BusRequestError` and `SemiontSessionError` (sdk),
5059
5123
  * `ValidationError`, `ScriptError`, `NotFoundError`, `UnauthorizedError`,
5060
5124
  * `ConflictError` (here), and `AWSError` (cli). Subclasses tighten the
5061
5125
  * `code` field to a literal-union for discriminated handling.
@@ -5103,7 +5167,7 @@ declare class ConflictError extends SemiontError {
5103
5167
  * Transport interfaces — the shared contract for any wire-or-local
5104
5168
  * communication path consumed by `SemiontClient`. Concrete implementations
5105
5169
  * live alongside the runtime they wrap (`HttpTransport` in
5106
- * `@semiont/api-client`, in-process variants in `@semiont/make-meaning`,
5170
+ * `@semiont/http-transport`, in-process variants in `@semiont/make-meaning`,
5107
5171
  * etc.).
5108
5172
  *
5109
5173
  * Three interfaces:
@@ -5124,6 +5188,7 @@ declare class ConflictError extends SemiontError {
5124
5188
  */
5125
5189
 
5126
5190
  type Agent$1 = components['schemas']['Agent'];
5191
+ type GetResourceResponse = components['schemas']['GetResourceResponse'];
5127
5192
  /**
5128
5193
  * Six-state lifecycle for a transport's connection. Drives UI affordances
5129
5194
  * (connecting spinners, reconnecting banners, etc.) and is observed via
@@ -5226,6 +5291,12 @@ interface ITransport {
5226
5291
  *
5227
5292
  * Returns a disposer that detaches the scope when the last subscriber
5228
5293
  * unsubscribes (ref-counted).
5294
+ *
5295
+ * SDK-internal: this is the scope primitive the SDK's resource-scoped
5296
+ * `browse.*` live queries drive on subscribe/teardown (freshness follows
5297
+ * observation; #847) — it is not part of the application-facing surface.
5298
+ * Single-scope at a time; multi-scope is deferred
5299
+ * (`.plans/MULTI-RESOURCE-SCOPE.md`).
5229
5300
  */
5230
5301
  subscribeToResource(resourceId: ResourceId): () => void;
5231
5302
  /**
@@ -5307,7 +5378,7 @@ interface IBackendOperations {
5307
5378
  interface PutBinaryRequest {
5308
5379
  name: string;
5309
5380
  file: File | Buffer;
5310
- format: ContentFormat | string;
5381
+ format: ContentFormat;
5311
5382
  storageUri: string;
5312
5383
  entityTypes?: string[];
5313
5384
  language?: string;
@@ -5350,19 +5421,27 @@ interface IContentTransport {
5350
5421
  resourceId: ResourceId;
5351
5422
  }>;
5352
5423
  getBinary(resourceId: ResourceId, options?: {
5353
- accept?: ContentFormat | string;
5354
5424
  auth?: AccessToken;
5355
5425
  }): Promise<{
5356
5426
  data: ArrayBuffer;
5357
5427
  contentType: string;
5358
5428
  }>;
5359
5429
  getBinaryStream(resourceId: ResourceId, options?: {
5360
- accept?: ContentFormat | string;
5361
5430
  auth?: AccessToken;
5362
5431
  }): Promise<{
5363
5432
  stream: ReadableStream<Uint8Array>;
5364
5433
  contentType: string;
5365
5434
  }>;
5435
+ /**
5436
+ * Fetch the resource's JSON-LD metadata graph (descriptor + annotations +
5437
+ * inbound entity references). The HTTP transport dereferences
5438
+ * `GET /resources/:id/jsonld` (the LD face an external linked-data client
5439
+ * sees); in-process transports assemble it from their `KnowledgeSystem`.
5440
+ * See `.plans/SIMPLER-JSON-LD.md` §5 / decision 7.
5441
+ */
5442
+ getResourceGraph(resourceId: ResourceId, options?: {
5443
+ auth?: AccessToken;
5444
+ }): Promise<GetResourceResponse>;
5366
5445
  dispose(): void;
5367
5446
  }
5368
5447
 
@@ -5408,20 +5487,33 @@ declare function normalizeText(text: string): string;
5408
5487
  * Pre-computed content strings for batch fuzzy matching.
5409
5488
  * Avoids recomputing normalizeText(content) and content.toLowerCase()
5410
5489
  * for every annotation when processing many annotations against the same content.
5490
+ *
5491
+ * `normalizedMap[i]` is the original-content index that normalized
5492
+ * character `i` came from. It has length `normalizedContent.length + 1`;
5493
+ * the final entry is `content.length` so a match that ends at the end of
5494
+ * the normalized string maps back to the end of the original. This map is
5495
+ * how `findBestTextMatch` recovers the *original* offset of a normalized
5496
+ * match — counting char-by-char with `normalizeText(singleChar)` is
5497
+ * wrong, because a lone whitespace char trims to `''` (contributing 0)
5498
+ * while in a full-string normalize it collapses to a single space
5499
+ * (contributing 1). That discrepancy shifted recovered offsets by the
5500
+ * number of whitespace runs before the match.
5411
5501
  */
5412
5502
  interface ContentCache {
5413
5503
  normalizedContent: string;
5504
+ normalizedMap: number[];
5414
5505
  lowerContent: string;
5415
5506
  }
5416
5507
  /**
5417
5508
  * Build a ContentCache for a given content string.
5418
- * Call once per content, pass to findBestTextMatch/findTextWithContext for all annotations.
5509
+ * Call once per content, pass to findBestTextMatch/anchorAnnotation for all annotations.
5419
5510
  */
5420
5511
  declare function buildContentCache(content: string): ContentCache;
5421
5512
  /**
5422
5513
  * Find best match for text in content using multi-strategy search
5423
5514
  *
5424
- * Shared core logic used by both findTextWithContext and validateAndCorrectOffsets.
5515
+ * Shared core logic used by both anchorAnnotation (render-time) and
5516
+ * reconcileSelector (write-time).
5425
5517
  *
5426
5518
  * @param content - Full text content to search within
5427
5519
  * @param searchText - The text to find
@@ -5434,37 +5526,91 @@ declare function findBestTextMatch(content: string, searchText: string, position
5434
5526
  end: number;
5435
5527
  matchQuality: MatchQuality;
5436
5528
  } | null;
5437
- /**
5438
- * Find text using exact match with optional prefix/suffix context
5439
- *
5440
- * When the exact text appears multiple times in the content, prefix and suffix
5441
- * are used to disambiguate and find the correct occurrence.
5442
- *
5443
- * If exact text is not found, uses multi-strategy fuzzy matching (normalization,
5444
- * case-insensitive, Levenshtein distance) to locate changed text.
5445
- *
5446
- * @param content - Full text content to search within
5447
- * @param exact - The exact text to find
5448
- * @param prefix - Optional text that should appear immediately before the match
5449
- * @param suffix - Optional text that should appear immediately after the match
5450
- * @param positionHint - Optional position hint (from TextPositionSelector) for fuzzy search
5451
- * @returns Position of the matched text, or null if not found
5452
- *
5453
- * @example
5454
- * ```typescript
5455
- * const content = "The cat sat. The cat ran.";
5456
- * // Find second "The cat" occurrence
5457
- * const pos = findTextWithContext(content, "The cat", "sat. ", " ran");
5458
- * // Returns { start: 13, end: 20 }
5459
- * ```
5460
- */
5461
- declare function findTextWithContext(content: string, exact: string, prefix: string | undefined, suffix: string | undefined, positionHint: number | undefined, cache: ContentCache): TextPosition | null;
5462
5529
  /**
5463
5530
  * Verify that a position correctly points to the exact text
5464
5531
  * Useful for debugging and validation
5465
5532
  */
5466
5533
  declare function verifyPosition(content: string, position: TextPosition, expectedExact: string): boolean;
5467
5534
 
5535
+ /**
5536
+ * Anchor a W3C Web Annotation to its rendered text.
5537
+ *
5538
+ * Render-time cleverness is deliberately limited to **verbatim** quote
5539
+ * matching. The annotation's two selectors are written to agree (the
5540
+ * write-side `reconcileSelector` + `buildTextAnnotation` invariant
5541
+ * guarantee `content.substring(start, end) === exact`). At render time the
5542
+ * only legitimate discrepancy is *positional drift*: the document grew or
5543
+ * shrank above the span after the annotation was written, so the offset is
5544
+ * stale but the exact text still exists, byte-identical, elsewhere. That is
5545
+ * the W3C-intended role of `TextQuoteSelector`, and it is safe because it
5546
+ * demands identical text — no normalization, no fuzzy matching, no
5547
+ * judgment call.
5548
+ *
5549
+ * Anything that would require *fuzzy* recovery (smart-quote folding,
5550
+ * whitespace collapse, Levenshtein) is out of scope here: a non-verbatim
5551
+ * mismatch means the content representation diverged or the stored record
5552
+ * is wrong, both of which are deterministic and belong upstream (canonical
5553
+ * content, or a corrected annotation event). The renderer does not guess —
5554
+ * it renders at the stored offset and flags the anchor low-confidence so
5555
+ * the discrepancy surfaces for an upstream fix.
5556
+ *
5557
+ * Returns `null` only when nothing usable is present; otherwise always
5558
+ * returns a position with a `strategy` and `confidence`.
5559
+ */
5560
+ type AnchorStrategy =
5561
+ /** Position hint pointed exactly at the exact text. Unambiguous. */
5562
+ 'fast-path'
5563
+ /** Exact text appears once verbatim in the content. No tiebreak needed. */
5564
+ | 'unique-occurrence'
5565
+ /** Multiple verbatim occurrences; prefix+suffix uniquely identified one. */
5566
+ | 'context-disambiguated'
5567
+ /** Multiple verbatim candidates; position closest to hint chosen. */
5568
+ | 'position-tiebreaker'
5569
+ /** Exact text not found verbatim (or no quote); raw stored offset used,
5570
+ * flagged for upstream correction. */
5571
+ | 'position-fallback';
5572
+ type AnchorConfidence = 'high' | 'medium' | 'low';
5573
+ interface RenderedAnchor {
5574
+ start: number;
5575
+ end: number;
5576
+ strategy: AnchorStrategy;
5577
+ confidence: AnchorConfidence;
5578
+ }
5579
+ interface AnchorSelectors {
5580
+ position?: {
5581
+ start: number;
5582
+ end: number;
5583
+ };
5584
+ quote?: {
5585
+ exact: string;
5586
+ prefix?: string;
5587
+ suffix?: string;
5588
+ };
5589
+ }
5590
+ /**
5591
+ * Distance window for the position tiebreaker. Candidates closer than this
5592
+ * to the hint receive a non-zero position score; further candidates fall
5593
+ * back to zero. Tuned for typical document sizes; calibration tests pin
5594
+ * the boundary behaviour rather than the exact value.
5595
+ */
5596
+ declare const POSITION_WINDOW = 1024;
5597
+ /**
5598
+ * Score weights — kept as named constants so the calibration tests can
5599
+ * import them and pin the *relationships* rather than the magnitudes.
5600
+ *
5601
+ * Invariant: a full-context match always outranks any position score.
5602
+ * (`CONTEXT_FULL_WEIGHT * 2 > POSITION_WEIGHT_MAX`, accounting for
5603
+ * prefix+suffix each contributing the full weight.)
5604
+ */
5605
+ declare const CONTEXT_FULL_WEIGHT = 10;
5606
+ declare const CONTEXT_PARTIAL_WEIGHT = 5;
5607
+ declare const POSITION_WEIGHT_MAX = 5;
5608
+ /**
5609
+ * Locate the best-effort anchor for an annotation against the content the
5610
+ * renderer is about to display. Verbatim-only — see the module doc.
5611
+ */
5612
+ declare function anchorAnnotation(content: string, selectors: AnchorSelectors): RenderedAnchor | null;
5613
+
5468
5614
  /**
5469
5615
  * Locale information
5470
5616
  * Copied from SDK for frontend use
@@ -5541,89 +5687,86 @@ declare function normalizeCoordinates(point: Point, displayWidth: number, displa
5541
5687
  declare function scaleSvgToNative(svg: string, displayWidth: number, displayHeight: number, imageWidth: number, imageHeight: number): string;
5542
5688
 
5543
5689
  /**
5544
- * Text context extraction utilities for W3C Web Annotation TextQuoteSelector
5690
+ * Selector reconciliation for write-time annotation construction.
5545
5691
  *
5546
- * Provides robust prefix/suffix context extraction with word boundary detection
5547
- * to ensure fuzzy anchoring works correctly when the same text appears multiple times.
5692
+ * LLM-produced text offsets are guides, not authoritative anchors.
5693
+ * `reconcileSelector` takes whatever the LLM emitted and produces a
5694
+ * `TextQuoteSelector`-equivalent `start`/`end`/`exact`/`prefix`/`suffix`
5695
+ * that is provably consistent with the source content:
5548
5696
  *
5549
- * Also provides AI offset validation and correction for handling AI-generated annotations
5550
- * where the model may return slightly incorrect character offsets.
5697
+ * - `content.substring(start, end) === exact`
5698
+ * - `content.substring(start - prefix.length, start) === prefix`
5699
+ * - `content.substring(end, end + suffix.length) === suffix`
5551
5700
  *
5552
- * @see https://www.w3.org/TR/annotation-model/#text-quote-selector
5553
- */
5554
-
5555
- /**
5556
- * Extract prefix and suffix context for TextQuoteSelector
5557
- *
5558
- * Extracts up to 64 characters before and after the selected text,
5559
- * extending to word boundaries to avoid cutting words in half.
5560
- * This ensures prefix/suffix are meaningful context for fuzzy anchoring.
5701
+ * No caller spreads LLM-emitted prefix/suffix into the stored selector.
5702
+ * The shared helper extracts both from source at the corrected position,
5703
+ * so the no-overlap invariant holds by construction.
5561
5704
  *
5562
- * @param content - Full text content
5563
- * @param start - Start offset of selection
5564
- * @param end - End offset of selection
5565
- * @returns Object with prefix and suffix (undefined if at boundaries)
5705
+ * Returns `null` when the LLM emitted text that doesn't appear in the
5706
+ * source. Callers filter; the helper doesn't decide for them.
5566
5707
  *
5567
- * @example
5568
- * ```typescript
5569
- * const content = "The United States Congress...";
5570
- * const context = extractContext(content, 4, 17); // "United States"
5571
- * // Returns: { prefix: "The ", suffix: " Congress..." }
5572
- * // NOT: { prefix: "nited ", suffix: "gress..." }
5573
- * ```
5708
+ * @see https://www.w3.org/TR/annotation-model/#text-quote-selector
5574
5709
  */
5575
- declare function extractContext(content: string, start: number, end: number): {
5576
- prefix?: string;
5577
- suffix?: string;
5578
- };
5710
+
5579
5711
  /**
5580
- * Result of validating and correcting AI-provided annotation offsets
5581
- */
5582
- interface ValidatedAnnotation {
5712
+ * How the reconciliation arrived at the chosen offset. Carried into the
5713
+ * worker log so operators can audit ambiguous matches; the
5714
+ * `first-of-many` flag, in particular, is the signal that an annotation
5715
+ * *may* be anchored at the wrong occurrence and warrants review.
5716
+ */
5717
+ type AnchorMethod =
5718
+ /** Exact text appears once in the source — anchored unambiguously. */
5719
+ 'unique-match'
5720
+ /** Multiple occurrences; LLM-emitted prefix/suffix picked one. */
5721
+ | 'context-recovered'
5722
+ /** Exact text not found verbatim; fuzzy match recovered it. */
5723
+ | 'fuzzy-match'
5724
+ /** Multiple occurrences, no context disambiguated — risky fallback. */
5725
+ | 'first-of-many';
5726
+ interface ReconciledSelector {
5583
5727
  start: number;
5584
5728
  end: number;
5729
+ /** Always a substring of the source content — never the LLM's emission. */
5585
5730
  exact: string;
5731
+ /** Extracted from source via extractContext — never the LLM's emission. */
5586
5732
  prefix?: string;
5733
+ /** Extracted from source via extractContext — never the LLM's emission. */
5587
5734
  suffix?: string;
5588
- corrected: boolean;
5589
- fuzzyMatched?: boolean;
5735
+ anchorMethod: AnchorMethod;
5736
+ /** Present when the fuzzy fallback recovered the match, naming how. */
5590
5737
  matchQuality?: MatchQuality;
5591
5738
  }
5739
+ interface LlmSelectorInput {
5740
+ exact: string;
5741
+ /** LLM-emitted context for disambiguation only — not for storage. */
5742
+ prefix?: string;
5743
+ /** LLM-emitted context for disambiguation only — not for storage. */
5744
+ suffix?: string;
5745
+ }
5592
5746
  /**
5593
- * Validate and correct AI-provided annotation offsets with fuzzy matching tolerance
5594
- *
5595
- * AI models sometimes return offsets that don't match the actual text position,
5596
- * or provide text with minor variations (case differences, whitespace, typos).
5597
- *
5598
- * This function uses a multi-strategy approach:
5599
- * 1. Check if AI's offsets are exactly correct
5600
- * 2. Try exact case-sensitive search
5601
- * 3. Try case-insensitive search
5602
- * 4. Try fuzzy matching with Levenshtein distance (5% tolerance)
5747
+ * Extract prefix and suffix context for a `TextQuoteSelector` from
5748
+ * source content. Used internally by `reconcileSelector` after offsets
5749
+ * are reconciled, and exported for callers (e.g. UI-side selection
5750
+ * capture) that need the same extraction semantics.
5603
5751
  *
5604
- * This ensures we're maximally tolerant of AI errors while still maintaining
5605
- * annotation quality and logging what corrections were made.
5606
- *
5607
- * @param content - Full text content
5608
- * @param aiStart - Start offset from AI
5609
- * @param aiEnd - End offset from AI
5610
- * @param exact - The exact text that should be at this position (from AI)
5611
- * @returns Validated annotation with corrected offsets and context
5612
- * @throws Error if no acceptable match can be found
5752
+ * Extracts up to 64 characters before and after the selected text,
5753
+ * extending up to 32 additional chars to reach a word boundary so the
5754
+ * prefix/suffix is meaningful context rather than mid-word fragments.
5755
+ */
5756
+ declare function extractContext(content: string, start: number, end: number): {
5757
+ prefix?: string;
5758
+ suffix?: string;
5759
+ };
5760
+ /**
5761
+ * Reconcile an LLM-emitted quote against the source. Returns a selector
5762
+ * whose `start`/`end` are verified to bracket `exact` in `content`, and
5763
+ * whose `prefix`/`suffix` are extracted from source — never carried
5764
+ * verbatim from the LLM.
5613
5765
  *
5614
- * @example
5615
- * ```typescript
5616
- * // AI said start=1143, but actual text is at 1161
5617
- * const result = validateAndCorrectOffsets(
5618
- * content,
5619
- * 1143,
5620
- * 1289,
5621
- * "the question \"whether..."
5622
- * );
5623
- * // Returns: { start: 1161, end: 1303, exact: "...", corrected: true, matchQuality: 'exact', ... }
5624
- * ```
5766
+ * Returns `null` if `exact` cannot be found anywhere in the content,
5767
+ * even via fuzzy match. Callers filter null and log the drop.
5625
5768
  */
5626
- declare function validateAndCorrectOffsets(content: string, aiStart: number, aiEnd: number, exact: string): ValidatedAnnotation;
5769
+ declare function reconcileSelector(content: string, llm: LlmSelectorInput): ReconciledSelector | null;
5627
5770
 
5628
5771
  /**
5629
5772
  * Text encoding utilities for consistent charset handling
@@ -5660,7 +5803,7 @@ declare function extractCharset(mediaType: string): string;
5660
5803
  declare function decodeWithCharset(buffer: ArrayBuffer, mediaType: string): string;
5661
5804
 
5662
5805
  /**
5663
- * Generic validation utilities for @semiont/api-client
5806
+ * Generic validation utilities for @semiont/http-transport
5664
5807
  *
5665
5808
  * Pure TypeScript validation with no external dependencies.
5666
5809
  * Safe to use in any JavaScript environment (Node.js, browser, Deno, etc.)
@@ -5717,42 +5860,215 @@ declare function validateData<T>(schema: {
5717
5860
  declare function isValidEmail(email: string): boolean;
5718
5861
 
5719
5862
  /**
5720
- * MIME type utilities for Semiont
5863
+ * Media-type registry for Semiont
5864
+ *
5865
+ * One supported-types list, capability-tiered, keyed by the spec's
5866
+ * SupportedMediaType enum. Admission (registry membership) is the
5867
+ * create/yield gate: every member is storable, nameable, and uploadable.
5868
+ * The curated capabilities say what more the system can do with a type:
5721
5869
  *
5722
- * Initial support for:
5723
- * - text/plain
5724
- * - text/markdown
5725
- * - image/png
5726
- * - image/jpeg
5727
- * - application/pdf
5870
+ * - `render` — which viewer the UI mounts ('none' → metadata + download)
5871
+ * - `anchoring` — which annotation model applies: character-offset text
5872
+ * selectors vs spatial geometry (PDFs are spatial)
5873
+ * - `extractText` — how the Smelter gets embeddable text ('none' → skip
5874
+ * embedding, never mojibake)
5875
+ * - `authorable` — offered in the compose editor's format dropdown
5876
+ * - `uploadable` — big tent: true for every registry member
5877
+ *
5878
+ * Capabilities are orthogonal strategies, not a ladder: images render but
5879
+ * yield no text; PDFs yield text but aren't authorable. A "tier" is a
5880
+ * derived reading, not a stored fact.
5881
+ *
5882
+ * Import-leniency invariant: restore/import preserves archive mediaTypes
5883
+ * verbatim, so "every stored mediaType is registry-valid" holds only for
5884
+ * content that entered through the validated create/yield gate. No code
5885
+ * reading a stored mediaType may assume `capabilitiesOf()` succeeds — the
5886
+ * `undefined` branch is mandatory wherever stored types are read.
5728
5887
  */
5888
+
5889
+ type SupportedMediaType = components['schemas']['SupportedMediaType'];
5890
+ type RenderMode = 'text' | 'image' | 'pdf' | 'none';
5891
+ type AnchoringModel = 'text-selector' | 'spatial' | 'none';
5892
+ type TextExtraction = 'decode' | 'pdf-text-layer' | 'none';
5893
+ interface MediaTypeCapabilities {
5894
+ /** Canonical file extension, with leading dot. */
5895
+ extension: `.${string}`;
5896
+ /** UI display name. */
5897
+ label: string;
5898
+ render: RenderMode;
5899
+ anchoring: AnchoringModel;
5900
+ extractText: TextExtraction;
5901
+ authorable: boolean;
5902
+ uploadable: boolean;
5903
+ }
5904
+ /**
5905
+ * The registry. `satisfies Record<SupportedMediaType, …>` is the
5906
+ * drift-lock: adding a type to the spec enum without a capabilities row
5907
+ * (or vice versa) is a compile error.
5908
+ *
5909
+ * Row order matters for `mediaTypeForExtension`: extension collisions
5910
+ * (.xml, .yaml, .js, .ts, .webm) resolve to the first row declaring the
5911
+ * extension.
5912
+ */
5913
+ declare const MEDIA_TYPES: {
5914
+ 'text/markdown': {
5915
+ extension: ".md";
5916
+ label: string;
5917
+ render: "text";
5918
+ anchoring: "text-selector";
5919
+ extractText: "decode";
5920
+ authorable: true;
5921
+ uploadable: true;
5922
+ };
5923
+ 'text/plain': {
5924
+ extension: ".txt";
5925
+ label: string;
5926
+ render: "text";
5927
+ anchoring: "text-selector";
5928
+ extractText: "decode";
5929
+ authorable: true;
5930
+ uploadable: true;
5931
+ };
5932
+ 'text/html': {
5933
+ extension: ".html";
5934
+ label: string;
5935
+ render: "text";
5936
+ anchoring: "text-selector";
5937
+ extractText: "decode";
5938
+ authorable: true;
5939
+ uploadable: true;
5940
+ };
5941
+ 'application/json': {
5942
+ extension: ".json";
5943
+ label: string;
5944
+ render: "text";
5945
+ anchoring: "text-selector";
5946
+ extractText: "decode";
5947
+ authorable: false;
5948
+ uploadable: true;
5949
+ };
5950
+ 'image/png': {
5951
+ extension: ".png";
5952
+ label: string;
5953
+ render: "image";
5954
+ anchoring: "spatial";
5955
+ extractText: "none";
5956
+ authorable: false;
5957
+ uploadable: true;
5958
+ };
5959
+ 'image/jpeg': {
5960
+ extension: ".jpg";
5961
+ label: string;
5962
+ render: "image";
5963
+ anchoring: "spatial";
5964
+ extractText: "none";
5965
+ authorable: false;
5966
+ uploadable: true;
5967
+ };
5968
+ 'application/pdf': {
5969
+ extension: ".pdf";
5970
+ label: string;
5971
+ render: "pdf";
5972
+ anchoring: "spatial";
5973
+ extractText: "pdf-text-layer";
5974
+ authorable: false;
5975
+ uploadable: true;
5976
+ };
5977
+ 'text/css': MediaTypeCapabilities;
5978
+ 'text/csv': MediaTypeCapabilities;
5979
+ 'text/xml': MediaTypeCapabilities;
5980
+ 'application/xml': MediaTypeCapabilities;
5981
+ 'application/yaml': MediaTypeCapabilities;
5982
+ 'application/x-yaml': MediaTypeCapabilities;
5983
+ 'text/javascript': MediaTypeCapabilities;
5984
+ 'application/javascript': MediaTypeCapabilities;
5985
+ 'text/x-typescript': MediaTypeCapabilities;
5986
+ 'application/typescript': MediaTypeCapabilities;
5987
+ 'text/x-python': MediaTypeCapabilities;
5988
+ 'text/x-java': MediaTypeCapabilities;
5989
+ 'text/x-c': MediaTypeCapabilities;
5990
+ 'text/x-c++': MediaTypeCapabilities;
5991
+ 'text/x-csharp': MediaTypeCapabilities;
5992
+ 'text/x-go': MediaTypeCapabilities;
5993
+ 'text/x-rust': MediaTypeCapabilities;
5994
+ 'text/x-ruby': MediaTypeCapabilities;
5995
+ 'text/x-php': MediaTypeCapabilities;
5996
+ 'text/x-swift': MediaTypeCapabilities;
5997
+ 'text/x-kotlin': MediaTypeCapabilities;
5998
+ 'text/x-shell': MediaTypeCapabilities;
5999
+ 'application/msword': MediaTypeCapabilities;
6000
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': MediaTypeCapabilities;
6001
+ 'application/vnd.ms-excel': MediaTypeCapabilities;
6002
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': MediaTypeCapabilities;
6003
+ 'application/vnd.ms-powerpoint': MediaTypeCapabilities;
6004
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation': MediaTypeCapabilities;
6005
+ 'application/zip': MediaTypeCapabilities;
6006
+ 'application/gzip': MediaTypeCapabilities;
6007
+ 'application/x-tar': MediaTypeCapabilities;
6008
+ 'application/x-7z-compressed': MediaTypeCapabilities;
6009
+ 'application/octet-stream': MediaTypeCapabilities;
6010
+ 'application/wasm': MediaTypeCapabilities;
6011
+ 'image/gif': MediaTypeCapabilities;
6012
+ 'image/webp': MediaTypeCapabilities;
6013
+ 'image/svg+xml': MediaTypeCapabilities;
6014
+ 'image/bmp': MediaTypeCapabilities;
6015
+ 'image/tiff': MediaTypeCapabilities;
6016
+ 'image/x-icon': MediaTypeCapabilities;
6017
+ 'video/mp4': MediaTypeCapabilities;
6018
+ 'video/mpeg': MediaTypeCapabilities;
6019
+ 'video/webm': MediaTypeCapabilities;
6020
+ 'video/ogg': MediaTypeCapabilities;
6021
+ 'video/quicktime': MediaTypeCapabilities;
6022
+ 'video/x-msvideo': MediaTypeCapabilities;
6023
+ 'audio/mpeg': MediaTypeCapabilities;
6024
+ 'audio/wav': MediaTypeCapabilities;
6025
+ 'audio/ogg': MediaTypeCapabilities;
6026
+ 'audio/webm': MediaTypeCapabilities;
6027
+ 'audio/aac': MediaTypeCapabilities;
6028
+ 'audio/flac': MediaTypeCapabilities;
6029
+ 'font/woff': MediaTypeCapabilities;
6030
+ 'font/woff2': MediaTypeCapabilities;
6031
+ 'font/ttf': MediaTypeCapabilities;
6032
+ 'font/otf': MediaTypeCapabilities;
6033
+ };
5729
6034
  /**
5730
- * Map MIME type to file extension
6035
+ * Strip parameters ("; charset=...") and normalize case.
6036
+ * Replaces the inline `split(';')[0]` sites across the codebase.
5731
6037
  */
5732
- declare function getExtensionForMimeType(mimeType: string): string;
6038
+ declare function baseMediaType(format: string): string;
5733
6039
  /**
5734
- * Detect if MIME type is an image (png or jpeg only for now)
6040
+ * Registry membership is the admission gate. Exact match: callers pass a
6041
+ * base type (see `baseMediaType`); strings carrying parameters are not
6042
+ * members.
5735
6043
  */
5736
- declare function isImageMimeType(mimeType: string): boolean;
6044
+ declare function isSupportedMediaType(format: string): format is SupportedMediaType;
6045
+ /** Capabilities for a format (parameters tolerated), or undefined on registry miss. */
6046
+ declare function capabilitiesOf(format: string): MediaTypeCapabilities | undefined;
5737
6047
  /**
5738
- * Detect if MIME type is text-based (plain or markdown only for now)
6048
+ * Lenient extension lookup for naming foreign/imported content: '.dat' on
6049
+ * registry miss. Exporters use this — a vocabulary change must never
6050
+ * refuse to name restored data.
5739
6051
  */
5740
- declare function isTextMimeType(mimeType: string): boolean;
6052
+ declare function extensionForMediaType(format: string): string;
5741
6053
  /**
5742
- * Detect if MIME type is PDF
6054
+ * Inverted registry: extension → media type, for the CLI and the upload
6055
+ * detection chain. Accepts 'md' or '.md', any case, and common alternate
6056
+ * spellings. Returns undefined for unknown extensions — detection chains
6057
+ * fall back to 'application/octet-stream' themselves.
5743
6058
  */
5744
- declare function isPdfMimeType(mimeType: string): boolean;
6059
+ declare function mediaTypeForExtension(ext: string): SupportedMediaType | undefined;
5745
6060
  /**
5746
- * Get category for MIME type (for routing to appropriate viewer)
5747
- *
5748
- * Categories represent annotation models, not file formats:
5749
- * - 'text': Text-based annotations (TextPositionSelector, TextQuoteSelector)
5750
- * - 'image': Spatial coordinate annotations (SvgSelector, FragmentSelector)
5751
- *
5752
- * PDFs use spatial coordinates for annotations, so they belong to 'image' category.
6061
+ * The Smelter's gate: how to get embeddable text from a format. Registry
6062
+ * rows answer directly; on a registry miss, base types under text/* decode
6063
+ * (RFC 2046 guarantees the text top-level type is textual — imported
6064
+ * unregistered text subtypes embed too), everything else is 'none'.
5753
6065
  */
5754
- type MimeCategory = 'text' | 'image' | 'unsupported';
5755
- declare function getMimeCategory(mimeType: string): MimeCategory;
6066
+ declare function textExtractionOf(format: string): TextExtraction;
6067
+ /** Types offered in the compose editor's format dropdown. */
6068
+ declare const AUTHORABLE_MEDIA_TYPES: readonly SupportedMediaType[];
6069
+ /** Registry rows whose text the Smelter can extract. Rows only — the
6070
+ * text/* fallback in `textExtractionOf` isn't enumerable. */
6071
+ declare const EMBEDDABLE_MEDIA_TYPES: readonly SupportedMediaType[];
5756
6072
 
5757
6073
  /**
5758
6074
  * Resource input/output types
@@ -6615,15 +6931,5 @@ declare function isValidPlatformType(value: string): value is PlatformType;
6615
6931
  */
6616
6932
  declare function getAllPlatformTypes(): PlatformType[];
6617
6933
 
6618
- /**
6619
- * @semiont/core
6620
- *
6621
- * Core domain logic and utilities for the Semiont semantic knowledge platform.
6622
- * OpenAPI types are generated here and exported for use across the monorepo.
6623
- */
6624
-
6625
- declare const CORE_TYPES_VERSION = "0.1.0";
6626
- declare const SDK_VERSION = "0.1.0";
6627
-
6628
- export { BRIDGED_CHANNELS, CHANNEL_SCHEMAS, CORE_TYPES_VERSION, ConfigurationError, ConflictError, EventBus, JWTTokenSchema, LOCALES, NotFoundError, PERSISTED_EVENT_TYPES, RESOURCE_BROADCAST_TYPES, SDK_VERSION, ScopedEventBus, ScriptError, SemiontError, UnauthorizedError, ValidationError, accessToken, agentToDid, annotationId, annotationUri, applyBodyOperations, assembleAnnotation, authCode, baseUrl, buildContentCache, burstBuffer, busLog, busLogEnabled, cloneToken, createCircleSvg, createPolygonSvg, createRectangleSvg, createTomlConfigLoader, decodeRepresentation, decodeWithCharset, didToAgent, email, entityType, errField, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findBodyItem, findTextWithContext, formatLocaleDisplay, generateUuid, getAllLocaleCodes, getAllPlatformTypes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getExtensionForMimeType, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getMimeCategory, getNodeEncoding, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAnnotationId, isArchived, isArray, isAssessment, isBodyResolved, isBoolean, isComment, isDefined, isDraft, isEventRelatedToAnnotation, isFunction, isHighlight, isImageMimeType, isNull, isNullish, isNumber, isObject, isPdfMimeType, isReference, isResolvedReference, isResourceId, isStoredEvent, isString, isStubReference, isTag, isTextMimeType, isUndefined, isValidEmail, isValidPlatformType, jobId, loadTomlConfig, mcpToken, normalizeCoordinates, normalizeText, parseEnvironment, parseSvgSelector, refreshToken, resourceAnnotationUri, resourceId, resourceUri, scaleSvgToNative, searchQuery, serializePerKey, 
setBusLogTraceIdProvider, softwareToAgent, userDID, userId, userToAgent, userToDid, validateAndCorrectOffsets, validateData, validateEnvironment, validateSvgMarkup, verifyPosition };
6629
- export type { AccessToken, Annotation, AnnotationCategory, AnnotationId, AnnotationUri, AnthropicProviderConfig, AppConfig, AssembledAnnotation, AuthCode, BackendDownload, BackendServiceConfig, BaseUrl, BodyItem, BodyItemIdentity, BodyOperation, BoundingBox, Brand, BridgedChannel, BurstBufferOptions, BusOp, CloneToken, ConnectionState, ContentCache, ContentFormat, CreateAnnotationInternal, DatabaseServiceConfig, Email, EmbeddingServiceConfig, EmittableChannel, EntityType, EntityTypeStats, Environment, EnvironmentConfig, EventBase, EventInput, EventMap, EventMetadata, EventName, EventOfType, EventQuery, EventSignature, FragmentSelector, FrontendServiceConfig, GatheredContext, GoogleAuthRequest, GoogleCredential, GraphConnection, GraphDatabaseType, GraphPath, GraphServiceConfig, HealthCheckResponse, IBackendOperations, IContentTransport, ITransport, InferenceProvidersConfig, JobId, ListUsersResponse, LocaleInfo, Logger, MCPToken, MatchQuality, McpServiceConfig, MimeCategory, Motivation, OllamaProviderConfig, PersistedEvent, PersistedEventType, PlatformType, Point, ProgressCallback, ProgressEvent, PutBinaryOptions, PutBinaryProgress, PutBinaryRequest, RefreshToken, ResourceAnnotationUri, ResourceAnnotations, ResourceBroadcastType, ResourceDescriptor, ResourceFilter, ResourceId, ResourceUri, SearchQuery, SelectionData, Selector, SemiontConfig, ServicePlatformConfig, ServicesConfig, SiteConfig, StatusResponse, StoredEvent, StoredEventLike, SvgSelector, TagCategory, TagSchema, TextPosition, TextPositionSelector, TextQuoteSelector, ActorInferenceConfig as TomlActorInferenceConfig, TomlFileReader, InferenceConfig as TomlInferenceConfig, WorkerInferenceConfig as TomlWorkerInferenceConfig, TransportErrorCode, UpdateResourceInput, UpdateUserRequest, UpdateUserResponse, UserDID, UserId, UserResponse, ValidatedAnnotation, ValidationFailure, ValidationResult, ValidationSuccess, VectorsServiceConfig, components, operations, paths };
6934
+ export { AUTHORABLE_MEDIA_TYPES, BRIDGED_CHANNELS, CHANNEL_SCHEMAS, CONTEXT_FULL_WEIGHT, CONTEXT_PARTIAL_WEIGHT, ConfigurationError, ConflictError, EMBEDDABLE_MEDIA_TYPES, EventBus, JWTTokenSchema, LOCALES, MEDIA_TYPES, NotFoundError, PERSISTED_EVENT_TYPES, POSITION_WEIGHT_MAX, POSITION_WINDOW, RESOURCE_BROADCAST_TYPES, ScopedEventBus, ScriptError, SemiontError, UnauthorizedError, ValidationError, accessToken, agentToDid, anchorAnnotation, annotationId, annotationUri, applyBodyOperations, assembleAnnotation, authCode, baseMediaType, baseUrl, buildContentCache, burstBuffer, busLog, busLogEnabled, capabilitiesOf, cloneToken, createCircleSvg, createFragmentSelector, createPolygonSvg, createRectangleSvg, createTomlConfigLoader, decodeRepresentation, decodeWithCharset, didToAgent, email, entityType, errField, extensionForMediaType, extractBoundingBox, extractCharset, extractContext, findBestTextMatch, findBodyItem, formatLocaleDisplay, generateUuid, getAllLocaleCodes, getAllPlatformTypes, getAnnotationExactText, getAnnotationUriFromEvent, getBodySource, getBodyType, getChecksum, getCommentText, getCreator, getDerivedFrom, getExactText, getFragmentSelector, getLanguage, getLocaleEnglishName, getLocaleInfo, getLocaleNativeName, getNodeEncoding, getPageFromFragment, getPrimaryMediaType, getPrimaryRepresentation, getPrimarySelector, getResourceEntityTypes, getResourceId, getStorageUri, getSvgSelector, getTargetSelector, getTargetSource, getTextPositionSelector, getTextQuoteSelector, googleCredential, hasTargetSelector, isAnnotationId, isArchived, isArray, isAssessment, isBodyResolved, isBoolean, isComment, isDefined, isDraft, isEventRelatedToAnnotation, isFunction, isHighlight, isNull, isNullish, isNumber, isObject, isReference, isResolvedReference, isResourceId, isStoredEvent, isString, isStubReference, isSupportedMediaType, isTag, isUndefined, isValidEmail, isValidPlatformType, jobId, loadTomlConfig, mcpToken, mediaTypeForExtension, normalizeCoordinates, normalizeText, 
parseEnvironment, parseFragmentSelector, parseSvgSelector, reconcileSelector, refreshToken, resourceAnnotationUri, resourceId, resourceUri, scaleSvgToNative, searchQuery, serializePerKey, setBusLogTraceIdProvider, softwareToAgent, textExtractionOf, userDID, userId, userToAgent, userToDid, validateData, validateEnvironment, validateSvgMarkup, verifyPosition };
6935
+ export type { AccessToken, AnchorConfidence, AnchorMethod, AnchorSelectors, AnchorStrategy, AnchoringModel, Annotation, AnnotationCategory, AnnotationId, AnnotationUri, AnthropicProviderConfig, AppConfig, AssembledAnnotation, AuthCode, BackendDownload, BackendServiceConfig, BaseUrl, BodyItem, BodyItemIdentity, BodyOperation, BoundingBox, Brand, BridgedChannel, BurstBufferOptions, BusOp, CloneToken, ConnectionState, ContentCache, ContentFormat, CreateAnnotationInternal, DatabaseServiceConfig, Email, EmbeddingServiceConfig, EmittableChannel, EntityType, EntityTypeStats, Environment, EnvironmentConfig, EventBase, EventInput, EventMap, EventMetadata, EventName, EventOfType, EventQuery, EventSignature, FragmentSelector, FrontendServiceConfig, GatheredContext, GoogleAuthRequest, GoogleCredential, GraphConnection, GraphDatabaseType, GraphPath, GraphServiceConfig, HealthCheckResponse, IBackendOperations, IContentTransport, ITransport, InferenceProvidersConfig, JobId, ListUsersResponse, LlmSelectorInput, LocaleInfo, Logger, MCPToken, MatchQuality, McpServiceConfig, MediaTypeCapabilities, Motivation, OllamaProviderConfig, PdfCoordinate, PersistedEvent, PersistedEventType, PlatformType, Point, ProgressCallback, ProgressEvent, PutBinaryOptions, PutBinaryProgress, PutBinaryRequest, ReconciledSelector, RefreshToken, RenderMode, RenderedAnchor, ResourceAnnotationUri, ResourceAnnotations, ResourceBroadcastType, ResourceDescriptor, ResourceFilter, ResourceId, ResourceUri, SearchQuery, SelectionData, Selector, SemiontConfig, ServicePlatformConfig, ServicesConfig, SiteConfig, StatusResponse, StoredEvent, StoredEventLike, SupportedMediaType, SvgSelector, TagCategory, TagSchema, TextExtraction, TextPosition, TextPositionSelector, TextQuoteSelector, ActorInferenceConfig as TomlActorInferenceConfig, TomlFileReader, InferenceConfig as TomlInferenceConfig, WorkerInferenceConfig as TomlWorkerInferenceConfig, TransportErrorCode, UpdateResourceInput, UpdateUserRequest, UpdateUserResponse, 
UserDID, UserId, UserResponse, ValidationFailure, ValidationResult, ValidationSuccess, VectorsServiceConfig, components, operations, paths };