@walkthru-earth/objex-utils 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ # Query engine
2
+
3
+ Interfaces only. `@walkthru-earth/objex-utils` does not ship a DuckDB-WASM implementation; the objex app provides one (`src/lib/query/wasm.ts`). Use `QueryEngine` as the contract your own engine must implement, or as the type that downstream consumers of your library can depend on.
4
+
5
+ Source: `src/lib/query/engine.ts`.
6
+
7
+ ## Types
8
+
9
+ ### `QueryResult`
10
+
11
+ ```ts
12
+ interface QueryResult {
13
+ columns: string[];
14
+ types: string[];
15
+ rowCount: number;
16
+ rows: Record<string, any>[]; // already materialized
17
+ }
18
+ ```
19
+
20
+ Pre-parsed rows avoid Arrow version mismatches with DuckDB-WASM, which ships Arrow v17 internally while the host project may use Arrow v21.
21
+
22
+ ### `MapQueryResult`
23
+
24
+ ```ts
25
+ interface MapQueryResult {
26
+ wkbArrays: Uint8Array[]; // geometry column as raw WKB
27
+ geometryType: string; // e.g. 'POLYGON'
28
+ attributes: Map<string, { values: any[]; type: string }>; // non-geometry columns, columnar
29
+ rowCount: number;
30
+ }
31
+ ```
32
+
33
+ Raw columnar shape for map rendering — feed straight into [`buildGeoArrowTables`](./geometry.md#buildgeoarrowtables).
34
+
35
+ ### `SchemaField`
36
+
37
+ ```ts
38
+ interface SchemaField {
39
+ name: string;
40
+ type: string; // DuckDB type string, e.g. 'VARCHAR', 'GEOMETRY(EPSG:27700)'
41
+ nullable: boolean;
42
+ }
43
+ ```
44
+
45
+ ### `QuerySource`
46
+
47
+ ```ts
48
+ interface QuerySource {
49
+ ref: string; // FROM-clause expression, e.g. read_parquet('...url...') or "db"."schema"."table"
50
+ filePath?: string; // optional shortcut for Parquet file metadata queries
51
+ }
52
+ ```
53
+
54
+ ### Cancellation
55
+
56
+ ```ts
57
+ interface QueryHandle {
58
+ result: Promise<QueryResult>;
59
+ cancel(): Promise<boolean>; // true if cancelled, false if already completed
60
+ }
61
+
62
+ interface MapQueryHandle {
63
+ result: Promise<MapQueryResult>;
64
+ cancel(): Promise<boolean>;
65
+ }
66
+
67
+ class QueryCancelledError extends Error {
68
+ name: 'QueryCancelledError';
69
+ }
70
+ ```
71
+
72
+ The cancellable variants are how `objex` drives long-running queries — `cancel()` invokes DuckDB's `conn.send()` cancel path so the single worker isn't held hostage by an abandoned tab.
73
+
74
+ ## `QueryEngine` interface
75
+
76
+ ```ts
77
+ interface QueryEngine {
78
+ query(connId: string, sql: string): Promise<QueryResult>;
79
+
80
+ queryForMap(
81
+ connId: string,
82
+ sql: string,
83
+ geomCol: string,
84
+ geomColType: string,
85
+ sourceCrs?: string | null
86
+ ): Promise<MapQueryResult>;
87
+
88
+ getSchema(connId: string, source: QuerySource): Promise<SchemaField[]>;
89
+ getRowCount(connId: string, source: QuerySource): Promise<number>;
90
+ detectCrs(
91
+ connId: string,
92
+ source: QuerySource,
93
+ geomCol: string
94
+ ): Promise<string | null>;
95
+
96
+ getSchemaAndCrs?(
97
+ connId: string,
98
+ source: QuerySource,
99
+ findGeoCol: (schema: SchemaField[]) => string | null
100
+ ): Promise<{ schema: SchemaField[]; geomCol: string | null; crs: string | null }>;
101
+
102
+ queryCancellable?(connId: string, sql: string): QueryHandle;
103
+ queryForMapCancellable?(
104
+ connId: string,
105
+ sql: string,
106
+ geomCol: string,
107
+ geomColType: string,
108
+ sourceCrs?: string | null
109
+ ): MapQueryHandle;
110
+
111
+ forceCancel?(): Promise<void>;
112
+ registerFileBuffer?(name: string, buffer: Uint8Array): Promise<void>;
113
+ dropFile?(name: string): Promise<void>;
114
+
115
+ releaseMemory(): Promise<void>;
116
+ dispose(): Promise<void>;
117
+ }
118
+ ```
119
+
120
+ ### Methods at a glance
121
+
122
+ | Method | Required? | Notes |
123
+ |--------|-----------|-------|
124
+ | `query` | yes | Plain (non-cancellable) query; resolves with pre-parsed rows. |
125
+ | `queryForMap` | yes | Geometry-aware query. `geomColType` lets the engine decide whether to wrap in `ST_AsWKB` / `ST_Transform`. `sourceCrs` (e.g. `'EPSG:27700'`) is only used when the geometry column does not have a parameterized GEOMETRY type; pass `null` for WGS84. |
126
+ | `getSchema` / `getRowCount` | yes | Metadata helpers. `QuerySource.ref` is the FROM expression — works for both files and catalog tables. |
127
+ | `detectCrs` | yes | Returns e.g. `'EPSG:27700'` or `null` (WGS84 / unknown). |
128
+ | `getSchemaAndCrs` | optional | Single round-trip returning `schema`, `geomCol`, and `crs` (the work of `getSchema` + `detectCrs`; row count is not included). `findGeoCol` is injected so the engine can inspect the schema without owning the heuristic. |
129
+ | `queryCancellable` / `queryForMapCancellable` | optional | Prefer these in UIs; callers should fall back to `query` / `queryForMap` when they are absent. |
130
+ | `forceCancel` | optional | Kill any in-flight query across all handles. |
131
+ | `registerFileBuffer` / `dropFile` | optional | Register an in-memory file in DuckDB's VFS (used for ATTACH'd catalogs, drag-and-drop). |
132
+ | `releaseMemory` | yes | Trim DuckDB buffer pools (e.g. after closing large tabs). |
133
+ | `dispose` | yes | Tear down the connection fully. |
134
+
135
+ ### Implementation checklist
136
+
137
+ - Serialize rows with BigInt-safe JSON (see [`jsonReplacerBigInt`](./formatting.md#jsonreplacerbigint)).
138
+ - Wrap geometry selects with `ST_AsWKB(...)` before Arrow export — DuckDB-WASM cannot Arrow-export `GEOMETRY` yet ([duckdb/duckdb-wasm#2187](https://github.com/duckdb/duckdb-wasm/issues/2187)).
139
+ - Return `rowCount` even when the result is streamed; consumers display progress.
140
+ - Surface cancellation via `QueryCancelledError` so UIs can distinguish "user aborted" from real failures.
@@ -0,0 +1,251 @@
1
+ # Storage
2
+
3
+ URL parsing, provider registry, and the `StorageAdapter` contract.
4
+
5
+ Sources:
6
+
7
+ - `src/lib/utils/storage-url.ts` — generic URL / bucket parser
8
+ - `src/lib/utils/cloud-url.ts` — cloud-scheme → HTTPS resolver
9
+ - `src/lib/storage/providers.ts` — provider registry + access-mode logic
10
+ - `src/lib/storage/adapter.ts` — the `StorageAdapter` interface
11
+ - `src/lib/storage/url-adapter.ts` — adapter for arbitrary HTTPS URLs
12
+
13
+ ## URL parsing
14
+
15
+ ### `parseStorageUrl(input, defaults?)`
16
+
17
+ ```ts
18
+ function parseStorageUrl(
19
+ input: string,
20
+ defaults?: Defaults
21
+ ): ParsedStorageUrl
22
+ ```
23
+
24
+ Universal parser for any bucket / cloud URL a user might paste. Accepted formats:
25
+
26
+ - URI schemes: `s3://`, `s3a://`, `s3n://`, `r2://`, `gs://`, `gcs://`, `azure://`, `az://`, `abfs://`, `abfss://`, `wasbs://`, `swift://`, `file://`
27
+ - AWS S3 virtual-hosted, path-style, global URLs (region auto-detected from host)
28
+ - Cloudflare R2, Google Cloud Storage, Azure, DigitalOcean Spaces, Wasabi, Backblaze B2, Alibaba OSS, Tencent COS, Yandex, Storj, Contabo, Hetzner, Linode, OVHcloud, MinIO
29
+ - Generic custom endpoints
30
+ - Plain bucket names (defaults to `s3`)
31
+
32
+ **Types**
33
+
34
+ ```ts
35
+ interface ParsedStorageUrl {
36
+ bucket: string;
37
+ region: string;
38
+ endpoint: string;
39
+ provider: StorageProvider; // same ID space as ProviderId (see below)
40
+ prefix: string; // path after bucket (may be '')
41
+ }
42
+
43
+ interface Defaults {
44
+ region?: string;
45
+ endpoint?: string;
46
+ provider?: StorageProvider;
47
+ }
48
+ ```
49
+
50
+ **Example**
51
+
52
+ ```ts
53
+ parseStorageUrl('s3://us-west-2.my-bucket/data/*.parquet');
54
+ // {
55
+ // provider: 's3',
56
+ // bucket: 'my-bucket',
57
+ // region: 'us-west-2',
58
+ // endpoint: 'https://s3.us-west-2.amazonaws.com',
59
+ // prefix: 'data/*.parquet'
60
+ // }
61
+ ```
62
+
63
+ ### `looksLikeUrl(input)`
64
+
65
+ ```ts
66
+ function looksLikeUrl(input: string): boolean
67
+ ```
68
+
69
+ Return `true` if `input` resembles a URL/URI rather than a plain bucket name. Useful to decide whether to pass it to `parseStorageUrl`.
70
+
71
+ ### `describeParseResult(parsed)`
72
+
73
+ ```ts
74
+ function describeParseResult(parsed: ParsedStorageUrl): string
75
+ ```
76
+
77
+ Build a human-readable summary of a parse result (bucket, endpoint, region, provider, prefix). Used in the objex UI but safe to render anywhere.
78
+
79
+ ### `resolveCloudUrl(url)`
80
+
81
+ ```ts
82
+ function resolveCloudUrl(url: string): string
83
+ ```
84
+
85
+ Convert a cloud-protocol URL to an HTTPS URL that any fetch client can handle.
86
+
87
+ | Input | Output |
88
+ |-------|--------|
89
+ | `s3://bucket/key` | `https://bucket.s3.<region>.amazonaws.com/key` — region auto-detected from host prefix (e.g. `us-west-2.opendata.source.coop`), or `us-east-1` as fallback |
90
+ | `gs://bucket/key` | `https://storage.googleapis.com/bucket/key` |
91
+ | `http(s)://...` | returned unchanged |
92
+ | Anything else | returned unchanged |
93
+
94
+ ### `getNativeScheme(provider)`
95
+
96
+ ```ts
97
+ function getNativeScheme(provider: string): string
98
+ ```
99
+
100
+ Map a provider ID to its canonical URI scheme prefix (the first entry of that provider's `schemes` list in the provider registry). Falls back to `'s3'` for unknown S3-compatible providers.
101
+
102
+ ### `safeDecodeURIComponent(s)`
103
+
104
+ ```ts
105
+ function safeDecodeURIComponent(s: string): string
106
+ ```
107
+
108
+ Percent-decode a URL component without throwing on malformed input. Returns the original string if decoding fails.
109
+
110
+ ## Provider registry
111
+
112
+ ### `ProviderId`
113
+
114
+ ```ts
115
+ type ProviderId =
116
+ | 's3' | 'gcs' | 'r2' | 'minio' | 'azure' | 'storj' | 'b2'
117
+ | 'digitalocean' | 'wasabi' | 'contabo' | 'hetzner' | 'linode' | 'ovhcloud';
118
+ ```
119
+
120
+ ### `ProviderDef`
121
+
122
+ ```ts
123
+ interface ProviderDef {
124
+ label: string; // "Amazon S3"
125
+ description: string; // short helper text for UI
126
+ authMethod: 'sigv4' | 'sas-token';
127
+ needsRegion: boolean;
128
+ needsEndpoint: boolean;
129
+ defaultRegion: string;
130
+ endpointTemplate: string | null; // may contain {region}
131
+ regions: ProviderRegion[];
132
+ bucketLabel?: string; // e.g. Azure uses "Container"
133
+ endpointPlaceholder: string;
134
+ schemes: string[]; // e.g. ['s3', 's3a', 's3n']
135
+ }
136
+
137
+ interface ProviderRegion {
138
+ code: string;
139
+ label: string;
140
+ }
141
+ ```
142
+
143
+ ### Registry exports
144
+
145
+ ```ts
146
+ const PROVIDERS: Record<ProviderId, ProviderDef>;
147
+ const PROVIDER_IDS: ProviderId[];
148
+ ```
149
+
150
+ `PROVIDER_IDS` is the display order used in the objex connection dialog.
151
+
152
+ ### Helpers
153
+
154
+ ```ts
155
+ function getProvider(id: string): ProviderDef;
156
+ function buildEndpointFromTemplate(id: ProviderId, region: string): string;
157
+ function buildProviderBaseUrl(
158
+ provider: ProviderId,
159
+ endpoint: string,
160
+ bucket: string,
161
+ region: string
162
+ ): string;
163
+ function isGcsProvider(provider: string, endpoint: string): boolean;
164
+ ```
165
+
166
+ | Function | Semantics |
167
+ |----------|-----------|
168
+ | `getProvider` | Unknown IDs fall back to the S3 entry (never throws). |
169
+ | `buildEndpointFromTemplate` | Substitute `{region}` in the provider's template. |
170
+ | `buildProviderBaseUrl` | Produce the HTTPS base URL for API requests (endpoint + bucket, correctly interleaved for virtual-host vs path-style). |
171
+ | `isGcsProvider` | `true` when the connection uses the GCS JSON API rather than S3 XML — used to pick adapter implementation. |
172
+
173
+ ### Access mode
174
+
175
+ ```ts
176
+ type AccessMode = 'public-https' | 'sas-https' | 'signed-s3';
177
+
178
+ function getAccessMode(conn: AccessModeInput): AccessMode;
179
+ function isPubliclyStreamable(conn: AccessModeInput): boolean;
180
+ ```
181
+
182
+ `AccessMode` is the single source of truth for how an HTTP client (DuckDB httpfs, MapLibre, fetch) should read the connection's files:
183
+
184
+ | Mode | Meaning |
185
+ |------|---------|
186
+ | `'public-https'` | Plain HTTPS, no signing — anonymous S3, GCS, R2, public MinIO, etc. |
187
+ | `'sas-https'` | HTTPS with SAS token appended to the URL — Azure. |
188
+ | `'signed-s3'` | Requires SigV4 signing — authenticated S3-compatible connections. |
189
+
190
+ `isPubliclyStreamable` is `true` for `'public-https'` and `'sas-https'` (anything a plain `fetch()` / `<img>` / `<video>` can reach directly).
191
+
192
+ ## StorageAdapter
193
+
194
+ ### Interface
195
+
196
+ ```ts
197
+ interface StorageAdapter {
198
+ list(path: string, signal?: AbortSignal): Promise<FileEntry[]>;
199
+ read(
200
+ path: string,
201
+ offset?: number,
202
+ length?: number,
203
+ signal?: AbortSignal
204
+ ): Promise<Uint8Array>;
205
+ head(path: string, signal?: AbortSignal): Promise<FileEntry>;
206
+ listPage?(
207
+ path: string,
208
+ continuationToken?: string,
209
+ pageSize?: number,
210
+ signal?: AbortSignal
211
+ ): Promise<ListPage>;
212
+ put(key: string, data: Uint8Array, contentType?: string): Promise<WriteResult>;
213
+ delete(key: string): Promise<void>;
214
+ deletePrefix(prefix: string): Promise<{ deleted: number }>;
215
+ copy(srcKey: string, destKey: string): Promise<WriteResult>;
216
+ readonly supportsWrite: boolean;
217
+ }
218
+
219
+ interface ListPage {
220
+ entries: FileEntry[];
221
+ continuationToken?: string;
222
+ hasMore: boolean;
223
+ }
224
+ ```
225
+
226
+ See [`types-constants.md`](./types-constants.md#fileentry) for `FileEntry` and `WriteResult`.
227
+
228
+ **Conventions**
229
+
230
+ - `signal` is propagated all the way to the underlying `fetch()`. Callers should always pass an `AbortController.signal` so tab switches / cleanups don't leak requests.
231
+ - `read(path, offset, length)` uses HTTP Range when supported; omitting offset/length reads the whole object.
232
+ - `listPage` is optional — read-heavy viewers (e.g. paginated browser) should feature-detect it.
233
+ - Read-only adapters should throw a native `Error` from write methods and set `supportsWrite = false`.
234
+
235
+ ### `UrlAdapter`
236
+
237
+ ```ts
238
+ class UrlAdapter implements StorageAdapter {
239
+ readonly supportsWrite = false;
240
+
241
+ read(url: string, offset?: number, length?: number, signal?: AbortSignal): Promise<Uint8Array>;
242
+ head(url: string, signal?: AbortSignal): Promise<FileEntry>;
243
+ list(): Promise<FileEntry[]>; // always []
244
+ put(): Promise<WriteResult>; // throws
245
+ delete(): Promise<void>; // throws
246
+ deletePrefix(): Promise<{ deleted: number }>; // throws
247
+ copy(): Promise<WriteResult>; // throws
248
+ }
249
+ ```
250
+
251
+ Minimal adapter for arbitrary HTTPS URLs (`tab.source === 'url'`). The path argument (named `url` in the signatures above) is the full URL. Supports `read()` (Range requests) and `head()` only. Listing returns an empty array, and writes throw. Use it when you have a raw HTTPS link and do not need connection credentials.
@@ -0,0 +1,173 @@
1
+ # Core types & constants
2
+
3
+ Shared data shapes (connections, tabs, files) and package-wide constants.
4
+
5
+ Sources: `src/lib/types.ts`, `src/lib/constants.ts`.
6
+
7
+ ## Types
8
+
9
+ ### `FileEntry`
10
+
11
+ ```ts
12
+ interface FileEntry {
13
+ name: string;
14
+ path: string;
15
+ is_dir: boolean;
16
+ size: number;
17
+ modified: number; // unix timestamp in milliseconds
18
+ extension: string;
19
+ }
20
+ ```
21
+
22
+ A single file or directory entry returned by any `StorageAdapter.list` / `listPage` / `head`. `extension` is lowercase without the leading dot (empty string for directories and extensionless files).
23
+
24
+ ### `Connection`
25
+
26
+ ```ts
27
+ interface Connection {
28
+ id: string;
29
+ name: string;
30
+ provider: string; // same ID space as ProviderId
31
+ endpoint: string;
32
+ bucket: string;
33
+ region: string;
34
+ anonymous: boolean;
35
+ authMethod?: 'sigv4' | 'sas-token';
36
+ rootPrefix?: string;
37
+ }
38
+ ```
39
+
40
+ Persisted connection record. **No credentials** — secrets live only in the session `ConnectionConfig`.
41
+
42
+ ### `ConnectionConfig`
43
+
44
+ ```ts
45
+ interface ConnectionConfig {
46
+ name: string;
47
+ provider: string;
48
+ endpoint: string;
49
+ bucket: string;
50
+ region: string;
51
+ access_key?: string;
52
+ secret_key?: string;
53
+ sas_token?: string;
54
+ anonymous: boolean;
55
+ authMethod?: 'sigv4' | 'sas-token';
56
+ rootPrefix?: string;
57
+ }
58
+ ```
59
+
60
+ Transient form with credentials. Never persist this directly.
61
+
62
+ ### `Tab`
63
+
64
+ ```ts
65
+ interface Tab {
66
+ id: string;
67
+ name: string;
68
+ path: string;
69
+ source: 'remote' | 'url';
70
+ connectionId?: string;
71
+ extension: string; // lowercase, no leading dot
72
+ size?: number;
73
+ sourceRef?: string; // FROM-clause ref when reading from a catalog table
74
+ }
75
+ ```
76
+
77
+ Represents one open viewer tab. When `sourceRef` is set the tab reads from a DuckDB/DuckLake FROM expression rather than a file URL — file-only metadata paths (hyparquet prefetch, `parquet_kv_metadata`) are skipped.
78
+
79
+ ### `WriteResult`
80
+
81
+ ```ts
82
+ interface WriteResult {
83
+ key: string;
84
+ size: number;
85
+ e_tag?: string;
86
+ }
87
+ ```
88
+
89
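+ Returned from the `StorageAdapter` write methods that create an object (`put`, `copy`); `delete` and `deletePrefix` return `void` and `{ deleted: number }` respectively.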
90
+
91
+ ### `Theme`
92
+
93
+ ```ts
94
+ type Theme = 'light' | 'dark' | 'system';
95
+ ```
96
+
97
+ ## Constants
98
+
99
+ ### `STORAGE_KEYS`
100
+
101
+ ```ts
102
+ const STORAGE_KEYS = {
103
+ SETTINGS: 'obstore-explore-settings',
104
+ CONNECTIONS: 'obstore-explore-connections',
105
+ QUERY_HISTORY: 'obstore-explore-query-history',
106
+ } as const;
107
+ ```
108
+
109
+ Namespace for localStorage keys. Always use these when persisting app state with [`persistToStorage`](./local-storage.md#persisttostorage-key-value) / [`loadFromStorage`](./local-storage.md#loadfromstoragetkey-defaultvalue).
110
+
111
+ ### `WGS84_CODES`
112
+
113
+ ```ts
114
+ const WGS84_CODES: Set<number>; // { 4326, 4979 }
115
+ ```
116
+
117
+ EPSG codes considered equivalent to WGS84. Use to short-circuit reprojection.
118
+
119
+ ### `DEFAULT_TARGET_CRS`
120
+
121
+ ```ts
122
+ const DEFAULT_TARGET_CRS = 'OGC:CRS84';
123
+ ```
124
+
125
+ Canonical target for DuckDB `ST_Transform`. OGC:CRS84 (longitude, latitude) matches GeoParquet 1.1+ and is functionally equivalent to EPSG:4326 under `geometry_always_xy = true`.
126
+
127
+ ### `DUCKDB_INIT_TIMEOUT_MS`
128
+
129
+ ```ts
130
+ const DUCKDB_INIT_TIMEOUT_MS = 30_000;
131
+ ```
132
+
133
+ Max milliseconds the DuckDB-WASM worker has to boot before the UI surfaces an error.
134
+
135
+ ### `MAX_QUERY_HISTORY_ENTRIES`
136
+
137
+ ```ts
138
+ const MAX_QUERY_HISTORY_ENTRIES = 200;
139
+ ```
140
+
141
+ LRU cap for persisted query history.
142
+
143
+ ### `SQL_PREVIEW_LENGTH`
144
+
145
+ ```ts
146
+ const SQL_PREVIEW_LENGTH = 120;
147
+ ```
148
+
149
+ Number of characters of SQL shown in each preview in the query history list.
150
+
151
+ ### `VIEWER_DIR_EXTENSIONS`
152
+
153
+ ```ts
154
+ const VIEWER_DIR_EXTENSIONS: Set<string>; // { 'zarr', 'zr3' }
155
+ ```
156
+
157
+ Extensions that open as a viewer even though the path is a directory (Zarr stores).
158
+
159
+ ### `LAYER_HUE_MULTIPLIER`
160
+
161
+ ```ts
162
+ const LAYER_HUE_MULTIPLIER = 137;
163
+ ```
164
+
165
+ Golden-angle-based hue step (137° ≈ 360° × (1 − 1/φ)). Multiply by layer index to get visually distinct hues.
166
+
167
+ ### `COPY_FEEDBACK_MS`
168
+
169
+ ```ts
170
+ const COPY_FEEDBACK_MS = 2000;
171
+ ```
172
+
173
+ Duration of the "Copied!" confirmation state on copy-to-clipboard buttons.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@walkthru-earth/objex-utils",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "Pure TypeScript utilities from objex — WKB parser, GeoArrow builder, storage URL parser, file type registry",
5
5
  "author": "Youssef Harby <yharby@walkthru.earth>",
6
6
  "license": "CC-BY-4.0",
@@ -22,6 +22,7 @@
22
22
  },
23
23
  "files": [
24
24
  "dist",
25
+ "docs",
25
26
  "README.md"
26
27
  ],
27
28
  "peerDependencies": {