@walkthru-earth/objex 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +5 -0
- package/README.md +20 -12
- package/dist/components/browser/FileTreeSidebar.svelte +32 -17
- package/dist/components/layout/AboutSheet.svelte +5 -2
- package/dist/components/layout/ConnectionDialog.svelte +1 -1
- package/dist/components/layout/SettingsSheet.svelte +237 -0
- package/dist/components/layout/SettingsSheet.svelte.d.ts +6 -0
- package/dist/components/layout/Sidebar.svelte +73 -6
- package/dist/components/layout/Sidebar.svelte.d.ts +4 -1
- package/dist/components/layout/StatusBar.svelte +1 -1
- package/dist/components/layout/TabBar.svelte +2 -2
- package/dist/components/ui/context-menu/context-menu-radio-group.svelte.d.ts +1 -1
- package/dist/components/ui/dropdown-menu/dropdown-menu-checkbox-group.svelte.d.ts +1 -1
- package/dist/components/ui/dropdown-menu/dropdown-menu-radio-group.svelte.d.ts +1 -1
- package/dist/components/ui/input/input.svelte.d.ts +1 -1
- package/dist/components/ui/resizable/index.d.ts +1 -1
- package/dist/components/ui/resizable/index.js +2 -2
- package/dist/components/ui/slider/index.d.ts +3 -0
- package/dist/components/ui/slider/index.js +5 -0
- package/dist/components/ui/slider/range-slider.svelte +94 -0
- package/dist/components/ui/slider/range-slider.svelte.d.ts +21 -0
- package/dist/components/ui/slider/slider.svelte +83 -0
- package/dist/components/ui/slider/slider.svelte.d.ts +7 -0
- package/dist/components/viewers/ArchiveViewer.svelte +2 -2
- package/dist/components/viewers/CodeViewer.svelte +31 -22
- package/dist/components/viewers/CogControls.svelte +338 -184
- package/dist/components/viewers/CogControls.svelte.d.ts +33 -10
- package/dist/components/viewers/CogViewer.svelte +320 -119
- package/dist/components/viewers/CopcViewer.svelte +1 -1
- package/dist/components/viewers/FlatGeobufViewer.svelte +1 -1
- package/dist/components/viewers/GeoParquetMapViewer.svelte +6 -6
- package/dist/components/viewers/GeoParquetMapViewer.svelte.d.ts +1 -1
- package/dist/components/viewers/ImageViewer.svelte +2 -2
- package/dist/components/viewers/MarkdownViewer.svelte +12 -9
- package/dist/components/viewers/MediaViewer.svelte +2 -2
- package/dist/components/viewers/ModelViewer.svelte +1 -1
- package/dist/components/viewers/MultiCogViewer.svelte +467 -102
- package/dist/components/viewers/MultiCogViewer.svelte.d.ts +1 -1
- package/dist/components/viewers/NotebookViewer.svelte +6 -3
- package/dist/components/viewers/PdfViewer.svelte +2 -2
- package/dist/components/viewers/PmtilesViewer.svelte +3 -6
- package/dist/components/viewers/RawViewer.svelte +6 -3
- package/dist/components/viewers/StacMapViewer.svelte +10 -2
- package/dist/components/viewers/StacMosaicViewer.svelte +1800 -362
- package/dist/components/viewers/StacMosaicViewer.svelte.d.ts +1 -1
- package/dist/components/viewers/StacTabViewer.svelte +24 -13
- package/dist/components/viewers/StacTabViewer.svelte.d.ts +1 -1
- package/dist/components/viewers/TableGrid.svelte +4 -4
- package/dist/components/viewers/TableStatusBar.svelte +1 -1
- package/dist/components/viewers/TableToolbar.svelte +1 -1
- package/dist/components/viewers/TableViewer.svelte +25 -17
- package/dist/components/viewers/TableViewer.svelte.d.ts +1 -0
- package/dist/components/viewers/ViewerRouter.svelte +16 -8
- package/dist/components/viewers/ZarrMapViewer.svelte +11 -9
- package/dist/components/viewers/ZarrViewer.svelte +4 -4
- package/dist/components/viewers/cog/ChannelPicker.svelte +83 -0
- package/dist/components/viewers/cog/ChannelPicker.svelte.d.ts +13 -0
- package/dist/components/viewers/cog/PixelInspectorPanel.svelte +87 -0
- package/dist/components/viewers/cog/PixelInspectorPanel.svelte.d.ts +17 -0
- package/dist/components/viewers/cog/buildRgbLayer.d.ts +78 -0
- package/dist/components/viewers/cog/buildRgbLayer.js +176 -0
- package/dist/components/viewers/map/AttributeTable.svelte +1 -1
- package/dist/components/viewers/map/MapContainer.svelte +37 -11
- package/dist/components/viewers/pmtiles/PmtilesArchiveView.svelte +1 -1
- package/dist/components/viewers/pmtiles/PmtilesTileInspector.svelte +1 -1
- package/dist/components/viewers/stac/StacDatetimeBar.svelte +175 -0
- package/dist/components/viewers/stac/StacDatetimeBar.svelte.d.ts +10 -0
- package/dist/components/viewers/stac/StacFilterPanel.svelte +243 -0
- package/dist/components/viewers/stac/StacFilterPanel.svelte.d.ts +14 -0
- package/dist/components/viewers/stac/StacItemInspector.svelte +223 -0
- package/dist/components/viewers/stac/StacItemInspector.svelte.d.ts +10 -0
- package/dist/components/viewers/stac/StacItemStrip.svelte +228 -0
- package/dist/components/viewers/stac/StacItemStrip.svelte.d.ts +12 -0
- package/dist/file-icons/index.d.ts +1 -1
- package/dist/file-icons/index.js +1 -1
- package/dist/i18n/ar.js +110 -2
- package/dist/i18n/en.js +110 -2
- package/dist/index.d.ts +2 -28
- package/dist/index.js +7 -23
- package/dist/query/engine.d.ts +10 -0
- package/dist/query/source.js +1 -1
- package/dist/query/stac-source-factory.d.ts +65 -0
- package/dist/query/stac-source-factory.js +77 -0
- package/dist/query/stac-source-parquet.d.ts +135 -0
- package/dist/query/stac-source-parquet.js +465 -0
- package/dist/query/wasm.d.ts +8 -0
- package/dist/query/wasm.js +304 -2
- package/dist/storage/presign.js +1 -1
- package/dist/storage/providers.js +5 -5
- package/dist/stores/config.svelte.d.ts +15 -0
- package/dist/stores/config.svelte.js +46 -0
- package/dist/stores/connections.svelte.d.ts +2 -2
- package/dist/stores/connections.svelte.js +1 -2
- package/dist/stores/files.svelte.d.ts +1 -1
- package/dist/stores/files.svelte.js +1 -1
- package/dist/stores/query-history.svelte.js +1 -1
- package/dist/stores/settings.svelte.d.ts +16 -1
- package/dist/stores/settings.svelte.js +104 -48
- package/dist/stores/tabs.svelte.d.ts +3 -0
- package/dist/stores/tabs.svelte.js +17 -0
- package/dist/utils/cog-histogram.d.ts +121 -0
- package/dist/utils/cog-histogram.js +424 -0
- package/dist/utils/cog.d.ts +200 -60
- package/dist/utils/cog.js +377 -114
- package/dist/utils/colormap-sprite.d.ts +0 -9
- package/dist/utils/colormap-sprite.js +0 -21
- package/dist/utils/deck.d.ts +16 -12
- package/dist/utils/deck.js +10 -4
- package/dist/utils/pmtiles-tile.js +2 -2
- package/dist/utils/{url.d.ts → signed-url.d.ts} +15 -1
- package/dist/utils/{url.js → signed-url.js} +32 -10
- package/dist/utils/url-state.d.ts +36 -0
- package/dist/utils/url-state.js +72 -2
- package/dist/utils/zarr-tab.d.ts +1 -2
- package/dist/utils/zarr-tab.js +1 -2
- package/dist/utils/zarr.d.ts +0 -17
- package/dist/utils/zarr.js +1 -45
- package/package.json +55 -84
- package/dist/components/browser/Breadcrumb.svelte +0 -50
- package/dist/components/browser/Breadcrumb.svelte.d.ts +0 -7
- package/dist/components/browser/CreateFolderDialog.svelte +0 -98
- package/dist/components/browser/CreateFolderDialog.svelte.d.ts +0 -6
- package/dist/components/browser/DeleteConfirmDialog.svelte +0 -90
- package/dist/components/browser/DeleteConfirmDialog.svelte.d.ts +0 -8
- package/dist/components/browser/DropZone.svelte +0 -83
- package/dist/components/browser/DropZone.svelte.d.ts +0 -7
- package/dist/components/browser/FileBrowser.svelte +0 -252
- package/dist/components/browser/FileBrowser.svelte.d.ts +0 -3
- package/dist/components/browser/FileRow.svelte +0 -117
- package/dist/components/browser/FileRow.svelte.d.ts +0 -9
- package/dist/components/browser/RenameDialog.svelte +0 -101
- package/dist/components/browser/RenameDialog.svelte.d.ts +0 -8
- package/dist/components/browser/SearchBar.svelte +0 -40
- package/dist/components/browser/SearchBar.svelte.d.ts +0 -6
- package/dist/components/browser/UploadButton.svelte +0 -65
- package/dist/components/browser/UploadButton.svelte.d.ts +0 -3
- package/dist/query/stac-geoparquet.d.ts +0 -31
- package/dist/query/stac-geoparquet.js +0 -136
- package/dist/utils/clipboard.d.ts +0 -13
- package/dist/utils/clipboard.js +0 -38
- package/dist/utils/cloud-url.d.ts +0 -27
- package/dist/utils/cloud-url.js +0 -61
- package/dist/utils/column-types.d.ts +0 -5
- package/dist/utils/column-types.js +0 -137
- package/dist/utils/connection-identity.d.ts +0 -51
- package/dist/utils/connection-identity.js +0 -97
- package/dist/utils/error.d.ts +0 -8
- package/dist/utils/error.js +0 -12
- package/dist/utils/evidence-context.d.ts +0 -22
- package/dist/utils/evidence-context.js +0 -56
- package/dist/utils/export.d.ts +0 -22
- package/dist/utils/export.js +0 -76
- package/dist/utils/file-sort.d.ts +0 -20
- package/dist/utils/file-sort.js +0 -41
- package/dist/utils/format.d.ts +0 -24
- package/dist/utils/format.js +0 -78
- package/dist/utils/geoarrow.d.ts +0 -32
- package/dist/utils/geoarrow.js +0 -672
- package/dist/utils/geometry-type.d.ts +0 -52
- package/dist/utils/geometry-type.js +0 -76
- package/dist/utils/hex.d.ts +0 -10
- package/dist/utils/hex.js +0 -27
- package/dist/utils/host-detection.d.ts +0 -23
- package/dist/utils/host-detection.js +0 -95
- package/dist/utils/local-storage.d.ts +0 -16
- package/dist/utils/local-storage.js +0 -37
- package/dist/utils/markdown-sql.d.ts +0 -30
- package/dist/utils/markdown-sql.js +0 -72
- package/dist/utils/notebook.d.ts +0 -59
- package/dist/utils/notebook.js +0 -211
- package/dist/utils/parquet-metadata.d.ts +0 -64
- package/dist/utils/parquet-metadata.js +0 -262
- package/dist/utils/stac-geoparquet.d.ts +0 -90
- package/dist/utils/stac-geoparquet.js +0 -223
- package/dist/utils/stac-hydrate.d.ts +0 -38
- package/dist/utils/stac-hydrate.js +0 -243
- package/dist/utils/stac.d.ts +0 -136
- package/dist/utils/stac.js +0 -176
- package/dist/utils/storage-url.d.ts +0 -90
- package/dist/utils/storage-url.js +0 -568
- package/dist/utils/wkb.d.ts +0 -43
- package/dist/utils/wkb.js +0 -359
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* stac-geoparquet implementation of the StacSource contract.
|
|
3
|
+
*
|
|
4
|
+
* Reuses:
|
|
5
|
+
* - `getQueryEngine()` + `queryCancellable`/`query` for the single worker
|
|
6
|
+
* - `resolveTableSourceAsync(tab)` for presigned `signed-s3` URL handling
|
|
7
|
+
* - `stacRowToItem` from `@walkthru-earth/objex-utils` for the pure transform
|
|
8
|
+
* - `parseWKB` from `@walkthru-earth/objex-utils` for geometry decoding
|
|
9
|
+
*
|
|
10
|
+
* Push-down: `bbox` (`ST_Intersects` + `ST_MakeEnvelope`) and `datetime`
|
|
11
|
+
* (`datetime BETWEEN TIMESTAMPTZ ...`). Without the datetime push-down,
|
|
12
|
+
* `LIMIT + ORDER BY datetime DESC` silently drops older rows before the
|
|
13
|
+
* client-side filter ever runs, so any window outside the freshest N items
|
|
14
|
+
* returned zero matches. Cloud cover / GSD / platform / etc. still ride
|
|
15
|
+
* along on the residual until slice 3 plumbs them through DuckDB SQL.
|
|
16
|
+
*
|
|
17
|
+
* Hive partitioning: when the factory (or an SDK caller) sets
|
|
18
|
+
* `useHivePartitioning: true`, the FROM target switches to
|
|
19
|
+
* `read_parquet('.../**\/*.parquet', hive_partitioning=true,
|
|
20
|
+
* union_by_name=true)`. Mirrors lazycogs'
|
|
21
|
+
* `DuckdbClient(use_hive_partitioning=True)`. Partition columns appear as
|
|
22
|
+
* virtual columns on the schema, but `buildSelectList` only projects known
|
|
23
|
+
* STAC columns so they never leak into the rendered Items. `union_by_name`
|
|
24
|
+
* is required because partitioned writes can drift schemas across
|
|
25
|
+
* partitions (extra `proj:*` columns added later, etc.).
|
|
26
|
+
*
|
|
27
|
+
* Yields one batch per Arrow RecordBatch when the engine exposes
|
|
28
|
+
* `queryStream` (only the last has `done: true`), else one buffered batch.
|
|
29
|
+
*/
|
|
30
|
+
import { emptyPushdown, parseWKB, stacRowToItem } from '@walkthru-earth/objex-utils';
|
|
31
|
+
import { QueryCancelledError } from './engine.js';
|
|
32
|
+
import { getQueryEngine } from './index.js';
|
|
33
|
+
import { resolveTableSourceAsync } from './source.js';
|
|
34
|
+
/**
 * Heuristic default for `lowMemoryMode` when the caller leaves it unset.
 *
 * Rationale recorded by the original authors: iOS Safari caps the WASM heap
 * at ~1.8 GiB and rarely engages OPFS spill (`credentialless` COEP only
 * landed in 17.6), so STRUCT-heavy stac-geoparquet scans can OOM during the
 * parquet decode before any rows reach the consumer.
 *
 * @returns {boolean} `false` when `navigator` is not defined; `true` when the
 *   user agent matches a mobile pattern; otherwise `false` when `window` is
 *   not defined, else whether the shorter viewport side is at most 820 px.
 */
function detectLowMemoryDefault() {
    const hasNavigator = typeof navigator !== 'undefined';
    if (!hasNavigator) {
        return false;
    }
    const mobileUserAgent = /Mobi|Android|iPhone|iPad|iPod/i;
    if (mobileUserAgent.test(navigator.userAgent)) {
        return true;
    }
    const hasWindow = typeof window !== 'undefined';
    if (!hasWindow) {
        return false;
    }
    // Small viewports are treated as mobile-class even with a desktop UA.
    const shortestSide = Math.min(window.innerWidth, window.innerHeight);
    return shortestSide <= 820;
}
|
|
49
|
+
/**
 * Render the datetime filter as a SQL predicate, or `null` when there is
 * nothing to filter on.
 *
 * Each bound goes through `Date.parse` and `toISOString` before it is inlined,
 * so an unparseable bound is dropped rather than reaching the SQL text.
 *
 * STAC items may carry a single `datetime` or a
 * `start_datetime` + `end_datetime` interval. When both interval columns are
 * available the predicate also accepts rows whose interval overlaps the
 * requested window (start <= max AND end >= min), so interval-only rows are
 * not excluded by the plain `datetime` comparison.
 *
 * @param filter `{ min?, max? }` bounds, or a falsy value for "no filter".
 * @param available `{ datetime, startDatetime, endDatetime }` column flags.
 * @returns {string | null} predicate text without a leading `WHERE`, or
 *   `null` when no bound is usable or no matching column is available.
 */
function buildDatetimeWhere(filter, available) {
    if (!filter) {
        return null;
    }
    const toIso = (raw) => {
        if (!raw) {
            return null;
        }
        const millis = Date.parse(raw);
        return Number.isFinite(millis) ? new Date(millis).toISOString() : null;
    };
    const lower = toIso(filter.min);
    const upper = toIso(filter.max);
    if (lower === null && upper === null) {
        return null;
    }
    const literal = (iso) => `TIMESTAMPTZ '${iso}'`;
    // Point-in-time form: the row's `datetime` lies inside the window.
    const pointClause = [
        lower && `datetime >= ${literal(lower)}`,
        upper && `datetime <= ${literal(upper)}`
    ]
        .filter(Boolean)
        .join(' AND ');
    // Interval form: the row's [start, end] overlaps the window.
    const hasIntervalColumns = available.startDatetime && available.endDatetime;
    const overlapClause = hasIntervalColumns
        ? [
            upper && `start_datetime <= ${literal(upper)}`,
            lower && `end_datetime >= ${literal(lower)}`
        ]
            .filter(Boolean)
            .join(' AND ')
        : null;
    if (available.datetime) {
        // Rows with a NULL `datetime` fail the first branch and are picked up
        // by the interval branch when it exists.
        return overlapClause ? `((${pointClause}) OR (${overlapClause}))` : pointClause;
    }
    return overlapClause;
}
|
|
101
|
+
/**
 * Render a `[west, south, east, north]` bounding box as a spatial predicate
 * on the `geometry` column, or `null` when the box is unusable.
 *
 * A box whose west edge is greater than its east edge is read as crossing
 * the antimeridian (the GeoJSON / STAC convention). A single envelope built
 * from those corners would describe the complementary longitude range, so
 * the box is split at +/-180 into two envelopes joined with OR. The result is
 * parenthesised so it can be AND-ed with other predicates.
 *
 * @param bbox four finite numbers `[w, s, e, n]`; anything else yields `null`.
 * @returns {string | null} predicate text without a leading `WHERE`.
 */
function buildBboxWhere(bbox) {
    // Only finite numbers are interpolated, so no quoting is needed.
    if (!bbox || bbox.length !== 4 || !bbox.every((n) => Number.isFinite(n)))
        return null;
    const [w, s, e, n] = bbox;
    const intersectsEnvelope = (minX, maxX) => `ST_Intersects(geometry, ST_MakeEnvelope(${minX}, ${s}, ${maxX}, ${n}))`;
    if (w > e) {
        return `(${intersectsEnvelope(w, 180)} OR ${intersectsEnvelope(-180, e)})`;
    }
    return intersectsEnvelope(w, e);
}
|
|
107
|
+
/**
 * Combine predicate fragments into a ` WHERE ...` clause.
 *
 * `null` and empty-string fragments are ignored; the remaining ones are
 * joined with ` AND `.
 *
 * @param parts array of predicate strings or `null`.
 * @returns {string} the clause including its leading space, or `''` when no
 *   fragment survives.
 */
function joinWhere(parts) {
    const predicates = parts.filter((fragment) => !(fragment === null || fragment.length === 0));
    if (predicates.length === 0) {
        return '';
    }
    return ' WHERE ' + predicates.join(' AND ');
}
|
|
111
|
+
const DEFAULT_LIMIT = 2000;
|
|
112
|
+
/**
 * Assemble the projection for the catalog query.
 *
 * Only a fixed set of known STAC columns is considered, and each one is
 * projected only if the schema actually has it, so a missing column is left
 * out of the SQL instead of being referenced. Core columns come first,
 * followed by the optional temporal / EO / projection / raster columns, in
 * the order listed below.
 *
 * @param availableColumns collection with a `has(name)` method listing the
 *   column names present in the schema.
 * @returns {string} comma-separated, double-quoted column list, with
 *   `ST_AsWKB(geometry) AS geom_wkb` appended when `geometry` exists.
 */
function buildSelectList(availableColumns) {
    const knownColumns = [
        // Core item columns.
        'id',
        'collection',
        'type',
        'stac_version',
        'stac_extensions',
        'assets',
        'bbox',
        'links',
        // Optional property / extension columns.
        'datetime',
        'start_datetime',
        'end_datetime',
        'created',
        'updated',
        'eo:cloud_cover',
        'gsd',
        'platform',
        'constellation',
        'instruments',
        'proj:code',
        'proj:bbox',
        'proj:transform',
        'proj:shape',
        'raster:spatial_resolution',
        'bands'
    ];
    const projection = knownColumns
        .filter((column) => availableColumns.has(column))
        .map((column) => quoteIdent(column));
    // Geometry is always emitted as WKB under the alias `geom_wkb`, so the
    // decoder sees one representation regardless of the column's native type.
    if (availableColumns.has('geometry')) {
        projection.push('ST_AsWKB(geometry) AS geom_wkb');
    }
    return projection.join(', ');
}
|
|
163
|
+
/**
 * Wrap a column name in double quotes for use as a SQL identifier, doubling
 * any embedded double quote.
 *
 * @param name column name (string).
 * @returns {string} the quoted identifier.
 */
function quoteIdent(name) {
    const doubled = name.split('"').join('""');
    return '"' + doubled + '"';
}
|
|
166
|
+
/**
 * Reduce a directory URL to a bare root: everything from the first `?` is
 * dropped, then everything from the first `#`, then a single trailing `/`.
 * For example `s3://bucket/cache/` becomes `s3://bucket/cache`.
 *
 * Only one trailing slash is removed; repeated slashes are left as they are.
 *
 * @param url directory URL (string).
 * @returns {string} the trimmed URL.
 */
function trimDirectoryUrl(url) {
    const [beforeQuery] = url.split('?');
    const [bare] = beforeQuery.split('#');
    return bare.replace(/\/$/, '');
}
|
|
175
|
+
/**
 * Produce the `read_parquet(...)` FROM target for a hive-partitioned
 * directory: a recursive `**\/*.parquet` glob under the trimmed directory
 * URL, with `hive_partitioning=true` and `union_by_name=true`.
 *
 * Per the original note, `union_by_name` is needed because partitions can
 * drift in schema (e.g. `proj:*` columns added later), which a positional
 * union would reject.
 *
 * @param directoryUrl URL of the partition root; query, fragment and one
 *   trailing slash are removed by `trimDirectoryUrl`.
 * @returns {string} SQL table-function call with single quotes in the URL
 *   doubled.
 */
function buildHiveReadParquet(directoryUrl) {
    const sqlLiteral = trimDirectoryUrl(directoryUrl).split("'").join("''");
    const readerOptions = ['hive_partitioning=true', 'union_by_name=true'].join(', ');
    return "read_parquet('" + sqlLiteral + "/**/*.parquet', " + readerOptions + ')';
}
|
|
186
|
+
/**
 * Best-effort check that a directory could feed the recursive hive glob.
 *
 * Returns `true` when the listing contains a parquet / geoparquet file, and
 * also when it contains any subdirectory. The hive glob is recursive, and a
 * partitioned root usually holds only partition folders, so a subdirectory
 * makes the listing inconclusive rather than negative. Returns `false` only
 * when the listing is non-empty and holds nothing but non-parquet files.
 *
 * Every "cannot tell" case also resolves to `true` so the caller still
 * attempts the query: no adapter, a listing that throws, a non-array result,
 * or an empty array.
 *
 * @param adapter object with `list(path, signal)`, or a falsy value.
 * @param tabPath path handed to `adapter.list`.
 * @param signal abort signal forwarded to `adapter.list`.
 * @returns {Promise<boolean>} `false` only on a conclusive negative.
 */
async function probeHasParquetChild(adapter, tabPath, signal) {
    if (!adapter)
        return true;
    const parquetExtensions = ['parquet', 'geoparquet'];
    const looksLikeParquet = (entry) => {
        const extension = entry.extension?.toLowerCase();
        if (extension !== undefined && parquetExtensions.includes(extension))
            return true;
        const name = entry.name?.toLowerCase();
        return name !== undefined && parquetExtensions.some((ext) => name.endsWith(`.${ext}`));
    };
    try {
        const entries = await adapter.list(tabPath, signal);
        if (!Array.isArray(entries) || entries.length === 0)
            return true;
        // A subdirectory may hold the partitioned files; only this level was listed.
        return entries.some((e) => e.is_dir || looksLikeParquet(e));
    }
    catch {
        // Deliberate best-effort: a failed listing must not block the query.
        return true;
    }
}
|
|
210
|
+
/**
 * Choose the FROM-clause target for a query.
 *
 * Without hive mode this is `resolved.ref` unchanged. With hive mode and a
 * resolved `fileUrl`, it is the recursive hive `read_parquet` glob built from
 * that URL. With hive mode but no `fileUrl`, it falls back to `resolved.ref`
 * and leaves any resulting error to the query engine.
 *
 * @param resolved resolved table source exposing `ref` and optional `fileUrl`.
 * @param hive whether hive partitioning was requested.
 * @returns {string} SQL fragment usable directly after `FROM`.
 */
function buildFromTarget(resolved, hive) {
    const globRoot = hive ? resolved.fileUrl : null;
    return globRoot ? buildHiveReadParquet(globRoot) : resolved.ref;
}
|
|
231
|
+
/**
 * Run the catalog query and yield its rows, converted to STAC items, in
 * chunks of `{ items, final }`.
 *
 * When the engine exposes `queryStream`, each streamed batch becomes one
 * chunk. A one-batch lookahead means only the last chunk carries
 * `final: true`, and an empty result still yields one empty final chunk.
 * Otherwise the query runs buffered (`queryCancellable` if present, else
 * `query`) and produces a single final chunk.
 *
 * The stream is consumed with `for await`, so the engine's iterator is
 * closed (its `return()` is invoked) whenever this generator stops early:
 * an abort, a row-conversion error, or a consumer that stops iterating.
 *
 * @param tab tab descriptor; `tab.path` is the fallback parquet URL.
 * @param connId connection id passed through to the engine.
 * @param opts `{ signal?, limit?, bbox?, datetime?, hive?: { enabled },
 *   debugExplain?, skipOrderBy? }`.
 * @yields `{ items, final }` chunks.
 * @throws {QueryCancelledError} when `signal` is aborted at a checkpoint.
 * @throws {Error} when the schema lacks a `geometry` or `assets` column.
 */
async function* streamQuery(tab, connId, opts) {
    const { signal, limit = DEFAULT_LIMIT, bbox, datetime } = opts;
    const hiveEnabled = opts.hive?.enabled === true;
    const throwIfAborted = () => {
        if (signal?.aborted)
            throw new QueryCancelledError();
    };
    throwIfAborted();
    const engine = await getQueryEngine();
    const resolved = await resolveTableSourceAsync(tab);
    throwIfAborted();
    const fromTarget = buildFromTarget(resolved, hiveEnabled);
    // In hive mode the schema must be read through the same glob target.
    const schemaSource = hiveEnabled
        ? { ...resolved, ref: fromTarget }
        : resolved;
    const schema = await engine.getSchema(connId, schemaSource);
    throwIfAborted();
    const available = new Set(schema.map((f) => f.name));
    if (!available.has('geometry') || !available.has('assets')) {
        throw new Error('Not a stac-geoparquet file (missing geometry or assets column)');
    }
    const selectList = buildSelectList(available);
    const datetimeWhere = buildDatetimeWhere(datetime, {
        datetime: available.has('datetime'),
        startDatetime: available.has('start_datetime'),
        endDatetime: available.has('end_datetime')
    });
    const whereClause = joinWhere([buildBboxWhere(bbox), datetimeWhere]);
    // Per the original note: `ORDER BY datetime DESC LIMIT N` is a Top-N, so
    // every row's `assets` payload is read before the limit engages, which
    // can exhaust a mobile WASM heap. `skipOrderBy` trades freshness ordering
    // for early exit at LIMIT.
    const orderClause = opts.skipOrderBy || !available.has('datetime') ? '' : ' ORDER BY datetime DESC';
    // A zero, NaN or infinite limit falls back to the default; `Infinity`
    // would otherwise be interpolated as `LIMIT Infinity`.
    const requestedLimit = Number(limit);
    const usableLimit = Number.isFinite(requestedLimit) && requestedLimit !== 0 ? requestedLimit : DEFAULT_LIMIT;
    const safeLimit = Math.max(1, Math.floor(usableLimit));
    const sql = `SELECT ${selectList} FROM ${fromTarget}${whereClause}${orderClause} LIMIT ${safeLimit}`;
    if (opts.debugExplain) {
        try {
            const plan = (await engine.query(connId, `EXPLAIN ${sql}`));
            // eslint-disable-next-line no-console
            console.debug('[stac-source-parquet] EXPLAIN', { hive: hiveEnabled, sql, plan });
        }
        catch (e) {
            // EXPLAIN is diagnostic only; a failure must not block the real query.
            // eslint-disable-next-line no-console
            console.debug('[stac-source-parquet] EXPLAIN failed', e);
        }
        throwIfAborted();
    }
    const parquetUrl = resolved.fileUrl ?? tab.path;
    // Strip the last path segment (and any query string) to get the directory.
    const parquetDir = parquetUrl.replace(/[^/]*(?:\?.*)?$/, '');
    const rowToItem = (row) => {
        const id = typeof row.id === 'string' ? row.id : String(row.id ?? '');
        const itemBase = id ? `${parquetDir}${id}/` : parquetUrl;
        return stacRowToItem(row, itemBase, { wkbParser: parseWKB });
    };
    if (engine.queryStream) {
        // One-batch lookahead so the final chunk can be flagged without the
        // consumer having to track it.
        let held = null;
        for await (const batch of engine.queryStream(connId, sql, signal)) {
            throwIfAborted();
            const items = batch.rows.map(rowToItem);
            if (held)
                yield { items: held, final: false };
            held = items;
        }
        yield { items: held ?? [], final: true };
        return;
    }
    // Fallback: buffered single-batch path (engines without queryStream).
    let resultPromise;
    let cancel = null;
    if (engine.queryCancellable) {
        const handle = engine.queryCancellable(connId, sql);
        cancel = handle.cancel;
        resultPromise = handle.result;
    }
    else {
        resultPromise = engine.query(connId, sql);
    }
    const onAbort = () => {
        cancel?.().catch(() => { });
    };
    signal?.addEventListener('abort', onAbort, { once: true });
    let rows;
    try {
        const result = await resultPromise;
        rows = result.rows ?? [];
    }
    finally {
        signal?.removeEventListener('abort', onAbort);
    }
    throwIfAborted();
    yield { items: rows.map(rowToItem), final: true };
}
|
|
344
|
+
/**
 * Create the stac-geoparquet `StacSource`.
 *
 * Push-down: `bbox` and `datetime` are translated to SQL. Every other filter
 * key is handed back to the caller in `residual`. `query()` yields one batch
 * per chunk produced by `streamQuery`; only the last batch has `done: true`
 * and carries `totalHinted`, the number of items yielded so far.
 *
 * `options.useHivePartitioning` switches the FROM target to a recursive
 * `read_parquet` glob. The first `query()` / `count()` call awaits a
 * best-effort `adapter.list()` probe whose outcome is cached and is advisory
 * only: the hive query always runs, and a negative probe is merely logged
 * when `options.debugExplain` is set.
 *
 * `options.lowMemoryMode` (default: `detectLowMemoryDefault()`) clamps the
 * row limit to `options.lowMemoryLimit` (default 200) and drops the
 * `ORDER BY`, so the scan can stop at the limit.
 *
 * @param tab tab descriptor for the parquet file or directory.
 * @param connectionId connection id passed to the query engine.
 * @param options `{ useHivePartitioning?, lowMemoryMode?, lowMemoryLimit?,
 *   adapter?, debugExplain? }`.
 * @returns `{ capabilities, query(req), count(filter, bbox, signal) }`.
 */
export function createParquetSource(tab, connectionId, options = {}) {
    const DEFAULT_LOW_MEMORY_LIMIT = 200;
    const requestedHive = options.useHivePartitioning === true;
    const lowMemoryMode = options.lowMemoryMode ?? detectLowMemoryDefault();
    // A NaN limit would propagate through Math.max / Math.min and silently
    // disable the clamp, so it falls back to the default.
    const requestedLowMemoryLimit = Number(options.lowMemoryLimit ?? DEFAULT_LOW_MEMORY_LIMIT);
    const lowMemoryLimit = Math.max(1, Math.floor(Number.isNaN(requestedLowMemoryLimit) ? DEFAULT_LOW_MEMORY_LIMIT : requestedLowMemoryLimit));
    const capabilities = {
        kind: 'parquet',
        label: requestedHive ? 'stac-geoparquet (hive)' : 'stac-geoparquet',
        countAvailable: true,
        // `streamQuery` yields one chunk per batch from the engine's
        // `queryStream` cursor, so items can be rendered as they arrive.
        streaming: true,
        hivePartitioned: requestedHive,
        pushdown: { ...emptyPushdown(), bbox: true, datetime: true }
    };
    const throwIfAborted = (signal) => {
        // A missing signal is treated as "never aborted".
        if (signal?.aborted)
            throw new DOMException('Aborted', 'AbortError');
    };
    // The probe promise is cached so later calls do not re-list the directory.
    let hiveProbe = null;
    const ensureHive = async (signal) => {
        if (!requestedHive)
            return false;
        if (!hiveProbe)
            hiveProbe = probeHasParquetChild(options.adapter, tab.path, signal);
        const probed = await hiveProbe;
        if (options.debugExplain && !probed) {
            // eslint-disable-next-line no-console
            console.debug('[stac-source-parquet] hive probe found no .parquet children', {
                path: tab.path
            });
        }
        // Advisory only: hive mode stays on whatever the probe reported.
        return true;
    };
    return {
        capabilities,
        async *query(req) {
            throwIfAborted(req.signal);
            const hiveEnabled = await ensureHive(req.signal);
            throwIfAborted(req.signal);
            const pushedDown = req.filter?.datetime ? { datetime: req.filter.datetime } : {};
            // Everything except `datetime` is left for the caller to apply.
            const { datetime: _pushed, ...residual } = req.filter ?? {};
            let totalSoFar = 0;
            // In low-memory mode the limit is clamped regardless of what the
            // caller asked for.
            const effectiveLimit = lowMemoryMode
                ? Math.min(req.limit ?? lowMemoryLimit, lowMemoryLimit)
                : req.limit;
            for await (const chunk of streamQuery(tab, connectionId, {
                signal: req.signal,
                limit: effectiveLimit,
                bbox: req.bbox,
                datetime: req.filter?.datetime,
                hive: { enabled: hiveEnabled },
                debugExplain: options.debugExplain,
                skipOrderBy: lowMemoryMode
            })) {
                throwIfAborted(req.signal);
                totalSoFar += chunk.items.length;
                yield {
                    items: chunk.items,
                    pushedDown,
                    residual,
                    done: chunk.final,
                    totalHinted: chunk.final ? totalSoFar : undefined
                };
            }
        },
        async count(filter, bbox, signal) {
            throwIfAborted(signal);
            const hiveEnabled = await ensureHive(signal);
            throwIfAborted(signal);
            const engine = await getQueryEngine();
            const resolved = await resolveTableSourceAsync(tab);
            throwIfAborted(signal);
            const fromTarget = buildFromTarget(resolved, hiveEnabled);
            const schemaSource = hiveEnabled
                ? { ...resolved, ref: fromTarget }
                : resolved;
            const schema = await engine.getSchema(connectionId, schemaSource);
            throwIfAborted(signal);
            const available = new Set(schema.map((f) => f.name));
            const datetimeWhere = buildDatetimeWhere(filter?.datetime, {
                datetime: available.has('datetime'),
                startDatetime: available.has('start_datetime'),
                endDatetime: available.has('end_datetime')
            });
            const where = joinWhere([buildBboxWhere(bbox), datetimeWhere]);
            const sql = `SELECT COUNT(*) AS n FROM ${fromTarget}${where}`;
            const result = (await engine.query(connectionId, sql));
            // `Number()` converts both bigint and number results.
            return Number(result.rows?.[0]?.n ?? 0);
        }
    };
}
|
package/dist/query/wasm.d.ts
CHANGED
|
@@ -13,6 +13,14 @@ export declare class WasmQueryEngine implements QueryEngine {
|
|
|
13
13
|
detectCrs(connId: string, source: QuerySource, geomCol: string): Promise<string | null>;
|
|
14
14
|
private detectCrsWithConn;
|
|
15
15
|
queryCancellable(connId: string, sql: string): QueryHandle;
|
|
16
|
+
/**
|
|
17
|
+
* Streaming variant of `queryCancellable`. Yields one chunk per Arrow
|
|
18
|
+
* RecordBatch so peak memory tracks one batch instead of the full result.
|
|
19
|
+
* Used by `stac-source-parquet` to ingest large catalogs progressively
|
|
20
|
+
* without OOMing the WASM heap. Cancellation routes through `conn.cancelSent`
|
|
21
|
+
* and `signal.aborted`; the connection is always closed in `finally`.
|
|
22
|
+
*/
|
|
23
|
+
queryStream(connId: string, sql: string, signal?: AbortSignal): AsyncIterable<QueryResult>;
|
|
16
24
|
queryForMapCancellable(connId: string, sql: string, geomCol: string, geomColType: string, sourceCrs?: string | null): MapQueryHandle;
|
|
17
25
|
forceCancel(): Promise<void>;
|
|
18
26
|
registerFileBuffer(name: string, buffer: Uint8Array): Promise<void>;
|