@walkthru-earth/objex-utils 1.2.1 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/README.md CHANGED
@@ -34,6 +34,7 @@ As of v1.2, `yaml` is loaded via dynamic `import()` inside `parseMarkdownDocumen
34
34
  | [`geometry.md`](./geometry.md) | WKB parser, GeoArrow table builder, geometry-column detection |
35
35
  | [`cog.md`](./cog.md) | Cloud-Optimized GeoTIFF helpers (pipeline selection, band configs, color ramps, bounds clamping) |
36
36
  | [`parquet-metadata.md`](./parquet-metadata.md) | `readParquetMetadata` + CRS / bounds / geometry-types extractors |
37
+ | [`stac-geoparquet.md`](./stac-geoparquet.md) | stac-geoparquet detection (`isStacGeoparquetSchema`) and row-to-Item transforms (`stacRowToItem`, `flattenStacBbox`, `pickStacPrimaryAsset`, `resolveStacAssetHref`) |
37
38
  | [`storage.md`](./storage.md) | URL parsing (`parseStorageUrl`, `resolveCloudUrl`), provider registry, `StorageAdapter` interface, `UrlAdapter` |
38
39
  | [`query-engine.md`](./query-engine.md) | `QueryEngine` interface and associated result/handle types |
39
40
  | [`file-types.md`](./file-types.md) | File-type registry (`getFileTypeInfo`, `getViewerKind`, `getDuckDbReadFn`, ...) |
package/docs/cog.md CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  Pure, framework-agnostic helpers for working with Cloud-Optimized GeoTIFF metadata and bounds. No Svelte, MapLibre, deck.gl, or GeoTIFF library dependency.
4
4
 
5
- Source: `src/lib/utils/cog.ts`.
5
+ Source: `src/lib/utils/cog-pure.ts` (dependency-free subset). The full `cog.ts` re-exports these same bindings for in-repo consumers.
6
6
 
7
- > The render-pipeline helpers (`selectCogPipeline`, `createConfigurableGetTileData`, `normalizeCogGeotiff`, `createEpsgResolver`, `fitCogBounds`, `renderNonTiledBitmap`, etc.) live in the same source file but are **not** re-exported from `@walkthru-earth/objex-utils` because they pull in `@developmentseed/deck.gl-geotiff`, `@developmentseed/geotiff`, `@developmentseed/proj`, `proj4`, and `maplibre-gl`. If you need them, depend on the full Svelte package [`@walkthru-earth/objex`](https://www.npmjs.com/package/@walkthru-earth/objex) (they are re-exported from `src/lib/index.ts`) and install those optional peers yourself.
7
+ > The render-pipeline helpers (`selectCogPipeline`, `createConfigurableGetTileData`, `normalizeCogGeotiff`, `createEpsgResolver`, `fitCogBounds`, `renderNonTiledBitmap`, etc.) live in `src/lib/utils/cog.ts` but are **not** re-exported from `@walkthru-earth/objex-utils` because they pull in `@developmentseed/deck.gl-geotiff`, `@developmentseed/geotiff`, `@developmentseed/proj`, `proj4`, and `maplibre-gl`. If you need them, depend on the full Svelte package [`@walkthru-earth/objex`](https://www.npmjs.com/package/@walkthru-earth/objex) (they are re-exported from `src/lib/index.ts`) and install those optional peers yourself.
8
8
 
9
9
  ## Types
10
10
 
@@ -36,7 +36,14 @@ interface GeoParquetMeta {
36
36
  ```ts
37
37
  interface ParquetFileMetadata {
38
38
  rowCount: number;
39
+ /** Leaf columns only — struct parents are flattened into their child paths. */
39
40
  schema: { name: string; type: string }[];
41
+ /**
42
+ * Top-level column names as written, including struct/group parents
43
+ * (e.g. `assets`, `bbox`) that `schema` flattens away. Required for
44
+ * stac-geoparquet detection, which keys on the `assets` struct parent.
45
+ */
46
+ topLevelColumns: string[];
40
47
  geo: GeoParquetMeta | null; // null for non-geo Parquet
41
48
  legacyGeoParquet: boolean; // true for pre-1.0 (schema_version without "version" field)
42
49
  createdBy: string | null;
@@ -121,7 +128,8 @@ const meta = await readParquetMetadata(
121
128
  console.log({
122
129
  rows: meta.rowCount,
123
130
  compression: meta.compression,
124
- schema: meta.schema,
131
+ schema: meta.schema, // leaf columns only
132
+ topLevel: meta.topLevelColumns, // includes struct parents like `assets`, `bbox`
125
133
  });
126
134
 
127
135
  if (meta.geo) {
@@ -0,0 +1,198 @@
1
+ # stac-geoparquet
2
+
3
+ Pure transforms and detection for the [stac-geoparquet](https://github.com/stac-utils/stac-geoparquet) format. Zero Svelte / DuckDB / deck.gl dependencies, framework-agnostic. Decoupled from any single WKB library via a caller-supplied `wkbParser`, so consumers can plug in `parseWKB` from this package, `geoarrow-wasm`, `wkx`, or anything else.
4
+
5
+ ```ts
6
+ import {
7
+ STAC_GEOPARQUET_REQUIRED_COLUMNS,
8
+ isStacGeoparquetSchema,
9
+ flattenStacBbox,
10
+ resolveStacAssetHref,
11
+ pickStacPrimaryAsset,
12
+ stacRowToItem,
13
+ parseWKB,
14
+ } from '@walkthru-earth/objex-utils';
15
+ ```
16
+
17
+ ## Types
18
+
19
+ ### `StacGeoparquetSchemaColumn`
20
+
21
+ ```ts
22
+ interface StacGeoparquetSchemaColumn {
23
+ name: string;
24
+ type?: string;
25
+ }
26
+ ```
27
+
28
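+ Minimal shape for a column descriptor. Works with hyparquet's leaf array, DuckDB's `DESCRIBE` rows, Arrow `Field`s, or any other source. `type` is optional because detection only keys on `name`. Note that a leaf-only column list omits struct parents such as `assets`, so for detection supply the top-level column names.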
29
+
30
+ ### `StacBboxStruct`
31
+
32
+ ```ts
33
+ interface StacBboxStruct {
34
+ xmin: number;
35
+ ymin: number;
36
+ xmax: number;
37
+ ymax: number;
38
+ }
39
+ ```
40
+
41
+ Bbox in struct form, as DuckDB returns it for the recommended `bbox struct(xmin double, ymin double, xmax double, ymax double)` column.
42
+
43
+ ### `StacGeoparquetRow`
44
+
45
+ ```ts
46
+ type StacGeoparquetRow = Record<string, unknown>;
47
+ ```
48
+
49
+ Generic row shape after DuckDB / Arrow / hyparquet decoding. Pass directly into `stacRowToItem`.
50
+
51
+ ### `StacRowToItemOptions`
52
+
53
+ ```ts
54
+ interface StacRowToItemOptions {
55
+ /** WKB decoder, e.g. `parseWKB` from this package. */
56
+ wkbParser?: (bytes: Uint8Array) => unknown;
57
+ /** Column holding the WKB bytes. Default `"geom_wkb"`. */
58
+ wkbColumn?: string;
59
+ /** Column holding pre-decoded GeoJSON geometry. Default `"geometry"`. */
60
+ geometryColumn?: string;
61
+ }
62
+ ```
63
+
64
+ The default `wkbColumn` of `"geom_wkb"` matches the recommended SQL projection `ST_AsWKB(geometry) AS geom_wkb`, which keeps DuckDB's GEOMETRY type from ever reaching Arrow's WASM serializer.
65
+
66
+ ## Constants
67
+
68
+ ### `STAC_GEOPARQUET_REQUIRED_COLUMNS`
69
+
70
+ ```ts
71
+ const STAC_GEOPARQUET_REQUIRED_COLUMNS = [
72
+ 'stac_version',
73
+ 'type',
74
+ 'geometry',
75
+ 'assets',
76
+ ] as const;
77
+ ```
78
+
79
+ Columns every stac-geoparquet file MUST carry per the spec. `isStacGeoparquetSchema` checks that all four are present.
80
+
81
+ ## Functions
82
+
83
+ ### `isStacGeoparquetSchema(schema)`
84
+
85
+ ```ts
86
+ function isStacGeoparquetSchema(
87
+ schema: StacGeoparquetSchemaColumn[]
88
+ ): boolean
89
+ ```
90
+
91
+ Returns `true` when every required STAC column is present in `schema`. Type-agnostic on purpose: some pipelines know the column type (DuckDB `DESCRIBE`, Arrow `Field`), others only have the name list (hyparquet schema walk). The set of names is sufficient for routing.
92
+
93
+ **Important** when used with `readParquetMetadata`: pass `meta.topLevelColumns.map((name) => ({ name }))`, not `meta.schema`. `meta.schema` flattens struct parents away and would hide the `assets` column.
94
+
95
+ ```ts
96
+ import { readParquetMetadata, isStacGeoparquetSchema } from '@walkthru-earth/objex-utils';
97
+
98
+ const meta = await readParquetMetadata(url);
99
+ const isStac = isStacGeoparquetSchema(
100
+ meta.topLevelColumns.map((name) => ({ name }))
101
+ );
102
+ ```
103
+
104
+ ### `flattenStacBbox(bbox)`
105
+
106
+ ```ts
107
+ function flattenStacBbox(
108
+ bbox: StacBboxStruct | number[] | null | undefined
109
+ ): [number, number, number, number] | null
110
+ ```
111
+
112
+ Normalize a DuckDB `struct(xmin,ymin,xmax,ymax)` bbox to the `[minX, minY, maxX, maxY]` array shape that STAC Items and deck.gl-geotiff `MosaicLayer` expect. Pass-through for inputs that are already arrays. Returns `null` when any component is non-finite or the input is missing.
113
+
114
+ ### `resolveStacAssetHref(href, baseUrl)`
115
+
116
+ ```ts
117
+ function resolveStacAssetHref(href: string, baseUrl: string): string
118
+ ```
119
+
120
+ Resolve a possibly-relative STAC asset href against a base URL. `./foo.tif` and `foo.tif` become absolute against `baseUrl`. URLs that already carry a scheme (`http(s)://`, `s3://`, `gs://`, …) are returned unchanged.
121
+
122
+ ### `pickStacPrimaryAsset(assets, preferredKeys?)`
123
+
124
+ ```ts
125
+ function pickStacPrimaryAsset(
126
+ assets: Record<string, StacAsset> | null | undefined,
127
+ preferredKeys?: readonly string[]
128
+ ): { key: string; asset: StacAsset } | null
129
+ ```
130
+
131
+ Pick the "primary" asset from a STAC Item's `assets` map. Priority order:
132
+
133
+ 1. The first key listed in `preferredKeys` that exists.
134
+ 2. The asset under the conventional `data` key.
135
+ 3. The first asset whose `roles` array contains `'data'`.
136
+ 4. The first asset.
137
+
138
+ Returns `null` when the map is empty or the input is not an object.
139
+
140
+ ### `stacRowToItem(row, baseUrl, opts?)`
141
+
142
+ ```ts
143
+ function stacRowToItem(
144
+ row: StacGeoparquetRow,
145
+ baseUrl: string,
146
+ opts?: StacRowToItemOptions
147
+ ): StacItem
148
+ ```
149
+
150
+ Convert one stac-geoparquet row into a standard STAC Item JSON object. Handles:
151
+
152
+ - `assets` named-struct flattening + relative href resolution against `baseUrl`
153
+ - `bbox` struct → `[minX, minY, maxX, maxY]` array via `flattenStacBbox`
154
+ - Optional WKB geometry → GeoJSON via `opts.wkbParser`
155
+ - `datetime` → ISO string (passes through already-string values)
156
+ - Promotes `properties.*` columns (`proj:*`, `raster:*`, `eo:*`, `bands`, `datetime`) onto `item.properties`
157
+
158
+ Asset hrefs in stac-geoparquet are typically written relative to each item's original `self` URL, **not** the parquet URL. The stactools default layout places each item JSON at `{catalog_dir}/{item.id}/{item.id}.json`, so callers should compute a per-row base of `{parquet_dir}/{item.id}/` and pass that as `baseUrl`. Resolving against the bare parquet URL instead drops the item-id subfolder, so every COG request returns a 404.
159
+
160
+ ## End-to-end example
161
+
162
+ ```ts
163
+ import {
164
+ isStacGeoparquetSchema,
165
+ parseWKB,
166
+ readParquetMetadata,
167
+ stacRowToItem,
168
+ } from '@walkthru-earth/objex-utils';
169
+
170
+ const parquetUrl = 'https://example.com/catalog.parquet';
171
+
172
+ // 1. Detect — use topLevelColumns, not schema, so the `assets` struct parent is visible.
173
+ const meta = await readParquetMetadata(parquetUrl);
174
+ const isStac = isStacGeoparquetSchema(
175
+ meta.topLevelColumns.map((name) => ({ name }))
176
+ );
177
+ if (!isStac) throw new Error('Not stac-geoparquet');
178
+
179
+ // 2. Materialize via your DuckDB / Arrow / hyparquet pipeline. Recommended SQL:
180
+ // SELECT id, type, stac_version, assets, bbox, links, datetime,
181
+ // ST_AsWKB(geometry) AS geom_wkb
182
+ // FROM 'catalog.parquet'
183
+ const rows: Record<string, unknown>[] = await runYourQuery();
184
+
185
+ // 3. Build STAC Items. Per-row base = {parquet_dir}/{item.id}/.
186
+ const parquetDir = parquetUrl.replace(/[^/]*(?:\?.*)?$/, '');
187
+ const items = rows.map((row) => {
188
+ const id = String(row.id ?? '');
189
+ const itemBase = id ? `${parquetDir}${id}/` : parquetUrl;
190
+ return stacRowToItem(row, itemBase, { wkbParser: parseWKB });
191
+ });
192
+
193
+ const featureCollection = { type: 'FeatureCollection', features: items };
194
+ ```
195
+
196
+ ## Peer dependencies
197
+
198
+ None. The functions are pure and runtime-agnostic. The `wkbParser` option lets callers plug in any WKB decoder, including `parseWKB` from this same package.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@walkthru-earth/objex-utils",
3
- "version": "1.2.1",
3
+ "version": "1.3.1",
4
4
  "description": "Pure TypeScript utilities from objex — WKB parser, GeoArrow builder, storage URL parser, file type registry",
5
5
  "author": "Youssef Harby <yharby@walkthru.earth>",
6
6
  "license": "CC-BY-4.0",