@walkthru-earth/objex-utils 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -39
- package/dist/index.cjs +48 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +65 -8
- package/dist/index.d.ts +65 -8
- package/dist/index.js +44 -5
- package/dist/index.js.map +1 -1
- package/docs/README.md +102 -0
- package/docs/cog.md +303 -0
- package/docs/errors.md +34 -0
- package/docs/file-sort.md +67 -0
- package/docs/file-types.md +141 -0
- package/docs/formatting.md +192 -0
- package/docs/geometry.md +198 -0
- package/docs/local-storage.md +51 -0
- package/docs/markdown-sql.md +109 -0
- package/docs/parquet-metadata.md +133 -0
- package/docs/query-engine.md +140 -0
- package/docs/storage.md +251 -0
- package/docs/types-constants.md +173 -0
- package/package.json +2 -1
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# Formatting, column types, hex, export
|
|
2
|
+
|
|
3
|
+
Display formatters, column-type classification, hex dump, and data serialization. All functions are synchronous and pure; no browser APIs are required unless noted.
|
|
4
|
+
|
|
5
|
+
Sources:
|
|
6
|
+
|
|
7
|
+
- `src/lib/utils/format.ts`
|
|
8
|
+
- `src/lib/utils/column-types.ts`
|
|
9
|
+
- `src/lib/utils/hex.ts`
|
|
10
|
+
- `src/lib/utils/export.ts`
|
|
11
|
+
|
|
12
|
+
## Formatters
|
|
13
|
+
|
|
14
|
+
### `formatFileSize(bytes)`
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
function formatFileSize(bytes: number): string
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
1024-based human-readable byte count. Integer for raw bytes (`'512 B'`), one decimal for everything else (`'1.5 MB'`). Returns `'0 B'` for `0`, `'-'` for negative.
|
|
21
|
+
|
|
22
|
+
### `formatDate(timestamp)`
|
|
23
|
+
|
|
24
|
+
```ts
|
|
25
|
+
function formatDate(timestamp: number): string
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Format a Unix timestamp **in milliseconds** as a human-readable date string.
|
|
29
|
+
|
|
30
|
+
- Recent timestamps render relative: `'just now'`, `'5m ago'`, `'2h ago'`, `'3d ago'`.
|
|
31
|
+
- Older render as `'YYYY-MM-DD'`.
|
|
32
|
+
- Missing / invalid → `'--'`.
|
|
33
|
+
|
|
34
|
+
### `formatValue(value)`
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
function formatValue(value: unknown): string
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Format any value for display in tables, panels, or exports.
|
|
41
|
+
|
|
42
|
+
| Input | Output |
|
|
43
|
+
|-------|--------|
|
|
44
|
+
| `null` / `undefined` | `''` |
|
|
45
|
+
| `bigint` | `value.toString()` |
|
|
46
|
+
| `Date` | `value.toISOString()` |
|
|
47
|
+
| Object (incl. arrays) | `JSON.stringify(value, jsonReplacerBigInt)` |
|
|
48
|
+
| Everything else | `String(value)` |
|
|
49
|
+
|
|
50
|
+
### `getFileExtension(filename)`
|
|
51
|
+
|
|
52
|
+
```ts
|
|
53
|
+
function getFileExtension(filename: string): string
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Return the extension **including** the leading dot (`'data.parquet'` → `'.parquet'`). Returns an empty string if there is no extension. Case-preserving.
|
|
57
|
+
|
|
58
|
+
### `jsonReplacerBigInt(_key, value)`
|
|
59
|
+
|
|
60
|
+
```ts
|
|
61
|
+
function jsonReplacerBigInt(_key: string, value: unknown): unknown
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
`JSON.stringify` replacer that coerces `BigInt` to decimal strings so DuckDB `BIGINT`s don't explode serialization.
|
|
65
|
+
|
|
66
|
+
```ts
|
|
67
|
+
JSON.stringify(row, jsonReplacerBigInt);
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Column-type classification
|
|
71
|
+
|
|
72
|
+
### `TypeCategory`
|
|
73
|
+
|
|
74
|
+
```ts
|
|
75
|
+
type TypeCategory =
|
|
76
|
+
| 'number' | 'string' | 'date' | 'boolean'
|
|
77
|
+
| 'geo' | 'binary' | 'json' | 'other';
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### `classifyType(duckdbType)`
|
|
81
|
+
|
|
82
|
+
```ts
|
|
83
|
+
function classifyType(duckdbType: string): TypeCategory
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Classify a DuckDB/Arrow type string. Handles parameterized types (`DECIMAL(18,3)`, `VARCHAR(100)`), compound types (`STRUCT`, `MAP`, `LIST`), and fuzzy keyword matching as a fallback.
|
|
87
|
+
|
|
88
|
+
| Example inputs | Result |
|
|
89
|
+
|----------------|--------|
|
|
90
|
+
| `BIGINT`, `DOUBLE`, `DECIMAL(18,3)` | `'number'` |
|
|
91
|
+
| `VARCHAR`, `STRING`, `UTF8` | `'string'` |
|
|
92
|
+
| `TIMESTAMP`, `DATE`, `TIME` | `'date'` |
|
|
93
|
+
| `BOOLEAN` | `'boolean'` |
|
|
94
|
+
| `GEOMETRY`, `GEOMETRY('EPSG:27700')`, `POINT`, `WKB_BLOB` | `'geo'` |
|
|
95
|
+
| `BLOB`, `BINARY`, `VARBINARY` | `'binary'` |
|
|
96
|
+
| `STRUCT<...>`, `MAP<...>`, `LIST<...>`, `JSON` | `'json'` |
|
|
97
|
+
| Otherwise | `'other'` |
|
|
98
|
+
|
|
99
|
+
### `typeColor(category)`
|
|
100
|
+
|
|
101
|
+
```ts
|
|
102
|
+
function typeColor(category: TypeCategory): string
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Tailwind text-color class (e.g. `'text-blue-500'`) for dots / badges.
|
|
106
|
+
|
|
107
|
+
### `typeBadgeClass(category)`
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
function typeBadgeClass(category: TypeCategory): string
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Tailwind classes for a pill-style badge (background + text + border).
|
|
114
|
+
|
|
115
|
+
### `typeLabel(category)`
|
|
116
|
+
|
|
117
|
+
```ts
|
|
118
|
+
function typeLabel(category: TypeCategory): string
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Short symbol (`'#'` for number, `'{}'` for json, `'geo'` for geo, etc.) for compact badges.
|
|
122
|
+
|
|
123
|
+
## Hex dump
|
|
124
|
+
|
|
125
|
+
### `HexRow`
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
interface HexRow {
|
|
129
|
+
offset: string; // hex offset, zero-padded (e.g. '00000000')
|
|
130
|
+
hex: string[]; // per-byte hex (e.g. '48', '65')
|
|
131
|
+
ascii: string; // printable ASCII, '.' for non-printable
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### `generateHexDump(data, bytesPerRow?)`
|
|
136
|
+
|
|
137
|
+
```ts
|
|
138
|
+
function generateHexDump(data: Uint8Array, bytesPerRow?: number): HexRow[]
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Build a structured hex dump. `bytesPerRow` defaults to `16`. Pure — no DOM.
|
|
142
|
+
|
|
143
|
+
Rendering is left to the caller; `HexRow` maps directly to `<tr>` / CSV / JSON.
|
|
144
|
+
|
|
145
|
+
## Data export
|
|
146
|
+
|
|
147
|
+
### `escapeCsvField(value)`
|
|
148
|
+
|
|
149
|
+
```ts
|
|
150
|
+
function escapeCsvField(value: string): string
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Escape a single CSV field per RFC 4180: wraps in double quotes and doubles any embedded quote **only when** the value contains `,`, `"`, `\n`, or `\r`. Otherwise returns the value unchanged.
|
|
154
|
+
|
|
155
|
+
### `serializeToCsv(columns, rows)`
|
|
156
|
+
|
|
157
|
+
```ts
|
|
158
|
+
function serializeToCsv(
|
|
159
|
+
columns: string[],
|
|
160
|
+
rows: Record<string, unknown>[]
|
|
161
|
+
): string
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Pure serializer. Uses `formatValue` internally, so `bigint`, `Date`, and object values are stringified consistently (decimal string, ISO 8601 string, and JSON, respectively). Row terminator is `'\n'`.
|
|
165
|
+
|
|
166
|
+
### `serializeToJson(columns, rows)`
|
|
167
|
+
|
|
168
|
+
```ts
|
|
169
|
+
function serializeToJson(
|
|
170
|
+
columns: string[],
|
|
171
|
+
rows: Record<string, unknown>[]
|
|
172
|
+
): string
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Pure. Returns pretty-printed JSON (`2`-space indent). `Date` → ISO, `BigInt` → decimal string, `null` preserved.
|
|
176
|
+
|
|
177
|
+
### `exportToCsv(columns, rows, filename)` / `exportToJson(columns, rows, filename)`
|
|
178
|
+
|
|
179
|
+
```ts
|
|
180
|
+
function exportToCsv(
|
|
181
|
+
columns: string[],
|
|
182
|
+
rows: Record<string, unknown>[],
|
|
183
|
+
filename: string
|
|
184
|
+
): void;
|
|
185
|
+
function exportToJson(
|
|
186
|
+
columns: string[],
|
|
187
|
+
rows: Record<string, unknown>[],
|
|
188
|
+
filename: string
|
|
189
|
+
): void;
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Browser-only wrappers that build the blob and trigger a download via a hidden `<a>`. Throw `ReferenceError` in non-DOM environments — use the `serializeToCsv` / `serializeToJson` variants server-side.
|
package/docs/geometry.md
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Geometry
|
|
2
|
+
|
|
3
|
+
WKB parsing, geometry-column detection, and GeoArrow table construction. Zero-copy where possible.
|
|
4
|
+
|
|
5
|
+
Source: `src/lib/utils/wkb.ts`, `src/lib/utils/geoarrow.ts`.
|
|
6
|
+
|
|
7
|
+
## Types
|
|
8
|
+
|
|
9
|
+
### `GeoType`
|
|
10
|
+
|
|
11
|
+
```ts
|
|
12
|
+
type GeoType =
|
|
13
|
+
| 'Point'
|
|
14
|
+
| 'LineString'
|
|
15
|
+
| 'Polygon'
|
|
16
|
+
| 'MultiPoint'
|
|
17
|
+
| 'MultiLineString'
|
|
18
|
+
| 'MultiPolygon'
|
|
19
|
+
| 'Unknown';
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
`'Unknown'` is returned for unsupported WKB types (GeometryCollections, TINs, triangles, etc.).
|
|
23
|
+
|
|
24
|
+
### `ParsedGeometry`
|
|
25
|
+
|
|
26
|
+
```ts
|
|
27
|
+
interface ParsedGeometry {
|
|
28
|
+
type: GeoType;
|
|
29
|
+
coordinates: number[] | number[][] | number[][][] | number[][][][];
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Coordinate nesting follows GeoJSON conventions.
|
|
34
|
+
|
|
35
|
+
| Type | Shape |
|
|
36
|
+
|------|-------|
|
|
37
|
+
| `Point` | `[x, y]` |
|
|
38
|
+
| `MultiPoint` / `LineString` | `[[x, y], ...]` |
|
|
39
|
+
| `MultiLineString` / `Polygon` | `[[[x, y], ...], ...]` |
|
|
40
|
+
| `MultiPolygon` | `[[[[x, y], ...], ...], ...]` |
|
|
41
|
+
|
|
42
|
+
### `GeoArrowGeomType`
|
|
43
|
+
|
|
44
|
+
```ts
|
|
45
|
+
type GeoArrowGeomType =
|
|
46
|
+
| 'point'
|
|
47
|
+
| 'linestring'
|
|
48
|
+
| 'polygon'
|
|
49
|
+
| 'multipoint'
|
|
50
|
+
| 'multilinestring'
|
|
51
|
+
| 'multipolygon';
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Lowercase normalized form used by the GeoArrow builder and `@geoarrow/deck.gl-layers`.
|
|
55
|
+
|
|
56
|
+
### `GeoArrowResult`
|
|
57
|
+
|
|
58
|
+
```ts
|
|
59
|
+
interface GeoArrowResult {
|
|
60
|
+
table: Table; // apache-arrow Table
|
|
61
|
+
geometryType: GeoArrowGeomType;
|
|
62
|
+
bounds: [number, number, number, number]; // [minX, minY, maxX, maxY]
|
|
63
|
+
sourceIndices: number[]; // table row i → original wkbArrays[sourceIndices[i]]
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
`sourceIndices` lets callers map picked rows back to the original row order when mixed geometry types force a split.
|
|
68
|
+
|
|
69
|
+
## Functions
|
|
70
|
+
|
|
71
|
+
### `toBinary(value)`
|
|
72
|
+
|
|
73
|
+
```ts
|
|
74
|
+
function toBinary(value: unknown): Uint8Array | null
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Normalize an arbitrary "possibly-binary" value to a `Uint8Array`.
|
|
78
|
+
|
|
79
|
+
| Input | Handling |
|
|
80
|
+
|-------|----------|
|
|
81
|
+
| `Uint8Array` | Returned as-is |
|
|
82
|
+
| `ArrayBuffer` | Wrapped in `new Uint8Array(buf)` |
|
|
83
|
+
| `number[]` | `new Uint8Array(arr)` |
|
|
84
|
+
| Hex string (even length, `[0-9a-fA-F]`) | Decoded to bytes |
|
|
85
|
+
| DuckDB `toJSON()` blob object `{0: b0, 1: b1, ...}` | Reassembled into bytes |
|
|
86
|
+
| Anything else | `null` |
|
|
87
|
+
|
|
88
|
+
Returns **`null`** rather than throwing on unrecognized input, so callers can fall through.
|
|
89
|
+
|
|
90
|
+
### `parseWKB(data)`
|
|
91
|
+
|
|
92
|
+
```ts
|
|
93
|
+
function parseWKB(data: Uint8Array): ParsedGeometry | null
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Parse a WKB byte blob.
|
|
97
|
+
|
|
98
|
+
- Supports standard WKB, ISO WKB with Z/M flags, and EWKB with SRID prefix (PostGIS). Z/M ordinates are **dropped** — only X/Y are returned.
|
|
99
|
+
- Supports Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon.
|
|
100
|
+
- Returns **`null`** for truncated buffers, invalid byte-order flags, or malformed geometry.
|
|
101
|
+
- GeometryCollections (WKB type 7) return `{ type: 'Unknown', coordinates: [] }`.
|
|
102
|
+
|
|
103
|
+
### `findGeoColumn(schema)`
|
|
104
|
+
|
|
105
|
+
```ts
|
|
106
|
+
function findGeoColumn(
|
|
107
|
+
schema: { name: string; type: string }[]
|
|
108
|
+
): string | null
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Schema-only heuristic for locating the geometry column. Checks in priority order:
|
|
112
|
+
|
|
113
|
+
1. **Type** contains a geometry keyword (`GEOMETRY`, `POINT`, `WKB`, ...) — match wins immediately.
|
|
114
|
+
2. Exact well-known **name** (`geometry`, `geom`, `the_geom`, ...) **with** a binary-ish type (`BLOB`, `BINARY`, `VARBINARY`, `BYTES`).
|
|
115
|
+
3. Exact well-known **name**, any type.
|
|
116
|
+
4. Name contains a geo hint (`geom`, `geo_`, `wkb`, `wkt`, `shape`, `spatial`) with binary-ish type.
|
|
117
|
+
5. Name contains geo hint, any type.
|
|
118
|
+
|
|
119
|
+
Returns the first matching `name`, or `null` if no heuristic hits. Use `findGeoColumnFromRows` as a fallback when the schema is not informative.
|
|
120
|
+
|
|
121
|
+
### `findGeoColumnFromRows(rows, schema)`
|
|
122
|
+
|
|
123
|
+
```ts
|
|
124
|
+
function findGeoColumnFromRows(
|
|
125
|
+
rows: Record<string, unknown>[],
|
|
126
|
+
schema: { name: string; type: string }[]
|
|
127
|
+
): string | null
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Row-based probe. Samples the first row and classifies values:
|
|
131
|
+
|
|
132
|
+
- Binary-typed columns: tests for WKB magic bytes (endian flag 0x00/0x01 + valid geometry type).
|
|
133
|
+
- Other columns: probes hex-encoded WKB, WKT strings starting with `POINT(`, `LINESTRING(`, etc., and GeoJSON geometry objects (`{type: 'Point', coordinates: [...]}`).
|
|
134
|
+
|
|
135
|
+
Returns the first column whose value looks geometry-shaped, or `null`.
|
|
136
|
+
|
|
137
|
+
### `normalizeGeomType(raw)`
|
|
138
|
+
|
|
139
|
+
```ts
|
|
140
|
+
function normalizeGeomType(raw: string): GeoArrowGeomType
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Map a DuckDB `ST_GeometryType()` result (`'POINT'`, `'ST_Polygon'`, etc., case-insensitive, optional `ST_` prefix) to a `GeoArrowGeomType`. Unknown input falls back to `'polygon'`.
|
|
144
|
+
|
|
145
|
+
### `buildGeoArrowTables(wkbArrays, attributes, knownGeomType?)`
|
|
146
|
+
|
|
147
|
+
```ts
|
|
148
|
+
function buildGeoArrowTables(
|
|
149
|
+
wkbArrays: Uint8Array[],
|
|
150
|
+
attributes: Map<string, { values: any[]; type: string }>,
|
|
151
|
+
knownGeomType?: GeoArrowGeomType
|
|
152
|
+
): GeoArrowResult[]
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Build one or more Arrow `Table` objects keyed by geometry type, ready for `@geoarrow/deck.gl-layers`.
|
|
156
|
+
|
|
157
|
+
**Parameters**
|
|
158
|
+
|
|
159
|
+
| Name | Type | Meaning |
|
|
160
|
+
|------|------|---------|
|
|
161
|
+
| `wkbArrays` | `Uint8Array[]` | Per-row WKB binary. Entries may be empty / invalid — they are skipped. |
|
|
162
|
+
| `attributes` | `Map<name, { values, type }>` | Non-geometry columns. `values.length` must equal `wkbArrays.length`. `type` is a DuckDB/Arrow type string used for Arrow dtype inference (numeric → Float64, bool → Bool, everything else → Utf8). |
|
|
163
|
+
| `knownGeomType` | optional | If provided (e.g. from GeoParquet metadata), classification is skipped — all WKBs are assumed to share this type, and one `GeoArrowResult` is returned. |
|
|
164
|
+
|
|
165
|
+
**Behavior**
|
|
166
|
+
|
|
167
|
+
- **Zero-copy geometry ingest.** A 5-byte peek classifies each WKB; coordinates are read directly into pre-allocated `Float64Array` backings with no intermediate JS objects.
|
|
168
|
+
- **Mixed-type splitting.** If `knownGeomType` is not provided and the rows span multiple types (e.g. `Polygon` and `MultiPolygon`), rows are partitioned and one `GeoArrowResult` is emitted per non-empty group.
|
|
169
|
+
- **Bounds.** Each result carries a computed `[minX, minY, maxX, maxY]`. When splitting, each group gets its own tight bounds.
|
|
170
|
+
- **Attribute propagation.** Attribute values follow the split via `sourceIndices`.
|
|
171
|
+
- **Empty / unknown WKBs** are silently dropped.
|
|
172
|
+
|
|
173
|
+
**Returns** `GeoArrowResult[]` — empty array if no rows have parseable geometries.
|
|
174
|
+
|
|
175
|
+
**Peer dependencies**
|
|
176
|
+
|
|
177
|
+
- `apache-arrow` (used to construct the returned `Table`).
|
|
178
|
+
|
|
179
|
+
**Example**
|
|
180
|
+
|
|
181
|
+
```ts
|
|
182
|
+
import { buildGeoArrowTables, normalizeGeomType } from '@walkthru-earth/objex-utils';
|
|
183
|
+
|
|
184
|
+
const wkbArrays: Uint8Array[] = rows.map(r => r.geometry);
|
|
185
|
+
const attributes = new Map([
|
|
186
|
+
['id', { values: rows.map(r => r.id), type: 'BIGINT' }],
|
|
187
|
+
['name', { values: rows.map(r => r.name), type: 'VARCHAR' }]
|
|
188
|
+
]);
|
|
189
|
+
|
|
190
|
+
// When you know the type from GeoParquet metadata:
|
|
191
|
+
const [result] = buildGeoArrowTables(wkbArrays, attributes, normalizeGeomType('POLYGON'));
|
|
192
|
+
|
|
193
|
+
// When you do not:
|
|
194
|
+
const results = buildGeoArrowTables(wkbArrays, attributes);
|
|
195
|
+
for (const r of results) {
|
|
196
|
+
console.log(r.geometryType, r.table.numRows, r.bounds);
|
|
197
|
+
}
|
|
198
|
+
```
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# localStorage helpers
|
|
2
|
+
|
|
3
|
+
SSR-safe JSON persistence on top of `window.localStorage`.
|
|
4
|
+
|
|
5
|
+
Source: `src/lib/utils/local-storage.ts`.
|
|
6
|
+
|
|
7
|
+
## Functions
|
|
8
|
+
|
|
9
|
+
### `loadFromStorage<T>(key, defaultValue)`
|
|
10
|
+
|
|
11
|
+
```ts
|
|
12
|
+
function loadFromStorage<T>(key: string, defaultValue: T): T
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Load a JSON value from localStorage. Returns `defaultValue` when:
|
|
16
|
+
|
|
17
|
+
- Running in SSR (`typeof window === 'undefined'`).
|
|
18
|
+
- The key is not present.
|
|
19
|
+
- `JSON.parse` throws (stored value corrupted).
|
|
20
|
+
|
|
21
|
+
Never throws.
|
|
22
|
+
|
|
23
|
+
### `persistToStorage(key, value)`
|
|
24
|
+
|
|
25
|
+
```ts
|
|
26
|
+
function persistToStorage(key: string, value: unknown): void
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Write a JSON-serializable value to localStorage. No-ops silently when:
|
|
30
|
+
|
|
31
|
+
- Running in SSR.
|
|
32
|
+
- `localStorage.setItem` throws (quota exceeded, private-browsing restrictions, Safari storage partitioning).
|
|
33
|
+
|
|
34
|
+
Pair with the [`STORAGE_KEYS`](./types-constants.md#storage_keys) constants to keep key strings consistent across the app.
|
|
35
|
+
|
|
36
|
+
## Example
|
|
37
|
+
|
|
38
|
+
```ts
|
|
39
|
+
import { loadFromStorage, persistToStorage, STORAGE_KEYS } from '@walkthru-earth/objex-utils';
|
|
40
|
+
|
|
41
|
+
interface Settings {
|
|
42
|
+
theme: 'light' | 'dark' | 'system';
|
|
43
|
+
locale: 'en' | 'ar';
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const defaults: Settings = { theme: 'system', locale: 'en' };
|
|
47
|
+
let settings = loadFromStorage<Settings>(STORAGE_KEYS.SETTINGS, defaults);
|
|
48
|
+
|
|
49
|
+
// later…
|
|
50
|
+
persistToStorage(STORAGE_KEYS.SETTINGS, settings);
|
|
51
|
+
```
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Markdown + SQL parsing
|
|
2
|
+
|
|
3
|
+
Parse markdown documents with YAML frontmatter and Evidence-style SQL code blocks, interpolate query results into the text, and replace SQL blocks with placeholder stubs for server-side rendering.
|
|
4
|
+
|
|
5
|
+
Source: `src/lib/utils/markdown-sql.ts`.
|
|
6
|
+
|
|
7
|
+
## Peer dependency
|
|
8
|
+
|
|
9
|
+
- `yaml >= 2` — **dynamically imported** only when frontmatter is detected. If `yaml` is not installed and the document has frontmatter, parsing silently returns `frontmatter: {}` and continues. Consumers who never call `parseMarkdownDocument` do not need `yaml` at all.
|
|
10
|
+
|
|
11
|
+
## Types
|
|
12
|
+
|
|
13
|
+
### `SqlBlock`
|
|
14
|
+
|
|
15
|
+
```ts
|
|
16
|
+
interface SqlBlock {
|
|
17
|
+
name: string; // block identifier from ```sql <name>
|
|
18
|
+
sql: string; // raw SQL body
|
|
19
|
+
startLine: number; // 0-indexed line index of the opening ``` fence
|
|
20
|
+
endLine: number; // 0-indexed line index of the closing ``` fence
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### `ParsedMarkdownDocument`
|
|
25
|
+
|
|
26
|
+
```ts
|
|
27
|
+
interface ParsedMarkdownDocument {
|
|
28
|
+
frontmatter: Record<string, any>;
|
|
29
|
+
content: string; // markdown with frontmatter stripped
|
|
30
|
+
sqlBlocks: SqlBlock[];
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Functions
|
|
35
|
+
|
|
36
|
+
### `parseMarkdownDocument(markdown)`
|
|
37
|
+
|
|
38
|
+
```ts
|
|
39
|
+
async function parseMarkdownDocument(
|
|
40
|
+
markdown: string
|
|
41
|
+
): Promise<ParsedMarkdownDocument>
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Async** (since v1.2). Parses:
|
|
45
|
+
|
|
46
|
+
- YAML frontmatter delimited by `---\n ... \n---\n` at the top of the file.
|
|
47
|
+
- SQL blocks of the form:
|
|
48
|
+
|
|
49
|
+
````markdown
|
|
50
|
+
```sql some_query_name
|
|
51
|
+
SELECT * FROM table
|
|
52
|
+
```
|
|
53
|
+
````
|
|
54
|
+
|
|
55
|
+
The identifier must match `/^\w[\w-]*$/`. Content is captured verbatim until the matching closing fence.
|
|
56
|
+
|
|
57
|
+
**Behavior**
|
|
58
|
+
|
|
59
|
+
- Missing or malformed frontmatter → `frontmatter = {}`.
|
|
60
|
+
- Missing `yaml` peer dep → `frontmatter = {}` (silent).
|
|
61
|
+
- No SQL blocks → `sqlBlocks = []`.
|
|
62
|
+
|
|
63
|
+
### `interpolateTemplates(text, queryResults)`
|
|
64
|
+
|
|
65
|
+
```ts
|
|
66
|
+
function interpolateTemplates(
|
|
67
|
+
text: string,
|
|
68
|
+
queryResults: Map<string, Record<string, any>[]>
|
|
69
|
+
): string
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Replace references of the form `{queryName.rows[N].columnName}` in `text` with the corresponding value from `queryResults`.
|
|
73
|
+
|
|
74
|
+
- Unknown query name → leave untouched.
|
|
75
|
+
- Missing row or column → leave untouched.
|
|
76
|
+
- Non-string values are coerced via `String(value)`.
|
|
77
|
+
|
|
78
|
+
### `markSqlBlocks(content)`
|
|
79
|
+
|
|
80
|
+
```ts
|
|
81
|
+
function markSqlBlocks(content: string): string
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Replace every `` ```sql <name> ... ``` `` block in `content` with `<div data-sql-block="<name>"></div>`. Useful when streaming `content` through a markdown renderer — the caller can later hydrate each placeholder with the actual query result.
|
|
85
|
+
|
|
86
|
+
## Pattern
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
import {
|
|
90
|
+
parseMarkdownDocument,
|
|
91
|
+
interpolateTemplates,
|
|
92
|
+
markSqlBlocks,
|
|
93
|
+
} from '@walkthru-earth/objex-utils';
|
|
94
|
+
|
|
95
|
+
const parsed = await parseMarkdownDocument(rawMarkdown);
|
|
96
|
+
|
|
97
|
+
if (parsed.sqlBlocks.length) {
|
|
98
|
+
const results = new Map<string, Record<string, any>[]>();
|
|
99
|
+
await Promise.all(
|
|
100
|
+
parsed.sqlBlocks.map(async (b) => {
|
|
101
|
+
results.set(b.name, await myEngine.query(b.sql));
|
|
102
|
+
})
|
|
103
|
+
);
|
|
104
|
+
|
|
105
|
+
const interpolated = interpolateTemplates(parsed.content, results);
|
|
106
|
+
const withPlaceholders = markSqlBlocks(interpolated);
|
|
107
|
+
// hand withPlaceholders to your markdown renderer
|
|
108
|
+
}
|
|
109
|
+
```
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Parquet metadata
|
|
2
|
+
|
|
3
|
+
Lightweight GeoParquet-aware metadata reader for remote Parquet files. Uses [`hyparquet`](https://github.com/hyparam/hyparquet) via HTTP range requests (~512 KB) so you can inspect schemas and geo metadata **before** DuckDB-WASM finishes booting.
|
|
4
|
+
|
|
5
|
+
Source: `src/lib/utils/parquet-metadata.ts`.
|
|
6
|
+
|
|
7
|
+
## Peer dependencies
|
|
8
|
+
|
|
9
|
+
- `hyparquet >= 1.25`
|
|
10
|
+
- `hyparquet-compressors >= 1.1` (SNAPPY, ZSTD, GZIP, LZ4, BROTLI)
|
|
11
|
+
|
|
12
|
+
## Types
|
|
13
|
+
|
|
14
|
+
### `GeoColumnMeta`
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
interface GeoColumnMeta {
|
|
18
|
+
encoding: string; // e.g. 'WKB'
|
|
19
|
+
geometryTypes: string[]; // e.g. ['Polygon', 'MultiPolygon']
|
|
20
|
+
crs: any | null; // raw ProjJSON or EPSG identifier
|
|
21
|
+
bbox?: number[]; // [minX, minY, maxX, maxY] (or with Z: 6 values)
|
|
22
|
+
}
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### `GeoParquetMeta`
|
|
26
|
+
|
|
27
|
+
```ts
|
|
28
|
+
interface GeoParquetMeta {
|
|
29
|
+
primaryColumn: string;
|
|
30
|
+
columns: Record<string, GeoColumnMeta>;
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### `ParquetFileMetadata`
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
interface ParquetFileMetadata {
|
|
38
|
+
rowCount: number;
|
|
39
|
+
schema: { name: string; type: string }[];
|
|
40
|
+
geo: GeoParquetMeta | null; // null for non-geo Parquet
|
|
41
|
+
legacyGeoParquet: boolean; // true for pre-1.0 (schema_version without "version" field)
|
|
42
|
+
createdBy: string | null;
|
|
43
|
+
numRowGroups: number;
|
|
44
|
+
compression: string | null; // e.g. 'SNAPPY', 'ZSTD'
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Functions
|
|
49
|
+
|
|
50
|
+
### `readParquetMetadata(url)`
|
|
51
|
+
|
|
52
|
+
```ts
|
|
53
|
+
async function readParquetMetadata(url: string): Promise<ParquetFileMetadata>
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Read the Parquet footer from a remote URL via range requests.
|
|
57
|
+
|
|
58
|
+
**Parameters**
|
|
59
|
+
|
|
60
|
+
| Name | Type | Meaning |
|
|
61
|
+
|------|------|---------|
|
|
62
|
+
| `url` | `string` | Full HTTPS URL. Must be CORS-accessible. `s3://` / `gs://` URIs must be resolved first (see [`resolveCloudUrl`](./storage.md#resolvecloudurl)). |
|
|
63
|
+
|
|
64
|
+
**Returns** `Promise<ParquetFileMetadata>`.
|
|
65
|
+
|
|
66
|
+
**Throws** a native error if the URL is not reachable, CORS is blocked, or the footer is malformed.
|
|
67
|
+
|
|
68
|
+
**Notes**
|
|
69
|
+
|
|
70
|
+
- The `geo` field contains the parsed `"geo"` key from Parquet file-level KV metadata. For legacy files (`schema_version` but no `version`), it is still parsed; `legacyGeoParquet` is set to `true` so callers can apply fallbacks.
|
|
71
|
+
- `compression` comes from the first row group's first column and is reported in uppercase (e.g. `'SNAPPY'`).
|
|
72
|
+
|
|
73
|
+
### `extractEpsgFromGeoMeta(geo)`
|
|
74
|
+
|
|
75
|
+
```ts
|
|
76
|
+
function extractEpsgFromGeoMeta(geo: GeoParquetMeta): string | null
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Extract an EPSG authority code from a GeoParquet CRS block. Returns `null` for WGS84/CRS84 (no reprojection needed) or when no EPSG identifier is embedded.
|
|
80
|
+
|
|
81
|
+
**Return examples**
|
|
82
|
+
|
|
83
|
+
- `'EPSG:27700'` (British National Grid)
|
|
84
|
+
- `'EPSG:3857'` (Web Mercator)
|
|
85
|
+
- `null` (WGS84 or CRS absent)
|
|
86
|
+
|
|
87
|
+
### `extractGeometryTypes(geo)`
|
|
88
|
+
|
|
89
|
+
```ts
|
|
90
|
+
function extractGeometryTypes(
|
|
91
|
+
geo: GeoParquetMeta
|
|
92
|
+
): GeoArrowGeomType[]
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Pull the `geometry_types` array from the primary column's metadata and normalize it into [`GeoArrowGeomType`](./geometry.md#geoarrowgeomtype). Useful to skip per-row `ST_GeometryType()` calls.
|
|
96
|
+
|
|
97
|
+
### `extractBounds(geo)`
|
|
98
|
+
|
|
99
|
+
```ts
|
|
100
|
+
function extractBounds(
|
|
101
|
+
geo: GeoParquetMeta
|
|
102
|
+
): [number, number, number, number] | null
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Extract the `bbox` from the primary column. Returns `null` when absent. If the bbox has Z (`[minX, minY, minZ, maxX, maxY, maxZ]`), only the XY extent is returned.
|
|
106
|
+
|
|
107
|
+
## End-to-end example
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
import {
|
|
111
|
+
readParquetMetadata,
|
|
112
|
+
extractEpsgFromGeoMeta,
|
|
113
|
+
extractGeometryTypes,
|
|
114
|
+
extractBounds,
|
|
115
|
+
} from '@walkthru-earth/objex-utils';
|
|
116
|
+
|
|
117
|
+
const meta = await readParquetMetadata(
|
|
118
|
+
'https://example.com/data.parquet'
|
|
119
|
+
);
|
|
120
|
+
|
|
121
|
+
console.log({
|
|
122
|
+
rows: meta.rowCount,
|
|
123
|
+
compression: meta.compression,
|
|
124
|
+
schema: meta.schema,
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
if (meta.geo) {
|
|
128
|
+
const crs = extractEpsgFromGeoMeta(meta.geo); // null means WGS84
|
|
129
|
+
const types = extractGeometryTypes(meta.geo); // ['polygon']
|
|
130
|
+
const bbox = extractBounds(meta.geo); // [minX, minY, maxX, maxY] | null
|
|
131
|
+
console.log({ crs, types, bbox, legacy: meta.legacyGeoParquet });
|
|
132
|
+
}
|
|
133
|
+
```
|