@loaders.gl/schema-utils 4.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +41 -0
- package/README.md +9 -0
- package/dist/dist.dev.js +14269 -0
- package/dist/dist.min.js +18 -0
- package/dist/index.cjs +2436 -0
- package/dist/index.cjs.map +7 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +73 -0
- package/dist/lib/arrow-utils/arrow-fixed-size-list-utils.d.ts +11 -0
- package/dist/lib/arrow-utils/arrow-fixed-size-list-utils.d.ts.map +1 -0
- package/dist/lib/arrow-utils/arrow-fixed-size-list-utils.js +40 -0
- package/dist/lib/arrow-utils/arrow-list-of-fixed-size-list-utils.d.ts +11 -0
- package/dist/lib/arrow-utils/arrow-list-of-fixed-size-list-utils.d.ts.map +1 -0
- package/dist/lib/arrow-utils/arrow-list-of-fixed-size-list-utils.js +32 -0
- package/dist/lib/mesh/convert-mesh-to-table.d.ts +18 -0
- package/dist/lib/mesh/convert-mesh-to-table.d.ts.map +1 -0
- package/dist/lib/mesh/convert-mesh-to-table.js +60 -0
- package/dist/lib/mesh/convert-table-to-mesh.d.ts +7 -0
- package/dist/lib/mesh/convert-table-to-mesh.d.ts.map +1 -0
- package/dist/lib/mesh/convert-table-to-mesh.js +40 -0
- package/dist/lib/mesh/deduce-mesh-schema.d.ts +23 -0
- package/dist/lib/mesh/deduce-mesh-schema.d.ts.map +1 -0
- package/dist/lib/mesh/deduce-mesh-schema.js +62 -0
- package/dist/lib/mesh/mesh-utils.d.ts +23 -0
- package/dist/lib/mesh/mesh-utils.d.ts.map +1 -0
- package/dist/lib/mesh/mesh-utils.js +46 -0
- package/dist/lib/schema/convert-arrow-schema.d.ts +23 -0
- package/dist/lib/schema/convert-arrow-schema.d.ts.map +1 -0
- package/dist/lib/schema/convert-arrow-schema.js +280 -0
- package/dist/lib/schema/data-type.d.ts +16 -0
- package/dist/lib/schema/data-type.d.ts.map +1 -0
- package/dist/lib/schema/data-type.js +88 -0
- package/dist/lib/schema/deduce-table-schema.d.ts +14 -0
- package/dist/lib/schema/deduce-table-schema.d.ts.map +1 -0
- package/dist/lib/schema/deduce-table-schema.js +100 -0
- package/dist/lib/table/arrow-api/arrow-like-field.d.ts +19 -0
- package/dist/lib/table/arrow-api/arrow-like-field.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/arrow-like-field.js +36 -0
- package/dist/lib/table/arrow-api/arrow-like-interface.d.ts +43 -0
- package/dist/lib/table/arrow-api/arrow-like-interface.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/arrow-like-interface.js +4 -0
- package/dist/lib/table/arrow-api/arrow-like-schema.d.ts +12 -0
- package/dist/lib/table/arrow-api/arrow-like-schema.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/arrow-like-schema.js +81 -0
- package/dist/lib/table/arrow-api/arrow-like-table.d.ts +28 -0
- package/dist/lib/table/arrow-api/arrow-like-table.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/arrow-like-table.js +63 -0
- package/dist/lib/table/arrow-api/arrow-like-type.d.ts +177 -0
- package/dist/lib/table/arrow-api/arrow-like-type.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/arrow-like-type.js +442 -0
- package/dist/lib/table/arrow-api/enum.d.ts +92 -0
- package/dist/lib/table/arrow-api/enum.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/enum.js +97 -0
- package/dist/lib/table/arrow-api/get-type-info.d.ts +13 -0
- package/dist/lib/table/arrow-api/get-type-info.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/get-type-info.js +27 -0
- package/dist/lib/table/arrow-api/index.d.ts +5 -0
- package/dist/lib/table/arrow-api/index.d.ts.map +1 -0
- package/dist/lib/table/arrow-api/index.js +7 -0
- package/dist/lib/table/batch-builder/arrow-table-batch-aggregator.d.ts +10 -0
- package/dist/lib/table/batch-builder/arrow-table-batch-aggregator.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/arrow-table-batch-aggregator.js +67 -0
- package/dist/lib/table/batch-builder/arrow-table-builder.d.ts +25 -0
- package/dist/lib/table/batch-builder/arrow-table-builder.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/arrow-table-builder.js +99 -0
- package/dist/lib/table/batch-builder/base-table-batch-aggregator.d.ts +19 -0
- package/dist/lib/table/batch-builder/base-table-batch-aggregator.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/base-table-batch-aggregator.js +63 -0
- package/dist/lib/table/batch-builder/columnar-table-batch-aggregator.d.ts +20 -0
- package/dist/lib/table/batch-builder/columnar-table-batch-aggregator.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/columnar-table-batch-aggregator.js +83 -0
- package/dist/lib/table/batch-builder/row-table-batch-aggregator.d.ts +21 -0
- package/dist/lib/table/batch-builder/row-table-batch-aggregator.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/row-table-batch-aggregator.js +85 -0
- package/dist/lib/table/batch-builder/table-batch-aggregator.d.ts +24 -0
- package/dist/lib/table/batch-builder/table-batch-aggregator.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/table-batch-aggregator.js +4 -0
- package/dist/lib/table/batch-builder/table-batch-builder.d.ts +52 -0
- package/dist/lib/table/batch-builder/table-batch-builder.d.ts.map +1 -0
- package/dist/lib/table/batch-builder/table-batch-builder.js +148 -0
- package/dist/lib/table/batches/convert-batches.d.ts +10 -0
- package/dist/lib/table/batches/convert-batches.d.ts.map +1 -0
- package/dist/lib/table/batches/convert-batches.js +45 -0
- package/dist/lib/table/batches/make-arrow-batch-iterator.d.ts +17 -0
- package/dist/lib/table/batches/make-arrow-batch-iterator.d.ts.map +1 -0
- package/dist/lib/table/batches/make-arrow-batch-iterator.js +57 -0
- package/dist/lib/table/batches/make-table-batch-iterator.d.ts +15 -0
- package/dist/lib/table/batches/make-table-batch-iterator.d.ts.map +1 -0
- package/dist/lib/table/batches/make-table-batch-iterator.js +21 -0
- package/dist/lib/table/batches/make-table-from-batches.d.ts +9 -0
- package/dist/lib/table/batches/make-table-from-batches.d.ts.map +1 -0
- package/dist/lib/table/batches/make-table-from-batches.js +62 -0
- package/dist/lib/table/tables/convert-arrow-table.d.ts +24 -0
- package/dist/lib/table/tables/convert-arrow-table.d.ts.map +1 -0
- package/dist/lib/table/tables/convert-arrow-table.js +150 -0
- package/dist/lib/table/tables/convert-table.d.ts +43 -0
- package/dist/lib/table/tables/convert-table.d.ts.map +1 -0
- package/dist/lib/table/tables/convert-table.js +120 -0
- package/dist/lib/table/tables/make-table.d.ts +67 -0
- package/dist/lib/table/tables/make-table.d.ts.map +1 -0
- package/dist/lib/table/tables/make-table.js +101 -0
- package/dist/lib/table/tables/row-utils.d.ts +15 -0
- package/dist/lib/table/tables/row-utils.d.ts.map +1 -0
- package/dist/lib/table/tables/row-utils.js +49 -0
- package/dist/lib/table/tables/table-accessors.d.ts +68 -0
- package/dist/lib/table/tables/table-accessors.d.ts.map +1 -0
- package/dist/lib/table/tables/table-accessors.js +350 -0
- package/dist/lib/table/tables/table-column.d.ts +7 -0
- package/dist/lib/table/tables/table-column.d.ts.map +1 -0
- package/dist/lib/table/tables/table-column.js +39 -0
- package/dist/lib/table/tables/table-types.d.ts +12 -0
- package/dist/lib/table/tables/table-types.d.ts.map +1 -0
- package/dist/lib/table/tables/table-types.js +23 -0
- package/dist/lib/utils/assert.d.ts +2 -0
- package/dist/lib/utils/assert.d.ts.map +1 -0
- package/dist/lib/utils/assert.js +8 -0
- package/dist/lib/utils/async-queue.d.ts +18 -0
- package/dist/lib/utils/async-queue.d.ts.map +1 -0
- package/dist/lib/utils/async-queue.js +90 -0
- package/dist/types/types.d.ts +15 -0
- package/dist/types/types.d.ts.map +1 -0
- package/dist/types/types.js +4 -0
- package/package.json +52 -0
- package/src/index.ts +134 -0
- package/src/lib/arrow-utils/arrow-fixed-size-list-utils.ts +63 -0
- package/src/lib/arrow-utils/arrow-list-of-fixed-size-list-utils.ts +48 -0
- package/src/lib/mesh/convert-mesh-to-table.ts +77 -0
- package/src/lib/mesh/convert-table-to-mesh.ts +49 -0
- package/src/lib/mesh/deduce-mesh-schema.ts +75 -0
- package/src/lib/mesh/mesh-utils.ts +64 -0
- package/src/lib/schema/convert-arrow-schema.ts +301 -0
- package/src/lib/schema/data-type.ts +103 -0
- package/src/lib/schema/deduce-table-schema.ts +119 -0
- package/src/lib/table/arrow-api/arrow-like-field.ts +53 -0
- package/src/lib/table/arrow-api/arrow-like-interface.ts +84 -0
- package/src/lib/table/arrow-api/arrow-like-schema.ts +101 -0
- package/src/lib/table/arrow-api/arrow-like-table.ts +85 -0
- package/src/lib/table/arrow-api/arrow-like-type.ts +502 -0
- package/src/lib/table/arrow-api/enum.ts +99 -0
- package/src/lib/table/arrow-api/get-type-info.ts +38 -0
- package/src/lib/table/arrow-api/index.ts +8 -0
- package/src/lib/table/batch-builder/arrow-table-batch-aggregator-builders.ts.disabled +190 -0
- package/src/lib/table/batch-builder/arrow-table-batch-aggregator.ts +82 -0
- package/src/lib/table/batch-builder/arrow-table-builder.ts +112 -0
- package/src/lib/table/batch-builder/base-table-batch-aggregator.ts +82 -0
- package/src/lib/table/batch-builder/columnar-table-batch-aggregator.ts +99 -0
- package/src/lib/table/batch-builder/row-table-batch-aggregator.ts +108 -0
- package/src/lib/table/batch-builder/table-batch-aggregator.ts +28 -0
- package/src/lib/table/batch-builder/table-batch-builder.ts +185 -0
- package/src/lib/table/batches/convert-batches.ts +83 -0
- package/src/lib/table/batches/make-arrow-batch-iterator.ts +75 -0
- package/src/lib/table/batches/make-table-batch-iterator.ts +25 -0
- package/src/lib/table/batches/make-table-from-batches.ts +84 -0
- package/src/lib/table/tables/convert-arrow-table.ts +198 -0
- package/src/lib/table/tables/convert-table.ts +151 -0
- package/src/lib/table/tables/make-table.ts +127 -0
- package/src/lib/table/tables/row-utils.ts +59 -0
- package/src/lib/table/tables/table-accessors.ts +417 -0
- package/src/lib/table/tables/table-column.ts +49 -0
- package/src/lib/table/tables/table-types.ts +37 -0
- package/src/lib/utils/assert.ts +8 -0
- package/src/lib/utils/async-queue.ts +104 -0
- package/src/types/types.ts +45 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {Schema, ColumnarTableBatch, ArrowTableBatch, TypedArray} from '@loaders.gl/schema';
|
|
6
|
+
import {isTypedArray} from '@math.gl/types';
|
|
7
|
+
import {getArrayTypeFromDataType} from '../../schema/data-type';
|
|
8
|
+
import {TableBatchAggregator} from './table-batch-aggregator';
|
|
9
|
+
/** Options for ColumnarTableBatchAggregator. The constructor accepts them but reads none. */
type ColumnarTableBatchOptions = {};

/** Initial capacity (in rows) of every column; doubled each time it is exhausted. */
const DEFAULT_ROW_COUNT = 100;

/**
 * Aggregates rows into one array per schema field and emits them as a
 * `'columnar-table'` batch.
 *
 * Columns are pre-allocated (typed or plain arrays, as chosen by
 * `getArrayTypeFromDataType`) and grown by doubling when full.
 */
export class ColumnarTableBatchAggregator implements TableBatchAggregator {
  schema: Schema;
  /** Number of rows added so far */
  length: number = 0;
  /** Current capacity of each column, in rows */
  allocated: number = 0;
  /** Column storage, keyed by field name, in schema field order */
  columns: Record<string, TypedArray | Array<any>> = {};

  /**
   * @param schema Schema whose fields define the columns
   * @param options Currently unused
   */
  constructor(schema: Schema, options: ColumnarTableBatchOptions) {
    this.schema = schema;
    this._reallocateColumns();
  }

  /** @returns Number of rows added so far */
  rowCount(): number {
    return this.length;
  }

  /**
   * Append a row given as an array; values are matched to columns by position
   * (column order is schema field order).
   */
  addArrayRow(row: any[]) {
    // If user keeps pushing rows beyond batch size, reallocate
    this._reallocateColumns();
    let i = 0;
    // TODO what if no csv header, columns not populated?
    for (const fieldName in this.columns) {
      this.columns[fieldName][this.length] = row[i++];
    }
    this.length++;
  }

  /** Append a row given as an object; each key must name an existing column. */
  addObjectRow(row: {[columnName: string]: any}): void {
    // If user keeps pushing rows beyond batch size, reallocate
    this._reallocateColumns();
    for (const fieldName in row) {
      this.columns[fieldName][this.length] = row[fieldName];
    }
    this.length++;
  }

  /**
   * Trims every column to the number of rows added and returns them as a batch.
   * @returns A `'columnar-table'` batch that references this aggregator's columns
   */
  getBatch(): ColumnarTableBatch | ArrowTableBatch | null {
    this._pruneColumns();

    const batch: ColumnarTableBatch = {
      shape: 'columnar-table',
      batchType: 'data',
      data: this.columns,
      schema: this.schema,
      length: this.length
    };

    return batch;
  }

  // HELPERS

  /**
   * Ensures there is room for one more row. When capacity is exhausted it is
   * doubled (or set to DEFAULT_ROW_COUNT on first use) and existing values are
   * carried over into the larger columns.
   */
  _reallocateColumns() {
    if (this.length < this.allocated) {
      return;
    }

    this.allocated = this.allocated > 0 ? this.allocated * 2 : DEFAULT_ROW_COUNT;

    // Build the new column set from the old one. The previous columns must stay
    // reachable until their contents have been carried over, otherwise all rows
    // added so far are lost on every growth step.
    const oldColumns = this.columns;
    const newColumns: Record<string, TypedArray | Array<any>> = {};

    for (const field of this.schema.fields) {
      const ArrayType = getArrayTypeFromDataType(field.type, field.nullable);
      const oldColumn = oldColumns[field.name];

      if (!oldColumn) {
        // Create new
        newColumns[field.name] = new ArrayType(this.allocated);
      } else if (Array.isArray(oldColumn)) {
        // Plain array, just increase its size
        oldColumn.length = this.allocated;
        newColumns[field.name] = oldColumn;
      } else if (isTypedArray(oldColumn)) {
        const typedArray = new ArrayType(this.allocated) as TypedArray;
        // Copy the old data to the new array
        typedArray.set(oldColumn);
        newColumns[field.name] = typedArray;
      } else {
        throw new Error('Invalid column type');
      }
    }

    this.columns = newColumns;
  }

  /** Replaces every column with a copy truncated to the rows actually added. */
  _pruneColumns() {
    for (const [columnName, column] of Object.entries(this.columns)) {
      this.columns[columnName] = column.slice(0, this.length);
    }
  }
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {Schema, TableBatch} from '@loaders.gl/schema';
|
|
6
|
+
import {TableBatchAggregator, TableBatchOptions} from './table-batch-aggregator';
|
|
7
|
+
import {
|
|
8
|
+
convertToArrayRow,
|
|
9
|
+
convertToObjectRow,
|
|
10
|
+
inferHeadersFromArrayRow,
|
|
11
|
+
inferHeadersFromObjectRow
|
|
12
|
+
} from '../tables/row-utils';
|
|
13
|
+
|
|
14
|
+
/** Initial length of the row buffer (a plain array, so it grows past this as needed). */
const DEFAULT_ROW_COUNT = 100;

/**
 * Aggregates rows and emits them as an `'array-row-table'` or
 * `'object-row-table'` batch, as selected by `options.shape`.
 *
 * Rows supplied in the other row format are converted using the column
 * headers, which come from the schema or are inferred from the first row.
 */
export class RowTableBatchAggregator implements TableBatchAggregator {
  schema: Schema | null;
  options: TableBatchOptions;

  /** Number of rows buffered since the last emitted batch */
  length: number = 0;
  objectRows: {[columnName: string]: unknown}[] | null = null;
  arrayRows: unknown[][] | null = null;
  /** Last finite cursor value passed to addArrayRow / addObjectRow */
  cursor: number = 0;
  private _headers: string[] | null = null;

  /**
   * @param schema Schema supplying the column headers, or null to infer them from the first row
   * @param options `options.shape` selects the row format of emitted batches
   */
  constructor(schema: Schema | null, options: TableBatchOptions) {
    this.options = options;
    this.schema = schema;

    // With a schema, headers are its field names; otherwise they are inferred
    // from the first row that is added.
    if (schema) {
      this._headers = [];
      for (let i = 0; i < schema.fields.length; i++) {
        this._headers[i] = schema.fields[i].name;
      }
    }
  }

  /** @returns Number of rows buffered since the last emitted batch */
  rowCount(): number {
    return this.length;
  }

  /**
   * Add a row given as an array.
   * @param row Cell values in column order
   * @param cursor Optional position marker; recorded when it is a finite number
   */
  addArrayRow(row: any[], cursor?: number): void {
    if (Number.isFinite(cursor)) {
      this.cursor = cursor as number;
    }

    // TODO - infer schema at a higher level, instead of hacking headers here?
    this._headers ||= inferHeadersFromArrayRow(row);

    // Rows are silently ignored when options.shape is neither row shape.
    // eslint-disable-next-line default-case
    switch (this.options.shape) {
      case 'object-row-table': {
        const rowObject = convertToObjectRow(row, this._headers);
        this.addObjectRow(rowObject, cursor);
        break;
      }
      case 'array-row-table':
        this.arrayRows = this.arrayRows || new Array(DEFAULT_ROW_COUNT);
        this.arrayRows[this.length] = row;
        this.length++;
        break;
    }
  }

  /**
   * Add a row given as an object keyed by column name.
   * @param row Cell values keyed by column name
   * @param cursor Optional position marker; recorded when it is a finite number
   */
  addObjectRow(row: {[columnName: string]: any}, cursor?: number): void {
    if (Number.isFinite(cursor)) {
      this.cursor = cursor as number;
    }

    // TODO - infer schema at a higher level, instead of hacking headers here?
    this._headers ||= inferHeadersFromObjectRow(row);

    // Rows are silently ignored when options.shape is neither row shape.
    // eslint-disable-next-line default-case
    switch (this.options.shape) {
      case 'array-row-table': {
        const rowArray = convertToArrayRow(row, this._headers);
        this.addArrayRow(rowArray, cursor);
        break;
      }
      case 'object-row-table':
        this.objectRows = this.objectRows || new Array(DEFAULT_ROW_COUNT);
        this.objectRows[this.length] = row;
        this.length++;
        break;
    }
  }

  /**
   * Emit the buffered rows as a batch and reset the buffer.
   * @returns The batch, or null when no row has been buffered
   */
  getBatch(): TableBatch | null {
    const buffered = this.arrayRows || this.objectRows;
    if (!buffered) {
      return null;
    }

    const length = this.length;
    const rows = buffered.slice(0, length);

    // Reset the row count together with the buffers. A stale count would make
    // rowCount() report rows that were already emitted, and rows added later
    // would land after a run of empty slots.
    this.arrayRows = null;
    this.objectRows = null;
    this.length = 0;

    return {
      shape: this.options.shape,
      batchType: 'data',
      data: rows,
      length,
      // @ts-expect-error we should infer a schema
      schema: this.schema,
      cursor: this.cursor
    };
  }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {Schema, TableBatch} from '@loaders.gl/schema';
|
|
6
|
+
|
|
7
|
+
/**
 * Options passed to a table batch aggregator's constructor.
 * Only `batchSize` is declared; any other key is accepted untyped.
 */
export interface TableBatchOptions {
  /** Batch size setting, as a number or a string */
  batchSize: number | string;
  /** Additional, aggregator-specific options */
  [key: string]: any;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Constructor signature shared by table batch aggregator classes:
 * built from a schema and options, producing a TableBatchAggregator.
 */
export interface TableBatchConstructor {
  new (schema: Schema, options: TableBatchOptions): TableBatchAggregator;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Contract implemented by the per-shape batch aggregators.
 * TableBatchBuilder delegates batch building to this interface.
 */
export interface TableBatchAggregator {
  /** @returns Number of rows currently held by the aggregator */
  rowCount(): number;
  /** Add one row supplied as an array of cell values */
  addArrayRow(row: any[]): void;
  /** Add one row supplied as an object keyed by column name */
  addObjectRow(row: {[columnName: string]: any}): void;
  /** @returns A batch object, or null when there is nothing to return */
  getBatch(): TableBatch | null;
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {Schema, TableBatch} from '@loaders.gl/schema';
|
|
6
|
+
import type {TableBatchAggregator, TableBatchConstructor} from './table-batch-aggregator';
|
|
7
|
+
import {BaseTableBatchAggregator} from './base-table-batch-aggregator';
|
|
8
|
+
import {RowTableBatchAggregator} from './row-table-batch-aggregator';
|
|
9
|
+
import {ColumnarTableBatchAggregator} from './columnar-table-batch-aggregator';
|
|
10
|
+
import {ArrowTableBatchAggregator} from './arrow-table-batch-aggregator';
|
|
11
|
+
|
|
12
|
+
// TODO define interface instead
/** Options accepted by TableBatchBuilder */
type TableBatchBuilderOptions = {
  /** Shape of the emitted batches; selects the aggregator implementation */
  shape?: 'array-row-table' | 'object-row-table' | 'columnar-table' | 'arrow-table';
  /** Rows per batch, or 'auto' to emit once a chunk has been marked complete */
  batchSize?: number | 'auto';
  /** Minimum time in milliseconds between two full batches */
  batchDebounceMs?: number;
  /** Maximum number of rows to accept (0 disables the limit) */
  limit?: number;
  /** Maximum estimated size to accept, in MB (0 disables the limit) */
  _limitMB?: number;
};

/** Extra fields for an emitted batch; all keys are copied onto the batch */
type GetBatchOptions = {
  bytesUsed?: number;
  [key: string]: any;
};

const DEFAULT_OPTIONS: Required<TableBatchBuilderOptions> = {
  shape: undefined!,
  batchSize: 'auto',
  batchDebounceMs: 0,
  limit: 0,
  _limitMB: 0
};

/** Incrementally builds batches from a stream of rows */
export class TableBatchBuilder {
  schema: Schema;
  options: Required<TableBatchBuilderOptions>;

  /** Aggregator for the batch under construction; created lazily, dropped on emit */
  private aggregator: TableBatchAggregator | null = null;
  private batchCount: number = 0;
  private bytesUsed: number = 0;
  private isChunkComplete: boolean = false;
  private lastBatchEmittedMs: number = Date.now();
  private totalLength: number = 0;
  private totalBytes: number = 0;
  /** Per-row byte estimate, computed from the first row and reused for all rows */
  private rowBytes: number = 0;

  static ArrowBatch?: TableBatchConstructor;

  /**
   * @param schema Schema handed to every aggregator this builder creates
   * @param options Overrides for DEFAULT_OPTIONS
   */
  constructor(schema: Schema, options?: TableBatchBuilderOptions) {
    this.schema = schema;
    this.options = {...DEFAULT_OPTIONS, ...options};
  }

  /** @returns true once the row limit or the estimated size limit has been reached */
  limitReached(): boolean {
    const {limit, _limitMB} = this.options;
    if (limit && this.totalLength >= limit) {
      return true;
    }
    // totalBytes is an estimate in bytes; convert to MB before comparing
    if (_limitMB && this.totalBytes / 1e6 >= _limitMB) {
      return true;
    }
    return false;
  }

  /**
   * Add one row, unless a limit has been reached (the row is then dropped).
   * @deprecated Use addArrayRow or addObjectRow
   */
  addRow(row: any[] | {[columnName: string]: any}): void {
    if (this.limitReached()) {
      return;
    }
    this.totalLength++;
    this.rowBytes = this.rowBytes || this._estimateRowMB(row);
    this.totalBytes += this.rowBytes;
    if (Array.isArray(row)) {
      this.addArrayRow(row);
    } else {
      this.addObjectRow(row);
    }
  }

  /** Add one row to the batch */
  protected addArrayRow(row: any[]) {
    this._getAggregator().addArrayRow(row);
  }

  /** Add one row to the batch */
  protected addObjectRow(row: {[columnName: string]: any}): void {
    this._getAggregator().addObjectRow(row);
  }

  /** Mark an incoming raw memory chunk has completed */
  chunkComplete(chunk: ArrayBuffer | string): void {
    if (chunk instanceof ArrayBuffer) {
      this.bytesUsed += chunk.byteLength;
    }
    if (typeof chunk === 'string') {
      this.bytesUsed += chunk.length;
    }
    this.isChunkComplete = true;
  }

  /** @returns The current batch if it is full (see _isFull), otherwise null */
  getFullBatch(options?: GetBatchOptions): TableBatch | null {
    return this._isFull() ? this._getBatch(options) : null;
  }

  /** @returns Whatever has been aggregated so far, or null if there is nothing */
  getFinalBatch(options?: GetBatchOptions): TableBatch | null {
    return this._getBatch(options);
  }

  // INTERNAL

  /**
   * Rough size estimate for one row.
   * Despite the name the result is in bytes (8 per cell or key);
   * limitReached converts the accumulated total to MB.
   */
  _estimateRowMB(row: any[] | object): number {
    return Array.isArray(row) ? row.length * 8 : Object.keys(row).length * 8;
  }

  /** Returns the current aggregator, creating one for the configured shape if needed */
  private _getAggregator(): TableBatchAggregator {
    if (!this.aggregator) {
      const TableBatchType = this._getTableBatchType();
      this.aggregator = new TableBatchType(this.schema, this.options);
    }
    return this.aggregator;
  }

  /**
   * Decides whether a batch should be emitted now.
   * Note: answering true also clears the chunk-complete flag and restarts the debounce timer.
   */
  private _isFull(): boolean {
    // No batch, not ready
    if (!this.aggregator || this.aggregator.rowCount() === 0) {
      return false;
    }

    // if batchSize === 'auto' we wait for chunk to complete
    // if batchSize === number, ensure we have enough rows
    if (this.options.batchSize === 'auto') {
      if (!this.isChunkComplete) {
        return false;
      }
    } else if (this.options.batchSize > this.aggregator.rowCount()) {
      return false;
    }

    // Debounce batches
    if (this.options.batchDebounceMs > Date.now() - this.lastBatchEmittedMs) {
      return false;
    }

    // Emit batch
    this.isChunkComplete = false;
    this.lastBatchEmittedMs = Date.now();
    return true;
  }

  /**
   * Takes the batch from the current aggregator and stamps it with `count`,
   * `bytesUsed` and every key of `options`.
   * bytesUsed can be set via chunkComplete or via getBatch*
   * @returns null when there is no aggregator or it has no batch to give
   */
  private _getBatch(options?: GetBatchOptions): TableBatch | null {
    if (!this.aggregator) {
      return null;
    }

    // TODO - this can overly increment bytes used?
    if (options?.bytesUsed) {
      this.bytesUsed = options.bytesUsed;
    }

    const normalizedBatch = this.aggregator.getBatch();
    this.aggregator = null;

    // Aggregators are allowed to return null; do not stamp or count a batch
    // that does not exist.
    if (!normalizedBatch) {
      return null;
    }

    normalizedBatch.count = this.batchCount;
    normalizedBatch.bytesUsed = this.bytesUsed;
    Object.assign(normalizedBatch, options);

    this.batchCount++;
    return normalizedBatch;
  }

  /** Maps options.shape to the aggregator class that builds that shape */
  private _getTableBatchType(): TableBatchConstructor {
    switch (this.options.shape) {
      case 'array-row-table':
      case 'object-row-table':
        return RowTableBatchAggregator;
      case 'columnar-table':
        return ColumnarTableBatchAggregator;
      case 'arrow-table':
        return ArrowTableBatchAggregator;
      default:
        return BaseTableBatchAggregator;
    }
  }
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
TableBatch,
|
|
7
|
+
ArrayRowTableBatch,
|
|
8
|
+
ObjectRowTableBatch,
|
|
9
|
+
ColumnarTableBatch,
|
|
10
|
+
ArrowTableBatch
|
|
11
|
+
} from '@loaders.gl/schema';
|
|
12
|
+
import {convertTable} from '../tables/convert-table';
|
|
13
|
+
|
|
14
|
+
export function convertBatch(batches: TableBatch, shape: 'object-row-table'): ObjectRowTableBatch;
export function convertBatch(batches: TableBatch, shape: 'array-row-table'): ArrayRowTableBatch;
export function convertBatch(batches: TableBatch, shape: 'columnar-table'): ColumnarTableBatch;
export function convertBatch(batches: TableBatch, shape: 'arrow-table'): ArrowTableBatch;

/**
 * Convert one table batch to another shape.
 *
 * The batch is passed to `convertTable` for the requested shape, and the
 * fields of the converted table are merged over a shallow copy of the batch.
 * @param batch Batch to convert
 * @param shape Target shape
 * @returns A new batch object in the requested shape
 * @throws Error (with the shape as its message) when the shape is not supported
 */
export function convertBatch(
  batch: TableBatch,
  shape: 'object-row-table' | 'array-row-table' | 'columnar-table' | 'arrow-table'
): TableBatch {
  // Each branch passes a literal shape so the matching convertTable overload is selected
  if (shape === 'object-row-table') {
    return {...batch, ...convertTable(batch, 'object-row-table')};
  }
  if (shape === 'array-row-table') {
    return {...batch, ...convertTable(batch, 'array-row-table')};
  }
  if (shape === 'columnar-table') {
    return {...batch, ...convertTable(batch, 'columnar-table')};
  }
  if (shape === 'arrow-table') {
    return {...batch, ...convertTable(batch, 'arrow-table')};
  }
  throw new Error(shape);
}
|
|
37
|
+
|
|
38
|
+
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'object-row-table'
): AsyncIterableIterator<ObjectRowTableBatch>;
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'array-row-table'
): AsyncIterableIterator<ArrayRowTableBatch>;
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'columnar-table'
): AsyncIterableIterator<ColumnarTableBatch>;
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'arrow-table'
): AsyncIterableIterator<ArrowTableBatch>;

/**
 * Convert a sequence of table batches to another shape, one batch at a time.
 * @param batches Sync or async iterable of batches to convert
 * @param shape Target shape for every yielded batch
 * @returns Async iterator yielding each incoming batch converted with `convertBatch`
 * @throws Error (with the shape as its message) when a batch arrives and the shape is not supported
 */
export async function* convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'object-row-table' | 'array-row-table' | 'columnar-table' | 'arrow-table'
): AsyncIterableIterator<TableBatch> {
  for await (const incoming of batches) {
    // Literal shapes are passed so the matching convertBatch overload is selected
    if (shape === 'object-row-table') {
      yield convertBatch(incoming, 'object-row-table');
    } else if (shape === 'array-row-table') {
      yield convertBatch(incoming, 'array-row-table');
    } else if (shape === 'columnar-table') {
      yield convertBatch(incoming, 'columnar-table');
    } else if (shape === 'arrow-table') {
      yield convertBatch(incoming, 'arrow-table');
    } else {
      throw new Error(shape);
    }
  }
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import * as arrow from 'apache-arrow';
|
|
6
|
+
import type {Table, ArrowTableBatch} from '@loaders.gl/schema';
|
|
7
|
+
|
|
8
|
+
import {convertSchemaToArrow} from '../../schema/convert-arrow-schema';
|
|
9
|
+
import {getTableLength, getTableNumCols, getTableCellAt} from '../tables/table-accessors';
|
|
10
|
+
|
|
11
|
+
/**
 * Iterates over a table as a sequence of `'arrow-table'` batches.
 *
 * Every record batch produced by `makeArrowRecordBatchIterator` is wrapped in
 * its own `arrow.Table`, which becomes the `data` of the yielded batch.
 * @param table Table to split into batches
 * @param options Forwarded unchanged to `makeArrowRecordBatchIterator`
 * @note Each yielded batch carries `table.schema` as its `schema`.
 */
export function* makeArrowTableBatchIterator(
  table: Table,
  options?: {batchSize?: number}
): IterableIterator<ArrowTableBatch> {
  const recordBatches = makeArrowRecordBatchIterator(table, options);
  for (const recordBatch of recordBatches) {
    const data = new arrow.Table([recordBatch]);
    yield {
      // Own enumerable fields of the record batch come first; the explicit fields below override them
      ...recordBatch,
      shape: 'arrow-table',
      schema: table.schema,
      batchType: 'data',
      length: data.numRows,
      data
    };
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Returns an iterator that yields a single table as a sequence of arrow.RecordBatch batches.
 *
 * Cell values are appended to one arrow builder per schema field, and the
 * builders are flushed into a record batch every `batchSize` rows.
 * @param table Table to convert; its `schema` must be set
 * @param options.batchSize Rows per record batch; when omitted or 0 the whole table becomes one batch
 * @note All record batches use the arrow schema converted from `table.schema`.
 */
export function* makeArrowRecordBatchIterator(
  table: Table,
  options?: {batchSize?: number}
): IterableIterator<arrow.RecordBatch> {
  const arrowSchema = convertSchemaToArrow(table.schema!);

  const length = getTableLength(table);
  const numColumns = getTableNumCols(table);
  const batchSize = options?.batchSize || length;

  const builders = arrowSchema.fields.map((arrowField) => arrow.makeBuilder(arrowField));
  const structField = new arrow.Struct(arrowSchema.fields);

  /** Flushes every builder and packages the flushed columns as one record batch */
  const flushRecordBatch = (rowCount: number): arrow.RecordBatch => {
    const datas = builders.map((builder) => builder.flush());
    const structData = new arrow.Data(structField, 0, rowCount, 0, undefined, datas);
    return new arrow.RecordBatch(arrowSchema, structData);
  };

  // Counts ROWS in the current batch. It is advanced once per completed row so
  // that a flush never happens mid-row and every column has the same length.
  let batchLength = 0;
  for (let rowIndex = 0; rowIndex < length; rowIndex++) {
    for (let columnIndex = 0; columnIndex < numColumns; ++columnIndex) {
      const value = getTableCellAt(table, rowIndex, columnIndex);
      builders[columnIndex].append(value);
    }
    batchLength++;

    if (batchLength >= batchSize) {
      yield flushRecordBatch(batchLength);
      batchLength = 0;
    }
  }

  // Remaining rows that did not fill a whole batch
  if (batchLength > 0) {
    yield flushRecordBatch(batchLength);
    batchLength = 0;
  }

  builders.forEach((builder) => builder.finish());
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {TableBatch, Table} from '@loaders.gl/schema';
|
|
6
|
+
import {getTableLength} from '../tables/table-accessors';
|
|
7
|
+
|
|
8
|
+
/**
 * Creates an iterator over the contents of a table, delivered as table batches.
 * @todo Currently only a single batch is yielded.
 * @note The batch is produced by `makeBatchFromTable`, i.e. the whole table is packaged as one batch.
 * @param table The table to iterate over
 * @returns An iterator that yields exactly one `TableBatch`
 */
export function* makeTableBatchIterator(table: Table): IterableIterator<TableBatch> {
  const wholeTableBatch = makeBatchFromTable(table);
  yield wholeTableBatch;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Packages a complete table as a single table batch.
 * @note The batch is a shallow copy of the table's own properties, so it keeps the table's fields.
 * @param table The table to wrap
 * @returns A batch with `length` set from `getTableLength(table)` and `batchType` set to 'data'
 */
export function makeBatchFromTable(table: Table): TableBatch {
  const rowCount = getTableLength(table);
  const batch: TableBatch = {...table, length: rowCount, batchType: 'data'};
  return batch;
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
Schema,
|
|
7
|
+
TableBatch,
|
|
8
|
+
Table,
|
|
9
|
+
ObjectRowTable,
|
|
10
|
+
ArrayRowTable,
|
|
11
|
+
Feature
|
|
12
|
+
} from '@loaders.gl/schema';
|
|
13
|
+
import {getTableLength} from '../tables/table-accessors';
|
|
14
|
+
|
|
15
|
+
/**
 * Concatenates the rows of every batch from a (sync or async) iterator into one table.
 * @note All batches are expected to share one shape and schema: the shape and schema of the
 *   returned table are taken from the first batch that provides them.
 * @param batchIterator Source of table batches
 * @returns The assembled table, or `null` if the iterator yields no batches
 * @throws Error if a batch has a shape other than 'array-row-table', 'object-row-table'
 *   or 'geojson-table' (this includes 'columnar-table' and 'arrow-table')
 */
// eslint-disable-next-line complexity
export async function makeTableFromBatches(
  batchIterator: AsyncIterable<TableBatch> | Iterable<TableBatch>
): Promise<Table | null> {
  let tableShape: Table['shape'] | null = null;
  let tableSchema: Schema | undefined;

  // One accumulator per supported shape, created lazily when the first batch of that shape arrives
  let arrayRowData: ArrayRowTable['data'] | undefined;
  let objectRowData: ObjectRowTable['data'] | undefined;
  let featureList: Feature[] | undefined;

  for await (const batch of batchIterator) {
    // The first batch with a truthy value decides the shape / schema of the result
    if (!tableShape) {
      tableShape = batch.shape;
    }
    if (!tableSchema) {
      tableSchema = batch.schema;
    }

    if (batch.shape === 'array-row-table') {
      if (!arrayRowData) {
        arrayRowData = [];
      }
      const rowCount = getTableLength(batch);
      for (let i = 0; i < rowCount; i++) {
        arrayRowData.push(batch.data[i]);
      }
    } else if (batch.shape === 'object-row-table') {
      if (!objectRowData) {
        objectRowData = [];
      }
      const rowCount = getTableLength(batch);
      for (let i = 0; i < rowCount; i++) {
        objectRowData.push(batch.data[i]);
      }
    } else if (batch.shape === 'geojson-table') {
      if (!featureList) {
        featureList = [];
      }
      const rowCount = getTableLength(batch);
      for (let i = 0; i < rowCount; i++) {
        featureList.push(batch.features[i]);
      }
    } else {
      // 'columnar-table', 'arrow-table' and any unknown shape cannot be assembled here
      throw new Error('shape');
    }
  }

  if (tableShape === 'array-row-table') {
    return {shape: 'array-row-table', data: arrayRowData!, schema: tableSchema};
  }
  if (tableShape === 'object-row-table') {
    return {shape: 'object-row-table', data: objectRowData!, schema: tableSchema};
  }
  if (tableShape === 'geojson-table') {
    return {
      shape: 'geojson-table',
      type: 'FeatureCollection',
      features: featureList!,
      schema: tableSchema
    };
  }

  // No batches were received (or no supported shape was recorded)
  return null;
}
|