@loaders.gl/shapefile 4.3.2 → 4.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/dbf-arrow-loader.d.ts +52 -0
  2. package/dist/dbf-arrow-loader.d.ts.map +1 -0
  3. package/dist/dbf-arrow-loader.js +32 -0
  4. package/dist/dbf-format.d.ts +10 -0
  5. package/dist/dbf-format.d.ts.map +1 -0
  6. package/dist/dbf-format.js +12 -0
  7. package/dist/dbf-loader.js +1 -1
  8. package/dist/dbf-worker.js +1 -1
  9. package/dist/dist.dev.js +12231 -33
  10. package/dist/dist.min.js +11 -2
  11. package/dist/index.cjs +277 -10
  12. package/dist/index.cjs.map +4 -4
  13. package/dist/index.d.ts +1 -0
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +1 -0
  16. package/dist/lib/parsers/parse-dbf-to-arrow.d.ts +26 -0
  17. package/dist/lib/parsers/parse-dbf-to-arrow.d.ts.map +1 -0
  18. package/dist/lib/parsers/parse-dbf-to-arrow.js +321 -0
  19. package/dist/lib/parsers/parse-dbf.d.ts +1 -1
  20. package/dist/lib/parsers/parse-dbf.d.ts.map +1 -1
  21. package/dist/lib/parsers/parse-shapefile.js +2 -2
  22. package/dist/lib/parsers/parse-shp-geometry.d.ts +1 -1
  23. package/dist/lib/parsers/parse-shp-geometry.d.ts.map +1 -1
  24. package/dist/lib/parsers/types.d.ts +1 -1
  25. package/dist/lib/parsers/types.d.ts.map +1 -1
  26. package/dist/shapefile-loader.d.ts.map +1 -1
  27. package/dist/shapefile-loader.js +1 -1
  28. package/dist/shp-loader.js +1 -1
  29. package/dist/shp-worker.js +1 -1
  30. package/package.json +6 -6
  31. package/src/dbf-arrow-loader.ts +46 -0
  32. package/src/dbf-format.ts +15 -0
  33. package/src/index.ts +1 -0
  34. package/src/lib/parsers/parse-dbf-to-arrow.ts +382 -0
  35. package/src/lib/parsers/parse-dbf.ts +1 -1
  36. package/src/lib/parsers/parse-shapefile.ts +2 -2
  37. package/src/lib/parsers/parse-shp-geometry.ts +1 -1
  38. package/src/lib/parsers/types.ts +1 -1
  39. package/src/shapefile-loader.ts +1 -1
@@ -0,0 +1,382 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+
5
+ import type {Schema, Field, ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
6
+ import {ArrowTableBuilder} from '@loaders.gl/schema-utils';
7
+ import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
8
+ import {DBFLoaderOptions, DBFHeader, DBFField} from './types';
9
+
10
/**
 * Mutable accumulator shared between DBFParser and the parseState state machine.
 * Fields are filled in progressively as bytes arrive.
 */
export type DBFResult = {
  // Builds parsed rows into an Arrow table; created once the schema is known
  tableBuilder?: ArrowTableBuilder;
  // Set when parsing fails or the input ends prematurely
  error?: string;
  // Parsed 32-byte file header
  dbfHeader?: DBFHeader;
  // Parsed per-column field descriptors (the schema)
  dbfFields?: DBFField[];
  // Streaming progress counters
  progress?: {
    bytesUsed: number;
    rowsTotal: number;
    rows: number;
  };
};

// Multi-byte integers in DBF headers are little-endian
const LITTLE_ENDIAN = true;
// Fixed size in bytes of the initial DBF file header
const DBF_HEADER_SIZE = 32;

/** States of the streaming DBF parser state machine */
enum STATE {
  START = 0, // Expecting header
  FIELD_DESCRIPTORS = 1,
  FIELD_PROPERTIES = 2,
  END = 3,
  ERROR = 4
}
32
+
33
+ class DBFParser {
34
+ binaryReader = new BinaryChunkReader();
35
+ textDecoder: TextDecoder;
36
+ state = STATE.START;
37
+ result: DBFResult = {};
38
+
39
+ constructor(options: {encoding: string}) {
40
+ this.textDecoder = new TextDecoder(options.encoding);
41
+ }
42
+
43
+ /**
44
+ * @param arrayBuffer
45
+ */
46
+ write(arrayBuffer: ArrayBuffer): void {
47
+ this.binaryReader.write(arrayBuffer);
48
+ this.state = parseState(this.state, this.result, this.binaryReader, this.textDecoder);
49
+ // this.result.progress.bytesUsed = this.binaryReader.bytesUsed();
50
+
51
+ // important events:
52
+ // - schema available
53
+ // - first rows available
54
+ // - all rows available
55
+ }
56
+
57
+ end(): void {
58
+ this.binaryReader.end();
59
+ this.state = parseState(this.state, this.result, this.binaryReader, this.textDecoder);
60
+ // this.result.progress.bytesUsed = this.binaryReader.bytesUsed();
61
+ if (this.state !== STATE.END) {
62
+ this.state = STATE.ERROR;
63
+ this.result.error = 'DBF incomplete file';
64
+ }
65
+ }
66
+ }
67
+
68
+ /**
69
+ * @param arrayBuffer
70
+ * @param options
71
+ * @returns DBFTable or rows
72
+ */
73
+ export function parseDBF(arrayBuffer: ArrayBuffer, options: DBFLoaderOptions = {}): ArrowTable {
74
+ const {encoding = 'latin1'} = options.dbf || {};
75
+
76
+ const dbfParser = new DBFParser({encoding});
77
+ dbfParser.write(arrayBuffer);
78
+ dbfParser.end();
79
+
80
+ const tableBuilder = dbfParser.result.tableBuilder!;
81
+ const arrowTable = tableBuilder.finishTable();
82
+ return arrowTable;
83
+ }
84
+
85
+ /**
86
+ * @param asyncIterator
87
+ * @param options
88
+ */
89
+ export async function* parseDBFInBatches(
90
+ asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
91
+ options: DBFLoaderOptions = {}
92
+ ): AsyncIterable<ArrowTableBatch> {
93
+ const {encoding = 'latin1'} = options.dbf || {};
94
+
95
+ const parser = new DBFParser({encoding});
96
+ let headerReturned = false;
97
+ for await (const arrayBuffer of asyncIterator) {
98
+ parser.write(arrayBuffer);
99
+ if (!headerReturned && parser.result.dbfHeader) {
100
+ headerReturned = true;
101
+ const tableBuilder = parser.result.tableBuilder!;
102
+ const tableBatch = tableBuilder.firstBatch();
103
+ if (tableBatch) {
104
+ yield tableBatch;
105
+ }
106
+ }
107
+ const tableBuilder = parser.result.tableBuilder!;
108
+ const tableBatch = tableBuilder.flushBatch();
109
+ if (tableBatch) {
110
+ yield tableBatch;
111
+ }
112
+ }
113
+ parser.end();
114
+ const tableBuilder = parser.result.tableBuilder!;
115
+ const tableBatch = tableBuilder.finishBatch();
116
+ if (tableBatch) {
117
+ yield tableBatch;
118
+ }
119
+ }
120
+
121
+ /**
122
+ * https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
123
+ * @param state
124
+ * @param result
125
+ * @param binaryReader
126
+ * @param textDecoder
127
+ * @returns
128
+ */
129
+ /* eslint-disable complexity, max-depth */
130
+ function parseState(
131
+ state: STATE,
132
+ result: DBFResult,
133
+ binaryReader: BinaryChunkReader,
134
+ textDecoder: TextDecoder
135
+ ): STATE {
136
+ // eslint-disable-next-line no-constant-condition
137
+ while (true) {
138
+ try {
139
+ switch (state) {
140
+ case STATE.ERROR:
141
+ case STATE.END:
142
+ return state;
143
+
144
+ case STATE.START:
145
+ // Parse initial file header
146
+ // DBF Header
147
+ const dataView = binaryReader.getDataView(DBF_HEADER_SIZE);
148
+ if (!dataView) {
149
+ return state;
150
+ }
151
+ result.dbfHeader = parseDBFHeader(dataView);
152
+ result.progress = {
153
+ bytesUsed: 0,
154
+ rowsTotal: result.dbfHeader.nRecords,
155
+ rows: 0
156
+ };
157
+ state = STATE.FIELD_DESCRIPTORS;
158
+ break;
159
+
160
+ case STATE.FIELD_DESCRIPTORS:
161
+ // Parse DBF field descriptors (schema)
162
+ const fieldDescriptorView = binaryReader.getDataView(
163
+ // @ts-ignore
164
+ result.dbfHeader.headerLength - DBF_HEADER_SIZE
165
+ );
166
+ if (!fieldDescriptorView) {
167
+ return state;
168
+ }
169
+
170
+ result.dbfFields = parseFieldDescriptors(fieldDescriptorView, textDecoder);
171
+ const schema = {
172
+ fields: result.dbfFields.map((dbfField) => makeField(dbfField)),
173
+ metadata: {}
174
+ } as const satisfies Schema;
175
+ result.tableBuilder = new ArrowTableBuilder(schema);
176
+
177
+ state = STATE.FIELD_PROPERTIES;
178
+
179
+ // TODO(kyle) Not exactly sure why start offset needs to be headerLength + 1?
180
+ // parsedbf uses ((fields.length + 1) << 5) + 2;
181
+ binaryReader.skip(1);
182
+ break;
183
+
184
+ case STATE.FIELD_PROPERTIES:
185
+ const {recordLength = 0, nRecords = 0} = result?.dbfHeader || {};
186
+ let rowCount = 0;
187
+ while (rowCount < nRecords) {
188
+ rowCount++;
189
+
190
+ const recordView = binaryReader.getDataView(recordLength - 1);
191
+ if (!recordView) {
192
+ return state;
193
+ }
194
+ // Note: Avoid actually reading the last byte, which may not be present
195
+ binaryReader.skip(1);
196
+
197
+ // @ts-ignore
198
+ const row = parseRow(recordView, result.dbfFields, textDecoder);
199
+ result.tableBuilder!.addObjectRow(row);
200
+ // result.progress.rows = result.data.length;
201
+ }
202
+ state = STATE.END;
203
+ break;
204
+
205
+ default:
206
+ state = STATE.ERROR;
207
+ result.error = `illegal parser state ${state}`;
208
+ return state;
209
+ }
210
+ } catch (error) {
211
+ state = STATE.ERROR;
212
+ result.error = `DBF parsing failed: ${(error as Error).message}`;
213
+ return state;
214
+ }
215
+ }
216
+ }
217
+
218
+ /**
219
+ * @param headerView
220
+ */
221
+ function parseDBFHeader(headerView: DataView): DBFHeader {
222
+ return {
223
+ // Last updated date
224
+ year: headerView.getUint8(1) + 1900,
225
+ month: headerView.getUint8(2),
226
+ day: headerView.getUint8(3),
227
+ // Number of records in data file
228
+ nRecords: headerView.getUint32(4, LITTLE_ENDIAN),
229
+ // Length of header in bytes
230
+ headerLength: headerView.getUint16(8, LITTLE_ENDIAN),
231
+ // Length of each record
232
+ recordLength: headerView.getUint16(10, LITTLE_ENDIAN),
233
+ // Not sure if this is usually set
234
+ languageDriver: headerView.getUint8(29)
235
+ };
236
+ }
237
+
238
+ /**
239
+ * @param view
240
+ */
241
+ function parseFieldDescriptors(view: DataView, textDecoder: TextDecoder): DBFField[] {
242
+ // NOTE: this might overestimate the number of fields if the "Database
243
+ // Container" container exists and is included in the headerLength
244
+ const nFields = (view.byteLength - 1) / 32;
245
+ const fields: DBFField[] = [];
246
+ let offset = 0;
247
+ for (let i = 0; i < nFields; i++) {
248
+ const name = textDecoder
249
+ .decode(new Uint8Array(view.buffer, view.byteOffset + offset, 11))
250
+ // eslint-disable-next-line no-control-regex
251
+ .replace(/\u0000/g, '');
252
+
253
+ fields.push({
254
+ name,
255
+ dataType: String.fromCharCode(view.getUint8(offset + 11)),
256
+ fieldLength: view.getUint8(offset + 16),
257
+ decimal: view.getUint8(offset + 17)
258
+ });
259
+ offset += 32;
260
+ }
261
+ return fields;
262
+ }
263
+
264
+ /**
265
+ *
266
+ * @param view
267
+ * @param fields
268
+ * @param textDecoder
269
+ * @returns
270
+ */
271
+ function parseRow(
272
+ view: DataView,
273
+ fields: DBFField[],
274
+ textDecoder: TextDecoder
275
+ ): {[key: string]: any} {
276
+ const out: {[key: string]: string | number | boolean | null} = {};
277
+ let offset = 0;
278
+ for (const field of fields) {
279
+ const text = textDecoder.decode(
280
+ new Uint8Array(view.buffer, view.byteOffset + offset, field.fieldLength)
281
+ );
282
+ out[field.name] = parseField(text, field.dataType);
283
+ offset += field.fieldLength;
284
+ }
285
+
286
+ return out;
287
+ }
288
+
289
+ /**
290
+ * Should NaN be coerced to null?
291
+ * @param text
292
+ * @param dataType
293
+ * @returns Field depends on a type of the data
294
+ */
295
+ function parseField(text: string, dataType: string): string | number | boolean | null {
296
+ switch (dataType) {
297
+ case 'B':
298
+ return parseNumber(text);
299
+ case 'C':
300
+ return parseCharacter(text);
301
+ case 'F':
302
+ return parseNumber(text);
303
+ case 'N':
304
+ return parseNumber(text);
305
+ case 'O':
306
+ return parseNumber(text);
307
+ case 'D':
308
+ return parseDate(text);
309
+ case 'L':
310
+ return parseBoolean(text);
311
+ default:
312
+ throw new Error('Unsupported data type');
313
+ }
314
+ }
315
+
316
+ /**
317
+ * Parse YYYYMMDD to date in milliseconds
318
+ * @param str YYYYMMDD
319
+ * @returns new Date as a number
320
+ */
321
+ function parseDate(str: any): number {
322
+ return Date.UTC(str.slice(0, 4), parseInt(str.slice(4, 6), 10) - 1, str.slice(6, 8));
323
+ }
324
+
325
+ /**
326
+ * Read boolean value
327
+ * any of Y, y, T, t coerce to true
328
+ * any of N, n, F, f coerce to false
329
+ * otherwise null
330
+ * @param value
331
+ * @returns boolean | null
332
+ */
333
+ function parseBoolean(value: string): boolean | null {
334
+ return /^[nf]$/i.test(value) ? false : /^[yt]$/i.test(value) ? true : null;
335
+ }
336
+
337
+ /**
338
+ * Return null instead of NaN
339
+ * @param text
340
+ * @returns number | null
341
+ */
342
+ function parseNumber(text: string): number | null {
343
+ const number = parseFloat(text);
344
+ return isNaN(number) ? null : number;
345
+ }
346
+
347
+ /**
348
+ *
349
+ * @param text
350
+ * @returns string | null
351
+ */
352
+ function parseCharacter(text: string): string | null {
353
+ return text.trim() || null;
354
+ }
355
+
356
+ /**
357
+ * Create a standard Arrow-style `Field` from field descriptor.
358
+ * TODO - use `fieldLength` and `decimal` to generate smaller types?
359
+ * @param param0
360
+ * @returns Field
361
+ */
362
+ // eslint-disable
363
+ function makeField({name, dataType, fieldLength, decimal}: DBFField): Field {
364
+ switch (dataType) {
365
+ case 'B':
366
+ return {name, type: 'float64', nullable: true, metadata: {}};
367
+ case 'C':
368
+ return {name, type: 'utf8', nullable: true, metadata: {}};
369
+ case 'F':
370
+ return {name, type: 'float64', nullable: true, metadata: {}};
371
+ case 'N':
372
+ return {name, type: 'float64', nullable: true, metadata: {}};
373
+ case 'O':
374
+ return {name, type: 'float64', nullable: true, metadata: {}};
375
+ case 'D':
376
+ return {name, type: 'timestamp-millisecond', nullable: true, metadata: {}};
377
+ case 'L':
378
+ return {name, type: 'bool', nullable: true, metadata: {}};
379
+ default:
380
+ throw new Error('Unsupported data type');
381
+ }
382
+ }
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
- import {Field, ObjectRowTable} from '@loaders.gl/schema';
5
+ import type {Field, ObjectRowTable} from '@loaders.gl/schema';
6
6
  import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
7
7
  import {
8
8
  DBFLoaderOptions,
@@ -4,7 +4,7 @@
4
4
 
5
5
  // import type {Feature} from '@loaders.gl/gis';
6
6
  import {LoaderContext, parseInBatchesFromContext, parseFromContext} from '@loaders.gl/loader-utils';
7
- import {binaryToGeometry, transformGeoJsonCoords} from '@loaders.gl/gis';
7
+ import {convertBinaryGeometryToGeometry, transformGeoJsonCoords} from '@loaders.gl/gis';
8
8
  import type {
9
9
  BinaryGeometry,
10
10
  Geometry,
@@ -193,7 +193,7 @@ export async function parseShapefile(
193
193
  function parseGeometries(geometries: BinaryGeometry[]): Geometry[] {
194
194
  const geojsonGeometries: any[] = [];
195
195
  for (const geom of geometries) {
196
- geojsonGeometries.push(binaryToGeometry(geom));
196
+ geojsonGeometries.push(convertBinaryGeometryToGeometry(geom));
197
197
  }
198
198
  return geojsonGeometries;
199
199
  }
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
- import {BinaryGeometry, BinaryGeometryType} from '@loaders.gl/schema';
5
+ import type {BinaryGeometry, BinaryGeometryType} from '@loaders.gl/schema';
6
6
  import {SHPLoaderOptions} from './types';
7
7
 
8
8
  const LITTLE_ENDIAN = true;
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
- import {Schema, ObjectRowTable} from '@loaders.gl/schema';
5
+ import type {Schema, ObjectRowTable} from '@loaders.gl/schema';
6
6
  import type {LoaderOptions} from '@loaders.gl/loader-utils';
7
7
 
8
8
  export type SHPLoaderOptions = LoaderOptions & {
@@ -3,9 +3,9 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
5
  import type {LoaderOptions, LoaderWithParser} from '@loaders.gl/loader-utils';
6
+ import type {Batch, GeoJSONTable} from '@loaders.gl/schema';
6
7
  import {SHP_MAGIC_NUMBER} from './shp-loader';
7
8
  import {parseShapefile, parseShapefileInBatches} from './lib/parsers/parse-shapefile';
8
- import {Batch, GeoJSONTable} from '@loaders.gl/schema';
9
9
 
10
10
  // __VERSION__ is injected by babel-plugin-version-inline
11
11
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.