@loaders.gl/shapefile 4.3.2 → 4.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/dbf-arrow-loader.d.ts +52 -0
  2. package/dist/dbf-arrow-loader.d.ts.map +1 -0
  3. package/dist/dbf-arrow-loader.js +32 -0
  4. package/dist/dbf-format.d.ts +10 -0
  5. package/dist/dbf-format.d.ts.map +1 -0
  6. package/dist/dbf-format.js +12 -0
  7. package/dist/dbf-loader.js +1 -1
  8. package/dist/dbf-worker.js +1 -1
  9. package/dist/dist.dev.js +12231 -33
  10. package/dist/dist.min.js +11 -2
  11. package/dist/index.cjs +277 -10
  12. package/dist/index.cjs.map +4 -4
  13. package/dist/index.d.ts +1 -0
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +1 -0
  16. package/dist/lib/parsers/parse-dbf-to-arrow.d.ts +26 -0
  17. package/dist/lib/parsers/parse-dbf-to-arrow.d.ts.map +1 -0
  18. package/dist/lib/parsers/parse-dbf-to-arrow.js +321 -0
  19. package/dist/lib/parsers/parse-dbf.d.ts +1 -1
  20. package/dist/lib/parsers/parse-dbf.d.ts.map +1 -1
  21. package/dist/lib/parsers/parse-shapefile.js +2 -2
  22. package/dist/lib/parsers/parse-shp-geometry.d.ts +1 -1
  23. package/dist/lib/parsers/parse-shp-geometry.d.ts.map +1 -1
  24. package/dist/lib/parsers/types.d.ts +1 -1
  25. package/dist/lib/parsers/types.d.ts.map +1 -1
  26. package/dist/shapefile-loader.d.ts.map +1 -1
  27. package/dist/shapefile-loader.js +1 -1
  28. package/dist/shp-loader.js +1 -1
  29. package/dist/shp-worker.js +1 -1
  30. package/package.json +6 -6
  31. package/src/dbf-arrow-loader.ts +46 -0
  32. package/src/dbf-format.ts +15 -0
  33. package/src/index.ts +1 -0
  34. package/src/lib/parsers/parse-dbf-to-arrow.ts +382 -0
  35. package/src/lib/parsers/parse-dbf.ts +1 -1
  36. package/src/lib/parsers/parse-shapefile.ts +2 -2
  37. package/src/lib/parsers/parse-shp-geometry.ts +1 -1
  38. package/src/lib/parsers/types.ts +1 -1
  39. package/src/shapefile-loader.ts +1 -1
@@ -0,0 +1,382 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+
5
+ import type {Schema, Field, ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
6
+ import {ArrowTableBuilder} from '@loaders.gl/schema-utils';
7
+ import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
8
+ import {DBFLoaderOptions, DBFHeader, DBFField} from './types';
9
+
10
/**
 * Mutable accumulator shared between DBFParser and the parseState state machine.
 * Fields are filled in progressively as bytes arrive.
 */
export type DBFResult = {
  // Builds parsed rows into an Arrow table; created once the schema is known
  tableBuilder?: ArrowTableBuilder;
  // Set when parsing fails or the input ends prematurely
  error?: string;
  // Parsed 32-byte file header
  dbfHeader?: DBFHeader;
  // Parsed per-column field descriptors (the schema)
  dbfFields?: DBFField[];
  // Streaming progress counters
  progress?: {
    bytesUsed: number;
    rowsTotal: number;
    rows: number;
  };
};

// Multi-byte integers in DBF headers are little-endian
const LITTLE_ENDIAN = true;
// Fixed size in bytes of the initial DBF file header
const DBF_HEADER_SIZE = 32;

/** States of the streaming DBF parser state machine */
enum STATE {
  START = 0, // Expecting header
  FIELD_DESCRIPTORS = 1,
  FIELD_PROPERTIES = 2,
  END = 3,
  ERROR = 4
}
32
+
33
+ class DBFParser {
34
+ binaryReader = new BinaryChunkReader();
35
+ textDecoder: TextDecoder;
36
+ state = STATE.START;
37
+ result: DBFResult = {};
38
+
39
+ constructor(options: {encoding: string}) {
40
+ this.textDecoder = new TextDecoder(options.encoding);
41
+ }
42
+
43
+ /**
44
+ * @param arrayBuffer
45
+ */
46
+ write(arrayBuffer: ArrayBuffer): void {
47
+ this.binaryReader.write(arrayBuffer);
48
+ this.state = parseState(this.state, this.result, this.binaryReader, this.textDecoder);
49
+ // this.result.progress.bytesUsed = this.binaryReader.bytesUsed();
50
+
51
+ // important events:
52
+ // - schema available
53
+ // - first rows available
54
+ // - all rows available
55
+ }
56
+
57
+ end(): void {
58
+ this.binaryReader.end();
59
+ this.state = parseState(this.state, this.result, this.binaryReader, this.textDecoder);
60
+ // this.result.progress.bytesUsed = this.binaryReader.bytesUsed();
61
+ if (this.state !== STATE.END) {
62
+ this.state = STATE.ERROR;
63
+ this.result.error = 'DBF incomplete file';
64
+ }
65
+ }
66
+ }
67
+
68
+ /**
69
+ * @param arrayBuffer
70
+ * @param options
71
+ * @returns DBFTable or rows
72
+ */
73
+ export function parseDBF(arrayBuffer: ArrayBuffer, options: DBFLoaderOptions = {}): ArrowTable {
74
+ const {encoding = 'latin1'} = options.dbf || {};
75
+
76
+ const dbfParser = new DBFParser({encoding});
77
+ dbfParser.write(arrayBuffer);
78
+ dbfParser.end();
79
+
80
+ const tableBuilder = dbfParser.result.tableBuilder!;
81
+ const arrowTable = tableBuilder.finishTable();
82
+ return arrowTable;
83
+ }
84
+
85
+ /**
86
+ * @param asyncIterator
87
+ * @param options
88
+ */
89
+ export async function* parseDBFInBatches(
90
+ asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
91
+ options: DBFLoaderOptions = {}
92
+ ): AsyncIterable<ArrowTableBatch> {
93
+ const {encoding = 'latin1'} = options.dbf || {};
94
+
95
+ const parser = new DBFParser({encoding});
96
+ let headerReturned = false;
97
+ for await (const arrayBuffer of asyncIterator) {
98
+ parser.write(arrayBuffer);
99
+ if (!headerReturned && parser.result.dbfHeader) {
100
+ headerReturned = true;
101
+ const tableBuilder = parser.result.tableBuilder!;
102
+ const tableBatch = tableBuilder.firstBatch();
103
+ if (tableBatch) {
104
+ yield tableBatch;
105
+ }
106
+ }
107
+ const tableBuilder = parser.result.tableBuilder!;
108
+ const tableBatch = tableBuilder.flushBatch();
109
+ if (tableBatch) {
110
+ yield tableBatch;
111
+ }
112
+ }
113
+ parser.end();
114
+ const tableBuilder = parser.result.tableBuilder!;
115
+ const tableBatch = tableBuilder.finishBatch();
116
+ if (tableBatch) {
117
+ yield tableBatch;
118
+ }
119
+ }
120
+
121
+ /**
122
+ * https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
123
+ * @param state
124
+ * @param result
125
+ * @param binaryReader
126
+ * @param textDecoder
127
+ * @returns
128
+ */
129
+ /* eslint-disable complexity, max-depth */
130
+ function parseState(
131
+ state: STATE,
132
+ result: DBFResult,
133
+ binaryReader: BinaryChunkReader,
134
+ textDecoder: TextDecoder
135
+ ): STATE {
136
+ // eslint-disable-next-line no-constant-condition
137
+ while (true) {
138
+ try {
139
+ switch (state) {
140
+ case STATE.ERROR:
141
+ case STATE.END:
142
+ return state;
143
+
144
+ case STATE.START:
145
+ // Parse initial file header
146
+ // DBF Header
147
+ const dataView = binaryReader.getDataView(DBF_HEADER_SIZE);
148
+ if (!dataView) {
149
+ return state;
150
+ }
151
+ result.dbfHeader = parseDBFHeader(dataView);
152
+ result.progress = {
153
+ bytesUsed: 0,
154
+ rowsTotal: result.dbfHeader.nRecords,
155
+ rows: 0
156
+ };
157
+ state = STATE.FIELD_DESCRIPTORS;
158
+ break;
159
+
160
+ case STATE.FIELD_DESCRIPTORS:
161
+ // Parse DBF field descriptors (schema)
162
+ const fieldDescriptorView = binaryReader.getDataView(
163
+ // @ts-ignore
164
+ result.dbfHeader.headerLength - DBF_HEADER_SIZE
165
+ );
166
+ if (!fieldDescriptorView) {
167
+ return state;
168
+ }
169
+
170
+ result.dbfFields = parseFieldDescriptors(fieldDescriptorView, textDecoder);
171
+ const schema = {
172
+ fields: result.dbfFields.map((dbfField) => makeField(dbfField)),
173
+ metadata: {}
174
+ } as const satisfies Schema;
175
+ result.tableBuilder = new ArrowTableBuilder(schema);
176
+
177
+ state = STATE.FIELD_PROPERTIES;
178
+
179
+ // TODO(kyle) Not exactly sure why start offset needs to be headerLength + 1?
180
+ // parsedbf uses ((fields.length + 1) << 5) + 2;
181
+ binaryReader.skip(1);
182
+ break;
183
+
184
+ case STATE.FIELD_PROPERTIES:
185
+ const {recordLength = 0, nRecords = 0} = result?.dbfHeader || {};
186
+ let rowCount = 0;
187
+ while (rowCount < nRecords) {
188
+ rowCount++;
189
+
190
+ const recordView = binaryReader.getDataView(recordLength - 1);
191
+ if (!recordView) {
192
+ return state;
193
+ }
194
+ // Note: Avoid actually reading the last byte, which may not be present
195
+ binaryReader.skip(1);
196
+
197
+ // @ts-ignore
198
+ const row = parseRow(recordView, result.dbfFields, textDecoder);
199
+ result.tableBuilder!.addObjectRow(row);
200
+ // result.progress.rows = result.data.length;
201
+ }
202
+ state = STATE.END;
203
+ break;
204
+
205
+ default:
206
+ state = STATE.ERROR;
207
+ result.error = `illegal parser state ${state}`;
208
+ return state;
209
+ }
210
+ } catch (error) {
211
+ state = STATE.ERROR;
212
+ result.error = `DBF parsing failed: ${(error as Error).message}`;
213
+ return state;
214
+ }
215
+ }
216
+ }
217
+
218
+ /**
219
+ * @param headerView
220
+ */
221
+ function parseDBFHeader(headerView: DataView): DBFHeader {
222
+ return {
223
+ // Last updated date
224
+ year: headerView.getUint8(1) + 1900,
225
+ month: headerView.getUint8(2),
226
+ day: headerView.getUint8(3),
227
+ // Number of records in data file
228
+ nRecords: headerView.getUint32(4, LITTLE_ENDIAN),
229
+ // Length of header in bytes
230
+ headerLength: headerView.getUint16(8, LITTLE_ENDIAN),
231
+ // Length of each record
232
+ recordLength: headerView.getUint16(10, LITTLE_ENDIAN),
233
+ // Not sure if this is usually set
234
+ languageDriver: headerView.getUint8(29)
235
+ };
236
+ }
237
+
238
+ /**
239
+ * @param view
240
+ */
241
+ function parseFieldDescriptors(view: DataView, textDecoder: TextDecoder): DBFField[] {
242
+ // NOTE: this might overestimate the number of fields if the "Database
243
+ // Container" container exists and is included in the headerLength
244
+ const nFields = (view.byteLength - 1) / 32;
245
+ const fields: DBFField[] = [];
246
+ let offset = 0;
247
+ for (let i = 0; i < nFields; i++) {
248
+ const name = textDecoder
249
+ .decode(new Uint8Array(view.buffer, view.byteOffset + offset, 11))
250
+ // eslint-disable-next-line no-control-regex
251
+ .replace(/\u0000/g, '');
252
+
253
+ fields.push({
254
+ name,
255
+ dataType: String.fromCharCode(view.getUint8(offset + 11)),
256
+ fieldLength: view.getUint8(offset + 16),
257
+ decimal: view.getUint8(offset + 17)
258
+ });
259
+ offset += 32;
260
+ }
261
+ return fields;
262
+ }
263
+
264
+ /**
265
+ *
266
+ * @param view
267
+ * @param fields
268
+ * @param textDecoder
269
+ * @returns
270
+ */
271
+ function parseRow(
272
+ view: DataView,
273
+ fields: DBFField[],
274
+ textDecoder: TextDecoder
275
+ ): {[key: string]: any} {
276
+ const out: {[key: string]: string | number | boolean | null} = {};
277
+ let offset = 0;
278
+ for (const field of fields) {
279
+ const text = textDecoder.decode(
280
+ new Uint8Array(view.buffer, view.byteOffset + offset, field.fieldLength)
281
+ );
282
+ out[field.name] = parseField(text, field.dataType);
283
+ offset += field.fieldLength;
284
+ }
285
+
286
+ return out;
287
+ }
288
+
289
+ /**
290
+ * Should NaN be coerced to null?
291
+ * @param text
292
+ * @param dataType
293
+ * @returns Field depends on a type of the data
294
+ */
295
+ function parseField(text: string, dataType: string): string | number | boolean | null {
296
+ switch (dataType) {
297
+ case 'B':
298
+ return parseNumber(text);
299
+ case 'C':
300
+ return parseCharacter(text);
301
+ case 'F':
302
+ return parseNumber(text);
303
+ case 'N':
304
+ return parseNumber(text);
305
+ case 'O':
306
+ return parseNumber(text);
307
+ case 'D':
308
+ return parseDate(text);
309
+ case 'L':
310
+ return parseBoolean(text);
311
+ default:
312
+ throw new Error('Unsupported data type');
313
+ }
314
+ }
315
+
316
+ /**
317
+ * Parse YYYYMMDD to date in milliseconds
318
+ * @param str YYYYMMDD
319
+ * @returns new Date as a number
320
+ */
321
+ function parseDate(str: any): number {
322
+ return Date.UTC(str.slice(0, 4), parseInt(str.slice(4, 6), 10) - 1, str.slice(6, 8));
323
+ }
324
+
325
+ /**
326
+ * Read boolean value
327
+ * any of Y, y, T, t coerce to true
328
+ * any of N, n, F, f coerce to false
329
+ * otherwise null
330
+ * @param value
331
+ * @returns boolean | null
332
+ */
333
+ function parseBoolean(value: string): boolean | null {
334
+ return /^[nf]$/i.test(value) ? false : /^[yt]$/i.test(value) ? true : null;
335
+ }
336
+
337
+ /**
338
+ * Return null instead of NaN
339
+ * @param text
340
+ * @returns number | null
341
+ */
342
+ function parseNumber(text: string): number | null {
343
+ const number = parseFloat(text);
344
+ return isNaN(number) ? null : number;
345
+ }
346
+
347
+ /**
348
+ *
349
+ * @param text
350
+ * @returns string | null
351
+ */
352
+ function parseCharacter(text: string): string | null {
353
+ return text.trim() || null;
354
+ }
355
+
356
+ /**
357
+ * Create a standard Arrow-style `Field` from field descriptor.
358
+ * TODO - use `fieldLength` and `decimal` to generate smaller types?
359
+ * @param param0
360
+ * @returns Field
361
+ */
362
+ // eslint-disable
363
+ function makeField({name, dataType, fieldLength, decimal}: DBFField): Field {
364
+ switch (dataType) {
365
+ case 'B':
366
+ return {name, type: 'float64', nullable: true, metadata: {}};
367
+ case 'C':
368
+ return {name, type: 'utf8', nullable: true, metadata: {}};
369
+ case 'F':
370
+ return {name, type: 'float64', nullable: true, metadata: {}};
371
+ case 'N':
372
+ return {name, type: 'float64', nullable: true, metadata: {}};
373
+ case 'O':
374
+ return {name, type: 'float64', nullable: true, metadata: {}};
375
+ case 'D':
376
+ return {name, type: 'timestamp-millisecond', nullable: true, metadata: {}};
377
+ case 'L':
378
+ return {name, type: 'bool', nullable: true, metadata: {}};
379
+ default:
380
+ throw new Error('Unsupported data type');
381
+ }
382
+ }
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
- import {Field, ObjectRowTable} from '@loaders.gl/schema';
5
+ import type {Field, ObjectRowTable} from '@loaders.gl/schema';
6
6
  import {BinaryChunkReader} from '../streaming/binary-chunk-reader';
7
7
  import {
8
8
  DBFLoaderOptions,
@@ -4,7 +4,7 @@
4
4
 
5
5
  // import type {Feature} from '@loaders.gl/gis';
6
6
  import {LoaderContext, parseInBatchesFromContext, parseFromContext} from '@loaders.gl/loader-utils';
7
- import {binaryToGeometry, transformGeoJsonCoords} from '@loaders.gl/gis';
7
+ import {convertBinaryGeometryToGeometry, transformGeoJsonCoords} from '@loaders.gl/gis';
8
8
  import type {
9
9
  BinaryGeometry,
10
10
  Geometry,
@@ -193,7 +193,7 @@ export async function parseShapefile(
193
193
  function parseGeometries(geometries: BinaryGeometry[]): Geometry[] {
194
194
  const geojsonGeometries: any[] = [];
195
195
  for (const geom of geometries) {
196
- geojsonGeometries.push(binaryToGeometry(geom));
196
+ geojsonGeometries.push(convertBinaryGeometryToGeometry(geom));
197
197
  }
198
198
  return geojsonGeometries;
199
199
  }
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
- import {BinaryGeometry, BinaryGeometryType} from '@loaders.gl/schema';
5
+ import type {BinaryGeometry, BinaryGeometryType} from '@loaders.gl/schema';
6
6
  import {SHPLoaderOptions} from './types';
7
7
 
8
8
  const LITTLE_ENDIAN = true;
@@ -2,7 +2,7 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
- import {Schema, ObjectRowTable} from '@loaders.gl/schema';
5
+ import type {Schema, ObjectRowTable} from '@loaders.gl/schema';
6
6
  import type {LoaderOptions} from '@loaders.gl/loader-utils';
7
7
 
8
8
  export type SHPLoaderOptions = LoaderOptions & {
@@ -3,9 +3,9 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
5
  import type {LoaderOptions, LoaderWithParser} from '@loaders.gl/loader-utils';
6
+ import type {Batch, GeoJSONTable} from '@loaders.gl/schema';
6
7
  import {SHP_MAGIC_NUMBER} from './shp-loader';
7
8
  import {parseShapefile, parseShapefileInBatches} from './lib/parsers/parse-shapefile';
8
- import {Batch, GeoJSONTable} from '@loaders.gl/schema';
9
9
 
10
10
  // __VERSION__ is injected by babel-plugin-version-inline
11
11
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.