@genome-spy/core 0.71.0 → 0.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/dist/bundle/index.es.js +6842 -5365
- package/dist/bundle/index.js +159 -140
- package/dist/bundle/parquetRead-BnAGCa4_.js +1663 -0
- package/dist/schema.json +281 -17
- package/dist/src/data/formats/bed.d.ts +8 -0
- package/dist/src/data/formats/bed.d.ts.map +1 -0
- package/dist/src/data/formats/bed.js +53 -0
- package/dist/src/data/formats/bedpe.d.ts +8 -0
- package/dist/src/data/formats/bedpe.d.ts.map +1 -0
- package/dist/src/data/formats/bedpe.js +160 -0
- package/dist/src/data/formats/parquet.d.ts +12 -0
- package/dist/src/data/formats/parquet.d.ts.map +1 -0
- package/dist/src/data/formats/parquet.js +29 -0
- package/dist/src/data/formats/parquetRead.d.ts +18 -0
- package/dist/src/data/formats/parquetRead.d.ts.map +1 -0
- package/dist/src/data/formats/parquetRead.js +326 -0
- package/dist/src/data/sources/dataUtils.d.ts +16 -0
- package/dist/src/data/sources/dataUtils.d.ts.map +1 -1
- package/dist/src/data/sources/dataUtils.js +53 -3
- package/dist/src/data/sources/urlSource.d.ts +4 -0
- package/dist/src/data/sources/urlSource.d.ts.map +1 -1
- package/dist/src/data/sources/urlSource.js +141 -17
- package/dist/src/encoder/encoder.d.ts +2 -2
- package/dist/src/fonts/bmFontManager.d.ts +1 -1
- package/dist/src/genome/assemblyPreflight.d.ts +31 -0
- package/dist/src/genome/assemblyPreflight.d.ts.map +1 -0
- package/dist/src/genome/assemblyPreflight.js +99 -0
- package/dist/src/genome/genome.d.ts +2 -2
- package/dist/src/genome/genome.d.ts.map +1 -1
- package/dist/src/genome/genome.js +4 -0
- package/dist/src/genome/genomeStore.d.ts +34 -3
- package/dist/src/genome/genomeStore.d.ts.map +1 -1
- package/dist/src/genome/genomeStore.js +409 -18
- package/dist/src/genome/rootGenomeConfig.d.ts +26 -0
- package/dist/src/genome/rootGenomeConfig.d.ts.map +1 -0
- package/dist/src/genome/rootGenomeConfig.js +94 -0
- package/dist/src/genomeSpy/interactionController.d.ts +5 -1
- package/dist/src/genomeSpy/interactionController.d.ts.map +1 -1
- package/dist/src/genomeSpy/interactionController.js +244 -29
- package/dist/src/genomeSpy/renderCoordinator.js +1 -1
- package/dist/src/genomeSpy.d.ts +13 -3
- package/dist/src/genomeSpy.d.ts.map +1 -1
- package/dist/src/genomeSpy.js +83 -7
- package/dist/src/gl/canvasSizeHelper.d.ts +74 -0
- package/dist/src/gl/canvasSizeHelper.d.ts.map +1 -0
- package/dist/src/gl/canvasSizeHelper.js +203 -0
- package/dist/src/gl/hashTable.d.ts +78 -0
- package/dist/src/gl/hashTable.d.ts.map +1 -0
- package/dist/src/gl/hashTable.js +164 -0
- package/dist/src/gl/includes/common.glsl.js +1 -1
- package/dist/src/gl/webGLHelper.d.ts +25 -11
- package/dist/src/gl/webGLHelper.d.ts.map +1 -1
- package/dist/src/gl/webGLHelper.js +71 -39
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +5 -2
- package/dist/src/marks/link.d.ts.map +1 -1
- package/dist/src/marks/link.js +5 -3
- package/dist/src/marks/mark.d.ts +1 -1
- package/dist/src/marks/mark.d.ts.map +1 -1
- package/dist/src/marks/mark.js +8 -4
- package/dist/src/scales/domainPlanner.d.ts +34 -3
- package/dist/src/scales/domainPlanner.d.ts.map +1 -1
- package/dist/src/scales/domainPlanner.js +247 -26
- package/dist/src/scales/scaleInstanceManager.d.ts +2 -1
- package/dist/src/scales/scaleInstanceManager.d.ts.map +1 -1
- package/dist/src/scales/scaleInstanceManager.js +10 -11
- package/dist/src/scales/scaleInteractionController.d.ts.map +1 -1
- package/dist/src/scales/scaleInteractionController.js +16 -14
- package/dist/src/scales/scaleResolution.d.ts +16 -0
- package/dist/src/scales/scaleResolution.d.ts.map +1 -1
- package/dist/src/scales/scaleResolution.js +314 -54
- package/dist/src/scales/scaleResolutionTestUtils.d.ts +21 -0
- package/dist/src/scales/scaleResolutionTestUtils.d.ts.map +1 -0
- package/dist/src/scales/scaleResolutionTestUtils.js +33 -0
- package/dist/src/scales/selectionDomainUtils.d.ts +22 -0
- package/dist/src/scales/selectionDomainUtils.d.ts.map +1 -0
- package/dist/src/scales/selectionDomainUtils.js +79 -0
- package/dist/src/scales/zoomDomainUtils.d.ts +18 -0
- package/dist/src/scales/zoomDomainUtils.d.ts.map +1 -0
- package/dist/src/scales/zoomDomainUtils.js +69 -0
- package/dist/src/screenshotHarness.d.ts +16 -0
- package/dist/src/screenshotHarness.d.ts.map +1 -0
- package/dist/src/screenshotHarness.js +242 -0
- package/dist/src/singlePageApp.js +1 -1
- package/dist/src/spec/data.d.ts +23 -3
- package/dist/src/spec/genome.d.ts +22 -2
- package/dist/src/spec/parameter.d.ts +39 -2
- package/dist/src/spec/root.d.ts +20 -1
- package/dist/src/spec/scale.d.ts +41 -5
- package/dist/src/styles/genome-spy.css +8 -0
- package/dist/src/styles/genome-spy.css.d.ts +1 -1
- package/dist/src/styles/genome-spy.css.d.ts.map +1 -1
- package/dist/src/styles/genome-spy.css.js +8 -0
- package/dist/src/tooltip/dataTooltipHandler.js +59 -10
- package/dist/src/types/embedApi.d.ts +19 -0
- package/dist/src/utils/inferSpecBaseUrl.d.ts +14 -0
- package/dist/src/utils/inferSpecBaseUrl.d.ts.map +1 -0
- package/dist/src/utils/inferSpecBaseUrl.js +73 -0
- package/dist/src/utils/interactionEvent.d.ts +53 -3
- package/dist/src/utils/interactionEvent.d.ts.map +1 -1
- package/dist/src/utils/interactionEvent.js +62 -1
- package/dist/src/utils/radixSort.d.ts.map +1 -1
- package/dist/src/utils/radixSort.js +26 -1
- package/dist/src/view/containerMutationHelper.d.ts.map +1 -1
- package/dist/src/view/containerMutationHelper.js +8 -0
- package/dist/src/view/dataReadiness.d.ts +2 -2
- package/dist/src/view/dataReadiness.d.ts.map +1 -1
- package/dist/src/view/dataReadiness.js +63 -58
- package/dist/src/view/facetView.d.ts +1 -1
- package/dist/src/view/facetView.js +1 -1
- package/dist/src/view/gridView/gridChild.d.ts +7 -0
- package/dist/src/view/gridView/gridChild.d.ts.map +1 -1
- package/dist/src/view/gridView/gridChild.js +180 -11
- package/dist/src/view/gridView/gridView.d.ts.map +1 -1
- package/dist/src/view/gridView/gridView.js +60 -17
- package/dist/src/view/unitView.d.ts +1 -1
- package/dist/src/view/zoom.d.ts +14 -2
- package/dist/src/view/zoom.d.ts.map +1 -1
- package/dist/src/view/zoom.js +373 -76
- package/package.json +5 -2
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
const blankLinePattern = /^\s*$/;
|
|
2
|
+
const controlLinePattern = /^\s*(?:browser\b|track\b|#)/;
|
|
3
|
+
|
|
4
|
+
const defaultColumns = [
|
|
5
|
+
"chrom1",
|
|
6
|
+
"start1",
|
|
7
|
+
"end1",
|
|
8
|
+
"chrom2",
|
|
9
|
+
"start2",
|
|
10
|
+
"end2",
|
|
11
|
+
"name",
|
|
12
|
+
"score",
|
|
13
|
+
"strand1",
|
|
14
|
+
"strand2",
|
|
15
|
+
];
|
|
16
|
+
|
|
17
|
+
const requiredColumns = defaultColumns.slice(0, 6);
|
|
18
|
+
|
|
19
|
+
const normalizeNone = (/** @type {string} */ value) => value;
|
|
20
|
+
/**
 * Replaces the "." sentinel with null; every other string is returned as is.
 */
const normalizeStringSentinel = (/** @type {string} */ value) => {
    if (value == ".") {
        return null;
    }
    return value;
};
|
|
22
|
+
/**
 * Encodes a strand symbol as a number: "+" gives 1, "-" gives -1 and
 * anything else gives 0.
 */
const normalizeStrand = (/** @type {string} */ value) =>
    value == "+" ? 1 : value == "-" ? -1 : 0;
|
|
31
|
+
/**
 * Converts a coordinate field to an integer.
 *
 * Returns null for the "missing" sentinels (".", "-1" and the empty string)
 * and for anything that does not parse to an integer. Surrounding whitespace
 * is ignored so that a whitespace-only field is treated as missing instead of
 * being coerced to 0 by `Number()`, and a padded sentinel such as " -1" is
 * still recognized.
 */
const normalizeCoordinate = (/** @type {string} */ value) => {
    const text = value.trim();
    if (text === "" || text === "." || text === "-1") {
        return null;
    }
    const parsed = Number(text);
    return Number.isInteger(parsed) ? parsed : null;
};
|
|
38
|
+
/**
 * Converts a score field to a number when possible.
 *
 * Returns null for the "." sentinel and for empty or whitespace-only fields
 * (the latter would otherwise be coerced to 0 by `Number()`). Values that do
 * not parse as a number are returned unchanged as the original string.
 */
const normalizeScore = (/** @type {string} */ value) => {
    const text = value.trim();
    if (text === "" || text === ".") {
        return null;
    }
    const parsed = Number(text);
    return Number.isNaN(parsed) ? value : parsed;
};
|
|
45
|
+
|
|
46
|
+
/** @type {Record<string, (value: string) => any>} */
|
|
47
|
+
const columnNormalizers = {
|
|
48
|
+
chrom1: normalizeStringSentinel,
|
|
49
|
+
chrom2: normalizeStringSentinel,
|
|
50
|
+
name: normalizeStringSentinel,
|
|
51
|
+
strand1: normalizeStrand,
|
|
52
|
+
strand2: normalizeStrand,
|
|
53
|
+
start1: normalizeCoordinate,
|
|
54
|
+
end1: normalizeCoordinate,
|
|
55
|
+
start2: normalizeCoordinate,
|
|
56
|
+
end2: normalizeCoordinate,
|
|
57
|
+
score: normalizeScore,
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
/**
 * Decides whether a split line is a header row.
 *
 * Only the required leading BEDPE columns are compared, because the optional
 * trailing columns differ between producers. The row qualifies when it has at
 * least as many cells as there are required columns and each of those leading
 * cells equals the corresponding required column name.
 *
 * @param {string[]} row
 */
function looksLikeHeaderRow(row) {
    return (
        row.length >= requiredColumns.length &&
        requiredColumns.every((name, index) => row[index] == name)
    );
}
|
|
79
|
+
|
|
80
|
+
/**
 * Parses tab-delimited BEDPE text into an array of row objects.
 *
 * Leading blank lines and control lines (`browser`, `track`, `#`) are skipped
 * until the first data line; after that only blank lines are skipped.
 * Column names come from `format.columns` when given, otherwise from a
 * detected header row, otherwise from the default BEDPE column names. Cells
 * beyond the known columns are named `field<N>` (1-based position).
 *
 * @param {string} data
 * @param {{ columns?: string[] }} [format]
 * @returns {Record<string, any>[]}
 * @throws {Error} If a data line has fewer cells than there are required columns.
 */
export default function bedpe(data, format = {}) {
    const lines = data.split(/\r?\n/);
    const explicitColumns = format.columns;
    const hasOwn = Object.prototype.hasOwnProperty;

    let dataStarted = false;
    let columnsInitialized = false;
    let lineNumber = 0;

    /** @type {string[]} */
    const columns = [];
    /** @type {((value: string) => any)[]} */
    const normalizers = [];
    /** @type {Record<string, any>[]} */
    const rows = [];

    for (const line of lines) {
        lineNumber++;

        // Matches both empty and whitespace-only lines.
        if (blankLinePattern.test(line)) {
            continue;
        }

        if (!dataStarted) {
            // Control lines are only recognized before the first data line.
            if (controlLinePattern.test(line)) {
                continue;
            }
            dataStarted = true;
        }

        const row = line.split("\t");

        if (!columnsInitialized) {
            const isHeaderRow = !explicitColumns && looksLikeHeaderRow(row);
            const baseColumns =
                explicitColumns ?? (isHeaderRow ? row : defaultColumns);

            for (const column of baseColumns) {
                columns.push(column);
                // Own-property check: a user column called "constructor" or
                // "toString" must not pick up an inherited Object.prototype
                // member as its normalizer.
                normalizers.push(
                    hasOwn.call(columnNormalizers, column)
                        ? columnNormalizers[column]
                        : normalizeNone
                );
            }

            columnsInitialized = true;

            if (isHeaderRow) {
                // The header row only supplies names; it is not a data row.
                continue;
            }
        }

        // Name any extra trailing cells that the column list does not cover.
        while (columns.length < row.length) {
            columns.push("field" + (columns.length + 1));
            normalizers.push(normalizeNone);
        }

        if (row.length < requiredColumns.length) {
            throw new Error(
                `BEDPE line ${lineNumber} has ${row.length} columns, expected at least ${requiredColumns.length}.`
            );
        }

        /** @type {Record<string, any>} */
        const datum = {};

        for (let i = 0; i < row.length; i++) {
            datum[columns[i]] = normalizers[i](row[i]);
        }

        rows.push(datum);
    }

    return rows;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Load a data set in Apache Parquet format for use in Vega.
|
|
3
|
+
* @param {ArrayBuffer|Uint8Array} data Parquet binary data.
|
|
4
|
+
* @returns {Promise<Record<string,any>[]>} A promise that resolves to an array of data objects representing
|
|
5
|
+
* rows of a data table.
|
|
6
|
+
*/
|
|
7
|
+
declare function parquet(data: ArrayBuffer | Uint8Array): Promise<Record<string, any>[]>;
|
|
8
|
+
declare namespace parquet {
|
|
9
|
+
let responseType: string;
|
|
10
|
+
}
|
|
11
|
+
export default parquet;
|
|
12
|
+
//# sourceMappingURL=parquet.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parquet.d.ts","sourceRoot":"","sources":["../../../../src/data/formats/parquet.js"],"names":[],"mappings":"AAYA;;;;;GAKG;AACH,+BAJW,WAAW,GAAC,UAAU,GACpB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAC,GAAG,CAAC,EAAE,CAAC,CAWzC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Adapted from: https://github.com/vega/vega-loader-parquet/blob/main/src/index.js
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Dynamically imports the local Parquet reader module on demand and returns
 * its `parquetReadObjects` export.
 *
 * @returns {Promise<typeof import("./parquetRead.js").parquetReadObjects>}
 */
async function loadParquetReadObjects() {
    const readerModule = await import("./parquetRead.js");
    return readerModule.parquetReadObjects;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Load a data set in Apache Parquet format for use in Vega.
 *
 * A `Uint8Array` may be a view onto only part of its backing `ArrayBuffer`
 * (non-zero `byteOffset` or shorter `byteLength`). In that case the viewed
 * range is copied out, so the reader never sees bytes outside the view.
 * A view that spans its whole buffer is passed through without copying.
 *
 * @param {ArrayBuffer|Uint8Array} data Parquet binary data.
 * @returns {Promise<Record<string,any>[]>} A promise that resolves to an array of data objects representing
 *     rows of a data table.
 */
export default async function parquet(data) {
    const parquetReadObjects = await loadParquetReadObjects();

    let buffer;
    if (data instanceof Uint8Array) {
        const backing = /** @type {ArrayBuffer} */ (data.buffer);
        const spansWholeBuffer =
            data.byteOffset === 0 && data.byteLength === backing.byteLength;
        buffer = spansWholeBuffer
            ? backing
            : backing.slice(
                  data.byteOffset,
                  data.byteOffset + data.byteLength
              );
    } else {
        buffer = data;
    }

    return await parquetReadObjects({ file: buffer });
}
|
|
28
|
+
|
|
29
|
+
parquet.responseType = "arrayBuffer";
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @param {ObjectParquetReadOptions} options
|
|
3
|
+
* @returns {Promise<void>}
|
|
4
|
+
*/
|
|
5
|
+
export function parquetRead(options: ObjectParquetReadOptions): Promise<void>;
|
|
6
|
+
/**
|
|
7
|
+
* @param {Omit<ObjectParquetReadOptions, "onComplete">} options
|
|
8
|
+
* @returns {Promise<Record<string, any>[]>}
|
|
9
|
+
*/
|
|
10
|
+
export function parquetReadObjects(options: Omit<ObjectParquetReadOptions, "onComplete">): Promise<Record<string, any>[]>;
|
|
11
|
+
/**
|
|
12
|
+
* Object-row variant of Parquet read options used by this trimmed reader.
|
|
13
|
+
*/
|
|
14
|
+
export type ObjectParquetReadOptions = Omit<import("hyparquet").ParquetReadOptions, "rowFormat" | "filter" | "filterStrict" | "onComplete"> & {
|
|
15
|
+
onComplete?: (rows: Record<string, any>[]) => void;
|
|
16
|
+
};
|
|
17
|
+
export type RowGroupObjectBuilder = (groupData: Record<string, any>[], selectStart: number, selectCount: number, columnData: import("hyparquet").DecodedArray[], columnSkipped: number[]) => Record<string, any>[];
|
|
18
|
+
//# sourceMappingURL=parquetRead.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parquetRead.d.ts","sourceRoot":"","sources":["../../../../src/data/formats/parquetRead.js"],"names":[],"mappings":"AAqOA;;;GAGG;AACH,qCAHW,wBAAwB,GACtB,OAAO,CAAC,IAAI,CAAC,CAiFzB;AAED;;;GAGG;AACH,4CAHW,IAAI,CAAC,wBAAwB,EAAE,YAAY,CAAC,GAC1C,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC,CAS1C;;;;uCAlTY,IAAI,CAChB,OAAU,WAAW,EAAE,kBAAkB,EACzC,WAAc,GAAG,QAAQ,GAAG,cAAc,GAAG,YAAY,CACtD,GAAG;IACH,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,KAAK,IAAI,CAAA;CAClD;oCAIS,CACT,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,EAChC,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,OAAO,WAAW,EAAE,YAAY,EAAE,EAC9C,aAAa,EAAE,MAAM,EAAE,KACnB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE"}
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Adapted from hyparquet internals:
|
|
3
|
+
* https://github.com/hyparam/hyparquet (notably src/read.js and src/rowgroup.js).
|
|
4
|
+
*
|
|
5
|
+
* GenomeSpy-specific changes in this copy:
|
|
6
|
+
* - object-row output only (array row format removed)
|
|
7
|
+
* - filtering support removed
|
|
8
|
+
* - hot row transpose path optimized with cached codegen for typical schemas
|
|
9
|
+
* - fallback to interpreted row builder for very wide schemas
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { parquetMetadataAsync, parquetSchema } from "hyparquet/src/metadata.js";
|
|
13
|
+
import { parquetPlan, prefetchAsyncBuffer } from "hyparquet/src/plan.js";
|
|
14
|
+
import { assembleAsync, readRowGroup } from "hyparquet/src/rowgroup.js";
|
|
15
|
+
import { concat } from "hyparquet/src/utils.js";
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Object-row variant of Parquet read options used by this trimmed reader.
|
|
19
|
+
*
|
|
20
|
+
* @typedef {Omit<
|
|
21
|
+
* import("hyparquet").ParquetReadOptions,
|
|
22
|
+
* "rowFormat" | "filter" | "filterStrict" | "onComplete"
|
|
23
|
+
* > & {
|
|
24
|
+
* onComplete?: (rows: Record<string, any>[]) => void
|
|
25
|
+
* }} ObjectParquetReadOptions
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* @typedef {(
|
|
30
|
+
* groupData: Record<string, any>[],
|
|
31
|
+
* selectStart: number,
|
|
32
|
+
* selectCount: number,
|
|
33
|
+
* columnData: import("hyparquet").DecodedArray[],
|
|
34
|
+
* columnSkipped: number[]
|
|
35
|
+
* ) => Record<string, any>[]} RowGroupObjectBuilder
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/** @type {Map<string, RowGroupObjectBuilder>} */
|
|
39
|
+
const rowGroupObjectBuilderCache = new Map();
|
|
40
|
+
|
|
41
|
+
const MAX_CODEGEN_COLUMNS = 200;
|
|
42
|
+
|
|
43
|
+
/**
 * Builds a read plan from the options and starts reading every planned row
 * group.
 *
 * Note that `options.file` is replaced with the buffer returned by
 * `prefetchAsyncBuffer`, so the caller's options object is mutated.
 *
 * @param {ObjectParquetReadOptions} options
 * @returns {import("hyparquet").AsyncRowGroup[]}
 * @throws {Error} If `options.metadata` is missing.
 */
function parquetReadAsync(options) {
    const { metadata } = options;
    if (!metadata) {
        throw new Error("parquet requires metadata");
    }

    const readPlan = parquetPlan(options);
    options.file = prefetchAsyncBuffer(options.file, readPlan);

    /** @type {import("hyparquet").AsyncRowGroup[]} */
    const rowGroups = [];
    for (const groupPlan of readPlan.groups) {
        rowGroups.push(readRowGroup(options, readPlan, groupPlan));
    }
    return rowGroups;
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Flatten decoded data pages into a single decoded array.
|
|
62
|
+
* This local version avoids chunked slice/push overhead in hot paths.
|
|
63
|
+
*
|
|
64
|
+
* @param {import("hyparquet").DecodedArray[] | undefined} chunks
|
|
65
|
+
* @returns {import("hyparquet").DecodedArray}
|
|
66
|
+
*/
|
|
67
|
+
function flattenColumnChunks(chunks) {
|
|
68
|
+
if (!chunks) {
|
|
69
|
+
return [];
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if (chunks.length === 1) {
|
|
73
|
+
return chunks[0];
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
let totalLength = 0;
|
|
77
|
+
for (const chunk of chunks) {
|
|
78
|
+
totalLength += chunk.length;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const output = Array(totalLength);
|
|
82
|
+
let offset = 0;
|
|
83
|
+
for (const chunk of chunks) {
|
|
84
|
+
for (let i = 0; i < chunk.length; i++) {
|
|
85
|
+
output[offset + i] = chunk[i];
|
|
86
|
+
}
|
|
87
|
+
offset += chunk.length;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return output;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
 * Returns a function that transposes decoded column arrays into row objects
 * for the given column layout.
 *
 * One builder is compiled per column layout with `new Function`, so that every
 * row is created from the same object literal, and is cached by layout.
 * Runtime code generation can be refused, for example under a
 * Content-Security-Policy without 'unsafe-eval'. In that case the builder
 * falls back to the interpreted row loop instead of failing the whole read.
 *
 * @param {string[]} columnNames
 * @returns {RowGroupObjectBuilder}
 */
function getRowGroupObjectBuilder(columnNames) {
    // Compile one builder per column layout to keep object writes monomorphic.
    const signature = columnNames.join("\u001f");
    const cached = rowGroupObjectBuilderCache.get(signature);
    if (cached) {
        return cached;
    }

    /** @type {RowGroupObjectBuilder} */
    let builder;

    try {
        const assignments = columnNames
            .map(
                (columnName, i) =>
                    JSON.stringify(columnName) +
                    ": columnData[" +
                    i +
                    "][row - columnSkipped[" +
                    i +
                    "]]"
            )
            .join(",\n");

        builder = /** @type {RowGroupObjectBuilder} */ (
            new Function(
                "groupData",
                "selectStart",
                "selectCount",
                "columnData",
                "columnSkipped",
                // Keep generated code focused on the tight row loop only.
                "for (let selectRow = 0; selectRow < selectCount; selectRow++) {\n" +
                    "  const row = selectStart + selectRow;\n" +
                    "  groupData[selectRow] = {\n" +
                    assignments +
                    "\n" +
                    "  };\n" +
                    "}\n" +
                    "return groupData;"
            )
        );
    } catch (error) {
        // Code generation is unavailable: use the interpreted loop. The names
        // are copied because the builder outlives this call through the cache.
        const names = columnNames.slice();
        builder = (
            groupData,
            selectStart,
            selectCount,
            columnData,
            columnSkipped
        ) =>
            buildRowsInterpreted(
                groupData,
                selectStart,
                selectCount,
                names,
                columnData,
                columnSkipped
            );
    }

    rowGroupObjectBuilderCache.set(signature, builder);

    return builder;
}
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* @param {Record<string, any>[]} groupData
|
|
143
|
+
* @param {number} selectStart
|
|
144
|
+
* @param {number} selectCount
|
|
145
|
+
* @param {string[]} columnNames
|
|
146
|
+
* @param {import("hyparquet").DecodedArray[]} columnData
|
|
147
|
+
* @param {number[]} columnSkipped
|
|
148
|
+
* @returns {Record<string, any>[]}
|
|
149
|
+
*/
|
|
150
|
+
function buildRowsInterpreted(
|
|
151
|
+
groupData,
|
|
152
|
+
selectStart,
|
|
153
|
+
selectCount,
|
|
154
|
+
columnNames,
|
|
155
|
+
columnData,
|
|
156
|
+
columnSkipped
|
|
157
|
+
) {
|
|
158
|
+
for (let selectRow = 0; selectRow < selectCount; selectRow++) {
|
|
159
|
+
const row = selectStart + selectRow;
|
|
160
|
+
/** @type {Record<string, any>} */
|
|
161
|
+
const rowData = {};
|
|
162
|
+
for (let i = 0; i < columnNames.length; i++) {
|
|
163
|
+
rowData[columnNames[i]] = columnData[i][row - columnSkipped[i]];
|
|
164
|
+
}
|
|
165
|
+
groupData[selectRow] = rowData;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
return groupData;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
 * Object-only copy of hyparquet's asyncGroupToRows.
 *
 * Waits for all columns of the row group, flattens each column's pages and
 * converts the rows in `[selectStart, selectEnd)` into objects keyed by the
 * first element of each column's schema path.
 *
 * @param {import("hyparquet").AsyncRowGroup} asyncGroup
 * @param {number} selectStart
 * @param {number} selectEnd
 * @returns {Promise<Record<string, any>[]>}
 */
async function asyncGroupToRowsObject(
    { asyncColumns },
    selectStart,
    selectEnd
) {
    // Every column must be resolved before the synchronous transpose starts.
    const resolved = await Promise.all(
        asyncColumns.map((column) => column.data)
    );

    // Resolve all per-column lookups up front, outside the row loop.
    /** @type {string[]} */
    const columnNames = asyncColumns.map((column) => column.pathInSchema[0]);
    /** @type {import("hyparquet").DecodedArray[]} */
    const columnData = resolved.map((page) => flattenColumnChunks(page.data));
    /** @type {number[]} */
    const columnSkipped = resolved.map((page) => page.skipped);

    const selectCount = selectEnd - selectStart;

    /** @type {Record<string, any>[]} */
    const groupData = Array(selectCount);

    // Generated builders are only used up to a bounded column count; wider
    // schemas go through the interpreted loop.
    if (asyncColumns.length <= MAX_CODEGEN_COLUMNS) {
        const buildRows = getRowGroupObjectBuilder(columnNames);
        return buildRows(
            groupData,
            selectStart,
            selectCount,
            columnData,
            columnSkipped
        );
    }

    return buildRowsInterpreted(
        groupData,
        selectStart,
        selectCount,
        columnNames,
        columnData,
        columnSkipped
    );
}
|
|
229
|
+
|
|
230
|
+
/**
 * Reads Parquet data and reports it through the callbacks in `options`.
 *
 * - `onChunk` is called for every decoded column chunk, with the column name
 *   and the row range that the chunk covers.
 * - `onComplete` is called once with all rows in `[rowStart, rowEnd)` as
 *   objects.
 * - With neither callback, the function only waits for all column data.
 *
 * `options.metadata` is loaded and stored on `options` when it is absent.
 *
 * The promises created for `onChunk` delivery are awaited together with the
 * main read. A failed column read therefore no longer leaves an unhandled
 * rejection behind, and an exception thrown by `onChunk` rejects the returned
 * promise instead of being lost.
 *
 * @param {ObjectParquetReadOptions} options
 * @returns {Promise<void>}
 * @throws {Error} If `rowFormat`, `filter` or `filterStrict` is present in `options`.
 */
export async function parquetRead(options) {
    if ("rowFormat" in options) {
        throw new Error(
            'parquetRead supports only object rows; use rowFormat: "object" implicitly'
        );
    }
    if ("filter" in options || "filterStrict" in options) {
        throw new Error("parquetRead does not support filtering");
    }

    options.metadata ??= await parquetMetadataAsync(options.file, options);

    const { rowStart = 0, rowEnd, onChunk, onComplete } = options;

    const asyncGroups = parquetReadAsync(options);

    if (!onComplete && !onChunk) {
        for (const { asyncColumns } of asyncGroups) {
            for (const { data } of asyncColumns) {
                await data;
            }
        }
        return;
    }

    const schemaTree = parquetSchema(options.metadata);
    const assembled = asyncGroups.map((group) =>
        assembleAsync(group, schemaTree, options.parsers)
    );

    // One promise per column; each settles after its chunks were delivered.
    /** @type {Promise<void>[]} */
    const chunkDeliveries = [];

    if (onChunk) {
        for (const asyncGroup of assembled) {
            for (const asyncColumn of asyncGroup.asyncColumns) {
                chunkDeliveries.push(
                    asyncColumn.data.then(
                        /**
                         * @param {{ data: import("hyparquet").DecodedArray[]; skipped: number }} chunk
                         */
                        (chunk) => {
                            let chunkRowStart =
                                asyncGroup.groupStart + chunk.skipped;
                            for (const columnData of chunk.data) {
                                onChunk({
                                    columnName: asyncColumn.pathInSchema[0],
                                    columnData,
                                    rowStart: chunkRowStart,
                                    rowEnd: chunkRowStart + columnData.length,
                                });
                                chunkRowStart += columnData.length;
                            }
                        }
                    )
                );
            }
        }
    }

    const readRows = async () => {
        if (onComplete) {
            /** @type {Record<string, any>[]} */
            const rows = [];
            for (const asyncGroup of assembled) {
                // Clamp the requested row range to this row group.
                const selectStart = Math.max(
                    rowStart - asyncGroup.groupStart,
                    0
                );
                const selectEnd = Math.min(
                    (rowEnd ?? Infinity) - asyncGroup.groupStart,
                    asyncGroup.groupRows
                );
                const groupData = await asyncGroupToRowsObject(
                    asyncGroup,
                    selectStart,
                    selectEnd
                );
                concat(rows, groupData);
            }
            onComplete(rows);
        } else {
            for (const { asyncColumns } of assembled) {
                for (const { data } of asyncColumns) {
                    await data;
                }
            }
        }
    };

    // Promise.all observes every delivery promise, so none can reject unhandled.
    await Promise.all([readRows(), ...chunkDeliveries]);
}
|
|
314
|
+
|
|
315
|
+
/**
 * Promise-returning wrapper around `parquetRead`: resolves with the rows that
 * `parquetRead` passes to `onComplete` and rejects if the read fails.
 *
 * @param {Omit<ObjectParquetReadOptions, "onComplete">} options
 * @returns {Promise<Record<string, any>[]>}
 */
export function parquetReadObjects(options) {
    return new Promise((resolve, reject) => {
        const readOptions = { ...options, onComplete: resolve };
        parquetRead(readOptions).then(undefined, reject);
    });
}
|
|
@@ -17,8 +17,16 @@ export function getFormat(params: import("../../spec/data.js").DataSource, urls?
|
|
|
17
17
|
type?: "json";
|
|
18
18
|
property?: string;
|
|
19
19
|
parse?: import("../../spec/data.js").Parse | null;
|
|
20
|
+
} | {
|
|
21
|
+
type: "bed";
|
|
22
|
+
parse?: import("../../spec/data.js").Parse | null;
|
|
23
|
+
} | {
|
|
24
|
+
type: "bedpe";
|
|
25
|
+
columns?: string[];
|
|
26
|
+
parse?: import("../../spec/data.js").Parse | null;
|
|
20
27
|
} | {
|
|
21
28
|
type: string;
|
|
29
|
+
parse?: import("../../spec/data.js").Parse | null;
|
|
22
30
|
};
|
|
23
31
|
/**
|
|
24
32
|
* @param {string} type
|
|
@@ -29,6 +37,10 @@ export function responseType(type: string): string;
|
|
|
29
37
|
* @param {string | string[]} url
|
|
30
38
|
*/
|
|
31
39
|
export function extractTypeFromUrl(url: string | string[]): string;
|
|
40
|
+
/**
|
|
41
|
+
* @param {string} url
|
|
42
|
+
*/
|
|
43
|
+
export function hasGzipExtension(url: string): boolean;
|
|
32
44
|
/**
|
|
33
45
|
* @param {import("../../spec/data.js").DataFormat} dataFormat
|
|
34
46
|
* @return {dataFormat is import("../../spec/data.js").CsvDataFormat}
|
|
@@ -50,6 +62,10 @@ export function isJsonDataFormat(dataFormat: import("../../spec/data.js").DataFo
|
|
|
50
62
|
* @return {dataSource is import("../../spec/data.js").UrlData}
|
|
51
63
|
*/
|
|
52
64
|
export function isUrlData(dataSource: import("../../spec/data.js").DataSource): dataSource is import("../../spec/data.js").UrlData;
|
|
65
|
+
/**
|
|
66
|
+
* @param {string | undefined} type
|
|
67
|
+
*/
|
|
68
|
+
export function isAutoParseFormat(type: string | undefined): boolean;
|
|
53
69
|
export function makeWrapper(d: any): ((x: import("../../spec/channel.js").Scalar) => {
|
|
54
70
|
data: import("../../spec/channel.js").Scalar;
|
|
55
71
|
}) | ((x: import("../flowNode.js").Datum) => import("../flowNode.js").Datum);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dataUtils.d.ts","sourceRoot":"","sources":["../../../../src/data/sources/dataUtils.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"dataUtils.d.ts","sourceRoot":"","sources":["../../../../src/data/sources/dataUtils.js"],"names":[],"mappings":"AAMA;;;;;;;GAOG;AACH,kCAJW,OAAO,oBAAoB,EAAE,UAAU,SAEvC,MAAM,GAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;EAsB3B;AAED;;;GAGG;AACH,mCAHW,MAAM,GACJ,MAAM,CAIlB;AAED;;GAEG;AACH,wCAFW,MAAM,GAAG,MAAM,EAAE,UA2B3B;AAED;;GAEG;AACH,sCAFW,MAAM,WAWhB;AAkBD;;;GAGG;AACH,4CAHW,OAAO,oBAAoB,EAAE,UAAU,GACtC,UAAU,IAAI,OAAO,oBAAoB,EAAE,aAAa,CAInE;AAED;;;GAGG;AACH,4CAHW,OAAO,oBAAoB,EAAE,UAAU,GACtC,UAAU,IAAI,OAAO,oBAAoB,EAAE,aAAa,CAInE;AAED;;;GAGG;AACH,6CAHW,OAAO,oBAAoB,EAAE,UAAU,GACtC,UAAU,IAAI,OAAO,oBAAoB,EAAE,cAAc,CAIpE;AAED;;;;GAIG;AACH,sCAHW,OAAO,oBAAoB,EAAE,UAAU,GACtC,UAAU,IAAI,OAAO,oBAAoB,EAAE,OAAO,CAI7D;AAED;;GAEG;AACH,wCAFW,MAAM,GAAG,SAAS,WAI5B;AA/CM,+BAAgC,GAAG,QAI3B,OAAO,uBAAuB,EAAE,MAAM;;UAGtB,OAAO,gBAAgB,EAAE,KAAK,qCANR"}
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { formats } from "vega-loader";
|
|
2
2
|
import { isInlineData } from "./inlineSource.js";
|
|
3
3
|
|
|
4
|
+
const autoParseFormats = new Set(["csv", "tsv", "dsv"]);
|
|
5
|
+
const compressionExtensions = new Set(["gz", "bgz", "bgzf"]);
|
|
6
|
+
|
|
4
7
|
/**
|
|
5
8
|
* Validates data source params, infers format if not specified explicitly,
|
|
6
9
|
* returns a complete DataSource params object.
|
|
@@ -16,8 +19,10 @@ export function getFormat(params, urls = []) {
|
|
|
16
19
|
const format = { ...params.format };
|
|
17
20
|
|
|
18
21
|
format.type ??= isUrlData(params) && extractTypeFromUrl(urls);
|
|
19
|
-
|
|
20
|
-
|
|
22
|
+
if (format.parse === undefined && isAutoParseFormat(format.type)) {
|
|
23
|
+
// @ts-ignore TODO: Fix typing
|
|
24
|
+
format.parse = "auto";
|
|
25
|
+
}
|
|
21
26
|
|
|
22
27
|
if (!format.type) {
|
|
23
28
|
throw new Error(
|
|
@@ -46,8 +51,46 @@ export function extractTypeFromUrl(url) {
|
|
|
46
51
|
}
|
|
47
52
|
|
|
48
53
|
if (url) {
|
|
49
|
-
|
|
54
|
+
const path = stripUrlQueryAndHash(url).split("/").pop()?.toLowerCase();
|
|
55
|
+
|
|
56
|
+
if (!path) {
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const extensions = path.split(".");
|
|
61
|
+
while (
|
|
62
|
+
extensions.length > 1 &&
|
|
63
|
+
compressionExtensions.has(extensions.at(-1))
|
|
64
|
+
) {
|
|
65
|
+
extensions.pop();
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const extension = extensions.at(-1);
|
|
69
|
+
if (extension && formats(extension)) {
|
|
70
|
+
return extension;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
 * Tells whether the last path segment of a URL ends with one of the known
 * compression extensions. The query string and fragment are ignored and the
 * comparison is case-insensitive.
 *
 * @param {string} url
 */
export function hasGzipExtension(url) {
    const segments = stripUrlQueryAndHash(url).split("/");
    const fileName = segments[segments.length - 1]?.toLowerCase();

    if (!fileName) {
        return false;
    }

    const parts = fileName.split(".");
    const lastPart = parts[parts.length - 1];
    return Boolean(lastPart) && compressionExtensions.has(lastPart);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Removes everything from the first "?" or "#" to the end of the string.
 *
 * @param {string} url
 */
function stripUrlQueryAndHash(url) {
    const queryOrFragmentTail = /[?#].*$/;
    return url.replace(queryOrFragmentTail, "");
}
|
|
52
95
|
|
|
53
96
|
export const makeWrapper = (/** @type {any} */ d) =>
|
|
@@ -91,3 +134,10 @@ export function isJsonDataFormat(dataFormat) {
|
|
|
91
134
|
export function isUrlData(dataSource) {
|
|
92
135
|
return "url" in dataSource;
|
|
93
136
|
}
|
|
137
|
+
|
|
138
|
+
/**
 * Tells whether the given format type is a member of the auto-parse format
 * set. `undefined` is never a member.
 *
 * @param {string | undefined} type
 */
export function isAutoParseFormat(type) {
    const usesAutoParse = autoParseFormats.has(type);
    return usesAutoParse;
}
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
* @returns {data is import("../../spec/data.js").UrlData}
|
|
4
4
|
*/
|
|
5
5
|
export function isUrlData(data: Partial<import("../../spec/data.js").Data>): data is import("../../spec/data.js").UrlData;
|
|
6
|
+
/**
|
|
7
|
+
* Loads eager data from URLs and transparently decompresses gzip-compatible
|
|
8
|
+
* payloads before handing them to the registered format reader.
|
|
9
|
+
*/
|
|
6
10
|
export default class UrlSource extends DataSource {
|
|
7
11
|
/**
|
|
8
12
|
* @param {import("../../spec/data.js").UrlData} params
|