@genome-spy/core 0.43.2 → 0.43.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,11 +92,22 @@ export default class BigBedSource extends SingleAxisWindowedSource {
92
92
  }
93
93
 
94
94
  /**
95
- * A specific optimization for Hautaniemi Lab's Methylation project, where
96
- * we have hundreds of columns having small integers (0-100).
97
- * This parser avoids generating piles of garbage to be collected by the GC.
98
- * We don't split the line into an array of strings, but instead parse the
99
- * integer fields directly from the original string.
95
+ * An optimized parser for Hautaniemi Lab's Methylation project, where
96
+ * we have hundreds of columns having small integers (0-100). This is over 5x
97
+ * faster than @gmod/bed's parser.
98
+ *
99
+ * Techniques used:
100
+ *
101
+ * 1. Avoid generating garbage by parsing integers directly from the string,
102
+ * i.e., without splitting the line into an array of strings.
103
+ * 2. Use a template object to avoid hidden class changes after each property
104
+ * assignment. Avoids garbage generation.
105
+ * 3. Generate and compile code that uses constants to access object properties,
106
+ * avoiding Map lookups during assignment.
107
+ * 4. Input chrom, startPos, and endPos as parameters so that @gmod/bbi's
108
+ * output doesn't first need to be converted to a string just to be parsed
109
+ * again.
110
+ *
100
111
  * This parser doesn't support arrays, etc. at the moment. This could, however,
101
112
  * be extended into a fully-featured parser.
102
113
  *
@@ -152,28 +163,11 @@ function makeFastParser(bed) {
152
163
  return value * sign;
153
164
  }
154
165
 
155
- const fieldParsers = fields.map((field) => {
156
- const { name, type } = field;
157
-
158
- if (["ubyte", "int", "uint"].includes(type)) {
159
- return () => {
160
- currentObject[name] = parseInt();
161
- };
162
- } else if (field.isNumeric) {
163
- return () => {
164
- currentObject[name] = Number(parseString());
165
- };
166
- } else if (["char", "string", "lstring"].includes(type)) {
167
- return () => {
168
- currentObject[name] = parseString();
169
- };
170
- } else {
171
- throw new Error("Unsupported type: " + type);
172
- }
173
- });
174
-
175
166
  const templateFields = fields.map(
176
- (field) => `"${field.name}": ${field.isNumeric ? "0" : "emptyString"}`
167
+ (field) =>
168
+ `${JSON.stringify(field.name)}: ${
169
+ field.isNumeric ? "0" : "emptyString"
170
+ }`
177
171
  );
178
172
 
179
173
  /**
@@ -196,6 +190,42 @@ function makeFastParser(bed) {
196
190
  }
197
191
  };`)();
198
192
 
193
+ /*
194
+ * Generate setter code that uses constant field names to access the
195
+ * object's properties. This avoids Map lookups and allows for efficient
196
+ * machine code to be generated by the VM.
197
+ */
198
+ const fieldParsers = fields.map((field) => {
199
+ const type = field.type;
200
+ const name = JSON.stringify(field.name);
201
+
202
+ if (["ubyte", "int", "uint"].includes(type)) {
203
+ return `d[${name}] = parseInt();`;
204
+ } else if (field.isNumeric) {
205
+ return `d[${name}] = Number(parseString());`;
206
+ } else if (["char", "string", "lstring"].includes(type)) {
207
+ return `d[${name}] = parseString();`;
208
+ } else {
209
+ throw new Error("Unsupported type: " + type);
210
+ // TODO: Implement them
211
+ }
212
+ });
213
+
214
+ /*
215
+ * Split the field parsers into chunks to avoid creating functions so
216
+ * large that the JavaScript VM would decline to optimize them.
217
+ * Not sure if this is really necessary, but the added cost is minimal.
218
+ */
219
+ const chunckedFieldParsers = chunk(fieldParsers, 50).map((chunk, i) =>
220
+ Function(
221
+ "parseInt",
222
+ "parseString",
223
+ `return function parseFieldChunk${i}(d) {
224
+ ${chunk.join("\n")}
225
+ }`
226
+ )(parseInt, parseString)
227
+ );
228
+
199
229
  /**
200
230
  * @param {string} line
201
231
  */
@@ -216,8 +246,8 @@ function makeFastParser(bed) {
216
246
 
217
247
  currentObject = makeTemplate(chrom, chromStart, chromEnd);
218
248
 
219
- for (let j = 0, n = fieldParsers.length; j < n; j++) {
220
- fieldParsers[j]();
249
+ for (const parser of chunckedFieldParsers) {
250
+ parser(currentObject);
221
251
  }
222
252
 
223
253
  return currentObject;
@@ -225,3 +255,15 @@ function makeFastParser(bed) {
225
255
 
226
256
  return parseLine;
227
257
  }
258
+
259
/**
 * Splits an array into consecutive chunks of at most `size` elements.
 * The last chunk holds the remainder and may be shorter. An empty input
 * yields an empty result.
 *
 * @param {T[]} arr the array to split; it is not modified
 * @param {number} size maximum chunk length, must be a positive integer
 * @returns {T[][]} the chunks, in the original element order
 * @throws {RangeError} if `size` is not a positive integer
 * @template T
 */
function chunk(arr, size) {
  // Without this guard a zero size fails with an opaque "Invalid array
  // length" error, and a negative or NaN size silently returns no chunks.
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`Chunk size must be a positive integer, got ${size}`);
  }

  // https://www.30secondsofcode.org/js/s/split-array-into-chunks/
  return Array.from({ length: Math.ceil(arr.length / size) }, (_v, i) =>
    arr.slice(i * size, i * size + size)
  );
}
package/package.json CHANGED
@@ -7,7 +7,7 @@
7
7
  },
8
8
  "contributors": [],
9
9
  "license": "MIT",
10
- "version": "0.43.2",
10
+ "version": "0.43.3",
11
11
  "jsdelivr": "dist/bundle/index.js",
12
12
  "unpkg": "dist/bundle/index.js",
13
13
  "browser": "dist/bundle/index.js",
@@ -65,5 +65,5 @@
65
65
  "vega-scale": "^7.1.1",
66
66
  "vega-util": "^1.16.0"
67
67
  },
68
- "gitHead": "12ff70326672ed7ca34ef081c01fc25f19cf1ede"
68
+ "gitHead": "0c6e0418a7461b40da98896bfaf5e05732384e85"
69
69
  }