@datagrok/bio 2.1.4 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/detectors.js CHANGED
@@ -8,7 +8,8 @@
8
8
  * TODO: Use detectors from WebLogo pickUp.. methods
9
9
  */
10
10
 
11
- const CATEGORIES_SAMPLE_LIMIT = 100;
11
+ const SEQ_SAMPLE_LIMIT = 100;
12
+ const SEQ_SAMPLE_LENGTH_LIMIT = 500;
12
13
 
13
14
  /** enum type to simplify setting "user-friendly" notation if necessary */
14
15
  const NOTATION = {
@@ -30,35 +31,37 @@ const ALIGNMENT = {
30
31
  };
31
32
 
32
33
  /** Class for handling notation units in Macromolecule columns */
33
- class UnitsHandler {
34
- static TAGS = {
34
+ const UnitsHandler = {
35
+ TAGS: {
35
36
  aligned: 'aligned',
36
37
  alphabet: 'alphabet',
37
38
  alphabetSize: '.alphabetSize',
38
39
  alphabetIsMultichar: '.alphabetIsMultichar',
39
40
  separator: 'separator',
40
- };
41
- }
41
+ },
42
+ };
43
+
44
+ const isUrlRe = /[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)?/i;
42
45
 
43
46
  class BioPackageDetectors extends DG.Package {
44
47
 
45
- static PeptideFastaAlphabet = new Set([
48
+ PeptideFastaAlphabet = new Set([
46
49
  'G', 'L', 'Y', 'S', 'E', 'Q', 'D', 'N', 'F', 'A',
47
50
  'K', 'R', 'H', 'C', 'V', 'P', 'W', 'I', 'M', 'T',
48
51
  'MeNle', 'MeA', 'MeG', 'MeF',
49
52
  ]);
50
53
 
51
- static DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
54
+ DnaFastaAlphabet = new Set(['A', 'C', 'G', 'T']);
52
55
 
53
- static RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
56
+ RnaFastaAlphabet = new Set(['A', 'C', 'G', 'U']);
54
57
 
55
- static SmilesRawAlphabet = new Set([
58
+ SmilesRawAlphabet = new Set([
56
59
  'A', 'B', 'C', 'E', 'F', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'Z',
57
60
  'a', 'c', 'e', 'g', 'i', 'l', 'n', 'o', 'r', 's', 't', 'u',
58
61
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
59
62
  '+', '-', '.', , '/', '\\', '@', '[', ']', '(', ')', '#', '%', '=']);
60
63
 
61
- static SmartsRawAlphabet = new Set([
64
+ SmartsRawAlphabet = new Set([
62
65
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
63
66
  '!', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', ':', ';', '=', '@', '~', '[', ']',
64
67
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
@@ -69,7 +72,7 @@ class BioPackageDetectors extends DG.Package {
69
72
 
70
73
  /** @param s {String} - string to check
71
74
  * @returns {boolean} */
72
- static isHelm(s) {
75
+ isHelm(s) {
73
76
  return s.startsWith('PEPTIDE1{') || s.startsWith('CHEM1{') || s.startsWith('BLOB1{') ||
74
77
  s.startsWith('RNA1{') || s.startsWith('DNA1{');
75
78
  }
@@ -78,114 +81,123 @@ class BioPackageDetectors extends DG.Package {
78
81
  //input: column col
79
82
  //output: string semType
80
83
  detectMacromolecule(col) {
81
- // Fail early
82
- if (col.type !== DG.TYPE.STRING) return null;
83
-
84
- const categoriesSample = col.categories.length < CATEGORIES_SAMPLE_LIMIT ? col.categories :
85
- BioPackageDetectors.sample(col.categories, CATEGORIES_SAMPLE_LIMIT);
86
-
87
- // To collect alphabet freq three strategies can be used:
88
- // as chars, as fasta (single or within square brackets), as with the separator.
89
- if (
90
- !(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
91
- DG.Detector.sampleCategories(col, (s) => BioPackageDetectors.isHelm(s), 1, CATEGORIES_SAMPLE_LIMIT)
92
- ) {
93
- const statsAsHelm = BioPackageDetectors.getStats(categoriesSample, 2, BioPackageDetectors.splitterAsHelm);
94
- col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
95
-
96
- // alphabetSize calculated on (sub)sample of data is incorrect
97
- // const alphabetSize = Object.keys(statsAsHelm.freq).length;
98
- const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
99
- // col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
100
- col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
101
-
102
- return DG.SEMTYPE.MACROMOLECULE;
103
- }
84
+ const t1 = Date.now();
85
+ try {
86
+ // Fail early
87
+ if (col.type !== DG.TYPE.STRING) return null;
88
+
89
+ const categoriesSample = col.categories.length < SEQ_SAMPLE_LIMIT ? col.categories :
90
+ this.sample(col.categories, SEQ_SAMPLE_LIMIT);
91
+
92
+ // To collect alphabet freq three strategies can be used:
93
+ // as chars, as fasta (single or within square brackets), as with the separator.
94
+ if (
95
+ !(col.categories.length == 1 && !col.categories[0]) && // TODO: Remove with tests for single empty category value
96
+ DG.Detector.sampleCategories(col, (s) => this.isHelm(s), 1, SEQ_SAMPLE_LIMIT)
97
+ ) {
98
+ const statsAsHelm = this.getStats(categoriesSample, 2,
99
+ this.getSplitterAsHelm(SEQ_SAMPLE_LENGTH_LIMIT));
100
+ col.setTag(DG.TAGS.UNITS, NOTATION.HELM);
101
+
102
+ // alphabetSize calculated on (sub)sample of data is incorrect
103
+ // const alphabetSize = Object.keys(statsAsHelm.freq).length;
104
+ const alphabetIsMultichar = Object.keys(statsAsHelm.freq).some((m) => m.length > 1);
105
+ // col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
106
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
104
107
 
105
- const decoyAlphabets = [
106
- ['SMILES', BioPackageDetectors.SmilesRawAlphabet, 0.30],
107
- ['SMARTS', BioPackageDetectors.SmartsRawAlphabet, 0.45],
108
- ];
109
-
110
- const candidateAlphabets = [
111
- [ALPHABET.PT, BioPackageDetectors.PeptideFastaAlphabet, 0.50],
112
- [ALPHABET.DNA, BioPackageDetectors.DnaFastaAlphabet, 0.55],
113
- [ALPHABET.RNA, BioPackageDetectors.RnaFastaAlphabet, 0.55],
114
- ];
115
-
116
- // Check for url column, maybe it is too heavy check
117
- const isUrlCheck = (s) => {
118
- let res = true;
119
- try {
120
- const url = new URL(s);
121
- res = true;
122
- } catch {
123
- res = false;
124
- }
125
- return res;
126
- };
127
- const isUrl = categoriesSample.every((v) => { return !v || isUrlCheck(v); });
128
- if (isUrl) return null;
129
-
130
- // TODO: Detect HELM sequence
131
- // TODO: Lazy calculations could be helpful for performance and convenient for expressing classification logic.
132
- const statsAsChars = BioPackageDetectors.getStats(categoriesSample, 5, BioPackageDetectors.splitterAsChars);
133
- // if (Object.keys(statsAsChars.freq).length === 0) return;
134
-
135
- const decoy = BioPackageDetectors.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
136
- if (decoy != ALPHABET.UN) return null;
137
-
138
- if (statsAsChars.sameLength) {
139
- if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
140
- const alphabet = BioPackageDetectors.detectAlphabet(statsAsChars.freq, candidateAlphabets, '-');
141
- if (alphabet === ALPHABET.UN) return null;
142
-
143
- const units = NOTATION.FASTA;
144
- col.setTag(DG.TAGS.UNITS, units);
145
- col.setTag(UnitsHandler.TAGS.aligned, ALIGNMENT.SEQ_MSA);
146
- col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
147
108
  return DG.SEMTYPE.MACROMOLECULE;
148
109
  }
149
- } else {
150
- const separator = BioPackageDetectors.detectSeparator(statsAsChars.freq);
110
+
111
+ const decoyAlphabets = [
112
+ ['SMILES', this.SmilesRawAlphabet, 0.30],
113
+ ['SMARTS', this.SmartsRawAlphabet, 0.43],
114
+ ];
115
+
116
+ const candidateAlphabets = [
117
+ [ALPHABET.PT, this.PeptideFastaAlphabet, 0.50],
118
+ [ALPHABET.DNA, this.DnaFastaAlphabet, 0.55],
119
+ [ALPHABET.RNA, this.RnaFastaAlphabet, 0.55],
120
+ ];
121
+
122
+ // Check for url column, maybe it is too heavy check
123
+ const isUrlCheck = (s) => {
124
+ let res = true;
125
+ try {
126
+ const url = new URL(s);
127
+ res = true;
128
+ } catch {
129
+ res = false;
130
+ }
131
+ return res;
132
+ // return isUrlRe.test(s);
133
+ };
134
+ const isUrl = categoriesSample.every((v) => { return !v || isUrlCheck(v); });
135
+ if (isUrl) return null;
136
+
137
+ // TODO: Detect HELM sequence
138
+ // TODO: Lazy calculations could be helpful for performance and convenient for expressing classification logic.
139
+ const statsAsChars = this.getStats(categoriesSample, 5,
140
+ this.getSplitterAsChars(SEQ_SAMPLE_LENGTH_LIMIT));
141
+ // if (Object.keys(statsAsChars.freq).length === 0) return;
142
+
143
+ const decoy = this.detectAlphabet(statsAsChars.freq, decoyAlphabets, null);
144
+ if (decoy != ALPHABET.UN) return null;
145
+
146
+ const separator = this.detectSeparator(statsAsChars.freq);
147
+ const units = separator ? NOTATION.SEPARATOR : NOTATION.FASTA;
151
148
  const gapSymbol = separator ? '' : '-';
152
- const splitter = separator ? BioPackageDetectors.getSplitterWithSeparator(separator) :
153
- BioPackageDetectors.splitterAsFasta;
154
-
155
- const stats = BioPackageDetectors.getStats(categoriesSample, 5, splitter);
156
- // Empty monomer alphabet is not allowed
157
- if (Object.keys(stats.freq).length === 0) return null;
158
- // Long monomer names for sequences with separators have constraints
159
- if (separator && BioPackageDetectors.checkForbiddenWithSeparators(stats.freq)) return null;
160
-
161
- const format = separator ? NOTATION.SEPARATOR : NOTATION.FASTA;
162
- const aligned = stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
163
-
164
- // TODO: If separator detected, then extra efforts to detect alphabet are allowed.
165
- const alphabet = BioPackageDetectors.detectAlphabet(stats.freq, candidateAlphabets, gapSymbol);
166
-
167
- // const forbidden = BioPackageDetectors.checkForbiddenWoSeparator(stats.freq);
168
- if (separator || alphabet != 'UN') {
169
- col.setTag(DG.TAGS.UNITS, format);
170
- col.setTag(UnitsHandler.TAGS.aligned, aligned);
171
- col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
172
- if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
173
- if (alphabet === ALPHABET.UN) {
174
- // alphabetSize calculated on (sub)sample of data is incorrect
175
- // const alphabetSize = Object.keys(stats.freq).length;
176
- const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
177
- // col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
178
- col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
149
+ const splitter = separator ? this.getSplitterWithSeparator(separator, SEQ_SAMPLE_LENGTH_LIMIT) :
150
+ this.getSplitterAsFasta(SEQ_SAMPLE_LENGTH_LIMIT);
151
+
152
+ col.setTag(DG.TAGS.UNITS, units);
153
+ if (separator) col.setTag(UnitsHandler.TAGS.separator, separator);
154
+
155
+ if (statsAsChars.sameLength) {
156
+ if (Object.keys(statsAsChars.freq).length > 0) { // require non empty alphabet
157
+ const stats = this.getStats(categoriesSample, 5, splitter);
158
+ const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, '-');
159
+ if (alphabet === ALPHABET.UN) return null;
160
+
161
+ col.setTag(UnitsHandler.TAGS.aligned, ALIGNMENT.SEQ_MSA);
162
+ col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
163
+ return DG.SEMTYPE.MACROMOLECULE;
164
+ }
165
+ } else {
166
+ const stats = this.getStats(categoriesSample, 5, splitter);
167
+ // Empty monomer alphabet is not allowed
168
+ if (Object.keys(stats.freq).length === 0) return null;
169
+ // Long monomer names for sequences with separators have constraints
170
+ if (separator && this.checkForbiddenWithSeparators(stats.freq)) return null;
171
+
172
+ const aligned = stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
173
+
174
+ // TODO: If separator detected, then extra efforts to detect alphabet are allowed.
175
+ const alphabet = this.detectAlphabet(stats.freq, candidateAlphabets, gapSymbol);
176
+
177
+ // const forbidden = this.checkForbiddenWoSeparator(stats.freq);
178
+ if (separator || alphabet != 'UN') {
179
+ col.setTag(UnitsHandler.TAGS.aligned, aligned);
180
+ col.setTag(UnitsHandler.TAGS.alphabet, alphabet);
181
+ if (alphabet === ALPHABET.UN) {
182
+ // alphabetSize calculated on (sub)sample of data is incorrect
183
+ // const alphabetSize = Object.keys(stats.freq).length;
184
+ const alphabetIsMultichar = Object.keys(stats.freq).some((m) => m.length > 1);
185
+ // col.setTag(UnitsHandler.TAGS.alphabetSize, alphabetSize.toString());
186
+ col.setTag(UnitsHandler.TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
187
+ }
188
+ return DG.SEMTYPE.MACROMOLECULE;
179
189
  }
180
- return DG.SEMTYPE.MACROMOLECULE;
181
190
  }
191
+ } finally {
192
+ const t2 = Date.now();
193
+ console.debug('Bio: detectMacromolecule() ' + `ET = ${t2 - t1} ms.`);
182
194
  }
183
195
  }
184
196
 
185
197
  /** Detects the most frequent char with a rate of at least 0.15 of others in sum.
186
198
  * Does not use any splitting strategies, estimates just by single characters.
187
199
  * */
188
- static detectSeparator(freq) {
200
+ detectSeparator(freq) {
189
201
  // To detect a separator we analyse col's sequences character frequencies.
190
202
  // If there is an exceptionally frequent symbol, then we will call it the separator.
191
203
  // The most frequent symbol should occur with a rate of at least 0.15
@@ -200,8 +212,8 @@ class BioPackageDetectors extends DG.Package {
200
212
  const cleanFreq = Object.assign({}, ...Object.entries(freq)
201
213
  .filter(([m, f]) =>
202
214
  !noSeparatorChemRe.test(m) && !noSeparatorAlphaDigitRe.test(m) && !noSeparatorBracketsRe.test(m) &&
203
- !BioPackageDetectors.PeptideFastaAlphabet.has(m) &&
204
- !BioPackageDetectors.DnaFastaAlphabet.has(m))
215
+ !this.PeptideFastaAlphabet.has(m) &&
216
+ !this.DnaFastaAlphabet.has(m))
205
217
  .map(([m, f]) => ({[m]: f})));
206
218
  if (Object.keys(cleanFreq).length == 0) return null;
207
219
 
@@ -218,19 +230,19 @@ class BioPackageDetectors extends DG.Package {
218
230
  /** With a separator, spaces are not allowed in monomer names.
219
231
  * The monomer name/label cannot contain digits only.
220
232
  */
221
- static checkForbiddenWithSeparators(freq) {
233
+ checkForbiddenWithSeparators(freq) {
222
234
  const forbiddenRe = /[ ]|^\d+$/i;
223
235
  return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
224
236
  }
225
237
 
226
238
  // /** Without a separator, special symbols or digits are not allowed as monomers. */
227
- // static checkForbiddenWoSeparator(freq) {
239
+ // checkForbiddenWoSeparator(freq) {
228
240
  // const forbiddenRe = /[\d!@#$%^&*()_+\-=\[\]{};':"\\|,.<>\/?]/i;
229
241
  // return Object.keys(freq).filter((m) => forbiddenRe.test(m)).length > 0;
230
242
  // }
231
243
 
232
244
  /** Stats of sequences with specified splitter func, returns { freq, sameLength } */
233
- static getStats(values, minLength, splitter) {
245
+ getStats(values, minLength, splitter) {
234
246
  const freq = {};
235
247
  let sameLength = true;
236
248
  let firstLength = null;
@@ -260,9 +272,9 @@ class BioPackageDetectors extends DG.Package {
260
272
  * @param freq frequencies of monomers in sequence set
261
273
  * @param candidates an array of pairs [name, monomer set]
262
274
  * */
263
- static detectAlphabet(freq, candidates, gapSymbol) {
275
+ detectAlphabet(freq, candidates, gapSymbol) {
264
276
  const candidatesSims = candidates.map((c) => {
265
- const sim = BioPackageDetectors.getAlphabetSimilarity(freq, c[1], gapSymbol);
277
+ const sim = this.getAlphabetSimilarity(freq, c[1], gapSymbol);
266
278
  return [c[0], c[1], c[2], freq, sim];
267
279
  });
268
280
 
@@ -277,7 +289,7 @@ class BioPackageDetectors extends DG.Package {
277
289
  return alphabetName;
278
290
  }
279
291
 
280
- static getAlphabetSimilarity(freq, alphabet, gapSymbol) {
292
+ getAlphabetSimilarity(freq, alphabet, gapSymbol) {
281
293
  const keys = new Set([...new Set(Object.keys(freq)), ...alphabet]);
282
294
  keys.delete(gapSymbol);
283
295
 
@@ -288,11 +300,11 @@ class BioPackageDetectors extends DG.Package {
288
300
  alphabetA.push(alphabet.has(m) ? 10 : -20 /* penalty for character outside alphabet set*/);
289
301
  }
290
302
  /* There were a few ideas: chi-squared, pearson correlation (variance?), scalar product */
291
- const cos = BioPackageDetectors.vectorDotProduct(freqA, alphabetA) / (BioPackageDetectors.vectorLength(freqA) * BioPackageDetectors.vectorLength(alphabetA));
303
+ const cos = this.vectorDotProduct(freqA, alphabetA) / (this.vectorLength(freqA) * this.vectorLength(alphabetA));
292
304
  return cos;
293
305
  }
294
306
 
295
- static vectorLength(v) {
307
+ vectorLength(v) {
296
308
  let sqrSum = 0;
297
309
  for (let i = 0; i < v.length; i++) {
298
310
  sqrSum += v[i] * v[i];
@@ -300,7 +312,7 @@ class BioPackageDetectors extends DG.Package {
300
312
  return Math.sqrt(sqrSum);
301
313
  }
302
314
 
303
- static vectorDotProduct(v1, v2) {
315
+ vectorDotProduct(v1, v2) {
304
316
  if (v1.length != v2.length) {
305
317
  throw Error('The dimensionality of the vectors must match');
306
318
  }
@@ -312,66 +324,87 @@ class BioPackageDetectors extends DG.Package {
312
324
  }
313
325
 
314
326
  /** For trivial checks split by single chars*/
315
- static splitterAsChars(seq) {
316
- return seq.split('');
327
+ getSplitterAsChars(lengthLimit) {
328
+ return function(seq) {
329
+ return seq.split('', lengthLimit);
330
+ }.bind(this);
317
331
  }
318
332
 
319
- static getSplitterWithSeparator(separator) {
333
+ getSplitterWithSeparator(separator, lengthLimit) {
320
334
  return function(seq) {
321
- return seq.split(separator);
322
- };
335
+ // if (!!lengthLimit) {
336
+ // const res = new Array(lengthLimit);
337
+ // let pos = 0, count = 0;
338
+ // while (pos < seq.length && count < lengthLimit) {
339
+ // const newPos = seq.indexOf(separator, pos);
340
+ // res[count] = seq.substring(pos, newPos);
341
+ // count++;
342
+ // pos = newPos;
343
+ // }
344
+ //
345
+ // return res.slice(0, count);
346
+ // } else {
347
+ return seq.split(separator, lengthLimit);
348
+ // }
349
+ }.bind(this);
323
350
  }
324
351
 
325
352
  // Multichar monomer names in square brackets, single char monomers or gap symbol
326
- static monomerRe = /\[(\w+)\]|(\w)|(-)/g;
353
+ monomerRe = /\[(\w+)\]|(\w)|(-)/g;
327
354
 
328
355
  /** Split sequence for single character monomers, square brackets multichar monomer names or gap symbol. */
329
- static splitterAsFasta(seq) {
330
- const res = wu(seq.toString().matchAll(BioPackageDetectors.monomerRe)).map((ma) => {
331
- let mRes;
332
- const m = ma[0];
333
- if (m.length > 1) {
334
- mRes = ma[1];
335
- } else {
336
- mRes = m;
337
- }
338
- return mRes;
339
- }).toArray();
356
+ getSplitterAsFasta(lengthLimit) {
357
+ return function(seq) {
358
+ const res = wu(seq.toString().matchAll(this.monomerRe))
359
+ .take(lengthLimit)
360
+ .map((ma) => {
361
+ let mRes;
362
+ const m = ma[0];
363
+ if (m.length > 1) {
364
+ mRes = ma[1];
365
+ } else {
366
+ mRes = m;
367
+ }
368
+ return mRes;
369
+ }).toArray();
340
370
 
341
- return res;
371
+ return res;
372
+ }.bind(this);
342
373
  }
343
374
 
344
375
  /** Only some of the synonyms. These were obtained from the clustered oligopeptide dataset. */
345
- static aaSynonyms = {
376
+ aaSynonyms = {
346
377
  '[MeNle]': 'L', // Nle - norleucine
347
378
  '[MeA]': 'A', '[MeG]': 'G', '[MeF]': 'F',
348
379
  };
349
380
 
350
- static helmRe = /(PEPTIDE1|DNA1|RNA1)\{([^}]+)}/g;
351
- static helmPp1Re = /\[([^\[\]]+)]/g;
381
+ helmRe = /(PEPTIDE1|DNA1|RNA1)\{([^}]+)}/g;
382
+ helmPp1Re = /\[([^\[\]]+)]/g;
352
383
 
353
384
  /** Splits Helm string to monomers, but does not replace monomer names to other notation (e.g. for RNA). */
354
- static splitterAsHelm(seq) {
355
- BioPackageDetectors.helmRe.lastIndex = 0;
356
- const ea = BioPackageDetectors.helmRe.exec(seq.toString());
357
- const inSeq = ea ? ea[2] : null;
358
-
359
- const mmPostProcess = (mm) => {
360
- BioPackageDetectors.helmPp1Re.lastIndex = 0;
361
- const pp1M = BioPackageDetectors.helmPp1Re.exec(mm);
362
- if (pp1M && pp1M.length >= 2) {
363
- return pp1M[1];
364
- } else {
365
- return mm;
366
- }
367
- };
385
+ getSplitterAsHelm(lengthLimit) {
386
+ return function(seq) {
387
+ this.helmRe.lastIndex = 0;
388
+ const ea = this.helmRe.exec(seq.toString());
389
+ const inSeq = ea ? ea[2] : null;
390
+
391
+ const mmPostProcess = (mm) => {
392
+ this.helmPp1Re.lastIndex = 0;
393
+ const pp1M = this.helmPp1Re.exec(mm);
394
+ if (pp1M && pp1M.length >= 2) {
395
+ return pp1M[1];
396
+ } else {
397
+ return mm;
398
+ }
399
+ };
368
400
 
369
- const mmList = inSeq ? inSeq.split('.') : [];
370
- const mmListRes = mmList.map(mmPostProcess);
371
- return mmListRes;
401
+ const mmList = inSeq ? inSeq.split('.') : [];
402
+ const mmListRes = mmList.map(mmPostProcess);
403
+ return mmListRes;
404
+ }.bind(this);
372
405
  }
373
406
 
374
- static sample(src, n) {
407
+ sample(src, n) {
375
408
  if (src.length < n) {
376
409
  throw new Error('Sample source is less than n requested.');
377
410
  }