sdf-parser 7.0.2 → 7.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -108,6 +108,7 @@ function getMolecule$1(sdfPart, labels, currentLabels, options) {
108
108
  * @param {object} [options.modifiers] - Object containing callbacks to apply on some specific fields
109
109
  * @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
110
110
  * @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
111
+ * @returns {object} - Object containing the molecules, the labels and the statistics
111
112
  */
112
113
  function parse(sdf, options = {}) {
113
114
  options = { ...options };
@@ -148,7 +149,7 @@ function parse(sdf, options = {}) {
148
149
 
149
150
  for (let i = 0; i < entriesBoundaries.length; i++) {
150
151
  let sdfPart = sdf.slice(...entriesBoundaries[i]);
151
-
152
+ if (sdfPart.length < 40) continue;
152
153
  let currentLabels = [];
153
154
  const molecule = getMolecule$1(sdfPart, labels, currentLabels, options);
154
155
  if (!molecule) continue;
@@ -214,18 +215,26 @@ class MolfileStream extends TransformStream {
214
215
  this.#buffer += chunk;
215
216
  let begin = 0;
216
217
  let index = 0;
217
- while ((index = this.#buffer.indexOf('\n$$$$', index)) !== -1) {
218
- controller.enqueue(this.#buffer.slice(begin, index));
219
- index += 5;
220
- if (this.#buffer[index] === '\r') {
221
- index++;
218
+ while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
219
+ // we need to check if the delimiter '\n' is in the current buffer
220
+ // if it is not we need to wait for the next chunk
221
+ const endOfDelimiter = this.#buffer.indexOf('\n', index);
222
+ if (endOfDelimiter === -1) {
223
+ index = begin;
224
+ break;
225
+ }
226
+ const eolLength = this.#buffer[endOfDelimiter - 1] === '\r' ? 2 : 1;
227
+ // need to remove the last eol because we will split on eol+'>' in getMolecule
228
+ if (index - eolLength - begin > 40) {
229
+ controller.enqueue(this.#buffer.slice(begin, index - eolLength));
222
230
  }
231
+ index = endOfDelimiter + eolLength;
223
232
  begin = index;
224
233
  }
225
234
  this.#buffer = this.#buffer.slice(begin);
226
235
  },
227
236
  flush: (controller) => {
228
- if (this.#buffer) {
237
+ if (this.#buffer && this.#buffer.length > 40) {
229
238
  controller.enqueue(this.#buffer);
230
239
  }
231
240
  },
@@ -234,7 +243,7 @@ class MolfileStream extends TransformStream {
234
243
  }
235
244
 
236
245
  /**
237
- * Parse a SDF file
246
+ * Parse a SDF file as an iterator
238
247
  * @param {ReadableStream} readStream - SDF file to parse
239
248
  * @param {object} [options={}] - iterator options
240
249
  * @param {Function} [options.filter] - Callback allowing to filter the molecules
@@ -245,10 +254,9 @@ class MolfileStream extends TransformStream {
245
254
  async function* iterator(readStream, options = {}) {
246
255
  const { eol = '\n', dynamicTyping = true } = options;
247
256
 
248
- const moleculeStream = readStream.pipeThrough(new MolfileStream());
249
- for await (const molfile of moleculeStream) {
250
- if (molfile.length < 20) continue;
251
- const molecule = getMolecule(molfile, {
257
+ const moleculeStream = readStream.pipeThrough(new MolfileStream({ eol }));
258
+ for await (const entry of moleculeStream) {
259
+ const molecule = getMolecule(entry, {
252
260
  eol,
253
261
  dynamicTyping,
254
262
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sdf-parser",
3
- "version": "7.0.2",
3
+ "version": "7.0.4",
4
4
  "description": "SDF parser",
5
5
  "main": "lib/index.js",
6
6
  "module": "src/index.js",
package/src/MolfileStream.js CHANGED
@@ -7,18 +7,26 @@ export class MolfileStream extends TransformStream {
7
7
  this.#buffer += chunk;
8
8
  let begin = 0;
9
9
  let index = 0;
10
- while ((index = this.#buffer.indexOf('\n$$$$', index)) !== -1) {
11
- controller.enqueue(this.#buffer.slice(begin, index));
12
- index += 5;
13
- if (this.#buffer[index] === '\r') {
14
- index++;
10
+ while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
11
+ // we need to check if the delimiter '\n' is in the current buffer
12
+ // if it is not we need to wait for the next chunk
13
+ const endOfDelimiter = this.#buffer.indexOf('\n', index);
14
+ if (endOfDelimiter === -1) {
15
+ index = begin;
16
+ break;
15
17
  }
18
+ const eolLength = this.#buffer[endOfDelimiter - 1] === '\r' ? 2 : 1;
19
+ // need to remove the last eol because we will split on eol+'>' in getMolecule
20
+ if (index - eolLength - begin > 40) {
21
+ controller.enqueue(this.#buffer.slice(begin, index - eolLength));
22
+ }
23
+ index = endOfDelimiter + eolLength;
16
24
  begin = index;
17
25
  }
18
26
  this.#buffer = this.#buffer.slice(begin);
19
27
  },
20
28
  flush: (controller) => {
21
- if (this.#buffer) {
29
+ if (this.#buffer && this.#buffer.length > 40) {
22
30
  controller.enqueue(this.#buffer);
23
31
  }
24
32
  },
package/src/iterator.js CHANGED
@@ -3,7 +3,7 @@ import { parseString } from 'dynamic-typing';
3
3
  import { MolfileStream } from './MolfileStream.js';
4
4
 
5
5
  /**
6
- * Parse a SDF file
6
+ * Parse a SDF file as an iterator
7
7
  * @param {ReadableStream} readStream - SDF file to parse
8
8
  * @param {object} [options={}] - iterator options
9
9
  * @param {Function} [options.filter] - Callback allowing to filter the molecules
@@ -14,10 +14,9 @@ import { MolfileStream } from './MolfileStream.js';
14
14
  export async function* iterator(readStream, options = {}) {
15
15
  const { eol = '\n', dynamicTyping = true } = options;
16
16
 
17
- const moleculeStream = readStream.pipeThrough(new MolfileStream());
18
- for await (const molfile of moleculeStream) {
19
- if (molfile.length < 20) continue;
20
- const molecule = getMolecule(molfile, {
17
+ const moleculeStream = readStream.pipeThrough(new MolfileStream({ eol }));
18
+ for await (const entry of moleculeStream) {
19
+ const molecule = getMolecule(entry, {
21
20
  eol,
22
21
  dynamicTyping,
23
22
  });
package/src/parse.js CHANGED
@@ -13,6 +13,7 @@ import { getMolecule } from './util/getMolecule';
13
13
  * @param {object} [options.modifiers] - Object containing callbacks to apply on some specific fields
14
14
  * @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
15
15
  * @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
16
+ * @returns {object} - Object containing the molecules, the labels and the statistics
16
17
  */
17
18
  export function parse(sdf, options = {}) {
18
19
  options = { ...options };
@@ -53,7 +54,7 @@ export function parse(sdf, options = {}) {
53
54
 
54
55
  for (let i = 0; i < entriesBoundaries.length; i++) {
55
56
  let sdfPart = sdf.slice(...entriesBoundaries[i]);
56
-
57
+ if (sdfPart.length < 40) continue;
57
58
  let currentLabels = [];
58
59
  const molecule = getMolecule(sdfPart, labels, currentLabels, options);
59
60
  if (!molecule) continue;