sdf-parser 4.0.2 → 5.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/lib/index.js +239 -0
- package/package.json +25 -16
- package/src/__tests__/__snapshots__/getEntriesBoundaries.test.js.snap +10 -0
- package/src/__tests__/checkOptions.test.js +57 -0
- package/src/__tests__/checkUndefined.test.js +24 -0
- package/src/__tests__/getEntriesBoundaries.test.js +33 -0
- package/src/__tests__/index.test.js +76 -0
- package/src/__tests__/notWellFormatted.test.js +14 -0
- package/src/__tests__/stream.test.js +98 -0
- package/src/__tests__/test.sdf +8707 -0
- package/src/__tests__/test1.sdf +38 -0
- package/src/__tests__/test2.sdf +498 -0
- package/src/__tests__/test4.sdf +37 -0
- package/src/getEntriesBoundaries.js +21 -0
- package/src/index.js +2 -7
- package/src/parse.js +31 -13
- package/src/stream.browser.js +2 -2
- package/src/stream.js +10 -16
- package/History.md +0 -100
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@ In node script:
|
|
|
17
17
|
```js
|
|
18
18
|
// parses the file test.sdf located in the same directory
|
|
19
19
|
|
|
20
|
-
var parse = require('sdf-parser');
|
|
20
|
+
var { parse } = require('sdf-parser');
|
|
21
21
|
|
|
22
22
|
var fs = require('fs');
|
|
23
23
|
var sdf = fs.readFileSync('./test.sdf', 'utf-8');
|
|
@@ -44,16 +44,16 @@ var result = parse(sdf, {
|
|
|
44
44
|
exclude: ['Number of H-Donors'],
|
|
45
45
|
include: ['Number of H-Donors', 'CLogP', 'Code'],
|
|
46
46
|
modifiers: {
|
|
47
|
-
CLogP: function(field) {
|
|
47
|
+
CLogP: function (field) {
|
|
48
48
|
return {
|
|
49
49
|
low: field * 1 - 0.2,
|
|
50
|
-
high: field * 1 + 0.2
|
|
50
|
+
high: field * 1 + 0.2,
|
|
51
51
|
};
|
|
52
|
-
}
|
|
52
|
+
},
|
|
53
53
|
},
|
|
54
|
-
filter:
|
|
54
|
+
filter: (entry) => {
|
|
55
55
|
return entry.CLogP && entry.CLogP.low > 4;
|
|
56
|
-
}
|
|
56
|
+
},
|
|
57
57
|
});
|
|
58
58
|
```
|
|
59
59
|
|
|
@@ -98,7 +98,7 @@ fs.createReadStream('test.sdf')
|
|
|
98
98
|
|
|
99
99
|
[npm-image]: https://img.shields.io/npm/v/sdf-parser.svg?style=flat-square
|
|
100
100
|
[npm-url]: https://www.npmjs.com/package/sdf-parser
|
|
101
|
-
[travis-image]: https://img.shields.io/travis/cheminfo
|
|
102
|
-
[travis-url]: https://travis-ci.org/cheminfo
|
|
101
|
+
[travis-image]: https://img.shields.io/travis/cheminfo/sdf-parser/master.svg?style=flat-square
|
|
102
|
+
[travis-url]: https://travis-ci.org/cheminfo/sdf-parser
|
|
103
103
|
[download-image]: https://img.shields.io/npm/dm/sdf-parser.svg?style=flat-square
|
|
104
104
|
[download-url]: https://www.npmjs.com/package/sdf-parser
|
package/lib/index.js
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
var ensureString = require('ensure-string');
|
|
6
|
+
var pipeline = require('pumpify');
|
|
7
|
+
var split2 = require('split2');
|
|
8
|
+
var through2 = require('through2');
|
|
9
|
+
var filter = require('through2-filter');
|
|
10
|
+
|
|
11
|
+
/**
 * Normalise a required module so that its main export is reachable as `.default`.
 * Objects that already expose a `default` key are returned untouched; anything
 * else (functions, primitives, null, plain objects) is wrapped.
 */
function _interopDefaultLegacy (e) {
  const alreadyWrapped = e && typeof e === 'object' && 'default' in e;
  return alreadyWrapped ? e : { 'default': e };
}
|
|
12
|
+
|
|
13
|
+
var pipeline__default = /*#__PURE__*/_interopDefaultLegacy(pipeline);
|
|
14
|
+
var split2__default = /*#__PURE__*/_interopDefaultLegacy(split2);
|
|
15
|
+
var through2__default = /*#__PURE__*/_interopDefaultLegacy(through2);
|
|
16
|
+
var filter__default = /*#__PURE__*/_interopDefaultLegacy(filter);
|
|
17
|
+
|
|
18
|
+
/**
 * Locate every entry of `string` delimited by `substring`.
 *
 * Each result item is a `[start, end]` pair usable with `String#substring`.
 * After a delimiter, the rest of that line (up to and including the next
 * `eol`) is skipped before the following entry starts. When no further
 * delimiter exists, the remaining text (possibly empty) is the last entry.
 * When a delimiter is found but no `eol` follows it, scanning stops there.
 *
 * @param {string} string text to scan
 * @param {string} substring delimiter that ends an entry
 * @param {string} eol end-of-line sequence
 * @returns {Array<Array<number>>} list of `[start, end]` offsets
 */
function getEntriesBoundaries(string, substring, eol) {
  const boundaries = [];
  let entryStart = 0;
  for (;;) {
    const delimiterIndex = string.indexOf(substring, entryStart);
    if (delimiterIndex === -1) {
      // no more delimiters: whatever is left is the final entry
      boundaries.push([entryStart, string.length]);
      return boundaries;
    }
    boundaries.push([entryStart, delimiterIndex]);
    const lineEnd = string.indexOf(eol, delimiterIndex + substring.length);
    if (lineEnd === -1) {
      // delimiter sits on the last line, nothing can follow it
      return boundaries;
    }
    entryStart = lineEnd + eol.length;
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Parse a SDF file
 * @param {string|ArrayBuffer|Uint8Array} sdf SDF file to parse
 * @param {any} [options={}]
 * @param {array<string>} [options.include] List of fields to include
 * @param {array<string>} [options.exclude] List of fields to exclude
 * @param {function} [options.filter] Called with each parsed molecule; the molecule is kept only when the result is truthy
 * @param {boolean} [options.dynamicTyping=true] Dynamically type the data
 * @param {object} [options.modifiers] Object containing callbacks to apply on some specific fields (own properties only)
 * @param {object} [options.forEach] Per-field values copied onto the matching statistics entry (own properties only)
 * @param {boolean} [options.mixedEOL=false] Set to true if you know there is a mixture between \r\n and \n
 * @returns {{time: number, molecules: Array<object>, labels: Array<string>, statistics: Array<object>}}
 * @throws {TypeError} If `sdf` is still not a string after `ensureString`
 */
function parse(sdf, options = {}) {
  const {
    include,
    exclude,
    filter,
    modifiers = {},
    forEach = {},
    dynamicTyping = true,
  } = options;

  sdf = ensureString.ensureString(sdf);
  if (typeof sdf !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }

  // Field names come from the file, so "constructor" or "toString" are legal
  // labels: every lookup keyed by a label must ignore inherited members.
  const hasOwn = (object, key) =>
    Object.prototype.hasOwnProperty.call(object, key);

  let eol = '\n';
  if (options.mixedEOL) {
    sdf = sdf.replace(/\r\n/g, '\n');
    sdf = sdf.replace(/\r/g, '\n');
  } else {
    // we will find the delimiter in order to be much faster and not use regular expression
    const header = sdf.slice(0, 1000);
    if (header.includes('\r\n')) {
      eol = '\r\n';
    } else if (header.includes('\r')) {
      eol = '\r';
    }
  }

  const entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);
  const molecules = [];
  // no prototype, so a label can never resolve to an Object.prototype member
  const labels = Object.create(null);

  const start = Date.now();

  for (let i = 0; i < entriesBoundaries.length; i++) {
    const sdfPart = sdf.substring(...entriesBoundaries[i]);
    const parts = sdfPart.split(`${eol}>`);
    // entries whose molfile part is 5 characters or shorter are ignored
    if (parts.length === 0 || parts[0].length <= 5) {
      continue;
    }

    const molecule = {};
    const currentLabels = [];
    molecule.molfile = parts[0] + eol;

    for (let j = 1; j < parts.length; j++) {
      const lines = parts[j].split(eol);
      const from = lines[0].indexOf('<');
      const to = lines[0].indexOf('>');
      const label = lines[0].substring(from + 1, to);
      currentLabels.push(label);

      if (!labels[label]) {
        labels[label] = {
          counter: 0,
          isNumeric: dynamicTyping,
          keep: false,
        };
        if (
          (!exclude || exclude.indexOf(label) === -1) &&
          (!include || include.indexOf(label) > -1)
        ) {
          labels[label].keep = true;
          if (hasOwn(modifiers, label) && modifiers[label]) {
            labels[label].modifier = modifiers[label];
          }
          if (hasOwn(forEach, label) && forEach[label]) {
            labels[label].forEach = forEach[label];
          }
        }
      }

      if (!labels[label].keep) {
        continue;
      }

      // the first line holds the label, the last one is the trailing blank line
      for (let k = 1; k < lines.length - 1; k++) {
        if (hasOwn(molecule, label) && molecule[label]) {
          molecule[label] += eol + lines[k];
        } else {
          molecule[label] = lines[k];
        }
      }

      if (labels[label].modifier) {
        const modifiedValue = labels[label].modifier(
          hasOwn(molecule, label) ? molecule[label] : undefined,
        );
        if (modifiedValue === undefined || modifiedValue === null) {
          delete molecule[label];
        } else {
          molecule[label] = modifiedValue;
        }
      }

      if (labels[label].isNumeric) {
        const value = hasOwn(molecule, label) ? molecule[label] : undefined;
        if (typeof value === 'string') {
          // strings with a leading zero followed by a digit (codes) stay text
          if (!Number.isFinite(Number(value)) || /^0[0-9]/.test(value)) {
            labels[label].isNumeric = false;
          }
        } else if (typeof value !== 'number' || !Number.isFinite(value)) {
          // a modifier may return anything; only finite numbers stay numeric
          labels[label].isNumeric = false;
        }
      }
    }

    if (!filter || filter(molecule)) {
      molecules.push(molecule);
      // only now we can increase the counter
      for (let j = 0; j < currentLabels.length; j++) {
        labels[currentLabels[j]].counter++;
      }
    }
  }

  // all numeric fields should be converted to numbers
  for (const label in labels) {
    const currentLabel = labels[label];
    if (!currentLabel.isNumeric) {
      continue;
    }
    currentLabel.minValue = Infinity;
    currentLabel.maxValue = -Infinity;
    for (let j = 0; j < molecules.length; j++) {
      if (!hasOwn(molecules[j], label)) {
        continue;
      }
      const raw = molecules[j][label];
      // empty strings are left untouched; a numeric 0 from a modifier still counts
      if (!raw && raw !== 0) {
        continue;
      }
      const value = parseFloat(raw);
      molecules[j][label] = value;
      if (value > currentLabel.maxValue) {
        currentLabel.maxValue = value;
      }
      if (value < currentLabel.minValue) {
        currentLabel.minValue = value;
      }
    }
  }

  // flag the labels present in every kept record and expose them as a list
  const statistics = [];
  for (const key in labels) {
    const statistic = labels[key];
    statistic.always = statistic.counter === molecules.length;
    statistic.label = key;
    statistics.push(statistic);
  }

  return {
    time: Date.now() - start,
    molecules,
    labels: Object.keys(labels),
    statistics,
  };
}
|
|
197
|
+
|
|
198
|
+
const filterStream = filter__default["default"].bind(null, { objectMode: true });
|
|
199
|
+
|
|
200
|
+
/**
 * Tell whether a chunk carries real content: it must be longer than one
 * character both before and after trimming surrounding whitespace.
 * @param {string} chunk
 * @returns {boolean}
 */
function filterCb(chunk) {
  if (!(chunk.length > 1)) {
    return false;
  }
  const trimmed = chunk.trim();
  return trimmed.length > 1;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Build an object-mode stream that cuts incoming SDF text on the `$$$$`
 * delimiter line, drops chunks rejected by `filterCb`, and emits each
 * remaining entry with the delimiter line appended again.
 */
function entries() {
  const splitOnDelimiter = split2__default["default"](/\r?\n\${4}.*\r?\n/);
  const dropBlankChunks = filterStream(filterCb);
  const restoreDelimiter = through2__default["default"](
    { objectMode: true },
    function process(value, encoding, callback) {
      // reuse the line ending found inside the entry itself
      const eol = value.includes('\r\n') ? '\r\n' : '\n';
      this.push(value + eol + '$$$$' + eol);
      callback();
    },
  );
  return pipeline__default["default"].obj(
    splitOnDelimiter,
    dropBlankChunks,
    restoreDelimiter,
  );
}
|
|
215
|
+
|
|
216
|
+
/**
 * Build an object-mode stream that parses every SDF entry with `parse`.
 * An entry is emitted only when parsing yields exactly one molecule: the
 * whole parse result when `options.fullResult` is truthy, the molecule
 * itself otherwise. Any exception is handed to the stream callback.
 * @param {object} [options] forwarded unchanged to `parse`
 */
function molecules(options) {
  const source = entries();
  const parseEntry = through2__default["default"](
    { objectMode: true },
    function process(value, encoding, callback) {
      try {
        const result = parse(value, options);
        if (result.molecules.length === 1) {
          const wantsFullResult = options && options.fullResult;
          this.push(wantsFullResult ? result : result.molecules[0]);
        }
        callback();
      } catch (e) {
        callback(e);
      }
    },
  );
  return pipeline__default["default"].obj(source, parseEntry);
}
|
|
236
|
+
|
|
237
|
+
exports.entries = entries;
|
|
238
|
+
exports.molecules = molecules;
|
|
239
|
+
exports.parse = parse;
|
package/package.json
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sdf-parser",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "5.0.2",
|
|
4
4
|
"description": "SDF parser",
|
|
5
|
-
"main": "
|
|
5
|
+
"main": "lib/index.js",
|
|
6
|
+
"module": "src/index.js",
|
|
6
7
|
"files": [
|
|
8
|
+
"lib",
|
|
7
9
|
"src"
|
|
8
10
|
],
|
|
11
|
+
"sideEffects": false,
|
|
9
12
|
"scripts": {
|
|
10
|
-
"
|
|
13
|
+
"build": "npm run compile && cheminfo-build --root SDFParser",
|
|
14
|
+
"compile": "rollup -c",
|
|
15
|
+
"eslint": "eslint src",
|
|
11
16
|
"eslint-fix": "npm run eslint -- --fix",
|
|
17
|
+
"prepack": "npm run compile",
|
|
18
|
+
"prettier": "prettier --check src",
|
|
19
|
+
"prettier-write": "prettier --write src",
|
|
12
20
|
"test": "npm run test-coverage && npm run eslint",
|
|
13
21
|
"test-coverage": "jest --coverage",
|
|
14
22
|
"test-only": "jest"
|
|
@@ -18,37 +26,38 @@
|
|
|
18
26
|
},
|
|
19
27
|
"repository": {
|
|
20
28
|
"type": "git",
|
|
21
|
-
"url": "https://github.com/cheminfo
|
|
29
|
+
"url": "https://github.com/cheminfo/sdf-parser.git"
|
|
22
30
|
},
|
|
23
31
|
"keywords": [
|
|
24
32
|
"sdf",
|
|
25
33
|
"parser",
|
|
26
34
|
"molfile",
|
|
27
35
|
"v2000",
|
|
36
|
+
"v3000",
|
|
28
37
|
"mdl"
|
|
29
38
|
],
|
|
30
39
|
"author": "Luc Patiny",
|
|
31
40
|
"license": "MIT",
|
|
32
41
|
"bugs": {
|
|
33
|
-
"url": "https://github.com/cheminfo
|
|
42
|
+
"url": "https://github.com/cheminfo/sdf-parser/issues"
|
|
34
43
|
},
|
|
35
|
-
"homepage": "https://github.com/cheminfo
|
|
44
|
+
"homepage": "https://github.com/cheminfo/sdf-parser",
|
|
36
45
|
"devDependencies": {
|
|
46
|
+
"@babel/plugin-transform-modules-commonjs": "^7.18.6",
|
|
37
47
|
"babel-eslint": "^10.1.0",
|
|
38
48
|
"callback-stream": "^1.1.0",
|
|
39
|
-
"
|
|
40
|
-
"eslint
|
|
41
|
-
"eslint-
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
"
|
|
45
|
-
"openchemlib": "^7.2.3",
|
|
46
|
-
"prettier": "^2.0.5"
|
|
49
|
+
"cheminfo-build": "^1.1.11",
|
|
50
|
+
"eslint": "^8.22.0",
|
|
51
|
+
"eslint-config-cheminfo": "^8.0.2",
|
|
52
|
+
"jest": "^28.1.3",
|
|
53
|
+
"openchemlib": "^8.0.1",
|
|
54
|
+
"prettier": "^2.7.1"
|
|
47
55
|
},
|
|
48
56
|
"dependencies": {
|
|
57
|
+
"ensure-string": "^1.2.0",
|
|
49
58
|
"pumpify": "^2.0.1",
|
|
50
|
-
"split2": "^
|
|
51
|
-
"through2": "^
|
|
59
|
+
"split2": "^4.1.0",
|
|
60
|
+
"through2": "^4.0.2",
|
|
52
61
|
"through2-filter": "^3.0.0"
|
|
53
62
|
}
|
|
54
63
|
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
let sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
|
|
6
|
+
|
|
7
|
+
// Checks include/exclude, modifiers and filter applied together on test.sdf.
describe('SDF Parser options', () => {
  let result = parse(sdf, {
    exclude: ['Number of H-Donors'],
    include: ['Number of H-Donors', 'CLogP', 'Code'],
    modifiers: {
      CLogP: (field) => {
        return {
          low: field * 1 - 0.2,
          high: field * 1 + 0.2,
        };
      },
    },
    filter: (entry) => {
      return entry.CLogP && entry.CLogP.low > 4;
    },
  });

  it('Check statistics', () => {
    expect(result.statistics[0].counter).toBe(43);
    expect(result.statistics[0].isNumeric).toBe(false);
    expect(result.statistics[0].label).toBe('Code');
    expect(result.statistics[0].always).toBe(true);
    expect(result.statistics[4].counter).toBe(43);
    expect(result.statistics[4].isNumeric).toBe(false);
    expect(result.statistics[4].label).toBe('CLogP');
    expect(result.statistics[4].always).toBe(true);
  });

  it('Check molecules', () => {
    expect(result.molecules).toHaveLength(43);
    let molecule = result.molecules[0];

    expect(Object.keys(molecule)).toHaveLength(3);
    expect(molecule.Code).toBe('0100380851');
    // the second argument of toBeCloseTo is a number of decimal digits, not a
    // tolerance: 0.0001 accepted any value within about 0.5 of the target
    expect(molecule.CLogP.low).toBeCloseTo(4.8, 4);
    expect(molecule.CLogP.high).toBeCloseTo(5.2, 4);
    expect(molecule.molfile.split('\n')).toHaveLength(56);
  });

  it('should throw with non-string argument', () => {
    expect(() => {
      parse();
    }).toThrow(TypeError);
    expect(() => {
      parse(42);
    }).toThrow(TypeError);
    expect(() => {
      parse({});
    }).toThrow(TypeError);
  });
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
let sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
|
|
6
|
+
|
|
7
|
+
// A modifier returning undefined removes the field, so the filter below
// (which needs entry.CLogP) rejects every molecule.
describe('SDF Parser options and undefined', () => {
  const dropField = () => undefined;
  const options = {
    exclude: ['Number of H-Donors'],
    include: ['Number of H-Donors', 'CLogP', 'Code'],
    modifiers: { CLogP: dropField },
    filter: (entry) => entry.CLogP && entry.CLogP.low > 4,
  };
  const { molecules } = parse(sdf, options);

  it('Check molecules', () => {
    expect(molecules).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { getEntriesBoundaries } from '../getEntriesBoundaries';
|
|
4
|
+
|
|
5
|
+
let sdf0 = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
|
|
6
|
+
let sdf1 = fs.readFileSync(`${__dirname}/test1.sdf`, 'utf-8');
|
|
7
|
+
let sdf2 = fs.readFileSync(`${__dirname}/test2.sdf`, 'utf-8');
|
|
8
|
+
|
|
9
|
+
// For every fixture, the boundaries must cut the text exactly like a split on
// the `$$$$` delimiter line would.
[sdf0, sdf1, sdf2].forEach((sdf, index) => {
  // same end-of-line detection as the parser: look at the first 1000 characters
  let eol = '\n';
  const header = sdf.slice(0, 1000);
  if (header.includes('\r\n')) {
    eol = '\r\n';
  } else if (header.includes('\r')) {
    eol = '\r';
  }

  // the index keeps the three generated titles distinct in the test report
  test(`Split should match regex behavior (fixture ${index})`, () => {
    const sdfParts = sdf.split(new RegExp(`${eol}\\$\\$\\$\\$.*${eol}`));
    expect(sdfParts).toStrictEqual(
      getEntriesBoundaries(sdf, `${eol}$$$$`, eol).map((v) =>
        sdf.substring(...v),
      ),
    );
  });
});

test('should parse sdf files without EOL in the EOF', () => {
  const eol = '\n';
  const sdf = fs.readFileSync(`${__dirname}/test4.sdf`, 'utf-8');

  expect(getEntriesBoundaries(sdf, `${eol}$$$$`, eol)).toMatchSnapshot();
});
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
let sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
|
|
6
|
+
let sdf1 = fs.readFileSync(`${__dirname}/test1.sdf`, 'utf-8');
|
|
7
|
+
|
|
8
|
+
// Default options: dynamic typing is on, every field is kept.
describe('SDF Parser', () => {
  const result = parse(sdf);

  it('Check statistics', () => {
    const { statistics } = result;
    const [code, donors] = statistics;
    expect(code.counter).toBe(128);
    expect(code.isNumeric).toBe(false);
    expect(code.label).toBe('Code');
    expect(donors.counter).toBe(128);
    expect(donors.minValue).toBe(0);
    expect(donors.maxValue).toBe(5);
    expect(donors.isNumeric).toBe(true);
    expect(donors.label).toBe('Number of H-Donors');
    expect(code.always).toBe(true);
    expect(statistics[4].always).toBe(false);
  });

  it('Check molecules', () => {
    const [first] = result.molecules;
    expect(first.Code).toContain('0100380824');
    expect(first.CLogP).toBe(2.7);
    expect(first.molfile.split('\n')).toHaveLength(37);
  });

  it('should throw with non-string argument', () => {
    expect(() => parse()).toThrow(TypeError);
    expect(() => parse(42)).toThrow(TypeError);
    expect(() => parse({})).toThrow(TypeError);
  });
});
|
|
43
|
+
|
|
44
|
+
// With dynamicTyping disabled every field stays a string and no min/max is computed.
describe('SDF Parser no dynamicTyping', () => {
  const result = parse(sdf, { dynamicTyping: false });

  it('Check statistics', () => {
    const { statistics } = result;
    const [code, donors] = statistics;
    expect(code.counter).toBe(128);
    expect(code.isNumeric).toBe(false);
    expect(code.label).toBe('Code');
    expect(donors.counter).toBe(128);
    expect(donors.minValue).toBeUndefined();
    expect(donors.maxValue).toBeUndefined();
    expect(donors.isNumeric).toBe(false);
    expect(donors.label).toBe('Number of H-Donors');
    expect(code.always).toBe(true);
    expect(statistics[4].always).toBe(false);
  });

  it('Check molecules', () => {
    const [first] = result.molecules;
    expect(typeof first.Code).toBe('string');
    expect(typeof first.CLogP).toBe('string');
    expect(first.CLogP).toBe('2.700000000000000e+000');
    expect(first.molfile.split('\n')).toHaveLength(37);
  });
});
|
|
70
|
+
|
|
71
|
+
// test1.sdf must be parsed as a single molecule.
describe('SDF Parser one molecule', () => {
  const { molecules } = parse(sdf1);

  it('Check statistics', () => {
    expect(molecules).toHaveLength(1);
  });
});
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
// test2.sdf is read, stripped of every carriage return and parsed with mixedEOL.
describe('SDF Parser of non well formatted file', () => {
  const raw = fs.readFileSync(`${__dirname}/test2.sdf`, 'utf-8');
  const withoutCarriageReturns = raw.replace(/\r/g, '');
  const result = parse(withoutCarriageReturns, { mixedEOL: true });

  it('Check molecules', () => {
    const { molecules } = result;
    expect(molecules).toHaveLength(7);
  });
});
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import callbackStream from 'callback-stream';
|
|
4
|
+
import OCL from 'openchemlib/minimal';
|
|
5
|
+
|
|
6
|
+
import { entries, molecules } from '..';
|
|
7
|
+
|
|
8
|
+
const cbStream = callbackStream.bind(null, { objectMode: true });
|
|
9
|
+
|
|
10
|
+
// Streaming API: `entries()` yields raw SDF records, `molecules()` yields parsed ones.
describe('stream', () => {
  /**
   * Drain an object stream. Resolves with everything it emitted and rejects on
   * a stream error, so assertions run in the test body: a failure is reported
   * by Jest instead of being thrown inside a stream callback.
   */
  const collect = (stream) =>
    new Promise((resolve, reject) => {
      stream.on('error', reject);
      stream.pipe(
        cbStream((err, data) => {
          if (err) {
            reject(err);
          } else {
            resolve(data);
          }
        }),
      );
    });

  const readFixture = () => fs.createReadStream(`${__dirname}/test.sdf`);

  it('entries', async () => {
    const data = await collect(readFixture().pipe(entries()));
    expect(data).toHaveLength(128);
    expect(data[0]).toContain('-ISIS-  04231216572D');
    const mol = OCL.Molecule.fromMolfile(data[5]);
    expect(mol.toMolfile()).toContain(
      '17 18  0  0  0  0  0  0  0  0999 V2000',
    );
  });

  it('molecules', async () => {
    const data = await collect(readFixture().pipe(molecules()));
    expect(data).toHaveLength(128);
    expect(data[0]).toMatchObject({
      Code: '0100380824',
      CLogP: 2.7,
    });
    expect(data[0].molfile).toContain('-ISIS-  04231216572D');
  });

  it('molecules - full result', async () => {
    const data = await collect(
      readFixture().pipe(molecules({ fullResult: true })),
    );
    expect(data).toHaveLength(128);
    expect(data[0]).toMatchObject({
      labels: [
        'Code',
        'Number of H-Donors',
        'Number of H-Acceptors',
        'Number of Rotatable bonds',
        'CLogP',
      ],
    });
    expect(data[0].molecules).toHaveLength(1);
  });

  it('molecules with filter', async () => {
    const data = await collect(
      readFixture().pipe(
        molecules({
          filter: (entry) => entry.Code === '0100380869',
        }),
      ),
    );
    expect(data).toHaveLength(1);
  });

  it('async iteration', async () => {
    const stream = readFixture().pipe(molecules());
    let count = 0;
    for await (const molecule of stream) {
      count++;
      expect(molecule.molfile.toString()).toContain('0999 V2000');
    }
    expect(count).toBe(128);
  });
});
|