sdf-parser 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +104 -0
- package/lib/index.js +221 -0
- package/package.json +61 -0
- package/src/__tests__/checkOptions.test.js +57 -0
- package/src/__tests__/checkUndefined.test.js +24 -0
- package/src/__tests__/getEntriesBoundaries.test.js +26 -0
- package/src/__tests__/index.test.js +76 -0
- package/src/__tests__/notWellFormatted.test.js +14 -0
- package/src/__tests__/stream.test.js +98 -0
- package/src/__tests__/test.sdf +8707 -0
- package/src/__tests__/test1.sdf +38 -0
- package/src/__tests__/test2.sdf +498 -0
- package/src/getEntriesBoundaries.js +16 -0
- package/src/index.js +2 -0
- package/src/parse.js +148 -0
- package/src/stream.browser.js +3 -0
- package/src/stream.js +44 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2014 cheminfo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
package/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# sdf-parser
|
|
2
|
+
|
|
3
|
+
[![NPM version][npm-image]][npm-url]
|
|
4
|
+
[![build status][travis-image]][travis-url]
|
|
5
|
+
[![npm download][download-image]][download-url]
|
|
6
|
+
|
|
7
|
+
Parses an SDF file and converts it to an array of objects.
|
|
8
|
+
|
|
9
|
+
## Use of the package
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install sdf-parser
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
In node script:
|
|
16
|
+
|
|
17
|
+
```js
|
|
18
|
+
// parses a file named test.sdf located in the current working directory
|
|
19
|
+
|
|
20
|
+
var { parse } = require('sdf-parser');
|
|
21
|
+
|
|
22
|
+
var fs = require('fs');
|
|
23
|
+
var sdf = fs.readFileSync('./test.sdf', 'utf-8');
|
|
24
|
+
|
|
25
|
+
var result = parse(sdf);
|
|
26
|
+
console.log(result);
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## parse(sdf, options)
|
|
30
|
+
|
|
31
|
+
options:
|
|
32
|
+
|
|
33
|
+
- exclude : array of strings containing the names of the fields to discard
|
|
34
|
+
- include : array of strings containing the names of the fields to keep
|
|
35
|
+
- modifiers : object mapping a field name to a function that receives the field value and returns its replacement (returning `undefined` or `null` removes the field)
|
|
36
|
+
- filter : function called with each parsed molecule; only the molecules for which it returns a truthy value are kept
|
|
37
|
+
- mixedEOL : if set to true, all end-of-line separators (`\r\n` and `\r`) are normalized to `\n` before parsing, which allows files with mixed separators to be parsed
|
|
38
|
+
- dynamicTyping : converts fields that contain only numbers to numbers (default: true)
|
|
39
|
+
|
|
40
|
+
## Advanced example with filtering and modifiers
|
|
41
|
+
|
|
42
|
+
```js
|
|
43
|
+
var result = parse(sdf, {
|
|
44
|
+
exclude: ['Number of H-Donors'],
|
|
45
|
+
include: ['Number of H-Donors', 'CLogP', 'Code'],
|
|
46
|
+
modifiers: {
|
|
47
|
+
CLogP: function (field) {
|
|
48
|
+
return {
|
|
49
|
+
low: field * 1 - 0.2,
|
|
50
|
+
high: field * 1 + 0.2,
|
|
51
|
+
};
|
|
52
|
+
},
|
|
53
|
+
},
|
|
54
|
+
filter: (entry) => {
|
|
55
|
+
return entry.CLogP && entry.CLogP.low > 4;
|
|
56
|
+
},
|
|
57
|
+
});
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Streams
|
|
61
|
+
|
|
62
|
+
This API is only available on Node.js.
|
|
63
|
+
|
|
64
|
+
### molecules(options)
|
|
65
|
+
|
|
66
|
+
Transform an input text stream to a stream of molecule objects.
|
|
67
|
+
|
|
68
|
+
#### options
|
|
69
|
+
|
|
70
|
+
- `fullResult`: true to emit the full result of `parse` instead of just the molecules.
|
|
71
|
+
- All other options from the `parse` function.
|
|
72
|
+
|
|
73
|
+
```js
|
|
74
|
+
const { stream } = require('sdf-parser');
|
|
75
|
+
fs.createReadStream('test.sdf')
|
|
76
|
+
.pipe(stream.molecules())
|
|
77
|
+
.on('data', (molecule) => {
|
|
78
|
+
console.log(molecule.molfile);
|
|
79
|
+
});
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### entries()
|
|
83
|
+
|
|
84
|
+
Transform an input text stream to a stream of sdf entries.
|
|
85
|
+
|
|
86
|
+
```js
|
|
87
|
+
const { stream } = require('sdf-parser');
|
|
88
|
+
fs.createReadStream('test.sdf')
|
|
89
|
+
.pipe(stream.entries())
|
|
90
|
+
.on('data', (entry) => {
|
|
91
|
+
// sdf entry as a string
|
|
92
|
+
});
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
[MIT](./LICENSE)
|
|
98
|
+
|
|
99
|
+
[npm-image]: https://img.shields.io/npm/v/sdf-parser.svg?style=flat-square
|
|
100
|
+
[npm-url]: https://www.npmjs.com/package/sdf-parser
|
|
101
|
+
[travis-image]: https://img.shields.io/travis/cheminfo/sdf-parser/master.svg?style=flat-square
|
|
102
|
+
[travis-url]: https://travis-ci.org/cheminfo/sdf-parser
|
|
103
|
+
[download-image]: https://img.shields.io/npm/dm/sdf-parser.svg?style=flat-square
|
|
104
|
+
[download-url]: https://www.npmjs.com/package/sdf-parser
|
package/lib/index.js
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
var pipeline = require('pumpify');
|
|
6
|
+
var split2 = require('split2');
|
|
7
|
+
var through2 = require('through2');
|
|
8
|
+
var filter = require('through2-filter');
|
|
9
|
+
|
|
10
|
+
/**
 * Normalize a required module so that it can always be read through `.default`.
 *
 * @param {*} e - Value returned by `require`.
 * @returns {object} `e` itself when it is an object that already has a
 *   `default` property, otherwise a new wrapper `{ default: e }`.
 */
function _interopDefaultLegacy(e) {
  const alreadyHasDefault = e && typeof e === 'object' && 'default' in e;
  if (alreadyHasDefault) {
    return e;
  }
  return { 'default': e };
}
|
|
11
|
+
|
|
12
|
+
var pipeline__default = /*#__PURE__*/_interopDefaultLegacy(pipeline);
|
|
13
|
+
var split2__default = /*#__PURE__*/_interopDefaultLegacy(split2);
|
|
14
|
+
var through2__default = /*#__PURE__*/_interopDefaultLegacy(through2);
|
|
15
|
+
var filter__default = /*#__PURE__*/_interopDefaultLegacy(filter);
|
|
16
|
+
|
|
17
|
+
/**
 * Compute the `[start, end]` offsets of every entry of `string`, where two
 * entries are separated by a delimiter line that begins with `substring`.
 *
 * The characters following `substring` up to and including the next `eol`
 * belong to the delimiter line and are excluded from both neighbouring
 * entries. Text after the last delimiter line (possibly empty) is reported as
 * a final entry.
 *
 * @param {string} string - Full text to split.
 * @param {string} substring - Start of the delimiter line, including the end
 *   of line that precedes it (for SDF: `${eol}$$$$`).
 * @param {string} eol - End-of-line sequence used in `string`.
 * @returns {Array<[number, number]>} Offsets suitable for
 *   `string.substring(start, end)`.
 */
function getEntriesBoundaries(string, substring, eol) {
  const res = [];
  let previous = 0;
  for (;;) {
    const next = string.indexOf(substring, previous);
    if (next === -1) {
      // No more delimiter: the remaining text is the last entry.
      res.push([previous, string.length]);
      return res;
    }
    res.push([previous, next]);
    const delimiterLineEnd = string.indexOf(eol, next + substring.length);
    if (delimiterLineEnd === -1) {
      // The delimiter is the very last line and has no trailing end of line.
      // Stop here: computing `-1 + eol.length` would restart the scan near
      // the beginning of the string and loop forever.
      return res;
    }
    previous = delimiterLineEnd + eol.length;
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Parse the text of an SDF file into molecule records and per-field statistics.
 *
 * @param {string} sdf - Content of the SDF file.
 * @param {object} [options={}]
 * @param {string[]} [options.include] - When set, only these fields are kept.
 * @param {string[]} [options.exclude] - Fields to discard.
 * @param {function(object): *} [options.filter] - Called with each parsed
 *   molecule; the molecule is kept only when the result is truthy.
 * @param {Object<string, function>} [options.modifiers={}] - Per-field
 *   functions receiving the field value and returning its replacement;
 *   returning `undefined` or `null` removes the field from the molecule.
 * @param {Object<string, function>} [options.forEach={}] - Per-field functions;
 *   they are only attached to the matching statistics entry, this function
 *   never calls them.
 * @param {boolean} [options.dynamicTyping=true] - Convert a field to numbers
 *   when every kept value of that field is numeric.
 * @param {boolean} [options.mixedEOL] - Normalize `\r\n` and `\r` to `\n`
 *   before parsing instead of detecting a single end-of-line sequence.
 * @returns {{time: number, molecules: object[], labels: string[], statistics: object[]}}
 *   `time` is the parsing duration in milliseconds, `molecules` the kept
 *   records (each has a `molfile` property plus its fields), `labels` the
 *   names of all fields encountered and `statistics` one entry per label
 *   (`label`, `counter`, `isNumeric`, `keep`, `always`, and `minValue` /
 *   `maxValue` for numeric fields).
 * @throws {TypeError} If `sdf` is not a string.
 */
function parse(sdf, options = {}) {
  const {
    include,
    exclude,
    filter,
    modifiers = {},
    forEach = {},
    dynamicTyping = true,
  } = options;

  if (typeof sdf !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }

  let eol = '\n';
  if (options.mixedEOL) {
    sdf = sdf.replace(/\r\n/g, '\n');
    sdf = sdf.replace(/\r/g, '\n');
  } else {
    // Detect the end-of-line sequence from the beginning of the file only, in
    // order to be much faster and not use regular expressions on the whole text.
    const header = sdf.slice(0, 1000);
    if (header.includes('\r\n')) {
      eol = '\r\n';
    } else if (header.includes('\r')) {
      eol = '\r';
    }
  }

  const entriesBoundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);
  const molecules = [];
  // No prototype: a field named e.g. "constructor" must not be mistaken for an
  // already registered label.
  const labels = Object.create(null);

  const start = Date.now();

  for (let i = 0; i < entriesBoundaries.length; i++) {
    const sdfPart = sdf.substring(...entriesBoundaries[i]);
    // parts[0] is the molfile, every following part is one "> <label>" field
    const parts = sdfPart.split(`${eol}>`);
    if (parts.length > 0 && parts[0].length > 5) {
      const molecule = {};
      const currentLabels = [];
      molecule.molfile = parts[0] + eol;
      for (let j = 1; j < parts.length; j++) {
        const lines = parts[j].split(eol);
        const from = lines[0].indexOf('<');
        const to = lines[0].indexOf('>');
        const label = lines[0].substring(from + 1, to);
        currentLabels.push(label);
        if (!labels[label]) {
          labels[label] = {
            counter: 0,
            isNumeric: dynamicTyping,
            keep: false,
          };
          if (
            (!exclude || exclude.indexOf(label) === -1) &&
            (!include || include.indexOf(label) > -1)
          ) {
            labels[label].keep = true;
            if (modifiers[label]) {
              labels[label].modifier = modifiers[label];
            }
            if (forEach[label]) {
              labels[label].forEach = forEach[label];
            }
          }
        }
        if (labels[label].keep) {
          // the last line of a field is the blank separator line
          for (let k = 1; k < lines.length - 1; k++) {
            if (molecule[label]) {
              molecule[label] += eol + lines[k];
            } else {
              molecule[label] = lines[k];
            }
          }
          if (labels[label].modifier) {
            const modifiedValue = labels[label].modifier(molecule[label]);
            if (modifiedValue === undefined || modifiedValue === null) {
              delete molecule[label];
            } else {
              molecule[label] = modifiedValue;
            }
          }
          if (
            labels[label].isNumeric &&
            !isNumericFieldValue(molecule[label])
          ) {
            labels[label].isNumeric = false;
          }
        }
      }
      if (!filter || filter(molecule)) {
        molecules.push(molecule);
        // only now we can increase the counter
        for (let j = 0; j < currentLabels.length; j++) {
          labels[currentLabels[j]].counter++;
        }
      }
    }
  }

  // all numeric fields should be converted to numbers
  for (const label in labels) {
    const currentLabel = labels[label];
    if (currentLabel.isNumeric) {
      currentLabel.minValue = Infinity;
      currentLabel.maxValue = -Infinity;
      for (let j = 0; j < molecules.length; j++) {
        const raw = molecules[j][label];
        // skip missing and empty values, but keep a numeric 0
        if (raw !== undefined && raw !== '') {
          const value = parseFloat(raw);
          molecules[j][label] = value;
          if (value > currentLabel.maxValue) {
            currentLabel.maxValue = value;
          }
          if (value < currentLabel.minValue) {
            currentLabel.minValue = value;
          }
        }
      }
    }
  }

  // flag the labels present in all the records and collect the statistics
  const statistics = [];
  for (const key in labels) {
    const statistic = labels[key];
    statistic.always = statistic.counter === molecules.length;
    statistic.label = key;
    statistics.push(statistic);
  }

  return {
    time: Date.now() - start,
    molecules: molecules,
    labels: Object.keys(labels),
    statistics: statistics,
  };
}

/**
 * Tell whether a field value is compatible with a numeric field.
 *
 * Strings must be accepted by the coercing global `isFinite` and must not
 * start with a zero followed by a digit (such values are codes, not numbers).
 * Numbers, which can only come from a modifier, must be finite. Any other
 * type (including a missing value) is not numeric.
 *
 * @param {*} value - Field value, possibly already transformed by a modifier.
 * @returns {boolean}
 */
function isNumericFieldValue(value) {
  if (typeof value === 'number') {
    return Number.isFinite(value);
  }
  if (typeof value !== 'string') {
    return false;
  }
  return isFinite(value) && !/^0[0-9]/.test(value);
}
|
|
180
|
+
|
|
181
|
+
// Factory for `through2-filter` streams with `{ objectMode: true }` pre-bound as the first argument.
const filterStream = filter__default["default"].bind(null, { objectMode: true });
|
|
182
|
+
/**
 * Predicate used to drop the chunks that carry no entry.
 *
 * @param {string} chunk - Text found between two delimiter lines.
 * @returns {boolean} `true` when the chunk still has more than one character
 *   once surrounding whitespace is removed.
 */
function filterCb(chunk) {
  if (!(chunk.length > 1)) {
    return false;
  }
  const meaningful = chunk.trim();
  return meaningful.length > 1;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Build a stream that turns SDF text into one string per SDF entry.
 *
 * The input is split on the `$$$$` delimiter lines, chunks rejected by
 * `filterCb` are dropped, and the delimiter line is appended again to every
 * remaining chunk, using `\r\n` when the chunk contains it and `\n` otherwise.
 *
 * @returns {object} Object-mode stream combining the three stages.
 */
function entries() {
  const splitOnDelimiter = split2__default["default"](/\r?\n\${4}.*\r?\n/);
  const dropEmptyChunks = filterStream(filterCb);
  const restoreDelimiter = through2__default["default"](
    { objectMode: true },
    function process(value, encoding, callback) {
      const eol = value.includes('\r\n') ? '\r\n' : '\n';
      this.push(value + eol + '$$$$' + eol);
      callback();
    },
  );
  return pipeline__default["default"].obj(
    splitOnDelimiter,
    dropEmptyChunks,
    restoreDelimiter,
  );
}
|
|
197
|
+
|
|
198
|
+
/**
 * Build a stream that turns SDF text into parsed molecules.
 *
 * Every entry produced by `entries()` is parsed on its own with `parse`.
 * Entries that do not yield exactly one molecule are silently skipped, and any
 * error thrown while handling an entry is forwarded to the stream callback.
 *
 * @param {object} [options] - Options passed to `parse`. When
 *   `options.fullResult` is truthy the whole `parse` result is emitted
 *   instead of the molecule alone.
 * @returns {object} Object-mode stream of molecules (or of full results).
 */
function molecules(options) {
  const parseEntry = through2__default["default"](
    { objectMode: true },
    function process(value, encoding, callback) {
      try {
        const parsed = parse(value, options);
        if (parsed.molecules.length === 1) {
          const wantsFullResult = options && options.fullResult;
          this.push(wantsFullResult ? parsed : parsed.molecules[0]);
        }
        callback();
      } catch (e) {
        callback(e);
      }
    },
  );
  return pipeline__default["default"].obj(entries(), parseEntry);
}
|
|
218
|
+
|
|
219
|
+
exports.entries = entries;
|
|
220
|
+
exports.molecules = molecules;
|
|
221
|
+
exports.parse = parse;
|
package/package.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "sdf-parser",
|
|
3
|
+
"version": "5.0.0",
|
|
4
|
+
"description": "SDF parser",
|
|
5
|
+
"main": "lib/index.js",
|
|
6
|
+
"module": "src/index.js",
|
|
7
|
+
"files": [
|
|
8
|
+
"lib",
|
|
9
|
+
"src"
|
|
10
|
+
],
|
|
11
|
+
"scripts": {
|
|
12
|
+
"build": "npm run compile && cheminfo-build --root SDFParser",
|
|
13
|
+
"compile": "rollup -c",
|
|
14
|
+
"eslint": "eslint src",
|
|
15
|
+
"eslint-fix": "npm run eslint -- --fix",
|
|
16
|
+
"prepack": "npm run compile",
|
|
17
|
+
"prettier": "prettier --check src",
|
|
18
|
+
"prettier-write": "prettier --write src",
|
|
19
|
+
"test": "npm run test-coverage && npm run eslint",
|
|
20
|
+
"test-coverage": "jest --coverage",
|
|
21
|
+
"test-only": "jest"
|
|
22
|
+
},
|
|
23
|
+
"browser": {
|
|
24
|
+
"./src/stream.js": "./src/stream.browser.js"
|
|
25
|
+
},
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "https://github.com/cheminfo/sdf-parser.git"
|
|
29
|
+
},
|
|
30
|
+
"keywords": [
|
|
31
|
+
"sdf",
|
|
32
|
+
"parser",
|
|
33
|
+
"molfile",
|
|
34
|
+
"v2000",
|
|
35
|
+
"v3000",
|
|
36
|
+
"mdl"
|
|
37
|
+
],
|
|
38
|
+
"author": "Luc Patiny",
|
|
39
|
+
"license": "MIT",
|
|
40
|
+
"bugs": {
|
|
41
|
+
"url": "https://github.com/cheminfo/sdf-parser/issues"
|
|
42
|
+
},
|
|
43
|
+
"homepage": "https://github.com/cheminfo/sdf-parser",
|
|
44
|
+
"devDependencies": {
|
|
45
|
+
"@babel/plugin-transform-modules-commonjs": "^7.15.4",
|
|
46
|
+
"babel-eslint": "^10.1.0",
|
|
47
|
+
"callback-stream": "^1.1.0",
|
|
48
|
+
"cheminfo-build": "^1.1.11",
|
|
49
|
+
"eslint": "^7.32.0",
|
|
50
|
+
"eslint-config-cheminfo": "^6.0.1",
|
|
51
|
+
"jest": "^27.2.5",
|
|
52
|
+
"openchemlib": "^7.4.3",
|
|
53
|
+
"prettier": "^2.4.1"
|
|
54
|
+
},
|
|
55
|
+
"dependencies": {
|
|
56
|
+
"pumpify": "^2.0.1",
|
|
57
|
+
"split2": "^3.2.2",
|
|
58
|
+
"through2": "^4.0.2",
|
|
59
|
+
"through2-filter": "^3.0.0"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
const sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');

// Checks the `include`, `exclude`, `modifiers` and `filter` options of `parse`.
describe('SDF Parser options', () => {
  const result = parse(sdf, {
    exclude: ['Number of H-Donors'],
    include: ['Number of H-Donors', 'CLogP', 'Code'],
    modifiers: {
      // replace the CLogP value by a +/- 0.2 interval
      CLogP: (field) => {
        return {
          low: field * 1 - 0.2,
          high: field * 1 + 0.2,
        };
      },
    },
    filter: (entry) => {
      return entry.CLogP && entry.CLogP.low > 4;
    },
  });

  it('Check statistics', () => {
    expect(result.statistics[0].counter).toBe(43);
    expect(result.statistics[0].isNumeric).toBe(false);
    expect(result.statistics[0].label).toBe('Code');
    expect(result.statistics[0].always).toBe(true);
    expect(result.statistics[4].counter).toBe(43);
    expect(result.statistics[4].isNumeric).toBe(false);
    expect(result.statistics[4].label).toBe('CLogP');
    expect(result.statistics[4].always).toBe(true);
  });

  it('Check molecules', () => {
    expect(result.molecules).toHaveLength(43);
    const molecule = result.molecules[0];

    expect(Object.keys(molecule)).toHaveLength(3);
    expect(molecule.Code).toBe('0100380851');
    // The second argument of toBeCloseTo is a number of decimal digits, not a
    // tolerance: 4 digits checks the value to within 0.00005.
    expect(molecule.CLogP.low).toBeCloseTo(4.8, 4);
    expect(molecule.CLogP.high).toBeCloseTo(5.2, 4);
    expect(molecule.molfile.split('\n')).toHaveLength(56);
  });

  it('should throw with non-string argument', () => {
    expect(() => {
      parse();
    }).toThrow(TypeError);
    expect(() => {
      parse(42);
    }).toThrow(TypeError);
    expect(() => {
      parse({});
    }).toThrow(TypeError);
  });
});
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
const sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');

// A modifier returning undefined removes the field, so a filter relying on
// that field rejects every molecule.
describe('SDF Parser options and undefined', () => {
  const removeField = () => undefined;
  const hasHighCLogP = (entry) => entry.CLogP && entry.CLogP.low > 4;

  const result = parse(sdf, {
    exclude: ['Number of H-Donors'],
    include: ['Number of H-Donors', 'CLogP', 'Code'],
    modifiers: { CLogP: removeField },
    filter: hasHighCLogP,
  });

  it('Check molecules', () => {
    const { molecules } = result;
    expect(molecules).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { getEntriesBoundaries } from '../getEntriesBoundaries';
|
|
4
|
+
|
|
5
|
+
const sdf0 = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
const sdf1 = fs.readFileSync(`${__dirname}/test1.sdf`, 'utf-8');
const sdf2 = fs.readFileSync(`${__dirname}/test2.sdf`, 'utf-8');

// For each fixture, the boundaries must describe the same pieces as a split
// on the delimiter regular expression.
for (const sdf of [sdf0, sdf1, sdf2]) {
  // detect the end of line from the beginning of the file
  const header = sdf.slice(0, 1000);
  let eol = '\n';
  if (header.includes('\r\n')) {
    eol = '\r\n';
  } else if (header.includes('\r')) {
    eol = '\r';
  }

  test('Split should match regex behavior', () => {
    const delimiter = new RegExp(`${eol}\\$\\$\\$\\$.*${eol}`);
    const sdfParts = sdf.split(delimiter);
    const boundaries = getEntriesBoundaries(sdf, `${eol}$$$$`, eol);
    const fromBoundaries = boundaries.map(([from, to]) =>
      sdf.substring(from, to),
    );
    expect(sdfParts).toStrictEqual(fromBoundaries);
  });
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
const sdf = fs.readFileSync(`${__dirname}/test.sdf`, 'utf-8');
const sdf1 = fs.readFileSync(`${__dirname}/test1.sdf`, 'utf-8');

// Default options: numeric fields are converted and get min / max statistics.
describe('SDF Parser', () => {
  const result = parse(sdf);

  it('Check statistics', () => {
    const { statistics } = result;
    const [code, donors] = statistics;
    expect(code.counter).toBe(128);
    expect(code.isNumeric).toBe(false);
    expect(code.label).toBe('Code');
    expect(donors.counter).toBe(128);
    expect(donors.minValue).toBe(0);
    expect(donors.maxValue).toBe(5);
    expect(donors.isNumeric).toBe(true);
    expect(donors.label).toBe('Number of H-Donors');
    expect(code.always).toBe(true);
    expect(statistics[4].always).toBe(false);
  });

  it('Check molecules', () => {
    const [molecule] = result.molecules;
    expect(molecule.Code).toContain('0100380824');
    expect(molecule.CLogP).toBe(2.7);
    const molfileLines = molecule.molfile.split('\n');
    expect(molfileLines).toHaveLength(37);
  });

  it('should throw with non-string argument', () => {
    expect(() => parse()).toThrow(TypeError);
    expect(() => parse(42)).toThrow(TypeError);
    expect(() => parse({})).toThrow(TypeError);
  });
});
|
|
43
|
+
|
|
44
|
+
// With dynamicTyping disabled every field stays a string and no min / max
// statistics are computed.
describe('SDF Parser no dynamicTyping', () => {
  const parseOptions = { dynamicTyping: false };
  const result = parse(sdf, parseOptions);

  it('Check statistics', () => {
    const { statistics } = result;
    const [code, donors] = statistics;
    expect(code.counter).toBe(128);
    expect(code.isNumeric).toBe(false);
    expect(code.label).toBe('Code');
    expect(donors.counter).toBe(128);
    expect(donors.minValue).toBeUndefined();
    expect(donors.maxValue).toBeUndefined();
    expect(donors.isNumeric).toBe(false);
    expect(donors.label).toBe('Number of H-Donors');
    expect(code.always).toBe(true);
    expect(statistics[4].always).toBe(false);
  });

  it('Check molecules', () => {
    const [molecule] = result.molecules;
    expect(typeof molecule.Code).toBe('string');
    expect(typeof molecule.CLogP).toBe('string');
    expect(molecule.CLogP).toBe('2.700000000000000e+000');
    const molfileLines = molecule.molfile.split('\n');
    expect(molfileLines).toHaveLength(37);
  });
});
|
|
70
|
+
|
|
71
|
+
// A file holding a single entry yields exactly one molecule.
describe('SDF Parser one molecule', () => {
  const { molecules } = parse(sdf1);

  it('Check statistics', () => {
    expect(molecules).toHaveLength(1);
  });
});
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
import { parse } from '..';
|
|
4
|
+
|
|
5
|
+
// Parses the test2.sdf fixture with the mixedEOL option after removing every
// carriage return from it.
describe('SDF Parser of non well formatted file', () => {
  const raw = fs.readFileSync(`${__dirname}/test2.sdf`, 'utf-8');
  const sdf = raw.replace(/\r/g, '');
  const result = parse(sdf, { mixedEOL: true });

  it('Check molecules', () => {
    const { molecules } = result;
    expect(molecules).toHaveLength(7);
  });
});
|