rino 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +44 -0
- data/Rakefile +123 -0
- data/ext/extconf.rb +26 -0
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/aux2atom.h +2786 -0
- data/ext/src/comdef.h +148 -0
- data/ext/src/e_0dstereo.c +3014 -0
- data/ext/src/e_0dstereo.h +31 -0
- data/ext/src/e_comdef.h +57 -0
- data/ext/src/e_ctl_data.h +147 -0
- data/ext/src/e_ichi_io.c +498 -0
- data/ext/src/e_ichi_io.h +40 -0
- data/ext/src/e_ichi_parms.c +37 -0
- data/ext/src/e_ichi_parms.h +41 -0
- data/ext/src/e_ichicomp.h +50 -0
- data/ext/src/e_ichierr.h +40 -0
- data/ext/src/e_ichimain.c +593 -0
- data/ext/src/e_ichisize.h +43 -0
- data/ext/src/e_inchi_atom.c +75 -0
- data/ext/src/e_inchi_atom.h +33 -0
- data/ext/src/e_inpdef.h +41 -0
- data/ext/src/e_mode.h +706 -0
- data/ext/src/e_mol2atom.c +649 -0
- data/ext/src/e_readinch.c +58 -0
- data/ext/src/e_readmol.c +54 -0
- data/ext/src/e_readmol.h +180 -0
- data/ext/src/e_readstru.c +251 -0
- data/ext/src/e_readstru.h +33 -0
- data/ext/src/e_util.c +284 -0
- data/ext/src/e_util.h +61 -0
- data/ext/src/extr_ct.h +251 -0
- data/ext/src/ichi.h +206 -0
- data/ext/src/ichi_bns.c +7999 -0
- data/ext/src/ichi_bns.h +231 -0
- data/ext/src/ichican2.c +5000 -0
- data/ext/src/ichicano.c +2195 -0
- data/ext/src/ichicano.h +49 -0
- data/ext/src/ichicans.c +1625 -0
- data/ext/src/ichicant.h +379 -0
- data/ext/src/ichicomn.h +260 -0
- data/ext/src/ichicomp.h +50 -0
- data/ext/src/ichidrp.h +119 -0
- data/ext/src/ichierr.h +124 -0
- data/ext/src/ichiisot.c +101 -0
- data/ext/src/ichilnct.c +286 -0
- data/ext/src/ichimain.h +132 -0
- data/ext/src/ichimak2.c +1189 -0
- data/ext/src/ichimake.c +3812 -0
- data/ext/src/ichimake.h +205 -0
- data/ext/src/ichimap1.c +851 -0
- data/ext/src/ichimap2.c +2856 -0
- data/ext/src/ichimap4.c +1609 -0
- data/ext/src/ichinorm.c +741 -0
- data/ext/src/ichinorm.h +67 -0
- data/ext/src/ichiparm.c +45 -0
- data/ext/src/ichiparm.h +1441 -0
- data/ext/src/ichiprt1.c +3612 -0
- data/ext/src/ichiprt2.c +1511 -0
- data/ext/src/ichiprt3.c +3011 -0
- data/ext/src/ichiqueu.c +1003 -0
- data/ext/src/ichiring.c +326 -0
- data/ext/src/ichiring.h +49 -0
- data/ext/src/ichisize.h +35 -0
- data/ext/src/ichisort.c +539 -0
- data/ext/src/ichister.c +3538 -0
- data/ext/src/ichister.h +35 -0
- data/ext/src/ichitaut.c +3843 -0
- data/ext/src/ichitaut.h +387 -0
- data/ext/src/ichitime.h +74 -0
- data/ext/src/inchi_api.h +670 -0
- data/ext/src/inchi_dll.c +1480 -0
- data/ext/src/inchi_dll.h +34 -0
- data/ext/src/inchi_dll_main.c +23 -0
- data/ext/src/inchi_dll_main.h +31 -0
- data/ext/src/inpdef.h +328 -0
- data/ext/src/lreadmol.h +1246 -0
- data/ext/src/mode.h +706 -0
- data/ext/src/ruby_inchi_main.c +558 -0
- data/ext/src/runichi.c +4179 -0
- data/ext/src/strutil.c +3861 -0
- data/ext/src/strutil.h +182 -0
- data/ext/src/util.c +1130 -0
- data/ext/src/util.h +85 -0
- data/lib/clean_tempfile.rb +220 -0
- data/lib/rino.rb +111 -0
- data/test/test.rb +386 -0
- metadata +130 -0
data/ext/src/lreadmol.h
ADDED
@@ -0,0 +1,1246 @@
|
|
1
|
+
/*
|
2
|
+
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
+
* International Chemical Identifier (InChI)
|
4
|
+
* Version 1
|
5
|
+
* Software version 1.00
|
6
|
+
* April 13, 2005
|
7
|
+
* Developed at NIST
|
8
|
+
*/
|
9
|
+
|
10
|
+
|
11
|
+
/* local prototypes */
|
12
|
+
int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment, int lcomment, char *name, int lname, int prev_err,
|
13
|
+
const char *pSdfLabel, char *pSdfValue, char *pStrErr );
|
14
|
+
MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
|
15
|
+
int bGetOrigCoord, int *err, char *pStrErr );
|
16
|
+
|
17
|
+
|
18
|
+
static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr);
|
19
|
+
static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr );
|
20
|
+
static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
|
21
|
+
static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
|
22
|
+
static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
|
23
|
+
static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr );
|
24
|
+
|
25
|
+
static int identify_sdf_label( char* inp_line, const char *pSdfLabel );
|
26
|
+
static long extract_cas_rn( char *line );
|
27
|
+
int RemoveNonPrintable( char *line );
|
28
|
+
|
29
|
+
|
30
|
+
/******/
|
31
|
+
/*
 * Error-reporting helpers used by the MOLfile readers below.
 *
 * Both macros expect a variable named pStrErr to be in scope at the point of
 * use; it is passed to AddMOLfileError() as the message accumulator.
 *
 * MOLFILE_ERR_SET(err, new_err, msg)
 *     Stores new_err into err only if err is still zero and new_err is
 *     non-zero, then always appends msg to pStrErr. Passing new_err == 0
 *     therefore records a message without changing the error code.
 *
 * MOLFILE_ERR_FIN(err, new_err, err_fin, msg)
 *     Same as MOLFILE_ERR_SET, followed by a jump to the label err_fin.
 *
 * The bodies are wrapped in do { ... } while (0) so that each invocation is a
 * single statement and behaves correctly as the unbraced body of if/else.
 */
#ifndef MOLFILE_ERR_FIN
#define MOLFILE_ERR_FIN(err, new_err, err_fin, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError( pStrErr, (msg) ); \
        goto err_fin; \
    } while (0)
#endif
#ifndef MOLFILE_ERR_SET
#define MOLFILE_ERR_SET(err, new_err, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError( pStrErr, (msg) ); \
    } while (0)
#endif
|
39
|
+
|
40
|
+
/*************************************************************************/
|
41
|
+
int AddMOLfileError( char *pStrErr, const char *szMsg )
|
42
|
+
{
|
43
|
+
if ( pStrErr && szMsg && szMsg[0] ) {
|
44
|
+
int lenStrErr = strlen( pStrErr );
|
45
|
+
int lenMsg = strlen( szMsg );
|
46
|
+
char *p = strstr( pStrErr, szMsg );
|
47
|
+
if ( p && (p==pStrErr || *(p-1) == ' ' && (*(p-2) == ';' || *(p-2) == ':' )) &&
|
48
|
+
(p+lenMsg == pStrErr+lenStrErr ||
|
49
|
+
p[lenMsg] == ';' && p[lenMsg+1] == ' ' ||
|
50
|
+
p[lenMsg-1]==':' && p[lenMsg]==' ') ) {
|
51
|
+
return 1; /* reject duplicates */
|
52
|
+
}
|
53
|
+
if ( lenStrErr + lenMsg + 2*(lenStrErr > 0) < STR_ERR_LEN ) {
|
54
|
+
/* enough room to add */
|
55
|
+
if (lenStrErr > 0) {
|
56
|
+
if ( pStrErr[lenStrErr-1] != ':' ) {
|
57
|
+
strcat( pStrErr, ";" );
|
58
|
+
}
|
59
|
+
strcat( pStrErr, " " );
|
60
|
+
}
|
61
|
+
strcat( pStrErr, szMsg );
|
62
|
+
return 1;
|
63
|
+
}
|
64
|
+
/* no room */
|
65
|
+
if ( strstr( pStrErr, "..." ) ) {
|
66
|
+
return 0; /* no room mark has already been set */
|
67
|
+
}
|
68
|
+
if ( lenStrErr + 3 < STR_ERR_LEN ) {
|
69
|
+
strcat( pStrErr, "..." );
|
70
|
+
}
|
71
|
+
}
|
72
|
+
return 0;
|
73
|
+
}
|
74
|
+
/*************************************************************************/
|
75
|
+
/*
 * Copy at most len characters of source into dest, zero-terminate dest, and
 * locate the start of the trailing white space in the copy.
 *
 * dest         must have room for at least len+1 bytes
 * source       zero-terminated input; copying stops at its terminator
 * len          maximum number of characters to copy; a negative value is
 *              treated as 0 (it would otherwise index before dest)
 * first_space  receives a pointer into dest: the first character of the
 *              trailing run of white space, or the terminating zero if the
 *              copy does not end in white space
 *
 * Returns the number of characters actually copied, terminating zero not
 * included.
 */
static int mol_copy_check_empty( char* dest, char* source, int len, char **first_space )
{
    int i;

    if ( len < 0 ) {
        len = 0; /* never write dest[len] in front of the buffer */
    }
    if ( len > 0 ) {
        strncpy( dest, source, len );
    }
    dest[len] = '\0';
    len = (int)strlen( dest ); /* source may have been shorter than len */

    /* step back over trailing white space */
    for ( i = len - 1; i >= 0 && isspace( (unsigned char)dest[i] ); i-- )
        ;
    *first_space = dest + (i+1); /* first trailing blank or the terminating zero in dest */
    return len;
}
|
87
|
+
/*************************************************************************/
|
88
|
+
static int mol_read_datum(void* data, int field_len, int data_type, char** line_ptr)
|
89
|
+
{
|
90
|
+
/* 1. 'field_len' for MOL_STRING_DATA does not include trailing zero,
|
91
|
+
* that is actual length of the string pointed by 'data'
|
92
|
+
* should be at least field_len+1 bytes.
|
93
|
+
* For numerical data 'field_len' is length of input data field
|
94
|
+
* For numerical integral data field_len <= 0 means read up to first
|
95
|
+
* non-numeric character as strtod() does ("free format")
|
96
|
+
* 2. return value: for MOL_STRING_DATA: number of bytes excluding trailing zero
|
97
|
+
* for all others: 1=success; 0 = empty; -1= error
|
98
|
+
* 3. on exit *line_ptr points to the next byte after the last entered
|
99
|
+
*/
|
100
|
+
char *p = *line_ptr, *q, *p_end;
|
101
|
+
int i, ret=1, c, len;
|
102
|
+
long ldata;
|
103
|
+
double ddata;
|
104
|
+
|
105
|
+
switch( data_type ) {
|
106
|
+
case MOL_STRING_DATA:
|
107
|
+
for ( i= 0; i < field_len && 0 != (c = p[i]) && isspace(UCINT c); i++ ) /* pass by all leading spaces */
|
108
|
+
;
|
109
|
+
len = mol_copy_check_empty( (char*)data, &p[i], field_len-i, &q );
|
110
|
+
ret = ( q - (char*)data );/* actual data length */
|
111
|
+
*q = '\0'; /* add zero termination to data if it is not there yet*/
|
112
|
+
*line_ptr += (len+i); /* ptr to the 1st byte of the next input field or to zero termination */
|
113
|
+
break;
|
114
|
+
|
115
|
+
case MOL_CHAR_INT_DATA:
|
116
|
+
case MOL_SHORT_INT_DATA:
|
117
|
+
case MOL_LONG_INT_DATA:
|
118
|
+
{ /* block start */
|
119
|
+
char str[MOL_MAX_VALUE_LEN+1];
|
120
|
+
ldata = 0L;
|
121
|
+
if ( field_len > MOL_MAX_VALUE_LEN ) {
|
122
|
+
ret = -1;
|
123
|
+
}else
|
124
|
+
if ( field_len > 0 ) { /* fixed length */
|
125
|
+
*line_ptr += ( len = mol_copy_check_empty( str, p, field_len, &q ) );
|
126
|
+
*q = '\0';
|
127
|
+
if ( !len || !(q-str) ) { /* empty string */
|
128
|
+
ret = 0;
|
129
|
+
}else
|
130
|
+
if ( (ldata=strtol(str,&p_end,10), p_end != q) ){ /* wrong data: incompletely interpreted */
|
131
|
+
ret = -1;
|
132
|
+
}
|
133
|
+
}else{ /* free format: field_len <= 0 */
|
134
|
+
ldata = strtol( p, &p_end, 10 );
|
135
|
+
*line_ptr += ( len = p_end - p );
|
136
|
+
if ( len == 0 ){
|
137
|
+
ret = 0;
|
138
|
+
}
|
139
|
+
}
|
140
|
+
|
141
|
+
switch( data_type ) {
|
142
|
+
case MOL_CHAR_INT_DATA:
|
143
|
+
if ( SCHAR_MIN <= ldata && ldata <= SCHAR_MAX ){ /* from || to &&: 11-19-96 */
|
144
|
+
*(S_CHAR*)data = (S_CHAR)ldata;
|
145
|
+
}else{
|
146
|
+
*(S_CHAR*)data = (S_CHAR)0;
|
147
|
+
ret = -1;
|
148
|
+
}
|
149
|
+
break;
|
150
|
+
case MOL_SHORT_INT_DATA:
|
151
|
+
if ( SHRT_MIN <= ldata && ldata <= SHRT_MAX ){
|
152
|
+
*(S_SHORT*)data = (S_SHORT)ldata;
|
153
|
+
}else{
|
154
|
+
*(S_SHORT*)data = (S_SHORT)0;
|
155
|
+
ret = -1;
|
156
|
+
}
|
157
|
+
break;
|
158
|
+
case MOL_LONG_INT_DATA:
|
159
|
+
if ( LONG_MIN < ldata && ldata < LONG_MAX ){
|
160
|
+
*(long*)data = (long)ldata;
|
161
|
+
}else{
|
162
|
+
*(long*)data = 0L;
|
163
|
+
ret = -1;
|
164
|
+
}
|
165
|
+
break;
|
166
|
+
default:
|
167
|
+
ret=-1;
|
168
|
+
}
|
169
|
+
|
170
|
+
} /* block end */
|
171
|
+
break;
|
172
|
+
case MOL_DOUBLE_DATA:
|
173
|
+
case MOL_FLOAT_DATA:
|
174
|
+
{ /* block start */
|
175
|
+
char str[MOL_MAX_VALUE_LEN+1];
|
176
|
+
if ( field_len > MOL_MAX_VALUE_LEN ) {
|
177
|
+
ret = -1;
|
178
|
+
ddata = 0.0;
|
179
|
+
}else
|
180
|
+
if ( field_len > 0 ) {
|
181
|
+
*line_ptr += (len = mol_copy_check_empty( str, p, field_len, &q ));
|
182
|
+
*q = '\0';
|
183
|
+
if ( !len || !(q-str) ) { /* empty string */
|
184
|
+
ddata = 0.0;
|
185
|
+
ret = 0;
|
186
|
+
}else
|
187
|
+
if ( (ddata=strtod(str,&p_end), p_end != q) ){ /* wrong data */
|
188
|
+
ret = -1;
|
189
|
+
}
|
190
|
+
}else{ /* free format */
|
191
|
+
ddata = strtod( p, &p_end );
|
192
|
+
*line_ptr += ( len = p_end - p );
|
193
|
+
if ( len == 0 ){
|
194
|
+
ret = 0;
|
195
|
+
}
|
196
|
+
}
|
197
|
+
switch(data_type){
|
198
|
+
case MOL_DOUBLE_DATA:
|
199
|
+
if ( ddata != HUGE_VAL && /*ldata*/ ddata != -HUGE_VAL ){ /* replaced ldata with ddata 6-30-98 DCh */
|
200
|
+
*(double*)data = ddata;
|
201
|
+
}else{
|
202
|
+
*(double*)data = 0.0;
|
203
|
+
ret = -1;
|
204
|
+
}
|
205
|
+
break;
|
206
|
+
case MOL_FLOAT_DATA:
|
207
|
+
if ( fabs(ddata) <= (double)FLT_MIN ) {
|
208
|
+
*(float*)data = 0.0;
|
209
|
+
}else
|
210
|
+
if ( fabs(ddata) >= (double)FLT_MAX ) {
|
211
|
+
*(float*)data = 0.0;
|
212
|
+
ret = -1;
|
213
|
+
}else{
|
214
|
+
*(float*)data = (float)ddata;
|
215
|
+
}
|
216
|
+
break;
|
217
|
+
}
|
218
|
+
} /* block end */
|
219
|
+
break;
|
220
|
+
case MOL_JUMP_TO_RIGHT:
|
221
|
+
for ( i = 0; i < field_len && p[i]; i++ )
|
222
|
+
;
|
223
|
+
*line_ptr += i;
|
224
|
+
ret = i;
|
225
|
+
break;
|
226
|
+
default:
|
227
|
+
ret = -1;
|
228
|
+
}
|
229
|
+
return ret;
|
230
|
+
}
|
231
|
+
/*************************************************************************/
|
232
|
+
/*
 * Read the three-line MOLfile header block into *hdr.
 *
 *   line 1: molecule name
 *   line 2: user initials, program name, date/time, dimension code,
 *           scaling factors, energy, internal registry number;
 *           the whole line is also saved in hdr->szMoleculeLine2
 *   line 3: comment
 *
 * Strictness is deliberately relaxed: lines longer than the nominal maximum
 * are not rejected, and fields of line 2 that cannot be interpreted are
 * ignored.
 *
 * Returns 0 on success, or 1, 3 or 7 when header line 1, 2 or 3 respectively
 * cannot be read. pStrErr is not used here.
 */
static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr)
{
    char  buf[MOLFILEINPLINELEN]; /* + cr + lf + zero termination + reserve */
    char *cur;

    /*------------ header line #1: name ----------------*/
    cur = fgets_up_to_lf( buf, (int)sizeof(buf), inp );
    if ( cur == NULL ) {
        return 1; /* can't read the input file line */
    }
    remove_one_lf( buf );
    (void)mol_read_datum( hdr->szMoleculeName, sizeof(hdr->szMoleculeName)-1, MOL_STRING_DATA, &cur );

    /*----------- header line #2 -----------------------*/
    cur = fgets_up_to_lf( buf, (int)sizeof(buf), inp );
    if ( cur == NULL ) {
        return 3; /* can't read the input file line */
    }
    remove_one_lf( buf );
    (void)mol_read_datum( hdr->szUserInitials, sizeof(hdr->szUserInitials)-1, MOL_STRING_DATA, &cur );
    (void)mol_read_datum( hdr->szProgramName,  sizeof(hdr->szProgramName)-1,  MOL_STRING_DATA, &cur );

    /* the results are ignored on purpose: a malformed field is not an error */
    (void)mol_read_datum( &hdr->cMonth,  2, MOL_CHAR_INT_DATA, &cur );
    (void)mol_read_datum( &hdr->cDay,    2, MOL_CHAR_INT_DATA, &cur );
    (void)mol_read_datum( &hdr->cYear,   2, MOL_CHAR_INT_DATA, &cur );
    (void)mol_read_datum( &hdr->cHour,   2, MOL_CHAR_INT_DATA, &cur );
    (void)mol_read_datum( &hdr->cMinute, 2, MOL_CHAR_INT_DATA, &cur );
    (void)mol_read_datum( hdr->szDimCode, sizeof(hdr->szDimCode)-1, MOL_STRING_DATA, &cur );
    (void)mol_read_datum( &hdr->nScalingFactor1,          2, MOL_SHORT_INT_DATA, &cur );
    (void)mol_read_datum( &hdr->dScalingFactor2,         10, MOL_DOUBLE_DATA,    &cur );
    (void)mol_read_datum( &hdr->dEnergy,                 12, MOL_DOUBLE_DATA,    &cur );
    (void)mol_read_datum( &hdr->lInternalRegistryNumber,  6, MOL_LONG_INT_DATA,  &cur );

    /* save the whole line 2: rewind to the start of the buffer */
    cur = buf;
    (void)mol_read_datum( hdr->szMoleculeLine2, sizeof(hdr->szMoleculeLine2)-1, MOL_STRING_DATA, &cur );

    /*------------ header line #3: comment ----------------*/
    cur = fgets_up_to_lf( buf, (int)sizeof(buf), inp );
    if ( cur == NULL ) {
        return 7; /* can't read the line */
    }
    remove_one_lf( buf );
    (void)mol_read_datum( hdr->szComment, sizeof(hdr->szComment)-1, MOL_STRING_DATA, &cur );

    return 0;
}
|
308
|
+
/***************************************************************/
|
309
|
+
int RemoveNonPrintable( char *line )
|
310
|
+
{
|
311
|
+
int i, c, num = 0;
|
312
|
+
if ( line ) {
|
313
|
+
for ( i = 0; c = UCINT line[i]; i ++ ) {
|
314
|
+
/* assuming ASCII charset */
|
315
|
+
if ( c < ' ' || c >= 0x7F ) {
|
316
|
+
line[i] = '.';
|
317
|
+
num ++;
|
318
|
+
}
|
319
|
+
}
|
320
|
+
}
|
321
|
+
return num;
|
322
|
+
}
|
323
|
+
/***************************************************************/
|
324
|
+
/*
 * Read and interpret the counts line of a connection table.
 *
 * Fills the atom/bond counts, chiral flag, STEXT entry count, number of
 * property lines and the ctab version string of *ctab. Fields belonging to
 * features that are compiled out (MOL_QUERY, MOL_CPSS) are skipped.
 *
 * Messages are accumulated in pStrErr. An over-long line only produces a
 * message, not an error code.
 *
 * Returns 0 on success, 1 if the line cannot be read, 3 if it cannot be
 * interpreted.
 */
static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr )
{
    char  line[MOLFILEINPLINELEN];
    char *pos;
    int   err = 0;
    int   bad_counts;

    pos = fgets_up_to_lf( line, (int)sizeof(line), inp );
    if ( pos == NULL ) {
        /* can't read the input file line */
        MOLFILE_ERR_FIN (err, 1, err_fin, "Cannot read counts line");
    }
    remove_one_lf( line );
    if ( line[MOLFILEMAXLINELEN] ) {
        /* too long input file line: message only */
        MOLFILE_ERR_SET (err, 0, "Too long counts line");
    }

    /* the fields are read left to right; reading stops at the first failure */
    bad_counts = (
           0 > mol_read_datum( &ctab->nNumberOfAtoms, 3, MOL_SHORT_INT_DATA, &pos )
        || 0 > mol_read_datum( &ctab->nNumberOfBonds, 3, MOL_SHORT_INT_DATA, &pos )
#if ( MOL_QUERY == MOL_PRESENT )
        || 0 > mol_read_datum( &ctab->nNumberOfAtomsLists, 3, MOL_SHORT_INT_DATA, &pos )
#else
        || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
        || 0 > mol_read_datum( NULL, /*obsolete*/ 3, MOL_JUMP_TO_RIGHT, &pos )
        || 0 > mol_read_datum( &ctab->cChiralFlag, 3, MOL_CHAR_INT_DATA, &pos )
        || 0 > mol_read_datum( &ctab->nNumberOfStextEntries, 3, MOL_SHORT_INT_DATA, &pos )
#if ( MOL_CPSS == MOL_PRESENT )
        || 0 > mol_read_datum( &ctab->nNumberOfReactionComponentsPlus1, 3, MOL_SHORT_INT_DATA, &pos )
        || 0 > mol_read_datum( &ctab->nNumberOfReactants, 3, MOL_SHORT_INT_DATA, &pos )
        || 0 > mol_read_datum( &ctab->nNumberOfProducts, 3, MOL_SHORT_INT_DATA, &pos )
        || 0 > mol_read_datum( &ctab->nNumberOfIntermediates, 3, MOL_SHORT_INT_DATA, &pos )
#else
        || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
        || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
        || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
        || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
        || 0 > mol_read_datum( &ctab->nNumberOfPropertyLines, 3, MOL_SHORT_INT_DATA, &pos )
    );

    if ( bad_counts ) {
        /* can't interpret counts line: report it together with its text */
        err = 3;
        MOLFILE_ERR_SET (err, 3, "Cannot interpret counts line:");
        RemoveNonPrintable( line );
        AddMOLfileError(pStrErr, line);
        goto err_fin;
    }

    /* the rest of the line holds the ctab version; its result is not checked */
    (void)mol_read_datum( ctab->csCurrentCtabVersion, sizeof(ctab->csCurrentCtabVersion)-1, MOL_STRING_DATA, &pos );

err_fin:
    return err;
}
|
371
|
+
|
372
|
+
/*************************************************************************/
|
373
|
+
/*
 * Read ctab->nNumberOfAtoms lines of the atom block.
 *
 * err      error state on entry. While it is non-zero the lines are only
 *          consumed, not interpreted, so that the input stays in sync.
 * pStrErr  accumulates error messages.
 *
 * If ctab->szCoord is not NULL, the beginning of each line (the original
 * coordinates text) is saved there. If ctab->MolAtom is not NULL, the line
 * is parsed into the atom record; the charge code of the atom line is
 * converted into an actual charge or a doublet radical.
 *
 * Returns the resulting error code: 2 = cannot read a line, 4/5 = cannot
 * interpret the first/second half of a line. The code is negated when the
 * SDF end-of-data line is met while an error is set.
 */
static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    /* charge code:                        0  1  2  3   4    5   6   7 */
    static S_SHORT charge_decode[] = {0, 3, 2, 1, 'R', -1, -2, -3};
    char      line[MOLFILEINPLINELEN];
    char     *pos;
    S_SHORT   iat, code;
    MOL_ATOM *atom;
    int       bad_line;

    for ( iat = 0; iat < ctab->nNumberOfAtoms; iat++ ) {

        pos = fgets_up_to_lf( line, (int)sizeof(line), inp );
        if ( pos == NULL ) {
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read atom block line");
            }
            break;
        }
        remove_one_lf( line );
        if ( line[MOLFILEMAXLINELEN] ) {
            /* message only: the error code is left unchanged */
            MOLFILE_ERR_SET (err, 0, "Too long atom block line");
        }
        if ( err ) {
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue; /* bypass the rest of the Atom block */
        }
        if ( ctab->szCoord != NULL ) {
            mystrncpy( ctab->szCoord[iat], pos, 31 ); /* original coordinates */
        }
        if ( ctab->MolAtom == NULL ) {
            continue; /* nothing to fill in */
        }
        atom = &ctab->MolAtom[iat];

        /*---- first half of the line; reading stops at the first failure ----*/
        bad_line = (
               0 > mol_read_datum( &atom->fX, 10, MOL_DOUBLE_DATA, &pos )
            || 0 > mol_read_datum( &atom->fY, 10, MOL_DOUBLE_DATA, &pos )
            || 0 > mol_read_datum( &atom->fZ, 10, MOL_DOUBLE_DATA, &pos )
            || 0 > mol_read_datum( NULL, /* undescribed in article*/ 1, MOL_JUMP_TO_RIGHT, &pos )
            || 0 == mol_read_datum( &atom->szAtomSymbol, 3, MOL_STRING_DATA, &pos ) /* an empty symbol is an error */
#ifdef INCHI_MAIN
            || 0 > mol_read_datum( &atom->cMassDifference, 2, MOL_SHORT_INT_DATA, &pos )
#else
            || 0 > mol_read_datum( &atom->cMassDifference, 2, MOL_CHAR_INT_DATA, &pos )
#endif
            || 0 > mol_read_datum( &atom->cCharge, 3, MOL_CHAR_INT_DATA, &pos )
            || 0 > mol_read_datum( &atom->cStereoParity, 3, MOL_CHAR_INT_DATA, &pos )
#if ( MOL_QUERY == MOL_PRESENT )
            || 0 > mol_read_datum( &atom->cH_countPlus1, 3, MOL_CHAR_INT_DATA, &pos )
            || 0 > mol_read_datum( &atom->cStereoCare, 3, MOL_CHAR_INT_DATA, &pos )
#else
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
            || 0 > mol_read_datum( &atom->cValence, 3, MOL_CHAR_INT_DATA, &pos )
        );
        if ( bad_line ) {
            /* can't interpret the first half of atom block line */
            err = 4;
            MOLFILE_ERR_SET (err, 4, "Cannot interpret atom block line:");
            RemoveNonPrintable( line );
            AddMOLfileError(pStrErr, line);
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue;
        }

        /* two-letter symbol: make the second letter lower case */
        if ( 2 == strlen(atom->szAtomSymbol) && isupper(UCINT atom->szAtomSymbol[1]) ) {
            atom->szAtomSymbol[1] = (char)tolower(UCINT atom->szAtomSymbol[1]);
        }

        /* convert the charge code into charge / radical */
        code = (S_SHORT) atom->cCharge;
        if ( code < 0 || code >= (int)(sizeof ( charge_decode ) / sizeof( charge_decode[0] )) ) {
            /* codes outside the table are accepted to allow greater charges */
            atom->cCharge  = (S_CHAR)(4 - code);
            atom->cRadical = 0;
        } else {
            code = charge_decode[code];
            if ( 'R' == code ) {
                atom->cCharge  = 0;
                atom->cRadical = RADICAL_DOUBLET;
            } else {
                atom->cCharge  = (S_CHAR)code; /* actual charge value */
                atom->cRadical = 0;
            }
        }
#ifdef INCHI_MAIN
        if ( atom->cMassDifference ) {
            atom->cMassDifference += ISOTOPIC_SHIFT_FLAG;
        }
#endif

        /*---- second half of the line ----*/
        bad_line = (
#if ( MOL_CPSS == MOL_PRESENT )
               0 > mol_read_datum( &atom->cH0_designator, 3, MOL_CHAR_INT_DATA, &pos )
            || 0 > mol_read_datum( &atom->cReactionComponentType, 3, MOL_CHAR_INT_DATA, &pos )
            || 0 > mol_read_datum( &atom->cReactionComponentNumber, 3, MOL_CHAR_INT_DATA, &pos )
#else
               0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
#if ( MOL_REACT == MOL_PRESENT )
            || 0 > mol_read_datum( &atom->nAtomAtomMappingNumber, 3, MOL_SHORT_INT_DATA, &pos )
            || 0 > mol_read_datum( &atom->cReactionComponentType, 3, MOL_CHAR_INT_DATA, &pos )
#else
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
#if ( MOL_REACT == MOL_PRESENT || MOL_QUERY == MOL_PRESENT )
            || 0 > mol_read_datum( &atom->cExactChargeFlag, 3, MOL_CHAR_INT_DATA, &pos )
#else
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
        );
        if ( bad_line ) {
            /* can't interpret the second half of atom block line */
            err = 5;
            MOLFILE_ERR_SET (err, 5, "Cannot interpret atom block line:");
            RemoveNonPrintable( line );
            AddMOLfileError(pStrErr, line);
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue;
        }
    }
    return err;
}
|
502
|
+
/*************************************************************************/
|
503
|
+
/*
 * Read ctab->nNumberOfBonds lines of the bond block.
 *
 * err      error state on entry. While it is non-zero the lines are only
 *          consumed, not interpreted.
 * pStrErr  accumulates error messages.
 *
 * If ctab->MolBond is not NULL each line is parsed into the bond record;
 * fields of features that are compiled out (MOL_QUERY, MOL_REACT) are
 * skipped.
 *
 * Returns the resulting error code: 2 = cannot read a line, 3 = too long
 * line, 4 = cannot interpret a line. The code is negated when the SDF
 * end-of-data line is met while an error is set.
 */
static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char     line[MOLFILEINPLINELEN];
    char    *pos;
    S_SHORT  ibond;
    int      bad_line;

    for ( ibond = 0; ibond < ctab->nNumberOfBonds; ibond++ ) {

        pos = fgets_up_to_lf( line, (int)sizeof(line), inp );
        if ( pos == NULL ) {
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read bond block line");
            }
            break;
        }
        remove_one_lf( line );
        if ( line[MOLFILEMAXLINELEN] && !err ) {
            err = 3; /* too long input file line */
        }
        if ( err ) {
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue; /* bypass the rest of the bond block */
        }
        if ( !ctab->MolBond ) {
            continue; /* nothing to fill in */
        }

        /* the fields are read left to right; reading stops at the first failure */
        bad_line = (
               0 > mol_read_datum( &ctab->MolBond[ibond].nAtomNo1, 3, MOL_SHORT_INT_DATA, &pos )
            || 0 > mol_read_datum( &ctab->MolBond[ibond].nAtomNo2, 3, MOL_SHORT_INT_DATA, &pos )
            || 0 > mol_read_datum( &ctab->MolBond[ibond].cBondType, 3, MOL_CHAR_INT_DATA, &pos )
            || 0 > mol_read_datum( &ctab->MolBond[ibond].cBondStereo, 3, MOL_CHAR_INT_DATA, &pos )
#if ( MOL_QUERY == MOL_PRESENT )
            || 0 > mol_read_datum( &ctab->MolBond[ibond].cBondTopology, 3, MOL_CHAR_INT_DATA, &pos ) /* ring/chain */
#else
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
#if ( MOL_REACT == MOL_PRESENT )
            || 0 > mol_read_datum( &ctab->MolBond[ibond].cReactingCenterStatus, 3, MOL_CHAR_INT_DATA, &pos )
#else
            || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &pos )
#endif
        );
        if ( bad_line ) {
            if ( !err ) {
                /* can't interpret bonds block line: report it with its text */
                MOLFILE_ERR_SET (err, 4, "Cannot interpret bond block line:");
                RemoveNonPrintable( line );
                AddMOLfileError(pStrErr, line);
            }
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
        }
    }
    return err;
}
|
567
|
+
/*************************************************************************/
|
568
|
+
/*
 * Pass by the STEXT block: two input lines are consumed per STEXT entry
 * without any attempt to interpret them. Over-long lines are not checked.
 *
 * err      error state on entry; returned unchanged unless a line cannot be
 *          read while err is still zero, in which case it becomes 2 and a
 *          message is added to pStrErr.
 */
static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    char      line[MOLFILEINPLINELEN];
    const int total_lines = 2*ctab->nNumberOfStextEntries;
    S_SHORT   n_read = 0;

    while ( n_read < total_lines ) {
        if ( fgets_up_to_lf( line, (int)sizeof(line), inp ) == NULL ) {
            /* can't read the input file line */
            if ( !err ) {
                MOLFILE_ERR_FIN (err, 2, err_fin, "Cannot read STEXT block line");
            }
            break;
        }
        n_read++;
    }
err_fin:
    return err;
}
|
596
|
+
/*************************************************************************/
|
597
|
+
static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr )
|
598
|
+
{
|
599
|
+
enum { MULTI_LINE_MODE_NO_MODE, MULTI_LINE_MODE_ISIS_ALIAS };
|
600
|
+
char *p;
|
601
|
+
char line[MOLFILEINPLINELEN];
|
602
|
+
const int line_len = sizeof(line);
|
603
|
+
int nMultiLineMode = MULTI_LINE_MODE_NO_MODE, nAtomNumber=0;
|
604
|
+
S_SHORT i, j;
|
605
|
+
char charM[2];
|
606
|
+
char szBlank[3];
|
607
|
+
char szType[4];
|
608
|
+
S_SHORT skip_lines=0;
|
609
|
+
S_SHORT num_entries;
|
610
|
+
S_SHORT num_atoms = ctab->nNumberOfAtoms;
|
611
|
+
|
612
|
+
int charge_encountered = 0;
|
613
|
+
int radical_encountered = 0;
|
614
|
+
int isotope_encountered = 0;
|
615
|
+
/*
|
616
|
+
if ( NULL == ctab->MolAtom ){
|
617
|
+
err = 1;
|
618
|
+
goto err_fin; internal error: memory has not been allocated for MolAtom structure
|
619
|
+
}
|
620
|
+
*/
|
621
|
+
for ( i = 0; ctab->csCurrentCtabVersion[0]? 1 : (i < ctab->nNumberOfPropertyLines); i++ ) { /* the last line should be M END */
|
622
|
+
/* ctab->csCurrentCtabVersion[0] == 0:
|
623
|
+
exactly ctab->nNumberOfPropertyLines lines including M END */
|
624
|
+
/* ctab->csCurrentCtabVersion[0] != 0:
|
625
|
+
read until M END line was encountered */
|
626
|
+
if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
|
627
|
+
if ( !err ) {
|
628
|
+
MOLFILE_ERR_SET (err, 2, "Cannot read properties block line");
|
629
|
+
}
|
630
|
+
goto err_fin;
|
631
|
+
}
|
632
|
+
remove_one_lf( line );
|
633
|
+
if ( line[MOLFILEMAXLINELEN] ){
|
634
|
+
MOLFILE_ERR_SET (err, 3, "Too long properties block line");
|
635
|
+
continue;
|
636
|
+
}
|
637
|
+
if ( skip_lines > 0 ) {
|
638
|
+
skip_lines --;
|
639
|
+
continue;
|
640
|
+
}
|
641
|
+
/* alias. */
|
642
|
+
if ( nMultiLineMode == MULTI_LINE_MODE_ISIS_ALIAS && nAtomNumber ) {
|
643
|
+
int len;
|
644
|
+
nMultiLineMode = MULTI_LINE_MODE_NO_MODE;
|
645
|
+
if ( 0 >= (len=normalize_name( p )) ) {
|
646
|
+
nAtomNumber = 0;
|
647
|
+
continue;
|
648
|
+
}
|
649
|
+
if( 0 < len && len < (int)(sizeof(ctab->MolAtom->szAtomSymbol)) ) {
|
650
|
+
int nCharge, nRad;
|
651
|
+
MOL_ATOM* MolAtom = ctab->MolAtom + nAtomNumber-1;
|
652
|
+
/* ctab->MolAtom[nAtomNumber-1].cAtomAliasedFlag = 1; */
|
653
|
+
/* extract radicals & charges */
|
654
|
+
extract_ChargeRadical( p, &nRad, &nCharge );
|
655
|
+
/* Aliased atom cannot have charge, radical & mass difference */
|
656
|
+
/* in the atom table or "M CHG", "M RAD", "M ISO" */
|
657
|
+
/* if ( nCharge ) */
|
658
|
+
MolAtom->cCharge = (S_CHAR)nCharge;
|
659
|
+
/* if ( nRad ) */
|
660
|
+
MolAtom->cRadical = (char)nRad;
|
661
|
+
|
662
|
+
if ( 1 == len && 'D' == p[0] ) {
|
663
|
+
/* H isotope */
|
664
|
+
p[0] = 'H';
|
665
|
+
#ifdef INCHI_MAIN
|
666
|
+
MolAtom->cMassDifference=(1 + ISOTOPIC_SHIFT_FLAG);
|
667
|
+
#else
|
668
|
+
MolAtom->cMassDifference=1;
|
669
|
+
#endif
|
670
|
+
} else
|
671
|
+
if ( 1 == len && 'T' == p[0] ) {
|
672
|
+
/* H isotope */
|
673
|
+
p[0] = 'H';
|
674
|
+
#ifdef INCHI_MAIN
|
675
|
+
MolAtom->cMassDifference=(2 + ISOTOPIC_SHIFT_FLAG);
|
676
|
+
#else
|
677
|
+
MolAtom->cMassDifference=2;
|
678
|
+
#endif
|
679
|
+
} else
|
680
|
+
MolAtom->cMassDifference=0;
|
681
|
+
if ( strlen(p) < sizeof(ctab->MolAtom[0].szAtomSymbol) ) {
|
682
|
+
strcpy(MolAtom->szAtomSymbol, p);
|
683
|
+
} else {
|
684
|
+
strcpy(MolAtom->szAtomSymbol, "???");
|
685
|
+
}
|
686
|
+
MolAtom->cAtomAliasedFlag ++;
|
687
|
+
}
|
688
|
+
skip_lines = 0;
|
689
|
+
nAtomNumber = 0;
|
690
|
+
continue;
|
691
|
+
}
|
692
|
+
|
693
|
+
if ( 1 != mol_read_datum( charM, sizeof(charM) - 1, MOL_STRING_DATA, &p )
|
694
|
+
|| 0 != mol_read_datum( szBlank, sizeof(szBlank) - 1, MOL_STRING_DATA, &p ) /* must contain 0 bytes */
|
695
|
+
|| 0 >= mol_read_datum( szType, sizeof(szType) - 1, MOL_STRING_DATA, &p ) /* must contain 3 bytes */
|
696
|
+
) {
|
697
|
+
if ( !strcmp( line, SDF_END_OF_DATA ) ) {
|
698
|
+
err = err? -abs(err): -4;
|
699
|
+
break;
|
700
|
+
}
|
701
|
+
continue; /* ignore because cannot recognize */
|
702
|
+
}
|
703
|
+
if ( charM[0] == 'V' ){
|
704
|
+
skip_lines = 0; /* ISIS/Desktop Atom Value: one-line property */
|
705
|
+
continue;
|
706
|
+
}
|
707
|
+
if ( charM[0] == 'G' ){
|
708
|
+
skip_lines = 1; /* ISIS/Desktop Group abbreviation: two-line property */
|
709
|
+
continue;
|
710
|
+
}
|
711
|
+
if ( charM[0] == 'A' ) {
|
712
|
+
if ( NULL != ctab->MolAtom &&
|
713
|
+
0 < ( nAtomNumber = (int)strtol(szType, NULL, 10) ) &&
|
714
|
+
nAtomNumber <= ctab->nNumberOfAtoms ){
|
715
|
+
/* Atom Alias [ISIS/Desktop] two-line property */
|
716
|
+
nMultiLineMode = MULTI_LINE_MODE_ISIS_ALIAS;
|
717
|
+
continue;
|
718
|
+
} else {
|
719
|
+
nAtomNumber = 0;
|
720
|
+
skip_lines = 1;
|
721
|
+
continue;
|
722
|
+
}
|
723
|
+
}
|
724
|
+
if ( charM[0] == 'S' && !strcmp( szType, "SKP" ) ){ /* skip lines */
|
725
|
+
if ( 0 >= mol_read_datum( &skip_lines, 3, MOL_SHORT_INT_DATA, &p ) ) {
|
726
|
+
skip_lines = 0;
|
727
|
+
}
|
728
|
+
continue;
|
729
|
+
}
|
730
|
+
if ( charM[0] != 'M' ) {/* cannot recognize a line */
|
731
|
+
continue;
|
732
|
+
}
|
733
|
+
if ( !strcmp( szType, "REG" ) ) {
|
734
|
+
int len;
|
735
|
+
p = p + strspn( p, " " );
|
736
|
+
len = strcspn( p, " " );
|
737
|
+
len = inchi_min( len, MOL_MAX_VALUE_LEN );
|
738
|
+
mol_read_datum( &pHdr->lInternalRegistryNumber, len, MOL_LONG_INT_DATA, &p );
|
739
|
+
continue;
|
740
|
+
}
|
741
|
+
|
742
|
+
if ( !strcmp( szType, "END" ) ){
|
743
|
+
if ( ctab->csCurrentCtabVersion[0] )
|
744
|
+
break; /* end of property lines */
|
745
|
+
continue;
|
746
|
+
}
|
747
|
+
|
748
|
+
if ( NULL == ctab->MolAtom )
|
749
|
+
continue; /* ignore because the user requested to bypass all this stuff */
|
750
|
+
|
751
|
+
/*----------------------------------- charge: Generic */
|
752
|
+
if ( !strcmp( szType, "CHG" ) &&
|
753
|
+
0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
|
754
|
+
1 <= num_entries && num_entries <= 8 ) {
|
755
|
+
S_SHORT atoms[8];
|
756
|
+
S_SHORT charges[8];
|
757
|
+
if ( !charge_encountered && !radical_encountered ) {
|
758
|
+
/* first charge or radical record clears all Atom Block */
|
759
|
+
/* entered charge and radical data to zeroes */
|
760
|
+
charge_encountered = -1;
|
761
|
+
}
|
762
|
+
for ( j = 0; j < num_entries; j++ ) {
|
763
|
+
if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
764
|
+
0 > mol_read_datum( &charges[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
765
|
+
atoms[j] <= 0 || atoms[j] > num_atoms ||
|
766
|
+
charges[j] < -15 || charges[j] > 15 ) {
|
767
|
+
goto charge_error;
|
768
|
+
}
|
769
|
+
}
|
770
|
+
if ( charge_encountered == -1 ) {
|
771
|
+
for ( j = 0; j < num_atoms; j++ ) {
|
772
|
+
if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms.*/
|
773
|
+
ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
|
774
|
+
}
|
775
|
+
charge_encountered = 1;
|
776
|
+
}
|
777
|
+
for ( j = 0; j < num_entries; j++ ) {
|
778
|
+
if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) /* do not change aliased atoms.*/
|
779
|
+
ctab->MolAtom[atoms[j]-1].cCharge = (S_CHAR)charges[j];
|
780
|
+
}
|
781
|
+
continue;
|
782
|
+
charge_error:
|
783
|
+
MOLFILE_ERR_SET (err, 0, "Charge not recognized:");
|
784
|
+
RemoveNonPrintable( line );
|
785
|
+
AddMOLfileError(pStrErr, line);
|
786
|
+
continue; /* ignore for now */
|
787
|
+
}
|
788
|
+
/*-------------------------------------- radical: Generic */
|
789
|
+
if ( !strcmp( szType, "RAD" ) &&
|
790
|
+
0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
|
791
|
+
1 <= num_entries && num_entries <= 8 ) {
|
792
|
+
S_SHORT atoms[8];
|
793
|
+
S_SHORT radicals[8];
|
794
|
+
if ( !charge_encountered && !radical_encountered ) {
|
795
|
+
/* first charge or radical record clears all Atom Block */
|
796
|
+
/* entered charge and radical data to zeroes */
|
797
|
+
radical_encountered = -1;
|
798
|
+
}
|
799
|
+
for ( j = 0; j < num_entries; j++ ) {
|
800
|
+
if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
801
|
+
0 > mol_read_datum( &radicals[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
802
|
+
atoms[j] <= 0 || atoms[j] > num_atoms ||
|
803
|
+
radicals[j] < 0 || radicals[j] > 3 ) {
|
804
|
+
goto radical_error;
|
805
|
+
}
|
806
|
+
}
|
807
|
+
if ( radical_encountered == -1 ) {
|
808
|
+
for ( j = 0; j < num_atoms; j++ ) {
|
809
|
+
if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms. 5-3-99 DCh */
|
810
|
+
ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
|
811
|
+
}
|
812
|
+
radical_encountered = 1;
|
813
|
+
}
|
814
|
+
for ( j = 0; j < num_entries; j++ ) {
|
815
|
+
if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) { /* do not change aliased atoms. 5-3-99 DCh */
|
816
|
+
ctab->MolAtom[atoms[j]-1].cRadical = (S_CHAR)radicals[j];
|
817
|
+
}
|
818
|
+
}
|
819
|
+
continue;
|
820
|
+
radical_error:
|
821
|
+
MOLFILE_ERR_SET (err, 0, "Radical not recognized:");
|
822
|
+
RemoveNonPrintable( line );
|
823
|
+
AddMOLfileError(pStrErr, line);
|
824
|
+
continue; /* ignore error for now */
|
825
|
+
}
|
826
|
+
/*-------------------------------------- isotope: Generic */
|
827
|
+
if ( !strcmp( szType, "ISO" ) &&
|
828
|
+
0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
|
829
|
+
1 <= num_entries && num_entries <= 8 ) {
|
830
|
+
S_SHORT atoms[8];
|
831
|
+
S_SHORT iso_mass[8]; /* contains istotope mass number, not difference. 7-14-00 DCh. */
|
832
|
+
if ( !isotope_encountered ) {
|
833
|
+
/* first charge or radical record clears all Atom Block */
|
834
|
+
/* entered charge and radical data to zeroes */
|
835
|
+
isotope_encountered = -1;
|
836
|
+
}
|
837
|
+
for ( j = 0; j < num_entries; j++ ) {
|
838
|
+
if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
839
|
+
0 > mol_read_datum( &iso_mass[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
840
|
+
atoms[j] <= 0 || atoms[j] > num_atoms
|
841
|
+
/*|| iso_mass[j] < -18 || iso_mass[j] > 12*/ ) {
|
842
|
+
/* goto isotope_error; */
|
843
|
+
atoms[j] = -1; /* flag error */
|
844
|
+
MOLFILE_ERR_SET (err, 0, "Isotopic data not recognized:");
|
845
|
+
RemoveNonPrintable( line );
|
846
|
+
AddMOLfileError(pStrErr, line);
|
847
|
+
continue; /* ignore isotopic error for now */
|
848
|
+
}
|
849
|
+
}
|
850
|
+
if ( isotope_encountered == -1 ) {
|
851
|
+
for ( j = 0; j < num_atoms; j++ ) {
|
852
|
+
/*if ( !ctab->MolAtom[j].cAtomAliasedFlag )*/ /* clear even aliased atoms */
|
853
|
+
ctab->MolAtom[j].cMassDifference = 0;
|
854
|
+
}
|
855
|
+
isotope_encountered = 1;
|
856
|
+
}
|
857
|
+
for ( j = 0; j < num_entries; j++ ) {
|
858
|
+
if ( atoms[j] <= 0 )
|
859
|
+
continue; /* ignore isotopic error for now */
|
860
|
+
if ( 1 /* !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag */) {
|
861
|
+
char *at = ctab->MolAtom[atoms[j]-1].szAtomSymbol;
|
862
|
+
if ( at[1] || at[0] != 'D' && at[0] != 'T' ) { /* D & T cannot have ISO */
|
863
|
+
/* need atomic weight to calculate isotope difference. 7-14-00 DCh. */
|
864
|
+
#ifdef INCHI_MAIN
|
865
|
+
ctab->MolAtom[atoms[j]-1].cMassDifference = iso_mass[j]; /* mass, not difference */
|
866
|
+
#else
|
867
|
+
int atw, atw_diff;
|
868
|
+
if ( (atw = get_atw( at )) && abs( atw_diff = (int)iso_mass[j] - atw ) < 20 ) {
|
869
|
+
ctab->MolAtom[atoms[j]-1].cMassDifference = (char)(atw_diff? atw_diff : ZERO_ATW_DIFF);
|
870
|
+
}
|
871
|
+
#endif
|
872
|
+
}
|
873
|
+
}
|
874
|
+
}
|
875
|
+
continue;
|
876
|
+
}
|
877
|
+
}
|
878
|
+
err_fin:
|
879
|
+
return err;
|
880
|
+
}
|
881
|
+
/*************************************************************************/
|
882
|
+
/*
 * Destroy a MOL_DATA structure.
 *
 * Frees the atom, bond and coordinate arrays attached to the connection
 * table (each only if it is non-NULL) and then the structure itself.
 *
 * mol_data  structure to release; a NULL pointer is accepted and ignored.
 *
 * Returns NULL in every case, so a caller may write
 *     p = delete_mol_data( p );
 * to free the data and reset its pointer in one statement.
 */
MOL_DATA* delete_mol_data( MOL_DATA* mol_data )
{
    if ( NULL == mol_data ) {
        return NULL;   /* nothing to release */
    }

    /* member arrays first, the container last */
    if ( mol_data->ctab.MolAtom ) {
        inchi_free( mol_data->ctab.MolAtom );
    }
    if ( mol_data->ctab.MolBond ) {
        inchi_free( mol_data->ctab.MolBond );
    }
    if ( mol_data->ctab.szCoord ) {
        inchi_free( mol_data->ctab.szCoord );
    }
    inchi_free( mol_data );

    return NULL;
}
|
896
|
+
/*************************************************************************/
|
897
|
+
/* Comletely ingnore STEXT block, queries, and 3D features
|
898
|
+
*/
|
899
|
+
MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
|
900
|
+
int bGetOrigCoord, int *err, char *pStrErr )
|
901
|
+
{
|
902
|
+
MOL_DATA* mol_data = NULL;
|
903
|
+
int ret = 0, prev_ret, bEndOfData = 0;
|
904
|
+
int bReadAll = ( OnlyHeaderBlock == NULL );
|
905
|
+
MOL_CTAB ctab, *pCtab = NULL;
|
906
|
+
MOL_HEADER_BLOCK *pHdr = NULL;
|
907
|
+
|
908
|
+
*err = 0;
|
909
|
+
if ( bReadAll ) {
|
910
|
+
if ( NULL == ( mol_data = ( MOL_DATA* )inchi_calloc( 1, sizeof(MOL_DATA) ) ) ){
|
911
|
+
ret = 1; /* can't allocate mol_data structure */
|
912
|
+
AddMOLfileError( pStrErr, "Out of RAM" );
|
913
|
+
goto err_fin;
|
914
|
+
}
|
915
|
+
pHdr = &mol_data->hdr;
|
916
|
+
pCtab = &mol_data->ctab;
|
917
|
+
} else {
|
918
|
+
pHdr = OnlyHeaderBlock;
|
919
|
+
pCtab = OnlyCtab? OnlyCtab : &ctab;
|
920
|
+
memset( pHdr, 0, sizeof( MOL_HEADER_BLOCK ) );
|
921
|
+
memset( pCtab, 0, sizeof( MOL_CTAB ) );
|
922
|
+
}
|
923
|
+
pCtab->MolBond = NULL;
|
924
|
+
pCtab->MolAtom = NULL;
|
925
|
+
pCtab->szCoord = NULL;
|
926
|
+
|
927
|
+
if ( 0 != ( ret = mol_read_hdr(pHdr, inp, pStrErr) ) ){
|
928
|
+
ret += 10;
|
929
|
+
goto err_fin; /* most probably end of file */
|
930
|
+
}
|
931
|
+
if ( 0 != ( ret = mol_read_counts_line( pCtab , inp, pStrErr) ) ){
|
932
|
+
ret += 20;
|
933
|
+
goto err_fin;
|
934
|
+
}
|
935
|
+
|
936
|
+
if ( bReadAll ) {
|
937
|
+
if ( NULL == ( mol_data->ctab.MolAtom = (MOL_ATOM*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_ATOM)) ) ){
|
938
|
+
ret = 2; /* can't allocate MolAtom structure */
|
939
|
+
MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
|
940
|
+
}
|
941
|
+
if ( bGetOrigCoord &&
|
942
|
+
NULL == ( mol_data->ctab.szCoord = (MOL_COORD*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_COORD)) ) ){
|
943
|
+
ret = 2; /* can't allocate MolAtom structure */
|
944
|
+
MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
|
945
|
+
}
|
946
|
+
}
|
947
|
+
if ( 0 != ( ret = read_atom_block(pCtab, inp, ret, pStrErr) ) ){
|
948
|
+
if ( ret < 0 ) {
|
949
|
+
ret = -ret;
|
950
|
+
bEndOfData = 1;
|
951
|
+
}
|
952
|
+
ret += 30;
|
953
|
+
/* goto err_fin; */
|
954
|
+
}
|
955
|
+
|
956
|
+
if ( bReadAll && ret < 30 ) {
|
957
|
+
if ( !bEndOfData && NULL == ( mol_data->ctab.MolBond = (MOL_BONDS*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfBonds,1), sizeof(MOL_BONDS)) ) ){
|
958
|
+
ret = 3; /* can't allocate MolBond structure */
|
959
|
+
MOLFILE_ERR_FIN (ret, 3, err_fin, "Out of RAM");
|
960
|
+
}
|
961
|
+
}
|
962
|
+
prev_ret = ret;
|
963
|
+
if ( !bEndOfData && 0 != ( ret = read_bonds_block(pCtab, inp, ret, pStrErr) ) ){
|
964
|
+
if ( ret < 0 ) {
|
965
|
+
ret = -ret;
|
966
|
+
bEndOfData = 1;
|
967
|
+
}
|
968
|
+
ret = prev_ret? prev_ret : ret + 40;
|
969
|
+
}
|
970
|
+
prev_ret = ret;
|
971
|
+
if ( !bEndOfData && 0 != ( ret = read_stext_block(pCtab, inp, ret, pStrErr) ) ){
|
972
|
+
ret = prev_ret? prev_ret : ret + 50;
|
973
|
+
}
|
974
|
+
prev_ret = ret;
|
975
|
+
if ( !bEndOfData && 0 != ( ret = read_properties_block(pCtab, pHdr, inp, ret, pStrErr) ) ){
|
976
|
+
if ( ret < 0 ) {
|
977
|
+
ret = -ret;
|
978
|
+
bEndOfData = 1;
|
979
|
+
}
|
980
|
+
ret = prev_ret? prev_ret : ret + 60;
|
981
|
+
}
|
982
|
+
|
983
|
+
err_fin:
|
984
|
+
*err = bEndOfData? -ret : ret;
|
985
|
+
if ( bReadAll ) {
|
986
|
+
if ( ret )
|
987
|
+
mol_data = delete_mol_data( mol_data ); /* delete all results */
|
988
|
+
return mol_data;
|
989
|
+
} else {
|
990
|
+
if ( ret )
|
991
|
+
return NULL;
|
992
|
+
else
|
993
|
+
return (MOL_DATA*)OnlyHeaderBlock;
|
994
|
+
}
|
995
|
+
}
|
996
|
+
|
997
|
+
/******************************************************************/
|
998
|
+
/* SData header labels recognised by identify_sdf_label() */
char sdf_data_hdr_name[] = "NAME";
char sdf_data_hdr_comm[] = "COMMENT";

/*
 * Line classes / parser states used while scanning the SData part of an
 * SD-file record: identify_sdf_label() returns one of the SDF_DATA_HEADER*
 * values and bypass_sdf_data_items() keeps its current state in these terms.
 */
enum {
    SDF_START               = 0,  /* initial state, no line processed yet    */
    SDF_DATA_HEADER         = 1,  /* data header with a label of no interest */
    SDF_DATA_HEADER_NAME    = 2,  /* data header labelled "NAME"             */
    SDF_DATA_HEADER_COMMENT = 3,  /* data header labelled "COMMENT"          */
    SDF_DATA_HEADER_CAS     = 4,  /* data header whose label starts "CAS"    */
    SDF_DATA_HEADER_USER    = 5,  /* data header with the user's label       */
    SDF_DATA_LINE           = 6,  /* non-empty line inside a data item       */
    SDF_END_OF_DATA_ITEM    = 7,  /* empty line that closes a data item      */
    SDF_EMPTY_LINE          = 8,  /* extra empty line between data items     */
    SDF_END_OF_DATA_BLOCK   = 9   /* SDF_END_OF_DATA line: end of the record */
};
|
1004
|
+
/******************************************************************/
|
1005
|
+
/*
 * Convert a CAS registry number written with hyphens (for example
 * "50-00-0") into a long.
 *
 * The string is compacted in place: an optional leading '-' is kept as a
 * sign, digits are kept, further hyphens are dropped, and scanning stops at
 * the first character that is neither a digit nor a hyphen.  The compacted
 * text is NUL-terminated and converted with strtol().
 *
 * line  writable NUL-terminated string; its contents are modified.
 *
 * Returns the parsed number; 0 when no digits were found.
 */
static long extract_cas_rn( char *line )
{
    char *src = line;   /* next character to examine    */
    char *dst = line;   /* where the next digit is kept */

    if ( '-' == *line ) {
        /* keep a leading minus sign where it is */
        src ++;
        dst ++;
    }
    while ( *src ) {
        if ( isdigit( UCINT *src ) ) {
            *dst++ = *src;
        } else
        if ( '-' != *src ) {
            break;      /* anything else ends the number */
        }
        src ++;
    }
    *dst = '\0';
    return strtol( line, NULL, 10 );
}
|
1020
|
+
/******************************************************************/
|
1021
|
+
static int identify_sdf_label( char* inp_line, const char *pSdfLabel )
|
1022
|
+
{
|
1023
|
+
char line[MOLFILEMAXLINELEN];
|
1024
|
+
char *p, *q;
|
1025
|
+
int i, j, len;
|
1026
|
+
if ( (p = strchr( inp_line, '<' )) &&
|
1027
|
+
(q = strchr( p, '>' )) &&
|
1028
|
+
(len = q-p-1) > 0 && len < (int)sizeof(line) ) {
|
1029
|
+
memcpy( line, p+1, len );
|
1030
|
+
line[len] = '\0';
|
1031
|
+
for ( i = 0; isspace( UCINT line[i] ); i ++ )
|
1032
|
+
;
|
1033
|
+
for ( j = len-1; j >= i && isspace( UCINT line[i] ); j -- )
|
1034
|
+
;
|
1035
|
+
len = j-i+1;
|
1036
|
+
p = line+i;
|
1037
|
+
if ( pSdfLabel && pSdfLabel[0] && len == (int)strlen(pSdfLabel) && !memicmp( p, pSdfLabel, len ) )
|
1038
|
+
return SDF_DATA_HEADER_USER;
|
1039
|
+
if ( len == sizeof(sdf_data_hdr_name)-1 && !memicmp( p, sdf_data_hdr_name, len ) )
|
1040
|
+
return SDF_DATA_HEADER_NAME;
|
1041
|
+
if ( len == sizeof(sdf_data_hdr_comm)-1 && !memicmp( p, sdf_data_hdr_comm, len ) )
|
1042
|
+
return SDF_DATA_HEADER_COMMENT;
|
1043
|
+
if ( !memicmp( p, "CAS", 3 ) )
|
1044
|
+
return SDF_DATA_HEADER_CAS;
|
1045
|
+
}
|
1046
|
+
return SDF_DATA_HEADER;
|
1047
|
+
}
|
1048
|
+
/******************************************************************/
|
1049
|
+
int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment,
|
1050
|
+
int lcomment, char *name, int lname, int prev_err,
|
1051
|
+
const char *pSdfLabel, char *pSdfValue, char *pStrErr )
|
1052
|
+
{
|
1053
|
+
char line[MOLFILEINPLINELEN];
|
1054
|
+
const int line_len = sizeof(line);
|
1055
|
+
int err = 0;
|
1056
|
+
int current_state = SDF_START;
|
1057
|
+
int n_blank_lines = 0;
|
1058
|
+
int n_lines = 0;
|
1059
|
+
char* p = NULL;
|
1060
|
+
int bNeedsName = name && lname > 0 && !name[0];
|
1061
|
+
int bNeedsComm = comment && lcomment > 0 && !comment[0];
|
1062
|
+
int bNeedsUser = pSdfLabel && pSdfLabel[0] && pSdfValue;
|
1063
|
+
int bNeedsCASrn = 0;
|
1064
|
+
int bCASrnIsUser = 0;
|
1065
|
+
|
1066
|
+
if ( cas_reg_no != NULL ) {
|
1067
|
+
bNeedsCASrn = 1;
|
1068
|
+
*cas_reg_no = 0;
|
1069
|
+
bCASrnIsUser = (bNeedsUser && !memicmp(pSdfLabel,"CAS", 3));
|
1070
|
+
}
|
1071
|
+
|
1072
|
+
while ( err == 0 &&
|
1073
|
+
current_state !=SDF_END_OF_DATA_BLOCK &&
|
1074
|
+
NULL != ( p = fgets_up_to_lf( line, line_len, inp ) ) ) {
|
1075
|
+
|
1076
|
+
if ( !n_lines && !memcmp(line, "M END", 6) ) {
|
1077
|
+
continue; /* allow subtle errors */
|
1078
|
+
}
|
1079
|
+
n_lines++;
|
1080
|
+
|
1081
|
+
remove_trailing_spaces( line );
|
1082
|
+
if ( line[MOLFILEMAXLINELEN] ){
|
1083
|
+
if ( current_state != SDF_DATA_HEADER &&
|
1084
|
+
current_state != SDF_DATA_LINE &&
|
1085
|
+
current_state != SDF_DATA_HEADER_NAME &&
|
1086
|
+
current_state != SDF_DATA_HEADER_USER &&
|
1087
|
+
current_state != SDF_DATA_HEADER_COMMENT ) {
|
1088
|
+
line[MOLFILEMAXLINELEN] = '\0';
|
1089
|
+
if ( !prev_err ) {
|
1090
|
+
MOLFILE_ERR_SET (err, 0, "Too long SData line truncated");
|
1091
|
+
}
|
1092
|
+
} else {
|
1093
|
+
/* allow long lines in SDF data. 9-29-00 DCh */
|
1094
|
+
line[MOLFILEMAXLINELEN] = '\0';
|
1095
|
+
}
|
1096
|
+
}
|
1097
|
+
|
1098
|
+
n_blank_lines += ( *line == '\0' );
|
1099
|
+
|
1100
|
+
switch( current_state ) {
|
1101
|
+
|
1102
|
+
case SDF_START:
|
1103
|
+
case SDF_END_OF_DATA_ITEM:
|
1104
|
+
case SDF_EMPTY_LINE: /* Added 9-25-97 DCh */
|
1105
|
+
|
1106
|
+
if ( 0 == strcmp( line, SDF_END_OF_DATA ) ) {
|
1107
|
+
current_state = SDF_END_OF_DATA_BLOCK;
|
1108
|
+
}
|
1109
|
+
else
|
1110
|
+
if ( '>' == *line ) {
|
1111
|
+
current_state = ( bNeedsName || bNeedsComm || bNeedsCASrn || bNeedsUser )? identify_sdf_label(line, pSdfLabel) : SDF_DATA_HEADER;
|
1112
|
+
}else
|
1113
|
+
if ( *line == '\0' ) { /* Added 9-25-97 DCh */
|
1114
|
+
/* Relax the strictness: Allow more than 1 empty line. */
|
1115
|
+
current_state=SDF_EMPTY_LINE;
|
1116
|
+
} else
|
1117
|
+
if ( !prev_err ) {
|
1118
|
+
MOLFILE_ERR_SET (err, 3, "Unexpected SData header line:");
|
1119
|
+
RemoveNonPrintable( line );
|
1120
|
+
AddMOLfileError(pStrErr, line);
|
1121
|
+
/* unexpected contents of data header line */
|
1122
|
+
} else {
|
1123
|
+
err = 3;
|
1124
|
+
}
|
1125
|
+
break;
|
1126
|
+
|
1127
|
+
case SDF_DATA_HEADER_NAME:
|
1128
|
+
if ( bNeedsName && 0 < normalize_name( line ) ) {
|
1129
|
+
bNeedsName = 0;
|
1130
|
+
mystrncpy( name, line, lname );
|
1131
|
+
}
|
1132
|
+
goto got_data_line;
|
1133
|
+
|
1134
|
+
case SDF_DATA_HEADER_COMMENT:
|
1135
|
+
if ( bNeedsComm && 0 < normalize_name( line ) ) {
|
1136
|
+
bNeedsComm = 0;
|
1137
|
+
mystrncpy( comment, line, lcomment );
|
1138
|
+
}
|
1139
|
+
goto got_data_line;
|
1140
|
+
|
1141
|
+
case SDF_DATA_HEADER_USER:
|
1142
|
+
if ( bNeedsUser && 0 < normalize_name( line ) ) {
|
1143
|
+
bNeedsUser = 0;
|
1144
|
+
mystrncpy( pSdfValue, line, MAX_SDF_VALUE+1 );
|
1145
|
+
if ( bCASrnIsUser && bNeedsCASrn ) {
|
1146
|
+
*cas_reg_no = extract_cas_rn( line );
|
1147
|
+
bNeedsCASrn = (0 == *cas_reg_no);
|
1148
|
+
}
|
1149
|
+
}
|
1150
|
+
goto got_data_line;
|
1151
|
+
|
1152
|
+
case SDF_DATA_HEADER_CAS:
|
1153
|
+
if ( bNeedsCASrn && 0 < normalize_name( line ) ) {
|
1154
|
+
*cas_reg_no = extract_cas_rn( line );
|
1155
|
+
bNeedsCASrn = (0 == *cas_reg_no);
|
1156
|
+
}
|
1157
|
+
goto got_data_line;
|
1158
|
+
|
1159
|
+
case SDF_DATA_HEADER:
|
1160
|
+
case SDF_DATA_LINE:
|
1161
|
+
got_data_line:
|
1162
|
+
current_state = *line? SDF_DATA_LINE : SDF_END_OF_DATA_ITEM;
|
1163
|
+
break;
|
1164
|
+
|
1165
|
+
}
|
1166
|
+
}
|
1167
|
+
if ( 0 == err && SDF_END_OF_DATA_BLOCK != current_state && NULL == p )
|
1168
|
+
; /* err = 4; */ /* unexpected end of file: missing $$$$ */
|
1169
|
+
else
|
1170
|
+
if (err && ( n_blank_lines == n_lines && *line == '\0' ) )
|
1171
|
+
err = 5; /* empty lines -- do not know when this can happen */
|
1172
|
+
|
1173
|
+
if ( err && err != 5 && current_state != SDF_END_OF_DATA_BLOCK && p ) {
|
1174
|
+
/* bypass up to $$$$ */
|
1175
|
+
while ( ( p = fgets_up_to_lf( line, line_len, inp ) ) && memcmp( line, SDF_END_OF_DATA, 4 ) )
|
1176
|
+
;
|
1177
|
+
if ( p ) {
|
1178
|
+
err = 9; /* bypassed to $$$$; non-fatal */
|
1179
|
+
AddMOLfileError(pStrErr, "Bypassing to next structure");
|
1180
|
+
}
|
1181
|
+
|
1182
|
+
}
|
1183
|
+
|
1184
|
+
return err;
|
1185
|
+
}
|
1186
|
+
/******************************************************************/
|
1187
|
+
/*
 * Read one SD-file record: the MOLfile via read_mol_file(), then the SData
 * items via bypass_sdf_data_items().
 *
 * inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord
 *              passed unchanged to read_mol_file()
 * pname,lname  may be NULL/0; pname is emptied here and passed to
 *              bypass_sdf_data_items() as the "name" buffer
 * Id           may be NULL; set to 0 here and passed to
 *              bypass_sdf_data_items() as the CAS registry number
 * pSdfLabel    label of a user-requested data item, may be NULL
 * pSdfValue    receives the value of that item, may be NULL
 * err          out: error code.  A negative code from read_mol_file() means
 *              the end of the record was already consumed; it is made
 *              positive and the SData items are not read.  Otherwise a
 *              non-zero result of bypass_sdf_data_items() replaces the
 *              MOLfile code, which is important to continue to the next
 *              good structure.
 * pStrErr      error-message string
 *
 * Returns whatever read_mol_file() returned.
 */
MOL_DATA* read_sdfile_segment(FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
                              int bGetOrigCoord,
                              char *pname, int lname,
                              long *Id, const char *pSdfLabel, char *pSdfValue,
                              int *err, char *pStrErr )
{
    MOL_DATA *mol_data;
    int       err_bypass_sdf;

    mol_data = read_mol_file( inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, err, pStrErr );

    /* reset the outputs before looking at the result */
    if ( pname && lname ) {
        *pname = '\0';
    }
    if ( Id ) {
        *Id = 0L; /* ignore for now */
    }

    if ( *err < 0 ) {
        /* end of data encountered while reading the MOLfile */
        *err = -*err;
        return mol_data;
    }

    err_bypass_sdf = bypass_sdf_data_items( inp, Id, NULL, 0, pname, lname, *err, pSdfLabel, pSdfValue, pStrErr );
    if ( err_bypass_sdf ) {
        *err = err_bypass_sdf;
    }
    return mol_data;
}
|
1214
|
+
/****************************************************************************/
|
1215
|
+
int CopyMOLfile(FILE *inp_file, long fPtrStart, long fPtrEnd, FILE *prb_file, long lNumb)
|
1216
|
+
{
|
1217
|
+
char line[MOLFILEINPLINELEN], *p;
|
1218
|
+
long fPtr;
|
1219
|
+
int ret = 1;
|
1220
|
+
char szNumber[32];
|
1221
|
+
|
1222
|
+
if ( inp_file && prb_file && fPtrStart >= 0L &&
|
1223
|
+
fPtrEnd > fPtrStart &&
|
1224
|
+
0 == fseek( inp_file, fPtrStart, SEEK_SET ) ) {
|
1225
|
+
|
1226
|
+
while ( fPtrEnd > (fPtr = ftell(inp_file)) && fPtr >= 0L &&
|
1227
|
+
fgets_up_to_lf( line, sizeof(line)-1, inp_file ) ) {
|
1228
|
+
line[sizeof(line)-1] = '\0'; /* unnecessary extra precaution */
|
1229
|
+
if ( fPtr == fPtrStart && lNumb ) {
|
1230
|
+
int len;
|
1231
|
+
LtrimRtrim( line, &len );
|
1232
|
+
len = sprintf( szNumber, "#%ld%s", lNumb, len?"/":"" );
|
1233
|
+
mystrncpy( line+len, line, sizeof(line)-len-1 );
|
1234
|
+
memcpy( line, szNumber, len );
|
1235
|
+
}
|
1236
|
+
if ( !strchr(line, '\n') ) {
|
1237
|
+
p = line+strlen(line);
|
1238
|
+
p[0] = '\n';
|
1239
|
+
p[1] = '\0';
|
1240
|
+
}
|
1241
|
+
fputs( line, prb_file );
|
1242
|
+
}
|
1243
|
+
ret = fseek( inp_file, fPtrEnd, SEEK_SET );
|
1244
|
+
}
|
1245
|
+
return ret;
|
1246
|
+
}
|