rino 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +44 -0
- data/Rakefile +123 -0
- data/ext/extconf.rb +26 -0
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/aux2atom.h +2786 -0
- data/ext/src/comdef.h +148 -0
- data/ext/src/e_0dstereo.c +3014 -0
- data/ext/src/e_0dstereo.h +31 -0
- data/ext/src/e_comdef.h +57 -0
- data/ext/src/e_ctl_data.h +147 -0
- data/ext/src/e_ichi_io.c +498 -0
- data/ext/src/e_ichi_io.h +40 -0
- data/ext/src/e_ichi_parms.c +37 -0
- data/ext/src/e_ichi_parms.h +41 -0
- data/ext/src/e_ichicomp.h +50 -0
- data/ext/src/e_ichierr.h +40 -0
- data/ext/src/e_ichimain.c +593 -0
- data/ext/src/e_ichisize.h +43 -0
- data/ext/src/e_inchi_atom.c +75 -0
- data/ext/src/e_inchi_atom.h +33 -0
- data/ext/src/e_inpdef.h +41 -0
- data/ext/src/e_mode.h +706 -0
- data/ext/src/e_mol2atom.c +649 -0
- data/ext/src/e_readinch.c +58 -0
- data/ext/src/e_readmol.c +54 -0
- data/ext/src/e_readmol.h +180 -0
- data/ext/src/e_readstru.c +251 -0
- data/ext/src/e_readstru.h +33 -0
- data/ext/src/e_util.c +284 -0
- data/ext/src/e_util.h +61 -0
- data/ext/src/extr_ct.h +251 -0
- data/ext/src/ichi.h +206 -0
- data/ext/src/ichi_bns.c +7999 -0
- data/ext/src/ichi_bns.h +231 -0
- data/ext/src/ichican2.c +5000 -0
- data/ext/src/ichicano.c +2195 -0
- data/ext/src/ichicano.h +49 -0
- data/ext/src/ichicans.c +1625 -0
- data/ext/src/ichicant.h +379 -0
- data/ext/src/ichicomn.h +260 -0
- data/ext/src/ichicomp.h +50 -0
- data/ext/src/ichidrp.h +119 -0
- data/ext/src/ichierr.h +124 -0
- data/ext/src/ichiisot.c +101 -0
- data/ext/src/ichilnct.c +286 -0
- data/ext/src/ichimain.h +132 -0
- data/ext/src/ichimak2.c +1189 -0
- data/ext/src/ichimake.c +3812 -0
- data/ext/src/ichimake.h +205 -0
- data/ext/src/ichimap1.c +851 -0
- data/ext/src/ichimap2.c +2856 -0
- data/ext/src/ichimap4.c +1609 -0
- data/ext/src/ichinorm.c +741 -0
- data/ext/src/ichinorm.h +67 -0
- data/ext/src/ichiparm.c +45 -0
- data/ext/src/ichiparm.h +1441 -0
- data/ext/src/ichiprt1.c +3612 -0
- data/ext/src/ichiprt2.c +1511 -0
- data/ext/src/ichiprt3.c +3011 -0
- data/ext/src/ichiqueu.c +1003 -0
- data/ext/src/ichiring.c +326 -0
- data/ext/src/ichiring.h +49 -0
- data/ext/src/ichisize.h +35 -0
- data/ext/src/ichisort.c +539 -0
- data/ext/src/ichister.c +3538 -0
- data/ext/src/ichister.h +35 -0
- data/ext/src/ichitaut.c +3843 -0
- data/ext/src/ichitaut.h +387 -0
- data/ext/src/ichitime.h +74 -0
- data/ext/src/inchi_api.h +670 -0
- data/ext/src/inchi_dll.c +1480 -0
- data/ext/src/inchi_dll.h +34 -0
- data/ext/src/inchi_dll_main.c +23 -0
- data/ext/src/inchi_dll_main.h +31 -0
- data/ext/src/inpdef.h +328 -0
- data/ext/src/lreadmol.h +1246 -0
- data/ext/src/mode.h +706 -0
- data/ext/src/ruby_inchi_main.c +558 -0
- data/ext/src/runichi.c +4179 -0
- data/ext/src/strutil.c +3861 -0
- data/ext/src/strutil.h +182 -0
- data/ext/src/util.c +1130 -0
- data/ext/src/util.h +85 -0
- data/lib/clean_tempfile.rb +220 -0
- data/lib/rino.rb +111 -0
- data/test/test.rb +386 -0
- metadata +130 -0
data/ext/src/lreadmol.h
ADDED
@@ -0,0 +1,1246 @@
|
|
1
|
+
/*
|
2
|
+
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
+
* International Chemical Identifier (InChI)
|
4
|
+
* Version 1
|
5
|
+
* Software version 1.00
|
6
|
+
* April 13, 2005
|
7
|
+
* Developed at NIST
|
8
|
+
*/
|
9
|
+
|
10
|
+
|
11
|
+
/* local prototypes */
|
12
|
+
int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment, int lcomment, char *name, int lname, int prev_err,
|
13
|
+
const char *pSdfLabel, char *pSdfValue, char *pStrErr );
|
14
|
+
MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
|
15
|
+
int bGetOrigCoord, int *err, char *pStrErr );
|
16
|
+
|
17
|
+
|
18
|
+
static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr);
|
19
|
+
static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr );
|
20
|
+
static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
|
21
|
+
static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
|
22
|
+
static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
|
23
|
+
static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr );
|
24
|
+
|
25
|
+
static int identify_sdf_label( char* inp_line, const char *pSdfLabel );
|
26
|
+
static long extract_cas_rn( char *line );
|
27
|
+
int RemoveNonPrintable( char *line );
|
28
|
+
|
29
|
+
|
30
|
+
/******/
|
31
|
+
/*
 * Error-reporting helpers. Both macros expect a variable named pStrErr
 * (the accumulated error text) to be in scope at the point of use.
 *
 * They are wrapped in do { ... } while ( 0 ) so that each use is a single
 * statement and stays correct inside an unbraced if/else.
 */
#ifndef MOLFILE_ERR_FIN
/*
 * If err is still zero and new_err is non-zero, set err = new_err.
 * The message is ALWAYS appended to pStrErr, and control ALWAYS jumps
 * to the label err_fin.
 */
#define MOLFILE_ERR_FIN(err, new_err, err_fin, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError( pStrErr, (msg) ); \
        goto err_fin; \
    } while ( 0 )
#endif
#ifndef MOLFILE_ERR_SET
/*
 * Same as MOLFILE_ERR_FIN but without the jump: optionally records the
 * error code and always appends the message (new_err == 0 makes it a
 * message-only warning).
 */
#define MOLFILE_ERR_SET(err, new_err, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError( pStrErr, (msg) ); \
    } while ( 0 )
#endif
|
39
|
+
|
40
|
+
/*************************************************************************/
|
41
|
+
int AddMOLfileError( char *pStrErr, const char *szMsg )
|
42
|
+
{
|
43
|
+
if ( pStrErr && szMsg && szMsg[0] ) {
|
44
|
+
int lenStrErr = strlen( pStrErr );
|
45
|
+
int lenMsg = strlen( szMsg );
|
46
|
+
char *p = strstr( pStrErr, szMsg );
|
47
|
+
if ( p && (p==pStrErr || *(p-1) == ' ' && (*(p-2) == ';' || *(p-2) == ':' )) &&
|
48
|
+
(p+lenMsg == pStrErr+lenStrErr ||
|
49
|
+
p[lenMsg] == ';' && p[lenMsg+1] == ' ' ||
|
50
|
+
p[lenMsg-1]==':' && p[lenMsg]==' ') ) {
|
51
|
+
return 1; /* reject duplicates */
|
52
|
+
}
|
53
|
+
if ( lenStrErr + lenMsg + 2*(lenStrErr > 0) < STR_ERR_LEN ) {
|
54
|
+
/* enough room to add */
|
55
|
+
if (lenStrErr > 0) {
|
56
|
+
if ( pStrErr[lenStrErr-1] != ':' ) {
|
57
|
+
strcat( pStrErr, ";" );
|
58
|
+
}
|
59
|
+
strcat( pStrErr, " " );
|
60
|
+
}
|
61
|
+
strcat( pStrErr, szMsg );
|
62
|
+
return 1;
|
63
|
+
}
|
64
|
+
/* no room */
|
65
|
+
if ( strstr( pStrErr, "..." ) ) {
|
66
|
+
return 0; /* no room mark has already been set */
|
67
|
+
}
|
68
|
+
if ( lenStrErr + 3 < STR_ERR_LEN ) {
|
69
|
+
strcat( pStrErr, "..." );
|
70
|
+
}
|
71
|
+
}
|
72
|
+
return 0;
|
73
|
+
}
|
74
|
+
/*************************************************************************/
|
75
|
+
static int mol_copy_check_empty( char* dest, char* source, int len, char **first_space )
|
76
|
+
{
|
77
|
+
int i, c; /* required len >= 0; dest must have at least len+1 bytes */
|
78
|
+
if ( len > 0 )
|
79
|
+
strncpy( dest, source, len );
|
80
|
+
dest[len]='\0';
|
81
|
+
len = ( len > 0 )? (int)strlen( dest) : 0;
|
82
|
+
for ( i = (len-1); i >= 0 && 0 != (c = source[i]) && isspace(UCINT c); i-- )
|
83
|
+
;
|
84
|
+
*first_space = dest + (i+1); /* first blank or zero terminating byte in dest */
|
85
|
+
return len; /* number of actually processed bytes; zero termination not included */
|
86
|
+
}
|
87
|
+
/*************************************************************************/
|
88
|
+
static int mol_read_datum(void* data, int field_len, int data_type, char** line_ptr)
|
89
|
+
{
|
90
|
+
/* 1. 'field_len' for MOL_STRING_DATA does not include trailing zero,
|
91
|
+
* that is actual length of the string pointed by 'data'
|
92
|
+
* should be at least field_len+1 bytes.
|
93
|
+
* For numerical data 'field_len' is length of input data field
|
94
|
+
* For numerical integral data field_len <= 0 means read up to first
|
95
|
+
* non-numeric character as strtod() does ("free format")
|
96
|
+
* 2. return value: for MOL_STRING_DATA: number of bytes excluding trailing zero
|
97
|
+
* for all others: 1=success; 0 = empty; -1= error
|
98
|
+
* 3. on exit *line_ptr points to the next byte after the last entered
|
99
|
+
*/
|
100
|
+
char *p = *line_ptr, *q, *p_end;
|
101
|
+
int i, ret=1, c, len;
|
102
|
+
long ldata;
|
103
|
+
double ddata;
|
104
|
+
|
105
|
+
switch( data_type ) {
|
106
|
+
case MOL_STRING_DATA:
|
107
|
+
for ( i= 0; i < field_len && 0 != (c = p[i]) && isspace(UCINT c); i++ ) /* pass by all leading spaces */
|
108
|
+
;
|
109
|
+
len = mol_copy_check_empty( (char*)data, &p[i], field_len-i, &q );
|
110
|
+
ret = ( q - (char*)data );/* actual data length */
|
111
|
+
*q = '\0'; /* add zero termination to data if it is not there yet*/
|
112
|
+
*line_ptr += (len+i); /* ptr to the 1st byte of the next input field or to zero termination */
|
113
|
+
break;
|
114
|
+
|
115
|
+
case MOL_CHAR_INT_DATA:
|
116
|
+
case MOL_SHORT_INT_DATA:
|
117
|
+
case MOL_LONG_INT_DATA:
|
118
|
+
{ /* block start */
|
119
|
+
char str[MOL_MAX_VALUE_LEN+1];
|
120
|
+
ldata = 0L;
|
121
|
+
if ( field_len > MOL_MAX_VALUE_LEN ) {
|
122
|
+
ret = -1;
|
123
|
+
}else
|
124
|
+
if ( field_len > 0 ) { /* fixed length */
|
125
|
+
*line_ptr += ( len = mol_copy_check_empty( str, p, field_len, &q ) );
|
126
|
+
*q = '\0';
|
127
|
+
if ( !len || !(q-str) ) { /* empty string */
|
128
|
+
ret = 0;
|
129
|
+
}else
|
130
|
+
if ( (ldata=strtol(str,&p_end,10), p_end != q) ){ /* wrong data: incompletely interpreted */
|
131
|
+
ret = -1;
|
132
|
+
}
|
133
|
+
}else{ /* free format: field_len <= 0 */
|
134
|
+
ldata = strtol( p, &p_end, 10 );
|
135
|
+
*line_ptr += ( len = p_end - p );
|
136
|
+
if ( len == 0 ){
|
137
|
+
ret = 0;
|
138
|
+
}
|
139
|
+
}
|
140
|
+
|
141
|
+
switch( data_type ) {
|
142
|
+
case MOL_CHAR_INT_DATA:
|
143
|
+
if ( SCHAR_MIN <= ldata && ldata <= SCHAR_MAX ){ /* from || to &&: 11-19-96 */
|
144
|
+
*(S_CHAR*)data = (S_CHAR)ldata;
|
145
|
+
}else{
|
146
|
+
*(S_CHAR*)data = (S_CHAR)0;
|
147
|
+
ret = -1;
|
148
|
+
}
|
149
|
+
break;
|
150
|
+
case MOL_SHORT_INT_DATA:
|
151
|
+
if ( SHRT_MIN <= ldata && ldata <= SHRT_MAX ){
|
152
|
+
*(S_SHORT*)data = (S_SHORT)ldata;
|
153
|
+
}else{
|
154
|
+
*(S_SHORT*)data = (S_SHORT)0;
|
155
|
+
ret = -1;
|
156
|
+
}
|
157
|
+
break;
|
158
|
+
case MOL_LONG_INT_DATA:
|
159
|
+
if ( LONG_MIN < ldata && ldata < LONG_MAX ){
|
160
|
+
*(long*)data = (long)ldata;
|
161
|
+
}else{
|
162
|
+
*(long*)data = 0L;
|
163
|
+
ret = -1;
|
164
|
+
}
|
165
|
+
break;
|
166
|
+
default:
|
167
|
+
ret=-1;
|
168
|
+
}
|
169
|
+
|
170
|
+
} /* block end */
|
171
|
+
break;
|
172
|
+
case MOL_DOUBLE_DATA:
|
173
|
+
case MOL_FLOAT_DATA:
|
174
|
+
{ /* block start */
|
175
|
+
char str[MOL_MAX_VALUE_LEN+1];
|
176
|
+
if ( field_len > MOL_MAX_VALUE_LEN ) {
|
177
|
+
ret = -1;
|
178
|
+
ddata = 0.0;
|
179
|
+
}else
|
180
|
+
if ( field_len > 0 ) {
|
181
|
+
*line_ptr += (len = mol_copy_check_empty( str, p, field_len, &q ));
|
182
|
+
*q = '\0';
|
183
|
+
if ( !len || !(q-str) ) { /* empty string */
|
184
|
+
ddata = 0.0;
|
185
|
+
ret = 0;
|
186
|
+
}else
|
187
|
+
if ( (ddata=strtod(str,&p_end), p_end != q) ){ /* wrong data */
|
188
|
+
ret = -1;
|
189
|
+
}
|
190
|
+
}else{ /* free format */
|
191
|
+
ddata = strtod( p, &p_end );
|
192
|
+
*line_ptr += ( len = p_end - p );
|
193
|
+
if ( len == 0 ){
|
194
|
+
ret = 0;
|
195
|
+
}
|
196
|
+
}
|
197
|
+
switch(data_type){
|
198
|
+
case MOL_DOUBLE_DATA:
|
199
|
+
if ( ddata != HUGE_VAL && /*ldata*/ ddata != -HUGE_VAL ){ /* replaced ldata with ddata 6-30-98 DCh */
|
200
|
+
*(double*)data = ddata;
|
201
|
+
}else{
|
202
|
+
*(double*)data = 0.0;
|
203
|
+
ret = -1;
|
204
|
+
}
|
205
|
+
break;
|
206
|
+
case MOL_FLOAT_DATA:
|
207
|
+
if ( fabs(ddata) <= (double)FLT_MIN ) {
|
208
|
+
*(float*)data = 0.0;
|
209
|
+
}else
|
210
|
+
if ( fabs(ddata) >= (double)FLT_MAX ) {
|
211
|
+
*(float*)data = 0.0;
|
212
|
+
ret = -1;
|
213
|
+
}else{
|
214
|
+
*(float*)data = (float)ddata;
|
215
|
+
}
|
216
|
+
break;
|
217
|
+
}
|
218
|
+
} /* block end */
|
219
|
+
break;
|
220
|
+
case MOL_JUMP_TO_RIGHT:
|
221
|
+
for ( i = 0; i < field_len && p[i]; i++ )
|
222
|
+
;
|
223
|
+
*line_ptr += i;
|
224
|
+
ret = i;
|
225
|
+
break;
|
226
|
+
default:
|
227
|
+
ret = -1;
|
228
|
+
}
|
229
|
+
return ret;
|
230
|
+
}
|
231
|
+
/*************************************************************************/
|
232
|
+
static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr)
{
    /*
     * Read the three-line MOL-file header block into *hdr.
     *
     * Returns 0 on success, or 1, 3 or 7 if header line 1, 2 or 3
     * respectively could not be read. No message is added to pStrErr.
     *
     * Parsing is deliberately lenient: lines longer than 80 characters
     * are accepted, and fields of line 2 that cannot be interpreted are
     * not reported (results of mol_read_datum() are ignored).
     */
    char      line[MOLFILEINPLINELEN]; /* + cr +lf +zero termination + reserve */
    const int cbLine = sizeof(line);
    char     *cur;

    (void)pStrErr;

    /*------------ header line #1: name ----------------*/
    cur = fgets_up_to_lf( line, cbLine, inp );
    if ( NULL == cur ) {
        return 1; /* can't read the input file line */
    }
    remove_one_lf( line );
    mol_read_datum( hdr->szMoleculeName, sizeof(hdr->szMoleculeName)-1, MOL_STRING_DATA, &cur );

    /*------------ header line #2 ----------------------*/
    cur = fgets_up_to_lf( line, cbLine, inp );
    if ( NULL == cur ) {
        return 3; /* can't read the input file line */
    }
    remove_one_lf( line );
    mol_read_datum( hdr->szUserInitials, sizeof(hdr->szUserInitials)-1, MOL_STRING_DATA, &cur );
    mol_read_datum( hdr->szProgramName,  sizeof(hdr->szProgramName)-1,  MOL_STRING_DATA, &cur );
    /* date and time: five two-character integer fields */
    mol_read_datum( &hdr->cMonth,  2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cDay,    2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cYear,   2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cHour,   2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( &hdr->cMinute, 2, MOL_CHAR_INT_DATA, &cur );
    mol_read_datum( hdr->szDimCode, sizeof(hdr->szDimCode)-1, MOL_STRING_DATA, &cur );
    mol_read_datum( &hdr->nScalingFactor1,          2, MOL_SHORT_INT_DATA, &cur );
    mol_read_datum( &hdr->dScalingFactor2,         10, MOL_DOUBLE_DATA,    &cur );
    mol_read_datum( &hdr->dEnergy,                 12, MOL_DOUBLE_DATA,    &cur );
    mol_read_datum( &hdr->lInternalRegistryNumber,  6, MOL_LONG_INT_DATA,  &cur );

    /* save the whole line 2 as well */
    cur = line;
    mol_read_datum( hdr->szMoleculeLine2, sizeof(hdr->szMoleculeLine2)-1, MOL_STRING_DATA, &cur );

    /*------------ header line #3: comment -------------*/
    cur = fgets_up_to_lf( line, cbLine, inp );
    if ( NULL == cur ) {
        return 7; /* can't read the line */
    }
    remove_one_lf( line );
    mol_read_datum( hdr->szComment, sizeof(hdr->szComment)-1, MOL_STRING_DATA, &cur );

    return 0;
}
|
308
|
+
/***************************************************************/
|
309
|
+
int RemoveNonPrintable( char *line )
|
310
|
+
{
|
311
|
+
int i, c, num = 0;
|
312
|
+
if ( line ) {
|
313
|
+
for ( i = 0; c = UCINT line[i]; i ++ ) {
|
314
|
+
/* assuming ASCII charset */
|
315
|
+
if ( c < ' ' || c >= 0x7F ) {
|
316
|
+
line[i] = '.';
|
317
|
+
num ++;
|
318
|
+
}
|
319
|
+
}
|
320
|
+
}
|
321
|
+
return num;
|
322
|
+
}
|
323
|
+
/***************************************************************/
|
324
|
+
static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr )
{
    /*
     * Read the counts line of the connection table into *ctab.
     *
     * Returns 0 on success, 1 if the line cannot be read, 3 if any of the
     * three-character fields cannot be interpreted. Messages are added to
     * pStrErr; an over-long line only produces a message, not an error.
     */
    char      line[MOLFILEINPLINELEN];
    const int cbLine = sizeof(line);
    char     *cur;
    int       err = 0;
    int       bBad;

    cur = fgets_up_to_lf( line, cbLine, inp );
    if ( NULL == cur ) {
        /* can't read the input file line */
        MOLFILE_ERR_FIN (err, 1, err_fin, "Cannot read counts line");
    }
    remove_one_lf( line );
    if ( line[MOLFILEMAXLINELEN] ) {
        /* too long input file line: message only, err is not changed */
        MOLFILE_ERR_SET (err, 0, "Too long counts line");
    }

    /* consecutive 3-character fields; the first failure stops the scan */
    bBad =   0 > mol_read_datum( &ctab->nNumberOfAtoms, 3, MOL_SHORT_INT_DATA, &cur )
          || 0 > mol_read_datum( &ctab->nNumberOfBonds, 3, MOL_SHORT_INT_DATA, &cur )
#if ( MOL_QUERY == MOL_PRESENT )
          || 0 > mol_read_datum( &ctab->nNumberOfAtomsLists, 3, MOL_SHORT_INT_DATA, &cur )
#else
          || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
          || 0 > mol_read_datum( NULL, /*obsolete*/ 3, MOL_JUMP_TO_RIGHT, &cur )
          || 0 > mol_read_datum( &ctab->cChiralFlag, 3, MOL_CHAR_INT_DATA, &cur )
          || 0 > mol_read_datum( &ctab->nNumberOfStextEntries, 3, MOL_SHORT_INT_DATA, &cur )
#if ( MOL_CPSS == MOL_PRESENT )
          || 0 > mol_read_datum( &ctab->nNumberOfReactionComponentsPlus1, 3, MOL_SHORT_INT_DATA, &cur )
          || 0 > mol_read_datum( &ctab->nNumberOfReactants, 3, MOL_SHORT_INT_DATA, &cur )
          || 0 > mol_read_datum( &ctab->nNumberOfProducts, 3, MOL_SHORT_INT_DATA, &cur )
          || 0 > mol_read_datum( &ctab->nNumberOfIntermediates, 3, MOL_SHORT_INT_DATA, &cur )
#else
          || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
          || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
          || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
          || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
          || 0 > mol_read_datum( &ctab->nNumberOfPropertyLines, 3, MOL_SHORT_INT_DATA, &cur );

    if ( bBad ) {
        /* can't interpret counts line: report it together with the offending text */
        err = 3;
        MOLFILE_ERR_SET (err, 3, "Cannot interpret counts line:");
        RemoveNonPrintable( line );
        AddMOLfileError(pStrErr, line);
        goto err_fin;
    }

    /* the rest of the line is the ctab version string */
    mol_read_datum( ctab->csCurrentCtabVersion, sizeof(ctab->csCurrentCtabVersion)-1, MOL_STRING_DATA, &cur );

err_fin:
    return err;
}
|
371
|
+
|
372
|
+
/*************************************************************************/
|
373
|
+
static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    /*
     * Read ctab->nNumberOfAtoms atom-block lines.
     *
     * 'err' is the error state accumulated so far. Once it is non-zero the
     * remaining lines are only consumed, not interpreted. If the SDF
     * end-of-data line is met while in the error state, the error code is
     * made negative and reading stops.
     *
     * Original coordinate text is saved into ctab->szCoord and atom data
     * into ctab->MolAtom, each only if the respective pointer is not NULL.
     *
     * Returns the (possibly updated) error code.
     */

    /* atom-block charge code 0..7 -> actual charge; 'R' stands for a doublet radical */
    static S_SHORT charge_val[] = {0, 3, 2, 1, 'R', -1, -2, -3};
    /*                      code:  0  1  2  3   4    5   6   7 */
    const int nChargeCodes = (int)(sizeof ( charge_val ) / sizeof( charge_val[0] ));

    char      line[MOLFILEINPLINELEN];
    const int cbLine = sizeof(line);
    char     *cur;
    MOL_ATOM *atom;
    S_SHORT   iAtom, code;
    int       bBad;

    for ( iAtom = 0; iAtom < ctab->nNumberOfAtoms; iAtom ++ ) {

        cur = fgets_up_to_lf( line, cbLine, inp );
        if ( NULL == cur ) {
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read atom block line");
            }
            break;
        }
        remove_one_lf( line );
        if ( line[MOLFILEMAXLINELEN] ) {
            /* message only, err is not changed */
            MOLFILE_ERR_SET (err, 0, "Too long atom block line");
        }
        if ( err ) {
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue; /* bypass the rest of the Atom block */
        }

        if ( NULL != ctab->szCoord ) {
            mystrncpy( ctab->szCoord[iAtom], cur, 31 ); /* original coordinates */
        }
        if ( NULL == ctab->MolAtom ) {
            continue; /* nowhere to store the atom data */
        }
        atom = ctab->MolAtom + iAtom;

        /* ---- first half of the line ---- */
        bBad =   0 > mol_read_datum( &atom->fX, 10, MOL_DOUBLE_DATA, &cur )
              || 0 > mol_read_datum( &atom->fY, 10, MOL_DOUBLE_DATA, &cur )
              || 0 > mol_read_datum( &atom->fZ, 10, MOL_DOUBLE_DATA, &cur )
              || 0 > mol_read_datum( NULL, /* undescribed in article*/ 1, MOL_JUMP_TO_RIGHT, &cur )
              /* an empty atom symbol is an error; width is 3, not sizeof(szAtomSymbol)-1 */
              || 0 == mol_read_datum( &atom->szAtomSymbol, 3, MOL_STRING_DATA, &cur )
#ifdef INCHI_MAIN
              || 0 > mol_read_datum( &atom->cMassDifference, 2, MOL_SHORT_INT_DATA, &cur )
#else
              || 0 > mol_read_datum( &atom->cMassDifference, 2, MOL_CHAR_INT_DATA,  &cur )
#endif
              || 0 > mol_read_datum( &atom->cCharge, 3, MOL_CHAR_INT_DATA, &cur )
              || 0 > mol_read_datum( &atom->cStereoParity, 3, MOL_CHAR_INT_DATA, &cur )
#if ( MOL_QUERY == MOL_PRESENT )
              || 0 > mol_read_datum( &atom->cH_countPlus1, 3, MOL_CHAR_INT_DATA, &cur )
              || 0 > mol_read_datum( &atom->cStereoCare, 3, MOL_CHAR_INT_DATA, &cur )
#else
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
              || 0 > mol_read_datum( &atom->cValence, 3, MOL_CHAR_INT_DATA, &cur );

        if ( bBad ) {
            /* can't interpret a first half of atom block line */
            err = 4;
            MOLFILE_ERR_SET (err, 4, "Cannot interpret atom block line:");
            RemoveNonPrintable( line );
            AddMOLfileError(pStrErr, line);
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue;
        }

        /* two-letter symbol: force the second letter to lower case */
        if ( 2 == strlen(atom->szAtomSymbol) && isupper(UCINT atom->szAtomSymbol[1]) ) {
            atom->szAtomSymbol[1] = (char)tolower(UCINT atom->szAtomSymbol[1]);
        }

        /* translate the charge code into charge / radical */
        code = (S_SHORT) atom->cCharge;
        if ( code < 0 || code >= nChargeCodes ) {
            /* code outside the table: not treated as an error; allows greater charges */
            atom->cCharge  = (S_CHAR)(4 - code);
            atom->cRadical = 0;
        } else {
            code = charge_val[code];
            if ( 'R' == code ) {
                atom->cCharge  = 0;
                atom->cRadical = RADICAL_DOUBLET;
            } else {
                atom->cCharge  = (S_CHAR)code; /* actual charge value */
                atom->cRadical = 0;
            }
        }
#ifdef INCHI_MAIN
        if ( atom->cMassDifference ) { /* e_ReadMOL.c specific */
            atom->cMassDifference += ISOTOPIC_SHIFT_FLAG;
        }
#endif

        /* ---- second half of the line ---- */
        bBad =
#if ( MOL_CPSS == MOL_PRESENT )
                 0 > mol_read_datum( &atom->cH0_designator, 3, MOL_CHAR_INT_DATA, &cur )
              || 0 > mol_read_datum( &atom->cReactionComponentType, 3, MOL_CHAR_INT_DATA, &cur )
              || 0 > mol_read_datum( &atom->cReactionComponentNumber, 3, MOL_CHAR_INT_DATA, &cur )
#else
                 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
#if ( MOL_REACT == MOL_PRESENT )
              || 0 > mol_read_datum( &atom->nAtomAtomMappingNumber, 3, MOL_SHORT_INT_DATA, &cur )
              || 0 > mol_read_datum( &atom->cReactionComponentType, 3, MOL_CHAR_INT_DATA, &cur )
#else
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
#if ( MOL_REACT == MOL_PRESENT || MOL_QUERY == MOL_PRESENT )
              || 0 > mol_read_datum( &atom->cExactChargeFlag, 3, MOL_CHAR_INT_DATA, &cur )
#else
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
              ;

        if ( bBad ) {
            /* can't interpret a second half of atom block line */
            err = 5;
            MOLFILE_ERR_SET (err, 5, "Cannot interpret atom block line:");
            RemoveNonPrintable( line );
            AddMOLfileError(pStrErr, line);
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue;
        }
    }

    return err;
}
|
502
|
+
/*************************************************************************/
|
503
|
+
static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    /*
     * Read ctab->nNumberOfBonds bond-block lines.
     *
     * 'err' is the error state accumulated so far. Once it is non-zero the
     * remaining lines are only consumed. If the SDF end-of-data line is met
     * while in the error state, the error code is made negative and reading
     * stops. Bond data are stored only if ctab->MolBond is not NULL.
     *
     * Returns the (possibly updated) error code.
     */
    char      line[MOLFILEINPLINELEN];
    const int cbLine = sizeof(line);
    char     *cur;
    S_SHORT   iBond;
    int       bBad;

    for ( iBond = 0; iBond < ctab->nNumberOfBonds; iBond ++ ) {

        cur = fgets_up_to_lf( line, cbLine, inp );
        if ( NULL == cur ) {
            if ( !err ) {
                MOLFILE_ERR_SET (err, 2, "Cannot read bond block line");
            }
            break;
        }
        remove_one_lf( line );
        if ( line[MOLFILEMAXLINELEN] && !err ) {
            err = 3; /* too long input file line; no message is added */
        }
        if ( err ) {
            if ( !strcmp( line, SDF_END_OF_DATA ) ) {
                err = -abs(err);
                break;
            }
            continue; /* bypass the rest of the bond block */
        }

        if ( !ctab->MolBond ) {
            continue; /* nowhere to store the bond data */
        }

        /* consecutive 3-character fields; the first failure stops the scan */
        bBad =   0 > mol_read_datum( &ctab->MolBond[iBond].nAtomNo1, 3, MOL_SHORT_INT_DATA, &cur )
              || 0 > mol_read_datum( &ctab->MolBond[iBond].nAtomNo2, 3, MOL_SHORT_INT_DATA, &cur )
              || 0 > mol_read_datum( &ctab->MolBond[iBond].cBondType, 3, MOL_CHAR_INT_DATA, &cur )
              || 0 > mol_read_datum( &ctab->MolBond[iBond].cBondStereo, 3, MOL_CHAR_INT_DATA, &cur )
#if ( MOL_QUERY == MOL_PRESENT )
              || 0 > mol_read_datum( &ctab->MolBond[iBond].cBondTopology, 3, MOL_CHAR_INT_DATA, &cur ) /* ring/chain */
#else
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
#if ( MOL_REACT == MOL_PRESENT )
              || 0 > mol_read_datum( &ctab->MolBond[iBond].cReactingCenterStatus, 3, MOL_CHAR_INT_DATA, &cur )
#else
              || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &cur )
#endif
              ;

        if ( !bBad ) {
            continue;
        }
        if ( !err ) {
            /* can't interpret bonds block line */
            MOLFILE_ERR_SET (err, 4, "Cannot interpret bond block line:");
            RemoveNonPrintable( line );
            AddMOLfileError(pStrErr, line);
        }
        if ( !strcmp( line, SDF_END_OF_DATA ) ) {
            err = -abs(err);
            break;
        }
    }

    return err;
}
|
567
|
+
/*************************************************************************/
|
568
|
+
static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
{
    /*
     * Pass by all STEXT entries without attempting to interpret them:
     * two input lines are consumed per entry
     * (2 * ctab->nNumberOfStextEntries lines in total).
     *
     * If a line cannot be read and no error has been recorded yet, err
     * becomes 2 and a message is added to pStrErr.
     * Returns the (possibly updated) error code.
     */
    char      line[MOLFILEINPLINELEN];
    const int cbLine = sizeof(line);
    S_SHORT   iLine;

    for ( iLine = 0; iLine < 2*ctab->nNumberOfStextEntries; iLine ++ ) {
        if ( NULL != fgets_up_to_lf( line, cbLine, inp ) ) {
            continue; /* line consumed; its length is deliberately not checked */
        }
        /* can't read the input file line */
        if ( !err ) {
            MOLFILE_ERR_FIN (err, 2, err_fin, "Cannot read STEXT block line");
        }
        break;
    }

err_fin:
    return err;
}
|
596
|
+
/*************************************************************************/
|
597
|
+
static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr )
|
598
|
+
{
|
599
|
+
enum { MULTI_LINE_MODE_NO_MODE, MULTI_LINE_MODE_ISIS_ALIAS };
|
600
|
+
char *p;
|
601
|
+
char line[MOLFILEINPLINELEN];
|
602
|
+
const int line_len = sizeof(line);
|
603
|
+
int nMultiLineMode = MULTI_LINE_MODE_NO_MODE, nAtomNumber=0;
|
604
|
+
S_SHORT i, j;
|
605
|
+
char charM[2];
|
606
|
+
char szBlank[3];
|
607
|
+
char szType[4];
|
608
|
+
S_SHORT skip_lines=0;
|
609
|
+
S_SHORT num_entries;
|
610
|
+
S_SHORT num_atoms = ctab->nNumberOfAtoms;
|
611
|
+
|
612
|
+
int charge_encountered = 0;
|
613
|
+
int radical_encountered = 0;
|
614
|
+
int isotope_encountered = 0;
|
615
|
+
/*
|
616
|
+
if ( NULL == ctab->MolAtom ){
|
617
|
+
err = 1;
|
618
|
+
goto err_fin; internal error: memory has not been allocated for MolAtom structure
|
619
|
+
}
|
620
|
+
*/
|
621
|
+
for ( i = 0; ctab->csCurrentCtabVersion[0]? 1 : (i < ctab->nNumberOfPropertyLines); i++ ) { /* the last line should be M END */
|
622
|
+
/* ctab->csCurrentCtabVersion[0] == 0:
|
623
|
+
exactly ctab->nNumberOfPropertyLines lines including M END */
|
624
|
+
/* ctab->csCurrentCtabVersion[0] != 0:
|
625
|
+
read until M END line was encountered */
|
626
|
+
if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
|
627
|
+
if ( !err ) {
|
628
|
+
MOLFILE_ERR_SET (err, 2, "Cannot read properties block line");
|
629
|
+
}
|
630
|
+
goto err_fin;
|
631
|
+
}
|
632
|
+
remove_one_lf( line );
|
633
|
+
if ( line[MOLFILEMAXLINELEN] ){
|
634
|
+
MOLFILE_ERR_SET (err, 3, "Too long properties block line");
|
635
|
+
continue;
|
636
|
+
}
|
637
|
+
if ( skip_lines > 0 ) {
|
638
|
+
skip_lines --;
|
639
|
+
continue;
|
640
|
+
}
|
641
|
+
/* alias. */
|
642
|
+
if ( nMultiLineMode == MULTI_LINE_MODE_ISIS_ALIAS && nAtomNumber ) {
|
643
|
+
int len;
|
644
|
+
nMultiLineMode = MULTI_LINE_MODE_NO_MODE;
|
645
|
+
if ( 0 >= (len=normalize_name( p )) ) {
|
646
|
+
nAtomNumber = 0;
|
647
|
+
continue;
|
648
|
+
}
|
649
|
+
if( 0 < len && len < (int)(sizeof(ctab->MolAtom->szAtomSymbol)) ) {
|
650
|
+
int nCharge, nRad;
|
651
|
+
MOL_ATOM* MolAtom = ctab->MolAtom + nAtomNumber-1;
|
652
|
+
/* ctab->MolAtom[nAtomNumber-1].cAtomAliasedFlag = 1; */
|
653
|
+
/* extract radicals & charges */
|
654
|
+
extract_ChargeRadical( p, &nRad, &nCharge );
|
655
|
+
/* Aliased atom cannot have charge, radical & mass difference */
|
656
|
+
/* in the atom table or "M CHG", "M RAD", "M ISO" */
|
657
|
+
/* if ( nCharge ) */
|
658
|
+
MolAtom->cCharge = (S_CHAR)nCharge;
|
659
|
+
/* if ( nRad ) */
|
660
|
+
MolAtom->cRadical = (char)nRad;
|
661
|
+
|
662
|
+
if ( 1 == len && 'D' == p[0] ) {
|
663
|
+
/* H isotope */
|
664
|
+
p[0] = 'H';
|
665
|
+
#ifdef INCHI_MAIN
|
666
|
+
MolAtom->cMassDifference=(1 + ISOTOPIC_SHIFT_FLAG);
|
667
|
+
#else
|
668
|
+
MolAtom->cMassDifference=1;
|
669
|
+
#endif
|
670
|
+
} else
|
671
|
+
if ( 1 == len && 'T' == p[0] ) {
|
672
|
+
/* H isotope */
|
673
|
+
p[0] = 'H';
|
674
|
+
#ifdef INCHI_MAIN
|
675
|
+
MolAtom->cMassDifference=(2 + ISOTOPIC_SHIFT_FLAG);
|
676
|
+
#else
|
677
|
+
MolAtom->cMassDifference=2;
|
678
|
+
#endif
|
679
|
+
} else
|
680
|
+
MolAtom->cMassDifference=0;
|
681
|
+
if ( strlen(p) < sizeof(ctab->MolAtom[0].szAtomSymbol) ) {
|
682
|
+
strcpy(MolAtom->szAtomSymbol, p);
|
683
|
+
} else {
|
684
|
+
strcpy(MolAtom->szAtomSymbol, "???");
|
685
|
+
}
|
686
|
+
MolAtom->cAtomAliasedFlag ++;
|
687
|
+
}
|
688
|
+
skip_lines = 0;
|
689
|
+
nAtomNumber = 0;
|
690
|
+
continue;
|
691
|
+
}
|
692
|
+
|
693
|
+
if ( 1 != mol_read_datum( charM, sizeof(charM) - 1, MOL_STRING_DATA, &p )
|
694
|
+
|| 0 != mol_read_datum( szBlank, sizeof(szBlank) - 1, MOL_STRING_DATA, &p ) /* must contain 0 bytes */
|
695
|
+
|| 0 >= mol_read_datum( szType, sizeof(szType) - 1, MOL_STRING_DATA, &p ) /* must contain 3 bytes */
|
696
|
+
) {
|
697
|
+
if ( !strcmp( line, SDF_END_OF_DATA ) ) {
|
698
|
+
err = err? -abs(err): -4;
|
699
|
+
break;
|
700
|
+
}
|
701
|
+
continue; /* ignore because cannot recognize */
|
702
|
+
}
|
703
|
+
if ( charM[0] == 'V' ){
|
704
|
+
skip_lines = 0; /* ISIS/Desktop Atom Value: one-line property */
|
705
|
+
continue;
|
706
|
+
}
|
707
|
+
if ( charM[0] == 'G' ){
|
708
|
+
skip_lines = 1; /* ISIS/Desktop Group abbreviation: two-line property */
|
709
|
+
continue;
|
710
|
+
}
|
711
|
+
if ( charM[0] == 'A' ) {
|
712
|
+
if ( NULL != ctab->MolAtom &&
|
713
|
+
0 < ( nAtomNumber = (int)strtol(szType, NULL, 10) ) &&
|
714
|
+
nAtomNumber <= ctab->nNumberOfAtoms ){
|
715
|
+
/* Atom Alias [ISIS/Desktop] two-line property */
|
716
|
+
nMultiLineMode = MULTI_LINE_MODE_ISIS_ALIAS;
|
717
|
+
continue;
|
718
|
+
} else {
|
719
|
+
nAtomNumber = 0;
|
720
|
+
skip_lines = 1;
|
721
|
+
continue;
|
722
|
+
}
|
723
|
+
}
|
724
|
+
if ( charM[0] == 'S' && !strcmp( szType, "SKP" ) ){ /* skip lines */
|
725
|
+
if ( 0 >= mol_read_datum( &skip_lines, 3, MOL_SHORT_INT_DATA, &p ) ) {
|
726
|
+
skip_lines = 0;
|
727
|
+
}
|
728
|
+
continue;
|
729
|
+
}
|
730
|
+
if ( charM[0] != 'M' ) {/* cannot recognize a line */
|
731
|
+
continue;
|
732
|
+
}
|
733
|
+
if ( !strcmp( szType, "REG" ) ) {
|
734
|
+
int len;
|
735
|
+
p = p + strspn( p, " " );
|
736
|
+
len = strcspn( p, " " );
|
737
|
+
len = inchi_min( len, MOL_MAX_VALUE_LEN );
|
738
|
+
mol_read_datum( &pHdr->lInternalRegistryNumber, len, MOL_LONG_INT_DATA, &p );
|
739
|
+
continue;
|
740
|
+
}
|
741
|
+
|
742
|
+
if ( !strcmp( szType, "END" ) ){
|
743
|
+
if ( ctab->csCurrentCtabVersion[0] )
|
744
|
+
break; /* end of property lines */
|
745
|
+
continue;
|
746
|
+
}
|
747
|
+
|
748
|
+
if ( NULL == ctab->MolAtom )
|
749
|
+
continue; /* ignore because the user requested to bypass all this stuff */
|
750
|
+
|
751
|
+
/*----------------------------------- charge: Generic */
|
752
|
+
if ( !strcmp( szType, "CHG" ) &&
|
753
|
+
0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
|
754
|
+
1 <= num_entries && num_entries <= 8 ) {
|
755
|
+
S_SHORT atoms[8];
|
756
|
+
S_SHORT charges[8];
|
757
|
+
if ( !charge_encountered && !radical_encountered ) {
|
758
|
+
/* first charge or radical record clears all Atom Block */
|
759
|
+
/* entered charge and radical data to zeroes */
|
760
|
+
charge_encountered = -1;
|
761
|
+
}
|
762
|
+
for ( j = 0; j < num_entries; j++ ) {
|
763
|
+
if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
764
|
+
0 > mol_read_datum( &charges[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
765
|
+
atoms[j] <= 0 || atoms[j] > num_atoms ||
|
766
|
+
charges[j] < -15 || charges[j] > 15 ) {
|
767
|
+
goto charge_error;
|
768
|
+
}
|
769
|
+
}
|
770
|
+
if ( charge_encountered == -1 ) {
|
771
|
+
for ( j = 0; j < num_atoms; j++ ) {
|
772
|
+
if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms.*/
|
773
|
+
ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
|
774
|
+
}
|
775
|
+
charge_encountered = 1;
|
776
|
+
}
|
777
|
+
for ( j = 0; j < num_entries; j++ ) {
|
778
|
+
if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) /* do not change aliased atoms.*/
|
779
|
+
ctab->MolAtom[atoms[j]-1].cCharge = (S_CHAR)charges[j];
|
780
|
+
}
|
781
|
+
continue;
|
782
|
+
charge_error:
|
783
|
+
MOLFILE_ERR_SET (err, 0, "Charge not recognized:");
|
784
|
+
RemoveNonPrintable( line );
|
785
|
+
AddMOLfileError(pStrErr, line);
|
786
|
+
continue; /* ignore for now */
|
787
|
+
}
|
788
|
+
/*-------------------------------------- radical: Generic */
|
789
|
+
if ( !strcmp( szType, "RAD" ) &&
|
790
|
+
0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
|
791
|
+
1 <= num_entries && num_entries <= 8 ) {
|
792
|
+
S_SHORT atoms[8];
|
793
|
+
S_SHORT radicals[8];
|
794
|
+
if ( !charge_encountered && !radical_encountered ) {
|
795
|
+
/* first charge or radical record clears all Atom Block */
|
796
|
+
/* entered charge and radical data to zeroes */
|
797
|
+
radical_encountered = -1;
|
798
|
+
}
|
799
|
+
for ( j = 0; j < num_entries; j++ ) {
|
800
|
+
if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
801
|
+
0 > mol_read_datum( &radicals[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
802
|
+
atoms[j] <= 0 || atoms[j] > num_atoms ||
|
803
|
+
radicals[j] < 0 || radicals[j] > 3 ) {
|
804
|
+
goto radical_error;
|
805
|
+
}
|
806
|
+
}
|
807
|
+
if ( radical_encountered == -1 ) {
|
808
|
+
for ( j = 0; j < num_atoms; j++ ) {
|
809
|
+
if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms. 5-3-99 DCh */
|
810
|
+
ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
|
811
|
+
}
|
812
|
+
radical_encountered = 1;
|
813
|
+
}
|
814
|
+
for ( j = 0; j < num_entries; j++ ) {
|
815
|
+
if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) { /* do not change aliased atoms. 5-3-99 DCh */
|
816
|
+
ctab->MolAtom[atoms[j]-1].cRadical = (S_CHAR)radicals[j];
|
817
|
+
}
|
818
|
+
}
|
819
|
+
continue;
|
820
|
+
radical_error:
|
821
|
+
MOLFILE_ERR_SET (err, 0, "Radical not recognized:");
|
822
|
+
RemoveNonPrintable( line );
|
823
|
+
AddMOLfileError(pStrErr, line);
|
824
|
+
continue; /* ignore error for now */
|
825
|
+
}
|
826
|
+
/*-------------------------------------- isotope: Generic */
|
827
|
+
if ( !strcmp( szType, "ISO" ) &&
|
828
|
+
0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
|
829
|
+
1 <= num_entries && num_entries <= 8 ) {
|
830
|
+
S_SHORT atoms[8];
|
831
|
+
S_SHORT iso_mass[8]; /* contains istotope mass number, not difference. 7-14-00 DCh. */
|
832
|
+
if ( !isotope_encountered ) {
|
833
|
+
/* first charge or radical record clears all Atom Block */
|
834
|
+
/* entered charge and radical data to zeroes */
|
835
|
+
isotope_encountered = -1;
|
836
|
+
}
|
837
|
+
for ( j = 0; j < num_entries; j++ ) {
|
838
|
+
if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
839
|
+
0 > mol_read_datum( &iso_mass[j], 0, MOL_SHORT_INT_DATA, &p ) ||
|
840
|
+
atoms[j] <= 0 || atoms[j] > num_atoms
|
841
|
+
/*|| iso_mass[j] < -18 || iso_mass[j] > 12*/ ) {
|
842
|
+
/* goto isotope_error; */
|
843
|
+
atoms[j] = -1; /* flag error */
|
844
|
+
MOLFILE_ERR_SET (err, 0, "Isotopic data not recognized:");
|
845
|
+
RemoveNonPrintable( line );
|
846
|
+
AddMOLfileError(pStrErr, line);
|
847
|
+
continue; /* ignore isotopic error for now */
|
848
|
+
}
|
849
|
+
}
|
850
|
+
if ( isotope_encountered == -1 ) {
|
851
|
+
for ( j = 0; j < num_atoms; j++ ) {
|
852
|
+
/*if ( !ctab->MolAtom[j].cAtomAliasedFlag )*/ /* clear even aliased atoms */
|
853
|
+
ctab->MolAtom[j].cMassDifference = 0;
|
854
|
+
}
|
855
|
+
isotope_encountered = 1;
|
856
|
+
}
|
857
|
+
for ( j = 0; j < num_entries; j++ ) {
|
858
|
+
if ( atoms[j] <= 0 )
|
859
|
+
continue; /* ignore isotopic error for now */
|
860
|
+
if ( 1 /* !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag */) {
|
861
|
+
char *at = ctab->MolAtom[atoms[j]-1].szAtomSymbol;
|
862
|
+
if ( at[1] || at[0] != 'D' && at[0] != 'T' ) { /* D & T cannot have ISO */
|
863
|
+
/* need atomic weight to calculate isotope difference. 7-14-00 DCh. */
|
864
|
+
#ifdef INCHI_MAIN
|
865
|
+
ctab->MolAtom[atoms[j]-1].cMassDifference = iso_mass[j]; /* mass, not difference */
|
866
|
+
#else
|
867
|
+
int atw, atw_diff;
|
868
|
+
if ( (atw = get_atw( at )) && abs( atw_diff = (int)iso_mass[j] - atw ) < 20 ) {
|
869
|
+
ctab->MolAtom[atoms[j]-1].cMassDifference = (char)(atw_diff? atw_diff : ZERO_ATW_DIFF);
|
870
|
+
}
|
871
|
+
#endif
|
872
|
+
}
|
873
|
+
}
|
874
|
+
}
|
875
|
+
continue;
|
876
|
+
}
|
877
|
+
}
|
878
|
+
err_fin:
|
879
|
+
return err;
|
880
|
+
}
|
881
|
+
/*************************************************************************/
|
882
|
+
/*
 * Release a MOL_DATA structure together with the atom, bond and coordinate
 * arrays owned by its ctab.
 *
 * mol_data  structure to free; NULL is accepted and nothing is done.
 *
 * Returns NULL in every case, so a caller can reset its own pointer with
 *     p = delete_mol_data( p );
 */
MOL_DATA* delete_mol_data( MOL_DATA* mol_data )
{
    MOL_CTAB *pCtab;

    if ( !mol_data ) {
        return NULL;
    }

    pCtab = &mol_data->ctab;
    if ( pCtab->MolAtom ) {
        inchi_free( pCtab->MolAtom );
    }
    if ( pCtab->MolBond ) {
        inchi_free( pCtab->MolBond );
    }
    if ( pCtab->szCoord ) {
        inchi_free( pCtab->szCoord );
    }
    inchi_free( mol_data );

    return NULL;
}
|
896
|
+
/*************************************************************************/
|
897
|
+
/* Comletely ingnore STEXT block, queries, and 3D features
|
898
|
+
*/
|
899
|
+
MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
|
900
|
+
int bGetOrigCoord, int *err, char *pStrErr )
|
901
|
+
{
|
902
|
+
MOL_DATA* mol_data = NULL;
|
903
|
+
int ret = 0, prev_ret, bEndOfData = 0;
|
904
|
+
int bReadAll = ( OnlyHeaderBlock == NULL );
|
905
|
+
MOL_CTAB ctab, *pCtab = NULL;
|
906
|
+
MOL_HEADER_BLOCK *pHdr = NULL;
|
907
|
+
|
908
|
+
*err = 0;
|
909
|
+
if ( bReadAll ) {
|
910
|
+
if ( NULL == ( mol_data = ( MOL_DATA* )inchi_calloc( 1, sizeof(MOL_DATA) ) ) ){
|
911
|
+
ret = 1; /* can't allocate mol_data structure */
|
912
|
+
AddMOLfileError( pStrErr, "Out of RAM" );
|
913
|
+
goto err_fin;
|
914
|
+
}
|
915
|
+
pHdr = &mol_data->hdr;
|
916
|
+
pCtab = &mol_data->ctab;
|
917
|
+
} else {
|
918
|
+
pHdr = OnlyHeaderBlock;
|
919
|
+
pCtab = OnlyCtab? OnlyCtab : &ctab;
|
920
|
+
memset( pHdr, 0, sizeof( MOL_HEADER_BLOCK ) );
|
921
|
+
memset( pCtab, 0, sizeof( MOL_CTAB ) );
|
922
|
+
}
|
923
|
+
pCtab->MolBond = NULL;
|
924
|
+
pCtab->MolAtom = NULL;
|
925
|
+
pCtab->szCoord = NULL;
|
926
|
+
|
927
|
+
if ( 0 != ( ret = mol_read_hdr(pHdr, inp, pStrErr) ) ){
|
928
|
+
ret += 10;
|
929
|
+
goto err_fin; /* most probably end of file */
|
930
|
+
}
|
931
|
+
if ( 0 != ( ret = mol_read_counts_line( pCtab , inp, pStrErr) ) ){
|
932
|
+
ret += 20;
|
933
|
+
goto err_fin;
|
934
|
+
}
|
935
|
+
|
936
|
+
if ( bReadAll ) {
|
937
|
+
if ( NULL == ( mol_data->ctab.MolAtom = (MOL_ATOM*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_ATOM)) ) ){
|
938
|
+
ret = 2; /* can't allocate MolAtom structure */
|
939
|
+
MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
|
940
|
+
}
|
941
|
+
if ( bGetOrigCoord &&
|
942
|
+
NULL == ( mol_data->ctab.szCoord = (MOL_COORD*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_COORD)) ) ){
|
943
|
+
ret = 2; /* can't allocate MolAtom structure */
|
944
|
+
MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
|
945
|
+
}
|
946
|
+
}
|
947
|
+
if ( 0 != ( ret = read_atom_block(pCtab, inp, ret, pStrErr) ) ){
|
948
|
+
if ( ret < 0 ) {
|
949
|
+
ret = -ret;
|
950
|
+
bEndOfData = 1;
|
951
|
+
}
|
952
|
+
ret += 30;
|
953
|
+
/* goto err_fin; */
|
954
|
+
}
|
955
|
+
|
956
|
+
if ( bReadAll && ret < 30 ) {
|
957
|
+
if ( !bEndOfData && NULL == ( mol_data->ctab.MolBond = (MOL_BONDS*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfBonds,1), sizeof(MOL_BONDS)) ) ){
|
958
|
+
ret = 3; /* can't allocate MolBond structure */
|
959
|
+
MOLFILE_ERR_FIN (ret, 3, err_fin, "Out of RAM");
|
960
|
+
}
|
961
|
+
}
|
962
|
+
prev_ret = ret;
|
963
|
+
if ( !bEndOfData && 0 != ( ret = read_bonds_block(pCtab, inp, ret, pStrErr) ) ){
|
964
|
+
if ( ret < 0 ) {
|
965
|
+
ret = -ret;
|
966
|
+
bEndOfData = 1;
|
967
|
+
}
|
968
|
+
ret = prev_ret? prev_ret : ret + 40;
|
969
|
+
}
|
970
|
+
prev_ret = ret;
|
971
|
+
if ( !bEndOfData && 0 != ( ret = read_stext_block(pCtab, inp, ret, pStrErr) ) ){
|
972
|
+
ret = prev_ret? prev_ret : ret + 50;
|
973
|
+
}
|
974
|
+
prev_ret = ret;
|
975
|
+
if ( !bEndOfData && 0 != ( ret = read_properties_block(pCtab, pHdr, inp, ret, pStrErr) ) ){
|
976
|
+
if ( ret < 0 ) {
|
977
|
+
ret = -ret;
|
978
|
+
bEndOfData = 1;
|
979
|
+
}
|
980
|
+
ret = prev_ret? prev_ret : ret + 60;
|
981
|
+
}
|
982
|
+
|
983
|
+
err_fin:
|
984
|
+
*err = bEndOfData? -ret : ret;
|
985
|
+
if ( bReadAll ) {
|
986
|
+
if ( ret )
|
987
|
+
mol_data = delete_mol_data( mol_data ); /* delete all results */
|
988
|
+
return mol_data;
|
989
|
+
} else {
|
990
|
+
if ( ret )
|
991
|
+
return NULL;
|
992
|
+
else
|
993
|
+
return (MOL_DATA*)OnlyHeaderBlock;
|
994
|
+
}
|
995
|
+
}
|
996
|
+
|
997
|
+
/******************************************************************/
|
998
|
+
/* SData header labels that identify_sdf_label() looks for */
char sdf_data_hdr_name[] = "NAME";
char sdf_data_hdr_comm[] = "COMMENT";

/*
 * States of the SData reader in bypass_sdf_data_items().
 * The SDF_DATA_HEADER* values are also the results of identify_sdf_label().
 */
enum {
    SDF_START,                /* initial state, no SData line processed yet */
    SDF_DATA_HEADER,          /* data header whose label is not of interest */
    SDF_DATA_HEADER_NAME,     /* data header labelled with sdf_data_hdr_name */
    SDF_DATA_HEADER_COMMENT,  /* data header labelled with sdf_data_hdr_comm */
    SDF_DATA_HEADER_CAS,      /* data header whose label starts with "CAS" */
    SDF_DATA_HEADER_USER,     /* data header carrying the user-requested label */
    SDF_DATA_LINE,            /* non-empty line following a data header */
    SDF_END_OF_DATA_ITEM,     /* empty line that ends a data item */
    SDF_EMPTY_LINE,           /* empty line where a data header was expected */
    SDF_END_OF_DATA_BLOCK     /* SDF_END_OF_DATA line has been read */
};
|
1004
|
+
/******************************************************************/
|
1005
|
+
/*
 * Convert a CAS-registry-number-like string ("50-00-0") to a long.
 *
 * The string is compacted IN PLACE: an optional leading '-' is kept, every
 * following '-' is dropped, digits are kept, and scanning stops at the first
 * character that is neither a digit nor '-'.  The compacted text is then
 * terminated and parsed with strtol() in base 10.
 *
 * line  modifiable, zero-terminated input; overwritten with the compacted text.
 *
 * Returns the parsed number (0 when no digits were found).
 */
static long extract_cas_rn( char *line )
{
    char *src = line;
    char *dst;

    if ( *src == '-' ) {
        src++;              /* keep a leading minus sign where it is */
    }
    dst = src;

    while ( *src ) {
        if ( isdigit( (int)(unsigned char)*src ) ) {
            *dst++ = *src;  /* keep the digit */
        } else if ( *src != '-' ) {
            break;          /* anything but a separator ends the number */
        }
        src++;
    }
    *dst = '\0';

    return strtol( line, NULL, 10 );
}
|
1020
|
+
/******************************************************************/
|
1021
|
+
static int identify_sdf_label( char* inp_line, const char *pSdfLabel )
|
1022
|
+
{
|
1023
|
+
char line[MOLFILEMAXLINELEN];
|
1024
|
+
char *p, *q;
|
1025
|
+
int i, j, len;
|
1026
|
+
if ( (p = strchr( inp_line, '<' )) &&
|
1027
|
+
(q = strchr( p, '>' )) &&
|
1028
|
+
(len = q-p-1) > 0 && len < (int)sizeof(line) ) {
|
1029
|
+
memcpy( line, p+1, len );
|
1030
|
+
line[len] = '\0';
|
1031
|
+
for ( i = 0; isspace( UCINT line[i] ); i ++ )
|
1032
|
+
;
|
1033
|
+
for ( j = len-1; j >= i && isspace( UCINT line[i] ); j -- )
|
1034
|
+
;
|
1035
|
+
len = j-i+1;
|
1036
|
+
p = line+i;
|
1037
|
+
if ( pSdfLabel && pSdfLabel[0] && len == (int)strlen(pSdfLabel) && !memicmp( p, pSdfLabel, len ) )
|
1038
|
+
return SDF_DATA_HEADER_USER;
|
1039
|
+
if ( len == sizeof(sdf_data_hdr_name)-1 && !memicmp( p, sdf_data_hdr_name, len ) )
|
1040
|
+
return SDF_DATA_HEADER_NAME;
|
1041
|
+
if ( len == sizeof(sdf_data_hdr_comm)-1 && !memicmp( p, sdf_data_hdr_comm, len ) )
|
1042
|
+
return SDF_DATA_HEADER_COMMENT;
|
1043
|
+
if ( !memicmp( p, "CAS", 3 ) )
|
1044
|
+
return SDF_DATA_HEADER_CAS;
|
1045
|
+
}
|
1046
|
+
return SDF_DATA_HEADER;
|
1047
|
+
}
|
1048
|
+
/******************************************************************/
|
1049
|
+
int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment,
|
1050
|
+
int lcomment, char *name, int lname, int prev_err,
|
1051
|
+
const char *pSdfLabel, char *pSdfValue, char *pStrErr )
|
1052
|
+
{
|
1053
|
+
char line[MOLFILEINPLINELEN];
|
1054
|
+
const int line_len = sizeof(line);
|
1055
|
+
int err = 0;
|
1056
|
+
int current_state = SDF_START;
|
1057
|
+
int n_blank_lines = 0;
|
1058
|
+
int n_lines = 0;
|
1059
|
+
char* p = NULL;
|
1060
|
+
int bNeedsName = name && lname > 0 && !name[0];
|
1061
|
+
int bNeedsComm = comment && lcomment > 0 && !comment[0];
|
1062
|
+
int bNeedsUser = pSdfLabel && pSdfLabel[0] && pSdfValue;
|
1063
|
+
int bNeedsCASrn = 0;
|
1064
|
+
int bCASrnIsUser = 0;
|
1065
|
+
|
1066
|
+
if ( cas_reg_no != NULL ) {
|
1067
|
+
bNeedsCASrn = 1;
|
1068
|
+
*cas_reg_no = 0;
|
1069
|
+
bCASrnIsUser = (bNeedsUser && !memicmp(pSdfLabel,"CAS", 3));
|
1070
|
+
}
|
1071
|
+
|
1072
|
+
while ( err == 0 &&
|
1073
|
+
current_state !=SDF_END_OF_DATA_BLOCK &&
|
1074
|
+
NULL != ( p = fgets_up_to_lf( line, line_len, inp ) ) ) {
|
1075
|
+
|
1076
|
+
if ( !n_lines && !memcmp(line, "M END", 6) ) {
|
1077
|
+
continue; /* allow subtle errors */
|
1078
|
+
}
|
1079
|
+
n_lines++;
|
1080
|
+
|
1081
|
+
remove_trailing_spaces( line );
|
1082
|
+
if ( line[MOLFILEMAXLINELEN] ){
|
1083
|
+
if ( current_state != SDF_DATA_HEADER &&
|
1084
|
+
current_state != SDF_DATA_LINE &&
|
1085
|
+
current_state != SDF_DATA_HEADER_NAME &&
|
1086
|
+
current_state != SDF_DATA_HEADER_USER &&
|
1087
|
+
current_state != SDF_DATA_HEADER_COMMENT ) {
|
1088
|
+
line[MOLFILEMAXLINELEN] = '\0';
|
1089
|
+
if ( !prev_err ) {
|
1090
|
+
MOLFILE_ERR_SET (err, 0, "Too long SData line truncated");
|
1091
|
+
}
|
1092
|
+
} else {
|
1093
|
+
/* allow long lines in SDF data. 9-29-00 DCh */
|
1094
|
+
line[MOLFILEMAXLINELEN] = '\0';
|
1095
|
+
}
|
1096
|
+
}
|
1097
|
+
|
1098
|
+
n_blank_lines += ( *line == '\0' );
|
1099
|
+
|
1100
|
+
switch( current_state ) {
|
1101
|
+
|
1102
|
+
case SDF_START:
|
1103
|
+
case SDF_END_OF_DATA_ITEM:
|
1104
|
+
case SDF_EMPTY_LINE: /* Added 9-25-97 DCh */
|
1105
|
+
|
1106
|
+
if ( 0 == strcmp( line, SDF_END_OF_DATA ) ) {
|
1107
|
+
current_state = SDF_END_OF_DATA_BLOCK;
|
1108
|
+
}
|
1109
|
+
else
|
1110
|
+
if ( '>' == *line ) {
|
1111
|
+
current_state = ( bNeedsName || bNeedsComm || bNeedsCASrn || bNeedsUser )? identify_sdf_label(line, pSdfLabel) : SDF_DATA_HEADER;
|
1112
|
+
}else
|
1113
|
+
if ( *line == '\0' ) { /* Added 9-25-97 DCh */
|
1114
|
+
/* Relax the strictness: Allow more than 1 empty line. */
|
1115
|
+
current_state=SDF_EMPTY_LINE;
|
1116
|
+
} else
|
1117
|
+
if ( !prev_err ) {
|
1118
|
+
MOLFILE_ERR_SET (err, 3, "Unexpected SData header line:");
|
1119
|
+
RemoveNonPrintable( line );
|
1120
|
+
AddMOLfileError(pStrErr, line);
|
1121
|
+
/* unexpected contents of data header line */
|
1122
|
+
} else {
|
1123
|
+
err = 3;
|
1124
|
+
}
|
1125
|
+
break;
|
1126
|
+
|
1127
|
+
case SDF_DATA_HEADER_NAME:
|
1128
|
+
if ( bNeedsName && 0 < normalize_name( line ) ) {
|
1129
|
+
bNeedsName = 0;
|
1130
|
+
mystrncpy( name, line, lname );
|
1131
|
+
}
|
1132
|
+
goto got_data_line;
|
1133
|
+
|
1134
|
+
case SDF_DATA_HEADER_COMMENT:
|
1135
|
+
if ( bNeedsComm && 0 < normalize_name( line ) ) {
|
1136
|
+
bNeedsComm = 0;
|
1137
|
+
mystrncpy( comment, line, lcomment );
|
1138
|
+
}
|
1139
|
+
goto got_data_line;
|
1140
|
+
|
1141
|
+
case SDF_DATA_HEADER_USER:
|
1142
|
+
if ( bNeedsUser && 0 < normalize_name( line ) ) {
|
1143
|
+
bNeedsUser = 0;
|
1144
|
+
mystrncpy( pSdfValue, line, MAX_SDF_VALUE+1 );
|
1145
|
+
if ( bCASrnIsUser && bNeedsCASrn ) {
|
1146
|
+
*cas_reg_no = extract_cas_rn( line );
|
1147
|
+
bNeedsCASrn = (0 == *cas_reg_no);
|
1148
|
+
}
|
1149
|
+
}
|
1150
|
+
goto got_data_line;
|
1151
|
+
|
1152
|
+
case SDF_DATA_HEADER_CAS:
|
1153
|
+
if ( bNeedsCASrn && 0 < normalize_name( line ) ) {
|
1154
|
+
*cas_reg_no = extract_cas_rn( line );
|
1155
|
+
bNeedsCASrn = (0 == *cas_reg_no);
|
1156
|
+
}
|
1157
|
+
goto got_data_line;
|
1158
|
+
|
1159
|
+
case SDF_DATA_HEADER:
|
1160
|
+
case SDF_DATA_LINE:
|
1161
|
+
got_data_line:
|
1162
|
+
current_state = *line? SDF_DATA_LINE : SDF_END_OF_DATA_ITEM;
|
1163
|
+
break;
|
1164
|
+
|
1165
|
+
}
|
1166
|
+
}
|
1167
|
+
if ( 0 == err && SDF_END_OF_DATA_BLOCK != current_state && NULL == p )
|
1168
|
+
; /* err = 4; */ /* unexpected end of file: missing $$$$ */
|
1169
|
+
else
|
1170
|
+
if (err && ( n_blank_lines == n_lines && *line == '\0' ) )
|
1171
|
+
err = 5; /* empty lines -- do not know when this can happen */
|
1172
|
+
|
1173
|
+
if ( err && err != 5 && current_state != SDF_END_OF_DATA_BLOCK && p ) {
|
1174
|
+
/* bypass up to $$$$ */
|
1175
|
+
while ( ( p = fgets_up_to_lf( line, line_len, inp ) ) && memcmp( line, SDF_END_OF_DATA, 4 ) )
|
1176
|
+
;
|
1177
|
+
if ( p ) {
|
1178
|
+
err = 9; /* bypassed to $$$$; non-fatal */
|
1179
|
+
AddMOLfileError(pStrErr, "Bypassing to next structure");
|
1180
|
+
}
|
1181
|
+
|
1182
|
+
}
|
1183
|
+
|
1184
|
+
return err;
|
1185
|
+
}
|
1186
|
+
/******************************************************************/
|
1187
|
+
/*
 * Read one SDfile record: the structure (through read_mol_file) followed by
 * its SData items (through bypass_sdf_data_items).
 *
 * inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, pStrErr
 *              passed unchanged to read_mol_file().
 * pname,lname  if pname is not NULL and lname is non-zero, pname is emptied
 *              and then offered to the SData reader as the name buffer.
 * Id           if not NULL, set to 0 and then offered to the SData reader as
 *              the CAS registry number output.
 * pSdfLabel, pSdfValue
 *              user-requested SData label and the buffer for its value.
 * err          out: error code.  A negative code from read_mol_file (end of
 *              data already reached) is made positive and the SData part is
 *              not read; otherwise a non-zero SData error replaces the code.
 *
 * Returns whatever read_mol_file() returned.
 */
MOL_DATA* read_sdfile_segment(FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
                              int bGetOrigCoord,
                              char *pname, int lname,
                              long *Id, const char *pSdfLabel, char *pSdfValue,
                              int *err, char *pStrErr )
{
    MOL_DATA *mol_data;
    int       sdata_err;

    mol_data = read_mol_file( inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, err, pStrErr );

    if ( pname && lname ) {
        pname[0] = '\0';
    }
    if ( Id ) {
        *Id = 0L; /* ignore for now */
    }

    if ( *err < 0 ) {
        /* end of data encountered while reading the structure */
        *err = -*err;
        return mol_data;
    }

    sdata_err = bypass_sdf_data_items( inp, Id, NULL, 0, pname, lname, *err,
                                       pSdfLabel, pSdfValue, pStrErr );
    if ( sdata_err ) {
        *err = sdata_err; /* important to continue to the next good structure */
    }
    return mol_data;
}
|
1214
|
+
/****************************************************************************/
|
1215
|
+
int CopyMOLfile(FILE *inp_file, long fPtrStart, long fPtrEnd, FILE *prb_file, long lNumb)
|
1216
|
+
{
|
1217
|
+
char line[MOLFILEINPLINELEN], *p;
|
1218
|
+
long fPtr;
|
1219
|
+
int ret = 1;
|
1220
|
+
char szNumber[32];
|
1221
|
+
|
1222
|
+
if ( inp_file && prb_file && fPtrStart >= 0L &&
|
1223
|
+
fPtrEnd > fPtrStart &&
|
1224
|
+
0 == fseek( inp_file, fPtrStart, SEEK_SET ) ) {
|
1225
|
+
|
1226
|
+
while ( fPtrEnd > (fPtr = ftell(inp_file)) && fPtr >= 0L &&
|
1227
|
+
fgets_up_to_lf( line, sizeof(line)-1, inp_file ) ) {
|
1228
|
+
line[sizeof(line)-1] = '\0'; /* unnecessary extra precaution */
|
1229
|
+
if ( fPtr == fPtrStart && lNumb ) {
|
1230
|
+
int len;
|
1231
|
+
LtrimRtrim( line, &len );
|
1232
|
+
len = sprintf( szNumber, "#%ld%s", lNumb, len?"/":"" );
|
1233
|
+
mystrncpy( line+len, line, sizeof(line)-len-1 );
|
1234
|
+
memcpy( line, szNumber, len );
|
1235
|
+
}
|
1236
|
+
if ( !strchr(line, '\n') ) {
|
1237
|
+
p = line+strlen(line);
|
1238
|
+
p[0] = '\n';
|
1239
|
+
p[1] = '\0';
|
1240
|
+
}
|
1241
|
+
fputs( line, prb_file );
|
1242
|
+
}
|
1243
|
+
ret = fseek( inp_file, fPtrEnd, SEEK_SET );
|
1244
|
+
}
|
1245
|
+
return ret;
|
1246
|
+
}
|