rino 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/README +44 -0
  2. data/Rakefile +123 -0
  3. data/ext/extconf.rb +26 -0
  4. data/ext/ruby_inchi_main.so +0 -0
  5. data/ext/src/aux2atom.h +2786 -0
  6. data/ext/src/comdef.h +148 -0
  7. data/ext/src/e_0dstereo.c +3014 -0
  8. data/ext/src/e_0dstereo.h +31 -0
  9. data/ext/src/e_comdef.h +57 -0
  10. data/ext/src/e_ctl_data.h +147 -0
  11. data/ext/src/e_ichi_io.c +498 -0
  12. data/ext/src/e_ichi_io.h +40 -0
  13. data/ext/src/e_ichi_parms.c +37 -0
  14. data/ext/src/e_ichi_parms.h +41 -0
  15. data/ext/src/e_ichicomp.h +50 -0
  16. data/ext/src/e_ichierr.h +40 -0
  17. data/ext/src/e_ichimain.c +593 -0
  18. data/ext/src/e_ichisize.h +43 -0
  19. data/ext/src/e_inchi_atom.c +75 -0
  20. data/ext/src/e_inchi_atom.h +33 -0
  21. data/ext/src/e_inpdef.h +41 -0
  22. data/ext/src/e_mode.h +706 -0
  23. data/ext/src/e_mol2atom.c +649 -0
  24. data/ext/src/e_readinch.c +58 -0
  25. data/ext/src/e_readmol.c +54 -0
  26. data/ext/src/e_readmol.h +180 -0
  27. data/ext/src/e_readstru.c +251 -0
  28. data/ext/src/e_readstru.h +33 -0
  29. data/ext/src/e_util.c +284 -0
  30. data/ext/src/e_util.h +61 -0
  31. data/ext/src/extr_ct.h +251 -0
  32. data/ext/src/ichi.h +206 -0
  33. data/ext/src/ichi_bns.c +7999 -0
  34. data/ext/src/ichi_bns.h +231 -0
  35. data/ext/src/ichican2.c +5000 -0
  36. data/ext/src/ichicano.c +2195 -0
  37. data/ext/src/ichicano.h +49 -0
  38. data/ext/src/ichicans.c +1625 -0
  39. data/ext/src/ichicant.h +379 -0
  40. data/ext/src/ichicomn.h +260 -0
  41. data/ext/src/ichicomp.h +50 -0
  42. data/ext/src/ichidrp.h +119 -0
  43. data/ext/src/ichierr.h +124 -0
  44. data/ext/src/ichiisot.c +101 -0
  45. data/ext/src/ichilnct.c +286 -0
  46. data/ext/src/ichimain.h +132 -0
  47. data/ext/src/ichimak2.c +1189 -0
  48. data/ext/src/ichimake.c +3812 -0
  49. data/ext/src/ichimake.h +205 -0
  50. data/ext/src/ichimap1.c +851 -0
  51. data/ext/src/ichimap2.c +2856 -0
  52. data/ext/src/ichimap4.c +1609 -0
  53. data/ext/src/ichinorm.c +741 -0
  54. data/ext/src/ichinorm.h +67 -0
  55. data/ext/src/ichiparm.c +45 -0
  56. data/ext/src/ichiparm.h +1441 -0
  57. data/ext/src/ichiprt1.c +3612 -0
  58. data/ext/src/ichiprt2.c +1511 -0
  59. data/ext/src/ichiprt3.c +3011 -0
  60. data/ext/src/ichiqueu.c +1003 -0
  61. data/ext/src/ichiring.c +326 -0
  62. data/ext/src/ichiring.h +49 -0
  63. data/ext/src/ichisize.h +35 -0
  64. data/ext/src/ichisort.c +539 -0
  65. data/ext/src/ichister.c +3538 -0
  66. data/ext/src/ichister.h +35 -0
  67. data/ext/src/ichitaut.c +3843 -0
  68. data/ext/src/ichitaut.h +387 -0
  69. data/ext/src/ichitime.h +74 -0
  70. data/ext/src/inchi_api.h +670 -0
  71. data/ext/src/inchi_dll.c +1480 -0
  72. data/ext/src/inchi_dll.h +34 -0
  73. data/ext/src/inchi_dll_main.c +23 -0
  74. data/ext/src/inchi_dll_main.h +31 -0
  75. data/ext/src/inpdef.h +328 -0
  76. data/ext/src/lreadmol.h +1246 -0
  77. data/ext/src/mode.h +706 -0
  78. data/ext/src/ruby_inchi_main.c +558 -0
  79. data/ext/src/runichi.c +4179 -0
  80. data/ext/src/strutil.c +3861 -0
  81. data/ext/src/strutil.h +182 -0
  82. data/ext/src/util.c +1130 -0
  83. data/ext/src/util.h +85 -0
  84. data/lib/clean_tempfile.rb +220 -0
  85. data/lib/rino.rb +111 -0
  86. data/test/test.rb +386 -0
  87. metadata +130 -0
@@ -0,0 +1,1246 @@
1
+ /*
2
+ * International Union of Pure and Applied Chemistry (IUPAC)
3
+ * International Chemical Identifier (InChI)
4
+ * Version 1
5
+ * Software version 1.00
6
+ * April 13, 2005
7
+ * Developed at NIST
8
+ */
9
+
10
+
11
+ /* local prototypes */
12
+ int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment, int lcomment, char *name, int lname, int prev_err,
13
+ const char *pSdfLabel, char *pSdfValue, char *pStrErr );
14
+ MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
15
+ int bGetOrigCoord, int *err, char *pStrErr );
16
+
17
+
18
+ static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr);
19
+ static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr );
20
+ static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
21
+ static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
22
+ static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
23
+ static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr );
24
+
25
+ static int identify_sdf_label( char* inp_line, const char *pSdfLabel );
26
+ static long extract_cas_rn( char *line );
27
+ int RemoveNonPrintable( char *line );
28
+
29
+
30
+ /******/
31
/*
 * Error-reporting helpers.  Both macros expect a variable named pStrErr to be
 * in scope at the point of use; the message is handed to AddMOLfileError().
 *
 *   err      lvalue holding the current error code; it is overwritten only
 *            if it is still zero and new_err is non-zero
 *   new_err  error code to record (0 = report the message as a warning only)
 *   err_fin  label to jump to after reporting (MOLFILE_ERR_FIN only)
 *   msg      message text
 *
 * Each body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement and stays correct under an unbraced if/else.
 */
#ifndef MOLFILE_ERR_FIN
#define MOLFILE_ERR_FIN(err, new_err, err_fin, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError(pStrErr, (msg)); \
        goto err_fin; \
    } while (0)
#endif
#ifndef MOLFILE_ERR_SET
#define MOLFILE_ERR_SET(err, new_err, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError(pStrErr, (msg)); \
    } while (0)
#endif
39
+
40
+ /*************************************************************************/
41
+ int AddMOLfileError( char *pStrErr, const char *szMsg )
42
+ {
43
+ if ( pStrErr && szMsg && szMsg[0] ) {
44
+ int lenStrErr = strlen( pStrErr );
45
+ int lenMsg = strlen( szMsg );
46
+ char *p = strstr( pStrErr, szMsg );
47
+ if ( p && (p==pStrErr || *(p-1) == ' ' && (*(p-2) == ';' || *(p-2) == ':' )) &&
48
+ (p+lenMsg == pStrErr+lenStrErr ||
49
+ p[lenMsg] == ';' && p[lenMsg+1] == ' ' ||
50
+ p[lenMsg-1]==':' && p[lenMsg]==' ') ) {
51
+ return 1; /* reject duplicates */
52
+ }
53
+ if ( lenStrErr + lenMsg + 2*(lenStrErr > 0) < STR_ERR_LEN ) {
54
+ /* enough room to add */
55
+ if (lenStrErr > 0) {
56
+ if ( pStrErr[lenStrErr-1] != ':' ) {
57
+ strcat( pStrErr, ";" );
58
+ }
59
+ strcat( pStrErr, " " );
60
+ }
61
+ strcat( pStrErr, szMsg );
62
+ return 1;
63
+ }
64
+ /* no room */
65
+ if ( strstr( pStrErr, "..." ) ) {
66
+ return 0; /* no room mark has already been set */
67
+ }
68
+ if ( lenStrErr + 3 < STR_ERR_LEN ) {
69
+ strcat( pStrErr, "..." );
70
+ }
71
+ }
72
+ return 0;
73
+ }
74
+ /*************************************************************************/
75
/*
 * Copy at most len bytes of source into dest, zero-terminate dest, and find
 * where the trailing white space of the copied text begins.
 *
 * dest         receives the copy; must have room for len+1 bytes
 * source       input text
 * len          maximum number of bytes to copy; a negative value is treated
 *              as 0 so that the terminator is never written before dest
 * first_space  out: points into dest at the first trailing blank, or at the
 *              terminating zero if there is no trailing blank
 *
 * Returns the number of bytes actually copied, not counting the terminator.
 */
static int mol_copy_check_empty( char* dest, char* source, int len, char **first_space )
{
    int i, c;

    if ( len < 0 ) {
        len = 0;
    }
    if ( len > 0 ) {
        strncpy( dest, source, len );
    }
    dest[len] = '\0';
    len = ( len > 0 ) ? (int)strlen( dest ) : 0;
    /* walk back over trailing white space */
    for ( i = len - 1; i >= 0 && 0 != (c = source[i]) && isspace( (unsigned char)c ); i-- )
        ;
    *first_space = dest + (i+1);
    return len;
}
87
+ /*************************************************************************/
88
+ static int mol_read_datum(void* data, int field_len, int data_type, char** line_ptr)
89
+ {
90
+ /* 1. 'field_len' for MOL_STRING_DATA does not include trailing zero,
91
+ * that is actual length of the string pointed by 'data'
92
+ * should be at least field_len+1 bytes.
93
+ * For numerical data 'field_len' is length of input data field
94
+ * For numerical integral data field_len <= 0 means read up to first
95
+ * non-numeric character as strtod() does ("free format")
96
+ * 2. return value: for MOL_STRING_DATA: number of bytes excluding trailing zero
97
+ * for all others: 1=success; 0 = empty; -1= error
98
+ * 3. on exit *line_ptr points to the next byte after the last entered
99
+ */
100
+ char *p = *line_ptr, *q, *p_end;
101
+ int i, ret=1, c, len;
102
+ long ldata;
103
+ double ddata;
104
+
105
+ switch( data_type ) {
106
+ case MOL_STRING_DATA:
107
+ for ( i= 0; i < field_len && 0 != (c = p[i]) && isspace(UCINT c); i++ ) /* pass by all leading spaces */
108
+ ;
109
+ len = mol_copy_check_empty( (char*)data, &p[i], field_len-i, &q );
110
+ ret = ( q - (char*)data );/* actual data length */
111
+ *q = '\0'; /* add zero termination to data if it is not there yet*/
112
+ *line_ptr += (len+i); /* ptr to the 1st byte of the next input field or to zero termination */
113
+ break;
114
+
115
+ case MOL_CHAR_INT_DATA:
116
+ case MOL_SHORT_INT_DATA:
117
+ case MOL_LONG_INT_DATA:
118
+ { /* block start */
119
+ char str[MOL_MAX_VALUE_LEN+1];
120
+ ldata = 0L;
121
+ if ( field_len > MOL_MAX_VALUE_LEN ) {
122
+ ret = -1;
123
+ }else
124
+ if ( field_len > 0 ) { /* fixed length */
125
+ *line_ptr += ( len = mol_copy_check_empty( str, p, field_len, &q ) );
126
+ *q = '\0';
127
+ if ( !len || !(q-str) ) { /* empty string */
128
+ ret = 0;
129
+ }else
130
+ if ( (ldata=strtol(str,&p_end,10), p_end != q) ){ /* wrong data: incompletely interpreted */
131
+ ret = -1;
132
+ }
133
+ }else{ /* free format: field_len <= 0 */
134
+ ldata = strtol( p, &p_end, 10 );
135
+ *line_ptr += ( len = p_end - p );
136
+ if ( len == 0 ){
137
+ ret = 0;
138
+ }
139
+ }
140
+
141
+ switch( data_type ) {
142
+ case MOL_CHAR_INT_DATA:
143
+ if ( SCHAR_MIN <= ldata && ldata <= SCHAR_MAX ){ /* from || to &&: 11-19-96 */
144
+ *(S_CHAR*)data = (S_CHAR)ldata;
145
+ }else{
146
+ *(S_CHAR*)data = (S_CHAR)0;
147
+ ret = -1;
148
+ }
149
+ break;
150
+ case MOL_SHORT_INT_DATA:
151
+ if ( SHRT_MIN <= ldata && ldata <= SHRT_MAX ){
152
+ *(S_SHORT*)data = (S_SHORT)ldata;
153
+ }else{
154
+ *(S_SHORT*)data = (S_SHORT)0;
155
+ ret = -1;
156
+ }
157
+ break;
158
+ case MOL_LONG_INT_DATA:
159
+ if ( LONG_MIN < ldata && ldata < LONG_MAX ){
160
+ *(long*)data = (long)ldata;
161
+ }else{
162
+ *(long*)data = 0L;
163
+ ret = -1;
164
+ }
165
+ break;
166
+ default:
167
+ ret=-1;
168
+ }
169
+
170
+ } /* block end */
171
+ break;
172
+ case MOL_DOUBLE_DATA:
173
+ case MOL_FLOAT_DATA:
174
+ { /* block start */
175
+ char str[MOL_MAX_VALUE_LEN+1];
176
+ if ( field_len > MOL_MAX_VALUE_LEN ) {
177
+ ret = -1;
178
+ ddata = 0.0;
179
+ }else
180
+ if ( field_len > 0 ) {
181
+ *line_ptr += (len = mol_copy_check_empty( str, p, field_len, &q ));
182
+ *q = '\0';
183
+ if ( !len || !(q-str) ) { /* empty string */
184
+ ddata = 0.0;
185
+ ret = 0;
186
+ }else
187
+ if ( (ddata=strtod(str,&p_end), p_end != q) ){ /* wrong data */
188
+ ret = -1;
189
+ }
190
+ }else{ /* free format */
191
+ ddata = strtod( p, &p_end );
192
+ *line_ptr += ( len = p_end - p );
193
+ if ( len == 0 ){
194
+ ret = 0;
195
+ }
196
+ }
197
+ switch(data_type){
198
+ case MOL_DOUBLE_DATA:
199
+ if ( ddata != HUGE_VAL && /*ldata*/ ddata != -HUGE_VAL ){ /* replaced ldata with ddata 6-30-98 DCh */
200
+ *(double*)data = ddata;
201
+ }else{
202
+ *(double*)data = 0.0;
203
+ ret = -1;
204
+ }
205
+ break;
206
+ case MOL_FLOAT_DATA:
207
+ if ( fabs(ddata) <= (double)FLT_MIN ) {
208
+ *(float*)data = 0.0;
209
+ }else
210
+ if ( fabs(ddata) >= (double)FLT_MAX ) {
211
+ *(float*)data = 0.0;
212
+ ret = -1;
213
+ }else{
214
+ *(float*)data = (float)ddata;
215
+ }
216
+ break;
217
+ }
218
+ } /* block end */
219
+ break;
220
+ case MOL_JUMP_TO_RIGHT:
221
+ for ( i = 0; i < field_len && p[i]; i++ )
222
+ ;
223
+ *line_ptr += i;
224
+ ret = i;
225
+ break;
226
+ default:
227
+ ret = -1;
228
+ }
229
+ return ret;
230
+ }
231
+ /*************************************************************************/
232
+ static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr)
233
+ {
234
+ /* All input lines can have are up 80 characters */
235
+ /* Header Block */
236
+ char line[MOLFILEINPLINELEN]; /* + cr +lf +zero termination + reserve */
237
+ int err = 0, len;
238
+ const int line_len = sizeof(line);
239
+ char *p;
240
+
241
+ /* memset( &hdr, 0, sizeof( MOL_HEADER_BLOCK ) ); */
242
+ /*------------ header line #1: name ----------------*/
243
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
244
+ err = 1; /* can't read the input file line */
245
+ /* AddMOLfileError( pStrErr, "Can't read header block name line" ); */
246
+ goto err_fin;
247
+ }
248
+ remove_one_lf( line );
249
+ /* -- Disabled to relax strictness: allow > 80 chars names.
250
+ if ( line[MOLFILEMAXLINELEN] ){
251
+ err = 2; // too long line
252
+ goto err_fin;
253
+ }
254
+ */
255
+ len = mol_read_datum( hdr->szMoleculeName, sizeof(hdr->szMoleculeName)-1, MOL_STRING_DATA, &p );
256
+ /*----------- header line #2 -----------------------*/
257
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
258
+ err = 3; /* can't read the input file line */
259
+ /* AddMOLfileError( pStrErr, "Can't read header block line 2" ); */
260
+ goto err_fin;
261
+ }
262
+ remove_one_lf( line );
263
+ /* -- Disabled to relax strictness: allow > 80 chars names.
264
+ if ( line[MOLFILEMAXLINELEN] ){
265
+ err = 4; // too long input file line
266
+ goto err_fin;
267
+ }
268
+ */
269
+ len = mol_read_datum( hdr->szUserInitials, sizeof(hdr->szUserInitials)-1, MOL_STRING_DATA, &p );
270
+ len = mol_read_datum( hdr->szProgramName, sizeof(hdr->szProgramName)-1, MOL_STRING_DATA, &p );
271
+
272
+ /*------------ Relax strictness -----------------------*/
273
+ len = mol_read_datum( &hdr->cMonth, 2, MOL_CHAR_INT_DATA, &p );
274
+ len = mol_read_datum( &hdr->cDay, 2, MOL_CHAR_INT_DATA, &p );
275
+ len = mol_read_datum( &hdr->cYear, 2, MOL_CHAR_INT_DATA, &p );
276
+ len = mol_read_datum( &hdr->cHour, 2, MOL_CHAR_INT_DATA, &p );
277
+ len = mol_read_datum( &hdr->cMinute, 2, MOL_CHAR_INT_DATA, &p );
278
+ len = mol_read_datum( hdr->szDimCode, sizeof(hdr->szDimCode)-1, MOL_STRING_DATA, &p );
279
+ len = mol_read_datum( &hdr->nScalingFactor1, 2, MOL_SHORT_INT_DATA, &p );
280
+ len = mol_read_datum( &hdr->dScalingFactor2, 10, MOL_DOUBLE_DATA, &p );
281
+ len = mol_read_datum( &hdr->dEnergy, 12, MOL_DOUBLE_DATA, &p );
282
+ len = mol_read_datum( &hdr->lInternalRegistryNumber, 6, MOL_LONG_INT_DATA, &p );
283
+
284
+ /* save the whole line 2 */
285
+ p = line;
286
+ len = mol_read_datum( hdr->szMoleculeLine2, sizeof(hdr->szMoleculeLine2)-1, MOL_STRING_DATA, &p );
287
+
288
+
289
+ /*------------ header line #3: comment ----------------*/
290
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
291
+ err = 7; /* can't read the line */
292
+ /* AddMOLfileError( pStrErr, "Can't read header block comment line" ); */
293
+ goto err_fin;
294
+ }
295
+ remove_one_lf( line );
296
+ /* -- Disabled to relax strictness: allow > 80 chars comments.
297
+ if ( line[MOLFILEMAXLINELEN] ){
298
+ err = 8; // too long line
299
+ goto err_fin;
300
+ }
301
+ */
302
+ len = mol_read_datum( hdr->szComment, sizeof(hdr->szComment)-1, MOL_STRING_DATA, &p );
303
+
304
+ err_fin:
305
+
306
+ return err;
307
+ }
308
+ /***************************************************************/
309
/*
 * Replace, in place, every byte of line that lies outside the printable
 * ASCII range 0x20..0x7E with '.'.
 *
 * Returns the number of bytes replaced; a NULL line gives 0.
 */
int RemoveNonPrintable( char *line )
{
    int   replaced = 0;
    char *cur;

    if ( !line ) {
        return 0;
    }
    for ( cur = line; *cur; cur++ ) {
        /* compare as unsigned so bytes >= 0x80 are not seen as negative */
        unsigned char ch = (unsigned char)*cur;
        if ( ch < ' ' || ch >= 0x7F ) {
            *cur = '.';
            replaced++;
        }
    }
    return replaced;
}
323
+ /***************************************************************/
324
+ static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr )
325
+ {
326
+ char *p;
327
+ char line[MOLFILEINPLINELEN];
328
+ const int line_len = sizeof(line);
329
+ int err = 0, len;
330
+
331
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
332
+ MOLFILE_ERR_FIN (err, 1, err_fin, "Cannot read counts line");
333
+ /* can't read the input file line */
334
+ }
335
+ remove_one_lf( line );
336
+ if ( line[MOLFILEMAXLINELEN] ){
337
+ MOLFILE_ERR_SET (err, 0, "Too long counts line"); /* too long input file line */
338
+ }
339
+ if ( 0 > mol_read_datum( &ctab->nNumberOfAtoms, 3, MOL_SHORT_INT_DATA, &p )
340
+ || 0 > mol_read_datum( &ctab->nNumberOfBonds, 3, MOL_SHORT_INT_DATA, &p )
341
+ #if ( MOL_QUERY == MOL_PRESENT )
342
+ || 0 > mol_read_datum( &ctab->nNumberOfAtomsLists, 3, MOL_SHORT_INT_DATA, &p )
343
+ #else
344
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
345
+ #endif
346
+ || 0 > mol_read_datum( NULL, /*obsolete*/ 3, MOL_JUMP_TO_RIGHT, &p )
347
+ || 0 > mol_read_datum( &ctab->cChiralFlag, 3, MOL_CHAR_INT_DATA, &p )
348
+ || 0 > mol_read_datum( &ctab->nNumberOfStextEntries, 3, MOL_SHORT_INT_DATA, &p )
349
+ #if ( MOL_CPSS == MOL_PRESENT )
350
+ || 0 > mol_read_datum( &ctab->nNumberOfReactionComponentsPlus1, 3, MOL_SHORT_INT_DATA, &p )
351
+ || 0 > mol_read_datum( &ctab->nNumberOfReactants, 3, MOL_SHORT_INT_DATA, &p )
352
+ || 0 > mol_read_datum( &ctab->nNumberOfProducts, 3, MOL_SHORT_INT_DATA, &p )
353
+ || 0 > mol_read_datum( &ctab->nNumberOfIntermediates, 3, MOL_SHORT_INT_DATA, &p )
354
+ #else
355
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
356
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
357
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
358
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
359
+ #endif
360
+ || 0 > mol_read_datum( &ctab->nNumberOfPropertyLines, 3, MOL_SHORT_INT_DATA, &p ) ){
361
+ err = 3; /* can't interpret counts line */
362
+ MOLFILE_ERR_SET (err, 3, "Cannot interpret counts line:"); /* too long input file line */
363
+ RemoveNonPrintable( line );
364
+ AddMOLfileError(pStrErr, line);
365
+ goto err_fin;
366
+ }
367
+ len = mol_read_datum( ctab->csCurrentCtabVersion, sizeof(ctab->csCurrentCtabVersion)-1, MOL_STRING_DATA, &p );
368
+ err_fin:
369
+ return err;
370
+ }
371
+
372
+ /*************************************************************************/
373
+ static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
374
+ {
375
+ char *p;
376
+ char line[MOLFILEINPLINELEN];
377
+ const int line_len = sizeof(line);
378
+ S_SHORT i, chg;
379
+ static S_SHORT charge_val[] = {0, 3, 2, 1, 'R', -1, -2, -3};
380
+ /* 0 1 2 3 4 5 6 7 */
381
+ /*
382
+ if ( NULL == ctab->MolAtom ){
383
+ err = 1;
384
+ goto err_fin; // internal error: MolAtom structure has not been allocated
385
+ }
386
+ */
387
+
388
+ for ( i = 0; i < ctab->nNumberOfAtoms; i++ ) {
389
+
390
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
391
+ if ( !err ) {
392
+ MOLFILE_ERR_SET (err, 2, "Cannot read atom block line");
393
+ }
394
+ break;
395
+ }
396
+ remove_one_lf( line );
397
+ if ( line[MOLFILEMAXLINELEN] ){
398
+ MOLFILE_ERR_SET (err, 0, "Too long atom block line");
399
+ }
400
+ if ( err ) {
401
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
402
+ err = -abs(err);
403
+ break;
404
+ }
405
+ continue; /* bypass the rest of the Atom block */
406
+ }
407
+ if ( NULL != ctab->szCoord ) {
408
+ mystrncpy( ctab->szCoord[i], p, 31 ); /* original coordinates */
409
+ }
410
+
411
+ if ( NULL != ctab->MolAtom ) {
412
+ if ( 0 > mol_read_datum( &ctab->MolAtom[i].fX, 10, MOL_DOUBLE_DATA, &p )
413
+ || 0 > mol_read_datum( &ctab->MolAtom[i].fY, 10, MOL_DOUBLE_DATA, &p )
414
+ || 0 > mol_read_datum( &ctab->MolAtom[i].fZ, 10, MOL_DOUBLE_DATA, &p )
415
+ || 0 > mol_read_datum( NULL, /* undescribed in article*/ 1, MOL_JUMP_TO_RIGHT, &p )
416
+ || 0 == mol_read_datum( &ctab->MolAtom[i].szAtomSymbol, 3, MOL_STRING_DATA, &p ) /* was sizeof(ctab->MolAtom[0].szAtomSymbol)-1 */
417
+ #ifdef INCHI_MAIN
418
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cMassDifference, 2, MOL_SHORT_INT_DATA, &p )
419
+ #else
420
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cMassDifference, 2, MOL_CHAR_INT_DATA, &p )
421
+ #endif
422
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cCharge, 3, MOL_CHAR_INT_DATA, &p )
423
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cStereoParity, 3, MOL_CHAR_INT_DATA, &p )
424
+ #if ( MOL_QUERY == MOL_PRESENT )
425
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cH_countPlus1, 3, MOL_CHAR_INT_DATA, &p )
426
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cStereoCare, 3, MOL_CHAR_INT_DATA, &p )
427
+ #else
428
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
429
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
430
+ #endif
431
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cValence, 3, MOL_CHAR_INT_DATA, &p ) ) {
432
+
433
+ err = 4;
434
+ MOLFILE_ERR_SET (err, 4, "Cannot interpret atom block line:");
435
+ RemoveNonPrintable( line );
436
+ AddMOLfileError(pStrErr, line);
437
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
438
+ err = -abs(err);
439
+ break;
440
+ }
441
+ continue; /* can't interpret a first half of atom block line */
442
+ }
443
+ if ( 2 == strlen(ctab->MolAtom[i].szAtomSymbol) && isupper(UCINT ctab->MolAtom[i].szAtomSymbol[1]))
444
+ ctab->MolAtom[i].szAtomSymbol[1] = (char)tolower(UCINT ctab->MolAtom[i].szAtomSymbol[1]); /* 5-4-99 DCh*/
445
+
446
+ if ( (chg = (S_SHORT) ctab->MolAtom[i].cCharge)< 0 || chg >= (int)(sizeof ( charge_val ) / sizeof( charge_val[0] )) ) {
447
+ /* ctab->MolAtom[i].cCharge = 0; */ /* error; ignore for now */
448
+ ctab->MolAtom[i].cCharge = (S_CHAR)(4 - chg); /* allow greater charges to accommodate NCI structures. 8-20-2002 */
449
+ ctab->MolAtom[i].cRadical = 0;
450
+ }else
451
+ if ( 'R' == (chg = charge_val[chg]) ){
452
+ ctab->MolAtom[i].cCharge = 0;
453
+ ctab->MolAtom[i].cRadical = RADICAL_DOUBLET;
454
+ }else{
455
+ ctab->MolAtom[i].cCharge = (S_CHAR)chg; /* actual charge value */
456
+ ctab->MolAtom[i].cRadical = 0;
457
+ }
458
+ #ifdef INCHI_MAIN
459
+ if ( ctab->MolAtom[i].cMassDifference ) { /* e_ReadMOL.c specific */
460
+ ctab->MolAtom[i].cMassDifference += ISOTOPIC_SHIFT_FLAG;
461
+ }
462
+ #endif
463
+
464
+ if (
465
+ #if ( MOL_CPSS == MOL_PRESENT )
466
+ 0 > mol_read_datum( &ctab->MolAtom[i].cH0_designator, 3, MOL_CHAR_INT_DATA, &p )
467
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentType, 3, MOL_CHAR_INT_DATA, &p )
468
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentNumber, 3, MOL_CHAR_INT_DATA, &p )
469
+ #else
470
+ 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
471
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
472
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
473
+ #endif
474
+ #if ( MOL_REACT == MOL_PRESENT )
475
+ || 0 > mol_read_datum( &ctab->MolAtom[i].nAtomAtomMappingNumber, 3, MOL_SHORT_INT_DATA, &p )
476
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentType, 3, MOL_CHAR_INT_DATA, &p )
477
+ #else
478
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
479
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
480
+ #endif
481
+ #if ( MOL_REACT == MOL_PRESENT || MOL_QUERY == MOL_PRESENT )
482
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cExactChargeFlag, 3, MOL_CHAR_INT_DATA, &p )
483
+ #else
484
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
485
+ #endif
486
+ ){
487
+ err = 5; /* can't interpret a second half of atom block line */
488
+ MOLFILE_ERR_SET (err, 5, "Cannot interpret atom block line:");
489
+ RemoveNonPrintable( line );
490
+ AddMOLfileError(pStrErr, line);
491
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
492
+ err = -abs(err);
493
+ break;
494
+ }
495
+ continue;
496
+ }
497
+ }
498
+ }
499
+ /* err_fin: */
500
+ return err;
501
+ }
502
+ /*************************************************************************/
503
+ static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
504
+ {
505
+ char *p;
506
+ char line[MOLFILEINPLINELEN];
507
+ const int line_len = sizeof(line);
508
+ S_SHORT i;
509
+ /*
510
+ if ( NULL == ctab->MolBond ){
511
+ err = 1;
512
+ goto err_fin; // internal error: memory has not been allocated for MolBond structure
513
+ }
514
+ */
515
+ for ( i = 0; i < ctab->nNumberOfBonds; i++ ) {
516
+
517
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
518
+ if ( !err ) {
519
+ MOLFILE_ERR_SET (err, 2, "Cannot read bond block line");
520
+ }
521
+ break;
522
+ }
523
+ remove_one_lf( line );
524
+ if ( line[MOLFILEMAXLINELEN] ){
525
+ err = err? err : 3; /* too long input file line */
526
+ }
527
+ if ( err ) {
528
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
529
+ err = -abs(err);
530
+ break;
531
+ }
532
+ continue;
533
+ }
534
+
535
+ if ( ctab->MolBond ) {
536
+ if ( 0 > mol_read_datum( &ctab->MolBond[i].nAtomNo1, 3, MOL_SHORT_INT_DATA, &p )
537
+ || 0 > mol_read_datum( &ctab->MolBond[i].nAtomNo2, 3, MOL_SHORT_INT_DATA, &p )
538
+ || 0 > mol_read_datum( &ctab->MolBond[i].cBondType, 3, MOL_CHAR_INT_DATA, &p )
539
+ || 0 > mol_read_datum( &ctab->MolBond[i].cBondStereo, 3, MOL_CHAR_INT_DATA, &p )
540
+ #if ( MOL_QUERY == MOL_PRESENT )
541
+ || 0 > mol_read_datum( &ctab->MolBond[i].cBondTopology, 3, MOL_CHAR_INT_DATA, &p ) /* ring/chain */
542
+ #else
543
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
544
+ #endif
545
+ #if ( MOL_REACT == MOL_PRESENT )
546
+ || 0 > mol_read_datum( &ctab->MolBond[i].cReactingCenterStatus, 3, MOL_CHAR_INT_DATA, &p )
547
+ #else
548
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
549
+ #endif
550
+ ){
551
+ if ( !err ) {
552
+ /* can't interpret bonds block line */
553
+ MOLFILE_ERR_SET (err, 4, "Cannot interpret bond block line:");
554
+ RemoveNonPrintable( line );
555
+ AddMOLfileError(pStrErr, line);
556
+ }
557
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
558
+ err = -abs(err);
559
+ break;
560
+ }
561
+ }
562
+ }
563
+ }
564
+ /* err_fin: */
565
+ return err;
566
+ }
567
+ /*************************************************************************/
568
+ static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
569
+ {
570
+ /* just pass by all stext enties without attemp to interpret */
571
+ char *p;
572
+ char line[MOLFILEINPLINELEN];
573
+ const int line_len = sizeof(line);
574
+ S_SHORT i;
575
+
576
+ for ( i = 0; i < 2*ctab->nNumberOfStextEntries; i++ ) {
577
+
578
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
579
+ if ( !err ) {
580
+ MOLFILE_ERR_FIN (err, 2, err_fin, "Cannot read STEXT block line");
581
+ }
582
+ break;
583
+ /* can't read the input file line */
584
+ }
585
+ /*
586
+ remove_one_lf( line );
587
+ if ( line[MOLFILEMAXLINELEN] ){
588
+ MOLFILE_ERR_SET (err, 2, "Warning: Too long STEXT block line");
589
+ // too long input file line
590
+ }
591
+ */
592
+ }
593
+ err_fin:
594
+ return err;
595
+ }
596
+ /*************************************************************************/
597
+ static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr )
598
+ {
599
+ enum { MULTI_LINE_MODE_NO_MODE, MULTI_LINE_MODE_ISIS_ALIAS };
600
+ char *p;
601
+ char line[MOLFILEINPLINELEN];
602
+ const int line_len = sizeof(line);
603
+ int nMultiLineMode = MULTI_LINE_MODE_NO_MODE, nAtomNumber=0;
604
+ S_SHORT i, j;
605
+ char charM[2];
606
+ char szBlank[3];
607
+ char szType[4];
608
+ S_SHORT skip_lines=0;
609
+ S_SHORT num_entries;
610
+ S_SHORT num_atoms = ctab->nNumberOfAtoms;
611
+
612
+ int charge_encountered = 0;
613
+ int radical_encountered = 0;
614
+ int isotope_encountered = 0;
615
+ /*
616
+ if ( NULL == ctab->MolAtom ){
617
+ err = 1;
618
+ goto err_fin; internal error: memory has not been allocated for MolAtom structure
619
+ }
620
+ */
621
+ for ( i = 0; ctab->csCurrentCtabVersion[0]? 1 : (i < ctab->nNumberOfPropertyLines); i++ ) { /* the last line should be M END */
622
+ /* ctab->csCurrentCtabVersion[0] == 0:
623
+ exactly ctab->nNumberOfPropertyLines lines including M END */
624
+ /* ctab->csCurrentCtabVersion[0] != 0:
625
+ read until M END line was encountered */
626
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
627
+ if ( !err ) {
628
+ MOLFILE_ERR_SET (err, 2, "Cannot read properties block line");
629
+ }
630
+ goto err_fin;
631
+ }
632
+ remove_one_lf( line );
633
+ if ( line[MOLFILEMAXLINELEN] ){
634
+ MOLFILE_ERR_SET (err, 3, "Too long properties block line");
635
+ continue;
636
+ }
637
+ if ( skip_lines > 0 ) {
638
+ skip_lines --;
639
+ continue;
640
+ }
641
+ /* alias. */
642
+ if ( nMultiLineMode == MULTI_LINE_MODE_ISIS_ALIAS && nAtomNumber ) {
643
+ int len;
644
+ nMultiLineMode = MULTI_LINE_MODE_NO_MODE;
645
+ if ( 0 >= (len=normalize_name( p )) ) {
646
+ nAtomNumber = 0;
647
+ continue;
648
+ }
649
+ if( 0 < len && len < (int)(sizeof(ctab->MolAtom->szAtomSymbol)) ) {
650
+ int nCharge, nRad;
651
+ MOL_ATOM* MolAtom = ctab->MolAtom + nAtomNumber-1;
652
+ /* ctab->MolAtom[nAtomNumber-1].cAtomAliasedFlag = 1; */
653
+ /* extract radicals & charges */
654
+ extract_ChargeRadical( p, &nRad, &nCharge );
655
+ /* Aliased atom cannot have charge, radical & mass difference */
656
+ /* in the atom table or "M CHG", "M RAD", "M ISO" */
657
+ /* if ( nCharge ) */
658
+ MolAtom->cCharge = (S_CHAR)nCharge;
659
+ /* if ( nRad ) */
660
+ MolAtom->cRadical = (char)nRad;
661
+
662
+ if ( 1 == len && 'D' == p[0] ) {
663
+ /* H isotope */
664
+ p[0] = 'H';
665
+ #ifdef INCHI_MAIN
666
+ MolAtom->cMassDifference=(1 + ISOTOPIC_SHIFT_FLAG);
667
+ #else
668
+ MolAtom->cMassDifference=1;
669
+ #endif
670
+ } else
671
+ if ( 1 == len && 'T' == p[0] ) {
672
+ /* H isotope */
673
+ p[0] = 'H';
674
+ #ifdef INCHI_MAIN
675
+ MolAtom->cMassDifference=(2 + ISOTOPIC_SHIFT_FLAG);
676
+ #else
677
+ MolAtom->cMassDifference=2;
678
+ #endif
679
+ } else
680
+ MolAtom->cMassDifference=0;
681
+ if ( strlen(p) < sizeof(ctab->MolAtom[0].szAtomSymbol) ) {
682
+ strcpy(MolAtom->szAtomSymbol, p);
683
+ } else {
684
+ strcpy(MolAtom->szAtomSymbol, "???");
685
+ }
686
+ MolAtom->cAtomAliasedFlag ++;
687
+ }
688
+ skip_lines = 0;
689
+ nAtomNumber = 0;
690
+ continue;
691
+ }
692
+
693
+ if ( 1 != mol_read_datum( charM, sizeof(charM) - 1, MOL_STRING_DATA, &p )
694
+ || 0 != mol_read_datum( szBlank, sizeof(szBlank) - 1, MOL_STRING_DATA, &p ) /* must contain 0 bytes */
695
+ || 0 >= mol_read_datum( szType, sizeof(szType) - 1, MOL_STRING_DATA, &p ) /* must contain 3 bytes */
696
+ ) {
697
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
698
+ err = err? -abs(err): -4;
699
+ break;
700
+ }
701
+ continue; /* ignore because cannot recognize */
702
+ }
703
+ if ( charM[0] == 'V' ){
704
+ skip_lines = 0; /* ISIS/Desktop Atom Value: one-line property */
705
+ continue;
706
+ }
707
+ if ( charM[0] == 'G' ){
708
+ skip_lines = 1; /* ISIS/Desktop Group abbreviation: two-line property */
709
+ continue;
710
+ }
711
+ if ( charM[0] == 'A' ) {
712
+ if ( NULL != ctab->MolAtom &&
713
+ 0 < ( nAtomNumber = (int)strtol(szType, NULL, 10) ) &&
714
+ nAtomNumber <= ctab->nNumberOfAtoms ){
715
+ /* Atom Alias [ISIS/Desktop] two-line property */
716
+ nMultiLineMode = MULTI_LINE_MODE_ISIS_ALIAS;
717
+ continue;
718
+ } else {
719
+ nAtomNumber = 0;
720
+ skip_lines = 1;
721
+ continue;
722
+ }
723
+ }
724
+ if ( charM[0] == 'S' && !strcmp( szType, "SKP" ) ){ /* skip lines */
725
+ if ( 0 >= mol_read_datum( &skip_lines, 3, MOL_SHORT_INT_DATA, &p ) ) {
726
+ skip_lines = 0;
727
+ }
728
+ continue;
729
+ }
730
+ if ( charM[0] != 'M' ) {/* cannot recognize a line */
731
+ continue;
732
+ }
733
+ if ( !strcmp( szType, "REG" ) ) {
734
+ int len;
735
+ p = p + strspn( p, " " );
736
+ len = strcspn( p, " " );
737
+ len = inchi_min( len, MOL_MAX_VALUE_LEN );
738
+ mol_read_datum( &pHdr->lInternalRegistryNumber, len, MOL_LONG_INT_DATA, &p );
739
+ continue;
740
+ }
741
+
742
+ if ( !strcmp( szType, "END" ) ){
743
+ if ( ctab->csCurrentCtabVersion[0] )
744
+ break; /* end of property lines */
745
+ continue;
746
+ }
747
+
748
+ if ( NULL == ctab->MolAtom )
749
+ continue; /* ignore because the user requested to bypass all this stuff */
750
+
751
+ /*----------------------------------- charge: Generic */
752
+ if ( !strcmp( szType, "CHG" ) &&
753
+ 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
754
+ 1 <= num_entries && num_entries <= 8 ) {
755
+ S_SHORT atoms[8];
756
+ S_SHORT charges[8];
757
+ if ( !charge_encountered && !radical_encountered ) {
758
+ /* first charge or radical record clears all Atom Block */
759
+ /* entered charge and radical data to zeroes */
760
+ charge_encountered = -1;
761
+ }
762
+ for ( j = 0; j < num_entries; j++ ) {
763
+ if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
764
+ 0 > mol_read_datum( &charges[j], 0, MOL_SHORT_INT_DATA, &p ) ||
765
+ atoms[j] <= 0 || atoms[j] > num_atoms ||
766
+ charges[j] < -15 || charges[j] > 15 ) {
767
+ goto charge_error;
768
+ }
769
+ }
770
+ if ( charge_encountered == -1 ) {
771
+ for ( j = 0; j < num_atoms; j++ ) {
772
+ if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms.*/
773
+ ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
774
+ }
775
+ charge_encountered = 1;
776
+ }
777
+ for ( j = 0; j < num_entries; j++ ) {
778
+ if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) /* do not change aliased atoms.*/
779
+ ctab->MolAtom[atoms[j]-1].cCharge = (S_CHAR)charges[j];
780
+ }
781
+ continue;
782
+ charge_error:
783
+ MOLFILE_ERR_SET (err, 0, "Charge not recognized:");
784
+ RemoveNonPrintable( line );
785
+ AddMOLfileError(pStrErr, line);
786
+ continue; /* ignore for now */
787
+ }
788
+ /*-------------------------------------- radical: Generic */
789
+ if ( !strcmp( szType, "RAD" ) &&
790
+ 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
791
+ 1 <= num_entries && num_entries <= 8 ) {
792
+ S_SHORT atoms[8];
793
+ S_SHORT radicals[8];
794
+ if ( !charge_encountered && !radical_encountered ) {
795
+ /* first charge or radical record clears all Atom Block */
796
+ /* entered charge and radical data to zeroes */
797
+ radical_encountered = -1;
798
+ }
799
+ for ( j = 0; j < num_entries; j++ ) {
800
+ if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
801
+ 0 > mol_read_datum( &radicals[j], 0, MOL_SHORT_INT_DATA, &p ) ||
802
+ atoms[j] <= 0 || atoms[j] > num_atoms ||
803
+ radicals[j] < 0 || radicals[j] > 3 ) {
804
+ goto radical_error;
805
+ }
806
+ }
807
+ if ( radical_encountered == -1 ) {
808
+ for ( j = 0; j < num_atoms; j++ ) {
809
+ if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms. 5-3-99 DCh */
810
+ ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
811
+ }
812
+ radical_encountered = 1;
813
+ }
814
+ for ( j = 0; j < num_entries; j++ ) {
815
+ if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) { /* do not change aliased atoms. 5-3-99 DCh */
816
+ ctab->MolAtom[atoms[j]-1].cRadical = (S_CHAR)radicals[j];
817
+ }
818
+ }
819
+ continue;
820
+ radical_error:
821
+ MOLFILE_ERR_SET (err, 0, "Radical not recognized:");
822
+ RemoveNonPrintable( line );
823
+ AddMOLfileError(pStrErr, line);
824
+ continue; /* ignore error for now */
825
+ }
826
+ /*-------------------------------------- isotope: Generic */
827
+ if ( !strcmp( szType, "ISO" ) &&
828
+ 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
829
+ 1 <= num_entries && num_entries <= 8 ) {
830
+ S_SHORT atoms[8];
831
+ S_SHORT iso_mass[8]; /* contains istotope mass number, not difference. 7-14-00 DCh. */
832
+ if ( !isotope_encountered ) {
833
+ /* first charge or radical record clears all Atom Block */
834
+ /* entered charge and radical data to zeroes */
835
+ isotope_encountered = -1;
836
+ }
837
+ for ( j = 0; j < num_entries; j++ ) {
838
+ if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
839
+ 0 > mol_read_datum( &iso_mass[j], 0, MOL_SHORT_INT_DATA, &p ) ||
840
+ atoms[j] <= 0 || atoms[j] > num_atoms
841
+ /*|| iso_mass[j] < -18 || iso_mass[j] > 12*/ ) {
842
+ /* goto isotope_error; */
843
+ atoms[j] = -1; /* flag error */
844
+ MOLFILE_ERR_SET (err, 0, "Isotopic data not recognized:");
845
+ RemoveNonPrintable( line );
846
+ AddMOLfileError(pStrErr, line);
847
+ continue; /* ignore isotopic error for now */
848
+ }
849
+ }
850
+ if ( isotope_encountered == -1 ) {
851
+ for ( j = 0; j < num_atoms; j++ ) {
852
+ /*if ( !ctab->MolAtom[j].cAtomAliasedFlag )*/ /* clear even aliased atoms */
853
+ ctab->MolAtom[j].cMassDifference = 0;
854
+ }
855
+ isotope_encountered = 1;
856
+ }
857
+ for ( j = 0; j < num_entries; j++ ) {
858
+ if ( atoms[j] <= 0 )
859
+ continue; /* ignore isotopic error for now */
860
+ if ( 1 /* !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag */) {
861
+ char *at = ctab->MolAtom[atoms[j]-1].szAtomSymbol;
862
+ if ( at[1] || at[0] != 'D' && at[0] != 'T' ) { /* D & T cannot have ISO */
863
+ /* need atomic weight to calculate isotope difference. 7-14-00 DCh. */
864
+ #ifdef INCHI_MAIN
865
+ ctab->MolAtom[atoms[j]-1].cMassDifference = iso_mass[j]; /* mass, not difference */
866
+ #else
867
+ int atw, atw_diff;
868
+ if ( (atw = get_atw( at )) && abs( atw_diff = (int)iso_mass[j] - atw ) < 20 ) {
869
+ ctab->MolAtom[atoms[j]-1].cMassDifference = (char)(atw_diff? atw_diff : ZERO_ATW_DIFF);
870
+ }
871
+ #endif
872
+ }
873
+ }
874
+ }
875
+ continue;
876
+ }
877
+ }
878
+ err_fin:
879
+ return err;
880
+ }
881
+ /*************************************************************************/
882
+ MOL_DATA* delete_mol_data( MOL_DATA* mol_data )
883
+ {
884
+ if ( mol_data ) {
885
+ if ( mol_data->ctab.MolAtom )
886
+ inchi_free( mol_data->ctab.MolAtom );
887
+ if ( mol_data->ctab.MolBond )
888
+ inchi_free( mol_data->ctab.MolBond );
889
+ if ( mol_data->ctab.szCoord )
890
+ inchi_free( mol_data->ctab.szCoord );
891
+ inchi_free( mol_data );
892
+ mol_data = NULL;
893
+ }
894
+ return mol_data;
895
+ }
896
+ /*************************************************************************/
897
+ /* Comletely ingnore STEXT block, queries, and 3D features
898
+ */
899
+ MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
900
+ int bGetOrigCoord, int *err, char *pStrErr )
901
+ {
902
+ MOL_DATA* mol_data = NULL;
903
+ int ret = 0, prev_ret, bEndOfData = 0;
904
+ int bReadAll = ( OnlyHeaderBlock == NULL );
905
+ MOL_CTAB ctab, *pCtab = NULL;
906
+ MOL_HEADER_BLOCK *pHdr = NULL;
907
+
908
+ *err = 0;
909
+ if ( bReadAll ) {
910
+ if ( NULL == ( mol_data = ( MOL_DATA* )inchi_calloc( 1, sizeof(MOL_DATA) ) ) ){
911
+ ret = 1; /* can't allocate mol_data structure */
912
+ AddMOLfileError( pStrErr, "Out of RAM" );
913
+ goto err_fin;
914
+ }
915
+ pHdr = &mol_data->hdr;
916
+ pCtab = &mol_data->ctab;
917
+ } else {
918
+ pHdr = OnlyHeaderBlock;
919
+ pCtab = OnlyCtab? OnlyCtab : &ctab;
920
+ memset( pHdr, 0, sizeof( MOL_HEADER_BLOCK ) );
921
+ memset( pCtab, 0, sizeof( MOL_CTAB ) );
922
+ }
923
+ pCtab->MolBond = NULL;
924
+ pCtab->MolAtom = NULL;
925
+ pCtab->szCoord = NULL;
926
+
927
+ if ( 0 != ( ret = mol_read_hdr(pHdr, inp, pStrErr) ) ){
928
+ ret += 10;
929
+ goto err_fin; /* most probably end of file */
930
+ }
931
+ if ( 0 != ( ret = mol_read_counts_line( pCtab , inp, pStrErr) ) ){
932
+ ret += 20;
933
+ goto err_fin;
934
+ }
935
+
936
+ if ( bReadAll ) {
937
+ if ( NULL == ( mol_data->ctab.MolAtom = (MOL_ATOM*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_ATOM)) ) ){
938
+ ret = 2; /* can't allocate MolAtom structure */
939
+ MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
940
+ }
941
+ if ( bGetOrigCoord &&
942
+ NULL == ( mol_data->ctab.szCoord = (MOL_COORD*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_COORD)) ) ){
943
+ ret = 2; /* can't allocate MolAtom structure */
944
+ MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
945
+ }
946
+ }
947
+ if ( 0 != ( ret = read_atom_block(pCtab, inp, ret, pStrErr) ) ){
948
+ if ( ret < 0 ) {
949
+ ret = -ret;
950
+ bEndOfData = 1;
951
+ }
952
+ ret += 30;
953
+ /* goto err_fin; */
954
+ }
955
+
956
+ if ( bReadAll && ret < 30 ) {
957
+ if ( !bEndOfData && NULL == ( mol_data->ctab.MolBond = (MOL_BONDS*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfBonds,1), sizeof(MOL_BONDS)) ) ){
958
+ ret = 3; /* can't allocate MolBond structure */
959
+ MOLFILE_ERR_FIN (ret, 3, err_fin, "Out of RAM");
960
+ }
961
+ }
962
+ prev_ret = ret;
963
+ if ( !bEndOfData && 0 != ( ret = read_bonds_block(pCtab, inp, ret, pStrErr) ) ){
964
+ if ( ret < 0 ) {
965
+ ret = -ret;
966
+ bEndOfData = 1;
967
+ }
968
+ ret = prev_ret? prev_ret : ret + 40;
969
+ }
970
+ prev_ret = ret;
971
+ if ( !bEndOfData && 0 != ( ret = read_stext_block(pCtab, inp, ret, pStrErr) ) ){
972
+ ret = prev_ret? prev_ret : ret + 50;
973
+ }
974
+ prev_ret = ret;
975
+ if ( !bEndOfData && 0 != ( ret = read_properties_block(pCtab, pHdr, inp, ret, pStrErr) ) ){
976
+ if ( ret < 0 ) {
977
+ ret = -ret;
978
+ bEndOfData = 1;
979
+ }
980
+ ret = prev_ret? prev_ret : ret + 60;
981
+ }
982
+
983
+ err_fin:
984
+ *err = bEndOfData? -ret : ret;
985
+ if ( bReadAll ) {
986
+ if ( ret )
987
+ mol_data = delete_mol_data( mol_data ); /* delete all results */
988
+ return mol_data;
989
+ } else {
990
+ if ( ret )
991
+ return NULL;
992
+ else
993
+ return (MOL_DATA*)OnlyHeaderBlock;
994
+ }
995
+ }
996
+
997
+ /******************************************************************/
998
/* SData header labels that identify_sdf_label() recognises via memicmp() */
char sdf_data_hdr_name[] = "NAME";
char sdf_data_hdr_comm[] = "COMMENT";

/* States of the SData scanner in bypass_sdf_data_items() */
enum {
    SDF_START,                /* nothing has been read yet                    */
    SDF_DATA_HEADER,          /* '>' header line whose label is of no interest */
    SDF_DATA_HEADER_NAME,     /* header labelled sdf_data_hdr_name            */
    SDF_DATA_HEADER_COMMENT,  /* header labelled sdf_data_hdr_comm            */
    SDF_DATA_HEADER_CAS,      /* header whose label starts with "CAS"         */
    SDF_DATA_HEADER_USER,     /* header matching the user-supplied label      */
    SDF_DATA_LINE,            /* non-empty line following a header            */
    SDF_END_OF_DATA_ITEM,     /* empty line that terminated a data item       */
    SDF_EMPTY_LINE,           /* empty line outside of a data item            */
    SDF_END_OF_DATA_BLOCK     /* record delimiter line has been read          */
};
1004
+ /******************************************************************/
1005
+ static long extract_cas_rn( char *line )
1006
+ {
1007
+ int i, j;
1008
+ i = line[0] == '-'? 1 : 0;
1009
+ for ( j = i; line[i]; i ++ ) {
1010
+ if ( isdigit( UCINT line[i] ) ) {
1011
+ line[j++] = line[i];
1012
+ } else
1013
+ if ( line[i] != '-' ) {
1014
+ break;
1015
+ }
1016
+ }
1017
+ line[j] = '\0';
1018
+ return strtol( line, NULL, 10 );
1019
+ }
1020
+ /******************************************************************/
1021
+ static int identify_sdf_label( char* inp_line, const char *pSdfLabel )
1022
+ {
1023
+ char line[MOLFILEMAXLINELEN];
1024
+ char *p, *q;
1025
+ int i, j, len;
1026
+ if ( (p = strchr( inp_line, '<' )) &&
1027
+ (q = strchr( p, '>' )) &&
1028
+ (len = q-p-1) > 0 && len < (int)sizeof(line) ) {
1029
+ memcpy( line, p+1, len );
1030
+ line[len] = '\0';
1031
+ for ( i = 0; isspace( UCINT line[i] ); i ++ )
1032
+ ;
1033
+ for ( j = len-1; j >= i && isspace( UCINT line[i] ); j -- )
1034
+ ;
1035
+ len = j-i+1;
1036
+ p = line+i;
1037
+ if ( pSdfLabel && pSdfLabel[0] && len == (int)strlen(pSdfLabel) && !memicmp( p, pSdfLabel, len ) )
1038
+ return SDF_DATA_HEADER_USER;
1039
+ if ( len == sizeof(sdf_data_hdr_name)-1 && !memicmp( p, sdf_data_hdr_name, len ) )
1040
+ return SDF_DATA_HEADER_NAME;
1041
+ if ( len == sizeof(sdf_data_hdr_comm)-1 && !memicmp( p, sdf_data_hdr_comm, len ) )
1042
+ return SDF_DATA_HEADER_COMMENT;
1043
+ if ( !memicmp( p, "CAS", 3 ) )
1044
+ return SDF_DATA_HEADER_CAS;
1045
+ }
1046
+ return SDF_DATA_HEADER;
1047
+ }
1048
+ /******************************************************************/
1049
+ int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment,
1050
+ int lcomment, char *name, int lname, int prev_err,
1051
+ const char *pSdfLabel, char *pSdfValue, char *pStrErr )
1052
+ {
1053
+ char line[MOLFILEINPLINELEN];
1054
+ const int line_len = sizeof(line);
1055
+ int err = 0;
1056
+ int current_state = SDF_START;
1057
+ int n_blank_lines = 0;
1058
+ int n_lines = 0;
1059
+ char* p = NULL;
1060
+ int bNeedsName = name && lname > 0 && !name[0];
1061
+ int bNeedsComm = comment && lcomment > 0 && !comment[0];
1062
+ int bNeedsUser = pSdfLabel && pSdfLabel[0] && pSdfValue;
1063
+ int bNeedsCASrn = 0;
1064
+ int bCASrnIsUser = 0;
1065
+
1066
+ if ( cas_reg_no != NULL ) {
1067
+ bNeedsCASrn = 1;
1068
+ *cas_reg_no = 0;
1069
+ bCASrnIsUser = (bNeedsUser && !memicmp(pSdfLabel,"CAS", 3));
1070
+ }
1071
+
1072
+ while ( err == 0 &&
1073
+ current_state !=SDF_END_OF_DATA_BLOCK &&
1074
+ NULL != ( p = fgets_up_to_lf( line, line_len, inp ) ) ) {
1075
+
1076
+ if ( !n_lines && !memcmp(line, "M END", 6) ) {
1077
+ continue; /* allow subtle errors */
1078
+ }
1079
+ n_lines++;
1080
+
1081
+ remove_trailing_spaces( line );
1082
+ if ( line[MOLFILEMAXLINELEN] ){
1083
+ if ( current_state != SDF_DATA_HEADER &&
1084
+ current_state != SDF_DATA_LINE &&
1085
+ current_state != SDF_DATA_HEADER_NAME &&
1086
+ current_state != SDF_DATA_HEADER_USER &&
1087
+ current_state != SDF_DATA_HEADER_COMMENT ) {
1088
+ line[MOLFILEMAXLINELEN] = '\0';
1089
+ if ( !prev_err ) {
1090
+ MOLFILE_ERR_SET (err, 0, "Too long SData line truncated");
1091
+ }
1092
+ } else {
1093
+ /* allow long lines in SDF data. 9-29-00 DCh */
1094
+ line[MOLFILEMAXLINELEN] = '\0';
1095
+ }
1096
+ }
1097
+
1098
+ n_blank_lines += ( *line == '\0' );
1099
+
1100
+ switch( current_state ) {
1101
+
1102
+ case SDF_START:
1103
+ case SDF_END_OF_DATA_ITEM:
1104
+ case SDF_EMPTY_LINE: /* Added 9-25-97 DCh */
1105
+
1106
+ if ( 0 == strcmp( line, SDF_END_OF_DATA ) ) {
1107
+ current_state = SDF_END_OF_DATA_BLOCK;
1108
+ }
1109
+ else
1110
+ if ( '>' == *line ) {
1111
+ current_state = ( bNeedsName || bNeedsComm || bNeedsCASrn || bNeedsUser )? identify_sdf_label(line, pSdfLabel) : SDF_DATA_HEADER;
1112
+ }else
1113
+ if ( *line == '\0' ) { /* Added 9-25-97 DCh */
1114
+ /* Relax the strictness: Allow more than 1 empty line. */
1115
+ current_state=SDF_EMPTY_LINE;
1116
+ } else
1117
+ if ( !prev_err ) {
1118
+ MOLFILE_ERR_SET (err, 3, "Unexpected SData header line:");
1119
+ RemoveNonPrintable( line );
1120
+ AddMOLfileError(pStrErr, line);
1121
+ /* unexpected contents of data header line */
1122
+ } else {
1123
+ err = 3;
1124
+ }
1125
+ break;
1126
+
1127
+ case SDF_DATA_HEADER_NAME:
1128
+ if ( bNeedsName && 0 < normalize_name( line ) ) {
1129
+ bNeedsName = 0;
1130
+ mystrncpy( name, line, lname );
1131
+ }
1132
+ goto got_data_line;
1133
+
1134
+ case SDF_DATA_HEADER_COMMENT:
1135
+ if ( bNeedsComm && 0 < normalize_name( line ) ) {
1136
+ bNeedsComm = 0;
1137
+ mystrncpy( comment, line, lcomment );
1138
+ }
1139
+ goto got_data_line;
1140
+
1141
+ case SDF_DATA_HEADER_USER:
1142
+ if ( bNeedsUser && 0 < normalize_name( line ) ) {
1143
+ bNeedsUser = 0;
1144
+ mystrncpy( pSdfValue, line, MAX_SDF_VALUE+1 );
1145
+ if ( bCASrnIsUser && bNeedsCASrn ) {
1146
+ *cas_reg_no = extract_cas_rn( line );
1147
+ bNeedsCASrn = (0 == *cas_reg_no);
1148
+ }
1149
+ }
1150
+ goto got_data_line;
1151
+
1152
+ case SDF_DATA_HEADER_CAS:
1153
+ if ( bNeedsCASrn && 0 < normalize_name( line ) ) {
1154
+ *cas_reg_no = extract_cas_rn( line );
1155
+ bNeedsCASrn = (0 == *cas_reg_no);
1156
+ }
1157
+ goto got_data_line;
1158
+
1159
+ case SDF_DATA_HEADER:
1160
+ case SDF_DATA_LINE:
1161
+ got_data_line:
1162
+ current_state = *line? SDF_DATA_LINE : SDF_END_OF_DATA_ITEM;
1163
+ break;
1164
+
1165
+ }
1166
+ }
1167
+ if ( 0 == err && SDF_END_OF_DATA_BLOCK != current_state && NULL == p )
1168
+ ; /* err = 4; */ /* unexpected end of file: missing $$$$ */
1169
+ else
1170
+ if (err && ( n_blank_lines == n_lines && *line == '\0' ) )
1171
+ err = 5; /* empty lines -- do not know when this can happen */
1172
+
1173
+ if ( err && err != 5 && current_state != SDF_END_OF_DATA_BLOCK && p ) {
1174
+ /* bypass up to $$$$ */
1175
+ while ( ( p = fgets_up_to_lf( line, line_len, inp ) ) && memcmp( line, SDF_END_OF_DATA, 4 ) )
1176
+ ;
1177
+ if ( p ) {
1178
+ err = 9; /* bypassed to $$$$; non-fatal */
1179
+ AddMOLfileError(pStrErr, "Bypassing to next structure");
1180
+ }
1181
+
1182
+ }
1183
+
1184
+ return err;
1185
+ }
1186
+ /******************************************************************/
1187
+ MOL_DATA* read_sdfile_segment(FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
1188
+ int bGetOrigCoord,
1189
+ char *pname, int lname,
1190
+ long *Id, const char *pSdfLabel, char *pSdfValue,
1191
+ int *err, char *pStrErr )
1192
+ {
1193
+ MOL_DATA* mol_data = read_mol_file( inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, err, pStrErr );
1194
+ int err_bypass_sdf = 0;
1195
+
1196
+ if ( pname && lname ) {
1197
+ pname[0] = '\0';
1198
+ }
1199
+ if ( Id ) {
1200
+ *Id = 0L; /* ignore for now */
1201
+ }
1202
+ /* if ( mol_data && !*err ) { */
1203
+ if ( *err < 0 ) {
1204
+ *err = -*err; /* end of data encountered */
1205
+ } else {
1206
+ err_bypass_sdf = bypass_sdf_data_items( inp, Id, NULL, 0, pname, lname, *err, pSdfLabel, pSdfValue, pStrErr );
1207
+ if ( err_bypass_sdf ) {
1208
+ *err = err_bypass_sdf; /* important to continue to the next good structure */
1209
+ }
1210
+ }
1211
+ /* } */
1212
+ return mol_data;
1213
+ }
1214
+ /****************************************************************************/
1215
+ int CopyMOLfile(FILE *inp_file, long fPtrStart, long fPtrEnd, FILE *prb_file, long lNumb)
1216
+ {
1217
+ char line[MOLFILEINPLINELEN], *p;
1218
+ long fPtr;
1219
+ int ret = 1;
1220
+ char szNumber[32];
1221
+
1222
+ if ( inp_file && prb_file && fPtrStart >= 0L &&
1223
+ fPtrEnd > fPtrStart &&
1224
+ 0 == fseek( inp_file, fPtrStart, SEEK_SET ) ) {
1225
+
1226
+ while ( fPtrEnd > (fPtr = ftell(inp_file)) && fPtr >= 0L &&
1227
+ fgets_up_to_lf( line, sizeof(line)-1, inp_file ) ) {
1228
+ line[sizeof(line)-1] = '\0'; /* unnecessary extra precaution */
1229
+ if ( fPtr == fPtrStart && lNumb ) {
1230
+ int len;
1231
+ LtrimRtrim( line, &len );
1232
+ len = sprintf( szNumber, "#%ld%s", lNumb, len?"/":"" );
1233
+ mystrncpy( line+len, line, sizeof(line)-len-1 );
1234
+ memcpy( line, szNumber, len );
1235
+ }
1236
+ if ( !strchr(line, '\n') ) {
1237
+ p = line+strlen(line);
1238
+ p[0] = '\n';
1239
+ p[1] = '\0';
1240
+ }
1241
+ fputs( line, prb_file );
1242
+ }
1243
+ ret = fseek( inp_file, fPtrEnd, SEEK_SET );
1244
+ }
1245
+ return ret;
1246
+ }