rino 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/README +44 -0
  2. data/Rakefile +123 -0
  3. data/ext/extconf.rb +26 -0
  4. data/ext/ruby_inchi_main.so +0 -0
  5. data/ext/src/aux2atom.h +2786 -0
  6. data/ext/src/comdef.h +148 -0
  7. data/ext/src/e_0dstereo.c +3014 -0
  8. data/ext/src/e_0dstereo.h +31 -0
  9. data/ext/src/e_comdef.h +57 -0
  10. data/ext/src/e_ctl_data.h +147 -0
  11. data/ext/src/e_ichi_io.c +498 -0
  12. data/ext/src/e_ichi_io.h +40 -0
  13. data/ext/src/e_ichi_parms.c +37 -0
  14. data/ext/src/e_ichi_parms.h +41 -0
  15. data/ext/src/e_ichicomp.h +50 -0
  16. data/ext/src/e_ichierr.h +40 -0
  17. data/ext/src/e_ichimain.c +593 -0
  18. data/ext/src/e_ichisize.h +43 -0
  19. data/ext/src/e_inchi_atom.c +75 -0
  20. data/ext/src/e_inchi_atom.h +33 -0
  21. data/ext/src/e_inpdef.h +41 -0
  22. data/ext/src/e_mode.h +706 -0
  23. data/ext/src/e_mol2atom.c +649 -0
  24. data/ext/src/e_readinch.c +58 -0
  25. data/ext/src/e_readmol.c +54 -0
  26. data/ext/src/e_readmol.h +180 -0
  27. data/ext/src/e_readstru.c +251 -0
  28. data/ext/src/e_readstru.h +33 -0
  29. data/ext/src/e_util.c +284 -0
  30. data/ext/src/e_util.h +61 -0
  31. data/ext/src/extr_ct.h +251 -0
  32. data/ext/src/ichi.h +206 -0
  33. data/ext/src/ichi_bns.c +7999 -0
  34. data/ext/src/ichi_bns.h +231 -0
  35. data/ext/src/ichican2.c +5000 -0
  36. data/ext/src/ichicano.c +2195 -0
  37. data/ext/src/ichicano.h +49 -0
  38. data/ext/src/ichicans.c +1625 -0
  39. data/ext/src/ichicant.h +379 -0
  40. data/ext/src/ichicomn.h +260 -0
  41. data/ext/src/ichicomp.h +50 -0
  42. data/ext/src/ichidrp.h +119 -0
  43. data/ext/src/ichierr.h +124 -0
  44. data/ext/src/ichiisot.c +101 -0
  45. data/ext/src/ichilnct.c +286 -0
  46. data/ext/src/ichimain.h +132 -0
  47. data/ext/src/ichimak2.c +1189 -0
  48. data/ext/src/ichimake.c +3812 -0
  49. data/ext/src/ichimake.h +205 -0
  50. data/ext/src/ichimap1.c +851 -0
  51. data/ext/src/ichimap2.c +2856 -0
  52. data/ext/src/ichimap4.c +1609 -0
  53. data/ext/src/ichinorm.c +741 -0
  54. data/ext/src/ichinorm.h +67 -0
  55. data/ext/src/ichiparm.c +45 -0
  56. data/ext/src/ichiparm.h +1441 -0
  57. data/ext/src/ichiprt1.c +3612 -0
  58. data/ext/src/ichiprt2.c +1511 -0
  59. data/ext/src/ichiprt3.c +3011 -0
  60. data/ext/src/ichiqueu.c +1003 -0
  61. data/ext/src/ichiring.c +326 -0
  62. data/ext/src/ichiring.h +49 -0
  63. data/ext/src/ichisize.h +35 -0
  64. data/ext/src/ichisort.c +539 -0
  65. data/ext/src/ichister.c +3538 -0
  66. data/ext/src/ichister.h +35 -0
  67. data/ext/src/ichitaut.c +3843 -0
  68. data/ext/src/ichitaut.h +387 -0
  69. data/ext/src/ichitime.h +74 -0
  70. data/ext/src/inchi_api.h +670 -0
  71. data/ext/src/inchi_dll.c +1480 -0
  72. data/ext/src/inchi_dll.h +34 -0
  73. data/ext/src/inchi_dll_main.c +23 -0
  74. data/ext/src/inchi_dll_main.h +31 -0
  75. data/ext/src/inpdef.h +328 -0
  76. data/ext/src/lreadmol.h +1246 -0
  77. data/ext/src/mode.h +706 -0
  78. data/ext/src/ruby_inchi_main.c +558 -0
  79. data/ext/src/runichi.c +4179 -0
  80. data/ext/src/strutil.c +3861 -0
  81. data/ext/src/strutil.h +182 -0
  82. data/ext/src/util.c +1130 -0
  83. data/ext/src/util.h +85 -0
  84. data/lib/clean_tempfile.rb +220 -0
  85. data/lib/rino.rb +111 -0
  86. data/test/test.rb +386 -0
  87. metadata +130 -0
@@ -0,0 +1,1246 @@
1
+ /*
2
+ * International Union of Pure and Applied Chemistry (IUPAC)
3
+ * International Chemical Identifier (InChI)
4
+ * Version 1
5
+ * Software version 1.00
6
+ * April 13, 2005
7
+ * Developed at NIST
8
+ */
9
+
10
+
11
+ /* local prototypes */
12
+ int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment, int lcomment, char *name, int lname, int prev_err,
13
+ const char *pSdfLabel, char *pSdfValue, char *pStrErr );
14
+ MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
15
+ int bGetOrigCoord, int *err, char *pStrErr );
16
+
17
+
18
+ static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr);
19
+ static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr );
20
+ static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
21
+ static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
22
+ static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr );
23
+ static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr );
24
+
25
+ static int identify_sdf_label( char* inp_line, const char *pSdfLabel );
26
+ static long extract_cas_rn( char *line );
27
+ int RemoveNonPrintable( char *line );
28
+
29
+
30
+ /******/
31
/*
 * Error-recording helper macros.
 *
 * Both macros expect a variable named pStrErr to be visible at the point
 * of use; it is forwarded to AddMOLfileError() together with msg.
 *
 *   MOLFILE_ERR_SET(err, new_err, msg)
 *       If err is still zero and new_err is non-zero, store new_err in err;
 *       then always append msg to the error string.
 *
 *   MOLFILE_ERR_FIN(err, new_err, err_fin, msg)
 *       Same as MOLFILE_ERR_SET, then jump to the label err_fin.
 *
 * Each expansion is wrapped in do { ... } while (0) so that it behaves as
 * exactly one statement. Without the wrapper, writing
 *       if ( cond ) MOLFILE_ERR_SET(...);
 * would guard only the first inner "if", and the AddMOLfileError() call
 * (and the goto of MOLFILE_ERR_FIN) would execute regardless of cond.
 */
#ifndef MOLFILE_ERR_FIN
#define MOLFILE_ERR_FIN(err, new_err, err_fin, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError(pStrErr, (msg)); \
        goto err_fin; \
    } while (0)
#endif
#ifndef MOLFILE_ERR_SET
#define MOLFILE_ERR_SET(err, new_err, msg) \
    do { \
        if ( !(err) && (new_err) ) { (err) = (new_err); } \
        AddMOLfileError(pStrErr, (msg)); \
    } while (0)
#endif
39
+
40
+ /*************************************************************************/
41
+ int AddMOLfileError( char *pStrErr, const char *szMsg )
42
+ {
43
+ if ( pStrErr && szMsg && szMsg[0] ) {
44
+ int lenStrErr = strlen( pStrErr );
45
+ int lenMsg = strlen( szMsg );
46
+ char *p = strstr( pStrErr, szMsg );
47
+ if ( p && (p==pStrErr || *(p-1) == ' ' && (*(p-2) == ';' || *(p-2) == ':' )) &&
48
+ (p+lenMsg == pStrErr+lenStrErr ||
49
+ p[lenMsg] == ';' && p[lenMsg+1] == ' ' ||
50
+ p[lenMsg-1]==':' && p[lenMsg]==' ') ) {
51
+ return 1; /* reject duplicates */
52
+ }
53
+ if ( lenStrErr + lenMsg + 2*(lenStrErr > 0) < STR_ERR_LEN ) {
54
+ /* enough room to add */
55
+ if (lenStrErr > 0) {
56
+ if ( pStrErr[lenStrErr-1] != ':' ) {
57
+ strcat( pStrErr, ";" );
58
+ }
59
+ strcat( pStrErr, " " );
60
+ }
61
+ strcat( pStrErr, szMsg );
62
+ return 1;
63
+ }
64
+ /* no room */
65
+ if ( strstr( pStrErr, "..." ) ) {
66
+ return 0; /* no room mark has already been set */
67
+ }
68
+ if ( lenStrErr + 3 < STR_ERR_LEN ) {
69
+ strcat( pStrErr, "..." );
70
+ }
71
+ }
72
+ return 0;
73
+ }
74
+ /*************************************************************************/
75
+ static int mol_copy_check_empty( char* dest, char* source, int len, char **first_space )
76
+ {
77
+ int i, c; /* required len >= 0; dest must have at least len+1 bytes */
78
+ if ( len > 0 )
79
+ strncpy( dest, source, len );
80
+ dest[len]='\0';
81
+ len = ( len > 0 )? (int)strlen( dest) : 0;
82
+ for ( i = (len-1); i >= 0 && 0 != (c = source[i]) && isspace(UCINT c); i-- )
83
+ ;
84
+ *first_space = dest + (i+1); /* first blank or zero terminating byte in dest */
85
+ return len; /* number of actually processed bytes; zero termination not included */
86
+ }
87
+ /*************************************************************************/
88
+ static int mol_read_datum(void* data, int field_len, int data_type, char** line_ptr)
89
+ {
90
+ /* 1. 'field_len' for MOL_STRING_DATA does not include trailing zero,
91
+ * that is actual length of the string pointed by 'data'
92
+ * should be at least field_len+1 bytes.
93
+ * For numerical data 'field_len' is length of input data field
94
+ * For numerical integral data field_len <= 0 means read up to first
95
+ * non-numeric character as strtod() does ("free format")
96
+ * 2. return value: for MOL_STRING_DATA: number of bytes excluding trailing zero
97
+ * for all others: 1=success; 0 = empty; -1= error
98
+ * 3. on exit *line_ptr points to the next byte after the last entered
99
+ */
100
+ char *p = *line_ptr, *q, *p_end;
101
+ int i, ret=1, c, len;
102
+ long ldata;
103
+ double ddata;
104
+
105
+ switch( data_type ) {
106
+ case MOL_STRING_DATA:
107
+ for ( i= 0; i < field_len && 0 != (c = p[i]) && isspace(UCINT c); i++ ) /* pass by all leading spaces */
108
+ ;
109
+ len = mol_copy_check_empty( (char*)data, &p[i], field_len-i, &q );
110
+ ret = ( q - (char*)data );/* actual data length */
111
+ *q = '\0'; /* add zero termination to data if it is not there yet*/
112
+ *line_ptr += (len+i); /* ptr to the 1st byte of the next input field or to zero termination */
113
+ break;
114
+
115
+ case MOL_CHAR_INT_DATA:
116
+ case MOL_SHORT_INT_DATA:
117
+ case MOL_LONG_INT_DATA:
118
+ { /* block start */
119
+ char str[MOL_MAX_VALUE_LEN+1];
120
+ ldata = 0L;
121
+ if ( field_len > MOL_MAX_VALUE_LEN ) {
122
+ ret = -1;
123
+ }else
124
+ if ( field_len > 0 ) { /* fixed length */
125
+ *line_ptr += ( len = mol_copy_check_empty( str, p, field_len, &q ) );
126
+ *q = '\0';
127
+ if ( !len || !(q-str) ) { /* empty string */
128
+ ret = 0;
129
+ }else
130
+ if ( (ldata=strtol(str,&p_end,10), p_end != q) ){ /* wrong data: incompletely interpreted */
131
+ ret = -1;
132
+ }
133
+ }else{ /* free format: field_len <= 0 */
134
+ ldata = strtol( p, &p_end, 10 );
135
+ *line_ptr += ( len = p_end - p );
136
+ if ( len == 0 ){
137
+ ret = 0;
138
+ }
139
+ }
140
+
141
+ switch( data_type ) {
142
+ case MOL_CHAR_INT_DATA:
143
+ if ( SCHAR_MIN <= ldata && ldata <= SCHAR_MAX ){ /* from || to &&: 11-19-96 */
144
+ *(S_CHAR*)data = (S_CHAR)ldata;
145
+ }else{
146
+ *(S_CHAR*)data = (S_CHAR)0;
147
+ ret = -1;
148
+ }
149
+ break;
150
+ case MOL_SHORT_INT_DATA:
151
+ if ( SHRT_MIN <= ldata && ldata <= SHRT_MAX ){
152
+ *(S_SHORT*)data = (S_SHORT)ldata;
153
+ }else{
154
+ *(S_SHORT*)data = (S_SHORT)0;
155
+ ret = -1;
156
+ }
157
+ break;
158
+ case MOL_LONG_INT_DATA:
159
+ if ( LONG_MIN < ldata && ldata < LONG_MAX ){
160
+ *(long*)data = (long)ldata;
161
+ }else{
162
+ *(long*)data = 0L;
163
+ ret = -1;
164
+ }
165
+ break;
166
+ default:
167
+ ret=-1;
168
+ }
169
+
170
+ } /* block end */
171
+ break;
172
+ case MOL_DOUBLE_DATA:
173
+ case MOL_FLOAT_DATA:
174
+ { /* block start */
175
+ char str[MOL_MAX_VALUE_LEN+1];
176
+ if ( field_len > MOL_MAX_VALUE_LEN ) {
177
+ ret = -1;
178
+ ddata = 0.0;
179
+ }else
180
+ if ( field_len > 0 ) {
181
+ *line_ptr += (len = mol_copy_check_empty( str, p, field_len, &q ));
182
+ *q = '\0';
183
+ if ( !len || !(q-str) ) { /* empty string */
184
+ ddata = 0.0;
185
+ ret = 0;
186
+ }else
187
+ if ( (ddata=strtod(str,&p_end), p_end != q) ){ /* wrong data */
188
+ ret = -1;
189
+ }
190
+ }else{ /* free format */
191
+ ddata = strtod( p, &p_end );
192
+ *line_ptr += ( len = p_end - p );
193
+ if ( len == 0 ){
194
+ ret = 0;
195
+ }
196
+ }
197
+ switch(data_type){
198
+ case MOL_DOUBLE_DATA:
199
+ if ( ddata != HUGE_VAL && /*ldata*/ ddata != -HUGE_VAL ){ /* replaced ldata with ddata 6-30-98 DCh */
200
+ *(double*)data = ddata;
201
+ }else{
202
+ *(double*)data = 0.0;
203
+ ret = -1;
204
+ }
205
+ break;
206
+ case MOL_FLOAT_DATA:
207
+ if ( fabs(ddata) <= (double)FLT_MIN ) {
208
+ *(float*)data = 0.0;
209
+ }else
210
+ if ( fabs(ddata) >= (double)FLT_MAX ) {
211
+ *(float*)data = 0.0;
212
+ ret = -1;
213
+ }else{
214
+ *(float*)data = (float)ddata;
215
+ }
216
+ break;
217
+ }
218
+ } /* block end */
219
+ break;
220
+ case MOL_JUMP_TO_RIGHT:
221
+ for ( i = 0; i < field_len && p[i]; i++ )
222
+ ;
223
+ *line_ptr += i;
224
+ ret = i;
225
+ break;
226
+ default:
227
+ ret = -1;
228
+ }
229
+ return ret;
230
+ }
231
+ /*************************************************************************/
232
+ static int mol_read_hdr(MOL_HEADER_BLOCK *hdr, FILE* inp, char *pStrErr)
233
+ {
234
+ /* All input lines can have are up 80 characters */
235
+ /* Header Block */
236
+ char line[MOLFILEINPLINELEN]; /* + cr +lf +zero termination + reserve */
237
+ int err = 0, len;
238
+ const int line_len = sizeof(line);
239
+ char *p;
240
+
241
+ /* memset( &hdr, 0, sizeof( MOL_HEADER_BLOCK ) ); */
242
+ /*------------ header line #1: name ----------------*/
243
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
244
+ err = 1; /* can't read the input file line */
245
+ /* AddMOLfileError( pStrErr, "Can't read header block name line" ); */
246
+ goto err_fin;
247
+ }
248
+ remove_one_lf( line );
249
+ /* -- Disabled to relax strictness: allow > 80 chars names.
250
+ if ( line[MOLFILEMAXLINELEN] ){
251
+ err = 2; // too long line
252
+ goto err_fin;
253
+ }
254
+ */
255
+ len = mol_read_datum( hdr->szMoleculeName, sizeof(hdr->szMoleculeName)-1, MOL_STRING_DATA, &p );
256
+ /*----------- header line #2 -----------------------*/
257
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
258
+ err = 3; /* can't read the input file line */
259
+ /* AddMOLfileError( pStrErr, "Can't read header block line 2" ); */
260
+ goto err_fin;
261
+ }
262
+ remove_one_lf( line );
263
+ /* -- Disabled to relax strictness: allow > 80 chars names.
264
+ if ( line[MOLFILEMAXLINELEN] ){
265
+ err = 4; // too long input file line
266
+ goto err_fin;
267
+ }
268
+ */
269
+ len = mol_read_datum( hdr->szUserInitials, sizeof(hdr->szUserInitials)-1, MOL_STRING_DATA, &p );
270
+ len = mol_read_datum( hdr->szProgramName, sizeof(hdr->szProgramName)-1, MOL_STRING_DATA, &p );
271
+
272
+ /*------------ Relax strictness -----------------------*/
273
+ len = mol_read_datum( &hdr->cMonth, 2, MOL_CHAR_INT_DATA, &p );
274
+ len = mol_read_datum( &hdr->cDay, 2, MOL_CHAR_INT_DATA, &p );
275
+ len = mol_read_datum( &hdr->cYear, 2, MOL_CHAR_INT_DATA, &p );
276
+ len = mol_read_datum( &hdr->cHour, 2, MOL_CHAR_INT_DATA, &p );
277
+ len = mol_read_datum( &hdr->cMinute, 2, MOL_CHAR_INT_DATA, &p );
278
+ len = mol_read_datum( hdr->szDimCode, sizeof(hdr->szDimCode)-1, MOL_STRING_DATA, &p );
279
+ len = mol_read_datum( &hdr->nScalingFactor1, 2, MOL_SHORT_INT_DATA, &p );
280
+ len = mol_read_datum( &hdr->dScalingFactor2, 10, MOL_DOUBLE_DATA, &p );
281
+ len = mol_read_datum( &hdr->dEnergy, 12, MOL_DOUBLE_DATA, &p );
282
+ len = mol_read_datum( &hdr->lInternalRegistryNumber, 6, MOL_LONG_INT_DATA, &p );
283
+
284
+ /* save the whole line 2 */
285
+ p = line;
286
+ len = mol_read_datum( hdr->szMoleculeLine2, sizeof(hdr->szMoleculeLine2)-1, MOL_STRING_DATA, &p );
287
+
288
+
289
+ /*------------ header line #3: comment ----------------*/
290
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
291
+ err = 7; /* can't read the line */
292
+ /* AddMOLfileError( pStrErr, "Can't read header block comment line" ); */
293
+ goto err_fin;
294
+ }
295
+ remove_one_lf( line );
296
+ /* -- Disabled to relax strictness: allow > 80 chars comments.
297
+ if ( line[MOLFILEMAXLINELEN] ){
298
+ err = 8; // too long line
299
+ goto err_fin;
300
+ }
301
+ */
302
+ len = mol_read_datum( hdr->szComment, sizeof(hdr->szComment)-1, MOL_STRING_DATA, &p );
303
+
304
+ err_fin:
305
+
306
+ return err;
307
+ }
308
+ /***************************************************************/
309
+ int RemoveNonPrintable( char *line )
310
+ {
311
+ int i, c, num = 0;
312
+ if ( line ) {
313
+ for ( i = 0; c = UCINT line[i]; i ++ ) {
314
+ /* assuming ASCII charset */
315
+ if ( c < ' ' || c >= 0x7F ) {
316
+ line[i] = '.';
317
+ num ++;
318
+ }
319
+ }
320
+ }
321
+ return num;
322
+ }
323
+ /***************************************************************/
324
+ static int mol_read_counts_line( MOL_CTAB* ctab, FILE *inp, char *pStrErr )
325
+ {
326
+ char *p;
327
+ char line[MOLFILEINPLINELEN];
328
+ const int line_len = sizeof(line);
329
+ int err = 0, len;
330
+
331
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
332
+ MOLFILE_ERR_FIN (err, 1, err_fin, "Cannot read counts line");
333
+ /* can't read the input file line */
334
+ }
335
+ remove_one_lf( line );
336
+ if ( line[MOLFILEMAXLINELEN] ){
337
+ MOLFILE_ERR_SET (err, 0, "Too long counts line"); /* too long input file line */
338
+ }
339
+ if ( 0 > mol_read_datum( &ctab->nNumberOfAtoms, 3, MOL_SHORT_INT_DATA, &p )
340
+ || 0 > mol_read_datum( &ctab->nNumberOfBonds, 3, MOL_SHORT_INT_DATA, &p )
341
+ #if ( MOL_QUERY == MOL_PRESENT )
342
+ || 0 > mol_read_datum( &ctab->nNumberOfAtomsLists, 3, MOL_SHORT_INT_DATA, &p )
343
+ #else
344
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
345
+ #endif
346
+ || 0 > mol_read_datum( NULL, /*obsolete*/ 3, MOL_JUMP_TO_RIGHT, &p )
347
+ || 0 > mol_read_datum( &ctab->cChiralFlag, 3, MOL_CHAR_INT_DATA, &p )
348
+ || 0 > mol_read_datum( &ctab->nNumberOfStextEntries, 3, MOL_SHORT_INT_DATA, &p )
349
+ #if ( MOL_CPSS == MOL_PRESENT )
350
+ || 0 > mol_read_datum( &ctab->nNumberOfReactionComponentsPlus1, 3, MOL_SHORT_INT_DATA, &p )
351
+ || 0 > mol_read_datum( &ctab->nNumberOfReactants, 3, MOL_SHORT_INT_DATA, &p )
352
+ || 0 > mol_read_datum( &ctab->nNumberOfProducts, 3, MOL_SHORT_INT_DATA, &p )
353
+ || 0 > mol_read_datum( &ctab->nNumberOfIntermediates, 3, MOL_SHORT_INT_DATA, &p )
354
+ #else
355
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
356
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
357
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
358
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
359
+ #endif
360
+ || 0 > mol_read_datum( &ctab->nNumberOfPropertyLines, 3, MOL_SHORT_INT_DATA, &p ) ){
361
+ err = 3; /* can't interpret counts line */
362
+ MOLFILE_ERR_SET (err, 3, "Cannot interpret counts line:"); /* too long input file line */
363
+ RemoveNonPrintable( line );
364
+ AddMOLfileError(pStrErr, line);
365
+ goto err_fin;
366
+ }
367
+ len = mol_read_datum( ctab->csCurrentCtabVersion, sizeof(ctab->csCurrentCtabVersion)-1, MOL_STRING_DATA, &p );
368
+ err_fin:
369
+ return err;
370
+ }
371
+
372
+ /*************************************************************************/
373
+ static int read_atom_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
374
+ {
375
+ char *p;
376
+ char line[MOLFILEINPLINELEN];
377
+ const int line_len = sizeof(line);
378
+ S_SHORT i, chg;
379
+ static S_SHORT charge_val[] = {0, 3, 2, 1, 'R', -1, -2, -3};
380
+ /* 0 1 2 3 4 5 6 7 */
381
+ /*
382
+ if ( NULL == ctab->MolAtom ){
383
+ err = 1;
384
+ goto err_fin; // internal error: MolAtom structure has not been allocated
385
+ }
386
+ */
387
+
388
+ for ( i = 0; i < ctab->nNumberOfAtoms; i++ ) {
389
+
390
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
391
+ if ( !err ) {
392
+ MOLFILE_ERR_SET (err, 2, "Cannot read atom block line");
393
+ }
394
+ break;
395
+ }
396
+ remove_one_lf( line );
397
+ if ( line[MOLFILEMAXLINELEN] ){
398
+ MOLFILE_ERR_SET (err, 0, "Too long atom block line");
399
+ }
400
+ if ( err ) {
401
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
402
+ err = -abs(err);
403
+ break;
404
+ }
405
+ continue; /* bypass the rest of the Atom block */
406
+ }
407
+ if ( NULL != ctab->szCoord ) {
408
+ mystrncpy( ctab->szCoord[i], p, 31 ); /* original coordinates */
409
+ }
410
+
411
+ if ( NULL != ctab->MolAtom ) {
412
+ if ( 0 > mol_read_datum( &ctab->MolAtom[i].fX, 10, MOL_DOUBLE_DATA, &p )
413
+ || 0 > mol_read_datum( &ctab->MolAtom[i].fY, 10, MOL_DOUBLE_DATA, &p )
414
+ || 0 > mol_read_datum( &ctab->MolAtom[i].fZ, 10, MOL_DOUBLE_DATA, &p )
415
+ || 0 > mol_read_datum( NULL, /* undescribed in article*/ 1, MOL_JUMP_TO_RIGHT, &p )
416
+ || 0 == mol_read_datum( &ctab->MolAtom[i].szAtomSymbol, 3, MOL_STRING_DATA, &p ) /* was sizeof(ctab->MolAtom[0].szAtomSymbol)-1 */
417
+ #ifdef INCHI_MAIN
418
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cMassDifference, 2, MOL_SHORT_INT_DATA, &p )
419
+ #else
420
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cMassDifference, 2, MOL_CHAR_INT_DATA, &p )
421
+ #endif
422
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cCharge, 3, MOL_CHAR_INT_DATA, &p )
423
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cStereoParity, 3, MOL_CHAR_INT_DATA, &p )
424
+ #if ( MOL_QUERY == MOL_PRESENT )
425
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cH_countPlus1, 3, MOL_CHAR_INT_DATA, &p )
426
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cStereoCare, 3, MOL_CHAR_INT_DATA, &p )
427
+ #else
428
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
429
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
430
+ #endif
431
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cValence, 3, MOL_CHAR_INT_DATA, &p ) ) {
432
+
433
+ err = 4;
434
+ MOLFILE_ERR_SET (err, 4, "Cannot interpret atom block line:");
435
+ RemoveNonPrintable( line );
436
+ AddMOLfileError(pStrErr, line);
437
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
438
+ err = -abs(err);
439
+ break;
440
+ }
441
+ continue; /* can't interpret a first half of atom block line */
442
+ }
443
+ if ( 2 == strlen(ctab->MolAtom[i].szAtomSymbol) && isupper(UCINT ctab->MolAtom[i].szAtomSymbol[1]))
444
+ ctab->MolAtom[i].szAtomSymbol[1] = (char)tolower(UCINT ctab->MolAtom[i].szAtomSymbol[1]); /* 5-4-99 DCh*/
445
+
446
+ if ( (chg = (S_SHORT) ctab->MolAtom[i].cCharge)< 0 || chg >= (int)(sizeof ( charge_val ) / sizeof( charge_val[0] )) ) {
447
+ /* ctab->MolAtom[i].cCharge = 0; */ /* error; ignore for now */
448
+ ctab->MolAtom[i].cCharge = (S_CHAR)(4 - chg); /* allow greater charges to accommodate NCI structures. 8-20-2002 */
449
+ ctab->MolAtom[i].cRadical = 0;
450
+ }else
451
+ if ( 'R' == (chg = charge_val[chg]) ){
452
+ ctab->MolAtom[i].cCharge = 0;
453
+ ctab->MolAtom[i].cRadical = RADICAL_DOUBLET;
454
+ }else{
455
+ ctab->MolAtom[i].cCharge = (S_CHAR)chg; /* actual charge value */
456
+ ctab->MolAtom[i].cRadical = 0;
457
+ }
458
+ #ifdef INCHI_MAIN
459
+ if ( ctab->MolAtom[i].cMassDifference ) { /* e_ReadMOL.c specific */
460
+ ctab->MolAtom[i].cMassDifference += ISOTOPIC_SHIFT_FLAG;
461
+ }
462
+ #endif
463
+
464
+ if (
465
+ #if ( MOL_CPSS == MOL_PRESENT )
466
+ 0 > mol_read_datum( &ctab->MolAtom[i].cH0_designator, 3, MOL_CHAR_INT_DATA, &p )
467
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentType, 3, MOL_CHAR_INT_DATA, &p )
468
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentNumber, 3, MOL_CHAR_INT_DATA, &p )
469
+ #else
470
+ 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
471
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
472
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
473
+ #endif
474
+ #if ( MOL_REACT == MOL_PRESENT )
475
+ || 0 > mol_read_datum( &ctab->MolAtom[i].nAtomAtomMappingNumber, 3, MOL_SHORT_INT_DATA, &p )
476
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cReactionComponentType, 3, MOL_CHAR_INT_DATA, &p )
477
+ #else
478
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
479
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
480
+ #endif
481
+ #if ( MOL_REACT == MOL_PRESENT || MOL_QUERY == MOL_PRESENT )
482
+ || 0 > mol_read_datum( &ctab->MolAtom[i].cExactChargeFlag, 3, MOL_CHAR_INT_DATA, &p )
483
+ #else
484
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
485
+ #endif
486
+ ){
487
+ err = 5; /* can't interpret a second half of atom block line */
488
+ MOLFILE_ERR_SET (err, 5, "Cannot interpret atom block line:");
489
+ RemoveNonPrintable( line );
490
+ AddMOLfileError(pStrErr, line);
491
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
492
+ err = -abs(err);
493
+ break;
494
+ }
495
+ continue;
496
+ }
497
+ }
498
+ }
499
+ /* err_fin: */
500
+ return err;
501
+ }
502
+ /*************************************************************************/
503
+ static int read_bonds_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
504
+ {
505
+ char *p;
506
+ char line[MOLFILEINPLINELEN];
507
+ const int line_len = sizeof(line);
508
+ S_SHORT i;
509
+ /*
510
+ if ( NULL == ctab->MolBond ){
511
+ err = 1;
512
+ goto err_fin; // internal error: memory has not been allocated for MolBond structure
513
+ }
514
+ */
515
+ for ( i = 0; i < ctab->nNumberOfBonds; i++ ) {
516
+
517
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
518
+ if ( !err ) {
519
+ MOLFILE_ERR_SET (err, 2, "Cannot read bond block line");
520
+ }
521
+ break;
522
+ }
523
+ remove_one_lf( line );
524
+ if ( line[MOLFILEMAXLINELEN] ){
525
+ err = err? err : 3; /* too long input file line */
526
+ }
527
+ if ( err ) {
528
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
529
+ err = -abs(err);
530
+ break;
531
+ }
532
+ continue;
533
+ }
534
+
535
+ if ( ctab->MolBond ) {
536
+ if ( 0 > mol_read_datum( &ctab->MolBond[i].nAtomNo1, 3, MOL_SHORT_INT_DATA, &p )
537
+ || 0 > mol_read_datum( &ctab->MolBond[i].nAtomNo2, 3, MOL_SHORT_INT_DATA, &p )
538
+ || 0 > mol_read_datum( &ctab->MolBond[i].cBondType, 3, MOL_CHAR_INT_DATA, &p )
539
+ || 0 > mol_read_datum( &ctab->MolBond[i].cBondStereo, 3, MOL_CHAR_INT_DATA, &p )
540
+ #if ( MOL_QUERY == MOL_PRESENT )
541
+ || 0 > mol_read_datum( &ctab->MolBond[i].cBondTopology, 3, MOL_CHAR_INT_DATA, &p ) /* ring/chain */
542
+ #else
543
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
544
+ #endif
545
+ #if ( MOL_REACT == MOL_PRESENT )
546
+ || 0 > mol_read_datum( &ctab->MolBond[i].cReactingCenterStatus, 3, MOL_CHAR_INT_DATA, &p )
547
+ #else
548
+ || 0 > mol_read_datum( NULL, 3, MOL_JUMP_TO_RIGHT, &p )
549
+ #endif
550
+ ){
551
+ if ( !err ) {
552
+ /* can't interpret bonds block line */
553
+ MOLFILE_ERR_SET (err, 4, "Cannot interpret bond block line:");
554
+ RemoveNonPrintable( line );
555
+ AddMOLfileError(pStrErr, line);
556
+ }
557
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
558
+ err = -abs(err);
559
+ break;
560
+ }
561
+ }
562
+ }
563
+ }
564
+ /* err_fin: */
565
+ return err;
566
+ }
567
+ /*************************************************************************/
568
+ static int read_stext_block( MOL_CTAB* ctab, FILE *inp, int err, char *pStrErr )
569
+ {
570
+ /* just pass by all stext enties without attemp to interpret */
571
+ char *p;
572
+ char line[MOLFILEINPLINELEN];
573
+ const int line_len = sizeof(line);
574
+ S_SHORT i;
575
+
576
+ for ( i = 0; i < 2*ctab->nNumberOfStextEntries; i++ ) {
577
+
578
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
579
+ if ( !err ) {
580
+ MOLFILE_ERR_FIN (err, 2, err_fin, "Cannot read STEXT block line");
581
+ }
582
+ break;
583
+ /* can't read the input file line */
584
+ }
585
+ /*
586
+ remove_one_lf( line );
587
+ if ( line[MOLFILEMAXLINELEN] ){
588
+ MOLFILE_ERR_SET (err, 2, "Warning: Too long STEXT block line");
589
+ // too long input file line
590
+ }
591
+ */
592
+ }
593
+ err_fin:
594
+ return err;
595
+ }
596
+ /*************************************************************************/
597
+ static int read_properties_block( MOL_CTAB* ctab, MOL_HEADER_BLOCK *pHdr, FILE *inp, int err, char *pStrErr )
598
+ {
599
+ enum { MULTI_LINE_MODE_NO_MODE, MULTI_LINE_MODE_ISIS_ALIAS };
600
+ char *p;
601
+ char line[MOLFILEINPLINELEN];
602
+ const int line_len = sizeof(line);
603
+ int nMultiLineMode = MULTI_LINE_MODE_NO_MODE, nAtomNumber=0;
604
+ S_SHORT i, j;
605
+ char charM[2];
606
+ char szBlank[3];
607
+ char szType[4];
608
+ S_SHORT skip_lines=0;
609
+ S_SHORT num_entries;
610
+ S_SHORT num_atoms = ctab->nNumberOfAtoms;
611
+
612
+ int charge_encountered = 0;
613
+ int radical_encountered = 0;
614
+ int isotope_encountered = 0;
615
+ /*
616
+ if ( NULL == ctab->MolAtom ){
617
+ err = 1;
618
+ goto err_fin; internal error: memory has not been allocated for MolAtom structure
619
+ }
620
+ */
621
+ for ( i = 0; ctab->csCurrentCtabVersion[0]? 1 : (i < ctab->nNumberOfPropertyLines); i++ ) { /* the last line should be M END */
622
+ /* ctab->csCurrentCtabVersion[0] == 0:
623
+ exactly ctab->nNumberOfPropertyLines lines including M END */
624
+ /* ctab->csCurrentCtabVersion[0] != 0:
625
+ read until M END line was encountered */
626
+ if ( NULL == ( p = fgets_up_to_lf( line, line_len, inp ) ) ){
627
+ if ( !err ) {
628
+ MOLFILE_ERR_SET (err, 2, "Cannot read properties block line");
629
+ }
630
+ goto err_fin;
631
+ }
632
+ remove_one_lf( line );
633
+ if ( line[MOLFILEMAXLINELEN] ){
634
+ MOLFILE_ERR_SET (err, 3, "Too long properties block line");
635
+ continue;
636
+ }
637
+ if ( skip_lines > 0 ) {
638
+ skip_lines --;
639
+ continue;
640
+ }
641
+ /* alias. */
642
+ if ( nMultiLineMode == MULTI_LINE_MODE_ISIS_ALIAS && nAtomNumber ) {
643
+ int len;
644
+ nMultiLineMode = MULTI_LINE_MODE_NO_MODE;
645
+ if ( 0 >= (len=normalize_name( p )) ) {
646
+ nAtomNumber = 0;
647
+ continue;
648
+ }
649
+ if( 0 < len && len < (int)(sizeof(ctab->MolAtom->szAtomSymbol)) ) {
650
+ int nCharge, nRad;
651
+ MOL_ATOM* MolAtom = ctab->MolAtom + nAtomNumber-1;
652
+ /* ctab->MolAtom[nAtomNumber-1].cAtomAliasedFlag = 1; */
653
+ /* extract radicals & charges */
654
+ extract_ChargeRadical( p, &nRad, &nCharge );
655
+ /* Aliased atom cannot have charge, radical & mass difference */
656
+ /* in the atom table or "M CHG", "M RAD", "M ISO" */
657
+ /* if ( nCharge ) */
658
+ MolAtom->cCharge = (S_CHAR)nCharge;
659
+ /* if ( nRad ) */
660
+ MolAtom->cRadical = (char)nRad;
661
+
662
+ if ( 1 == len && 'D' == p[0] ) {
663
+ /* H isotope */
664
+ p[0] = 'H';
665
+ #ifdef INCHI_MAIN
666
+ MolAtom->cMassDifference=(1 + ISOTOPIC_SHIFT_FLAG);
667
+ #else
668
+ MolAtom->cMassDifference=1;
669
+ #endif
670
+ } else
671
+ if ( 1 == len && 'T' == p[0] ) {
672
+ /* H isotope */
673
+ p[0] = 'H';
674
+ #ifdef INCHI_MAIN
675
+ MolAtom->cMassDifference=(2 + ISOTOPIC_SHIFT_FLAG);
676
+ #else
677
+ MolAtom->cMassDifference=2;
678
+ #endif
679
+ } else
680
+ MolAtom->cMassDifference=0;
681
+ if ( strlen(p) < sizeof(ctab->MolAtom[0].szAtomSymbol) ) {
682
+ strcpy(MolAtom->szAtomSymbol, p);
683
+ } else {
684
+ strcpy(MolAtom->szAtomSymbol, "???");
685
+ }
686
+ MolAtom->cAtomAliasedFlag ++;
687
+ }
688
+ skip_lines = 0;
689
+ nAtomNumber = 0;
690
+ continue;
691
+ }
692
+
693
+ if ( 1 != mol_read_datum( charM, sizeof(charM) - 1, MOL_STRING_DATA, &p )
694
+ || 0 != mol_read_datum( szBlank, sizeof(szBlank) - 1, MOL_STRING_DATA, &p ) /* must contain 0 bytes */
695
+ || 0 >= mol_read_datum( szType, sizeof(szType) - 1, MOL_STRING_DATA, &p ) /* must contain 3 bytes */
696
+ ) {
697
+ if ( !strcmp( line, SDF_END_OF_DATA ) ) {
698
+ err = err? -abs(err): -4;
699
+ break;
700
+ }
701
+ continue; /* ignore because cannot recognize */
702
+ }
703
+ if ( charM[0] == 'V' ){
704
+ skip_lines = 0; /* ISIS/Desktop Atom Value: one-line property */
705
+ continue;
706
+ }
707
+ if ( charM[0] == 'G' ){
708
+ skip_lines = 1; /* ISIS/Desktop Group abbreviation: two-line property */
709
+ continue;
710
+ }
711
+ if ( charM[0] == 'A' ) {
712
+ if ( NULL != ctab->MolAtom &&
713
+ 0 < ( nAtomNumber = (int)strtol(szType, NULL, 10) ) &&
714
+ nAtomNumber <= ctab->nNumberOfAtoms ){
715
+ /* Atom Alias [ISIS/Desktop] two-line property */
716
+ nMultiLineMode = MULTI_LINE_MODE_ISIS_ALIAS;
717
+ continue;
718
+ } else {
719
+ nAtomNumber = 0;
720
+ skip_lines = 1;
721
+ continue;
722
+ }
723
+ }
724
+ if ( charM[0] == 'S' && !strcmp( szType, "SKP" ) ){ /* skip lines */
725
+ if ( 0 >= mol_read_datum( &skip_lines, 3, MOL_SHORT_INT_DATA, &p ) ) {
726
+ skip_lines = 0;
727
+ }
728
+ continue;
729
+ }
730
+ if ( charM[0] != 'M' ) {/* cannot recognize a line */
731
+ continue;
732
+ }
733
+ if ( !strcmp( szType, "REG" ) ) {
734
+ int len;
735
+ p = p + strspn( p, " " );
736
+ len = strcspn( p, " " );
737
+ len = inchi_min( len, MOL_MAX_VALUE_LEN );
738
+ mol_read_datum( &pHdr->lInternalRegistryNumber, len, MOL_LONG_INT_DATA, &p );
739
+ continue;
740
+ }
741
+
742
+ if ( !strcmp( szType, "END" ) ){
743
+ if ( ctab->csCurrentCtabVersion[0] )
744
+ break; /* end of property lines */
745
+ continue;
746
+ }
747
+
748
+ if ( NULL == ctab->MolAtom )
749
+ continue; /* ignore because the user requested to bypass all this stuff */
750
+
751
+ /*----------------------------------- charge: Generic */
752
+ if ( !strcmp( szType, "CHG" ) &&
753
+ 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
754
+ 1 <= num_entries && num_entries <= 8 ) {
755
+ S_SHORT atoms[8];
756
+ S_SHORT charges[8];
757
+ if ( !charge_encountered && !radical_encountered ) {
758
+ /* first charge or radical record clears all Atom Block */
759
+ /* entered charge and radical data to zeroes */
760
+ charge_encountered = -1;
761
+ }
762
+ for ( j = 0; j < num_entries; j++ ) {
763
+ if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
764
+ 0 > mol_read_datum( &charges[j], 0, MOL_SHORT_INT_DATA, &p ) ||
765
+ atoms[j] <= 0 || atoms[j] > num_atoms ||
766
+ charges[j] < -15 || charges[j] > 15 ) {
767
+ goto charge_error;
768
+ }
769
+ }
770
+ if ( charge_encountered == -1 ) {
771
+ for ( j = 0; j < num_atoms; j++ ) {
772
+ if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms.*/
773
+ ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
774
+ }
775
+ charge_encountered = 1;
776
+ }
777
+ for ( j = 0; j < num_entries; j++ ) {
778
+ if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) /* do not change aliased atoms.*/
779
+ ctab->MolAtom[atoms[j]-1].cCharge = (S_CHAR)charges[j];
780
+ }
781
+ continue;
782
+ charge_error:
783
+ MOLFILE_ERR_SET (err, 0, "Charge not recognized:");
784
+ RemoveNonPrintable( line );
785
+ AddMOLfileError(pStrErr, line);
786
+ continue; /* ignore for now */
787
+ }
788
+ /*-------------------------------------- radical: Generic */
789
+ if ( !strcmp( szType, "RAD" ) &&
790
+ 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
791
+ 1 <= num_entries && num_entries <= 8 ) {
792
+ S_SHORT atoms[8];
793
+ S_SHORT radicals[8];
794
+ if ( !charge_encountered && !radical_encountered ) {
795
+ /* first charge or radical record clears all Atom Block */
796
+ /* entered charge and radical data to zeroes */
797
+ radical_encountered = -1;
798
+ }
799
+ for ( j = 0; j < num_entries; j++ ) {
800
+ if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
801
+ 0 > mol_read_datum( &radicals[j], 0, MOL_SHORT_INT_DATA, &p ) ||
802
+ atoms[j] <= 0 || atoms[j] > num_atoms ||
803
+ radicals[j] < 0 || radicals[j] > 3 ) {
804
+ goto radical_error;
805
+ }
806
+ }
807
+ if ( radical_encountered == -1 ) {
808
+ for ( j = 0; j < num_atoms; j++ ) {
809
+ if ( !ctab->MolAtom[j].cAtomAliasedFlag ) /* do not clear aliased atoms. 5-3-99 DCh */
810
+ ctab->MolAtom[j].cCharge = ctab->MolAtom[j].cRadical = '\0';
811
+ }
812
+ radical_encountered = 1;
813
+ }
814
+ for ( j = 0; j < num_entries; j++ ) {
815
+ if ( !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag ) { /* do not change aliased atoms. 5-3-99 DCh */
816
+ ctab->MolAtom[atoms[j]-1].cRadical = (S_CHAR)radicals[j];
817
+ }
818
+ }
819
+ continue;
820
+ radical_error:
821
+ MOLFILE_ERR_SET (err, 0, "Radical not recognized:");
822
+ RemoveNonPrintable( line );
823
+ AddMOLfileError(pStrErr, line);
824
+ continue; /* ignore error for now */
825
+ }
826
+ /*-------------------------------------- isotope: Generic */
827
+ if ( !strcmp( szType, "ISO" ) &&
828
+ 0 < mol_read_datum( &num_entries, 3, MOL_SHORT_INT_DATA, &p ) &&
829
+ 1 <= num_entries && num_entries <= 8 ) {
830
+ S_SHORT atoms[8];
831
+ S_SHORT iso_mass[8]; /* contains istotope mass number, not difference. 7-14-00 DCh. */
832
+ if ( !isotope_encountered ) {
833
+ /* first charge or radical record clears all Atom Block */
834
+ /* entered charge and radical data to zeroes */
835
+ isotope_encountered = -1;
836
+ }
837
+ for ( j = 0; j < num_entries; j++ ) {
838
+ if ( 0 > mol_read_datum( &atoms[j], 0, MOL_SHORT_INT_DATA, &p ) ||
839
+ 0 > mol_read_datum( &iso_mass[j], 0, MOL_SHORT_INT_DATA, &p ) ||
840
+ atoms[j] <= 0 || atoms[j] > num_atoms
841
+ /*|| iso_mass[j] < -18 || iso_mass[j] > 12*/ ) {
842
+ /* goto isotope_error; */
843
+ atoms[j] = -1; /* flag error */
844
+ MOLFILE_ERR_SET (err, 0, "Isotopic data not recognized:");
845
+ RemoveNonPrintable( line );
846
+ AddMOLfileError(pStrErr, line);
847
+ continue; /* ignore isotopic error for now */
848
+ }
849
+ }
850
+ if ( isotope_encountered == -1 ) {
851
+ for ( j = 0; j < num_atoms; j++ ) {
852
+ /*if ( !ctab->MolAtom[j].cAtomAliasedFlag )*/ /* clear even aliased atoms */
853
+ ctab->MolAtom[j].cMassDifference = 0;
854
+ }
855
+ isotope_encountered = 1;
856
+ }
857
+ for ( j = 0; j < num_entries; j++ ) {
858
+ if ( atoms[j] <= 0 )
859
+ continue; /* ignore isotopic error for now */
860
+ if ( 1 /* !ctab->MolAtom[atoms[j]-1].cAtomAliasedFlag */) {
861
+ char *at = ctab->MolAtom[atoms[j]-1].szAtomSymbol;
862
+ if ( at[1] || at[0] != 'D' && at[0] != 'T' ) { /* D & T cannot have ISO */
863
+ /* need atomic weight to calculate isotope difference. 7-14-00 DCh. */
864
+ #ifdef INCHI_MAIN
865
+ ctab->MolAtom[atoms[j]-1].cMassDifference = iso_mass[j]; /* mass, not difference */
866
+ #else
867
+ int atw, atw_diff;
868
+ if ( (atw = get_atw( at )) && abs( atw_diff = (int)iso_mass[j] - atw ) < 20 ) {
869
+ ctab->MolAtom[atoms[j]-1].cMassDifference = (char)(atw_diff? atw_diff : ZERO_ATW_DIFF);
870
+ }
871
+ #endif
872
+ }
873
+ }
874
+ }
875
+ continue;
876
+ }
877
+ }
878
+ err_fin:
879
+ return err;
880
+ }
881
+ /*************************************************************************/
882
+ MOL_DATA* delete_mol_data( MOL_DATA* mol_data )
883
+ {
884
+ if ( mol_data ) {
885
+ if ( mol_data->ctab.MolAtom )
886
+ inchi_free( mol_data->ctab.MolAtom );
887
+ if ( mol_data->ctab.MolBond )
888
+ inchi_free( mol_data->ctab.MolBond );
889
+ if ( mol_data->ctab.szCoord )
890
+ inchi_free( mol_data->ctab.szCoord );
891
+ inchi_free( mol_data );
892
+ mol_data = NULL;
893
+ }
894
+ return mol_data;
895
+ }
896
+ /*************************************************************************/
897
+ /* Comletely ingnore STEXT block, queries, and 3D features
898
+ */
899
+ MOL_DATA* read_mol_file( FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
900
+ int bGetOrigCoord, int *err, char *pStrErr )
901
+ {
902
+ MOL_DATA* mol_data = NULL;
903
+ int ret = 0, prev_ret, bEndOfData = 0;
904
+ int bReadAll = ( OnlyHeaderBlock == NULL );
905
+ MOL_CTAB ctab, *pCtab = NULL;
906
+ MOL_HEADER_BLOCK *pHdr = NULL;
907
+
908
+ *err = 0;
909
+ if ( bReadAll ) {
910
+ if ( NULL == ( mol_data = ( MOL_DATA* )inchi_calloc( 1, sizeof(MOL_DATA) ) ) ){
911
+ ret = 1; /* can't allocate mol_data structure */
912
+ AddMOLfileError( pStrErr, "Out of RAM" );
913
+ goto err_fin;
914
+ }
915
+ pHdr = &mol_data->hdr;
916
+ pCtab = &mol_data->ctab;
917
+ } else {
918
+ pHdr = OnlyHeaderBlock;
919
+ pCtab = OnlyCtab? OnlyCtab : &ctab;
920
+ memset( pHdr, 0, sizeof( MOL_HEADER_BLOCK ) );
921
+ memset( pCtab, 0, sizeof( MOL_CTAB ) );
922
+ }
923
+ pCtab->MolBond = NULL;
924
+ pCtab->MolAtom = NULL;
925
+ pCtab->szCoord = NULL;
926
+
927
+ if ( 0 != ( ret = mol_read_hdr(pHdr, inp, pStrErr) ) ){
928
+ ret += 10;
929
+ goto err_fin; /* most probably end of file */
930
+ }
931
+ if ( 0 != ( ret = mol_read_counts_line( pCtab , inp, pStrErr) ) ){
932
+ ret += 20;
933
+ goto err_fin;
934
+ }
935
+
936
+ if ( bReadAll ) {
937
+ if ( NULL == ( mol_data->ctab.MolAtom = (MOL_ATOM*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_ATOM)) ) ){
938
+ ret = 2; /* can't allocate MolAtom structure */
939
+ MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
940
+ }
941
+ if ( bGetOrigCoord &&
942
+ NULL == ( mol_data->ctab.szCoord = (MOL_COORD*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfAtoms,1), sizeof(MOL_COORD)) ) ){
943
+ ret = 2; /* can't allocate MolAtom structure */
944
+ MOLFILE_ERR_FIN (ret, 2, err_fin, "Out of RAM");
945
+ }
946
+ }
947
+ if ( 0 != ( ret = read_atom_block(pCtab, inp, ret, pStrErr) ) ){
948
+ if ( ret < 0 ) {
949
+ ret = -ret;
950
+ bEndOfData = 1;
951
+ }
952
+ ret += 30;
953
+ /* goto err_fin; */
954
+ }
955
+
956
+ if ( bReadAll && ret < 30 ) {
957
+ if ( !bEndOfData && NULL == ( mol_data->ctab.MolBond = (MOL_BONDS*)inchi_calloc(inchi_max(mol_data->ctab.nNumberOfBonds,1), sizeof(MOL_BONDS)) ) ){
958
+ ret = 3; /* can't allocate MolBond structure */
959
+ MOLFILE_ERR_FIN (ret, 3, err_fin, "Out of RAM");
960
+ }
961
+ }
962
+ prev_ret = ret;
963
+ if ( !bEndOfData && 0 != ( ret = read_bonds_block(pCtab, inp, ret, pStrErr) ) ){
964
+ if ( ret < 0 ) {
965
+ ret = -ret;
966
+ bEndOfData = 1;
967
+ }
968
+ ret = prev_ret? prev_ret : ret + 40;
969
+ }
970
+ prev_ret = ret;
971
+ if ( !bEndOfData && 0 != ( ret = read_stext_block(pCtab, inp, ret, pStrErr) ) ){
972
+ ret = prev_ret? prev_ret : ret + 50;
973
+ }
974
+ prev_ret = ret;
975
+ if ( !bEndOfData && 0 != ( ret = read_properties_block(pCtab, pHdr, inp, ret, pStrErr) ) ){
976
+ if ( ret < 0 ) {
977
+ ret = -ret;
978
+ bEndOfData = 1;
979
+ }
980
+ ret = prev_ret? prev_ret : ret + 60;
981
+ }
982
+
983
+ err_fin:
984
+ *err = bEndOfData? -ret : ret;
985
+ if ( bReadAll ) {
986
+ if ( ret )
987
+ mol_data = delete_mol_data( mol_data ); /* delete all results */
988
+ return mol_data;
989
+ } else {
990
+ if ( ret )
991
+ return NULL;
992
+ else
993
+ return (MOL_DATA*)OnlyHeaderBlock;
994
+ }
995
+ }
996
+
997
+ /******************************************************************/
998
/* SDfile data-header labels that are recognized without a user-supplied label */
char sdf_data_hdr_name[] = "NAME";
char sdf_data_hdr_comm[] = "COMMENT";

/* States of the SDfile data-item scanner (see bypass_sdf_data_items) */
enum {
    SDF_START,                /* nothing read yet                          */
    SDF_DATA_HEADER,          /* "> ..." header with an unrecognized label */
    SDF_DATA_HEADER_NAME,     /* header labelled NAME                      */
    SDF_DATA_HEADER_COMMENT,  /* header labelled COMMENT                   */
    SDF_DATA_HEADER_CAS,      /* header whose label starts with CAS        */
    SDF_DATA_HEADER_USER,     /* header matching the user-requested label  */
    SDF_DATA_LINE,            /* inside the data lines of an item          */
    SDF_END_OF_DATA_ITEM,     /* empty line that terminated a data item    */
    SDF_EMPTY_LINE,           /* extra empty line between data items       */
    SDF_END_OF_DATA_BLOCK     /* end-of-record delimiter line was read     */
};
1004
+ /******************************************************************/
1005
/* Convert a CAS registry number written with hyphens (e.g. "50-00-0")
 * into a plain integer.
 *
 * The buffer is rewritten IN PLACE: digits are packed to the front,
 * hyphens are dropped, and scanning stops at the first character that is
 * neither a digit nor a hyphen.  A hyphen in the very first position is
 * kept, so it acts as a minus sign for strtol().
 *
 * Returns the parsed number; 0 when no digits were found.
 */
static long extract_cas_rn( char *line )
{
    char *src = line;
    char *dst;

    if ( *src == '-' ) {
        src ++;  /* keep a leading '-' where it is */
    }
    for ( dst = src; *src; src ++ ) {
        if ( isdigit( (int)(unsigned char)*src ) ) {
            *dst++ = *src;
            continue;
        }
        if ( *src != '-' ) {
            break;  /* first non-digit, non-hyphen ends the number */
        }
    }
    *dst = '\0';

    return strtol( line, NULL, 10 );
}
1020
+ /******************************************************************/
1021
+ static int identify_sdf_label( char* inp_line, const char *pSdfLabel )
1022
+ {
1023
+ char line[MOLFILEMAXLINELEN];
1024
+ char *p, *q;
1025
+ int i, j, len;
1026
+ if ( (p = strchr( inp_line, '<' )) &&
1027
+ (q = strchr( p, '>' )) &&
1028
+ (len = q-p-1) > 0 && len < (int)sizeof(line) ) {
1029
+ memcpy( line, p+1, len );
1030
+ line[len] = '\0';
1031
+ for ( i = 0; isspace( UCINT line[i] ); i ++ )
1032
+ ;
1033
+ for ( j = len-1; j >= i && isspace( UCINT line[i] ); j -- )
1034
+ ;
1035
+ len = j-i+1;
1036
+ p = line+i;
1037
+ if ( pSdfLabel && pSdfLabel[0] && len == (int)strlen(pSdfLabel) && !memicmp( p, pSdfLabel, len ) )
1038
+ return SDF_DATA_HEADER_USER;
1039
+ if ( len == sizeof(sdf_data_hdr_name)-1 && !memicmp( p, sdf_data_hdr_name, len ) )
1040
+ return SDF_DATA_HEADER_NAME;
1041
+ if ( len == sizeof(sdf_data_hdr_comm)-1 && !memicmp( p, sdf_data_hdr_comm, len ) )
1042
+ return SDF_DATA_HEADER_COMMENT;
1043
+ if ( !memicmp( p, "CAS", 3 ) )
1044
+ return SDF_DATA_HEADER_CAS;
1045
+ }
1046
+ return SDF_DATA_HEADER;
1047
+ }
1048
+ /******************************************************************/
1049
+ int bypass_sdf_data_items( FILE* inp, long *cas_reg_no, char* comment,
1050
+ int lcomment, char *name, int lname, int prev_err,
1051
+ const char *pSdfLabel, char *pSdfValue, char *pStrErr )
1052
+ {
1053
+ char line[MOLFILEINPLINELEN];
1054
+ const int line_len = sizeof(line);
1055
+ int err = 0;
1056
+ int current_state = SDF_START;
1057
+ int n_blank_lines = 0;
1058
+ int n_lines = 0;
1059
+ char* p = NULL;
1060
+ int bNeedsName = name && lname > 0 && !name[0];
1061
+ int bNeedsComm = comment && lcomment > 0 && !comment[0];
1062
+ int bNeedsUser = pSdfLabel && pSdfLabel[0] && pSdfValue;
1063
+ int bNeedsCASrn = 0;
1064
+ int bCASrnIsUser = 0;
1065
+
1066
+ if ( cas_reg_no != NULL ) {
1067
+ bNeedsCASrn = 1;
1068
+ *cas_reg_no = 0;
1069
+ bCASrnIsUser = (bNeedsUser && !memicmp(pSdfLabel,"CAS", 3));
1070
+ }
1071
+
1072
+ while ( err == 0 &&
1073
+ current_state !=SDF_END_OF_DATA_BLOCK &&
1074
+ NULL != ( p = fgets_up_to_lf( line, line_len, inp ) ) ) {
1075
+
1076
+ if ( !n_lines && !memcmp(line, "M END", 6) ) {
1077
+ continue; /* allow subtle errors */
1078
+ }
1079
+ n_lines++;
1080
+
1081
+ remove_trailing_spaces( line );
1082
+ if ( line[MOLFILEMAXLINELEN] ){
1083
+ if ( current_state != SDF_DATA_HEADER &&
1084
+ current_state != SDF_DATA_LINE &&
1085
+ current_state != SDF_DATA_HEADER_NAME &&
1086
+ current_state != SDF_DATA_HEADER_USER &&
1087
+ current_state != SDF_DATA_HEADER_COMMENT ) {
1088
+ line[MOLFILEMAXLINELEN] = '\0';
1089
+ if ( !prev_err ) {
1090
+ MOLFILE_ERR_SET (err, 0, "Too long SData line truncated");
1091
+ }
1092
+ } else {
1093
+ /* allow long lines in SDF data. 9-29-00 DCh */
1094
+ line[MOLFILEMAXLINELEN] = '\0';
1095
+ }
1096
+ }
1097
+
1098
+ n_blank_lines += ( *line == '\0' );
1099
+
1100
+ switch( current_state ) {
1101
+
1102
+ case SDF_START:
1103
+ case SDF_END_OF_DATA_ITEM:
1104
+ case SDF_EMPTY_LINE: /* Added 9-25-97 DCh */
1105
+
1106
+ if ( 0 == strcmp( line, SDF_END_OF_DATA ) ) {
1107
+ current_state = SDF_END_OF_DATA_BLOCK;
1108
+ }
1109
+ else
1110
+ if ( '>' == *line ) {
1111
+ current_state = ( bNeedsName || bNeedsComm || bNeedsCASrn || bNeedsUser )? identify_sdf_label(line, pSdfLabel) : SDF_DATA_HEADER;
1112
+ }else
1113
+ if ( *line == '\0' ) { /* Added 9-25-97 DCh */
1114
+ /* Relax the strictness: Allow more than 1 empty line. */
1115
+ current_state=SDF_EMPTY_LINE;
1116
+ } else
1117
+ if ( !prev_err ) {
1118
+ MOLFILE_ERR_SET (err, 3, "Unexpected SData header line:");
1119
+ RemoveNonPrintable( line );
1120
+ AddMOLfileError(pStrErr, line);
1121
+ /* unexpected contents of data header line */
1122
+ } else {
1123
+ err = 3;
1124
+ }
1125
+ break;
1126
+
1127
+ case SDF_DATA_HEADER_NAME:
1128
+ if ( bNeedsName && 0 < normalize_name( line ) ) {
1129
+ bNeedsName = 0;
1130
+ mystrncpy( name, line, lname );
1131
+ }
1132
+ goto got_data_line;
1133
+
1134
+ case SDF_DATA_HEADER_COMMENT:
1135
+ if ( bNeedsComm && 0 < normalize_name( line ) ) {
1136
+ bNeedsComm = 0;
1137
+ mystrncpy( comment, line, lcomment );
1138
+ }
1139
+ goto got_data_line;
1140
+
1141
+ case SDF_DATA_HEADER_USER:
1142
+ if ( bNeedsUser && 0 < normalize_name( line ) ) {
1143
+ bNeedsUser = 0;
1144
+ mystrncpy( pSdfValue, line, MAX_SDF_VALUE+1 );
1145
+ if ( bCASrnIsUser && bNeedsCASrn ) {
1146
+ *cas_reg_no = extract_cas_rn( line );
1147
+ bNeedsCASrn = (0 == *cas_reg_no);
1148
+ }
1149
+ }
1150
+ goto got_data_line;
1151
+
1152
+ case SDF_DATA_HEADER_CAS:
1153
+ if ( bNeedsCASrn && 0 < normalize_name( line ) ) {
1154
+ *cas_reg_no = extract_cas_rn( line );
1155
+ bNeedsCASrn = (0 == *cas_reg_no);
1156
+ }
1157
+ goto got_data_line;
1158
+
1159
+ case SDF_DATA_HEADER:
1160
+ case SDF_DATA_LINE:
1161
+ got_data_line:
1162
+ current_state = *line? SDF_DATA_LINE : SDF_END_OF_DATA_ITEM;
1163
+ break;
1164
+
1165
+ }
1166
+ }
1167
+ if ( 0 == err && SDF_END_OF_DATA_BLOCK != current_state && NULL == p )
1168
+ ; /* err = 4; */ /* unexpected end of file: missing $$$$ */
1169
+ else
1170
+ if (err && ( n_blank_lines == n_lines && *line == '\0' ) )
1171
+ err = 5; /* empty lines -- do not know when this can happen */
1172
+
1173
+ if ( err && err != 5 && current_state != SDF_END_OF_DATA_BLOCK && p ) {
1174
+ /* bypass up to $$$$ */
1175
+ while ( ( p = fgets_up_to_lf( line, line_len, inp ) ) && memcmp( line, SDF_END_OF_DATA, 4 ) )
1176
+ ;
1177
+ if ( p ) {
1178
+ err = 9; /* bypassed to $$$$; non-fatal */
1179
+ AddMOLfileError(pStrErr, "Bypassing to next structure");
1180
+ }
1181
+
1182
+ }
1183
+
1184
+ return err;
1185
+ }
1186
+ /******************************************************************/
1187
+ MOL_DATA* read_sdfile_segment(FILE* inp, MOL_HEADER_BLOCK *OnlyHeaderBlock, MOL_CTAB *OnlyCtab,
1188
+ int bGetOrigCoord,
1189
+ char *pname, int lname,
1190
+ long *Id, const char *pSdfLabel, char *pSdfValue,
1191
+ int *err, char *pStrErr )
1192
+ {
1193
+ MOL_DATA* mol_data = read_mol_file( inp, OnlyHeaderBlock, OnlyCtab, bGetOrigCoord, err, pStrErr );
1194
+ int err_bypass_sdf = 0;
1195
+
1196
+ if ( pname && lname ) {
1197
+ pname[0] = '\0';
1198
+ }
1199
+ if ( Id ) {
1200
+ *Id = 0L; /* ignore for now */
1201
+ }
1202
+ /* if ( mol_data && !*err ) { */
1203
+ if ( *err < 0 ) {
1204
+ *err = -*err; /* end of data encountered */
1205
+ } else {
1206
+ err_bypass_sdf = bypass_sdf_data_items( inp, Id, NULL, 0, pname, lname, *err, pSdfLabel, pSdfValue, pStrErr );
1207
+ if ( err_bypass_sdf ) {
1208
+ *err = err_bypass_sdf; /* important to continue to the next good structure */
1209
+ }
1210
+ }
1211
+ /* } */
1212
+ return mol_data;
1213
+ }
1214
+ /****************************************************************************/
1215
+ int CopyMOLfile(FILE *inp_file, long fPtrStart, long fPtrEnd, FILE *prb_file, long lNumb)
1216
+ {
1217
+ char line[MOLFILEINPLINELEN], *p;
1218
+ long fPtr;
1219
+ int ret = 1;
1220
+ char szNumber[32];
1221
+
1222
+ if ( inp_file && prb_file && fPtrStart >= 0L &&
1223
+ fPtrEnd > fPtrStart &&
1224
+ 0 == fseek( inp_file, fPtrStart, SEEK_SET ) ) {
1225
+
1226
+ while ( fPtrEnd > (fPtr = ftell(inp_file)) && fPtr >= 0L &&
1227
+ fgets_up_to_lf( line, sizeof(line)-1, inp_file ) ) {
1228
+ line[sizeof(line)-1] = '\0'; /* unnecessary extra precaution */
1229
+ if ( fPtr == fPtrStart && lNumb ) {
1230
+ int len;
1231
+ LtrimRtrim( line, &len );
1232
+ len = sprintf( szNumber, "#%ld%s", lNumb, len?"/":"" );
1233
+ mystrncpy( line+len, line, sizeof(line)-len-1 );
1234
+ memcpy( line, szNumber, len );
1235
+ }
1236
+ if ( !strchr(line, '\n') ) {
1237
+ p = line+strlen(line);
1238
+ p[0] = '\n';
1239
+ p[1] = '\0';
1240
+ }
1241
+ fputs( line, prb_file );
1242
+ }
1243
+ ret = fseek( inp_file, fPtrEnd, SEEK_SET );
1244
+ }
1245
+ return ret;
1246
+ }