rino 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/README +44 -0
  2. data/Rakefile +123 -0
  3. data/ext/extconf.rb +26 -0
  4. data/ext/ruby_inchi_main.so +0 -0
  5. data/ext/src/aux2atom.h +2786 -0
  6. data/ext/src/comdef.h +148 -0
  7. data/ext/src/e_0dstereo.c +3014 -0
  8. data/ext/src/e_0dstereo.h +31 -0
  9. data/ext/src/e_comdef.h +57 -0
  10. data/ext/src/e_ctl_data.h +147 -0
  11. data/ext/src/e_ichi_io.c +498 -0
  12. data/ext/src/e_ichi_io.h +40 -0
  13. data/ext/src/e_ichi_parms.c +37 -0
  14. data/ext/src/e_ichi_parms.h +41 -0
  15. data/ext/src/e_ichicomp.h +50 -0
  16. data/ext/src/e_ichierr.h +40 -0
  17. data/ext/src/e_ichimain.c +593 -0
  18. data/ext/src/e_ichisize.h +43 -0
  19. data/ext/src/e_inchi_atom.c +75 -0
  20. data/ext/src/e_inchi_atom.h +33 -0
  21. data/ext/src/e_inpdef.h +41 -0
  22. data/ext/src/e_mode.h +706 -0
  23. data/ext/src/e_mol2atom.c +649 -0
  24. data/ext/src/e_readinch.c +58 -0
  25. data/ext/src/e_readmol.c +54 -0
  26. data/ext/src/e_readmol.h +180 -0
  27. data/ext/src/e_readstru.c +251 -0
  28. data/ext/src/e_readstru.h +33 -0
  29. data/ext/src/e_util.c +284 -0
  30. data/ext/src/e_util.h +61 -0
  31. data/ext/src/extr_ct.h +251 -0
  32. data/ext/src/ichi.h +206 -0
  33. data/ext/src/ichi_bns.c +7999 -0
  34. data/ext/src/ichi_bns.h +231 -0
  35. data/ext/src/ichican2.c +5000 -0
  36. data/ext/src/ichicano.c +2195 -0
  37. data/ext/src/ichicano.h +49 -0
  38. data/ext/src/ichicans.c +1625 -0
  39. data/ext/src/ichicant.h +379 -0
  40. data/ext/src/ichicomn.h +260 -0
  41. data/ext/src/ichicomp.h +50 -0
  42. data/ext/src/ichidrp.h +119 -0
  43. data/ext/src/ichierr.h +124 -0
  44. data/ext/src/ichiisot.c +101 -0
  45. data/ext/src/ichilnct.c +286 -0
  46. data/ext/src/ichimain.h +132 -0
  47. data/ext/src/ichimak2.c +1189 -0
  48. data/ext/src/ichimake.c +3812 -0
  49. data/ext/src/ichimake.h +205 -0
  50. data/ext/src/ichimap1.c +851 -0
  51. data/ext/src/ichimap2.c +2856 -0
  52. data/ext/src/ichimap4.c +1609 -0
  53. data/ext/src/ichinorm.c +741 -0
  54. data/ext/src/ichinorm.h +67 -0
  55. data/ext/src/ichiparm.c +45 -0
  56. data/ext/src/ichiparm.h +1441 -0
  57. data/ext/src/ichiprt1.c +3612 -0
  58. data/ext/src/ichiprt2.c +1511 -0
  59. data/ext/src/ichiprt3.c +3011 -0
  60. data/ext/src/ichiqueu.c +1003 -0
  61. data/ext/src/ichiring.c +326 -0
  62. data/ext/src/ichiring.h +49 -0
  63. data/ext/src/ichisize.h +35 -0
  64. data/ext/src/ichisort.c +539 -0
  65. data/ext/src/ichister.c +3538 -0
  66. data/ext/src/ichister.h +35 -0
  67. data/ext/src/ichitaut.c +3843 -0
  68. data/ext/src/ichitaut.h +387 -0
  69. data/ext/src/ichitime.h +74 -0
  70. data/ext/src/inchi_api.h +670 -0
  71. data/ext/src/inchi_dll.c +1480 -0
  72. data/ext/src/inchi_dll.h +34 -0
  73. data/ext/src/inchi_dll_main.c +23 -0
  74. data/ext/src/inchi_dll_main.h +31 -0
  75. data/ext/src/inpdef.h +328 -0
  76. data/ext/src/lreadmol.h +1246 -0
  77. data/ext/src/mode.h +706 -0
  78. data/ext/src/ruby_inchi_main.c +558 -0
  79. data/ext/src/runichi.c +4179 -0
  80. data/ext/src/strutil.c +3861 -0
  81. data/ext/src/strutil.h +182 -0
  82. data/ext/src/util.c +1130 -0
  83. data/ext/src/util.h +85 -0
  84. data/lib/clean_tempfile.rb +220 -0
  85. data/lib/rino.rb +111 -0
  86. data/test/test.rb +386 -0
  87. metadata +130 -0
@@ -0,0 +1,649 @@
1
+ /*
2
+ * International Union of Pure and Applied Chemistry (IUPAC)
3
+ * International Chemical Identifier (InChI)
4
+ * Version 1
5
+ * Software version 1.00
6
+ * April 13, 2005
7
+ * Developed at NIST
8
+ */
9
+
10
+ #include <stdio.h>
11
+ #include <stdlib.h>
12
+ #include <string.h>
13
+ #include <ctype.h>
14
+ #include <stdarg.h>
15
+ #include <errno.h>
16
+ #include <limits.h>
17
+ #include <float.h>
18
+ #include <math.h>
19
+
20
+ #include "e_mode.h"
21
+ #include "inchi_api.h"
22
+ #include "e_ctl_data.h"
23
+
24
+ #include "e_comdef.h"
25
+ #include "e_util.h"
26
+ #include "e_ichicomp.h"
27
+
28
+ #include "e_readmol.h"
29
+ #include "e_inpdef.h"
30
+
31
+ #if( ADD_CMLPP == 1 )
32
+ #include "e_readcml.hpp"
33
+ #endif
34
+
35
+ #include "e_inchi_atom.h"
36
+
37
+
38
+ #define MIN_STDATA_X_COORD 0.0 /* origin added to x after rescaling in mol_to_inchi_Atom_xyz() */
39
+ #define MAX_STDATA_X_COORD 256.0 /* (MAX - MIN) is the target x extent used to derive x_coeff */
40
+ #define MIN_STDATA_Y_COORD 0.0 /* origin added to y after rescaling */
41
+ #define MAX_STDATA_Y_COORD 256.0 /* (MAX - MIN) is the target y extent used to derive y_coeff */
42
+ #define MIN_STDATA_Z_COORD 0.0 /* origin added to z after rescaling */
43
+ #define MAX_STDATA_Z_COORD 256.0 /* (MAX - MIN) is the target z extent used to derive z_coeff */
44
+ #define MAX_STDATA_AVE_BOND_LENGTH 20.0 /* scale factor is reduced if the scaled average bond length exceeds this */
45
+ #define MIN_STDATA_AVE_BOND_LENGTH 10.0 /* scale factor is raised if the scaled average bond length falls below this */
46
+
47
+
48
+ /* local prototypes: forward declarations of functions defined further down in this file */
49
+ inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
50
+ int bDoNotAddH, int *err, char *pStrErr ); /* atoms, bonds and implicit H from a parsed MOLfile */
51
+ int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr ); /* coordinates; returns number of dimensions */
52
+
53
+ int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
54
+ int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
55
+ long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ); /* reads one structure from the input file */
56
+ long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr ); /* structure number recovered from the MOLfile header, or 0 */
57
+
58
+
59
+ /* too long ave. bond length prevents structure from displaying */
60
+ /* According to Steve, a standard bond length is 10. 9-24-97 DCh */
61
+ /* Ave. bond length in MainLib is 20. Also fixed Average */
62
+ /* bond length calculation by introducing num_avg_bonds */
63
+ /* in mol_to_stdata(). 12-9-99 DCh. */
64
+
65
+ /******************************************************************************************************/
66
+ inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
67
+ int bDoNotAddH, int *err, char *pStrErr )
68
+ {
69
+ inchi_Atom *at = NULL;
70
+ /* char *bond_stereo = NULL; */
71
+ AT_NUM *p1, *p2;
72
+ int i, a1, a2, n1, n2, bonds;
73
+ S_CHAR cBondStereo, cBondType;
74
+ S_CHAR cStereo1, cStereo2;
75
+
76
+ *err = 0;
77
+ *num_atoms = *num_bonds = 0;
78
+ /* check if MOLfile contains atoms */
79
+ if ( !mol_data || !mol_data->ctab.MolAtom ||
80
+ 0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond ||
81
+ 0 >= (*num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
82
+ /* MOLFILE_ERR_SET (*err, 0, "Empty structure"); */
83
+ goto exit_function; /* no structure */
84
+ }
85
+ /* allocate memory if necessary */
86
+ if ( at_inp ) {
87
+ at = at_inp;
88
+ } else
89
+ if ( !(at = e_CreateInchi_Atom( *num_atoms ) ) ) {
90
+ *err = -1;
91
+ MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
92
+ }
93
+
94
+ /* copy atom info */
95
+ for ( i = 0; i < *num_atoms; i ++ ) {
96
+ e_mystrncpy( at[i].elname, mol_data->ctab.MolAtom[i].szAtomSymbol, sizeof(at->elname) );
97
+ /* at[i].chem_bonds_valence = mol_data->ctab.MolAtom[i].cValence; */ /* MOLfile valence; will change */
98
+ at[i].isotopic_mass = mol_data->ctab.MolAtom[i].cMassDifference;
99
+ at[i].charge = mol_data->ctab.MolAtom[i].cCharge;
100
+ at[i].radical = mol_data->ctab.MolAtom[i].cRadical;
101
+ /* coordinates are copied in mol_to_inchi_Atom_xyz() */
102
+ #if( SINGLET_IS_TRIPLET == 1 )
103
+ if ( at[i].radical == RADICAL_SINGLET ) {
104
+ at[i].radical = RADICAL_TRIPLET;
105
+ }
106
+ #endif
107
+ /* removed parsing at[i].elname to extract H, charge, radical from the
108
+ Molfile alias record now this is done in the INChI dll */
109
+ }
110
+
111
+ /* copy bond info */
112
+ for ( i = 0, bonds = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
113
+ cBondStereo = mol_data->ctab.MolBond[i].cBondStereo;
114
+ cBondType = mol_data->ctab.MolBond[i].cBondType;
115
+ a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
116
+ a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
117
+
118
+ if ( a1 < 0 || a1 >= *num_atoms ||
119
+ a2 < 0 || a2 >= *num_atoms ||
120
+ a1 == a2 ) {
121
+ *err |= 1; /* bond for impossible atom number(s); ignored */
122
+ MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
123
+ continue;
124
+ }
125
+ /* check for multiple bonds between same atoms */
126
+ p1 = e_is_in_the_slist( at[a1].neighbor, (AT_NUM)a2, at[a1].num_bonds );
127
+ p2 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );
128
+ if ( (p1 || p2) && (p1 || at[a1].num_bonds < MAXVAL) && (p2 || at[a2].num_bonds < MAXVAL) ) {
129
+ n1 = p1? (p1 - at[a1].neighbor) : at[a1].num_bonds ++;
130
+ n2 = p2? (p2 - at[a2].neighbor) : at[a2].num_bonds ++;
131
+ MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
132
+ *err |= 2; /* multiple bonds between atoms */
133
+ } else
134
+ if ( !p1 && !p2 && at[a1].num_bonds < MAXVAL && at[a2].num_bonds < MAXVAL ) {
135
+ n1 = at[a1].num_bonds ++;
136
+ n2 = at[a2].num_bonds ++;
137
+ bonds ++;
138
+ } else {
139
+ char szMsg[64];
140
+ *err |= 4; /* too large number of bonds. Some bonds ignored. */
141
+ sprintf( szMsg, "Atom '%s' has more than %d bonds",
142
+ at[a1].num_bonds>= MAXVAL? at[a1].elname:at[a2].elname, MAXVAL );
143
+ MOLFILE_ERR_SET (*err, 0, szMsg);
144
+ continue;
145
+ }
146
+ if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
147
+ char szBondType[16];
148
+ sprintf( szBondType, "%d", cBondType );
149
+ cBondType = 1;
150
+ MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
151
+ MOLFILE_ERR_SET (*err, 0, szBondType);
152
+ *err |= 8; /* Unrecognized Bond type replaced with single bond */
153
+ }
154
+ /* bond type */
155
+ at[a1].bond_type[n1] =
156
+ at[a2].bond_type[n2] = cBondType;
157
+ /* connection */
158
+ at[a1].neighbor[n1] = (AT_NUM)a2;
159
+ at[a2].neighbor[n2] = (AT_NUM)a1;
160
+ /* stereo */
161
+ switch ( cBondStereo ) {
162
+ case INPUT_STEREO_DBLE_EITHER: /* 3 */
163
+ cStereo1 = INCHI_BOND_STEREO_DOUBLE_EITHER;
164
+ cStereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER;
165
+ break;
166
+ case INPUT_STEREO_SNGL_UP: /* 1 */
167
+ cStereo1 = INCHI_BOND_STEREO_SINGLE_1UP;
168
+ cStereo2 = INCHI_BOND_STEREO_SINGLE_2UP;
169
+ break;
170
+ case INPUT_STEREO_SNGL_EITHER: /* 4 */
171
+ cStereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER;
172
+ cStereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER;
173
+ break;
174
+ case INPUT_STEREO_SNGL_DOWN: /* 6 */
175
+ cStereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN;
176
+ cStereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN;
177
+ break;
178
+ case 0:
179
+ cStereo1 = INCHI_BOND_STEREO_NONE;
180
+ cStereo2 = INCHI_BOND_STEREO_NONE;
181
+ break;
182
+ default:
183
+ *err |= 16; /* Ignored unrecognized Bond stereo */
184
+ MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
185
+ continue;
186
+ }
187
+ at[a1].bond_stereo[n1] = cStereo1; /* >0: the wedge (pointed) end is at this atom */
188
+ at[a2].bond_stereo[n2] = cStereo2; /* <0: the wedge (pointed) end is at the opposite atom */
189
+ }
190
+ *num_bonds = bonds;
191
+
192
+ /* special Molfile valences */
193
+ for ( a1 = 0; a1 < *num_atoms; a1 ++ ) {
194
+ int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1], bond_type;
195
+ int chem_bonds_valence, valence;
196
+ if ( mol_data->ctab.MolAtom[a1].cValence &&
197
+ (mol_data->ctab.MolAtom[a1].cValence != 15 || at[a1].num_bonds) ) {
198
+ /* Molfile contains special valence => calculate number of H */
199
+ memset( num_bond_type, 0, sizeof(num_bond_type) );
200
+ valence = mol_data->ctab.MolAtom[a1].cValence; /* save atom valence if available */
201
+ for ( n1 = 0; n1 < at[a1].num_bonds; n1 ++ ) {
202
+ bond_type = at[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE;
203
+ if ( bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
204
+ bond_type = 0;
205
+ MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
206
+ }
207
+ num_bond_type[ bond_type ] ++;
208
+ }
209
+ chem_bonds_valence = 0;
210
+ for ( n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1 ++ ) {
211
+ chem_bonds_valence += (MIN_INPUT_BOND_TYPE + n1) * num_bond_type[n1];
212
+ }
213
+ if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE &&
214
+ ( n2 = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE] ) ) {
215
+ /* accept input aromatic bonds for now */
216
+ switch ( n2 ) {
217
+ case 2:
218
+ chem_bonds_valence += 3; /* =A- */
219
+ break;
220
+ case 3:
221
+ chem_bonds_valence += 4; /* =A< */
222
+ break;
223
+ default:
224
+ /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */
225
+ for ( n1 = 0; n1 < at[a1].num_bonds; n1 ++ ) {
226
+ if ( at[a1].bond_type[n1] == INCHI_BOND_TYPE_ALTERN ) {
227
+ a2 = at[a1].neighbor[n1];
228
+ p1 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );
229
+ if ( p1 ) {
230
+ at[a1].bond_type[n1] =
231
+ at[a2].bond_type[p1-at[a2].neighbor] = INCHI_BOND_TYPE_SINGLE;
232
+ } else {
233
+ *err = -2; /* Program error */
234
+ MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
235
+ goto exit_function; /* no structure */
236
+ }
237
+ }
238
+ }
239
+ chem_bonds_valence += n2;
240
+ *err |= 32;
241
+ MOLFILE_ERR_SET (*err, 0, "Atom has more than 3 aromatic bonds");
242
+ break;
243
+ }
244
+ }
245
+ /*************************************************************************************
246
+ *
247
+ * Set number of hydrogen atoms
248
+ */
249
+ if ( valence >= chem_bonds_valence ) {
250
+ at[a1].num_iso_H[0] = valence - chem_bonds_valence;
251
+ }
252
+ } else
253
+ if ( mol_data->ctab.MolAtom[a1].cAtomAliasedFlag ) {
254
+ at[a1].num_iso_H[0] = 0;
255
+ } else
256
+ if ( mol_data->ctab.MolAtom[a1].cValence == 15 && !at[a1].num_bonds ) {
257
+ at[a1].num_iso_H[0] = 0;
258
+ } else
259
+ if ( !bDoNotAddH ) {
260
+ at[a1].num_iso_H[0] = -1;
261
+ }
262
+ }
263
+
264
+ exit_function:;
265
+ return at;
266
+ }
267
+ /******************************************************************************************************/
268
+ int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr )
269
+ {
270
+ int i, num_dimensions=0;
271
+ int num_bonds;
272
+ double max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
273
+ double min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
274
+ double macheps = 1.0e-10, small_coeff = 0.00001;
275
+ double x_coeff, y_coeff, z_coeff, coeff, average_bond_length;
276
+
277
+ /* *err = 0; */
278
+ /* check if MOLfile contains atoms */
279
+ if ( !mol_data || !mol_data->ctab.MolAtom ||
280
+ 0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond ||
281
+ 0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
282
+ goto exit_function; /* no structure */
283
+ }
284
+ /* copy atom info */
285
+ for ( i = 0; i < num_atoms; i ++ ) {
286
+ max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
287
+ min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
288
+ max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
289
+ min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
290
+ max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
291
+ min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
292
+ }
293
+
294
+ /* copy bond info */
295
+ num_bonds = 0;
296
+ average_bond_length = 0.0;
297
+ for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
298
+ int a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
299
+ int a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
300
+ double dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
301
+ double dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
302
+ double dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;
303
+
304
+ if ( a1 < 0 || a1 >= num_atoms ||
305
+ a2 < 0 || a2 >= num_atoms ||
306
+ a1 == a2 ) {
307
+ *err |= 1; /* bond for impossible atom number(s); ignored */
308
+ MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
309
+ continue;
310
+ }
311
+ average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
312
+ num_bonds ++;
313
+ }
314
+
315
+ /* convert to integral coordinates */
316
+
317
+ if ( max_x - min_x <= small_coeff*(fabs(max_x) + fabs(min_x)) )
318
+ x_coeff = 0.0;
319
+ else
320
+ x_coeff = (MAX_STDATA_X_COORD - MIN_STDATA_X_COORD)/(max_x - min_x);
321
+
322
+ if ( max_y - min_y <= small_coeff*(fabs(max_y) + fabs(min_y)) )
323
+ y_coeff = 0.0;
324
+ else
325
+ y_coeff = (MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD)/(max_y - min_y);
326
+ if ( max_z - min_z <= small_coeff*(fabs(max_z) + fabs(min_z)) )
327
+ z_coeff = 0.0;
328
+ else
329
+ z_coeff = (MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD)/(max_z - min_z);
330
+
331
+ num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
332
+ (fabs(z_coeff) > macheps)? 3: 0;
333
+
334
+ switch ( num_dimensions ) {
335
+ case 0:
336
+ coeff = 0.0;
337
+ break;
338
+ case 2:
339
+ /* choose the smallest stretching coefficient */
340
+ if ( x_coeff > macheps && y_coeff > macheps ) {
341
+ coeff = inchi_min( x_coeff, y_coeff );
342
+ }else
343
+ if ( x_coeff > macheps ){
344
+ coeff = x_coeff;
345
+ }else
346
+ if ( y_coeff > macheps ){
347
+ coeff = y_coeff;
348
+ }else{
349
+ coeff = 1.0;
350
+ }
351
+ break;
352
+ case 3:
353
+ /* choose the smallest stretching coefficient */
354
+ if ( x_coeff > macheps && y_coeff > macheps ) {
355
+ coeff = inchi_min( x_coeff, y_coeff );
356
+ coeff = inchi_min( coeff, z_coeff );
357
+ }else
358
+ if ( x_coeff > macheps ){
359
+ coeff = inchi_min( x_coeff, z_coeff );
360
+ }else
361
+ if ( y_coeff > macheps ){
362
+ coeff = inchi_min( y_coeff, z_coeff );
363
+ }else{
364
+ coeff = z_coeff;
365
+ }
366
+ break;
367
+ default:
368
+ coeff = 0.0;
369
+ }
370
+
371
+ if ( num_bonds > 0 ) {
372
+ average_bond_length /= (double)num_bonds;
373
+ if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
374
+ coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
375
+ } else
376
+ if ( average_bond_length * coeff < macheps ) {
377
+ coeff = 1.0; /* all lengths are of zero length */
378
+ } else
379
+ if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
380
+ coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
381
+ }
382
+ }
383
+ #if( NORMALIZE_INP_COORD == 1 )
384
+ /* set integral coordinates */
385
+ for ( i = 0; i < num_atoms; i ++ ) {
386
+ double x = mol_data->ctab.MolAtom[i].fX;
387
+ double y = mol_data->ctab.MolAtom[i].fY;
388
+ double z = mol_data->ctab.MolAtom[i].fZ;
389
+ x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
390
+ y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
391
+ z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
392
+ /* floor() behavior is not well defined for negative arguments.
393
+ * Use positive arguments only to get nearest integer.
394
+ */
395
+ at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
396
+ at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
397
+ at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
398
+ }
399
+ #else
400
+ /* set input coordinates */
401
+ for ( i = 0; i < num_atoms; i ++ ) {
402
+ double x = mol_data->ctab.MolAtom[i].fX;
403
+ double y = mol_data->ctab.MolAtom[i].fY;
404
+ double z = mol_data->ctab.MolAtom[i].fZ;
405
+ at[i].x = x;
406
+ at[i].y = y;
407
+ at[i].z = z;
408
+ }
409
+ #endif
410
+
411
+ exit_function:;
412
+ return num_dimensions;
413
+ }
414
+ /****************************************************************************/
415
+ long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
416
+ {
417
+ static char sStruct[] = "Structure #";
418
+ static char sINCHI[] = INCHI_NAME;
419
+ long lMolfileNumber = 0;
420
+ char *p, *q = NULL;
421
+ if ( pHdr ) {
422
+ if ( !memicmp( pHdr->szMoleculeName, sStruct, sizeof(sStruct)-1 ) ) {
423
+ p = pHdr->szMoleculeName + sizeof(sStruct)-1;
424
+ lMolfileNumber = strtol( p, &q, 10 );
425
+ p = pHdr->szMoleculeLine2;
426
+ if ( !q || *q ||
427
+ memicmp( p, sINCHI, sizeof(sINCHI)-1) ||
428
+ !strstr( p+sizeof(sINCHI)-1, "SDfile Output" ) ) {
429
+ lMolfileNumber = 0;
430
+ }
431
+ }
432
+ }
433
+ return lMolfileNumber;
434
+ }
435
+
436
+ /****************************************************************************/
437
+ int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
438
+ int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
439
+ long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
440
+ {
441
+ int num_atoms = 0;
442
+ MOL_DATA *mol_data = NULL;
443
+ MOL_HEADER_BLOCK OnlyHeaderBlock, *pOnlyHeaderBlock = NULL, *pHdr;
444
+ MOL_CTAB OnlyCtab, *pOnlyCtab = NULL;
445
+ char cSdfValueFirstChar = '\0';
446
+ #ifdef CML_DEBUG
447
+ FILE *f_p;
448
+ #endif
449
+ if ( at ) {
450
+ pOnlyHeaderBlock = NULL;
451
+ if ( *at && max_num_at ) {
452
+ memset( *at, 0, max_num_at * sizeof(**at) );
453
+ }
454
+ } else {
455
+ pOnlyHeaderBlock = &OnlyHeaderBlock;
456
+ pOnlyCtab = &OnlyCtab;
457
+ }
458
+ if ( pSdfValue ) {
459
+ cSdfValueFirstChar = pSdfValue[0];
460
+ pSdfValue[0] = '\0';
461
+ }
462
+
463
+ mol_data = e_read_sdfile_segment(inp_molfile, pOnlyHeaderBlock, pOnlyCtab, 0, NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );
464
+
465
+ pHdr = ( mol_data && !pOnlyHeaderBlock )? &mol_data->hdr :
466
+ ( !mol_data && pOnlyHeaderBlock )? pOnlyHeaderBlock : NULL;
467
+ if ( lMolfileNumber && pHdr ) {
468
+ *lMolfileNumber = GetMolfileNumber( pHdr );
469
+ }
470
+ if ( pSdfValue &&
471
+ pSdfLabel && pSdfLabel[0] && pHdr ) {
472
+ if ( !stricmp(pSdfLabel, "MOLFILENAME") ) {
473
+ e_mystrncpy( pSdfValue, pHdr->szMoleculeName, MAX_SDF_VALUE+1 );
474
+ e_LtrimRtrim( pSdfValue, NULL );
475
+ } else
476
+ if ( !stricmp(pSdfLabel, "MOLFILELINE2") ) {
477
+ e_mystrncpy( pSdfValue, pHdr->szMoleculeLine2, MAX_SDF_VALUE+1 );
478
+ e_LtrimRtrim( pSdfValue, NULL );
479
+ } else
480
+ if ( !stricmp(pSdfLabel, "MOLFILECOMMENT") ) {
481
+ e_mystrncpy( pSdfValue, pHdr->szComment, MAX_SDF_VALUE+1 );
482
+ e_LtrimRtrim( pSdfValue, NULL );
483
+ }
484
+ if ( !pSdfValue[0] ) {
485
+ pSdfValue[0] = cSdfValueFirstChar;
486
+ }
487
+ }
488
+
489
+ if ( mol_data && at && !*err ) {
490
+ /* *at points to an allocated memory */
491
+ if ( *at && mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
492
+ *at = mol_to_inchi_Atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
493
+ if ( *err >= 0 ) {
494
+ *num_dimensions = mol_to_inchi_Atom_xyz( mol_data, num_atoms, *at, err, pStrErr );
495
+ }
496
+ } else
497
+ /* *at points to NULL */
498
+ if ( !*at && mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
499
+ *at = mol_to_inchi_Atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
500
+ if ( *err >= 0 ) {
501
+ *num_dimensions = mol_to_inchi_Atom_xyz( mol_data, num_atoms, *at, err, pStrErr );
502
+ }
503
+ } else {
504
+ MOLFILE_ERR_SET (*err, 0, "Too many atoms");
505
+ *err = 70;
506
+ num_atoms = -1;
507
+ }
508
+ if ( *err > 0 ) {
509
+ *err += 100;
510
+ }
511
+ /* 11-16-2004: use Chiral flag */
512
+ if ( num_atoms > 0 && at && *at && mol_data && pInpAtomFlags ) {
513
+ if ( mol_data->ctab.cChiralFlag ) {
514
+ *pInpAtomFlags |= FLAG_INP_AT_CHIRAL;
515
+ } else {
516
+ *pInpAtomFlags |= FLAG_INP_AT_NONCHIRAL;
517
+ }
518
+ }
519
+ } else
520
+ if ( !at ) {
521
+ num_atoms = pOnlyCtab->nNumberOfAtoms;
522
+ }
523
+
524
+ if ( !pOnlyHeaderBlock ) {
525
+ e_delete_mol_data( mol_data );
526
+ }
527
+ #ifdef CML_DEBUG
528
+ puts ("MOL");
529
+ f_p = fopen ("mol.dbg", "a");
530
+ if (f_p)
531
+ {
532
+ PrintInpAtom (f_p, *at, num_atoms);
533
+ fclose (f_p);
534
+ }
535
+ else
536
+ {
537
+ puts ("Couldn't open file");
538
+ }
539
+ #endif
540
+
541
+ return num_atoms;
542
+ }
543
+ /**********************************************************************************/
544
+ int e_MolfileToInchi_Input( FILE *inp_molfile, inchi_Input *orig_at_data, int bMergeAllInputStructures,
545
+ int bDoNotAddH, int bAllowEmptyStructure,
546
+ const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
547
+ INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
548
+ {
549
+ /* inp_ATOM *at = NULL; */
550
+ int num_dimensions_new;
551
+ int num_inp_bonds_new;
552
+ int num_inp_atoms_new;
553
+ inchi_Atom *at_new = NULL;
554
+ inchi_Atom *at_old = NULL;
555
+ int nNumAtoms = 0;
556
+ int i, j;
557
+
558
+ if ( pStrErr ) {
559
+ pStrErr[0] = '\0';
560
+ }
561
+
562
+ /*FreeOrigAtData( orig_at_data );*/
563
+
564
+ do {
565
+
566
+ at_old = orig_at_data? orig_at_data->atom : NULL; /* save pointer to the previous allocation */
567
+ num_inp_atoms_new =
568
+ MolfileToInchi_Atom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, MAX_ATOMS,
569
+ &num_dimensions_new, &num_inp_bonds_new,
570
+ pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
571
+
572
+
573
+ if ( num_inp_atoms_new <= 0 && !*err ) {
574
+ if ( !bAllowEmptyStructure ) {
575
+ MOLFILE_ERR_SET (*err, 0, "Empty structure"); /* the message will be issued by the InChI library */
576
+ }
577
+ *err = 98;
578
+ } else
579
+ if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_atoms > 0 && bMergeAllInputStructures ) {
580
+ *err = 0; /* end of file */
581
+ break;
582
+ } else
583
+ if ( num_inp_atoms_new > 0 && orig_at_data ) {
584
+ /* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
585
+ nNumAtoms = num_inp_atoms_new + orig_at_data->num_atoms;
586
+ if ( nNumAtoms >= MAX_ATOMS ) {
587
+ MOLFILE_ERR_SET (*err, 0, "Too many atoms");
588
+ *err = 70;
589
+ orig_at_data->num_atoms = -1;
590
+ } else
591
+ if ( !at_old ) {
592
+ /* the first structure */
593
+ orig_at_data->atom = at_new;
594
+ at_new = NULL;
595
+ orig_at_data->num_atoms = num_inp_atoms_new;
596
+ } else
597
+ if ( orig_at_data->atom = e_CreateInchi_Atom( nNumAtoms ) ) {
598
+ /* switch at_new <--> orig_at_data->at; */
599
+ if ( orig_at_data->num_atoms ) {
600
+ memcpy( orig_at_data->atom, at_old, orig_at_data->num_atoms * sizeof(orig_at_data->atom[0]) );
601
+ /* adjust numbering in the newly read structure */
602
+ for ( i = 0; i < num_inp_atoms_new; i ++ ) {
603
+ for ( j = 0; j < at_new[i].num_bonds; j ++ ) {
604
+ at_new[i].neighbor[j] += orig_at_data->num_atoms;
605
+ }
606
+ }
607
+ }
608
+ e_FreeInchi_Atom( &at_old );
609
+ /* copy newly read structure */
610
+ memcpy( orig_at_data->atom + orig_at_data->num_atoms,
611
+ at_new,
612
+ num_inp_atoms_new * sizeof(orig_at_data->atom[0]) );
613
+ /* add other things */
614
+ orig_at_data->num_atoms += num_inp_atoms_new;
615
+ } else {
616
+ MOLFILE_ERR_SET (*err, 0, "Out of RAM");
617
+ *err = -1;
618
+ }
619
+ } else
620
+ if ( num_inp_atoms_new > 0 ) {
621
+ nNumAtoms += num_inp_atoms_new;
622
+ }
623
+ e_FreeInchi_Atom( &at_new );
624
+
625
+ } while ( !*err && bMergeAllInputStructures );
626
+ /*
627
+ if ( !*err ) {
628
+ orig_at_data->num_components =
629
+ MarkDisconnectedComponents( orig_at_data );
630
+ if ( orig_at_data->num_components == 0 ) {
631
+ MOLFILE_ERR_SET (*err, 0, "No components found");
632
+ *err = 99;
633
+ }
634
+ if ( orig_at_data->num_components < 0 ) {
635
+ MOLFILE_ERR_SET (*err, 0, "Too many components");
636
+ *err = 99;
637
+ }
638
+ }
639
+ */
640
+ e_FreeInchi_Atom( &at_new );
641
+ if ( *err ) {
642
+ e_FreeInchi_Input( orig_at_data );
643
+ }
644
+ if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
645
+ MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
646
+ }
647
+ return orig_at_data? orig_at_data->num_atoms : nNumAtoms;
648
+ }
649
+