rino 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/README +44 -0
  2. data/Rakefile +123 -0
  3. data/ext/extconf.rb +26 -0
  4. data/ext/ruby_inchi_main.so +0 -0
  5. data/ext/src/aux2atom.h +2786 -0
  6. data/ext/src/comdef.h +148 -0
  7. data/ext/src/e_0dstereo.c +3014 -0
  8. data/ext/src/e_0dstereo.h +31 -0
  9. data/ext/src/e_comdef.h +57 -0
  10. data/ext/src/e_ctl_data.h +147 -0
  11. data/ext/src/e_ichi_io.c +498 -0
  12. data/ext/src/e_ichi_io.h +40 -0
  13. data/ext/src/e_ichi_parms.c +37 -0
  14. data/ext/src/e_ichi_parms.h +41 -0
  15. data/ext/src/e_ichicomp.h +50 -0
  16. data/ext/src/e_ichierr.h +40 -0
  17. data/ext/src/e_ichimain.c +593 -0
  18. data/ext/src/e_ichisize.h +43 -0
  19. data/ext/src/e_inchi_atom.c +75 -0
  20. data/ext/src/e_inchi_atom.h +33 -0
  21. data/ext/src/e_inpdef.h +41 -0
  22. data/ext/src/e_mode.h +706 -0
  23. data/ext/src/e_mol2atom.c +649 -0
  24. data/ext/src/e_readinch.c +58 -0
  25. data/ext/src/e_readmol.c +54 -0
  26. data/ext/src/e_readmol.h +180 -0
  27. data/ext/src/e_readstru.c +251 -0
  28. data/ext/src/e_readstru.h +33 -0
  29. data/ext/src/e_util.c +284 -0
  30. data/ext/src/e_util.h +61 -0
  31. data/ext/src/extr_ct.h +251 -0
  32. data/ext/src/ichi.h +206 -0
  33. data/ext/src/ichi_bns.c +7999 -0
  34. data/ext/src/ichi_bns.h +231 -0
  35. data/ext/src/ichican2.c +5000 -0
  36. data/ext/src/ichicano.c +2195 -0
  37. data/ext/src/ichicano.h +49 -0
  38. data/ext/src/ichicans.c +1625 -0
  39. data/ext/src/ichicant.h +379 -0
  40. data/ext/src/ichicomn.h +260 -0
  41. data/ext/src/ichicomp.h +50 -0
  42. data/ext/src/ichidrp.h +119 -0
  43. data/ext/src/ichierr.h +124 -0
  44. data/ext/src/ichiisot.c +101 -0
  45. data/ext/src/ichilnct.c +286 -0
  46. data/ext/src/ichimain.h +132 -0
  47. data/ext/src/ichimak2.c +1189 -0
  48. data/ext/src/ichimake.c +3812 -0
  49. data/ext/src/ichimake.h +205 -0
  50. data/ext/src/ichimap1.c +851 -0
  51. data/ext/src/ichimap2.c +2856 -0
  52. data/ext/src/ichimap4.c +1609 -0
  53. data/ext/src/ichinorm.c +741 -0
  54. data/ext/src/ichinorm.h +67 -0
  55. data/ext/src/ichiparm.c +45 -0
  56. data/ext/src/ichiparm.h +1441 -0
  57. data/ext/src/ichiprt1.c +3612 -0
  58. data/ext/src/ichiprt2.c +1511 -0
  59. data/ext/src/ichiprt3.c +3011 -0
  60. data/ext/src/ichiqueu.c +1003 -0
  61. data/ext/src/ichiring.c +326 -0
  62. data/ext/src/ichiring.h +49 -0
  63. data/ext/src/ichisize.h +35 -0
  64. data/ext/src/ichisort.c +539 -0
  65. data/ext/src/ichister.c +3538 -0
  66. data/ext/src/ichister.h +35 -0
  67. data/ext/src/ichitaut.c +3843 -0
  68. data/ext/src/ichitaut.h +387 -0
  69. data/ext/src/ichitime.h +74 -0
  70. data/ext/src/inchi_api.h +670 -0
  71. data/ext/src/inchi_dll.c +1480 -0
  72. data/ext/src/inchi_dll.h +34 -0
  73. data/ext/src/inchi_dll_main.c +23 -0
  74. data/ext/src/inchi_dll_main.h +31 -0
  75. data/ext/src/inpdef.h +328 -0
  76. data/ext/src/lreadmol.h +1246 -0
  77. data/ext/src/mode.h +706 -0
  78. data/ext/src/ruby_inchi_main.c +558 -0
  79. data/ext/src/runichi.c +4179 -0
  80. data/ext/src/strutil.c +3861 -0
  81. data/ext/src/strutil.h +182 -0
  82. data/ext/src/util.c +1130 -0
  83. data/ext/src/util.h +85 -0
  84. data/lib/clean_tempfile.rb +220 -0
  85. data/lib/rino.rb +111 -0
  86. data/test/test.rb +386 -0
  87. metadata +130 -0
@@ -0,0 +1,649 @@
1
+ /*
2
+ * International Union of Pure and Applied Chemistry (IUPAC)
3
+ * International Chemical Identifier (InChI)
4
+ * Version 1
5
+ * Software version 1.00
6
+ * April 13, 2005
7
+ * Developed at NIST
8
+ */
9
+
10
+ #include <stdio.h>
11
+ #include <stdlib.h>
12
+ #include <string.h>
13
+ #include <ctype.h>
14
+ #include <stdarg.h>
15
+ #include <errno.h>
16
+ #include <limits.h>
17
+ #include <float.h>
18
+ #include <math.h>
19
+
20
+ #include "e_mode.h"
21
+ #include "inchi_api.h"
22
+ #include "e_ctl_data.h"
23
+
24
+ #include "e_comdef.h"
25
+ #include "e_util.h"
26
+ #include "e_ichicomp.h"
27
+
28
+ #include "e_readmol.h"
29
+ #include "e_inpdef.h"
30
+
31
+ #if( ADD_CMLPP == 1 )
32
+ #include "e_readcml.hpp"
33
+ #endif
34
+
35
+ #include "e_inchi_atom.h"
36
+
37
+
38
+ #define MIN_STDATA_X_COORD 0.0 /* lower end of the target x range; added as offset when coordinates are normalized in mol_to_inchi_Atom_xyz() */
39
+ #define MAX_STDATA_X_COORD 256.0 /* upper end of the target x range; (MAX-MIN)/(max_x-min_x) gives the x stretching coefficient */
40
+ #define MIN_STDATA_Y_COORD 0.0 /* lower end of the target y range */
41
+ #define MAX_STDATA_Y_COORD 256.0 /* upper end of the target y range */
42
+ #define MIN_STDATA_Z_COORD 0.0 /* lower end of the target z range */
43
+ #define MAX_STDATA_Z_COORD 256.0 /* upper end of the target z range */
44
+ #define MAX_STDATA_AVE_BOND_LENGTH 20.0 /* if (average bond length * coeff) exceeds this, coeff is reduced so the scaled average equals it */
45
+ #define MIN_STDATA_AVE_BOND_LENGTH 10.0 /* if (average bond length * coeff) is below this (but not ~0), coeff is raised so the scaled average equals it */
46
+
47
+
48
+ /* local prototypes: all four functions are defined further down in this file */
49
+ inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
50
+ int bDoNotAddH, int *err, char *pStrErr ); /* connection table -> inchi_Atom array (no coordinates) */
51
+ int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr ); /* fills at[].x/y/z, returns 0, 2 or 3 dimensions */
52
+
53
+ int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
54
+ int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
55
+ long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr ); /* reads one SDfile segment and converts it */
56
+ long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr ); /* extracts N from a "Structure #N" molecule name, else 0 */
57
+
58
+
59
+ /* too long ave. bond length prevents structure from displaying */
60
+ /* According to Steve, a standard bond length is 10. 9-24-97 DCh */
61
+ /* Ave. bond length in MainLib is 20. Also fixed Average */
62
+ /* bond length calculation by introducing num_avg_bonds */
63
+ /* in mol_to_stdata(). 12-9-99 DCh. */
64
+
65
+ /******************************************************************************************************/
66
+ inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
67
+ int bDoNotAddH, int *err, char *pStrErr )
68
+ {
69
+ inchi_Atom *at = NULL;
70
+ /* char *bond_stereo = NULL; */
71
+ AT_NUM *p1, *p2;
72
+ int i, a1, a2, n1, n2, bonds;
73
+ S_CHAR cBondStereo, cBondType;
74
+ S_CHAR cStereo1, cStereo2;
75
+
76
+ *err = 0;
77
+ *num_atoms = *num_bonds = 0;
78
+ /* check if MOLfile contains atoms */
79
+ if ( !mol_data || !mol_data->ctab.MolAtom ||
80
+ 0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond ||
81
+ 0 >= (*num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
82
+ /* MOLFILE_ERR_SET (*err, 0, "Empty structure"); */
83
+ goto exit_function; /* no structure */
84
+ }
85
+ /* allocate memory if necessary */
86
+ if ( at_inp ) {
87
+ at = at_inp;
88
+ } else
89
+ if ( !(at = e_CreateInchi_Atom( *num_atoms ) ) ) {
90
+ *err = -1;
91
+ MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
92
+ }
93
+
94
+ /* copy atom info */
95
+ for ( i = 0; i < *num_atoms; i ++ ) {
96
+ e_mystrncpy( at[i].elname, mol_data->ctab.MolAtom[i].szAtomSymbol, sizeof(at->elname) );
97
+ /* at[i].chem_bonds_valence = mol_data->ctab.MolAtom[i].cValence; */ /* MOLfile valence; will change */
98
+ at[i].isotopic_mass = mol_data->ctab.MolAtom[i].cMassDifference;
99
+ at[i].charge = mol_data->ctab.MolAtom[i].cCharge;
100
+ at[i].radical = mol_data->ctab.MolAtom[i].cRadical;
101
+ /* coordinates are copied in mol_to_inchi_Atom_xyz() */
102
+ #if( SINGLET_IS_TRIPLET == 1 )
103
+ if ( at[i].radical == RADICAL_SINGLET ) {
104
+ at[i].radical = RADICAL_TRIPLET;
105
+ }
106
+ #endif
107
+ /* removed parsing at[i].elname to extract H, charge, radical from the
108
+ Molfile alias record now this is done in the INChI dll */
109
+ }
110
+
111
+ /* copy bond info */
112
+ for ( i = 0, bonds = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
113
+ cBondStereo = mol_data->ctab.MolBond[i].cBondStereo;
114
+ cBondType = mol_data->ctab.MolBond[i].cBondType;
115
+ a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
116
+ a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
117
+
118
+ if ( a1 < 0 || a1 >= *num_atoms ||
119
+ a2 < 0 || a2 >= *num_atoms ||
120
+ a1 == a2 ) {
121
+ *err |= 1; /* bond for impossible atom number(s); ignored */
122
+ MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
123
+ continue;
124
+ }
125
+ /* check for multiple bonds between same atoms */
126
+ p1 = e_is_in_the_slist( at[a1].neighbor, (AT_NUM)a2, at[a1].num_bonds );
127
+ p2 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );
128
+ if ( (p1 || p2) && (p1 || at[a1].num_bonds < MAXVAL) && (p2 || at[a2].num_bonds < MAXVAL) ) {
129
+ n1 = p1? (p1 - at[a1].neighbor) : at[a1].num_bonds ++;
130
+ n2 = p2? (p2 - at[a2].neighbor) : at[a2].num_bonds ++;
131
+ MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
132
+ *err |= 2; /* multiple bonds between atoms */
133
+ } else
134
+ if ( !p1 && !p2 && at[a1].num_bonds < MAXVAL && at[a2].num_bonds < MAXVAL ) {
135
+ n1 = at[a1].num_bonds ++;
136
+ n2 = at[a2].num_bonds ++;
137
+ bonds ++;
138
+ } else {
139
+ char szMsg[64];
140
+ *err |= 4; /* too large number of bonds. Some bonds ignored. */
141
+ sprintf( szMsg, "Atom '%s' has more than %d bonds",
142
+ at[a1].num_bonds>= MAXVAL? at[a1].elname:at[a2].elname, MAXVAL );
143
+ MOLFILE_ERR_SET (*err, 0, szMsg);
144
+ continue;
145
+ }
146
+ if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
147
+ char szBondType[16];
148
+ sprintf( szBondType, "%d", cBondType );
149
+ cBondType = 1;
150
+ MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
151
+ MOLFILE_ERR_SET (*err, 0, szBondType);
152
+ *err |= 8; /* Unrecognized Bond type replaced with single bond */
153
+ }
154
+ /* bond type */
155
+ at[a1].bond_type[n1] =
156
+ at[a2].bond_type[n2] = cBondType;
157
+ /* connection */
158
+ at[a1].neighbor[n1] = (AT_NUM)a2;
159
+ at[a2].neighbor[n2] = (AT_NUM)a1;
160
+ /* stereo */
161
+ switch ( cBondStereo ) {
162
+ case INPUT_STEREO_DBLE_EITHER: /* 3 */
163
+ cStereo1 = INCHI_BOND_STEREO_DOUBLE_EITHER;
164
+ cStereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER;
165
+ break;
166
+ case INPUT_STEREO_SNGL_UP: /* 1 */
167
+ cStereo1 = INCHI_BOND_STEREO_SINGLE_1UP;
168
+ cStereo2 = INCHI_BOND_STEREO_SINGLE_2UP;
169
+ break;
170
+ case INPUT_STEREO_SNGL_EITHER: /* 4 */
171
+ cStereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER;
172
+ cStereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER;
173
+ break;
174
+ case INPUT_STEREO_SNGL_DOWN: /* 6 */
175
+ cStereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN;
176
+ cStereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN;
177
+ break;
178
+ case 0:
179
+ cStereo1 = INCHI_BOND_STEREO_NONE;
180
+ cStereo2 = INCHI_BOND_STEREO_NONE;
181
+ break;
182
+ default:
183
+ *err |= 16; /* Ignored unrecognized Bond stereo */
184
+ MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
185
+ continue;
186
+ }
187
+ at[a1].bond_stereo[n1] = cStereo1; /* >0: the wedge (pointed) end is at this atom */
188
+ at[a2].bond_stereo[n2] = cStereo2; /* <0: the wedge (pointed) end is at the opposite atom */
189
+ }
190
+ *num_bonds = bonds;
191
+
192
+ /* special Molfile valences */
193
+ for ( a1 = 0; a1 < *num_atoms; a1 ++ ) {
194
+ int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1], bond_type;
195
+ int chem_bonds_valence, valence;
196
+ if ( mol_data->ctab.MolAtom[a1].cValence &&
197
+ (mol_data->ctab.MolAtom[a1].cValence != 15 || at[a1].num_bonds) ) {
198
+ /* Molfile contains special valence => calculate number of H */
199
+ memset( num_bond_type, 0, sizeof(num_bond_type) );
200
+ valence = mol_data->ctab.MolAtom[a1].cValence; /* save atom valence if available */
201
+ for ( n1 = 0; n1 < at[a1].num_bonds; n1 ++ ) {
202
+ bond_type = at[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE;
203
+ if ( bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
204
+ bond_type = 0;
205
+ MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
206
+ }
207
+ num_bond_type[ bond_type ] ++;
208
+ }
209
+ chem_bonds_valence = 0;
210
+ for ( n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1 ++ ) {
211
+ chem_bonds_valence += (MIN_INPUT_BOND_TYPE + n1) * num_bond_type[n1];
212
+ }
213
+ if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE &&
214
+ ( n2 = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE] ) ) {
215
+ /* accept input aromatic bonds for now */
216
+ switch ( n2 ) {
217
+ case 2:
218
+ chem_bonds_valence += 3; /* =A- */
219
+ break;
220
+ case 3:
221
+ chem_bonds_valence += 4; /* =A< */
222
+ break;
223
+ default:
224
+ /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */
225
+ for ( n1 = 0; n1 < at[a1].num_bonds; n1 ++ ) {
226
+ if ( at[a1].bond_type[n1] == INCHI_BOND_TYPE_ALTERN ) {
227
+ a2 = at[a1].neighbor[n1];
228
+ p1 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );
229
+ if ( p1 ) {
230
+ at[a1].bond_type[n1] =
231
+ at[a2].bond_type[p1-at[a2].neighbor] = INCHI_BOND_TYPE_SINGLE;
232
+ } else {
233
+ *err = -2; /* Program error */
234
+ MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
235
+ goto exit_function; /* no structure */
236
+ }
237
+ }
238
+ }
239
+ chem_bonds_valence += n2;
240
+ *err |= 32;
241
+ MOLFILE_ERR_SET (*err, 0, "Atom has more than 3 aromatic bonds");
242
+ break;
243
+ }
244
+ }
245
+ /*************************************************************************************
246
+ *
247
+ * Set number of hydrogen atoms
248
+ */
249
+ if ( valence >= chem_bonds_valence ) {
250
+ at[a1].num_iso_H[0] = valence - chem_bonds_valence;
251
+ }
252
+ } else
253
+ if ( mol_data->ctab.MolAtom[a1].cAtomAliasedFlag ) {
254
+ at[a1].num_iso_H[0] = 0;
255
+ } else
256
+ if ( mol_data->ctab.MolAtom[a1].cValence == 15 && !at[a1].num_bonds ) {
257
+ at[a1].num_iso_H[0] = 0;
258
+ } else
259
+ if ( !bDoNotAddH ) {
260
+ at[a1].num_iso_H[0] = -1;
261
+ }
262
+ }
263
+
264
+ exit_function:;
265
+ return at;
266
+ }
267
+ /******************************************************************************************************/
268
+ int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr )
269
+ {
270
+ int i, num_dimensions=0;
271
+ int num_bonds;
272
+ double max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
273
+ double min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
274
+ double macheps = 1.0e-10, small_coeff = 0.00001;
275
+ double x_coeff, y_coeff, z_coeff, coeff, average_bond_length;
276
+
277
+ /* *err = 0; */
278
+ /* check if MOLfile contains atoms */
279
+ if ( !mol_data || !mol_data->ctab.MolAtom ||
280
+ 0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond ||
281
+ 0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
282
+ goto exit_function; /* no structure */
283
+ }
284
+ /* copy atom info */
285
+ for ( i = 0; i < num_atoms; i ++ ) {
286
+ max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
287
+ min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
288
+ max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
289
+ min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
290
+ max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
291
+ min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
292
+ }
293
+
294
+ /* copy bond info */
295
+ num_bonds = 0;
296
+ average_bond_length = 0.0;
297
+ for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
298
+ int a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
299
+ int a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
300
+ double dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
301
+ double dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
302
+ double dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;
303
+
304
+ if ( a1 < 0 || a1 >= num_atoms ||
305
+ a2 < 0 || a2 >= num_atoms ||
306
+ a1 == a2 ) {
307
+ *err |= 1; /* bond for impossible atom number(s); ignored */
308
+ MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
309
+ continue;
310
+ }
311
+ average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
312
+ num_bonds ++;
313
+ }
314
+
315
+ /* convert to integral coordinates */
316
+
317
+ if ( max_x - min_x <= small_coeff*(fabs(max_x) + fabs(min_x)) )
318
+ x_coeff = 0.0;
319
+ else
320
+ x_coeff = (MAX_STDATA_X_COORD - MIN_STDATA_X_COORD)/(max_x - min_x);
321
+
322
+ if ( max_y - min_y <= small_coeff*(fabs(max_y) + fabs(min_y)) )
323
+ y_coeff = 0.0;
324
+ else
325
+ y_coeff = (MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD)/(max_y - min_y);
326
+ if ( max_z - min_z <= small_coeff*(fabs(max_z) + fabs(min_z)) )
327
+ z_coeff = 0.0;
328
+ else
329
+ z_coeff = (MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD)/(max_z - min_z);
330
+
331
+ num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
332
+ (fabs(z_coeff) > macheps)? 3: 0;
333
+
334
+ switch ( num_dimensions ) {
335
+ case 0:
336
+ coeff = 0.0;
337
+ break;
338
+ case 2:
339
+ /* choose the smallest stretching coefficient */
340
+ if ( x_coeff > macheps && y_coeff > macheps ) {
341
+ coeff = inchi_min( x_coeff, y_coeff );
342
+ }else
343
+ if ( x_coeff > macheps ){
344
+ coeff = x_coeff;
345
+ }else
346
+ if ( y_coeff > macheps ){
347
+ coeff = y_coeff;
348
+ }else{
349
+ coeff = 1.0;
350
+ }
351
+ break;
352
+ case 3:
353
+ /* choose the smallest stretching coefficient */
354
+ if ( x_coeff > macheps && y_coeff > macheps ) {
355
+ coeff = inchi_min( x_coeff, y_coeff );
356
+ coeff = inchi_min( coeff, z_coeff );
357
+ }else
358
+ if ( x_coeff > macheps ){
359
+ coeff = inchi_min( x_coeff, z_coeff );
360
+ }else
361
+ if ( y_coeff > macheps ){
362
+ coeff = inchi_min( y_coeff, z_coeff );
363
+ }else{
364
+ coeff = z_coeff;
365
+ }
366
+ break;
367
+ default:
368
+ coeff = 0.0;
369
+ }
370
+
371
+ if ( num_bonds > 0 ) {
372
+ average_bond_length /= (double)num_bonds;
373
+ if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
374
+ coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
375
+ } else
376
+ if ( average_bond_length * coeff < macheps ) {
377
+ coeff = 1.0; /* all lengths are of zero length */
378
+ } else
379
+ if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
380
+ coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
381
+ }
382
+ }
383
+ #if( NORMALIZE_INP_COORD == 1 )
384
+ /* set integral coordinates */
385
+ for ( i = 0; i < num_atoms; i ++ ) {
386
+ double x = mol_data->ctab.MolAtom[i].fX;
387
+ double y = mol_data->ctab.MolAtom[i].fY;
388
+ double z = mol_data->ctab.MolAtom[i].fZ;
389
+ x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
390
+ y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
391
+ z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
392
+ /* floor() behavior is not well defined for negative arguments.
393
+ * Use positive arguments only to get nearest integer.
394
+ */
395
+ at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
396
+ at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
397
+ at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
398
+ }
399
+ #else
400
+ /* set input coordinates */
401
+ for ( i = 0; i < num_atoms; i ++ ) {
402
+ double x = mol_data->ctab.MolAtom[i].fX;
403
+ double y = mol_data->ctab.MolAtom[i].fY;
404
+ double z = mol_data->ctab.MolAtom[i].fZ;
405
+ at[i].x = x;
406
+ at[i].y = y;
407
+ at[i].z = z;
408
+ }
409
+ #endif
410
+
411
+ exit_function:;
412
+ return num_dimensions;
413
+ }
414
+ /****************************************************************************/
415
+ long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
416
+ {
417
+ static char sStruct[] = "Structure #";
418
+ static char sINCHI[] = INCHI_NAME;
419
+ long lMolfileNumber = 0;
420
+ char *p, *q = NULL;
421
+ if ( pHdr ) {
422
+ if ( !memicmp( pHdr->szMoleculeName, sStruct, sizeof(sStruct)-1 ) ) {
423
+ p = pHdr->szMoleculeName + sizeof(sStruct)-1;
424
+ lMolfileNumber = strtol( p, &q, 10 );
425
+ p = pHdr->szMoleculeLine2;
426
+ if ( !q || *q ||
427
+ memicmp( p, sINCHI, sizeof(sINCHI)-1) ||
428
+ !strstr( p+sizeof(sINCHI)-1, "SDfile Output" ) ) {
429
+ lMolfileNumber = 0;
430
+ }
431
+ }
432
+ }
433
+ return lMolfileNumber;
434
+ }
435
+
436
+ /****************************************************************************/
437
+ int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
438
+ int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
439
+ long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
440
+ {
441
+ int num_atoms = 0;
442
+ MOL_DATA *mol_data = NULL;
443
+ MOL_HEADER_BLOCK OnlyHeaderBlock, *pOnlyHeaderBlock = NULL, *pHdr;
444
+ MOL_CTAB OnlyCtab, *pOnlyCtab = NULL;
445
+ char cSdfValueFirstChar = '\0';
446
+ #ifdef CML_DEBUG
447
+ FILE *f_p;
448
+ #endif
449
+ if ( at ) {
450
+ pOnlyHeaderBlock = NULL;
451
+ if ( *at && max_num_at ) {
452
+ memset( *at, 0, max_num_at * sizeof(**at) );
453
+ }
454
+ } else {
455
+ pOnlyHeaderBlock = &OnlyHeaderBlock;
456
+ pOnlyCtab = &OnlyCtab;
457
+ }
458
+ if ( pSdfValue ) {
459
+ cSdfValueFirstChar = pSdfValue[0];
460
+ pSdfValue[0] = '\0';
461
+ }
462
+
463
+ mol_data = e_read_sdfile_segment(inp_molfile, pOnlyHeaderBlock, pOnlyCtab, 0, NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );
464
+
465
+ pHdr = ( mol_data && !pOnlyHeaderBlock )? &mol_data->hdr :
466
+ ( !mol_data && pOnlyHeaderBlock )? pOnlyHeaderBlock : NULL;
467
+ if ( lMolfileNumber && pHdr ) {
468
+ *lMolfileNumber = GetMolfileNumber( pHdr );
469
+ }
470
+ if ( pSdfValue &&
471
+ pSdfLabel && pSdfLabel[0] && pHdr ) {
472
+ if ( !stricmp(pSdfLabel, "MOLFILENAME") ) {
473
+ e_mystrncpy( pSdfValue, pHdr->szMoleculeName, MAX_SDF_VALUE+1 );
474
+ e_LtrimRtrim( pSdfValue, NULL );
475
+ } else
476
+ if ( !stricmp(pSdfLabel, "MOLFILELINE2") ) {
477
+ e_mystrncpy( pSdfValue, pHdr->szMoleculeLine2, MAX_SDF_VALUE+1 );
478
+ e_LtrimRtrim( pSdfValue, NULL );
479
+ } else
480
+ if ( !stricmp(pSdfLabel, "MOLFILECOMMENT") ) {
481
+ e_mystrncpy( pSdfValue, pHdr->szComment, MAX_SDF_VALUE+1 );
482
+ e_LtrimRtrim( pSdfValue, NULL );
483
+ }
484
+ if ( !pSdfValue[0] ) {
485
+ pSdfValue[0] = cSdfValueFirstChar;
486
+ }
487
+ }
488
+
489
+ if ( mol_data && at && !*err ) {
490
+ /* *at points to an allocated memory */
491
+ if ( *at && mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
492
+ *at = mol_to_inchi_Atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
493
+ if ( *err >= 0 ) {
494
+ *num_dimensions = mol_to_inchi_Atom_xyz( mol_data, num_atoms, *at, err, pStrErr );
495
+ }
496
+ } else
497
+ /* *at points to NULL */
498
+ if ( !*at && mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
499
+ *at = mol_to_inchi_Atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
500
+ if ( *err >= 0 ) {
501
+ *num_dimensions = mol_to_inchi_Atom_xyz( mol_data, num_atoms, *at, err, pStrErr );
502
+ }
503
+ } else {
504
+ MOLFILE_ERR_SET (*err, 0, "Too many atoms");
505
+ *err = 70;
506
+ num_atoms = -1;
507
+ }
508
+ if ( *err > 0 ) {
509
+ *err += 100;
510
+ }
511
+ /* 11-16-2004: use Chiral flag */
512
+ if ( num_atoms > 0 && at && *at && mol_data && pInpAtomFlags ) {
513
+ if ( mol_data->ctab.cChiralFlag ) {
514
+ *pInpAtomFlags |= FLAG_INP_AT_CHIRAL;
515
+ } else {
516
+ *pInpAtomFlags |= FLAG_INP_AT_NONCHIRAL;
517
+ }
518
+ }
519
+ } else
520
+ if ( !at ) {
521
+ num_atoms = pOnlyCtab->nNumberOfAtoms;
522
+ }
523
+
524
+ if ( !pOnlyHeaderBlock ) {
525
+ e_delete_mol_data( mol_data );
526
+ }
527
+ #ifdef CML_DEBUG
528
+ puts ("MOL");
529
+ f_p = fopen ("mol.dbg", "a");
530
+ if (f_p)
531
+ {
532
+ PrintInpAtom (f_p, *at, num_atoms);
533
+ fclose (f_p);
534
+ }
535
+ else
536
+ {
537
+ puts ("Couldn't open file");
538
+ }
539
+ #endif
540
+
541
+ return num_atoms;
542
+ }
543
+ /**********************************************************************************/
544
+ int e_MolfileToInchi_Input( FILE *inp_molfile, inchi_Input *orig_at_data, int bMergeAllInputStructures,
545
+ int bDoNotAddH, int bAllowEmptyStructure,
546
+ const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
547
+ INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
548
+ {
549
+ /* inp_ATOM *at = NULL; */
550
+ int num_dimensions_new;
551
+ int num_inp_bonds_new;
552
+ int num_inp_atoms_new;
553
+ inchi_Atom *at_new = NULL;
554
+ inchi_Atom *at_old = NULL;
555
+ int nNumAtoms = 0;
556
+ int i, j;
557
+
558
+ if ( pStrErr ) {
559
+ pStrErr[0] = '\0';
560
+ }
561
+
562
+ /*FreeOrigAtData( orig_at_data );*/
563
+
564
+ do {
565
+
566
+ at_old = orig_at_data? orig_at_data->atom : NULL; /* save pointer to the previous allocation */
567
+ num_inp_atoms_new =
568
+ MolfileToInchi_Atom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, MAX_ATOMS,
569
+ &num_dimensions_new, &num_inp_bonds_new,
570
+ pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
571
+
572
+
573
+ if ( num_inp_atoms_new <= 0 && !*err ) {
574
+ if ( !bAllowEmptyStructure ) {
575
+ MOLFILE_ERR_SET (*err, 0, "Empty structure"); /* the message will be issued by the InChI library */
576
+ }
577
+ *err = 98;
578
+ } else
579
+ if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_atoms > 0 && bMergeAllInputStructures ) {
580
+ *err = 0; /* end of file */
581
+ break;
582
+ } else
583
+ if ( num_inp_atoms_new > 0 && orig_at_data ) {
584
+ /* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
585
+ nNumAtoms = num_inp_atoms_new + orig_at_data->num_atoms;
586
+ if ( nNumAtoms >= MAX_ATOMS ) {
587
+ MOLFILE_ERR_SET (*err, 0, "Too many atoms");
588
+ *err = 70;
589
+ orig_at_data->num_atoms = -1;
590
+ } else
591
+ if ( !at_old ) {
592
+ /* the first structure */
593
+ orig_at_data->atom = at_new;
594
+ at_new = NULL;
595
+ orig_at_data->num_atoms = num_inp_atoms_new;
596
+ } else
597
+ if ( orig_at_data->atom = e_CreateInchi_Atom( nNumAtoms ) ) {
598
+ /* switch at_new <--> orig_at_data->at; */
599
+ if ( orig_at_data->num_atoms ) {
600
+ memcpy( orig_at_data->atom, at_old, orig_at_data->num_atoms * sizeof(orig_at_data->atom[0]) );
601
+ /* adjust numbering in the newly read structure */
602
+ for ( i = 0; i < num_inp_atoms_new; i ++ ) {
603
+ for ( j = 0; j < at_new[i].num_bonds; j ++ ) {
604
+ at_new[i].neighbor[j] += orig_at_data->num_atoms;
605
+ }
606
+ }
607
+ }
608
+ e_FreeInchi_Atom( &at_old );
609
+ /* copy newly read structure */
610
+ memcpy( orig_at_data->atom + orig_at_data->num_atoms,
611
+ at_new,
612
+ num_inp_atoms_new * sizeof(orig_at_data->atom[0]) );
613
+ /* add other things */
614
+ orig_at_data->num_atoms += num_inp_atoms_new;
615
+ } else {
616
+ MOLFILE_ERR_SET (*err, 0, "Out of RAM");
617
+ *err = -1;
618
+ }
619
+ } else
620
+ if ( num_inp_atoms_new > 0 ) {
621
+ nNumAtoms += num_inp_atoms_new;
622
+ }
623
+ e_FreeInchi_Atom( &at_new );
624
+
625
+ } while ( !*err && bMergeAllInputStructures );
626
+ /*
627
+ if ( !*err ) {
628
+ orig_at_data->num_components =
629
+ MarkDisconnectedComponents( orig_at_data );
630
+ if ( orig_at_data->num_components == 0 ) {
631
+ MOLFILE_ERR_SET (*err, 0, "No components found");
632
+ *err = 99;
633
+ }
634
+ if ( orig_at_data->num_components < 0 ) {
635
+ MOLFILE_ERR_SET (*err, 0, "Too many components");
636
+ *err = 99;
637
+ }
638
+ }
639
+ */
640
+ e_FreeInchi_Atom( &at_new );
641
+ if ( *err ) {
642
+ e_FreeInchi_Input( orig_at_data );
643
+ }
644
+ if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
645
+ MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
646
+ }
647
+ return orig_at_data? orig_at_data->num_atoms : nNumAtoms;
648
+ }
649
+