rino 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +44 -0
- data/Rakefile +123 -0
- data/ext/extconf.rb +26 -0
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/aux2atom.h +2786 -0
- data/ext/src/comdef.h +148 -0
- data/ext/src/e_0dstereo.c +3014 -0
- data/ext/src/e_0dstereo.h +31 -0
- data/ext/src/e_comdef.h +57 -0
- data/ext/src/e_ctl_data.h +147 -0
- data/ext/src/e_ichi_io.c +498 -0
- data/ext/src/e_ichi_io.h +40 -0
- data/ext/src/e_ichi_parms.c +37 -0
- data/ext/src/e_ichi_parms.h +41 -0
- data/ext/src/e_ichicomp.h +50 -0
- data/ext/src/e_ichierr.h +40 -0
- data/ext/src/e_ichimain.c +593 -0
- data/ext/src/e_ichisize.h +43 -0
- data/ext/src/e_inchi_atom.c +75 -0
- data/ext/src/e_inchi_atom.h +33 -0
- data/ext/src/e_inpdef.h +41 -0
- data/ext/src/e_mode.h +706 -0
- data/ext/src/e_mol2atom.c +649 -0
- data/ext/src/e_readinch.c +58 -0
- data/ext/src/e_readmol.c +54 -0
- data/ext/src/e_readmol.h +180 -0
- data/ext/src/e_readstru.c +251 -0
- data/ext/src/e_readstru.h +33 -0
- data/ext/src/e_util.c +284 -0
- data/ext/src/e_util.h +61 -0
- data/ext/src/extr_ct.h +251 -0
- data/ext/src/ichi.h +206 -0
- data/ext/src/ichi_bns.c +7999 -0
- data/ext/src/ichi_bns.h +231 -0
- data/ext/src/ichican2.c +5000 -0
- data/ext/src/ichicano.c +2195 -0
- data/ext/src/ichicano.h +49 -0
- data/ext/src/ichicans.c +1625 -0
- data/ext/src/ichicant.h +379 -0
- data/ext/src/ichicomn.h +260 -0
- data/ext/src/ichicomp.h +50 -0
- data/ext/src/ichidrp.h +119 -0
- data/ext/src/ichierr.h +124 -0
- data/ext/src/ichiisot.c +101 -0
- data/ext/src/ichilnct.c +286 -0
- data/ext/src/ichimain.h +132 -0
- data/ext/src/ichimak2.c +1189 -0
- data/ext/src/ichimake.c +3812 -0
- data/ext/src/ichimake.h +205 -0
- data/ext/src/ichimap1.c +851 -0
- data/ext/src/ichimap2.c +2856 -0
- data/ext/src/ichimap4.c +1609 -0
- data/ext/src/ichinorm.c +741 -0
- data/ext/src/ichinorm.h +67 -0
- data/ext/src/ichiparm.c +45 -0
- data/ext/src/ichiparm.h +1441 -0
- data/ext/src/ichiprt1.c +3612 -0
- data/ext/src/ichiprt2.c +1511 -0
- data/ext/src/ichiprt3.c +3011 -0
- data/ext/src/ichiqueu.c +1003 -0
- data/ext/src/ichiring.c +326 -0
- data/ext/src/ichiring.h +49 -0
- data/ext/src/ichisize.h +35 -0
- data/ext/src/ichisort.c +539 -0
- data/ext/src/ichister.c +3538 -0
- data/ext/src/ichister.h +35 -0
- data/ext/src/ichitaut.c +3843 -0
- data/ext/src/ichitaut.h +387 -0
- data/ext/src/ichitime.h +74 -0
- data/ext/src/inchi_api.h +670 -0
- data/ext/src/inchi_dll.c +1480 -0
- data/ext/src/inchi_dll.h +34 -0
- data/ext/src/inchi_dll_main.c +23 -0
- data/ext/src/inchi_dll_main.h +31 -0
- data/ext/src/inpdef.h +328 -0
- data/ext/src/lreadmol.h +1246 -0
- data/ext/src/mode.h +706 -0
- data/ext/src/ruby_inchi_main.c +558 -0
- data/ext/src/runichi.c +4179 -0
- data/ext/src/strutil.c +3861 -0
- data/ext/src/strutil.h +182 -0
- data/ext/src/util.c +1130 -0
- data/ext/src/util.h +85 -0
- data/lib/clean_tempfile.rb +220 -0
- data/lib/rino.rb +111 -0
- data/test/test.rb +386 -0
- metadata +130 -0
@@ -0,0 +1,649 @@
|
|
1
|
+
/*
|
2
|
+
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
+
* International Chemical Identifier (InChI)
|
4
|
+
* Version 1
|
5
|
+
* Software version 1.00
|
6
|
+
* April 13, 2005
|
7
|
+
* Developed at NIST
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include <stdio.h>
|
11
|
+
#include <stdlib.h>
|
12
|
+
#include <string.h>
|
13
|
+
#include <ctype.h>
|
14
|
+
#include <stdarg.h>
|
15
|
+
#include <errno.h>
|
16
|
+
#include <limits.h>
|
17
|
+
#include <float.h>
|
18
|
+
#include <math.h>
|
19
|
+
|
20
|
+
#include "e_mode.h"
|
21
|
+
#include "inchi_api.h"
|
22
|
+
#include "e_ctl_data.h"
|
23
|
+
|
24
|
+
#include "e_comdef.h"
|
25
|
+
#include "e_util.h"
|
26
|
+
#include "e_ichicomp.h"
|
27
|
+
|
28
|
+
#include "e_readmol.h"
|
29
|
+
#include "e_inpdef.h"
|
30
|
+
|
31
|
+
#if( ADD_CMLPP == 1 )
|
32
|
+
#include "e_readcml.hpp"
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#include "e_inchi_atom.h"
|
36
|
+
|
37
|
+
|
38
|
+
/*
 * Target box for normalized coordinates: mol_to_inchi_Atom_xyz() derives its
 * per-axis stretching coefficients from (MAX - MIN) of each axis.
 */
#define MIN_STDATA_X_COORD           0.0
#define MIN_STDATA_Y_COORD           0.0
#define MIN_STDATA_Z_COORD           0.0

#define MAX_STDATA_X_COORD         256.0
#define MAX_STDATA_Y_COORD         256.0
#define MAX_STDATA_Z_COORD         256.0

/* Allowed range of the scaled average bond length; the scale is clamped to it. */
#define MIN_STDATA_AVE_BOND_LENGTH  10.0
#define MAX_STDATA_AVE_BOND_LENGTH  20.0
|
46
|
+
|
47
|
+
|
48
|
+
/* local prototypes */
|
49
|
+
inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
|
50
|
+
int bDoNotAddH, int *err, char *pStrErr );
|
51
|
+
int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr );
|
52
|
+
|
53
|
+
int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
|
54
|
+
int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
|
55
|
+
long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr );
|
56
|
+
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr );
|
57
|
+
|
58
|
+
|
59
|
+
/* too long ave. bond length prevents structure from displaying */
|
60
|
+
/* According to Steve, a standard bond length is 10. 9-24-97 DCh */
|
61
|
+
/* Ave. bond length in MainLib is 20. Also fixed Average */
|
62
|
+
/* bond length calculation by introducing num_avg_bonds */
|
63
|
+
/* in mol_to_stdata(). 12-9-99 DCh. */
|
64
|
+
|
65
|
+
/******************************************************************************************************/
|
66
|
+
/*
 * Convert the connection table of a parsed MOLfile into an array of inchi_Atom.
 *
 *   mol_data    parsed MOLfile; NULL or an empty table yields "no structure"
 *   num_atoms   out: number of atoms taken from the connection table
 *   num_bonds   out: number of distinct bonds accepted
 *   at_inp      caller-supplied atom array, or NULL to have one allocated
 *   bDoNotAddH  if zero, atoms without explicit valence get num_iso_H[0] = -1
 *   err         out: 0 on success, negative on fatal errors, otherwise an OR of
 *               warning bits 1, 2, 4, 8, 16, 32 set below
 *   pStrErr     message buffer used by the MOLFILE_ERR_* macros
 *
 * Returns the atom array (at_inp if given, else the new allocation), or NULL
 * when there is no structure or the allocation failed.
 */
inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
                               int bDoNotAddH, int *err, char *pStrErr )
{
    inchi_Atom *at = NULL;
    AT_NUM     *pos1, *pos2;        /* existing neighbor entries for the bond ends, or NULL */
    int         iat, ibond, k;
    int         end1, end2;         /* 0-based indices of the two bonded atoms */
    int         slot1, slot2;       /* neighbor-list slots used for the current bond */
    int         nAcceptedBonds;
    int         bRoom1, bRoom2;
    S_CHAR      cBondStereo, cBondType;
    S_CHAR      cStereo1, cStereo2;

    *err = 0;
    *num_atoms = *num_bonds = 0;

    /* nothing to do unless the MOLfile actually carries atoms (and bond records if it claims bonds) */
    if ( !mol_data || !mol_data->ctab.MolAtom ) {
        goto exit_function;
    }
    if ( mol_data->ctab.nNumberOfBonds > 0 && !mol_data->ctab.MolBond ) {
        goto exit_function;
    }
    *num_atoms = mol_data->ctab.nNumberOfAtoms;
    if ( *num_atoms <= 0 ) {
        /* MOLFILE_ERR_SET (*err, 0, "Empty structure"); */
        goto exit_function;
    }

    /* use the caller's array when given, otherwise allocate one */
    at = at_inp ? at_inp : e_CreateInchi_Atom( *num_atoms );
    if ( !at ) {
        *err = -1;
        MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
    }

    /* ---- atoms: element, isotope, charge, radical (coordinates are handled elsewhere) ---- */
    for ( iat = 0; iat < *num_atoms; iat ++ ) {
        inchi_Atom *cur = &at[iat];

        e_mystrncpy( cur->elname, mol_data->ctab.MolAtom[iat].szAtomSymbol, sizeof(cur->elname) );
        cur->isotopic_mass = mol_data->ctab.MolAtom[iat].cMassDifference;
        cur->charge        = mol_data->ctab.MolAtom[iat].cCharge;
        cur->radical       = mol_data->ctab.MolAtom[iat].cRadical;
#if( SINGLET_IS_TRIPLET == 1 )
        if ( cur->radical == RADICAL_SINGLET ) {
            cur->radical = RADICAL_TRIPLET;
        }
#endif
        /* alias records (H, charge, radical embedded in the name) are interpreted by the InChI dll */
    }

    /* ---- bonds ---- */
    nAcceptedBonds = 0;
    for ( ibond = 0; ibond < mol_data->ctab.nNumberOfBonds; ibond ++ ) {
        cBondStereo = mol_data->ctab.MolBond[ibond].cBondStereo;
        cBondType   = mol_data->ctab.MolBond[ibond].cBondType;
        end1        = mol_data->ctab.MolBond[ibond].nAtomNo1-1;
        end2        = mol_data->ctab.MolBond[ibond].nAtomNo2-1;

        if ( end1 < 0 || end1 >= *num_atoms ||
             end2 < 0 || end2 >= *num_atoms ||
             end1 == end2 ) {
            *err |= 1; /* bond refers to an impossible atom number; ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }

        /* has this pair of atoms been bonded already? */
        pos1 = e_is_in_the_slist( at[end1].neighbor, (AT_NUM)end2, at[end1].num_bonds );
        pos2 = e_is_in_the_slist( at[end2].neighbor, (AT_NUM)end1, at[end2].num_bonds );

        /* each end either reuses its existing slot or needs a free one */
        bRoom1 = ( pos1 || at[end1].num_bonds < MAXVAL );
        bRoom2 = ( pos2 || at[end2].num_bonds < MAXVAL );

        if ( !bRoom1 || !bRoom2 ) {
            char szMsg[64];
            *err |= 4; /* too many bonds on an atom; this bond is ignored */
            sprintf( szMsg, "Atom '%s' has more than %d bonds",
                            at[end1].num_bonds>= MAXVAL? at[end1].elname:at[end2].elname, MAXVAL );
            MOLFILE_ERR_SET (*err, 0, szMsg);
            continue;
        }
        if ( pos1 || pos2 ) {
            /* repeated bond: overwrite the existing entry, do not count it again */
            slot1 = pos1? (pos1 - at[end1].neighbor) : at[end1].num_bonds ++;
            slot2 = pos2? (pos2 - at[end2].neighbor) : at[end2].num_bonds ++;
            MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
            *err |= 2; /* multiple bonds between atoms */
        } else {
            /* a genuinely new bond */
            slot1 = at[end1].num_bonds ++;
            slot2 = at[end2].num_bonds ++;
            nAcceptedBonds ++;
        }

        if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
            char szBondType[16];
            sprintf( szBondType, "%d", cBondType );
            cBondType = 1;
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
            MOLFILE_ERR_SET (*err, 0, szBondType);
            *err |= 8; /* unrecognized bond type replaced with a single bond */
        }

        /* record type and connectivity symmetrically on both atoms */
        at[end2].bond_type[slot2] = cBondType;
        at[end1].bond_type[slot1] = at[end2].bond_type[slot2];
        at[end1].neighbor[slot1]  = (AT_NUM)end2;
        at[end2].neighbor[slot2]  = (AT_NUM)end1;

        /* translate the MOLfile stereo code into the pair of per-atom InChI codes */
        if ( cBondStereo == INPUT_STEREO_DBLE_EITHER ) {          /* 3 */
            cStereo1 = INCHI_BOND_STEREO_DOUBLE_EITHER;
            cStereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER;
        } else if ( cBondStereo == INPUT_STEREO_SNGL_UP ) {       /* 1 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1UP;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2UP;
        } else if ( cBondStereo == INPUT_STEREO_SNGL_EITHER ) {   /* 4 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER;
        } else if ( cBondStereo == INPUT_STEREO_SNGL_DOWN ) {     /* 6 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN;
        } else if ( cBondStereo == 0 ) {
            cStereo1 = INCHI_BOND_STEREO_NONE;
            cStereo2 = INCHI_BOND_STEREO_NONE;
        } else {
            *err |= 16; /* unrecognized bond stereo is ignored; the bond itself stays */
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
            continue;
        }
        at[end1].bond_stereo[slot1] = cStereo1; /* >0: the wedge (pointed) end is at this atom */
        at[end2].bond_stereo[slot2] = cStereo2; /* <0: the wedge (pointed) end is at the opposite atom */
    }
    *num_bonds = nAcceptedBonds;

    /* ---- implicit hydrogens / special MOLfile valences ---- */
    for ( iat = 0; iat < *num_atoms; iat ++ ) {
        int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1];
        int type_idx, nAromatic;
        int chem_bonds_valence, valence;
        int bExplicitValence =
            mol_data->ctab.MolAtom[iat].cValence &&
            (mol_data->ctab.MolAtom[iat].cValence != 15 || at[iat].num_bonds);

        if ( !bExplicitValence ) {
            if ( mol_data->ctab.MolAtom[iat].cAtomAliasedFlag ||
                 ( mol_data->ctab.MolAtom[iat].cValence == 15 && !at[iat].num_bonds ) ) {
                /* aliased atom, or valence code 15 on an unbonded atom: no implicit H */
                at[iat].num_iso_H[0] = 0;
            } else if ( !bDoNotAddH ) {
                /* -1 asks for the number of H to be determined later */
                at[iat].num_iso_H[0] = -1;
            }
            continue;
        }

        /* MOLfile gives an explicit valence => derive the number of H from it */
        memset( num_bond_type, 0, sizeof(num_bond_type) );
        valence = mol_data->ctab.MolAtom[iat].cValence;

        /* histogram of bond types on this atom */
        for ( k = 0; k < at[iat].num_bonds; k ++ ) {
            type_idx = at[iat].bond_type[k] - MIN_INPUT_BOND_TYPE;
            if ( type_idx < 0 || type_idx > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
                type_idx = 0;
                MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
            }
            num_bond_type[ type_idx ] ++;
        }

        /* single, double and triple bonds contribute their order */
        chem_bonds_valence = 0;
        for ( k = 0; MIN_INPUT_BOND_TYPE + k <= 3 && MIN_INPUT_BOND_TYPE + k <= MAX_INPUT_BOND_TYPE; k ++ ) {
            chem_bonds_valence += (MIN_INPUT_BOND_TYPE + k) * num_bond_type[k];
        }

        /* aromatic (alternating) bonds are accepted on input for now */
        nAromatic = 0;
        if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE ) {
            nAromatic = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE];
        }
        if ( nAromatic == 2 ) {
            chem_bonds_valence += 3;  /* =A- */
        } else if ( nAromatic == 3 ) {
            chem_bonds_valence += 4;  /* =A< */
        } else if ( nAromatic ) {
            /* 1 or >= 4 aromatic bonds: demote all of them to single bonds on both ends */
            for ( k = 0; k < at[iat].num_bonds; k ++ ) {
                int     other;
                AT_NUM *back;

                if ( at[iat].bond_type[k] != INCHI_BOND_TYPE_ALTERN ) {
                    continue;
                }
                other = at[iat].neighbor[k];
                back  = e_is_in_the_slist( at[other].neighbor, (AT_NUM)iat, at[other].num_bonds );
                if ( !back ) {
                    *err = -2;  /* Program error */
                    MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
                    goto exit_function;
                }
                at[other].bond_type[back-at[other].neighbor] = INCHI_BOND_TYPE_SINGLE;
                at[iat].bond_type[k] = at[other].bond_type[back-at[other].neighbor];
            }
            chem_bonds_valence += nAromatic;
            *err |= 32;
            MOLFILE_ERR_SET (*err, 0, "Atom has more than 3 aromatic bonds");
        }

        /* whatever valence is left over is filled with hydrogens */
        if ( valence >= chem_bonds_valence ) {
            at[iat].num_iso_H[0] = valence - chem_bonds_valence;
        }
    }

exit_function:;
    return at;
}
|
267
|
+
/******************************************************************************************************/
|
268
|
+
/*
 * Transfer atom coordinates from a parsed MOLfile into the inchi_Atom array and
 * report how many dimensions the structure has.
 *
 *   mol_data   parsed MOLfile
 *   num_atoms  ignored on input: it is overwritten with the atom count from the
 *              connection table (kept for interface compatibility)
 *   at         atom array to receive x, y, z; must hold at least that many atoms
 *   err        in/out: bit 1 is OR-ed in when a bond refers to a nonexistent atom
 *   pStrErr    message buffer used by MOLFILE_ERR_SET
 *
 * Returns 0 (no usable coordinates / no structure), 2 or 3.
 *
 * When NORMALIZE_INP_COORD == 1 the coordinates are shifted to the
 * MIN_STDATA_* origin, scaled by a common coefficient and rounded to the
 * nearest integer; otherwise they are copied unchanged.
 */
int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr )
{
    int      i, num_dimensions=0;
    int      num_bonds;
    double max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
    double min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
    double macheps = 1.0e-10, small_coeff = 0.00001;
    double x_coeff, y_coeff, z_coeff, coeff, average_bond_length;

    /* *err = 0; */
    /* check if MOLfile contains atoms and there is somewhere to store the result */
    if ( !mol_data || !at || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        goto exit_function; /*  no structure */
    }

    /* bounding box of the input coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
        min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
        max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
        min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
        max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
        min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
    }

    /* accumulate bond lengths over all valid bonds */
    num_bonds = 0;
    average_bond_length = 0.0;
    for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        int  a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        int  a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
        double dx, dy, dz;

        /* validate the atom numbers BEFORE using them as indices into MolAtom[] */
        if ( a1 < 0 || a1 >= num_atoms ||
             a2 < 0 || a2 >= num_atoms ||
             a1 == a2 ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }
        dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
        dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
        dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;
        average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
        num_bonds ++;
    }

    /* per-axis stretching coefficients; an axis with (almost) no extent gets 0 */
    if ( max_x - min_x <= small_coeff*(fabs(max_x) + fabs(min_x)) )
        x_coeff = 0.0;
    else
        x_coeff = (MAX_STDATA_X_COORD - MIN_STDATA_X_COORD)/(max_x - min_x);

    if ( max_y - min_y <= small_coeff*(fabs(max_y) + fabs(min_y)) )
        y_coeff = 0.0;
    else
        y_coeff = (MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD)/(max_y - min_y);

    if ( max_z - min_z <= small_coeff*(fabs(max_z) + fabs(min_z)) )
        z_coeff = 0.0;
    else
        z_coeff = (MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD)/(max_z - min_z);

    /* flat in z but extended in x or y => 2D; extended in z => 3D; otherwise 0D */
    num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
                     (fabs(z_coeff) > macheps)? 3: 0;

    switch ( num_dimensions ) {
    case 0:
        coeff = 0.0;
        break;
    case 2:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = x_coeff;
        }else
        if ( y_coeff > macheps ){
            coeff = y_coeff;
        }else{
            coeff = 1.0;
        }
        break;
    case 3:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
            coeff = inchi_min( coeff, z_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = inchi_min( x_coeff, z_coeff );
        }else
        if ( y_coeff > macheps ){
            coeff = inchi_min( y_coeff, z_coeff );
        }else{
            coeff = z_coeff;
        }
        break;
    default:
        coeff = 0.0;
    }

    /* clamp the scale so that the average bond length stays within the allowed range */
    if ( num_bonds > 0 ) {
        average_bond_length /= (double)num_bonds;
        if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
            coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
        } else
        if ( average_bond_length * coeff < macheps ) {
            coeff = 1.0; /* all lengths are of zero length */
        } else
        if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
            coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
        }
    }
#if( NORMALIZE_INP_COORD == 1 )
    /* set integral coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
        y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
        z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
        /* round half away from zero; floor() is applied to non-negative
         * arguments only, negative values are mirrored
         */
        at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
        at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
        at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
    }
#else
    /* set input coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        at[i].x = x;
        at[i].y = y;
        at[i].z = z;
    }
#endif

exit_function:;
    return num_dimensions;
}
|
414
|
+
/****************************************************************************/
|
415
|
+
/*
 * Recover the structure number from a MOLfile header.
 *
 * A non-zero number is returned only when all of the following hold:
 *   - the molecule name starts (case-insensitively) with "Structure #" and the
 *     rest of the name is a decimal number with nothing after it;
 *   - header line 2 starts (case-insensitively) with INCHI_NAME and contains
 *     "SDfile Output" somewhere after that prefix.
 * In every other case, including pHdr == NULL, the result is 0.
 */
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
{
    static char szNamePrefix[]  = "Structure #";
    static char szLine2Prefix[] = INCHI_NAME;
    char *pszLine2;
    char *pszEnd = NULL;
    long  lNumber;

    if ( !pHdr ) {
        return 0;
    }
    if ( memicmp( pHdr->szMoleculeName, szNamePrefix, sizeof(szNamePrefix)-1 ) ) {
        return 0; /* name does not carry a structure number */
    }

    lNumber  = strtol( pHdr->szMoleculeName + sizeof(szNamePrefix)-1, &pszEnd, 10 );
    pszLine2 = pHdr->szMoleculeLine2;

    /* the number must run to the end of the name */
    if ( !pszEnd || *pszEnd ) {
        return 0;
    }
    /* line 2 must identify the file as produced by this software */
    if ( memicmp( pszLine2, szLine2Prefix, sizeof(szLine2Prefix)-1) ) {
        return 0;
    }
    if ( !strstr( pszLine2+sizeof(szLine2Prefix)-1, "SDfile Output" ) ) {
        return 0;
    }
    return lNumber;
}
|
435
|
+
|
436
|
+
/****************************************************************************/
|
437
|
+
/*
 * Read the next SDfile segment from inp_molfile and, if requested, convert it
 * into an array of inchi_Atom.
 *
 *   at              NULL: read the header and counts only (no atoms are built);
 *                   otherwise *at is either an existing array of max_num_at
 *                   atoms (it is zeroed and filled) or NULL (a new array is
 *                   allocated and returned through *at)
 *   max_num_at      largest accepted number of atoms
 *   num_dimensions  out: result of mol_to_inchi_Atom_xyz()
 *   num_bonds       out: number of bonds accepted by mol_to_inchi_Atom()
 *   pSdfLabel       SDfile label to look for; the pseudo-labels MOLFILENAME,
 *                   MOLFILELINE2 and MOLFILECOMMENT select header lines instead
 *   pSdfValue       in/out buffer for the value; restored to its original
 *                   first character if nothing was found for a header label
 *   Id              passed through to e_read_sdfile_segment()
 *   lMolfileNumber  out (optional): see GetMolfileNumber()
 *   pInpAtomFlags   in/out (optional): chiral / non-chiral flag is OR-ed in
 *   err             in/out: reader error; positive conversion warnings are
 *                   offset by 100; 70 (reported as 170) means too many atoms
 *   pStrErr         message buffer used by MOLFILE_ERR_SET
 *
 * Returns the number of atoms, or -1 if the structure has too many atoms.
 */
int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
                      int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
                      long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
{
    int      num_atoms = 0;
    MOL_DATA *mol_data = NULL;
    MOL_HEADER_BLOCK OnlyHeaderBlock, *pOnlyHeaderBlock = NULL, *pHdr;
    MOL_CTAB         OnlyCtab,        *pOnlyCtab = NULL;
    char             cSdfValueFirstChar = '\0';
#ifdef CML_DEBUG
        FILE *f_p;
#endif
    if ( at ) {
        /* full conversion: start from a clean caller-supplied array, if any */
        if ( *at && max_num_at ) {
            memset( *at, 0, max_num_at * sizeof(**at) );
        }
    } else {
        /* header-only mode. The locals are read below even if the reader fails,
         * so make sure they never hold indeterminate values
         * (NOTE(review): the reader may already clear them; it is not visible here).
         */
        memset( &OnlyHeaderBlock, 0, sizeof(OnlyHeaderBlock) );
        memset( &OnlyCtab, 0, sizeof(OnlyCtab) );
        pOnlyHeaderBlock = &OnlyHeaderBlock;
        pOnlyCtab        = &OnlyCtab;
    }
    if ( pSdfValue ) {
        /* remember the first character so the value can be restored if nothing is found */
        cSdfValueFirstChar = pSdfValue[0];
        pSdfValue[0] = '\0';
    }

    mol_data = e_read_sdfile_segment(inp_molfile, pOnlyHeaderBlock, pOnlyCtab, 0, NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );

    /* the header lives either in mol_data (full mode) or in the local block (header-only mode) */
    pHdr = (  mol_data && !pOnlyHeaderBlock )? &mol_data->hdr :
           ( !mol_data &&  pOnlyHeaderBlock )? pOnlyHeaderBlock : NULL;
    if ( lMolfileNumber && pHdr ) {
         *lMolfileNumber = GetMolfileNumber( pHdr );
    }
    if ( pSdfValue &&
         pSdfLabel && pSdfLabel[0] && pHdr ) {
        if ( !stricmp(pSdfLabel, "MOLFILENAME") ) {
            e_mystrncpy( pSdfValue, pHdr->szMoleculeName, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MOLFILELINE2") ) {
            e_mystrncpy( pSdfValue, pHdr->szMoleculeLine2, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MOLFILECOMMENT") ) {
            e_mystrncpy( pSdfValue, pHdr->szComment, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        }
        if ( !pSdfValue[0] ) {
            pSdfValue[0] = cSdfValueFirstChar;
        }
    }

    if ( mol_data && at && !*err ) {
        if ( mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
            /* *at is either the caller's array or NULL; in the latter case
             * mol_to_inchi_Atom() allocates the array and it is handed back via *at
             */
            *at = mol_to_inchi_Atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
            if ( *err >= 0 ) {
                *num_dimensions = mol_to_inchi_Atom_xyz( mol_data, num_atoms, *at, err, pStrErr );
            }
        } else {
            MOLFILE_ERR_SET (*err, 0, "Too many atoms");
            *err = 70;
            num_atoms = -1;
        }
        if ( *err > 0 ) {
            /* move conversion warnings/errors out of the reader's error range */
            *err += 100;
        }
        /* 11-16-2004: use Chiral flag */
        if ( num_atoms > 0 && at && *at && mol_data && pInpAtomFlags ) {
            if ( mol_data->ctab.cChiralFlag ) {
                *pInpAtomFlags |= FLAG_INP_AT_CHIRAL;
            } else {
                *pInpAtomFlags |= FLAG_INP_AT_NONCHIRAL;
            }
        }
    } else
    if ( !at ) {
        /* header-only mode: report the atom count from the counts line */
        num_atoms = pOnlyCtab->nNumberOfAtoms;
    }

    if ( !pOnlyHeaderBlock ) {
        e_delete_mol_data( mol_data );
    }
#ifdef CML_DEBUG
        puts ("MOL");
        f_p = fopen ("mol.dbg", "a");
        if (f_p)
            {
                /* nothing to dump in header-only mode or when no atoms were built */
                if ( at && *at ) {
                    PrintInpAtom (f_p, *at, num_atoms);
                }
                fclose (f_p);
            }
        else
            {
                puts ("Couldn't open file");
            }
#endif

    return num_atoms;
}
|
543
|
+
/**********************************************************************************/
|
544
|
+
int e_MolfileToInchi_Input( FILE *inp_molfile, inchi_Input *orig_at_data, int bMergeAllInputStructures,
|
545
|
+
int bDoNotAddH, int bAllowEmptyStructure,
|
546
|
+
const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
|
547
|
+
INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
|
548
|
+
{
|
549
|
+
/* inp_ATOM *at = NULL; */
|
550
|
+
int num_dimensions_new;
|
551
|
+
int num_inp_bonds_new;
|
552
|
+
int num_inp_atoms_new;
|
553
|
+
inchi_Atom *at_new = NULL;
|
554
|
+
inchi_Atom *at_old = NULL;
|
555
|
+
int nNumAtoms = 0;
|
556
|
+
int i, j;
|
557
|
+
|
558
|
+
if ( pStrErr ) {
|
559
|
+
pStrErr[0] = '\0';
|
560
|
+
}
|
561
|
+
|
562
|
+
/*FreeOrigAtData( orig_at_data );*/
|
563
|
+
|
564
|
+
do {
|
565
|
+
|
566
|
+
at_old = orig_at_data? orig_at_data->atom : NULL; /* save pointer to the previous allocation */
|
567
|
+
num_inp_atoms_new =
|
568
|
+
MolfileToInchi_Atom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, MAX_ATOMS,
|
569
|
+
&num_dimensions_new, &num_inp_bonds_new,
|
570
|
+
pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
|
571
|
+
|
572
|
+
|
573
|
+
if ( num_inp_atoms_new <= 0 && !*err ) {
|
574
|
+
if ( !bAllowEmptyStructure ) {
|
575
|
+
MOLFILE_ERR_SET (*err, 0, "Empty structure"); /* the message will be issued by the InChI library */
|
576
|
+
}
|
577
|
+
*err = 98;
|
578
|
+
} else
|
579
|
+
if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_atoms > 0 && bMergeAllInputStructures ) {
|
580
|
+
*err = 0; /* end of file */
|
581
|
+
break;
|
582
|
+
} else
|
583
|
+
if ( num_inp_atoms_new > 0 && orig_at_data ) {
|
584
|
+
/* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
|
585
|
+
nNumAtoms = num_inp_atoms_new + orig_at_data->num_atoms;
|
586
|
+
if ( nNumAtoms >= MAX_ATOMS ) {
|
587
|
+
MOLFILE_ERR_SET (*err, 0, "Too many atoms");
|
588
|
+
*err = 70;
|
589
|
+
orig_at_data->num_atoms = -1;
|
590
|
+
} else
|
591
|
+
if ( !at_old ) {
|
592
|
+
/* the first structure */
|
593
|
+
orig_at_data->atom = at_new;
|
594
|
+
at_new = NULL;
|
595
|
+
orig_at_data->num_atoms = num_inp_atoms_new;
|
596
|
+
} else
|
597
|
+
if ( orig_at_data->atom = e_CreateInchi_Atom( nNumAtoms ) ) {
|
598
|
+
/* switch at_new <--> orig_at_data->at; */
|
599
|
+
if ( orig_at_data->num_atoms ) {
|
600
|
+
memcpy( orig_at_data->atom, at_old, orig_at_data->num_atoms * sizeof(orig_at_data->atom[0]) );
|
601
|
+
/* adjust numbering in the newly read structure */
|
602
|
+
for ( i = 0; i < num_inp_atoms_new; i ++ ) {
|
603
|
+
for ( j = 0; j < at_new[i].num_bonds; j ++ ) {
|
604
|
+
at_new[i].neighbor[j] += orig_at_data->num_atoms;
|
605
|
+
}
|
606
|
+
}
|
607
|
+
}
|
608
|
+
e_FreeInchi_Atom( &at_old );
|
609
|
+
/* copy newly read structure */
|
610
|
+
memcpy( orig_at_data->atom + orig_at_data->num_atoms,
|
611
|
+
at_new,
|
612
|
+
num_inp_atoms_new * sizeof(orig_at_data->atom[0]) );
|
613
|
+
/* add other things */
|
614
|
+
orig_at_data->num_atoms += num_inp_atoms_new;
|
615
|
+
} else {
|
616
|
+
MOLFILE_ERR_SET (*err, 0, "Out of RAM");
|
617
|
+
*err = -1;
|
618
|
+
}
|
619
|
+
} else
|
620
|
+
if ( num_inp_atoms_new > 0 ) {
|
621
|
+
nNumAtoms += num_inp_atoms_new;
|
622
|
+
}
|
623
|
+
e_FreeInchi_Atom( &at_new );
|
624
|
+
|
625
|
+
} while ( !*err && bMergeAllInputStructures );
|
626
|
+
/*
|
627
|
+
if ( !*err ) {
|
628
|
+
orig_at_data->num_components =
|
629
|
+
MarkDisconnectedComponents( orig_at_data );
|
630
|
+
if ( orig_at_data->num_components == 0 ) {
|
631
|
+
MOLFILE_ERR_SET (*err, 0, "No components found");
|
632
|
+
*err = 99;
|
633
|
+
}
|
634
|
+
if ( orig_at_data->num_components < 0 ) {
|
635
|
+
MOLFILE_ERR_SET (*err, 0, "Too many components");
|
636
|
+
*err = 99;
|
637
|
+
}
|
638
|
+
}
|
639
|
+
*/
|
640
|
+
e_FreeInchi_Atom( &at_new );
|
641
|
+
if ( *err ) {
|
642
|
+
e_FreeInchi_Input( orig_at_data );
|
643
|
+
}
|
644
|
+
if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
|
645
|
+
MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
|
646
|
+
}
|
647
|
+
return orig_at_data? orig_at_data->num_atoms : nNumAtoms;
|
648
|
+
}
|
649
|
+
|