rino 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +44 -0
- data/Rakefile +123 -0
- data/ext/extconf.rb +26 -0
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/aux2atom.h +2786 -0
- data/ext/src/comdef.h +148 -0
- data/ext/src/e_0dstereo.c +3014 -0
- data/ext/src/e_0dstereo.h +31 -0
- data/ext/src/e_comdef.h +57 -0
- data/ext/src/e_ctl_data.h +147 -0
- data/ext/src/e_ichi_io.c +498 -0
- data/ext/src/e_ichi_io.h +40 -0
- data/ext/src/e_ichi_parms.c +37 -0
- data/ext/src/e_ichi_parms.h +41 -0
- data/ext/src/e_ichicomp.h +50 -0
- data/ext/src/e_ichierr.h +40 -0
- data/ext/src/e_ichimain.c +593 -0
- data/ext/src/e_ichisize.h +43 -0
- data/ext/src/e_inchi_atom.c +75 -0
- data/ext/src/e_inchi_atom.h +33 -0
- data/ext/src/e_inpdef.h +41 -0
- data/ext/src/e_mode.h +706 -0
- data/ext/src/e_mol2atom.c +649 -0
- data/ext/src/e_readinch.c +58 -0
- data/ext/src/e_readmol.c +54 -0
- data/ext/src/e_readmol.h +180 -0
- data/ext/src/e_readstru.c +251 -0
- data/ext/src/e_readstru.h +33 -0
- data/ext/src/e_util.c +284 -0
- data/ext/src/e_util.h +61 -0
- data/ext/src/extr_ct.h +251 -0
- data/ext/src/ichi.h +206 -0
- data/ext/src/ichi_bns.c +7999 -0
- data/ext/src/ichi_bns.h +231 -0
- data/ext/src/ichican2.c +5000 -0
- data/ext/src/ichicano.c +2195 -0
- data/ext/src/ichicano.h +49 -0
- data/ext/src/ichicans.c +1625 -0
- data/ext/src/ichicant.h +379 -0
- data/ext/src/ichicomn.h +260 -0
- data/ext/src/ichicomp.h +50 -0
- data/ext/src/ichidrp.h +119 -0
- data/ext/src/ichierr.h +124 -0
- data/ext/src/ichiisot.c +101 -0
- data/ext/src/ichilnct.c +286 -0
- data/ext/src/ichimain.h +132 -0
- data/ext/src/ichimak2.c +1189 -0
- data/ext/src/ichimake.c +3812 -0
- data/ext/src/ichimake.h +205 -0
- data/ext/src/ichimap1.c +851 -0
- data/ext/src/ichimap2.c +2856 -0
- data/ext/src/ichimap4.c +1609 -0
- data/ext/src/ichinorm.c +741 -0
- data/ext/src/ichinorm.h +67 -0
- data/ext/src/ichiparm.c +45 -0
- data/ext/src/ichiparm.h +1441 -0
- data/ext/src/ichiprt1.c +3612 -0
- data/ext/src/ichiprt2.c +1511 -0
- data/ext/src/ichiprt3.c +3011 -0
- data/ext/src/ichiqueu.c +1003 -0
- data/ext/src/ichiring.c +326 -0
- data/ext/src/ichiring.h +49 -0
- data/ext/src/ichisize.h +35 -0
- data/ext/src/ichisort.c +539 -0
- data/ext/src/ichister.c +3538 -0
- data/ext/src/ichister.h +35 -0
- data/ext/src/ichitaut.c +3843 -0
- data/ext/src/ichitaut.h +387 -0
- data/ext/src/ichitime.h +74 -0
- data/ext/src/inchi_api.h +670 -0
- data/ext/src/inchi_dll.c +1480 -0
- data/ext/src/inchi_dll.h +34 -0
- data/ext/src/inchi_dll_main.c +23 -0
- data/ext/src/inchi_dll_main.h +31 -0
- data/ext/src/inpdef.h +328 -0
- data/ext/src/lreadmol.h +1246 -0
- data/ext/src/mode.h +706 -0
- data/ext/src/ruby_inchi_main.c +558 -0
- data/ext/src/runichi.c +4179 -0
- data/ext/src/strutil.c +3861 -0
- data/ext/src/strutil.h +182 -0
- data/ext/src/util.c +1130 -0
- data/ext/src/util.h +85 -0
- data/lib/clean_tempfile.rb +220 -0
- data/lib/rino.rb +111 -0
- data/test/test.rb +386 -0
- metadata +130 -0
@@ -0,0 +1,649 @@
|
|
1
|
+
/*
|
2
|
+
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
+
* International Chemical Identifier (InChI)
|
4
|
+
* Version 1
|
5
|
+
* Software version 1.00
|
6
|
+
* April 13, 2005
|
7
|
+
* Developed at NIST
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include <stdio.h>
|
11
|
+
#include <stdlib.h>
|
12
|
+
#include <string.h>
|
13
|
+
#include <ctype.h>
|
14
|
+
#include <stdarg.h>
|
15
|
+
#include <errno.h>
|
16
|
+
#include <limits.h>
|
17
|
+
#include <float.h>
|
18
|
+
#include <math.h>
|
19
|
+
|
20
|
+
#include "e_mode.h"
|
21
|
+
#include "inchi_api.h"
|
22
|
+
#include "e_ctl_data.h"
|
23
|
+
|
24
|
+
#include "e_comdef.h"
|
25
|
+
#include "e_util.h"
|
26
|
+
#include "e_ichicomp.h"
|
27
|
+
|
28
|
+
#include "e_readmol.h"
|
29
|
+
#include "e_inpdef.h"
|
30
|
+
|
31
|
+
#if( ADD_CMLPP == 1 )
|
32
|
+
#include "e_readcml.hpp"
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#include "e_inchi_atom.h"
|
36
|
+
|
37
|
+
|
38
|
+
#define MIN_STDATA_X_COORD 0.0
|
39
|
+
#define MAX_STDATA_X_COORD 256.0
|
40
|
+
#define MIN_STDATA_Y_COORD 0.0
|
41
|
+
#define MAX_STDATA_Y_COORD 256.0
|
42
|
+
#define MIN_STDATA_Z_COORD 0.0
|
43
|
+
#define MAX_STDATA_Z_COORD 256.0
|
44
|
+
#define MAX_STDATA_AVE_BOND_LENGTH 20.0
|
45
|
+
#define MIN_STDATA_AVE_BOND_LENGTH 10.0
|
46
|
+
|
47
|
+
|
48
|
+
/* local prototypes */
|
49
|
+
inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
|
50
|
+
int bDoNotAddH, int *err, char *pStrErr );
|
51
|
+
int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr );
|
52
|
+
|
53
|
+
int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
|
54
|
+
int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
|
55
|
+
long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr );
|
56
|
+
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr );
|
57
|
+
|
58
|
+
|
59
|
+
/* too long ave. bond length prevents structure from displaying */
|
60
|
+
/* According to Steve, a standard bond length is 10. 9-24-97 DCh */
|
61
|
+
/* Ave. bond length in MainLib is 20. Also fixed Average */
|
62
|
+
/* bond length calculation by introducing num_avg_bonds */
|
63
|
+
/* in mol_to_stdata(). 12-9-99 DCh. */
|
64
|
+
|
65
|
+
/******************************************************************************************************/
|
66
|
+
/*
 * mol_to_inchi_Atom
 *
 * Converts the atom and bond tables of a parsed MOLfile into an array of
 * inchi_Atom (element name, isotopic mass difference, charge, radical,
 * neighbor lists, bond types, bond stereo and the implicit-H request).
 * Coordinates are NOT copied here; see mol_to_inchi_Atom_xyz().
 *
 *   mol_data    parsed MOLfile; NULL or an empty atom table => "no structure"
 *   num_atoms   out: number of atoms taken from the MOLfile (0 on entry checks
 *               that fail before the atom count is read)
 *   num_bonds   out: number of distinct bonds accepted
 *   at_inp      caller-supplied array to fill, or NULL to allocate one with
 *               e_CreateInchi_Atom()
 *   bDoNotAddH  if zero, atoms without a MOLfile valence get num_iso_H[0] = -1
 *   err         out: 0, a negative fatal code (-1 out of RAM, -2 program
 *               error) or an OR of warning bits:
 *                  1  bond refers to an impossible atom number (ignored)
 *                  2  multiple bonds between the same two atoms
 *                  4  an atom has too many bonds (bond ignored)
 *                  8  unrecognized bond type replaced with a single bond
 *                 16  unrecognized bond stereo
 *                 32  1 or more than 3 aromatic bonds at an atom
 *   pStrErr     message buffer used implicitly by the MOLFILE_ERR_* macros
 *
 * Returns the filled array, or NULL when there is no structure or the
 * allocation failed.
 */
inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
                               int bDoNotAddH, int *err, char *pStrErr )
{
    inchi_Atom *at = NULL;
    AT_NUM     *pDup1, *pDup2;
    int         iAtom, iBond, k;
    int         a1, a2, n1, n2;
    int         nNewBonds = 0;
    S_CHAR      cBondStereo, cBondType;
    S_CHAR      cStereo1, cStereo2;

    *err       = 0;
    *num_bonds = 0;
    *num_atoms = 0;

    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ) {
        goto exit_function; /* no structure */
    }
    if ( 0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond ) {
        goto exit_function; /* bonds announced but no bond table */
    }
    *num_atoms = mol_data->ctab.nNumberOfAtoms;
    if ( *num_atoms <= 0 ) {
        goto exit_function; /* no structure */
    }

    /* use the caller's array or allocate a new one */
    at = at_inp;
    if ( !at ) {
        at = e_CreateInchi_Atom( *num_atoms );
        if ( !at ) {
            *err = -1;
            MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
        }
    }

    /* copy per-atom data; alias records are interpreted later by the InChI dll */
    for ( iAtom = 0; iAtom < *num_atoms; iAtom ++ ) {
        e_mystrncpy( at[iAtom].elname, mol_data->ctab.MolAtom[iAtom].szAtomSymbol, sizeof(at->elname) );
        at[iAtom].isotopic_mass = mol_data->ctab.MolAtom[iAtom].cMassDifference;
        at[iAtom].charge        = mol_data->ctab.MolAtom[iAtom].cCharge;
        at[iAtom].radical       = mol_data->ctab.MolAtom[iAtom].cRadical;
#if( SINGLET_IS_TRIPLET == 1 )
        if ( at[iAtom].radical == RADICAL_SINGLET ) {
            at[iAtom].radical = RADICAL_TRIPLET;
        }
#endif
    }

    /* copy bond info */
    for ( iBond = 0; iBond < mol_data->ctab.nNumberOfBonds; iBond ++ ) {
        int bRoom1, bRoom2;

        cBondStereo = mol_data->ctab.MolBond[iBond].cBondStereo;
        cBondType   = mol_data->ctab.MolBond[iBond].cBondType;
        a1          = mol_data->ctab.MolBond[iBond].nAtomNo1-1;
        a2          = mol_data->ctab.MolBond[iBond].nAtomNo2-1;

        if ( a1 == a2 ||
             a1 < 0 || a1 >= *num_atoms ||
             a2 < 0 || a2 >= *num_atoms ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }

        /* is this pair already connected (from either side)? */
        pDup1 = e_is_in_the_slist( at[a1].neighbor, (AT_NUM)a2, at[a1].num_bonds );
        pDup2 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );

        /* an end needs a free neighbor slot unless it reuses an existing one */
        bRoom1 = ( pDup1 != NULL ) || at[a1].num_bonds < MAXVAL;
        bRoom2 = ( pDup2 != NULL ) || at[a2].num_bonds < MAXVAL;

        if ( !bRoom1 || !bRoom2 ) {
            char szMsg[64];
            *err |= 4; /*  too large number of bonds. Some bonds ignored. */
            sprintf( szMsg, "Atom '%s' has more than %d bonds",
                            at[a1].num_bonds>= MAXVAL? at[a1].elname:at[a2].elname, MAXVAL );
            MOLFILE_ERR_SET (*err, 0, szMsg);
            continue;
        }

        if ( pDup1 || pDup2 ) {
            /* repeated bond: overwrite the existing slot(s), do not count it again */
            if ( pDup1 ) {
                n1 = (int)(pDup1 - at[a1].neighbor);
            } else {
                n1 = at[a1].num_bonds ++;
            }
            if ( pDup2 ) {
                n2 = (int)(pDup2 - at[a2].neighbor);
            } else {
                n2 = at[a2].num_bonds ++;
            }
            MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
            *err |= 2; /*  multiple bonds between atoms */
        } else {
            /* a new bond: take the next free slot on both atoms */
            n1 = at[a1].num_bonds ++;
            n2 = at[a2].num_bonds ++;
            nNewBonds ++;
        }

        if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
            char szBondType[16];
            sprintf( szBondType, "%d", cBondType );
            cBondType = 1;
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
            MOLFILE_ERR_SET (*err, 0, szBondType);
            *err |= 8; /*  Unrecognized Bond type replaced with single bond */
        }

        /* bond type, stored symmetrically */
        at[a2].bond_type[n2] = cBondType;
        at[a1].bond_type[n1] = cBondType;
        /* connection */
        at[a1].neighbor[n1] = (AT_NUM)a2;
        at[a2].neighbor[n2] = (AT_NUM)a1;

        /* stereo: the two ends get complementary "1"/"2" codes */
        switch ( cBondStereo ) {
        case 0:
            cStereo1 = INCHI_BOND_STEREO_NONE;
            cStereo2 = INCHI_BOND_STEREO_NONE;
            break;
        case INPUT_STEREO_SNGL_UP:       /* 1 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1UP;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2UP;
            break;
        case INPUT_STEREO_DBLE_EITHER:   /* 3 */
            cStereo1 = INCHI_BOND_STEREO_DOUBLE_EITHER;
            cStereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER;
            break;
        case INPUT_STEREO_SNGL_EITHER:   /* 4 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER;
            break;
        case INPUT_STEREO_SNGL_DOWN:     /* 6 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN;
            break;
        default:
            /* the bond itself stays recorded; only its stereo is skipped */
            *err |= 16; /*  Ignored unrecognized Bond stereo */
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
            continue;
        }
        at[a1].bond_stereo[n1] = cStereo1; /*  >0: the wedge (pointed) end is at this atom */
        at[a2].bond_stereo[n2] = cStereo2; /*  <0: the wedge (pointed) end is at the opposite atom */
    }
    *num_bonds = nNewBonds;

    /* special Molfile valences */
    for ( a1 = 0; a1 < *num_atoms; a1 ++ ) {
        int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1];
        int nTypeIdx, nAromatic;
        int nBondsValence, nMolValence;
        int bHasSpecialValence;

        bHasSpecialValence = mol_data->ctab.MolAtom[a1].cValence &&
                             (mol_data->ctab.MolAtom[a1].cValence != 15 || at[a1].num_bonds);

        if ( !bHasSpecialValence ) {
            if ( mol_data->ctab.MolAtom[a1].cAtomAliasedFlag ||
                 ( mol_data->ctab.MolAtom[a1].cValence == 15 && !at[a1].num_bonds ) ) {
                at[a1].num_iso_H[0] = 0;
            } else
            if ( !bDoNotAddH ) {
                at[a1].num_iso_H[0] = -1;
            }
            continue;
        }

        /* Molfile contains special valence => calculate number of H */
        /* NOTE(review): a valence of 15 on a bonded atom is used numerically
           below (15 - bond valence); confirm that this is intended. */
        nMolValence = mol_data->ctab.MolAtom[a1].cValence;

        /* histogram of bond types at this atom */
        memset( num_bond_type, 0, sizeof(num_bond_type) );
        for ( k = 0; k < at[a1].num_bonds; k ++ ) {
            nTypeIdx = at[a1].bond_type[k] - MIN_INPUT_BOND_TYPE;
            if ( nTypeIdx < 0 || nTypeIdx > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
                nTypeIdx = 0;
                MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
            }
            num_bond_type[ nTypeIdx ] ++;
        }

        /* single, double and triple bonds contribute their order */
        nBondsValence = 0;
        for ( k = 0; MIN_INPUT_BOND_TYPE + k <= 3 && MIN_INPUT_BOND_TYPE + k <= MAX_INPUT_BOND_TYPE; k ++ ) {
            nBondsValence += (MIN_INPUT_BOND_TYPE + k) * num_bond_type[k];
        }

        /* accept input aromatic bonds for now */
        nAromatic = 0;
        if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE ) {
            nAromatic = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE];
        }
        if ( nAromatic == 2 ) {
            nBondsValence += 3;  /* =A- */
        } else
        if ( nAromatic == 3 ) {
            nBondsValence += 4;  /* =A< */
        } else
        if ( nAromatic ) {
            /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */
            for ( k = 0; k < at[a1].num_bonds; k ++ ) {
                if ( at[a1].bond_type[k] != INCHI_BOND_TYPE_ALTERN ) {
                    continue;
                }
                a2    = at[a1].neighbor[k];
                pDup1 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );
                if ( !pDup1 ) {
                    /* the neighbor does not list this atom back */
                    *err = -2;  /*  Program error */
                    MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
                    goto exit_function; /*  no structure */
                }
                at[a2].bond_type[pDup1-at[a2].neighbor] = INCHI_BOND_TYPE_SINGLE;
                at[a1].bond_type[k]                     = INCHI_BOND_TYPE_SINGLE;
            }
            nBondsValence += nAromatic;
            *err |= 32;
            MOLFILE_ERR_SET (*err, 0, "Atom has more than 3 aromatic bonds");
        }

        /* set number of hydrogen atoms */
        if ( nMolValence >= nBondsValence ) {
            at[a1].num_iso_H[0] = nMolValence - nBondsValence;
        }
    }

exit_function:;
    return at;
}
|
267
|
+
/******************************************************************************************************/
|
268
|
+
/*
 * Stretch coefficient that maps the range [min_v, max_v] onto target_span.
 * Returns 0.0 when the range is negligible compared with the magnitude of
 * its end points (i.e. the coordinate is effectively constant).
 */
static double e_axis_stretch_coeff( double min_v, double max_v, double target_span, double small_coeff )
{
    if ( max_v - min_v <= small_coeff*(fabs(max_v) + fabs(min_v)) ) {
        return 0.0;
    }
    return target_span/(max_v - min_v);
}

/*
 * mol_to_inchi_Atom_xyz
 *
 * Copies atom coordinates from a parsed MOLfile into at[] and reports the
 * dimensionality of the drawing.
 *
 *   mol_data   parsed MOLfile; NULL or an empty atom table => returns 0
 *   num_atoms  ignored on input: it is overwritten with the MOLfile atom count
 *   at         array of at least that many atoms; receives x, y, z
 *   err        in/out: bit 1 is OR-ed in for every bond that refers to an
 *              impossible atom number (such bonds are skipped)
 *   pStrErr    message buffer used implicitly by MOLFILE_ERR_SET
 *
 * Returns 0 (no coordinates), 2 (z is constant) or 3.
 *
 * If NORMALIZE_INP_COORD == 1 the coordinates are shifted to the minimum
 * corner, uniformly scaled (limited by the MIN/MAX_STDATA_AVE_BOND_LENGTH
 * rules) and rounded to integers; otherwise they are copied unchanged.
 */
int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr )
{
    int      i, num_dimensions=0;
    int      num_bonds;
    double max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
    double min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
    double macheps = 1.0e-10, small_coeff = 0.00001;
    double x_coeff, y_coeff, z_coeff, coeff, average_bond_length;

    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond ||
         0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        goto exit_function; /*  no structure */
    }

    /* bounding box of the input coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
        min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
        max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
        min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
        max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
        min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
    }

    /* average length of the valid bonds */
    num_bonds = 0;
    average_bond_length = 0.0;
    for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        int    a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        int    a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
        double dx, dy, dz;

        /* Validate the atom numbers BEFORE indexing MolAtom[] with them:
           a malformed bond record must not cause an out-of-bounds read. */
        if ( a1 < 0 || a1 >= num_atoms ||
             a2 < 0 || a2 >= num_atoms ||
             a1 == a2 ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }
        dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
        dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
        dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;
        average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
        num_bonds ++;
    }

    /* per-axis stretching coefficients; 0.0 marks a constant coordinate */
    x_coeff = e_axis_stretch_coeff( min_x, max_x, MAX_STDATA_X_COORD - MIN_STDATA_X_COORD, small_coeff );
    y_coeff = e_axis_stretch_coeff( min_y, max_y, MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD, small_coeff );
    z_coeff = e_axis_stretch_coeff( min_z, max_z, MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD, small_coeff );

    num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
                     (fabs(z_coeff) > macheps)? 3: 0;

    switch ( num_dimensions ) {
    case 0:
        coeff = 0.0;
        break;
    case 2:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = x_coeff;
        }else
        if ( y_coeff > macheps ){
            coeff = y_coeff;
        }else{
            coeff = 1.0;
        }
        break;
    case 3:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
            coeff = inchi_min( coeff, z_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = inchi_min( x_coeff, z_coeff );
        }else
        if ( y_coeff > macheps ){
            coeff = inchi_min( y_coeff, z_coeff );
        }else{
            coeff = z_coeff;
        }
        break;
    default:
        coeff = 0.0;
    }

    /* keep the scaled average bond length inside the allowed range */
    if ( num_bonds > 0 ) {
        average_bond_length /= (double)num_bonds;
        if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
            coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
        } else
        if ( average_bond_length * coeff < macheps ) {
            coeff = 1.0; /* all lengths are of zero length */
        } else
        if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
            coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
        }
    }
#if( NORMALIZE_INP_COORD == 1 )
    /* set integral coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
        y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
        z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
        /* floor() is only applied to non-negative arguments to get the
         * nearest integer; negative values are mirrored.
         */
        at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
        at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
        at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
    }
#else
    /* set input coordinates unchanged (coeff is computed but not applied) */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        at[i].x = x;
        at[i].y = y;
        at[i].z = z;
    }
#endif

exit_function:;
    return num_dimensions;
}
|
414
|
+
/****************************************************************************/
|
415
|
+
/*
 * GetMolfileNumber
 *
 * Extracts the structure number from a MOLfile header whose name line reads
 * "Structure #<number>" (case-insensitive prefix, nothing after the number)
 * and whose second line starts with INCHI_NAME and later contains
 * "SDfile Output".
 *
 * Returns the number, or 0 if pHdr is NULL or any of these conditions fails.
 */
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
{
    static char szPrefix[]  = "Structure #";
    static char szProgram[] = INCHI_NAME;
    char *pNumEnd = NULL;
    char *pLine2;
    long  lNumber;

    if ( !pHdr ) {
        return 0;
    }
    /* the molecule name must begin with the prefix */
    if ( memicmp( pHdr->szMoleculeName, szPrefix, sizeof(szPrefix)-1 ) ) {
        return 0;
    }
    /* ... followed by a decimal number that runs to the end of the name */
    lNumber = strtol( pHdr->szMoleculeName + sizeof(szPrefix)-1, &pNumEnd, 10 );
    if ( !pNumEnd || *pNumEnd ) {
        return 0;
    }
    /* the second header line must identify the program and the output kind */
    pLine2 = pHdr->szMoleculeLine2;
    if ( memicmp( pLine2, szProgram, sizeof(szProgram)-1 ) ) {
        return 0;
    }
    if ( !strstr( pLine2+sizeof(szProgram)-1, "SDfile Output" ) ) {
        return 0;
    }
    return lNumber;
}
|
435
|
+
|
436
|
+
/****************************************************************************/
|
437
|
+
/*
 * MolfileToInchi_Atom
 *
 * Reads one MOLfile/SDfile record with e_read_sdfile_segment() and, when
 * 'at' is given, converts it to an inchi_Atom array.
 *
 *   at              NULL: only the header and counts are read, and the atom
 *                   count from the counts block is returned.
 *                   Otherwise *at is either NULL (an array is allocated by
 *                   mol_to_inchi_Atom) or a caller array of max_num_at atoms,
 *                   which is zeroed before use.
 *   max_num_at      largest accepted number of atoms
 *   num_dimensions  out: result of mol_to_inchi_Atom_xyz()
 *   num_bonds       out: number of bonds, see mol_to_inchi_Atom()
 *   pSdfLabel /     if the label is MOLFILENAME, MOLFILELINE2 or
 *   pSdfValue       MOLFILECOMMENT (case-insensitive), the trimmed header
 *                   line is copied to pSdfValue; if the value ends up empty
 *                   its original first character is restored
 *   lMolfileNumber  out (optional): GetMolfileNumber() of the header
 *   pInpAtomFlags   in/out (optional): FLAG_INP_AT_CHIRAL or
 *                   FLAG_INP_AT_NONCHIRAL is OR-ed in from the chiral flag
 *   err             in/out: positive conversion warnings are returned + 100;
 *                   70 (returned as 170) means too many atoms
 *
 * Returns the number of atoms, or -1 if the structure has too many atoms.
 */
int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
                         int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
                         long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
{
    int               nAtomsRead = 0;
    MOL_DATA         *pMol       = NULL;
    MOL_HEADER_BLOCK  HdrOnly, *pHdrOnly = NULL, *pHdr = NULL;
    MOL_CTAB          CtabOnly, *pCtabOnly = NULL;
    char              cSavedFirstChar = '\0';
#ifdef CML_DEBUG
    FILE *f_p;
#endif

    if ( !at ) {
        /* header-only mode: let the reader fill local header/counts blocks */
        pHdrOnly  = &HdrOnly;
        pCtabOnly = &CtabOnly;
    } else
    if ( *at && max_num_at ) {
        memset( *at, 0, max_num_at * sizeof(**at) );
    }

    /* remember the first character so that an empty result can be undone */
    if ( pSdfValue ) {
        cSavedFirstChar = pSdfValue[0];
        pSdfValue[0]    = '\0';
    }

    pMol = e_read_sdfile_segment(inp_molfile, pHdrOnly, pCtabOnly, 0, NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );

    /* whichever header block was filled, if any */
    if ( pMol && !pHdrOnly ) {
        pHdr = &pMol->hdr;
    } else
    if ( !pMol && pHdrOnly ) {
        pHdr = pHdrOnly;
    } else {
        pHdr = NULL;
    }

    if ( lMolfileNumber && pHdr ) {
        *lMolfileNumber = GetMolfileNumber( pHdr );
    }

    /* pseudo SDF labels that refer to the three MOLfile header lines */
    if ( pSdfValue && pSdfLabel && pSdfLabel[0] && pHdr ) {
        char *pHdrLine = NULL;

        if ( !stricmp(pSdfLabel, "MOLFILENAME") ) {
            pHdrLine = pHdr->szMoleculeName;
        } else
        if ( !stricmp(pSdfLabel, "MOLFILELINE2") ) {
            pHdrLine = pHdr->szMoleculeLine2;
        } else
        if ( !stricmp(pSdfLabel, "MOLFILECOMMENT") ) {
            pHdrLine = pHdr->szComment;
        }
        if ( pHdrLine ) {
            e_mystrncpy( pSdfValue, pHdrLine, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        }
        if ( !pSdfValue[0] ) {
            pSdfValue[0] = cSavedFirstChar;
        }
    }

    if ( pMol && at && !*err ) {
        if ( pMol->ctab.nNumberOfAtoms <= max_num_at ) {
            /* the same call serves both an allocated *at and *at == NULL */
            *at = mol_to_inchi_Atom( pMol, &nAtomsRead, num_bonds, *at, bDoNotAddH, err, pStrErr );
            if ( *err >= 0 ) {
                *num_dimensions = mol_to_inchi_Atom_xyz( pMol, nAtomsRead, *at, err, pStrErr );
            }
        } else {
            MOLFILE_ERR_SET (*err, 0, "Too many atoms");
            *err = 70;
            nAtomsRead = -1;
        }
        /* move positive codes out of the range used by the reader */
        if ( *err > 0 ) {
            *err += 100;
        }
        /* 11-16-2004: use Chiral flag */
        if ( nAtomsRead > 0 && *at && pInpAtomFlags ) {
            *pInpAtomFlags |= pMol->ctab.cChiralFlag ? FLAG_INP_AT_CHIRAL : FLAG_INP_AT_NONCHIRAL;
        }
    } else
    if ( !at ) {
        nAtomsRead = pCtabOnly->nNumberOfAtoms;
    }

    if ( !pHdrOnly ) {
        e_delete_mol_data( pMol );
    }
#ifdef CML_DEBUG
    puts ("MOL");
    f_p = fopen ("mol.dbg", "a");
    if (f_p)
    {
        PrintInpAtom (f_p, *at, nAtomsRead);
        fclose (f_p);
    }
    else
    {
        puts ("Couldn't open file");
    }
#endif

    return nAtomsRead;
}
|
543
|
+
/**********************************************************************************/
|
544
|
+
int e_MolfileToInchi_Input( FILE *inp_molfile, inchi_Input *orig_at_data, int bMergeAllInputStructures,
|
545
|
+
int bDoNotAddH, int bAllowEmptyStructure,
|
546
|
+
const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
|
547
|
+
INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
|
548
|
+
{
|
549
|
+
/* inp_ATOM *at = NULL; */
|
550
|
+
int num_dimensions_new;
|
551
|
+
int num_inp_bonds_new;
|
552
|
+
int num_inp_atoms_new;
|
553
|
+
inchi_Atom *at_new = NULL;
|
554
|
+
inchi_Atom *at_old = NULL;
|
555
|
+
int nNumAtoms = 0;
|
556
|
+
int i, j;
|
557
|
+
|
558
|
+
if ( pStrErr ) {
|
559
|
+
pStrErr[0] = '\0';
|
560
|
+
}
|
561
|
+
|
562
|
+
/*FreeOrigAtData( orig_at_data );*/
|
563
|
+
|
564
|
+
do {
|
565
|
+
|
566
|
+
at_old = orig_at_data? orig_at_data->atom : NULL; /* save pointer to the previous allocation */
|
567
|
+
num_inp_atoms_new =
|
568
|
+
MolfileToInchi_Atom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, MAX_ATOMS,
|
569
|
+
&num_dimensions_new, &num_inp_bonds_new,
|
570
|
+
pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
|
571
|
+
|
572
|
+
|
573
|
+
if ( num_inp_atoms_new <= 0 && !*err ) {
|
574
|
+
if ( !bAllowEmptyStructure ) {
|
575
|
+
MOLFILE_ERR_SET (*err, 0, "Empty structure"); /* the message will be issued by the InChI library */
|
576
|
+
}
|
577
|
+
*err = 98;
|
578
|
+
} else
|
579
|
+
if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_atoms > 0 && bMergeAllInputStructures ) {
|
580
|
+
*err = 0; /* end of file */
|
581
|
+
break;
|
582
|
+
} else
|
583
|
+
if ( num_inp_atoms_new > 0 && orig_at_data ) {
|
584
|
+
/* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
|
585
|
+
nNumAtoms = num_inp_atoms_new + orig_at_data->num_atoms;
|
586
|
+
if ( nNumAtoms >= MAX_ATOMS ) {
|
587
|
+
MOLFILE_ERR_SET (*err, 0, "Too many atoms");
|
588
|
+
*err = 70;
|
589
|
+
orig_at_data->num_atoms = -1;
|
590
|
+
} else
|
591
|
+
if ( !at_old ) {
|
592
|
+
/* the first structure */
|
593
|
+
orig_at_data->atom = at_new;
|
594
|
+
at_new = NULL;
|
595
|
+
orig_at_data->num_atoms = num_inp_atoms_new;
|
596
|
+
} else
|
597
|
+
if ( orig_at_data->atom = e_CreateInchi_Atom( nNumAtoms ) ) {
|
598
|
+
/* switch at_new <--> orig_at_data->at; */
|
599
|
+
if ( orig_at_data->num_atoms ) {
|
600
|
+
memcpy( orig_at_data->atom, at_old, orig_at_data->num_atoms * sizeof(orig_at_data->atom[0]) );
|
601
|
+
/* adjust numbering in the newly read structure */
|
602
|
+
for ( i = 0; i < num_inp_atoms_new; i ++ ) {
|
603
|
+
for ( j = 0; j < at_new[i].num_bonds; j ++ ) {
|
604
|
+
at_new[i].neighbor[j] += orig_at_data->num_atoms;
|
605
|
+
}
|
606
|
+
}
|
607
|
+
}
|
608
|
+
e_FreeInchi_Atom( &at_old );
|
609
|
+
/* copy newly read structure */
|
610
|
+
memcpy( orig_at_data->atom + orig_at_data->num_atoms,
|
611
|
+
at_new,
|
612
|
+
num_inp_atoms_new * sizeof(orig_at_data->atom[0]) );
|
613
|
+
/* add other things */
|
614
|
+
orig_at_data->num_atoms += num_inp_atoms_new;
|
615
|
+
} else {
|
616
|
+
MOLFILE_ERR_SET (*err, 0, "Out of RAM");
|
617
|
+
*err = -1;
|
618
|
+
}
|
619
|
+
} else
|
620
|
+
if ( num_inp_atoms_new > 0 ) {
|
621
|
+
nNumAtoms += num_inp_atoms_new;
|
622
|
+
}
|
623
|
+
e_FreeInchi_Atom( &at_new );
|
624
|
+
|
625
|
+
} while ( !*err && bMergeAllInputStructures );
|
626
|
+
/*
|
627
|
+
if ( !*err ) {
|
628
|
+
orig_at_data->num_components =
|
629
|
+
MarkDisconnectedComponents( orig_at_data );
|
630
|
+
if ( orig_at_data->num_components == 0 ) {
|
631
|
+
MOLFILE_ERR_SET (*err, 0, "No components found");
|
632
|
+
*err = 99;
|
633
|
+
}
|
634
|
+
if ( orig_at_data->num_components < 0 ) {
|
635
|
+
MOLFILE_ERR_SET (*err, 0, "Too many components");
|
636
|
+
*err = 99;
|
637
|
+
}
|
638
|
+
}
|
639
|
+
*/
|
640
|
+
e_FreeInchi_Atom( &at_new );
|
641
|
+
if ( *err ) {
|
642
|
+
e_FreeInchi_Input( orig_at_data );
|
643
|
+
}
|
644
|
+
if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
|
645
|
+
MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
|
646
|
+
}
|
647
|
+
return orig_at_data? orig_at_data->num_atoms : nNumAtoms;
|
648
|
+
}
|
649
|
+
|