rino 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/extconf.rb +1 -24
- data/ext/libinchi.so +0 -0
- data/ext/src/aux2atom.h +120 -39
- data/ext/src/comdef.h +3 -3
- data/ext/src/dispstru.c +2547 -0
- data/ext/src/dispstru.h +73 -0
- data/ext/src/extr_ct.h +5 -2
- data/ext/src/ichi.h +27 -11
- data/ext/src/ichi_bns.c +1800 -254
- data/ext/src/ichi_bns.h +205 -4
- data/ext/src/ichican2.c +197 -86
- data/ext/src/ichicano.c +8 -13
- data/ext/src/ichicano.h +2 -2
- data/ext/src/ichicans.c +11 -6
- data/ext/src/ichicant.h +2 -2
- data/ext/src/ichicomn.h +2 -2
- data/ext/src/ichicomp.h +19 -4
- data/ext/src/ichidrp.h +9 -5
- data/ext/src/ichierr.h +5 -3
- data/ext/src/ichiisot.c +2 -2
- data/ext/src/ichimain.c +461 -0
- data/ext/src/ichimain.h +23 -15
- data/ext/src/ichimak2.c +6 -6
- data/ext/src/ichimake.c +843 -42
- data/ext/src/ichimake.h +4 -2
- data/ext/src/ichimap1.c +5 -5
- data/ext/src/ichimap2.c +2 -2
- data/ext/src/ichimap4.c +34 -21
- data/ext/src/ichinorm.c +11 -5
- data/ext/src/ichinorm.h +3 -2
- data/ext/src/ichiparm.c +2 -2
- data/ext/src/ichiparm.h +232 -30
- data/ext/src/ichiprt1.c +35 -11
- data/ext/src/ichiprt2.c +78 -7
- data/ext/src/ichiprt3.c +300 -120
- data/ext/src/ichiqueu.c +17 -2
- data/ext/src/ichiread.c +6932 -0
- data/ext/src/ichiring.c +3 -2
- data/ext/src/ichiring.h +2 -2
- data/ext/src/ichirvr1.c +4891 -0
- data/ext/src/ichirvr2.c +6344 -0
- data/ext/src/ichirvr3.c +5499 -0
- data/ext/src/ichirvr4.c +3177 -0
- data/ext/src/ichirvr5.c +1166 -0
- data/ext/src/ichirvr6.c +1287 -0
- data/ext/src/ichirvr7.c +2319 -0
- data/ext/src/ichirvrs.h +882 -0
- data/ext/src/ichisize.h +2 -2
- data/ext/src/ichisort.c +5 -5
- data/ext/src/ichister.c +281 -86
- data/ext/src/ichister.h +9 -3
- data/ext/src/ichitaut.c +208 -9
- data/ext/src/ichitaut.h +13 -11
- data/ext/src/ichitime.h +16 -2
- data/ext/src/inchicmp.h +107 -0
- data/ext/src/inpdef.h +6 -3
- data/ext/src/libinchi_wrap.c +912 -0
- data/ext/src/lreadmol.h +34 -31
- data/ext/src/mode.h +244 -7
- data/ext/src/mol2atom.c +1060 -0
- data/ext/src/mol2atom.h +31 -0
- data/ext/src/readinch.c +239 -0
- data/ext/src/readmol.c +28 -0
- data/ext/src/{e_readmol.h → readmol.h} +7 -9
- data/ext/src/runichi.c +251 -177
- data/ext/src/strutil.c +444 -238
- data/ext/src/strutil.h +150 -11
- data/ext/src/util.c +176 -118
- data/ext/src/util.h +15 -3
- data/lib/rino.rb +71 -3
- data/test/test.rb +33 -4
- metadata +22 -34
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/e_0dstereo.c +0 -3014
- data/ext/src/e_0dstereo.h +0 -31
- data/ext/src/e_comdef.h +0 -57
- data/ext/src/e_ctl_data.h +0 -147
- data/ext/src/e_ichi_io.c +0 -498
- data/ext/src/e_ichi_io.h +0 -40
- data/ext/src/e_ichi_parms.c +0 -37
- data/ext/src/e_ichi_parms.h +0 -41
- data/ext/src/e_ichicomp.h +0 -50
- data/ext/src/e_ichierr.h +0 -40
- data/ext/src/e_ichimain.c +0 -593
- data/ext/src/e_ichisize.h +0 -43
- data/ext/src/e_inchi_atom.c +0 -75
- data/ext/src/e_inchi_atom.h +0 -33
- data/ext/src/e_inpdef.h +0 -41
- data/ext/src/e_mode.h +0 -706
- data/ext/src/e_mol2atom.c +0 -649
- data/ext/src/e_readinch.c +0 -58
- data/ext/src/e_readmol.c +0 -54
- data/ext/src/e_readstru.c +0 -251
- data/ext/src/e_readstru.h +0 -33
- data/ext/src/e_util.c +0 -284
- data/ext/src/e_util.h +0 -61
- data/ext/src/ichilnct.c +0 -286
- data/ext/src/inchi_api.h +0 -670
- data/ext/src/inchi_dll.c +0 -1480
- data/ext/src/inchi_dll.h +0 -34
- data/ext/src/inchi_dll_main.c +0 -23
- data/ext/src/inchi_dll_main.h +0 -31
- data/ext/src/ruby_inchi_main.c +0 -558
data/ext/src/e_mol2atom.c
DELETED
@@ -1,649 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
-
* International Chemical Identifier (InChI)
|
4
|
-
* Version 1
|
5
|
-
* Software version 1.00
|
6
|
-
* April 13, 2005
|
7
|
-
* Developed at NIST
|
8
|
-
*/
|
9
|
-
|
10
|
-
#include <stdio.h>
|
11
|
-
#include <stdlib.h>
|
12
|
-
#include <string.h>
|
13
|
-
#include <ctype.h>
|
14
|
-
#include <stdarg.h>
|
15
|
-
#include <errno.h>
|
16
|
-
#include <limits.h>
|
17
|
-
#include <float.h>
|
18
|
-
#include <math.h>
|
19
|
-
|
20
|
-
#include "e_mode.h"
|
21
|
-
#include "inchi_api.h"
|
22
|
-
#include "e_ctl_data.h"
|
23
|
-
|
24
|
-
#include "e_comdef.h"
|
25
|
-
#include "e_util.h"
|
26
|
-
#include "e_ichicomp.h"
|
27
|
-
|
28
|
-
#include "e_readmol.h"
|
29
|
-
#include "e_inpdef.h"
|
30
|
-
|
31
|
-
#if( ADD_CMLPP == 1 )
|
32
|
-
#include "e_readcml.hpp"
|
33
|
-
#endif
|
34
|
-
|
35
|
-
#include "e_inchi_atom.h"
|
36
|
-
|
37
|
-
|
38
|
-
#define MIN_STDATA_X_COORD 0.0
|
39
|
-
#define MAX_STDATA_X_COORD 256.0
|
40
|
-
#define MIN_STDATA_Y_COORD 0.0
|
41
|
-
#define MAX_STDATA_Y_COORD 256.0
|
42
|
-
#define MIN_STDATA_Z_COORD 0.0
|
43
|
-
#define MAX_STDATA_Z_COORD 256.0
|
44
|
-
#define MAX_STDATA_AVE_BOND_LENGTH 20.0
|
45
|
-
#define MIN_STDATA_AVE_BOND_LENGTH 10.0
|
46
|
-
|
47
|
-
|
48
|
-
/* local prototypes */
|
49
|
-
inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
|
50
|
-
int bDoNotAddH, int *err, char *pStrErr );
|
51
|
-
int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr );
|
52
|
-
|
53
|
-
int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
|
54
|
-
int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
|
55
|
-
long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr );
|
56
|
-
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr );
|
57
|
-
|
58
|
-
|
59
|
-
/* too long ave. bond length prevents structure from displaying */
|
60
|
-
/* According to Steve, a standard bond length is 10. 9-24-97 DCh */
|
61
|
-
/* Ave. bond length in MainLib is 20. Also fixed Average */
|
62
|
-
/* bond length calculation by introducing num_avg_bonds */
|
63
|
-
/* in mol_to_stdata(). 12-9-99 DCh. */
|
64
|
-
|
65
|
-
/******************************************************************************************************/
|
66
|
-
/*
 * Convert the atom and bond blocks of a parsed MOLfile into an inchi_Atom array.
 *
 *   mol_data    parsed MOLfile; mol_data->ctab supplies the atoms and bonds
 *   num_atoms   out: atom count taken from the MOLfile (0 when there is no structure)
 *   num_bonds   out: number of distinct bonds accepted (repeated bonds between the
 *               same pair of atoms are not counted twice)
 *   at_inp      optional array to fill; when NULL an array of *num_atoms elements
 *               is obtained from e_CreateInchi_Atom()
 *   bDoNotAddH  when zero, atoms that carry no MOLfile valence and are not aliased
 *               get num_iso_H[0] = -1
 *   err         out: reset to 0 on entry; set to -1 on allocation failure and to -2
 *               on an internal inconsistency; otherwise warning bits are OR-ed in:
 *                  1  bond refers to a nonexistent atom (bond ignored)
 *                  2  multiple bonds between the same two atoms
 *                  4  atom has more than MAXVAL bonds (bond ignored)
 *                  8  unrecognized bond type (replaced by a single bond)
 *                 16  unrecognized bond stereo (stereo ignored)
 *                 32  atom has 1 or more than 3 aromatic bonds
 *   pStrErr     message buffer used implicitly by the MOLFILE_ERR_* macros
 *
 * Returns the filled array. Returns NULL when the MOLfile holds no structure
 * (even if at_inp was supplied) or when the allocation failed.
 * Coordinates are not copied here; see mol_to_inchi_Atom_xyz().
 */
inchi_Atom* mol_to_inchi_Atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inchi_Atom* at_inp,
                               int bDoNotAddH, int *err, char *pStrErr )
{
    inchi_Atom *at = NULL;
    AT_NUM     *p1, *p2;
    int         i, a1, a2, n1, n2, bonds;
    S_CHAR      cBondStereo, cBondType;
    S_CHAR      cStereo1, cStereo2;

    *err = 0;
    *num_atoms = *num_bonds = 0;

    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (*num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        goto exit_function; /* no structure */
    }

    /* allocate memory if necessary */
    if ( at_inp ) {
        at = at_inp;
    } else {
        at = e_CreateInchi_Atom( *num_atoms );
        if ( !at ) {
            *err = -1;
            MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
        }
    }

    /* copy atom info; coordinates are copied in mol_to_inchi_Atom_xyz() */
    for ( i = 0; i < *num_atoms; i ++ ) {
        e_mystrncpy( at[i].elname, mol_data->ctab.MolAtom[i].szAtomSymbol, sizeof(at->elname) );
        at[i].isotopic_mass = mol_data->ctab.MolAtom[i].cMassDifference;
        at[i].charge        = mol_data->ctab.MolAtom[i].cCharge;
        at[i].radical       = mol_data->ctab.MolAtom[i].cRadical;
#if( SINGLET_IS_TRIPLET == 1 )
        if ( at[i].radical == RADICAL_SINGLET ) {
            at[i].radical = RADICAL_TRIPLET;
        }
#endif
        /* H, charge and radical are no longer parsed out of the alias text in
           at[i].elname here; that is done in the INChI dll */
    }

    /* copy bond info */
    for ( i = 0, bonds = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        cBondStereo = mol_data->ctab.MolBond[i].cBondStereo;
        cBondType   = mol_data->ctab.MolBond[i].cBondType;
        a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;   /* MOLfile atom numbers are 1-based */
        a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;

        if ( a1 < 0 || a1 >= *num_atoms ||
             a2 < 0 || a2 >= *num_atoms ||
             a1 == a2 ) {
            *err |= 1; /* bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }

        /* check for multiple bonds between same atoms */
        p1 = e_is_in_the_slist( at[a1].neighbor, (AT_NUM)a2, at[a1].num_bonds );
        p2 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );

        if ( (p1 || p2) && (p1 || at[a1].num_bonds < MAXVAL) && (p2 || at[a2].num_bonds < MAXVAL) ) {
            /* the bond is already known on at least one side: reuse its slot there */
            n1 = p1? (int)(p1 - at[a1].neighbor) : at[a1].num_bonds ++;
            n2 = p2? (int)(p2 - at[a2].neighbor) : at[a2].num_bonds ++;
            MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
            *err |= 2; /* multiple bonds between atoms */
        } else if ( !p1 && !p2 && at[a1].num_bonds < MAXVAL && at[a2].num_bonds < MAXVAL ) {
            /* a new bond: take the next free slot on both atoms */
            n1 = at[a1].num_bonds ++;
            n2 = at[a2].num_bonds ++;
            bonds ++;
        } else {
            char szMsg[64];
            *err |= 4; /* too large number of bonds. Some bonds ignored. */
            sprintf( szMsg, "Atom '%s' has more than %d bonds",
                     at[a1].num_bonds>= MAXVAL? at[a1].elname:at[a2].elname, MAXVAL );
            MOLFILE_ERR_SET (*err, 0, szMsg);
            continue;
        }

        if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
            char szBondType[16];
            sprintf( szBondType, "%d", cBondType );
            cBondType = 1;
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
            MOLFILE_ERR_SET (*err, 0, szBondType);
            *err |= 8; /* Unrecognized Bond type replaced with single bond */
        }

        /* bond type */
        at[a1].bond_type[n1] =
        at[a2].bond_type[n2] = cBondType;
        /* connection */
        at[a1].neighbor[n1] = (AT_NUM)a2;
        at[a2].neighbor[n2] = (AT_NUM)a1;

        /* stereo */
        switch ( cBondStereo ) {
        case INPUT_STEREO_DBLE_EITHER: /* 3 */
            cStereo1 = INCHI_BOND_STEREO_DOUBLE_EITHER;
            cStereo2 = INCHI_BOND_STEREO_DOUBLE_EITHER;
            break;
        case INPUT_STEREO_SNGL_UP: /* 1 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1UP;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2UP;
            break;
        case INPUT_STEREO_SNGL_EITHER: /* 4 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1EITHER;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2EITHER;
            break;
        case INPUT_STEREO_SNGL_DOWN: /* 6 */
            cStereo1 = INCHI_BOND_STEREO_SINGLE_1DOWN;
            cStereo2 = INCHI_BOND_STEREO_SINGLE_2DOWN;
            break;
        case 0:
            cStereo1 = INCHI_BOND_STEREO_NONE;
            cStereo2 = INCHI_BOND_STEREO_NONE;
            break;
        default:
            /* the bond itself has been stored above; only its stereo mark is dropped */
            *err |= 16; /* Ignored unrecognized Bond stereo */
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
            continue;
        }
        at[a1].bond_stereo[n1] = cStereo1; /* >0: the wedge (pointed) end is at this atom */
        at[a2].bond_stereo[n2] = cStereo2; /* <0: the wedge (pointed) end is at the opposite atom */
    }
    *num_bonds = bonds;

    /* special Molfile valences */
    for ( a1 = 0; a1 < *num_atoms; a1 ++ ) {
        int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1], bond_type;
        int chem_bonds_valence, valence;

        if ( mol_data->ctab.MolAtom[a1].cValence &&
             (mol_data->ctab.MolAtom[a1].cValence != 15 || at[a1].num_bonds) ) {
            /* Molfile contains special valence => calculate number of H */
            memset( num_bond_type, 0, sizeof(num_bond_type) );
            valence = mol_data->ctab.MolAtom[a1].cValence; /* save atom valence if available */

            /* histogram of this atom's bond types */
            for ( n1 = 0; n1 < at[a1].num_bonds; n1 ++ ) {
                bond_type = at[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE;
                if ( bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
                    bond_type = 0;
                    MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
                }
                num_bond_type[ bond_type ] ++;
            }

            /* bond types up to 3 contribute their own order */
            chem_bonds_valence = 0;
            for ( n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1 ++ ) {
                chem_bonds_valence += (MIN_INPUT_BOND_TYPE + n1) * num_bond_type[n1];
            }

            if ( MIN_INPUT_BOND_TYPE <= INCHI_BOND_TYPE_ALTERN && INCHI_BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE &&
                 ( n2 = num_bond_type[INCHI_BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE] ) ) {
                /* accept input aromatic bonds for now */
                switch ( n2 ) {
                case 2:
                    chem_bonds_valence += 3;  /* =A- */
                    break;
                case 3:
                    chem_bonds_valence += 4;  /* =A< */
                    break;
                default:
                    /* if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */
                    for ( n1 = 0; n1 < at[a1].num_bonds; n1 ++ ) {
                        if ( at[a1].bond_type[n1] == INCHI_BOND_TYPE_ALTERN ) {
                            a2 = at[a1].neighbor[n1];
                            p1 = e_is_in_the_slist( at[a2].neighbor, (AT_NUM)a1, at[a2].num_bonds );
                            if ( p1 ) {
                                at[a1].bond_type[n1] =
                                at[a2].bond_type[p1-at[a2].neighbor] = INCHI_BOND_TYPE_SINGLE;
                            } else {
                                *err = -2;  /* Program error */
                                MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
                                goto exit_function; /* no structure */
                            }
                        }
                    }
                    chem_bonds_valence += n2;
                    *err |= 32;
                    /* this branch is reached for exactly one aromatic bond as well */
                    MOLFILE_ERR_SET (*err, 0, "Atom has 1 or more than 3 aromatic bonds");
                    break;
                }
            }

            /* Set number of hydrogen atoms */
            if ( valence >= chem_bonds_valence ) {
                at[a1].num_iso_H[0] = valence - chem_bonds_valence;
            }
        } else if ( mol_data->ctab.MolAtom[a1].cAtomAliasedFlag ) {
            at[a1].num_iso_H[0] = 0;
        } else if ( mol_data->ctab.MolAtom[a1].cValence == 15 && !at[a1].num_bonds ) {
            at[a1].num_iso_H[0] = 0;
        } else if ( !bDoNotAddH ) {
            at[a1].num_iso_H[0] = -1;
        }
    }

exit_function:;
    return at;
}
|
267
|
-
/******************************************************************************************************/
|
268
|
-
/*
 * Copy MOLfile coordinates into an inchi_Atom array and report how many
 * dimensions the structure has.
 *
 *   mol_data   parsed MOLfile
 *   num_atoms  ignored on input: it is overwritten with mol_data->ctab.nNumberOfAtoms
 *   at         array to receive x, y, z; must hold at least that many atoms
 *   err        in/out: NOT reset here; bit 1 is OR-ed in for each bond that
 *              refers to a nonexistent atom
 *   pStrErr    message buffer used implicitly by MOLFILE_ERR_SET
 *
 * Returns 0 (no structure, or all atoms coincide), 2 (planar: no spread in z)
 * or 3. With NORMALIZE_INP_COORD == 1 the coordinates are shifted, scaled and
 * rounded to integers; otherwise they are copied unchanged.
 */
int mol_to_inchi_Atom_xyz( MOL_DATA* mol_data, int num_atoms, inchi_Atom* at, int *err, char *pStrErr )
{
    int      i, num_dimensions=0;
    int      num_bonds;
    double   max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
    double   min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
    double   macheps = 1.0e-10, small_coeff = 0.00001;
    double   x_coeff, y_coeff, z_coeff, coeff, average_bond_length;

    /* *err is deliberately left as set by the caller */
    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        goto exit_function; /* no structure */
    }

    /* bounding box of all atoms */
    for ( i = 0; i < num_atoms; i ++ ) {
        max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
        min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
        max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
        min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
        max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
        min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
    }

    /* sum of the lengths of all valid bonds */
    num_bonds = 0;
    average_bond_length = 0.0;
    for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        int    a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        int    a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
        double dx, dy, dz;

        /* validate the atom numbers BEFORE using them as MolAtom[] indices */
        if ( a1 < 0 || a1 >= num_atoms ||
             a2 < 0 || a2 >= num_atoms ||
             a1 == a2 ) {
            *err |= 1; /* bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }

        dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
        dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
        dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;

        average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
        num_bonds ++;
    }

    /* per-axis stretching coefficients; 0 means "no spread along this axis" */
    if ( max_x - min_x <= small_coeff*(fabs(max_x) + fabs(min_x)) ) {
        x_coeff = 0.0;
    } else {
        x_coeff = (MAX_STDATA_X_COORD - MIN_STDATA_X_COORD)/(max_x - min_x);
    }

    if ( max_y - min_y <= small_coeff*(fabs(max_y) + fabs(min_y)) ) {
        y_coeff = 0.0;
    } else {
        y_coeff = (MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD)/(max_y - min_y);
    }

    if ( max_z - min_z <= small_coeff*(fabs(max_z) + fabs(min_z)) ) {
        z_coeff = 0.0;
    } else {
        z_coeff = (MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD)/(max_z - min_z);
    }

    num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
                     (fabs(z_coeff) > macheps)? 3: 0;

    switch ( num_dimensions ) {
    case 0:
        coeff = 0.0;
        break;
    case 2:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
        } else if ( x_coeff > macheps ) {
            coeff = x_coeff;
        } else if ( y_coeff > macheps ) {
            coeff = y_coeff;
        } else {
            coeff = 1.0;
        }
        break;
    case 3:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
            coeff = inchi_min( coeff, z_coeff );
        } else if ( x_coeff > macheps ) {
            coeff = inchi_min( x_coeff, z_coeff );
        } else if ( y_coeff > macheps ) {
            coeff = inchi_min( y_coeff, z_coeff );
        } else {
            coeff = z_coeff;
        }
        break;
    default:
        coeff = 0.0;
    }

    /* keep the scaled average bond length within [MIN, MAX]_STDATA_AVE_BOND_LENGTH */
    if ( num_bonds > 0 ) {
        average_bond_length /= (double)num_bonds;
        if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
            coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
        } else if ( average_bond_length * coeff < macheps ) {
            coeff = 1.0; /* all lengths are of zero length */
        } else if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
            coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
        }
    }

#if( NORMALIZE_INP_COORD == 1 )
    /* set integral coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
        y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
        z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
        /* round half away from zero, calling floor() on non-negative arguments only */
        at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
        at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
        at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
    }
#else
    /* set input coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        at[i].x = x;
        at[i].y = y;
        at[i].z = z;
    }
#endif

exit_function:;
    return num_dimensions;
}
|
414
|
-
/****************************************************************************/
|
415
|
-
/*
 * Extract the structure number from a MOLfile header.
 *
 * A non-zero number is returned only when all of the following hold:
 *   - the molecule name starts (case-insensitively) with "Structure #";
 *   - the remainder of the name is consumed entirely by strtol() in base 10;
 *   - the second header line starts (case-insensitively) with INCHI_NAME and
 *     contains "SDfile Output" somewhere after that prefix.
 * In every other case, including pHdr == NULL, the result is 0.
 */
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
{
    static char sStruct[] = "Structure #";
    static char sINCHI[]  = INCHI_NAME;
    char *pEnd = NULL;
    char *pLine2;
    long  lNumber;

    if ( !pHdr ) {
        return 0;
    }
    if ( memicmp( pHdr->szMoleculeName, sStruct, sizeof(sStruct)-1 ) ) {
        return 0;   /* the name does not carry the expected prefix */
    }

    lNumber = strtol( pHdr->szMoleculeName + sizeof(sStruct)-1, &pEnd, 10 );
    if ( !pEnd || *pEnd ) {
        return 0;   /* something other than the number follows the prefix */
    }

    pLine2 = pHdr->szMoleculeLine2;
    if ( memicmp( pLine2, sINCHI, sizeof(sINCHI)-1 ) ) {
        return 0;
    }
    if ( !strstr( pLine2+sizeof(sINCHI)-1, "SDfile Output" ) ) {
        return 0;
    }
    return lNumber;
}
|
435
|
-
|
436
|
-
/****************************************************************************/
|
437
|
-
/*
 * Read the next MOLfile/SDfile record from inp_molfile and, optionally,
 * convert it to an inchi_Atom array.
 *
 *   at              NULL: header-only mode; only the header block and the ctab
 *                   counts are requested from the reader and the return value
 *                   is the atom count found in the ctab.
 *                   non-NULL: *at is either NULL or an array of max_num_at
 *                   elements (it is zeroed here); on return *at is the value
 *                   returned by mol_to_inchi_Atom().
 *   max_num_at      largest acceptable number of atoms
 *   num_dimensions  out: result of mol_to_inchi_Atom_xyz() (only written when
 *                   the structure was converted)
 *   num_bonds       out: forwarded to mol_to_inchi_Atom()
 *   pSdfLabel/pSdfValue
 *                   when the label is MOLFILENAME, MOLFILELINE2 or
 *                   MOLFILECOMMENT (case-insensitive) the corresponding header
 *                   line is copied, trimmed, into pSdfValue
 *   Id              forwarded to e_read_sdfile_segment()
 *   lMolfileNumber  out (optional): GetMolfileNumber() of the header
 *   pInpAtomFlags   out (optional): FLAG_INP_AT_CHIRAL or FLAG_INP_AT_NONCHIRAL
 *                   is OR-ed in according to the ctab chiral flag
 *   err             in/out: positive conversion errors are offset by 100;
 *                   70 (reported as 170) means too many atoms
 *   pStrErr         message buffer used implicitly by MOLFILE_ERR_SET
 *
 * Returns the number of atoms, or -1 if the structure has too many atoms.
 */
int MolfileToInchi_Atom( FILE *inp_molfile, int bDoNotAddH, inchi_Atom **at, int max_num_at,
                         int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
                         long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
{
    int      num_atoms = 0;
    MOL_DATA *mol_data = NULL;
    MOL_HEADER_BLOCK OnlyHeaderBlock, *pOnlyHeaderBlock = NULL, *pHdr;
    MOL_CTAB         OnlyCtab,        *pOnlyCtab = NULL;
    char             cSdfValueFirstChar = '\0';
#ifdef CML_DEBUG
    FILE *f_p;
#endif

    /* both scratch structures are read below in header-only mode; make sure
       they hold defined values whatever the reader does with them */
    memset( &OnlyHeaderBlock, 0, sizeof(OnlyHeaderBlock) );
    memset( &OnlyCtab, 0, sizeof(OnlyCtab) );

    if ( at ) {
        pOnlyHeaderBlock = NULL;
        if ( *at && max_num_at ) {
            memset( *at, 0, max_num_at * sizeof(**at) );
        }
    } else {
        pOnlyHeaderBlock = &OnlyHeaderBlock;
        pOnlyCtab        = &OnlyCtab;
    }

    /* remember the first character so it can be restored if no value is found */
    if ( pSdfValue ) {
        cSdfValueFirstChar = pSdfValue[0];
        pSdfValue[0] = '\0';
    }

    mol_data = e_read_sdfile_segment(inp_molfile, pOnlyHeaderBlock, pOnlyCtab, 0, NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );

    pHdr = (  mol_data && !pOnlyHeaderBlock )? &mol_data->hdr :
           ( !mol_data &&  pOnlyHeaderBlock )? pOnlyHeaderBlock : NULL;

    if ( lMolfileNumber && pHdr ) {
        *lMolfileNumber = GetMolfileNumber( pHdr );
    }

    if ( pSdfValue &&
         pSdfLabel && pSdfLabel[0] && pHdr ) {
        if ( !stricmp(pSdfLabel, "MOLFILENAME") ) {
            e_mystrncpy( pSdfValue, pHdr->szMoleculeName, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        } else if ( !stricmp(pSdfLabel, "MOLFILELINE2") ) {
            e_mystrncpy( pSdfValue, pHdr->szMoleculeLine2, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        } else if ( !stricmp(pSdfLabel, "MOLFILECOMMENT") ) {
            e_mystrncpy( pSdfValue, pHdr->szComment, MAX_SDF_VALUE+1 );
            e_LtrimRtrim( pSdfValue, NULL );
        }
        if ( !pSdfValue[0] ) {
            pSdfValue[0] = cSdfValueFirstChar;
        }
    }

    if ( mol_data && at && !*err ) {
        /* the conversion is the same whether *at is preallocated or NULL:
           mol_to_inchi_Atom() allocates when it is handed a NULL array */
        if ( mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
            *at = mol_to_inchi_Atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
            if ( *err >= 0 ) {
                *num_dimensions = mol_to_inchi_Atom_xyz( mol_data, num_atoms, *at, err, pStrErr );
            }
        } else {
            MOLFILE_ERR_SET (*err, 0, "Too many atoms");
            *err = 70;
            num_atoms = -1;
        }
        if ( *err > 0 ) {
            *err += 100;
        }
        /* 11-16-2004: use Chiral flag */
        if ( num_atoms > 0 && at && *at && mol_data && pInpAtomFlags ) {
            if ( mol_data->ctab.cChiralFlag ) {
                *pInpAtomFlags |= FLAG_INP_AT_CHIRAL;
            } else {
                *pInpAtomFlags |= FLAG_INP_AT_NONCHIRAL;
            }
        }
    } else if ( !at ) {
        /* header-only mode: pOnlyCtab points at OnlyCtab here */
        num_atoms = pOnlyCtab->nNumberOfAtoms;
    }

    if ( !pOnlyHeaderBlock ) {
        e_delete_mol_data( mol_data );
    }

#ifdef CML_DEBUG
    puts ("MOL");
    /* there is nothing to dump in header-only mode or when no array exists */
    if ( at && *at ) {
        f_p = fopen ("mol.dbg", "a");
        if (f_p)
        {
            PrintInpAtom (f_p, *at, num_atoms);
            fclose (f_p);
        }
        else
        {
            puts ("Couldn't open file");
        }
    }
#endif

    return num_atoms;
}
|
543
|
-
/**********************************************************************************/
|
544
|
-
int e_MolfileToInchi_Input( FILE *inp_molfile, inchi_Input *orig_at_data, int bMergeAllInputStructures,
|
545
|
-
int bDoNotAddH, int bAllowEmptyStructure,
|
546
|
-
const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
|
547
|
-
INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
|
548
|
-
{
|
549
|
-
/* inp_ATOM *at = NULL; */
|
550
|
-
int num_dimensions_new;
|
551
|
-
int num_inp_bonds_new;
|
552
|
-
int num_inp_atoms_new;
|
553
|
-
inchi_Atom *at_new = NULL;
|
554
|
-
inchi_Atom *at_old = NULL;
|
555
|
-
int nNumAtoms = 0;
|
556
|
-
int i, j;
|
557
|
-
|
558
|
-
if ( pStrErr ) {
|
559
|
-
pStrErr[0] = '\0';
|
560
|
-
}
|
561
|
-
|
562
|
-
/*FreeOrigAtData( orig_at_data );*/
|
563
|
-
|
564
|
-
do {
|
565
|
-
|
566
|
-
at_old = orig_at_data? orig_at_data->atom : NULL; /* save pointer to the previous allocation */
|
567
|
-
num_inp_atoms_new =
|
568
|
-
MolfileToInchi_Atom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, MAX_ATOMS,
|
569
|
-
&num_dimensions_new, &num_inp_bonds_new,
|
570
|
-
pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
|
571
|
-
|
572
|
-
|
573
|
-
if ( num_inp_atoms_new <= 0 && !*err ) {
|
574
|
-
if ( !bAllowEmptyStructure ) {
|
575
|
-
MOLFILE_ERR_SET (*err, 0, "Empty structure"); /* the message will be issued by the InChI library */
|
576
|
-
}
|
577
|
-
*err = 98;
|
578
|
-
} else
|
579
|
-
if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_atoms > 0 && bMergeAllInputStructures ) {
|
580
|
-
*err = 0; /* end of file */
|
581
|
-
break;
|
582
|
-
} else
|
583
|
-
if ( num_inp_atoms_new > 0 && orig_at_data ) {
|
584
|
-
/* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
|
585
|
-
nNumAtoms = num_inp_atoms_new + orig_at_data->num_atoms;
|
586
|
-
if ( nNumAtoms >= MAX_ATOMS ) {
|
587
|
-
MOLFILE_ERR_SET (*err, 0, "Too many atoms");
|
588
|
-
*err = 70;
|
589
|
-
orig_at_data->num_atoms = -1;
|
590
|
-
} else
|
591
|
-
if ( !at_old ) {
|
592
|
-
/* the first structure */
|
593
|
-
orig_at_data->atom = at_new;
|
594
|
-
at_new = NULL;
|
595
|
-
orig_at_data->num_atoms = num_inp_atoms_new;
|
596
|
-
} else
|
597
|
-
if ( orig_at_data->atom = e_CreateInchi_Atom( nNumAtoms ) ) {
|
598
|
-
/* switch at_new <--> orig_at_data->at; */
|
599
|
-
if ( orig_at_data->num_atoms ) {
|
600
|
-
memcpy( orig_at_data->atom, at_old, orig_at_data->num_atoms * sizeof(orig_at_data->atom[0]) );
|
601
|
-
/* adjust numbering in the newly read structure */
|
602
|
-
for ( i = 0; i < num_inp_atoms_new; i ++ ) {
|
603
|
-
for ( j = 0; j < at_new[i].num_bonds; j ++ ) {
|
604
|
-
at_new[i].neighbor[j] += orig_at_data->num_atoms;
|
605
|
-
}
|
606
|
-
}
|
607
|
-
}
|
608
|
-
e_FreeInchi_Atom( &at_old );
|
609
|
-
/* copy newly read structure */
|
610
|
-
memcpy( orig_at_data->atom + orig_at_data->num_atoms,
|
611
|
-
at_new,
|
612
|
-
num_inp_atoms_new * sizeof(orig_at_data->atom[0]) );
|
613
|
-
/* add other things */
|
614
|
-
orig_at_data->num_atoms += num_inp_atoms_new;
|
615
|
-
} else {
|
616
|
-
MOLFILE_ERR_SET (*err, 0, "Out of RAM");
|
617
|
-
*err = -1;
|
618
|
-
}
|
619
|
-
} else
|
620
|
-
if ( num_inp_atoms_new > 0 ) {
|
621
|
-
nNumAtoms += num_inp_atoms_new;
|
622
|
-
}
|
623
|
-
e_FreeInchi_Atom( &at_new );
|
624
|
-
|
625
|
-
} while ( !*err && bMergeAllInputStructures );
|
626
|
-
/*
|
627
|
-
if ( !*err ) {
|
628
|
-
orig_at_data->num_components =
|
629
|
-
MarkDisconnectedComponents( orig_at_data );
|
630
|
-
if ( orig_at_data->num_components == 0 ) {
|
631
|
-
MOLFILE_ERR_SET (*err, 0, "No components found");
|
632
|
-
*err = 99;
|
633
|
-
}
|
634
|
-
if ( orig_at_data->num_components < 0 ) {
|
635
|
-
MOLFILE_ERR_SET (*err, 0, "Too many components");
|
636
|
-
*err = 99;
|
637
|
-
}
|
638
|
-
}
|
639
|
-
*/
|
640
|
-
e_FreeInchi_Atom( &at_new );
|
641
|
-
if ( *err ) {
|
642
|
-
e_FreeInchi_Input( orig_at_data );
|
643
|
-
}
|
644
|
-
if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
|
645
|
-
MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
|
646
|
-
}
|
647
|
-
return orig_at_data? orig_at_data->num_atoms : nNumAtoms;
|
648
|
-
}
|
649
|
-
|