rino 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/extconf.rb +1 -24
- data/ext/libinchi.so +0 -0
- data/ext/src/aux2atom.h +120 -39
- data/ext/src/comdef.h +3 -3
- data/ext/src/dispstru.c +2547 -0
- data/ext/src/dispstru.h +73 -0
- data/ext/src/extr_ct.h +5 -2
- data/ext/src/ichi.h +27 -11
- data/ext/src/ichi_bns.c +1800 -254
- data/ext/src/ichi_bns.h +205 -4
- data/ext/src/ichican2.c +197 -86
- data/ext/src/ichicano.c +8 -13
- data/ext/src/ichicano.h +2 -2
- data/ext/src/ichicans.c +11 -6
- data/ext/src/ichicant.h +2 -2
- data/ext/src/ichicomn.h +2 -2
- data/ext/src/ichicomp.h +19 -4
- data/ext/src/ichidrp.h +9 -5
- data/ext/src/ichierr.h +5 -3
- data/ext/src/ichiisot.c +2 -2
- data/ext/src/ichimain.c +461 -0
- data/ext/src/ichimain.h +23 -15
- data/ext/src/ichimak2.c +6 -6
- data/ext/src/ichimake.c +843 -42
- data/ext/src/ichimake.h +4 -2
- data/ext/src/ichimap1.c +5 -5
- data/ext/src/ichimap2.c +2 -2
- data/ext/src/ichimap4.c +34 -21
- data/ext/src/ichinorm.c +11 -5
- data/ext/src/ichinorm.h +3 -2
- data/ext/src/ichiparm.c +2 -2
- data/ext/src/ichiparm.h +232 -30
- data/ext/src/ichiprt1.c +35 -11
- data/ext/src/ichiprt2.c +78 -7
- data/ext/src/ichiprt3.c +300 -120
- data/ext/src/ichiqueu.c +17 -2
- data/ext/src/ichiread.c +6932 -0
- data/ext/src/ichiring.c +3 -2
- data/ext/src/ichiring.h +2 -2
- data/ext/src/ichirvr1.c +4891 -0
- data/ext/src/ichirvr2.c +6344 -0
- data/ext/src/ichirvr3.c +5499 -0
- data/ext/src/ichirvr4.c +3177 -0
- data/ext/src/ichirvr5.c +1166 -0
- data/ext/src/ichirvr6.c +1287 -0
- data/ext/src/ichirvr7.c +2319 -0
- data/ext/src/ichirvrs.h +882 -0
- data/ext/src/ichisize.h +2 -2
- data/ext/src/ichisort.c +5 -5
- data/ext/src/ichister.c +281 -86
- data/ext/src/ichister.h +9 -3
- data/ext/src/ichitaut.c +208 -9
- data/ext/src/ichitaut.h +13 -11
- data/ext/src/ichitime.h +16 -2
- data/ext/src/inchicmp.h +107 -0
- data/ext/src/inpdef.h +6 -3
- data/ext/src/libinchi_wrap.c +912 -0
- data/ext/src/lreadmol.h +34 -31
- data/ext/src/mode.h +244 -7
- data/ext/src/mol2atom.c +1060 -0
- data/ext/src/mol2atom.h +31 -0
- data/ext/src/readinch.c +239 -0
- data/ext/src/readmol.c +28 -0
- data/ext/src/{e_readmol.h → readmol.h} +7 -9
- data/ext/src/runichi.c +251 -177
- data/ext/src/strutil.c +444 -238
- data/ext/src/strutil.h +150 -11
- data/ext/src/util.c +176 -118
- data/ext/src/util.h +15 -3
- data/lib/rino.rb +71 -3
- data/test/test.rb +33 -4
- metadata +22 -34
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/e_0dstereo.c +0 -3014
- data/ext/src/e_0dstereo.h +0 -31
- data/ext/src/e_comdef.h +0 -57
- data/ext/src/e_ctl_data.h +0 -147
- data/ext/src/e_ichi_io.c +0 -498
- data/ext/src/e_ichi_io.h +0 -40
- data/ext/src/e_ichi_parms.c +0 -37
- data/ext/src/e_ichi_parms.h +0 -41
- data/ext/src/e_ichicomp.h +0 -50
- data/ext/src/e_ichierr.h +0 -40
- data/ext/src/e_ichimain.c +0 -593
- data/ext/src/e_ichisize.h +0 -43
- data/ext/src/e_inchi_atom.c +0 -75
- data/ext/src/e_inchi_atom.h +0 -33
- data/ext/src/e_inpdef.h +0 -41
- data/ext/src/e_mode.h +0 -706
- data/ext/src/e_mol2atom.c +0 -649
- data/ext/src/e_readinch.c +0 -58
- data/ext/src/e_readmol.c +0 -54
- data/ext/src/e_readstru.c +0 -251
- data/ext/src/e_readstru.h +0 -33
- data/ext/src/e_util.c +0 -284
- data/ext/src/e_util.h +0 -61
- data/ext/src/ichilnct.c +0 -286
- data/ext/src/inchi_api.h +0 -670
- data/ext/src/inchi_dll.c +0 -1480
- data/ext/src/inchi_dll.h +0 -34
- data/ext/src/inchi_dll_main.c +0 -23
- data/ext/src/inchi_dll_main.h +0 -31
- data/ext/src/ruby_inchi_main.c +0 -558
data/ext/src/mol2atom.c
ADDED
@@ -0,0 +1,1060 @@
|
|
1
|
+
/*
|
2
|
+
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
+
* International Chemical Identifier (InChI)
|
4
|
+
* Version 1
|
5
|
+
* Software version 1.01
|
6
|
+
* July 21, 2006
|
7
|
+
* Developed at NIST
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include <stdio.h>
|
11
|
+
#include <stdlib.h>
|
12
|
+
#include <string.h>
|
13
|
+
#include <math.h>
|
14
|
+
#include <ctype.h>
|
15
|
+
|
16
|
+
#include "mode.h"
|
17
|
+
|
18
|
+
#include "comdef.h"
|
19
|
+
#include "readmol.h"
|
20
|
+
#include "inpdef.h"
|
21
|
+
#include "util.h"
|
22
|
+
|
23
|
+
#include "ichicomp.h"
|
24
|
+
|
25
|
+
#if( ADD_CMLPP == 1 )
|
26
|
+
#include "debug.h"
|
27
|
+
#endif
|
28
|
+
#include "mol2atom.h"
|
29
|
+
|
30
|
+
#define MIN_STDATA_X_COORD 0.0
|
31
|
+
#define MAX_STDATA_X_COORD 256.0
|
32
|
+
#define MIN_STDATA_Y_COORD 0.0
|
33
|
+
#define MAX_STDATA_Y_COORD 256.0
|
34
|
+
#define MIN_STDATA_Z_COORD 0.0
|
35
|
+
#define MAX_STDATA_Z_COORD 256.0
|
36
|
+
#define MAX_STDATA_AVE_BOND_LENGTH 20.0
|
37
|
+
#define MIN_STDATA_AVE_BOND_LENGTH 10.0
|
38
|
+
|
39
|
+
|
40
|
+
/* local prototypes */
|
41
|
+
inp_ATOM* mol_to_atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inp_ATOM* at_inp, int bDoNotAddH, int *err, char *pStrErr );
|
42
|
+
int mol_to_atom_xyz( MOL_DATA* mol_data, int num_atoms, inp_ATOM* at, int *err, char *pStrErr );
|
43
|
+
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr );
|
44
|
+
|
45
|
+
/******************************************************************************************************/
|
46
|
+
/*
 * Release the inp_ATOM array referenced through 'at' and reset the caller's
 * pointer to NULL. Does nothing when 'at' or '*at' is NULL.
 */
void FreeInpAtom( inp_ATOM **at )
{
    if ( !at || !*at ) {
        return;   /* nothing to release */
    }
    inchi_free( *at );
    *at = NULL;
}
|
53
|
+
/******************************************************************************************************/
|
54
|
+
inp_ATOM *CreateInpAtom( int num_atoms )
|
55
|
+
{
|
56
|
+
/*
|
57
|
+
void *p = inchi_calloc(num_atoms, sizeof(inp_ATOM) );
|
58
|
+
if ( p == (void*)0x009143A8 ) {
|
59
|
+
int stop = 1;
|
60
|
+
}
|
61
|
+
return (inp_ATOM* )p;
|
62
|
+
*/
|
63
|
+
return (inp_ATOM* ) inchi_calloc(num_atoms, sizeof(inp_ATOM) );
|
64
|
+
}
|
65
|
+
/******************************************************************************************************/
|
66
|
+
/*
 * Release both atom arrays owned by *inp_at_data ('at' and 'at_fixed_bonds')
 * and zero the whole structure. A NULL argument is ignored.
 */
void FreeInpAtomData( INP_ATOM_DATA *inp_at_data )
{
    if ( !inp_at_data ) {
        return;
    }
    FreeInpAtom( &inp_at_data->at );
    FreeInpAtom( &inp_at_data->at_fixed_bonds );
    /* reset the counters and any other fields together with the pointers */
    memset( inp_at_data, 0, sizeof(*inp_at_data) );
}
|
74
|
+
/******************************************************************************************************/
|
75
|
+
/*
 * (Re)initialize *inp_at_data to hold num_atoms atoms.
 *
 * Any arrays previously owned by the structure are released first. The 'at'
 * array is always allocated; 'at_fixed_bonds' is allocated only when
 * create_at_fixed_bonds is non-zero.
 *
 * Returns 1 on success (num_at is set to num_atoms), 0 on failure. On
 * allocation failure the structure is left released and zeroed. A NULL
 * inp_at_data is reported as a failure instead of being dereferenced.
 */
int CreateInpAtomData( INP_ATOM_DATA *inp_at_data, int num_atoms, int create_at_fixed_bonds )
{
    if ( !inp_at_data ) {
        return 0;   /* FreeInpAtomData() tolerates NULL, the code below would not */
    }
    FreeInpAtomData( inp_at_data );

    inp_at_data->at = CreateInpAtom( num_atoms );
    if ( inp_at_data->at && create_at_fixed_bonds ) {
        inp_at_data->at_fixed_bonds = CreateInpAtom( num_atoms );
    }
    if ( inp_at_data->at && ( !create_at_fixed_bonds || inp_at_data->at_fixed_bonds ) ) {
        inp_at_data->num_at = num_atoms;
        return 1;
    }
    /* partial allocation: release whatever was obtained */
    FreeInpAtomData( inp_at_data );
    return 0;
}
|
86
|
+
/******************************************************************************************************/
|
87
|
+
/*
 * Release the atom array and the nOffsetAtAndH array owned by *inp_at_data
 * and zero the whole structure. A NULL argument is ignored, matching
 * FreeInpAtomData().
 */
void FreeCompAtomData( COMP_ATOM_DATA *inp_at_data )
{
    if ( !inp_at_data ) {
        return;
    }
    FreeInpAtom( &inp_at_data->at );
    if ( inp_at_data->nOffsetAtAndH )
        inchi_free( inp_at_data->nOffsetAtAndH );
    memset( inp_at_data, 0, sizeof(*inp_at_data) );
}
|
94
|
+
#ifndef INCHI_ANSI_ONLY
|
95
|
+
/******************************************************************************************************/
|
96
|
+
/*
 * (Re)initialize *inp_at_data for num_atoms atoms.
 *
 * Previously owned arrays are released first. The nOffsetAtAndH array of
 * 2*(num_components+1) entries is allocated only when num_components > 1 and
 * bIntermediateTaut is zero.
 *
 * On success returns 1 with num_at = num_atoms and num_components stored as
 * given when it exceeds 1, otherwise as 0. Returns 0 on failure; on
 * allocation failure the structure is left released and zeroed.
 * A NULL inp_at_data is reported as a failure.
 */
int CreateCompAtomData( COMP_ATOM_DATA *inp_at_data, int num_atoms, int num_components, int bIntermediateTaut )
{
    if ( !inp_at_data ) {
        return 0;
    }
    FreeCompAtomData( inp_at_data );

    inp_at_data->at = CreateInpAtom( num_atoms );
    if ( !inp_at_data->at ) {
        goto failed;
    }
    if ( num_components > 1 && !bIntermediateTaut ) {
        /* (count, size) argument order, as in the other inchi_calloc() calls of this file */
        inp_at_data->nOffsetAtAndH =
            (AT_NUMB*)inchi_calloc( 2*(num_components+1), sizeof(inp_at_data->nOffsetAtAndH[0]) );
        if ( !inp_at_data->nOffsetAtAndH ) {
            goto failed;
        }
    }

    inp_at_data->num_at = num_atoms;
    inp_at_data->num_components = (num_components>1)? num_components : 0;
    return 1;

failed:
    FreeCompAtomData( inp_at_data );
    return 0;
}
|
110
|
+
|
111
|
+
/******************************************************************************************************/
|
112
|
+
/*
 * Release the inf_ATOM array referenced through 'at' and reset the caller's
 * pointer to NULL. Does nothing when 'at' or '*at' is NULL.
 */
void FreeInfAtom( inf_ATOM **at )
{
    if ( !at || !*at ) {
        return;
    }
    inchi_free( *at );
    *at = NULL;
}
|
119
|
+
/******************************************************************************************************/
|
120
|
+
inf_ATOM *CreateInfAtom( int num_atoms )
|
121
|
+
{
|
122
|
+
return (inf_ATOM* ) inchi_calloc(num_atoms, sizeof(inf_ATOM) );
|
123
|
+
}
|
124
|
+
/******************************************************************************************************/
|
125
|
+
/*
 * Release the inf_ATOM array and the pStereoFlags array owned by
 * *inf_at_data and zero the whole structure. A NULL argument is ignored,
 * matching FreeInpAtomData().
 */
void FreeInfoAtomData( INF_ATOM_DATA *inf_at_data )
{
    if ( !inf_at_data ) {
        return;
    }
    FreeInfAtom( &inf_at_data->at );
    if ( inf_at_data->pStereoFlags )
        inchi_free( inf_at_data->pStereoFlags );
    memset(inf_at_data, 0, sizeof(*inf_at_data));
}
|
132
|
+
/******************************************************************************************************/
|
133
|
+
/*
 * (Re)initialize *inf_at_data for num_atoms atoms.
 *
 * Previously owned arrays are released and the structure is zeroed. The
 * pStereoFlags array (num_components+1 entries) is allocated only when
 * num_components > 1.
 *
 * Returns 1 on success, with num_at and num_components stored as given;
 * returns 0 on allocation failure, leaving the structure released and zeroed.
 */
int CreateInfoAtomData( INF_ATOM_DATA *inf_at_data, int num_atoms, int num_components )
{
    int bAllocated;

    FreeInfoAtomData( inf_at_data );
    memset( inf_at_data, 0, sizeof(*inf_at_data) );

    inf_at_data->at = CreateInfAtom( num_atoms );
    bAllocated = ( inf_at_data->at != NULL );

    if ( bAllocated && num_components > 1 ) {
        inf_at_data->pStereoFlags =
            (AT_NUMB *)inchi_calloc(num_components+1, sizeof(inf_at_data->pStereoFlags[0]));
        bAllocated = ( inf_at_data->pStereoFlags != NULL );
    }

    if ( !bAllocated ) {
        FreeInfoAtomData( inf_at_data );
        return 0;
    }
    inf_at_data->num_at = num_atoms;
    inf_at_data->num_components = num_components;
    return 1;
}
|
149
|
+
/******************************************************************************************************/
|
150
|
+
/*
 * Allocate the arrays of *inf_at_data without touching its other fields:
 * 'at' for num_atoms entries and, only when num_components > 1,
 * 'pStereoFlags' for num_components+1 entries.
 *
 * Existing pointer values are overwritten, not freed. Returns 1 on success.
 * Returns 0 on failure; if the pStereoFlags allocation fails, the freshly
 * allocated 'at' array is released again.
 */
int AllocateInfoAtomData( INF_ATOM_DATA *inf_at_data, int num_atoms, int num_components )
{
    inf_at_data->at = CreateInfAtom( num_atoms );
    if ( !inf_at_data->at ) {
        return 0;
    }
    if ( num_components > 1 ) {
        inf_at_data->pStereoFlags =
            (AT_NUMB *)inchi_calloc(num_components+1, sizeof(inf_at_data->pStereoFlags[0]));
        if ( !inf_at_data->pStereoFlags ) {
            FreeInfAtom( &inf_at_data->at );
            return 0;
        }
    }
    return 1;
}
|
162
|
+
/******************************************************************************************************/
|
163
|
+
/*
 * Make a deep copy of *inf_at_data_from in *inf_at_data_to.
 *
 * All fields are copied by structure assignment, after which the destination
 * gets its own 'at' array and, when AllocateInfoAtomData() creates one, its
 * own 'pStereoFlags' array filled from the source. Whatever
 * *inf_at_data_to pointed to before the call is overwritten, not freed.
 *
 * Returns 1 on success, 0 on allocation failure. The destination never keeps
 * pointers into the source's arrays, so freeing both structures afterwards
 * cannot release the same memory twice.
 */
int DuplicateInfoAtomData( INF_ATOM_DATA *inf_at_data_to, const INF_ATOM_DATA *inf_at_data_from)
{
    *inf_at_data_to = *inf_at_data_from;
    /* the structure assignment copied the source's array pointers; drop these
       aliases before allocating so that no failure path leaves them behind */
    inf_at_data_to->at           = NULL;
    inf_at_data_to->pStereoFlags = NULL;

    if ( !AllocateInfoAtomData( inf_at_data_to, inf_at_data_from->num_at, inf_at_data_from->num_components ) ) {
        return 0;
    }
    memcpy( inf_at_data_to->at, inf_at_data_from->at,
            inf_at_data_from->num_at * sizeof(inf_at_data_to->at[0]));
    if ( inf_at_data_to->pStereoFlags && inf_at_data_from->pStereoFlags ) {
        memcpy( inf_at_data_to->pStereoFlags, inf_at_data_from->pStereoFlags,
                (inf_at_data_from->num_components+1)*sizeof(inf_at_data_to->pStereoFlags[0]));
    }
    return 1;
}
|
177
|
+
#endif /* ifndef INCHI_ANSI_ONLY */
|
178
|
+
|
179
|
+
|
180
|
+
#if( TEST_RENUMB_ATOMS == 1 ) /* { */
|
181
|
+
|
182
|
+
/******************************************************************************************************/
|
183
|
+
/*
 * Copy the atoms of *src_inp_at_data into *dest_inp_at_data.
 *
 * If the destination already owns an 'at' array for the same number of
 * atoms, its arrays are reused and all other fields are copied from the
 * source by structure assignment. Otherwise the destination is re-created by
 * CreateInpAtomData(), with 'at_fixed_bonds' requested only if the source
 * has one.
 *
 * 'at_fixed_bonds' contents are copied only when both structures have that
 * array. Returns the CreateInpAtomData() result when re-creation was needed,
 * 1 otherwise; nothing is copied when the result is 0.
 */
int CopyInpAtomData( INP_ATOM_DATA *dest_inp_at_data, INP_ATOM_DATA *src_inp_at_data )
{
    int ok = 1;

    if ( dest_inp_at_data->at && dest_inp_at_data->num_at == src_inp_at_data->num_at ) {
        /* keep the destination's own arrays across the structure assignment */
        inp_ATOM *own_at    = dest_inp_at_data->at;
        inp_ATOM *own_fixed = dest_inp_at_data->at_fixed_bonds;
        *dest_inp_at_data = *src_inp_at_data;
        dest_inp_at_data->at             = own_at;
        dest_inp_at_data->at_fixed_bonds = own_fixed;
    } else {
        ok = CreateInpAtomData( dest_inp_at_data, src_inp_at_data->num_at, (NULL != src_inp_at_data->at_fixed_bonds) );
    }

    if ( !ok ) {
        return ok;
    }

    memcpy( dest_inp_at_data->at, src_inp_at_data->at,
            src_inp_at_data->num_at*sizeof(dest_inp_at_data->at[0]) );
    if ( dest_inp_at_data->at_fixed_bonds && src_inp_at_data->at_fixed_bonds ) {
        memcpy( dest_inp_at_data->at_fixed_bonds, src_inp_at_data->at_fixed_bonds,
                src_inp_at_data->num_at*sizeof(dest_inp_at_data->at_fixed_bonds[0]) );
    }
    return ok;
}
|
205
|
+
/******************************************************************************************************/
|
206
|
+
/*
 * Copy the atoms of *src_inp_at_data into the (already allocated) 'at' array
 * of *dest_inp_at_data in a new order: source atom n is stored at position
 * new_ord[n], its orig_compt_at_numb becomes new_ord[n]+1 and all its
 * neighbor numbers are translated through new_ord[].
 *
 * When TEST_RENUMB_NEIGH == 1 the neighbor list of every atom is additionally
 * shuffled at random (neighbor, bond_stereo and bond_type are swapped
 * together) and the sb_ord[]/sn_ord[] stereo-bond links are updated to follow
 * the swaps.
 *
 * The random index is computed in floating point: the integer expression
 * rand()*(val-j)/(RAND_MAX+1) overflows 'int' on platforms where RAND_MAX
 * equals INT_MAX.
 */
void RenumbInpAtomData( INP_ATOM_DATA *dest_inp_at_data, INP_ATOM_DATA *src_inp_at_data, AT_RANK *new_ord )
{
    int j, n, m, val;
#if( TEST_RENUMB_NEIGH == 1 )
    int i, k;
#endif
    int num_atoms = src_inp_at_data->num_at;
    inp_ATOM *dest_at = dest_inp_at_data->at;

    for ( n = 0; n < num_atoms; n ++ ) {
        m = new_ord[n];
        dest_at[m] = src_inp_at_data->at[n];
        dest_at[m].orig_compt_at_numb = (AT_NUMB)(m+1);  /* new ordering number within the component */
        val = dest_at[m].valence;
        for ( j = 0; j < val; j ++ ) {
            dest_at[m].neighbor[j] = new_ord[dest_at[m].neighbor[j]];
        }
#if( TEST_RENUMB_NEIGH == 1 )
        for ( i = 0; i < val; i ++ ) {
            j = i;
            /* random k in [j, val); evaluated in double to avoid signed overflow */
            k = j + (int)( ((double)rand() * (double)(val-j)) / ((double)RAND_MAX + 1.0) );
            if ( k >= val || j == k ) {
                continue;
            }
            swap( (char*)&dest_at[m].neighbor[j],    (char*)&dest_at[m].neighbor[k],    sizeof(dest_at[0].neighbor[0]) );
            swap( (char*)&dest_at[m].bond_stereo[j], (char*)&dest_at[m].bond_stereo[k], sizeof(dest_at[0].bond_stereo[0]) );
            swap( (char*)&dest_at[m].bond_type[j],   (char*)&dest_at[m].bond_type[k],   sizeof(dest_at[0].bond_type[0]) );
            /* adjust stereo bond links */
            if ( dest_at[m].sb_parity[0] ) {
                int a;
                for ( a = 0; a < MAX_NUM_STEREO_BONDS && dest_at[m].sb_parity[a]; a ++ ) {

                    if ( k == (int)dest_at[m].sb_ord[a] ) {
                        dest_at[m].sb_ord[a] = j;
                    } else
                    if ( j == (int)dest_at[m].sb_ord[a] ) {
                        dest_at[m].sb_ord[a] = k;
                    }

                    if ( k == (int)dest_at[m].sn_ord[a] ) {
                        dest_at[m].sn_ord[a] = j;
                    } else
                    if ( j == (int)dest_at[m].sn_ord[a] ) {
                        dest_at[m].sn_ord[a] = k;
                    }
                }
            }
        }
#endif
    }
}
|
256
|
+
/******************************************************************************************************/
|
257
|
+
/*
 * Randomly permute the first num_atoms entries of new_ord[] in place by a
 * sequence of pairwise swaps: for each i, entry i is exchanged with a randomly
 * chosen entry k taken from [0, num_atoms-i).
 *
 * The random index is computed in floating point: the integer expression
 * rand()*(num_atoms-i)/(RAND_MAX+1) overflows 'int' where RAND_MAX equals
 * INT_MAX and could yield a negative k that the guard below would not catch.
 *
 * NOTE(review): k is not offset by i, unlike the neighbor shuffle in
 * RenumbInpAtomData(); the original range is kept here - confirm whether a
 * uniform shuffle was intended.
 */
void MakeNewOrd( int num_atoms, AT_RANK *new_ord )
{
    int i, j, k;
    for ( i = 0; i < num_atoms; i ++ ) {
        j = i;
        k = (int)( ((double)rand() * (double)(num_atoms-i)) / ((double)RAND_MAX + 1.0) );
        if ( k >= num_atoms || j == k ) {
            continue;
        }
        swap( (char*)&new_ord[j], (char*)&new_ord[k], sizeof(new_ord[0]) );
    }
}
|
269
|
+
#endif /* } TEST_RENUMB_ATOMS == 1 */
|
270
|
+
|
271
|
+
/******************************************************************************************************/
|
272
|
+
/*
 * Convert the connection table of a parsed MOLfile into an inp_ATOM array.
 *
 * mol_data    parsed MOLfile; a NULL pointer or an empty atom block means
 *             "no structure"
 * num_atoms   out: number of atoms taken from the MOLfile (0 if mol_data or
 *             its atom block is missing)
 * num_bonds   out: number of distinct bonds accepted
 * at_inp      optional caller-supplied array to fill; when NULL a new array
 *             is obtained from CreateInpAtom()
 * bDoNotAddH  passed through to calculate_valences()
 * err         out: 0 or a combination of warning bits set below:
 *               1  bond refers to a nonexistent atom (bond ignored)
 *               2  multiple bonds between the same two atoms
 *               4  an atom has more than MAXVAL bonds (bond ignored)
 *               8  unrecognized bond type replaced with a single bond
 *              16  unrecognized bond stereo ignored
 *              64  unknown element
 *             -1 is stored when the atom array cannot be allocated;
 *             calculate_valences() may store further values.
 * pStrErr     error message buffer; not referenced directly here
 *             (presumably used by the MOLFILE_ERR_* macros - confirm)
 *
 * Returns the filled array (at_inp when it was supplied), or NULL when there
 * is no structure or the allocation failed.
 */
inp_ATOM* mol_to_atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inp_ATOM* at_inp,
                       int bDoNotAddH, int *err, char *pStrErr )
{
    inp_ATOM *at = NULL;
    AT_NUMB  *p1, *p2;
    int       i, a1, a2, n1, n2, bonds, iso_atw_diff;
    char      cBondStereo, cBondType;
    static int el_number_H = 0;


    if ( !el_number_H ) {
        el_number_H = get_periodic_table_number( "H" ); /* one-time initialization */
    }

    *err = 0;
    *num_atoms = *num_bonds = 0;
    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (*num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        /* MOLFILE_ERR_SET (*err, 0, "Empty structure"); */
        goto exit_function; /*  no structure */
    }
    /* allocate memory if necessary */
    if ( at_inp ) {
        at = at_inp;
    } else
    if ( !(at = CreateInpAtom( *num_atoms ) ) ) {
        *err = -1;
        MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
    }

    /* copy atom info */
    for ( i = 0; i < *num_atoms; i ++ ) {
        mystrncpy( at[i].elname, mol_data->ctab.MolAtom[i].szAtomSymbol, sizeof(at->elname) );
        /* at[i].chem_bonds_valence = mol_data->ctab.MolAtom[i].cValence; */ /*  MOLfile valence; will change */
        at[i].orig_at_number     = (AT_NUMB)(i+1);
        at[i].charge             = mol_data->ctab.MolAtom[i].cCharge;
        at[i].radical            = mol_data->ctab.MolAtom[i].cRadical;
        /* coordinates are handled separately, see mol_to_atom_xyz() */

        /* stored isotopic shift: 1 for ZERO_ATW_DIFF, positive MOLfile values are
           incremented by 1, all other values are copied unchanged */
        iso_atw_diff = mol_data->ctab.MolAtom[i].cMassDifference;
        at[i].iso_atw_diff = iso_atw_diff==ZERO_ATW_DIFF? 1:
                             iso_atw_diff>  0?            iso_atw_diff+1:
                                                          iso_atw_diff;
#if( SINGLET_IS_TRIPLET == 1 )
        if ( at[i].radical == RADICAL_SINGLET ) {
            at[i].radical = RADICAL_TRIPLET;
        }
#endif
#if( bRELEASE_VERSION != 1 )
        /* cast: passing a negative plain char to isdigit() is undefined behavior */
        if ( isdigit( (unsigned char)at[i].elname[0] ) ) { /*  for testing */
            mystrncpy( at[i].elname, "C", sizeof(at->elname) );
        }
#endif
        if ( ERR_ELEM == (n1 = get_periodic_table_number( at[i].elname ) ) ) {
            /*  Case when elname contains more than 1 element: extract number of H if possible */
            at[i].num_H = extract_H_atoms( at[i].elname, at[i].num_iso_H );
            if ( !at[i].elname[0] && NUMH(at, i) ) {
                /* alias contains only H. Added 2004-07-21, fixed 2004-07-22
                 * move the heaviest isotope to the "central atom"
                 * Note: this must be consistent with H-H treatment in remove_terminal_HDT()
                 */
                strcpy( at[i].elname, "H" );
                if ( NUM_ISO_H(at,i) ) {
                    int j;
                    for ( j = NUM_H_ISOTOPES-1; 0 <= j; j -- ) {
                        if ( at[i].num_iso_H[j] ) {
                            at[i].num_iso_H[j] --;
                            at[i].iso_atw_diff = 1 + j;
                            break;
                        }
                    }
                } else {
                    at[i].num_H --;
                }
            }
            if ( ERR_ELEM == (n1 = get_periodic_table_number( at[i].elname ) ) ) {
                n1 = 0;
            }
        }

        at[i].el_number = (U_CHAR) n1;
        if ( !n1 ) {
            *err |= 64; /*  Unknown element */
            MOLFILE_ERR_SET (*err, 0, "Unknown element(s):");
            MOLFILE_ERR_SET (*err, 0, at[i].elname);
        } else
        /* replace explicit D or T with isotopic H (added 2003-06-02) */
        if ( el_number_H == n1 && !at[i].iso_atw_diff ) {
            switch( at[i].elname[0] ) {
            case 'D':
                at[i].iso_atw_diff = 2;
                mystrncpy( at[i].elname, "H", sizeof(at->elname) );
                break;
            case 'T':
                at[i].iso_atw_diff = 3;
                mystrncpy( at[i].elname, "H", sizeof(at->elname) );
                break;
            }
        }
    }


    /*---------------- stereo information notes. ------------------------

      Currently:   1. stereo sign
      =========   --------------
      MOLfile     (atom number = MOLfile atom number - 1, no stdata as an intermediate)
         |        if mol_data->ctab.MolBond[i].nAtomNo1 < mol_data->ctab.MolBond[i].nAtomNo2
         v        then
      inp_ATOM        stereo > 0
                  else
                      stereo < 0

                  2. neighbor z-coordinate
                  ------------------------
                  neighbor z-coord > 0 for Up if sign(stdata_bond_no) = sign(at[i].neighbor[j]-i)

    --------------------------------------------------------------------*/

    /* copy bond info */
    for ( i = 0, bonds = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        cBondStereo = mol_data->ctab.MolBond[i].cBondStereo;
        cBondType   = mol_data->ctab.MolBond[i].cBondType;
        a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;

        if ( a1 < 0 || a1 >= *num_atoms ||
             a2 < 0 || a2 >= *num_atoms ||
             a1 == a2 ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }
        /*  check for multiple bonds between same atoms */
        p1 = is_in_the_list( at[a1].neighbor, (AT_NUMB)a2, at[a1].valence );
        p2 = is_in_the_list( at[a2].neighbor, (AT_NUMB)a1, at[a2].valence );
        if ( (p1 || p2) && (p1 || at[a1].valence < MAXVAL) && (p2 || at[a2].valence < MAXVAL) ) {
            /* the bond is already known to at least one end: reuse that slot */
            n1 = p1? (int)(p1 - at[a1].neighbor) : at[a1].valence ++;
            n2 = p2? (int)(p2 - at[a2].neighbor) : at[a2].valence ++;
            MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
            *err |= 2; /*  multiple bonds between atoms */
        } else
        if ( !p1 && !p2 && at[a1].valence < MAXVAL && at[a2].valence < MAXVAL ) {
            n1 = at[a1].valence ++;
            n2 = at[a2].valence ++;
            bonds ++;
        } else {
            char szMsg[64];
            *err |= 4; /*  too large number of bonds. Some bonds ignored. */
            sprintf( szMsg, "Atom '%s' has more than %d bonds",
                            at[a1].valence>= MAXVAL? at[a1].elname:at[a2].elname, MAXVAL );
            MOLFILE_ERR_SET (*err, 0, szMsg);
            continue;
        }
        if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
            char szBondType[16];
            sprintf( szBondType, "%d", cBondType );
            cBondType = 1;
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
            MOLFILE_ERR_SET (*err, 0, szBondType);
            *err |= 8; /*  Unrecognized Bond type replaced with single bond */
        }
        /* bond type */
        at[a1].bond_type[n1] =
        at[a2].bond_type[n2] = cBondType;
        /* connection */
        at[a1].neighbor[n1] = (AT_NUMB)a2;
        at[a2].neighbor[n2] = (AT_NUMB)a1;
        /* stereo */
        if ( cBondStereo == INPUT_STEREO_DBLE_EITHER /* 3 */ ) {
            at[a1].bond_stereo[n1] =
            at[a2].bond_stereo[n2] = STEREO_DBLE_EITHER;
        } else
        if ( cBondStereo == INPUT_STEREO_SNGL_UP     ||  /* 1 */
             cBondStereo == INPUT_STEREO_SNGL_EITHER ||  /* 4 */
             cBondStereo == INPUT_STEREO_SNGL_DOWN       /* 6 */ ) {
            char cStereo;
            switch ( cBondStereo ) {
            case INPUT_STEREO_SNGL_UP:
                cStereo = STEREO_SNGL_UP;
                break;
            case INPUT_STEREO_SNGL_EITHER:
                cStereo = STEREO_SNGL_EITHER;
                break;
            case INPUT_STEREO_SNGL_DOWN:
                cStereo = STEREO_SNGL_DOWN;
                break;
            }
            at[a1].bond_stereo[n1] =  cStereo; /*  >0: the wedge (pointed) end is at this atom, a1 */
            at[a2].bond_stereo[n2] = -cStereo; /*  <0: the wedge (pointed) end is at the opposite atom, a1 */
        } else
        if ( cBondStereo ) {
            *err |= 16; /*  Ignored unrecognized Bond stereo */
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
            continue;
        }
    }
    *num_bonds = bonds;


    /* special valences */
    calculate_valences (mol_data, at, num_atoms, bDoNotAddH, err, pStrErr);

exit_function:;
    return at;
}
|
485
|
+
/******************************************************************************************************/
|
486
|
+
/*
 * Compute chem_bonds_valence for every atom from its bond types and, when
 * mol_data is available, set the number of hydrogens through get_num_H().
 *
 * Atoms are visited in two passes selected by is_el_a_metal(): metals first,
 * then non-metals. For each atom:
 *   - bond orders 1..3 are summed into chem_bonds_valence;
 *   - 2 alternating ("aromatic") bonds add 3 and 3 add 4; any other non-zero
 *     count turns these bonds into single bonds on both ends, adds their
 *     count, and sets warning bit 32 in *err;
 *   - if alternating bonds were accepted, chem_bonds_valence is decremented
 *     when that matches the previously stored valence or, if none was stored,
 *     when detect_unusual_el_valence() flags the higher value but not the
 *     lower one (typically NH in a 5-member aromatic ring).
 *
 * If a neighbor of an alternating bond does not list this atom back, *err is
 * set to -2 and the function returns immediately.
 * pStrErr is not referenced directly (presumably used by MOLFILE_ERR_SET -
 * confirm).
 */
void calculate_valences (MOL_DATA* mol_data, inp_ATOM* at, int *num_atoms, int bDoNotAddH, int *err, char *pStrErr)
{
    int      bSecondPass;          /* 0: metals are processed, 1: non-metals */
    int      iat, neigh, k, nArom, known_valence;
    AT_NUMB *pBack;

    for ( bSecondPass = 0; bSecondPass < 2; bSecondPass ++ ) {
        for ( iat = 0; iat < *num_atoms; iat ++ ) {
            int  num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1], bond_type, bHasMetalNeighbor;

            if ( bSecondPass == is_el_a_metal( at[iat].el_number ) ) {
                continue;  /* first process all metals, after that all non-metals */
            }
            memset( num_bond_type, 0, sizeof(num_bond_type) );

            known_valence = at[iat].chem_bonds_valence; /*  save atom valence if available */

            at[iat].chem_bonds_valence = 0;
            bHasMetalNeighbor = 0;

            /* histogram of the atom's bond types */
            for ( k = 0; k < at[iat].valence; k ++ ) {
                bond_type = at[iat].bond_type[k] - MIN_INPUT_BOND_TYPE;
                if (  bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
                    bond_type = 0;
                    MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
                }
                num_bond_type[ bond_type ] ++;
            }

            /* add all bond orders except for "aromatic" bonds */
            for ( k = 0; MIN_INPUT_BOND_TYPE + k <= 3 && MIN_INPUT_BOND_TYPE + k <= MAX_INPUT_BOND_TYPE; k ++ ) {
                at[iat].chem_bonds_valence += (MIN_INPUT_BOND_TYPE + k) * num_bond_type[k];
            }

            /* accept input aromatic bonds for now */
            nArom = 0;
            if ( MIN_INPUT_BOND_TYPE <= BOND_TYPE_ALTERN && BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE ) {
                nArom = num_bond_type[BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE];
            }
            if ( nArom == 2 ) {
                at[iat].chem_bonds_valence += 3;  /* =A- */
            } else
            if ( nArom == 3 ) {
                at[iat].chem_bonds_valence += 4;  /* =A< */
            } else
            if ( nArom ) {
                /*  if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */
                /*  and detect an error in the input structure */
                for ( k = 0; k < at[iat].valence; k ++ ) {
                    if ( at[iat].bond_type[k] != BOND_TYPE_ALTERN ) {
                        continue;
                    }
                    neigh = at[iat].neighbor[k];
                    pBack = is_in_the_list( at[neigh].neighbor, (AT_NUMB)iat, at[neigh].valence );
                    if ( !pBack ) {
                        *err = -2;  /*  Program error */
                        MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
                        return; /*  no structure */
                    }
                    at[iat].bond_type[k] =
                    at[neigh].bond_type[pBack-at[neigh].neighbor] = BOND_TYPE_SINGLE;
                }
                at[iat].chem_bonds_valence += nArom;
                *err |= 32;
                MOLFILE_ERR_SET (*err, 0, "Atom has 1 or more than 3 aromatic bonds");
                nArom = 0;
            }

            if ( nArom ) {
                int num_H        = NUMH(at, iat);
                int chem_valence = at[iat].chem_bonds_valence + num_H;
                if ( known_valence ) {
                    /* atom has aromatic bonds AND the chemical valence is known */
                    if ( known_valence == chem_valence-1 ) {
                        /* typically NH in 5-member aromatic ring */
                        at[iat].chem_bonds_valence --;
                    }
                } else {
                    /* atom has aromatic bonds AND the chemical valence is not known */
                    int bUnusualValenceArom =
                        detect_unusual_el_valence( (int)at[iat].el_number, at[iat].charge,
                                                    at[iat].radical, chem_valence,
                                                    num_H, at[iat].valence );
                    int bUnusualValenceNoArom =
                        detect_unusual_el_valence( (int)at[iat].el_number, at[iat].charge,
                                                    at[iat].radical, chem_valence-1,
                                                    num_H, at[iat].valence );
#if ( CHECK_AROMBOND2ALT == 1 )
                    if ( bUnusualValenceArom && !bUnusualValenceNoArom && 0 == nBondsValToMetal( at, iat) )
#else
                    if ( bUnusualValenceArom && !bUnusualValenceNoArom )
#endif
                    {
                        /* typically NH in 5-member aromatic ring */
                        at[iat].chem_bonds_valence --;
                    }
                }
            }

            /* set number of hydrogen atoms */
            if ( mol_data ) {
                at[iat].num_H = get_num_H( at[iat].elname, at[iat].num_H, at[iat].num_iso_H,
                                           at[iat].charge, at[iat].radical,
                                           at[iat].chem_bonds_valence,
                                           mol_data->ctab.MolAtom[iat].cValence, /* instead of valence */
                                           mol_data->ctab.MolAtom[iat].cAtomAliasedFlag,
                                           bDoNotAddH, bHasMetalNeighbor );
            }
        }
    }
}
|
601
|
+
/******************************************************************************************************/
|
602
|
+
/*
 * mol_to_atom_xyz
 *
 * Transfers the atom coordinates stored in mol_data->ctab.MolAtom[] into at[]
 * and reports the dimensionality of the structure.
 *
 *   mol_data  - parsed molfile; must have a non-NULL atom table (and a
 *               non-NULL bond table if it declares any bonds)
 *   num_atoms - ignored on entry: overwritten with mol_data->ctab.nNumberOfAtoms
 *   at        - destination atom array; not checked for NULL here, the caller
 *               must supply at least nNumberOfAtoms elements
 *   err       - bit 0 is OR-ed in when a bond refers to an impossible atom
 *   pStrErr   - error text buffer; used implicitly by MOLFILE_ERR_SET
 *
 * Returns 0 (no usable coordinates or no structure), 2 or 3.
 *
 * When NORMALIZE_INP_COORD == 1 the coordinates are shifted/scaled and rounded
 * to the nearest integer; otherwise they are copied unchanged.
 */
int mol_to_atom_xyz( MOL_DATA* mol_data, int num_atoms, inp_ATOM* at, int *err, char *pStrErr )
{
    int      i, num_dimensions=0;
    int      num_bonds;
    double max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
    double min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
    double macheps = 1.0e-10, small_coeff = 0.00001;
    double x_coeff, y_coeff, z_coeff, coeff, average_bond_length;

    /* *err = 0; */
    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        goto exit_function; /* no structure */
    }

    /* find the bounding box of the atom coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
        min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
        max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
        min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
        max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
        min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
    }

    /* accumulate the total length of all valid bonds */
    num_bonds = 0;
    average_bond_length = 0.0;
    for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        int  a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        int  a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
        double dx, dy, dz;

        /* Validate the atom numbers BEFORE indexing MolAtom[] with them:
         * a malformed bond record must not cause an out-of-bounds read. */
        if (  a1 < 0 || a1 >= num_atoms ||
              a2 < 0 || a2 >= num_atoms ||
              a1 == a2 ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }
        dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
        dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
        dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;
        average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
        num_bonds ++;
    }

    /* convert to integral coordinates */

    /* per-axis stretching coefficient; 0 marks an axis with (almost) no extent */
    if ( max_x - min_x <= small_coeff*(fabs(max_x) + fabs(min_x)) )
        x_coeff = 0.0;
    else
        x_coeff = (MAX_STDATA_X_COORD - MIN_STDATA_X_COORD)/(max_x - min_x);

    if ( max_y - min_y <= small_coeff*(fabs(max_y) + fabs(min_y)) )
        y_coeff = 0.0;
    else
        y_coeff = (MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD)/(max_y - min_y);

    if ( max_z - min_z <= small_coeff*(fabs(max_z) + fabs(min_z)) )
        z_coeff = 0.0;
    else
        z_coeff = (MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD)/(max_z - min_z);

    /* 2D: x or y has extent and z has none; 3D: z has extent; otherwise 0D */
    num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
                     (fabs(z_coeff) > macheps)? 3: 0;

    switch ( num_dimensions ) {
    case 0:
        coeff = 0.0;
        break;
    case 2:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = x_coeff;
        }else
        if ( y_coeff > macheps ){
            coeff = y_coeff;
        }else{
            coeff = 1.0;
        }
        break;
    case 3:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
            coeff = inchi_min( coeff, z_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = inchi_min( x_coeff, z_coeff );
        }else
        if ( y_coeff > macheps ){
            coeff = inchi_min( y_coeff, z_coeff );
        }else{
            coeff = z_coeff;
        }
        break;
    default:
        coeff = 0.0;
    }

    /* keep the scaled average bond length inside the allowed window */
    if ( num_bonds > 0 ) {
        average_bond_length /= (double)num_bonds;
        if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
            coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
        } else
        if ( average_bond_length * coeff < macheps ) {
            coeff = 1.0; /* all lengths are of zero length */
        } else
        if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
            coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
        }
    }
#if( NORMALIZE_INP_COORD == 1 )
    /* set integral coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
        y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
        z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
        /* Round half away from zero: floor() is applied to a non-negative
         * argument only, and the sign is restored afterwards.
         */
        at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
        at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
        at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
    }
#else
    /* set input coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        at[i].x = x;
        at[i].y = y;
        at[i].z = z;
    }
#endif

exit_function:;
    return num_dimensions;
}
|
748
|
+
/****************************************************************************/
|
749
|
+
/*
 * GetMolfileNumber
 *
 * Extracts the structure number from a molfile header.
 *
 * A non-zero number is returned only when all of the following hold:
 *   - szMoleculeName starts (case-insensitively) with "Structure #";
 *   - the rest of szMoleculeName is consumed entirely by strtol() in base 10;
 *   - szMoleculeLine2 starts (case-insensitively) with INCHI_NAME and
 *     contains "SDfile Output" somewhere after that prefix.
 * In every other case, including pHdr == NULL, the result is 0.
 */
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
{
    static char sStruct[] = "Structure #";
    static char sINCHI[]  = INCHI_NAME;
    char *pDigits;
    char *pLine2;
    char *pStop = NULL;
    long  lParsed;

    if ( !pHdr ) {
        return 0;
    }
    if ( memicmp( pHdr->szMoleculeName, sStruct, sizeof(sStruct)-1 ) ) {
        return 0; /* the name does not carry the expected prefix */
    }

    pDigits = pHdr->szMoleculeName + sizeof(sStruct)-1;
    lParsed = strtol( pDigits, &pStop, 10 );
    pLine2  = pHdr->szMoleculeLine2;

    if ( !pStop || *pStop ) {
        return 0; /* trailing characters after the number */
    }
    if ( memicmp( pLine2, sINCHI, sizeof(sINCHI)-1) ) {
        return 0; /* second header line does not start with INCHI_NAME */
    }
    if ( !strstr( pLine2+sizeof(sINCHI)-1, "SDfile Output" ) ) {
        return 0;
    }
    return lParsed;
}
|
769
|
+
/****************************************************************************/
|
770
|
+
/*
 * MolfileToInpAtom
 *
 * Reads one record from inp_molfile through read_sdfile_segment() and, when an
 * atom array is requested, converts it with mol_to_atom()/mol_to_atom_xyz().
 *
 *   at == NULL      : header-only mode; local header/ctab buffers are handed to
 *                     read_sdfile_segment() and the returned value is the atom
 *                     count found in the local ctab.
 *   at != NULL      : *at (if non-NULL) is zeroed for max_num_at elements and
 *                     receives the result of mol_to_atom().
 *   szCoord         : if non-NULL, any previous *szCoord is freed; on success
 *                     ownership of mol_data->ctab.szCoord is moved to *szCoord.
 *   num_dimensions,
 *   num_bonds       : outputs; num_dimensions is written only after a
 *                     successful mol_to_atom() (*err >= 0).
 *   pSdfLabel/Value : if no value was read and the label is one of
 *                     "MolfileName", "MolfileLine2", "MolfileComment" or
 *                     "MolfileIntRegNo", the value is taken from the header;
 *                     if it is still empty the original first character of
 *                     pSdfValue is restored.
 *   pInpAtomFlags   : receives FLAG_INP_AT_CHIRAL or FLAG_INP_AT_NONCHIRAL
 *                     according to the ctab chiral flag.
 *   err             : positive conversion errors are shifted by +100;
 *                     70 (reported as 170) means "Too many atoms".
 *
 * Returns the number of atoms, or -1 when the structure exceeds max_num_at.
 */
int MolfileToInpAtom( FILE *inp_molfile, int bDoNotAddH, inp_ATOM **at, MOL_COORD **szCoord, int max_num_at,
                      int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
                      long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
{
    int      num_atoms = 0;
    MOL_DATA *mol_data = NULL;
    MOL_HEADER_BLOCK OnlyHeaderBlock, *pOnlyHeaderBlock = NULL, *pHdr;
    MOL_CTAB         OnlyCtab,        *pOnlyCtab = NULL;
    char             cSdfValueFirstChar = '\0'; /* defined even when pSdfValue == NULL */
#ifdef CML_DEBUG
    FILE *f_p;
#endif
    if ( at ) {
        pOnlyHeaderBlock = NULL;
        if ( *at && max_num_at ) {
            memset( *at, 0, max_num_at * sizeof(inp_ATOM) );
        }
        if ( szCoord && *szCoord ) {
            inchi_free( *szCoord );
            *szCoord = NULL;
        }
    } else {
        /* Header-only mode. The buffers are read below regardless of how far
         * read_sdfile_segment() got, so start from a defined (zero) state. */
        memset( &OnlyHeaderBlock, 0, sizeof(OnlyHeaderBlock) );
        memset( &OnlyCtab, 0, sizeof(OnlyCtab) );
        pOnlyHeaderBlock = &OnlyHeaderBlock;
        pOnlyCtab        = &OnlyCtab;
    }
    if ( pSdfValue ) {
        /* remember the first character so it can be restored if nothing is found */
        cSdfValueFirstChar = pSdfValue[0];
        pSdfValue[0] = '\0';
    }

    mol_data = read_sdfile_segment(inp_molfile, pOnlyHeaderBlock, pOnlyCtab, NULL != szCoord,
                                   NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );

    /* pick whichever header is available for this mode */
    pHdr = (  mol_data && !pOnlyHeaderBlock )? &mol_data->hdr :
           ( !mol_data &&  pOnlyHeaderBlock )? pOnlyHeaderBlock : NULL;
    if ( lMolfileNumber && pHdr ) {
        *lMolfileNumber = GetMolfileNumber( pHdr );
    }
    if ( pSdfValue && !pSdfValue[0] &&
         pSdfLabel && pSdfLabel[0] && pHdr ) {
        if ( !stricmp(pSdfLabel, "MolfileName") ) {
            mystrncpy( pSdfValue, pHdr->szMoleculeName, MAX_SDF_VALUE+1 );
            LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MolfileLine2") ) {
            mystrncpy( pSdfValue, pHdr->szMoleculeLine2, MAX_SDF_VALUE+1 );
            LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MolfileComment") ) {
            mystrncpy( pSdfValue, pHdr->szComment, MAX_SDF_VALUE+1 );
            LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MolfileIntRegNo") && pHdr->lInternalRegistryNumber ) {
            sprintf( pSdfValue, "%ld", pHdr->lInternalRegistryNumber );
        }
        if ( !pSdfValue[0] ) {
            pSdfValue[0] = cSdfValueFirstChar;
        }
    }

    if ( mol_data && at && !*err ) {
        /* The same conversion applies whether *at already points to caller
         * memory or is NULL; only the size limit matters here. */
        if ( mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
            *at = mol_to_atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
            if ( *err >= 0 ) {
                *num_dimensions = mol_to_atom_xyz( mol_data, num_atoms, *at, err, pStrErr );

                if ( szCoord ) {
                    /* hand the coordinate strings over to the caller */
                    *szCoord = mol_data->ctab.szCoord;
                    mol_data->ctab.szCoord = NULL;
                }
            }
        } else {
            MOLFILE_ERR_SET (*err, 0, "Too many atoms");
            *err = 70;
            num_atoms = -1;
        }
        if ( *err > 0 ) {
            *err += 100;
        }
        /* 11-16-2004: use Chiral flag */
        if ( num_atoms > 0 && at && *at && mol_data && pInpAtomFlags ) {
            if ( mol_data->ctab.cChiralFlag ) {
                *pInpAtomFlags |= FLAG_INP_AT_CHIRAL;
            } else {
                *pInpAtomFlags |= FLAG_INP_AT_NONCHIRAL;
            }
        }
    } else
    if ( !at ) {
        num_atoms = pOnlyCtab->nNumberOfAtoms;
    }

    if ( !pOnlyHeaderBlock ) {
        delete_mol_data( mol_data );
    }
#ifdef CML_DEBUG
    puts ("MOL");
    f_p = fopen ("mol.dbg", "a");
    if (f_p)
    {
        /* at is NULL in header-only mode: there is nothing to dump then */
        if ( at && *at ) {
            PrintInpAtom (f_p, *at, num_atoms);
        }
        fclose (f_p);
    }
    else
    {
        puts ("Couldn't open file");
    }
#endif

    return num_atoms;
}
|
896
|
+
/**********************************************************************************/
|
897
|
+
/*
 * FreeOrigAtData
 *
 * Releases every array referenced by *orig_at_data and then zeroes the whole
 * structure, so it can be reused or freed again without harm.
 * A NULL argument is accepted and ignored.
 */
void FreeOrigAtData( ORIG_ATOM_DATA *orig_at_data )
{
    if ( orig_at_data ) {
        /* atom array */
        FreeInpAtom( &orig_at_data->at );

        /* auxiliary arrays; each one is released only if it is present */
        if ( NULL != orig_at_data->nCurAtLen ) {
            inchi_free( orig_at_data->nCurAtLen );
        }
        if ( NULL != orig_at_data->nOldCompNumber ) {
            inchi_free( orig_at_data->nOldCompNumber );
        }
        if ( NULL != orig_at_data->szCoord ) {
            inchi_free( orig_at_data->szCoord );
        }
        if ( NULL != orig_at_data->nEquLabels ) {
            inchi_free( orig_at_data->nEquLabels );
        }
        if ( NULL != orig_at_data->nSortedOrder ) {
            inchi_free( orig_at_data->nSortedOrder );
        }

        /* clear all counters and the now-stale pointers in one go */
        memset( orig_at_data, 0, sizeof(*orig_at_data) );
    }
}
|
921
|
+
/**********************************************************************************/
|
922
|
+
int MolfileToOrigAtom( FILE *inp_molfile, ORIG_ATOM_DATA *orig_at_data, int bMergeAllInputStructures,
|
923
|
+
int bGetOrigCoord, int bDoNotAddH,
|
924
|
+
const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
|
925
|
+
INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
|
926
|
+
{
|
927
|
+
/* inp_ATOM *at = NULL; */
|
928
|
+
int num_dimensions_new;
|
929
|
+
int num_inp_bonds_new;
|
930
|
+
int num_inp_atoms_new;
|
931
|
+
inp_ATOM *at_new = NULL;
|
932
|
+
inp_ATOM *at_old = NULL;
|
933
|
+
int nNumAtoms = 0;
|
934
|
+
MOL_COORD *szCoordNew = NULL;
|
935
|
+
MOL_COORD *szCoordOld = NULL;
|
936
|
+
int i, j;
|
937
|
+
|
938
|
+
if ( pStrErr ) {
|
939
|
+
pStrErr[0] = '\0';
|
940
|
+
}
|
941
|
+
|
942
|
+
/*FreeOrigAtData( orig_at_data );*/
|
943
|
+
|
944
|
+
do {
|
945
|
+
|
946
|
+
at_old = orig_at_data? orig_at_data->at : NULL; /* save pointer to the previous allocation */
|
947
|
+
szCoordOld = orig_at_data? orig_at_data->szCoord : NULL;
|
948
|
+
num_inp_atoms_new =
|
949
|
+
MolfileToInpAtom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, (bGetOrigCoord && orig_at_data)? &szCoordNew : NULL, MAX_ATOMS,
|
950
|
+
&num_dimensions_new, &num_inp_bonds_new,
|
951
|
+
pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
|
952
|
+
|
953
|
+
|
954
|
+
if ( num_inp_atoms_new <= 0 && !*err ) {
|
955
|
+
MOLFILE_ERR_SET (*err, 0, "Empty structure");
|
956
|
+
*err = 98;
|
957
|
+
} else
|
958
|
+
if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_inp_atoms > 0 && bMergeAllInputStructures ) {
|
959
|
+
*err = 0; /* end of file */
|
960
|
+
break;
|
961
|
+
} else
|
962
|
+
if ( num_inp_atoms_new > 0 && orig_at_data ) {
|
963
|
+
/* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
|
964
|
+
nNumAtoms = num_inp_atoms_new + orig_at_data->num_inp_atoms;
|
965
|
+
if ( nNumAtoms >= MAX_ATOMS ) {
|
966
|
+
MOLFILE_ERR_SET (*err, 0, "Too many atoms");
|
967
|
+
*err = 70;
|
968
|
+
orig_at_data->num_inp_atoms = -1;
|
969
|
+
} else
|
970
|
+
if ( !at_old ) {
|
971
|
+
/* the first structure */
|
972
|
+
orig_at_data->at = at_new;
|
973
|
+
orig_at_data->szCoord = szCoordNew;
|
974
|
+
at_new = NULL;
|
975
|
+
szCoordNew = NULL;
|
976
|
+
orig_at_data->num_inp_atoms = num_inp_atoms_new;
|
977
|
+
orig_at_data->num_inp_bonds = num_inp_bonds_new;
|
978
|
+
orig_at_data->num_dimensions = num_dimensions_new;
|
979
|
+
} else
|
980
|
+
if ( (orig_at_data->at = ( inp_ATOM* ) inchi_calloc( nNumAtoms, sizeof(inp_ATOM) )) &&
|
981
|
+
(!szCoordNew || (orig_at_data->szCoord = (MOL_COORD *) inchi_calloc( nNumAtoms, sizeof(MOL_COORD) ))) ) {
|
982
|
+
/* switch at_new <--> orig_at_data->at; */
|
983
|
+
if ( orig_at_data->num_inp_atoms ) {
|
984
|
+
memcpy( orig_at_data->at, at_old, orig_at_data->num_inp_atoms * sizeof(orig_at_data->at[0]) );
|
985
|
+
/* adjust numbering in the newly read structure */
|
986
|
+
for ( i = 0; i < num_inp_atoms_new; i ++ ) {
|
987
|
+
for ( j = 0; j < at_new[i].valence; j ++ ) {
|
988
|
+
at_new[i].neighbor[j] += orig_at_data->num_inp_atoms;
|
989
|
+
}
|
990
|
+
at_new[i].orig_at_number += orig_at_data->num_inp_atoms; /* 12-19-2003 */
|
991
|
+
}
|
992
|
+
if ( orig_at_data->szCoord && szCoordOld ) {
|
993
|
+
memcpy( orig_at_data->szCoord, szCoordOld, orig_at_data->num_inp_atoms * sizeof(MOL_COORD) );
|
994
|
+
}
|
995
|
+
}
|
996
|
+
if ( at_old ) {
|
997
|
+
inchi_free( at_old );
|
998
|
+
at_old = NULL;
|
999
|
+
}
|
1000
|
+
if ( szCoordOld ) {
|
1001
|
+
inchi_free( szCoordOld );
|
1002
|
+
szCoordOld = NULL;
|
1003
|
+
}
|
1004
|
+
/* copy newly read structure */
|
1005
|
+
memcpy( orig_at_data->at + orig_at_data->num_inp_atoms,
|
1006
|
+
at_new,
|
1007
|
+
num_inp_atoms_new * sizeof(orig_at_data->at[0]) );
|
1008
|
+
if ( orig_at_data->szCoord && szCoordNew ) {
|
1009
|
+
memcpy( orig_at_data->szCoord + orig_at_data->num_inp_atoms,
|
1010
|
+
szCoordNew,
|
1011
|
+
num_inp_atoms_new * sizeof(MOL_COORD) );
|
1012
|
+
}
|
1013
|
+
/* add other things */
|
1014
|
+
orig_at_data->num_inp_atoms += num_inp_atoms_new;
|
1015
|
+
orig_at_data->num_inp_bonds += num_inp_bonds_new;
|
1016
|
+
orig_at_data->num_dimensions = inchi_max(num_dimensions_new, orig_at_data->num_dimensions);
|
1017
|
+
} else {
|
1018
|
+
MOLFILE_ERR_SET (*err, 0, "Out of RAM");
|
1019
|
+
*err = -1;
|
1020
|
+
}
|
1021
|
+
} else
|
1022
|
+
if ( num_inp_atoms_new > 0 ) {
|
1023
|
+
nNumAtoms += num_inp_atoms_new;
|
1024
|
+
}
|
1025
|
+
if ( at_new ) {
|
1026
|
+
inchi_free( at_new );
|
1027
|
+
at_new = NULL;
|
1028
|
+
}
|
1029
|
+
|
1030
|
+
} while ( !*err && bMergeAllInputStructures );
|
1031
|
+
/*
|
1032
|
+
if ( !*err ) {
|
1033
|
+
orig_at_data->num_components =
|
1034
|
+
MarkDisconnectedComponents( orig_at_data );
|
1035
|
+
if ( orig_at_data->num_components == 0 ) {
|
1036
|
+
MOLFILE_ERR_SET (*err, 0, "No components found");
|
1037
|
+
*err = 99;
|
1038
|
+
}
|
1039
|
+
if ( orig_at_data->num_components < 0 ) {
|
1040
|
+
MOLFILE_ERR_SET (*err, 0, "Too many components");
|
1041
|
+
*err = 99;
|
1042
|
+
}
|
1043
|
+
}
|
1044
|
+
*/
|
1045
|
+
if ( szCoordNew ) {
|
1046
|
+
inchi_free( szCoordNew );
|
1047
|
+
}
|
1048
|
+
if ( at_new ) {
|
1049
|
+
inchi_free( at_new );
|
1050
|
+
}
|
1051
|
+
if ( *err ) {
|
1052
|
+
FreeOrigAtData( orig_at_data );
|
1053
|
+
}
|
1054
|
+
if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
|
1055
|
+
MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
|
1056
|
+
}
|
1057
|
+
return orig_at_data? orig_at_data->num_inp_atoms : nNumAtoms;
|
1058
|
+
}
|
1059
|
+
|
1060
|
+
|