rino 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/ext/extconf.rb +1 -24
- data/ext/libinchi.so +0 -0
- data/ext/src/aux2atom.h +120 -39
- data/ext/src/comdef.h +3 -3
- data/ext/src/dispstru.c +2547 -0
- data/ext/src/dispstru.h +73 -0
- data/ext/src/extr_ct.h +5 -2
- data/ext/src/ichi.h +27 -11
- data/ext/src/ichi_bns.c +1800 -254
- data/ext/src/ichi_bns.h +205 -4
- data/ext/src/ichican2.c +197 -86
- data/ext/src/ichicano.c +8 -13
- data/ext/src/ichicano.h +2 -2
- data/ext/src/ichicans.c +11 -6
- data/ext/src/ichicant.h +2 -2
- data/ext/src/ichicomn.h +2 -2
- data/ext/src/ichicomp.h +19 -4
- data/ext/src/ichidrp.h +9 -5
- data/ext/src/ichierr.h +5 -3
- data/ext/src/ichiisot.c +2 -2
- data/ext/src/ichimain.c +461 -0
- data/ext/src/ichimain.h +23 -15
- data/ext/src/ichimak2.c +6 -6
- data/ext/src/ichimake.c +843 -42
- data/ext/src/ichimake.h +4 -2
- data/ext/src/ichimap1.c +5 -5
- data/ext/src/ichimap2.c +2 -2
- data/ext/src/ichimap4.c +34 -21
- data/ext/src/ichinorm.c +11 -5
- data/ext/src/ichinorm.h +3 -2
- data/ext/src/ichiparm.c +2 -2
- data/ext/src/ichiparm.h +232 -30
- data/ext/src/ichiprt1.c +35 -11
- data/ext/src/ichiprt2.c +78 -7
- data/ext/src/ichiprt3.c +300 -120
- data/ext/src/ichiqueu.c +17 -2
- data/ext/src/ichiread.c +6932 -0
- data/ext/src/ichiring.c +3 -2
- data/ext/src/ichiring.h +2 -2
- data/ext/src/ichirvr1.c +4891 -0
- data/ext/src/ichirvr2.c +6344 -0
- data/ext/src/ichirvr3.c +5499 -0
- data/ext/src/ichirvr4.c +3177 -0
- data/ext/src/ichirvr5.c +1166 -0
- data/ext/src/ichirvr6.c +1287 -0
- data/ext/src/ichirvr7.c +2319 -0
- data/ext/src/ichirvrs.h +882 -0
- data/ext/src/ichisize.h +2 -2
- data/ext/src/ichisort.c +5 -5
- data/ext/src/ichister.c +281 -86
- data/ext/src/ichister.h +9 -3
- data/ext/src/ichitaut.c +208 -9
- data/ext/src/ichitaut.h +13 -11
- data/ext/src/ichitime.h +16 -2
- data/ext/src/inchicmp.h +107 -0
- data/ext/src/inpdef.h +6 -3
- data/ext/src/libinchi_wrap.c +912 -0
- data/ext/src/lreadmol.h +34 -31
- data/ext/src/mode.h +244 -7
- data/ext/src/mol2atom.c +1060 -0
- data/ext/src/mol2atom.h +31 -0
- data/ext/src/readinch.c +239 -0
- data/ext/src/readmol.c +28 -0
- data/ext/src/{e_readmol.h → readmol.h} +7 -9
- data/ext/src/runichi.c +251 -177
- data/ext/src/strutil.c +444 -238
- data/ext/src/strutil.h +150 -11
- data/ext/src/util.c +176 -118
- data/ext/src/util.h +15 -3
- data/lib/rino.rb +71 -3
- data/test/test.rb +33 -4
- metadata +22 -34
- data/ext/ruby_inchi_main.so +0 -0
- data/ext/src/e_0dstereo.c +0 -3014
- data/ext/src/e_0dstereo.h +0 -31
- data/ext/src/e_comdef.h +0 -57
- data/ext/src/e_ctl_data.h +0 -147
- data/ext/src/e_ichi_io.c +0 -498
- data/ext/src/e_ichi_io.h +0 -40
- data/ext/src/e_ichi_parms.c +0 -37
- data/ext/src/e_ichi_parms.h +0 -41
- data/ext/src/e_ichicomp.h +0 -50
- data/ext/src/e_ichierr.h +0 -40
- data/ext/src/e_ichimain.c +0 -593
- data/ext/src/e_ichisize.h +0 -43
- data/ext/src/e_inchi_atom.c +0 -75
- data/ext/src/e_inchi_atom.h +0 -33
- data/ext/src/e_inpdef.h +0 -41
- data/ext/src/e_mode.h +0 -706
- data/ext/src/e_mol2atom.c +0 -649
- data/ext/src/e_readinch.c +0 -58
- data/ext/src/e_readmol.c +0 -54
- data/ext/src/e_readstru.c +0 -251
- data/ext/src/e_readstru.h +0 -33
- data/ext/src/e_util.c +0 -284
- data/ext/src/e_util.h +0 -61
- data/ext/src/ichilnct.c +0 -286
- data/ext/src/inchi_api.h +0 -670
- data/ext/src/inchi_dll.c +0 -1480
- data/ext/src/inchi_dll.h +0 -34
- data/ext/src/inchi_dll_main.c +0 -23
- data/ext/src/inchi_dll_main.h +0 -31
- data/ext/src/ruby_inchi_main.c +0 -558
data/ext/src/mol2atom.c
ADDED
@@ -0,0 +1,1060 @@
|
|
1
|
+
/*
|
2
|
+
* International Union of Pure and Applied Chemistry (IUPAC)
|
3
|
+
* International Chemical Identifier (InChI)
|
4
|
+
* Version 1
|
5
|
+
* Software version 1.01
|
6
|
+
* July 21, 2006
|
7
|
+
* Developed at NIST
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include <stdio.h>
|
11
|
+
#include <stdlib.h>
|
12
|
+
#include <string.h>
|
13
|
+
#include <math.h>
|
14
|
+
#include <ctype.h>
|
15
|
+
|
16
|
+
#include "mode.h"
|
17
|
+
|
18
|
+
#include "comdef.h"
|
19
|
+
#include "readmol.h"
|
20
|
+
#include "inpdef.h"
|
21
|
+
#include "util.h"
|
22
|
+
|
23
|
+
#include "ichicomp.h"
|
24
|
+
|
25
|
+
#if( ADD_CMLPP == 1 )
|
26
|
+
#include "debug.h"
|
27
|
+
#endif
|
28
|
+
#include "mol2atom.h"
|
29
|
+
|
30
|
+
#define MIN_STDATA_X_COORD 0.0
|
31
|
+
#define MAX_STDATA_X_COORD 256.0
|
32
|
+
#define MIN_STDATA_Y_COORD 0.0
|
33
|
+
#define MAX_STDATA_Y_COORD 256.0
|
34
|
+
#define MIN_STDATA_Z_COORD 0.0
|
35
|
+
#define MAX_STDATA_Z_COORD 256.0
|
36
|
+
#define MAX_STDATA_AVE_BOND_LENGTH 20.0
|
37
|
+
#define MIN_STDATA_AVE_BOND_LENGTH 10.0
|
38
|
+
|
39
|
+
|
40
|
+
/* local prototypes */
|
41
|
+
inp_ATOM* mol_to_atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inp_ATOM* at_inp, int bDoNotAddH, int *err, char *pStrErr );
|
42
|
+
int mol_to_atom_xyz( MOL_DATA* mol_data, int num_atoms, inp_ATOM* at, int *err, char *pStrErr );
|
43
|
+
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr );
|
44
|
+
|
45
|
+
/******************************************************************************************************/
|
46
|
+
/*
 * Release an inp_ATOM array with inchi_free() and reset the caller's
 * pointer to NULL.
 *
 * at: address of the array pointer; the call is a no-op when either
 *     `at` or `*at` is NULL.
 */
void FreeInpAtom( inp_ATOM **at )
{
    if ( !at || !*at ) {
        return; /* nothing to release */
    }
    inchi_free( *at );
    *at = NULL; /* make a second call on the same pointer harmless */
}
|
53
|
+
/******************************************************************************************************/
|
54
|
+
inp_ATOM *CreateInpAtom( int num_atoms )
|
55
|
+
{
|
56
|
+
/*
|
57
|
+
void *p = inchi_calloc(num_atoms, sizeof(inp_ATOM) );
|
58
|
+
if ( p == (void*)0x009143A8 ) {
|
59
|
+
int stop = 1;
|
60
|
+
}
|
61
|
+
return (inp_ATOM* )p;
|
62
|
+
*/
|
63
|
+
return (inp_ATOM* ) inchi_calloc(num_atoms, sizeof(inp_ATOM) );
|
64
|
+
}
|
65
|
+
/******************************************************************************************************/
|
66
|
+
/*
 * Release both atom arrays owned by an INP_ATOM_DATA (at and
 * at_fixed_bonds) and zero the whole structure.
 *
 * inp_at_data: structure to clear; NULL is accepted and ignored.
 */
void FreeInpAtomData( INP_ATOM_DATA *inp_at_data )
{
    if ( !inp_at_data ) {
        return;
    }
    FreeInpAtom( &inp_at_data->at );
    FreeInpAtom( &inp_at_data->at_fixed_bonds );
    /* reset every remaining field (counts, flags, ...) to zero */
    memset( inp_at_data, 0, sizeof(*inp_at_data) );
}
|
74
|
+
/******************************************************************************************************/
|
75
|
+
/*
 * (Re)initialize an INP_ATOM_DATA so that it owns freshly allocated
 * atom arrays of num_atoms elements.
 *
 * Any previous content is released first via FreeInpAtomData().
 *
 * inp_at_data:           structure to fill; must not be NULL (it is
 *                        dereferenced unconditionally).
 * num_atoms:             number of elements in each array.
 * create_at_fixed_bonds: non-zero to also allocate at_fixed_bonds.
 *
 * Returns 1 on success (num_at is set to num_atoms); 0 if an allocation
 * failed, in which case the structure is left freed and zeroed.
 */
int CreateInpAtomData( INP_ATOM_DATA *inp_at_data, int num_atoms, int create_at_fixed_bonds )
{
    int bAllocated = 0;

    FreeInpAtomData( inp_at_data );

    inp_at_data->at = CreateInpAtom( num_atoms );
    if ( inp_at_data->at ) {
        if ( !create_at_fixed_bonds ) {
            bAllocated = 1;
        } else {
            inp_at_data->at_fixed_bonds = CreateInpAtom( num_atoms );
            bAllocated = ( inp_at_data->at_fixed_bonds != NULL );
        }
    }

    if ( !bAllocated ) {
        /* drop whatever was allocated before the failure */
        FreeInpAtomData( inp_at_data );
        return 0;
    }

    inp_at_data->num_at = num_atoms;
    return 1;
}
|
86
|
+
/******************************************************************************************************/
|
87
|
+
/*
 * Release the arrays owned by a COMP_ATOM_DATA (at and nOffsetAtAndH)
 * and zero the whole structure.
 *
 * inp_at_data: structure to clear; NULL is accepted and ignored, in line
 *              with FreeInpAtomData().
 */
void FreeCompAtomData( COMP_ATOM_DATA *inp_at_data )
{
    if ( !inp_at_data ) {
        return; /* nothing to release */
    }
    FreeInpAtom( &inp_at_data->at );
    if ( inp_at_data->nOffsetAtAndH ) {
        inchi_free( inp_at_data->nOffsetAtAndH );
    }
    /* also clears the just-freed nOffsetAtAndH pointer and all counters */
    memset( inp_at_data, 0, sizeof(*inp_at_data) );
}
|
94
|
+
#ifndef INCHI_ANSI_ONLY
|
95
|
+
/******************************************************************************************************/
|
96
|
+
/*
 * (Re)initialize a COMP_ATOM_DATA: release previous content, allocate the
 * atom array and, when needed, the nOffsetAtAndH array.
 *
 * inp_at_data:       structure to fill; must not be NULL.
 * num_atoms:         number of inp_ATOM elements to allocate.
 * num_components:    number of components; values <= 1 are stored as 0.
 * bIntermediateTaut: non-zero suppresses allocation of nOffsetAtAndH.
 *
 * nOffsetAtAndH is allocated with 2*(num_components+1) elements only when
 * num_components > 1 and bIntermediateTaut is zero.
 *
 * Returns 1 on success, 0 on allocation failure (structure left freed
 * and zeroed).
 */
int CreateCompAtomData( COMP_ATOM_DATA *inp_at_data, int num_atoms, int num_components, int bIntermediateTaut )
{
    int bNeedOffsets = ( num_components > 1 && !bIntermediateTaut );

    FreeCompAtomData( inp_at_data );

    inp_at_data->at = CreateInpAtom( num_atoms );
    if ( !inp_at_data->at ) {
        goto alloc_failed;
    }

    if ( bNeedOffsets ) {
        /* (count, size) argument order, as in every other inchi_calloc call in this file */
        inp_at_data->nOffsetAtAndH =
            (AT_NUMB *) inchi_calloc( 2*(num_components+1), sizeof(inp_at_data->nOffsetAtAndH[0]) );
        if ( !inp_at_data->nOffsetAtAndH ) {
            goto alloc_failed;
        }
    }

    inp_at_data->num_at         = num_atoms;
    inp_at_data->num_components = (num_components > 1) ? num_components : 0;
    return 1;

alloc_failed:
    FreeCompAtomData( inp_at_data );
    return 0;
}
|
110
|
+
|
111
|
+
/******************************************************************************************************/
|
112
|
+
/*
 * Release an inf_ATOM array with inchi_free() and reset the caller's
 * pointer to NULL. No-op when `at` or `*at` is NULL.
 */
void FreeInfAtom( inf_ATOM **at )
{
    if ( !at || !*at ) {
        return; /* nothing to release */
    }
    inchi_free( *at );
    *at = NULL;
}
|
119
|
+
/******************************************************************************************************/
|
120
|
+
inf_ATOM *CreateInfAtom( int num_atoms )
|
121
|
+
{
|
122
|
+
return (inf_ATOM* ) inchi_calloc(num_atoms, sizeof(inf_ATOM) );
|
123
|
+
}
|
124
|
+
/******************************************************************************************************/
|
125
|
+
/*
 * Release the arrays owned by an INF_ATOM_DATA (at and pStereoFlags)
 * and zero the whole structure.
 *
 * inf_at_data: structure to clear; NULL is accepted and ignored, in line
 *              with FreeInpAtomData().
 */
void FreeInfoAtomData( INF_ATOM_DATA *inf_at_data )
{
    if ( !inf_at_data ) {
        return; /* nothing to release */
    }
    FreeInfAtom( &inf_at_data->at );
    if ( inf_at_data->pStereoFlags ) {
        inchi_free( inf_at_data->pStereoFlags );
    }
    /* also clears the just-freed pStereoFlags pointer and all counters */
    memset( inf_at_data, 0, sizeof(*inf_at_data) );
}
|
132
|
+
/******************************************************************************************************/
|
133
|
+
/*
 * (Re)initialize an INF_ATOM_DATA: release previous content, then
 * allocate the inf_ATOM array and, when num_components > 1, a
 * pStereoFlags array of num_components+1 elements.
 *
 * inf_at_data: structure to fill; must not be NULL.
 *
 * Returns 1 on success (num_at and num_components are stored);
 * 0 on allocation failure (structure left freed and zeroed).
 */
int CreateInfoAtomData( INF_ATOM_DATA *inf_at_data, int num_atoms, int num_components )
{
    FreeInfoAtomData( inf_at_data );
    memset( inf_at_data, 0, sizeof(*inf_at_data) );

    inf_at_data->at = CreateInfAtom( num_atoms );
    if ( inf_at_data->at ) {
        if ( num_components > 1 ) {
            inf_at_data->pStereoFlags =
                (AT_NUMB *) inchi_calloc( num_components+1, sizeof(inf_at_data->pStereoFlags[0]) );
        }
        /* per-component flags are required only for multi-component input */
        if ( num_components <= 1 || inf_at_data->pStereoFlags ) {
            inf_at_data->num_at         = num_atoms;
            inf_at_data->num_components = num_components;
            return 1;
        }
    }

    FreeInfoAtomData( inf_at_data );
    return 0;
}
|
149
|
+
/******************************************************************************************************/
|
150
|
+
/*
 * Allocate the arrays of an INF_ATOM_DATA without touching its other
 * fields: `at` always, `pStereoFlags` (num_components+1 elements) only
 * when num_components > 1.
 *
 * Unlike CreateInfoAtomData(), nothing is freed or zeroed beforehand and
 * num_at / num_components are not stored; when num_components <= 1 the
 * existing pStereoFlags value is left as it was.
 *
 * Returns 1 on success, 0 on allocation failure (`at` is then NULL).
 */
int AllocateInfoAtomData( INF_ATOM_DATA *inf_at_data, int num_atoms, int num_components )
{
    inf_at_data->at = CreateInfAtom( num_atoms );
    if ( !inf_at_data->at ) {
        return 0;
    }

    if ( num_components > 1 ) {
        inf_at_data->pStereoFlags =
            (AT_NUMB *) inchi_calloc( num_components+1, sizeof(inf_at_data->pStereoFlags[0]) );
        if ( !inf_at_data->pStereoFlags ) {
            /* roll back the atom array so the caller sees a clean failure */
            FreeInfAtom( &inf_at_data->at );
            return 0;
        }
    }
    return 1;
}
|
162
|
+
/******************************************************************************************************/
|
163
|
+
/*
 * Deep-copy an INF_ATOM_DATA.
 *
 * All scalar fields are copied by structure assignment; the `at` array
 * and, if present in both, the `pStereoFlags` array are duplicated into
 * newly allocated storage. Previous content of *inf_at_data_to is
 * overwritten without being freed.
 *
 * Returns 1 on success, 0 on allocation failure. On failure the
 * destination holds the source's scalar fields but NULL array pointers,
 * so it never shares buffers with the source.
 */
int DuplicateInfoAtomData( INF_ATOM_DATA *inf_at_data_to, const INF_ATOM_DATA *inf_at_data_from)
{
    *inf_at_data_to = *inf_at_data_from;

    /*
     * The structure copy above also copied the source's array pointers.
     * AllocateInfoAtomData() does not touch pStereoFlags when
     * num_components <= 1 or when the atom allocation fails, so clear
     * both pointers here; otherwise source and destination could share
     * a buffer (double free on cleanup, memcpy onto itself below).
     */
    inf_at_data_to->at           = NULL;
    inf_at_data_to->pStereoFlags = NULL;

    if ( !AllocateInfoAtomData( inf_at_data_to, inf_at_data_from->num_at, inf_at_data_from->num_components ) ) {
        return 0;
    }

    memcpy( inf_at_data_to->at, inf_at_data_from->at,
            inf_at_data_from->num_at * sizeof(inf_at_data_to->at[0]) );

    if ( inf_at_data_to->pStereoFlags && inf_at_data_from->pStereoFlags ) {
        memcpy( inf_at_data_to->pStereoFlags, inf_at_data_from->pStereoFlags,
                (inf_at_data_from->num_components+1) * sizeof(inf_at_data_to->pStereoFlags[0]) );
    }
    return 1;
}
|
177
|
+
#endif /* ifndef INCHI_ANSI_ONLY */
|
178
|
+
|
179
|
+
|
180
|
+
#if( TEST_RENUMB_ATOMS == 1 ) /* { */
|
181
|
+
|
182
|
+
/******************************************************************************************************/
|
183
|
+
/*
 * Copy the atom arrays of src_inp_at_data into dest_inp_at_data.
 *
 * If the destination already owns an `at` array for the same number of
 * atoms, its buffers are reused and all other fields are copied from the
 * source. Otherwise the destination is re-created with
 * CreateInpAtomData() (at_fixed_bonds is allocated only if the source
 * has one).
 *
 * NOTE(review): on the re-create path only num_at is set by
 * CreateInpAtomData(); the remaining scalar fields of the source are not
 * copied -- confirm whether that asymmetry is intended.
 *
 * Returns non-zero on success, 0 if the destination could not be
 * allocated.
 */
int CopyInpAtomData( INP_ATOM_DATA *dest_inp_at_data, INP_ATOM_DATA *src_inp_at_data )
{
    int bOk = 1;
    int bReuseBuffers = ( dest_inp_at_data->at &&
                          dest_inp_at_data->num_at == src_inp_at_data->num_at );

    if ( bReuseBuffers ) {
        /* keep the already allocated arrays across the structure copy */
        inp_ATOM *pKeepAt    = dest_inp_at_data->at;
        inp_ATOM *pKeepFixed = dest_inp_at_data->at_fixed_bonds;

        *dest_inp_at_data = *src_inp_at_data; /* copies all other (scalar) data */
        dest_inp_at_data->at             = pKeepAt;
        dest_inp_at_data->at_fixed_bonds = pKeepFixed;
    } else {
        bOk = CreateInpAtomData( dest_inp_at_data, src_inp_at_data->num_at,
                                 (NULL != src_inp_at_data->at_fixed_bonds) );
    }

    if ( !bOk ) {
        return bOk;
    }

    memcpy( dest_inp_at_data->at, src_inp_at_data->at,
            src_inp_at_data->num_at*sizeof(dest_inp_at_data->at[0]) );

    /* the fixed-bond copy is optional: both sides must have the array */
    if ( dest_inp_at_data->at_fixed_bonds && src_inp_at_data->at_fixed_bonds ) {
        memcpy( dest_inp_at_data->at_fixed_bonds, src_inp_at_data->at_fixed_bonds,
                src_inp_at_data->num_at*sizeof(dest_inp_at_data->at_fixed_bonds[0]) );
    }
    return bOk;
}
|
205
|
+
/******************************************************************************************************/
|
206
|
+
/*
 * Copy the atoms of src_inp_at_data into dest_inp_at_data in a new order.
 *
 * Source atom n is stored at destination index new_ord[n]; its
 * orig_compt_at_numb becomes the new 1-based index and every entry of
 * its neighbor[] list is translated through new_ord[].
 *
 * When TEST_RENUMB_NEIGH == 1 the neighbor order of each atom is also
 * shuffled at random; bond_stereo[], bond_type[] and the stereo-bond
 * ordinals sb_ord[] / sn_ord[] are kept consistent with the shuffle.
 *
 * dest_inp_at_data->at must already hold at least src num_at elements
 * (it is written, not allocated, here).
 */
void RenumbInpAtomData( INP_ATOM_DATA *dest_inp_at_data, INP_ATOM_DATA *src_inp_at_data, AT_RANK *new_ord )
{
    int j, n, m, val;
#if( TEST_RENUMB_NEIGH == 1 )
    int i, k;
#endif
    int num_atoms = src_inp_at_data->num_at;
    inp_ATOM *dest_at = dest_inp_at_data->at;
    for ( n = 0; n < num_atoms; n ++ ) {
        m = new_ord[n];
        dest_at[m] = src_inp_at_data->at[n];
        dest_at[m].orig_compt_at_numb = (AT_NUMB)(m+1);  /* new ordering number within the component */
        val = dest_at[m].valence;
        for ( j = 0; j < val; j ++ ) {
            dest_at[m].neighbor[j] = new_ord[dest_at[m].neighbor[j]];
        }
#if( TEST_RENUMB_NEIGH == 1 )
        for ( i = 0; i < val; i ++ ) {
            j = i;
            /*
             * Random index in [j, val). Computed in double: the integer
             * form rand()*(val-j)/(RAND_MAX+1) overflows signed int when
             * RAND_MAX == INT_MAX; results are unchanged where it did not.
             */
            k = j + (int)( (double)rand() * (double)(val-j) / ((double)RAND_MAX + 1.0) );
            if ( k >= val || j == k ) {
                continue;
            }
            swap( (char*)&dest_at[m].neighbor[j],    (char*)&dest_at[m].neighbor[k],    sizeof(dest_at[0].neighbor[0]) );
            swap( (char*)&dest_at[m].bond_stereo[j], (char*)&dest_at[m].bond_stereo[k], sizeof(dest_at[0].bond_stereo[0]) );
            swap( (char*)&dest_at[m].bond_type[j],   (char*)&dest_at[m].bond_type[k],   sizeof(dest_at[0].bond_type[0]) );
            /* adjust stereo bond links */
            if ( dest_at[m].sb_parity[0] ) {
                int a;
                for ( a = 0; a < MAX_NUM_STEREO_BONDS && dest_at[m].sb_parity[a]; a ++ ) {

                    /* ordinals that pointed at j or k follow the swap */
                    if ( k == (int)dest_at[m].sb_ord[a] ) {
                        dest_at[m].sb_ord[a] = j;
                    } else
                    if ( j == (int)dest_at[m].sb_ord[a] ) {
                        dest_at[m].sb_ord[a] = k;
                    }

                    if ( k == (int)dest_at[m].sn_ord[a] ) {
                        dest_at[m].sn_ord[a] = j;
                    } else
                    if ( j == (int)dest_at[m].sn_ord[a] ) {
                        dest_at[m].sn_ord[a] = k;
                    }
                }
            }
        }
#endif
    }
}
|
256
|
+
/******************************************************************************************************/
|
257
|
+
/*
 * Randomly permute new_ord[0..num_atoms-1] in place by swapping each
 * element i with a randomly chosen element.
 *
 * The partner index is drawn from [0, num_atoms-i) exactly as before.
 * NOTE(review): RenumbInpAtomData() offsets its random index by the
 * current position, this function does not -- confirm whether the
 * missing "+ i" is intended.
 *
 * Uses rand(); seed with srand() for reproducible sequences.
 */
void MakeNewOrd( int num_atoms, AT_RANK *new_ord )
{
    int i, j, k;
    for ( i = 0; i < num_atoms; i ++ ) {
        j = i;
        /*
         * Computed in double: the integer form
         * rand()*(num_atoms-i)/(RAND_MAX+1) overflows signed int when
         * RAND_MAX == INT_MAX; results are unchanged where it did not.
         */
        k = (int)( (double)rand() * (double)(num_atoms-i) / ((double)RAND_MAX + 1.0) );
        if ( k >= num_atoms || j == k ) {
            continue;
        }
        swap( (char*)&new_ord[j], (char*)&new_ord[k], sizeof(new_ord[0]) );
    }
}
|
269
|
+
#endif /* } TEST_RENUMB_ATOMS == 1 */
|
270
|
+
|
271
|
+
/******************************************************************************************************/
|
272
|
+
/*
 * Convert the atom and bond tables of a parsed MOLfile into an inp_ATOM
 * array (element, charge, radical, isotope, connectivity, bond types and
 * bond stereo). Coordinates are not copied here; see mol_to_atom_xyz().
 *
 * mol_data:   parsed MOLfile; NULL or an empty atom table yields NULL.
 * num_atoms:  out: number of atoms taken from the MOLfile.
 * num_bonds:  out: number of distinct bonds accepted.
 * at_inp:     optional caller-supplied array to fill; when NULL a new
 *             array is allocated with CreateInpAtom().
 * bDoNotAddH: forwarded to calculate_valences().
 * err:        out: 0, a negative fatal code (-1 out of memory), or an
 *             OR of warning bits set below: 1 bond to nonexistent atom,
 *             2 multiple bonds between two atoms, 4 too many bonds on an
 *             atom, 8 unrecognized bond type, 16 unrecognized bond
 *             stereo, 64 unknown element.
 * pStrErr:    message buffer; not referenced directly here, presumably
 *             used by the MOLFILE_ERR_* macros -- confirm.
 *
 * Returns at_inp or the newly allocated array; NULL when there is no
 * structure or allocation failed.
 */
inp_ATOM* mol_to_atom( MOL_DATA* mol_data, int *num_atoms, int *num_bonds, inp_ATOM* at_inp,
                       int bDoNotAddH, int *err, char *pStrErr )
{
    inp_ATOM *at = NULL;
    AT_NUMB  *p1, *p2;
    int       i, a1, a2, n1, n2, bonds, iso_atw_diff;
    char      cBondStereo, cBondType;
    static int el_number_H = 0;


    if ( !el_number_H ) {
        el_number_H = get_periodic_table_number( "H" ); /* one-time initialization */
    }

    *err = 0;
    *num_atoms = *num_bonds = 0;
    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (*num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        /* MOLFILE_ERR_SET (*err, 0, "Empty structure"); */
        goto exit_function; /* no structure */
    }
    /* allocate memory if necessary */
    if ( at_inp ) {
        at = at_inp;
    } else
    if ( !(at = CreateInpAtom( *num_atoms ) ) ) {
        *err = -1;
        MOLFILE_ERR_FIN (*err, -1, exit_function, "Out of RAM");
    }

    /* copy atom info */
    for ( i = 0; i < *num_atoms; i ++ ) {
        mystrncpy( at[i].elname, mol_data->ctab.MolAtom[i].szAtomSymbol, sizeof(at->elname) );
        /* at[i].chem_bonds_valence = mol_data->ctab.MolAtom[i].cValence; */ /*  MOLfile valence; will change */
        at[i].orig_at_number     = (AT_NUMB)(i+1);
        at[i].charge             = mol_data->ctab.MolAtom[i].cCharge;
        at[i].radical            = mol_data->ctab.MolAtom[i].cRadical;
        /* coordinates x, y, z are handled in mol_to_atom_xyz() */
        /* encode the MOLfile mass difference: ZERO_ATW_DIFF -> 1, positive d -> d+1, otherwise unchanged */
        iso_atw_diff = mol_data->ctab.MolAtom[i].cMassDifference;
        at[i].iso_atw_diff = iso_atw_diff==ZERO_ATW_DIFF? 1:
                             iso_atw_diff>  0?            iso_atw_diff+1:
                                                          iso_atw_diff;
#if( SINGLET_IS_TRIPLET == 1 )
        if ( at[i].radical == RADICAL_SINGLET ) {
            at[i].radical = RADICAL_TRIPLET;
        }
#endif
#if( bRELEASE_VERSION != 1 )
        /* cast: passing a negative plain char to isdigit() is undefined behavior */
        if ( isdigit( (unsigned char) at[i].elname[0] ) ) { /* for testing */
            mystrncpy( at[i].elname, "C", sizeof(at->elname) );
        }
#endif
        if ( ERR_ELEM == (n1 = get_periodic_table_number( at[i].elname ) ) ) {
            /*  Case when elname contains more than 1 element: extract number of H if possible */
            at[i].num_H = extract_H_atoms( at[i].elname, at[i].num_iso_H );
            if ( !at[i].elname[0] && NUMH(at, i) ) {
                /* alias contains only H. Added 2004-07-21, fixed 2004-07-22
                 * move the heaviest isotope to the "central atom"
                 * Note: this must be consistent with H-H treatment in remove_terminal_HDT()
                 */
                strcpy( at[i].elname, "H" );
                if ( NUM_ISO_H(at,i) ) {
                    int j;
                    for ( j = NUM_H_ISOTOPES-1; 0 <= j; j -- ) {
                        if ( at[i].num_iso_H[j] ) {
                            at[i].num_iso_H[j] --;
                            at[i].iso_atw_diff = 1 + j;
                            break;
                        }
                    }
                } else {
                    at[i].num_H --;
                }
            }
            if ( ERR_ELEM == (n1 = get_periodic_table_number( at[i].elname ) ) ) {
                n1 = 0;
            }
        }

        at[i].el_number = (U_CHAR) n1;
        if ( !n1 ) {
            *err |= 64; /* unknown element */
            MOLFILE_ERR_SET (*err, 0, "Unknown element(s):");
            MOLFILE_ERR_SET (*err, 0, at[i].elname);
        } else
        /* replace explicit D or T with isotopic H (added 2003-06-02) */
        if ( el_number_H == n1 && !at[i].iso_atw_diff ) {
            switch( at[i].elname[0] ) {
            case 'D':
                at[i].iso_atw_diff = 2;
                mystrncpy( at[i].elname, "H", sizeof(at->elname) );
                break;
            case 'T':
                at[i].iso_atw_diff = 3;
                mystrncpy( at[i].elname, "H", sizeof(at->elname) );
                break;
            }
        }
    }


    /*---------------- stereo information notes. ------------------------

      Currently:   1. stereo sign
      =========   --------------
      MOLfile     (atom number = MOLfile atom number - 1, no stdata as an intermediate)
         |        if mol_data->ctab.MolBond[i].nAtomNo1 < mol_data->ctab.MolBond[i].nAtomNo2
         v        then
      inp_ATOM        stereo > 0
                  else
                      stereo < 0

                  2. neighbor z-coordinate
                  ------------------------
                  neighbor z-coord > 0 for Up if sign(stdata_bond_no) = sign(at[i].neighbor[j]-i)

    --------------------------------------------------------------------*/

    /* copy bond info */
    for ( i = 0, bonds = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        cBondStereo = mol_data->ctab.MolBond[i].cBondStereo;
        cBondType   = mol_data->ctab.MolBond[i].cBondType;
        a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;

        if ( a1 < 0 || a1 >= *num_atoms ||
             a2 < 0 || a2 >= *num_atoms ||
             a1 == a2 ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }
        /*  check for multiple bonds between same atoms */
        p1 = is_in_the_list( at[a1].neighbor, (AT_NUMB)a2, at[a1].valence );
        p2 = is_in_the_list( at[a2].neighbor, (AT_NUMB)a1, at[a2].valence );
        if ( (p1 || p2) && (p1 || at[a1].valence < MAXVAL) && (p2 || at[a2].valence < MAXVAL) ) {
            /* repeated bond: reuse the existing neighbor slot(s), the later entry overwrites */
            n1 = p1? (p1 - at[a1].neighbor) : at[a1].valence ++;
            n2 = p2? (p2 - at[a2].neighbor) : at[a2].valence ++;
            MOLFILE_ERR_SET (*err, 0, "Multiple bonds between two atoms");
            *err |= 2; /*  multiple bonds between atoms */
        } else
        if ( !p1 && !p2 && at[a1].valence < MAXVAL && at[a2].valence < MAXVAL ) {
            n1 = at[a1].valence ++;
            n2 = at[a2].valence ++;
            bonds ++;
        } else {
            char szMsg[64];
            *err |= 4; /*  too large number of bonds. Some bonds ignored. */
            sprintf( szMsg, "Atom '%s' has more than %d bonds",
                            at[a1].valence>= MAXVAL? at[a1].elname:at[a2].elname, MAXVAL );
            MOLFILE_ERR_SET (*err, 0, szMsg);
            continue;
        }
        if ( cBondType < MIN_INPUT_BOND_TYPE || cBondType > MAX_INPUT_BOND_TYPE ) {
            char szBondType[16];
            sprintf( szBondType, "%d", cBondType );
            cBondType = 1;
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond type:");
            MOLFILE_ERR_SET (*err, 0, szBondType);
            *err |= 8; /*  Unrecognized Bond type replaced with single bond */
        }
        /* bond type */
        at[a1].bond_type[n1] =
        at[a2].bond_type[n2] = cBondType;
        /* connection */
        at[a1].neighbor[n1] = (AT_NUMB)a2;
        at[a2].neighbor[n2] = (AT_NUMB)a1;
        /* stereo */
        if ( cBondStereo == INPUT_STEREO_DBLE_EITHER /* 3 */ ) {
            at[a1].bond_stereo[n1] =
            at[a2].bond_stereo[n2] = STEREO_DBLE_EITHER;
        } else
        if ( cBondStereo == INPUT_STEREO_SNGL_UP ||     /* 1 */
             cBondStereo == INPUT_STEREO_SNGL_EITHER || /* 4 */
             cBondStereo == INPUT_STEREO_SNGL_DOWN      /* 6 */ ) {
            char cStereo = 0; /* always overwritten: the enclosing test admits only the three cases below */
            switch ( cBondStereo ) {
            case INPUT_STEREO_SNGL_UP:
                cStereo = STEREO_SNGL_UP;
                break;
            case INPUT_STEREO_SNGL_EITHER:
                cStereo = STEREO_SNGL_EITHER;
                break;
            case INPUT_STEREO_SNGL_DOWN:
                cStereo = STEREO_SNGL_DOWN;
                break;
            }
            at[a1].bond_stereo[n1] =  cStereo; /*  >0: the wedge (pointed) end is at this atom, a1 */
            at[a2].bond_stereo[n2] = -cStereo; /*  <0: the wedge (pointed) end is at the opposite atom (a1, as seen from a2) */
        } else
        if ( cBondStereo ) {
            *err |= 16; /*  Ignored unrecognized Bond stereo */
            MOLFILE_ERR_SET (*err, 0, "Unrecognized bond stereo");
            continue;
        }
    }
    *num_bonds = bonds;


    /* special valences */
    calculate_valences (mol_data, at, num_atoms, bDoNotAddH, err, pStrErr);

exit_function:;
    return at;
}
|
485
|
+
/******************************************************************************************************/
|
486
|
+
/*
 * Compute chem_bonds_valence and the hydrogen count (num_H) for every
 * atom of `at`, from the bond types already stored in at[].bond_type[].
 *
 * mol_data:   parsed MOLfile; when non-NULL its per-atom cValence and
 *             cAtomAliasedFlag are passed to get_num_H(). When NULL the
 *             hydrogen count is left unchanged.
 * at:         atom array to update in place.
 * num_atoms:  pointer to the number of atoms in `at`.
 * bDoNotAddH: forwarded to get_num_H().
 * err:        in/out error code; bit 32 is OR-ed in when an atom has 1
 *             or more than 3 aromatic bonds, and it is set to -2 (and
 *             the function returns early) on an inconsistent neighbor
 *             list.
 * pStrErr:    message buffer; not referenced directly here, presumably
 *             used by the MOLFILE_ERR_SET macro -- confirm.
 */
void calculate_valences (MOL_DATA* mol_data, inp_ATOM* at, int *num_atoms, int bDoNotAddH, int *err, char *pStrErr)
{
    int bNonMetal;
    int a1, a2, n1, n2, valence;
    AT_NUMB  *p1;

    /* special valences */
    /* two passes over all atoms; each pass skips the atoms selected by the test below */
    for ( bNonMetal = 0; bNonMetal < 2; bNonMetal ++ ) {
        for ( a1 = 0; a1 < *num_atoms; a1 ++ ) {
            /* num_bond_type[t] counts this atom's bonds of input type MIN_INPUT_BOND_TYPE + t */
            int num_bond_type[MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE + 1], bond_type, bHasMetalNeighbor;
            /* should the "!=" be replaced with "==" ??? */
            /* NOTE(review): the ordering relies on is_el_a_metal() returning exactly 0 or 1 -- confirm */
            if ( bNonMetal == is_el_a_metal( at[a1].el_number ) ) {
                continue; /* first process all metals, after that all non-metals */
            }
            memset( num_bond_type, 0, sizeof(num_bond_type) );

            valence = at[a1].chem_bonds_valence; /*  save atom valence if available */

            at[a1].chem_bonds_valence = 0;
            bHasMetalNeighbor = 0; /* never changed below; passed to get_num_H() as 0 */
            for ( n1 = 0; n1 < at[a1].valence; n1 ++ ) {
                bond_type = at[a1].bond_type[n1] - MIN_INPUT_BOND_TYPE;
                if (  bond_type < 0 || bond_type > MAX_INPUT_BOND_TYPE - MIN_INPUT_BOND_TYPE ) {
                    bond_type = 0; /* counted in slot 0, i.e. as type MIN_INPUT_BOND_TYPE */
                    MOLFILE_ERR_SET (*err, 0, "Unknown bond type in MOLfile assigned as a single bond");
                }
                num_bond_type[ bond_type ] ++;
                /* -- too a radical solution -- removed from next to ver 1.12B --- */
            }
            for ( n1 = 0; MIN_INPUT_BOND_TYPE + n1 <= 3 && MIN_INPUT_BOND_TYPE + n1 <= MAX_INPUT_BOND_TYPE; n1 ++ ) {
                /* add all bond orders except for "aromatic" bonds */
                at[a1].chem_bonds_valence += (MIN_INPUT_BOND_TYPE + n1) * num_bond_type[n1];
            }
            /* n2 = number of aromatic (BOND_TYPE_ALTERN) bonds still treated as aromatic; 0 if none */
            n2 = 0;
            if ( MIN_INPUT_BOND_TYPE <= BOND_TYPE_ALTERN && BOND_TYPE_ALTERN <= MAX_INPUT_BOND_TYPE &&
                 ( n2 = num_bond_type[BOND_TYPE_ALTERN-MIN_INPUT_BOND_TYPE] ) ) {
                /* accept input aromatic bonds for now */
                switch ( n2 ) {
                case 2:
                    at[a1].chem_bonds_valence += 3;  /* =A- */
                    break;
                case 3:
                    at[a1].chem_bonds_valence += 4;  /* =A< */
                    break;
                default:
                    /*  if 1 or >= 4 aromatic bonds then replace such bonds with single bonds */
                    /*  and detect an error in the input structure */
                    for ( n1 = 0; n1 < at[a1].valence; n1 ++ ) {
                        if ( at[a1].bond_type[n1] == BOND_TYPE_ALTERN ) {
                            a2 = at[a1].neighbor[n1];
                            /* find the same bond in the neighbor's list so both ends are updated */
                            p1 = is_in_the_list( at[a2].neighbor, (AT_NUMB)a1, at[a2].valence );
                            if ( p1 ) {
                                at[a1].bond_type[n1] =
                                at[a2].bond_type[p1-at[a2].neighbor] = BOND_TYPE_SINGLE;
                            } else {
                                *err = -2;  /*  Program error */
                                MOLFILE_ERR_SET (*err, 0, "Program error interpreting MOLfile");
                                return; /*  no structure */
                            }
                        }
                    }
                    /* each former aromatic bond now contributes 1 as a single bond */
                    at[a1].chem_bonds_valence += n2;
                    *err |= 32;
                    MOLFILE_ERR_SET (*err, 0, "Atom has 1 or more than 3 aromatic bonds");
                    n2 = 0; /* no aromatic bonds remain on this atom */
                    break;
                }
            }
            if ( n2 && !valence ) {
                /* atom has aromatic bonds AND the chemical valence is not known */
                int num_H = NUMH(at, a1);
                int chem_valence = at[a1].chem_bonds_valence + num_H;
                /* test the element's valence as computed, and again one lower */
                int bUnusualValenceArom =
                    detect_unusual_el_valence( (int)at[a1].el_number, at[a1].charge,
                                                at[a1].radical, chem_valence,
                                                num_H, at[a1].valence );
                int bUnusualValenceNoArom =
                    detect_unusual_el_valence( (int)at[a1].el_number, at[a1].charge,
                                                at[a1].radical, chem_valence-1,
                                                num_H, at[a1].valence );
#if ( CHECK_AROMBOND2ALT == 1 )
                if ( bUnusualValenceArom && !bUnusualValenceNoArom && 0 == nBondsValToMetal( at, a1) )
#else
                if ( bUnusualValenceArom && !bUnusualValenceNoArom )
#endif
                {
                    /* typically NH in 5-member aromatic ring */
                    at[a1].chem_bonds_valence --;
                }
            } else
            if ( n2 && valence ) {
                /* atom has aromatic bonds AND the chemical valence is known */
                int num_H = NUMH(at, a1);
                int chem_valence = at[a1].chem_bonds_valence + num_H;
                if ( valence == chem_valence-1 ) {
                    /* typically NH in 5-member aromatic ring */
                    at[a1].chem_bonds_valence --;
                }
            }

            /*************************************************************************************
             *
             *  Set number of hydrogen atoms
             */
            if (mol_data) {
                at[a1].num_H = get_num_H( at[a1].elname, at[a1].num_H, at[a1].num_iso_H,
                                      at[a1].charge, at[a1].radical,
                                      at[a1].chem_bonds_valence,
                                      mol_data->ctab.MolAtom[a1].cValence, /* instead of valence */
                                      mol_data->ctab.MolAtom[a1].cAtomAliasedFlag,
                                      bDoNotAddH, bHasMetalNeighbor );
            }
        }
    }
}
|
601
|
+
/******************************************************************************************************/
|
602
|
+
/*
 * mol_to_atom_xyz
 *
 * Fills at[].x/.y/.z from the MOLfile atom block and reports how many
 * dimensions the structure occupies.
 *
 *   mol_data   parsed MOLfile; its ctab supplies atoms, bonds and counts
 *   num_atoms  ignored on input: it is overwritten with
 *              mol_data->ctab.nNumberOfAtoms
 *   at         destination array; must hold at least that many entries
 *   err        bit 0 is set when a bond references an impossible atom
 *   pStrErr    not referenced directly here; presumably consumed by the
 *              MOLFILE_ERR_SET macro -- confirm against its definition
 *
 * Returns 0 (no structure, or all atoms coincide), 2 (X and/or Y extent
 * only) or 3 (non-zero Z extent).
 *
 * With NORMALIZE_INP_COORD == 1 the coordinates are shifted, scaled by a
 * single common factor and rounded to integers; otherwise the input
 * coordinates are copied unchanged.
 */
int mol_to_atom_xyz( MOL_DATA* mol_data, int num_atoms, inp_ATOM* at, int *err, char *pStrErr )
{
    int      i, num_dimensions=0;
    int      num_bonds;
    double   max_x=-1.0e32, max_y=-1.0e32, max_z=-1.0e32;
    double   min_x= 1.0e32, min_y= 1.0e32, min_z= 1.0e32;
    double   macheps = 1.0e-10, small_coeff = 0.00001;
    double   x_coeff, y_coeff, z_coeff, coeff, average_bond_length;

    /*  *err = 0; */
    /* check if MOLfile contains atoms */
    if ( !mol_data || !mol_data->ctab.MolAtom ||
         (0 < mol_data->ctab.nNumberOfBonds && !mol_data->ctab.MolBond) ||
         0 >= (num_atoms = mol_data->ctab.nNumberOfAtoms) ) {
        goto exit_function; /*  no structure */
    }

    /* bounding box of all atoms */
    for ( i = 0; i < num_atoms; i ++ ) {
        max_x = inchi_max(mol_data->ctab.MolAtom[i].fX, max_x);
        min_x = inchi_min(mol_data->ctab.MolAtom[i].fX, min_x);
        max_y = inchi_max(mol_data->ctab.MolAtom[i].fY, max_y);
        min_y = inchi_min(mol_data->ctab.MolAtom[i].fY, min_y);
        max_z = inchi_max(mol_data->ctab.MolAtom[i].fZ, max_z);
        min_z = inchi_min(mol_data->ctab.MolAtom[i].fZ, min_z);
    }

    /* sum of bond lengths over all valid bonds */
    num_bonds = 0;
    average_bond_length = 0.0;
    for ( i = 0; i < mol_data->ctab.nNumberOfBonds; i ++ ) {
        int    a1 = mol_data->ctab.MolBond[i].nAtomNo1-1;
        int    a2 = mol_data->ctab.MolBond[i].nAtomNo2-1;
        double dx, dy, dz;

        /* validate the atom indices BEFORE using them to index MolAtom[] */
        if ( a1 < 0 || a1 >= num_atoms ||
             a2 < 0 || a2 >= num_atoms ||
             a1 == a2 ) {
            *err |= 1; /*  bond for impossible atom number(s); ignored */
            MOLFILE_ERR_SET (*err, 0, "Bond to nonexistent atom");
            continue;
        }
        dx = mol_data->ctab.MolAtom[a1].fX-mol_data->ctab.MolAtom[a2].fX;
        dy = mol_data->ctab.MolAtom[a1].fY-mol_data->ctab.MolAtom[a2].fY;
        dz = mol_data->ctab.MolAtom[a1].fZ-mol_data->ctab.MolAtom[a2].fZ;
        average_bond_length += sqrt( dx*dx + dy*dy + dz*dz );
        num_bonds ++;
    }

    /* convert to integral coordinates */

    /* per-axis stretching coefficient; 0.0 marks a (nearly) flat axis */
    if ( max_x - min_x <= small_coeff*(fabs(max_x) + fabs(min_x)) )
        x_coeff = 0.0;
    else
        x_coeff = (MAX_STDATA_X_COORD - MIN_STDATA_X_COORD)/(max_x - min_x);

    if ( max_y - min_y <= small_coeff*(fabs(max_y) + fabs(min_y)) )
        y_coeff = 0.0;
    else
        y_coeff = (MAX_STDATA_Y_COORD - MIN_STDATA_Y_COORD)/(max_y - min_y);
    if ( max_z - min_z <= small_coeff*(fabs(max_z) + fabs(min_z)) )
        z_coeff = 0.0;
    else
        z_coeff = (MAX_STDATA_Z_COORD - MIN_STDATA_Z_COORD)/(max_z - min_z);

    num_dimensions = ((x_coeff > macheps || y_coeff >macheps ) && fabs(z_coeff) < macheps)? 2:
                     (fabs(z_coeff) > macheps)? 3: 0;

    switch ( num_dimensions ) {
    case 0:
        coeff = 0.0;
        break;
    case 2:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = x_coeff;
        }else
        if ( y_coeff > macheps ){
            coeff = y_coeff;
        }else{
            coeff = 1.0;
        }
        break;
    case 3:
        /* choose the smallest stretching coefficient */
        if ( x_coeff > macheps && y_coeff > macheps ) {
            coeff = inchi_min( x_coeff, y_coeff );
            coeff = inchi_min( coeff, z_coeff );
        }else
        if ( x_coeff > macheps ){
            coeff = inchi_min( x_coeff, z_coeff );
        }else
        if ( y_coeff > macheps ){
            coeff = inchi_min( y_coeff, z_coeff );
        }else{
            coeff = z_coeff;
        }
        break;
    default:
        coeff = 0.0;
    }

    /* keep the scaled average bond length within the allowed window */
    if ( num_bonds > 0 ) {
        average_bond_length /= (double)num_bonds;
        if ( average_bond_length * coeff > MAX_STDATA_AVE_BOND_LENGTH ) {
            coeff = MAX_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too long bonds */
        } else
        if ( average_bond_length * coeff < macheps ) {
            coeff = 1.0; /* all lengths are of zero length */
        } else
        if ( average_bond_length * coeff < MIN_STDATA_AVE_BOND_LENGTH ) {
            coeff = MIN_STDATA_AVE_BOND_LENGTH / average_bond_length; /* avoid too short bonds */
        }
    }
#if( NORMALIZE_INP_COORD == 1 )
    /* set integral coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        x = (x - min_x)*coeff + MIN_STDATA_X_COORD;
        y = (y - min_y)*coeff + MIN_STDATA_Y_COORD;
        z = (z - min_z)*coeff + MIN_STDATA_Z_COORD;
        /* Round to the nearest integer, halves away from zero: floor() is
         * only ever applied to a non-negative argument and the sign is
         * restored afterwards.
         */
        at[i].x = ( x >= 0.0 )? (int)floor( x + 0.5 ) : -(int)floor( -x + 0.5 );
        at[i].y = ( y >= 0.0 )? (int)floor( y + 0.5 ) : -(int)floor( -y + 0.5 );
        at[i].z = ( z >= 0.0 )? (int)floor( z + 0.5 ) : -(int)floor( -z + 0.5 );
    }
#else
    /* set input coordinates */
    for ( i = 0; i < num_atoms; i ++ ) {
        double x = mol_data->ctab.MolAtom[i].fX;
        double y = mol_data->ctab.MolAtom[i].fY;
        double z = mol_data->ctab.MolAtom[i].fZ;
        at[i].x = x;
        at[i].y = y;
        at[i].z = z;
    }
#endif

exit_function:;
    return num_dimensions;
}
|
748
|
+
/****************************************************************************/
|
749
|
+
/*
 * GetMolfileNumber
 *
 * Extracts the structure number from a MOLfile header whose name line is
 * "Structure #<number>" (compared case-insensitively) and whose second
 * line starts with INCHI_NAME and contains "SDfile Output" after it.
 *
 * Returns the parsed number, or 0 when pHdr is NULL, the name line has a
 * different prefix, anything follows the number, or the second line does
 * not match.
 */
long GetMolfileNumber( MOL_HEADER_BLOCK *pHdr )
{
    static char  szNamePrefix[] = "Structure #";
    static char  szProgName[]   = INCHI_NAME;
    const size_t nPrefixLen     = sizeof(szNamePrefix)-1;
    const size_t nProgNameLen   = sizeof(szProgName)-1;
    char        *pNumEnd        = NULL;
    char        *pLine2;
    long         lNumber;

    if ( !pHdr ) {
        return 0;
    }
    if ( memicmp( pHdr->szMoleculeName, szNamePrefix, nPrefixLen ) ) {
        return 0; /* the name line does not start with the expected prefix */
    }

    lNumber = strtol( pHdr->szMoleculeName + nPrefixLen, &pNumEnd, 10 );
    pLine2  = pHdr->szMoleculeLine2;

    /* accept the number only if it ends the name line and line 2 matches */
    if ( pNumEnd && !*pNumEnd &&
         !memicmp( pLine2, szProgName, nProgNameLen ) &&
         strstr( pLine2 + nProgNameLen, "SDfile Output" ) ) {
        return lNumber;
    }
    return 0;
}
|
769
|
+
/****************************************************************************/
|
770
|
+
/*
 * MolfileToInpAtom
 *
 * Reads one MOLfile/SDfile segment from inp_molfile and, when at != NULL,
 * converts it into an inp_ATOM array.
 *
 *   at              NULL selects header-only mode: only the local header
 *                   block and ctab are filled and the atom count from that
 *                   ctab is returned. Otherwise *at (preallocated for
 *                   max_num_at atoms, or NULL) is handed to mol_to_atom()
 *                   and replaced by its return value.
 *   szCoord         if not NULL, any previous *szCoord is freed and, after
 *                   a successful conversion, *szCoord takes ownership of
 *                   mol_data->ctab.szCoord
 *   num_dimensions  receives the result of mol_to_atom_xyz()
 *   num_bonds       passed through to mol_to_atom()
 *   pSdfLabel,
 *   pSdfValue       if the reader left pSdfValue empty and pSdfLabel is one
 *                   of "MolfileName", "MolfileLine2", "MolfileComment" or
 *                   "MolfileIntRegNo", the value is taken from the header
 *   lMolfileNumber  receives GetMolfileNumber() of the header, if available
 *   pInpAtomFlags   gets FLAG_INP_AT_CHIRAL or FLAG_INP_AT_NONCHIRAL or-ed in
 *   err             positive values are increased by 100 after conversion;
 *                   70 (reported as 170) means "Too many atoms"
 *
 * Returns the number of atoms, or -1 when the structure has more than
 * max_num_at atoms.
 */
int MolfileToInpAtom( FILE *inp_molfile, int bDoNotAddH, inp_ATOM **at, MOL_COORD **szCoord, int max_num_at,
                      int *num_dimensions, int *num_bonds, const char *pSdfLabel, char *pSdfValue,
                      long *Id, long *lMolfileNumber, INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
{
    int               num_atoms = 0;
    MOL_DATA         *mol_data  = NULL;
    MOL_HEADER_BLOCK  OnlyHeaderBlock, *pOnlyHeaderBlock = NULL, *pHdr;
    MOL_CTAB          OnlyCtab,        *pOnlyCtab = NULL;
    char              cSdfValueFirstChar = '\0'; /* only meaningful when pSdfValue != NULL */
#ifdef CML_DEBUG
    FILE *f_p;
#endif
    if ( at ) {
        pOnlyHeaderBlock = NULL;
        if ( *at && max_num_at ) {
            memset( *at, 0, max_num_at * sizeof(inp_ATOM) );
        }
        if ( szCoord && *szCoord ) {
            inchi_free( *szCoord );
            *szCoord = NULL;
        }
    } else {
        /* header-only mode */
        pOnlyHeaderBlock = &OnlyHeaderBlock;
        pOnlyCtab        = &OnlyCtab;
    }
    if ( pSdfValue ) {
        /* remember the first character so it can be put back further down */
        cSdfValueFirstChar = pSdfValue[0];
        pSdfValue[0] = '\0';
    }

    mol_data = read_sdfile_segment(inp_molfile, pOnlyHeaderBlock, pOnlyCtab, NULL != szCoord,
                                   NULL, 0, Id, pSdfLabel, pSdfValue, err, pStrErr );

    pHdr = (  mol_data && !pOnlyHeaderBlock )? &mol_data->hdr :
           ( !mol_data &&  pOnlyHeaderBlock )? pOnlyHeaderBlock : NULL;
    if ( lMolfileNumber && pHdr ) {
        *lMolfileNumber = GetMolfileNumber( pHdr );
    }
    /* the reader found no value: try the pseudo-labels served by the header */
    if ( pSdfValue && !pSdfValue[0] &&
         pSdfLabel && pSdfLabel[0]  && pHdr ) {
        if ( !stricmp(pSdfLabel, "MolfileName") ) {
            mystrncpy( pSdfValue, pHdr->szMoleculeName, MAX_SDF_VALUE+1 );
            LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MolfileLine2") ) {
            mystrncpy( pSdfValue, pHdr->szMoleculeLine2, MAX_SDF_VALUE+1 );
            LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MolfileComment") ) {
            mystrncpy( pSdfValue, pHdr->szComment, MAX_SDF_VALUE+1 );
            LtrimRtrim( pSdfValue, NULL );
        } else
        if ( !stricmp(pSdfLabel, "MolfileIntRegNo") && pHdr->lInternalRegistryNumber ) {
            sprintf( pSdfValue, "%ld", pHdr->lInternalRegistryNumber );
        }
        if ( !pSdfValue[0] ) {
            /* still empty: put back the caller's original first character */
            pSdfValue[0] = cSdfValueFirstChar;
        }
    }

    if ( mol_data && at && !*err ) {
        /* *at is passed to mol_to_atom() unchanged, whether it points to
           preallocated memory or is NULL */
        if ( mol_data->ctab.nNumberOfAtoms <= max_num_at ) {
            *at = mol_to_atom( mol_data, &num_atoms, num_bonds, *at, bDoNotAddH, err, pStrErr );
            if ( *err >= 0 ) {
                *num_dimensions = mol_to_atom_xyz( mol_data, num_atoms, *at, err, pStrErr );

                if ( szCoord ) {
                    /* hand the coordinate strings over to the caller */
                    *szCoord = mol_data->ctab.szCoord;
                    mol_data->ctab.szCoord = NULL;
                }
            }
        } else {
            MOLFILE_ERR_SET (*err, 0, "Too many atoms");
            *err = 70;
            num_atoms = -1;
        }
        if ( *err > 0 ) {
            *err += 100;
        }
        /* 11-16-2004: use Chiral flag */
        if ( num_atoms > 0 && at && *at && mol_data && pInpAtomFlags ) {
            if ( mol_data->ctab.cChiralFlag ) {
                *pInpAtomFlags |= FLAG_INP_AT_CHIRAL;
            } else {
                *pInpAtomFlags |= FLAG_INP_AT_NONCHIRAL;
            }
        }
    } else
    if ( !at ) {
        num_atoms = pOnlyCtab->nNumberOfAtoms;
    }

    if ( !pOnlyHeaderBlock ) {
        delete_mol_data( mol_data );
    }
#ifdef CML_DEBUG
        puts ("MOL");
        f_p = fopen ("mol.dbg", "a");
        if (f_p)
            {
                /* at is NULL in header-only mode: do not dereference it */
                PrintInpAtom (f_p, at ? *at : NULL, num_atoms);
                fclose (f_p);
            }
        else
            {
                puts ("Couldn't open file");
            }
#endif

    return num_atoms;
}
|
896
|
+
/**********************************************************************************/
|
897
|
+
/*
 * FreeOrigAtData
 *
 * Releases everything owned by *orig_at_data: the atom array (through
 * FreeInpAtom) and the nCurAtLen, nOldCompNumber, szCoord, nEquLabels and
 * nSortedOrder arrays. The whole structure is then zeroed.
 * The structure itself is not freed. A NULL argument is ignored.
 */
void FreeOrigAtData( ORIG_ATOM_DATA *orig_at_data )
{
    if ( orig_at_data ) {
        FreeInpAtom( &orig_at_data->at );

        if ( NULL != orig_at_data->nCurAtLen ) {
            inchi_free( orig_at_data->nCurAtLen );
        }
        if ( NULL != orig_at_data->nOldCompNumber ) {
            inchi_free( orig_at_data->nOldCompNumber );
        }
        if ( NULL != orig_at_data->szCoord ) {
            inchi_free( orig_at_data->szCoord );
        }
        if ( NULL != orig_at_data->nEquLabels ) {
            inchi_free( orig_at_data->nEquLabels );
        }
        if ( NULL != orig_at_data->nSortedOrder ) {
            inchi_free( orig_at_data->nSortedOrder );
        }

        /* clear every member, including the pointers just released */
        memset( orig_at_data, 0, sizeof(*orig_at_data) );
    }
}
|
921
|
+
/**********************************************************************************/
|
922
|
+
int MolfileToOrigAtom( FILE *inp_molfile, ORIG_ATOM_DATA *orig_at_data, int bMergeAllInputStructures,
|
923
|
+
int bGetOrigCoord, int bDoNotAddH,
|
924
|
+
const char *pSdfLabel, char *pSdfValue, long *lSdfId, long *lMolfileNumber,
|
925
|
+
INCHI_MODE *pInpAtomFlags, int *err, char *pStrErr )
|
926
|
+
{
|
927
|
+
/* inp_ATOM *at = NULL; */
|
928
|
+
int num_dimensions_new;
|
929
|
+
int num_inp_bonds_new;
|
930
|
+
int num_inp_atoms_new;
|
931
|
+
inp_ATOM *at_new = NULL;
|
932
|
+
inp_ATOM *at_old = NULL;
|
933
|
+
int nNumAtoms = 0;
|
934
|
+
MOL_COORD *szCoordNew = NULL;
|
935
|
+
MOL_COORD *szCoordOld = NULL;
|
936
|
+
int i, j;
|
937
|
+
|
938
|
+
if ( pStrErr ) {
|
939
|
+
pStrErr[0] = '\0';
|
940
|
+
}
|
941
|
+
|
942
|
+
/*FreeOrigAtData( orig_at_data );*/
|
943
|
+
|
944
|
+
do {
|
945
|
+
|
946
|
+
at_old = orig_at_data? orig_at_data->at : NULL; /* save pointer to the previous allocation */
|
947
|
+
szCoordOld = orig_at_data? orig_at_data->szCoord : NULL;
|
948
|
+
num_inp_atoms_new =
|
949
|
+
MolfileToInpAtom( inp_molfile, bDoNotAddH, orig_at_data? &at_new:NULL, (bGetOrigCoord && orig_at_data)? &szCoordNew : NULL, MAX_ATOMS,
|
950
|
+
&num_dimensions_new, &num_inp_bonds_new,
|
951
|
+
pSdfLabel, pSdfValue, lSdfId, lMolfileNumber, pInpAtomFlags, err, pStrErr );
|
952
|
+
|
953
|
+
|
954
|
+
if ( num_inp_atoms_new <= 0 && !*err ) {
|
955
|
+
MOLFILE_ERR_SET (*err, 0, "Empty structure");
|
956
|
+
*err = 98;
|
957
|
+
} else
|
958
|
+
if ( orig_at_data && !num_inp_atoms_new && 10 < *err && *err < 20 && orig_at_data->num_inp_atoms > 0 && bMergeAllInputStructures ) {
|
959
|
+
*err = 0; /* end of file */
|
960
|
+
break;
|
961
|
+
} else
|
962
|
+
if ( num_inp_atoms_new > 0 && orig_at_data ) {
|
963
|
+
/* merge pOrigDataTmp + orig_at_data => pOrigDataTmp; */
|
964
|
+
nNumAtoms = num_inp_atoms_new + orig_at_data->num_inp_atoms;
|
965
|
+
if ( nNumAtoms >= MAX_ATOMS ) {
|
966
|
+
MOLFILE_ERR_SET (*err, 0, "Too many atoms");
|
967
|
+
*err = 70;
|
968
|
+
orig_at_data->num_inp_atoms = -1;
|
969
|
+
} else
|
970
|
+
if ( !at_old ) {
|
971
|
+
/* the first structure */
|
972
|
+
orig_at_data->at = at_new;
|
973
|
+
orig_at_data->szCoord = szCoordNew;
|
974
|
+
at_new = NULL;
|
975
|
+
szCoordNew = NULL;
|
976
|
+
orig_at_data->num_inp_atoms = num_inp_atoms_new;
|
977
|
+
orig_at_data->num_inp_bonds = num_inp_bonds_new;
|
978
|
+
orig_at_data->num_dimensions = num_dimensions_new;
|
979
|
+
} else
|
980
|
+
if ( (orig_at_data->at = ( inp_ATOM* ) inchi_calloc( nNumAtoms, sizeof(inp_ATOM) )) &&
|
981
|
+
(!szCoordNew || (orig_at_data->szCoord = (MOL_COORD *) inchi_calloc( nNumAtoms, sizeof(MOL_COORD) ))) ) {
|
982
|
+
/* switch at_new <--> orig_at_data->at; */
|
983
|
+
if ( orig_at_data->num_inp_atoms ) {
|
984
|
+
memcpy( orig_at_data->at, at_old, orig_at_data->num_inp_atoms * sizeof(orig_at_data->at[0]) );
|
985
|
+
/* adjust numbering in the newly read structure */
|
986
|
+
for ( i = 0; i < num_inp_atoms_new; i ++ ) {
|
987
|
+
for ( j = 0; j < at_new[i].valence; j ++ ) {
|
988
|
+
at_new[i].neighbor[j] += orig_at_data->num_inp_atoms;
|
989
|
+
}
|
990
|
+
at_new[i].orig_at_number += orig_at_data->num_inp_atoms; /* 12-19-2003 */
|
991
|
+
}
|
992
|
+
if ( orig_at_data->szCoord && szCoordOld ) {
|
993
|
+
memcpy( orig_at_data->szCoord, szCoordOld, orig_at_data->num_inp_atoms * sizeof(MOL_COORD) );
|
994
|
+
}
|
995
|
+
}
|
996
|
+
if ( at_old ) {
|
997
|
+
inchi_free( at_old );
|
998
|
+
at_old = NULL;
|
999
|
+
}
|
1000
|
+
if ( szCoordOld ) {
|
1001
|
+
inchi_free( szCoordOld );
|
1002
|
+
szCoordOld = NULL;
|
1003
|
+
}
|
1004
|
+
/* copy newly read structure */
|
1005
|
+
memcpy( orig_at_data->at + orig_at_data->num_inp_atoms,
|
1006
|
+
at_new,
|
1007
|
+
num_inp_atoms_new * sizeof(orig_at_data->at[0]) );
|
1008
|
+
if ( orig_at_data->szCoord && szCoordNew ) {
|
1009
|
+
memcpy( orig_at_data->szCoord + orig_at_data->num_inp_atoms,
|
1010
|
+
szCoordNew,
|
1011
|
+
num_inp_atoms_new * sizeof(MOL_COORD) );
|
1012
|
+
}
|
1013
|
+
/* add other things */
|
1014
|
+
orig_at_data->num_inp_atoms += num_inp_atoms_new;
|
1015
|
+
orig_at_data->num_inp_bonds += num_inp_bonds_new;
|
1016
|
+
orig_at_data->num_dimensions = inchi_max(num_dimensions_new, orig_at_data->num_dimensions);
|
1017
|
+
} else {
|
1018
|
+
MOLFILE_ERR_SET (*err, 0, "Out of RAM");
|
1019
|
+
*err = -1;
|
1020
|
+
}
|
1021
|
+
} else
|
1022
|
+
if ( num_inp_atoms_new > 0 ) {
|
1023
|
+
nNumAtoms += num_inp_atoms_new;
|
1024
|
+
}
|
1025
|
+
if ( at_new ) {
|
1026
|
+
inchi_free( at_new );
|
1027
|
+
at_new = NULL;
|
1028
|
+
}
|
1029
|
+
|
1030
|
+
} while ( !*err && bMergeAllInputStructures );
|
1031
|
+
/*
|
1032
|
+
if ( !*err ) {
|
1033
|
+
orig_at_data->num_components =
|
1034
|
+
MarkDisconnectedComponents( orig_at_data );
|
1035
|
+
if ( orig_at_data->num_components == 0 ) {
|
1036
|
+
MOLFILE_ERR_SET (*err, 0, "No components found");
|
1037
|
+
*err = 99;
|
1038
|
+
}
|
1039
|
+
if ( orig_at_data->num_components < 0 ) {
|
1040
|
+
MOLFILE_ERR_SET (*err, 0, "Too many components");
|
1041
|
+
*err = 99;
|
1042
|
+
}
|
1043
|
+
}
|
1044
|
+
*/
|
1045
|
+
if ( szCoordNew ) {
|
1046
|
+
inchi_free( szCoordNew );
|
1047
|
+
}
|
1048
|
+
if ( at_new ) {
|
1049
|
+
inchi_free( at_new );
|
1050
|
+
}
|
1051
|
+
if ( *err ) {
|
1052
|
+
FreeOrigAtData( orig_at_data );
|
1053
|
+
}
|
1054
|
+
if ( *err && !(10 < *err && *err < 20) && pStrErr && !pStrErr[0] ) {
|
1055
|
+
MOLFILE_ERR_SET (*err, 0, "Unknown error"); /* <BRKPT> */
|
1056
|
+
}
|
1057
|
+
return orig_at_data? orig_at_data->num_inp_atoms : nNumAtoms;
|
1058
|
+
}
|
1059
|
+
|
1060
|
+
|